The Unity Machine Learning Agents Toolkit (ML-Agents) is an open-source project that enables games and simulations to serve as environments for training intelligent agents.
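The file below is one of the toolkit's Python trainer tests: it exercises PPO on lightweight stand-in environments (SimpleEnvironment, MemoryEnvironment) rather than a real Unity build. For orientation, a minimal sketch of driving an actual Unity executable through the low-level mlagents_envs API might look like the following; the build name is hypothetical and the single-behavior loop is a simplifying assumption, not something taken from the test file itself.

from mlagents_envs.environment import UnityEnvironment

# Launch a Unity build (file_name=None would attach to a running Editor instead).
env = UnityEnvironment(file_name="MyEnvironmentBuild")  # hypothetical build name
env.reset()
behavior_name = list(env.behavior_specs)[0]  # assumes a single behavior
spec = env.behavior_specs[behavior_name]

for _ in range(100):
    decision_steps, terminal_steps = env.get_steps(behavior_name)
    # Sample a random action for every agent that requested a decision;
    # random_action() covers continuous, discrete, and hybrid action specs.
    env.set_actions(behavior_name, spec.action_spec.random_action(len(decision_steps)))
    env.step()
env.close()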

import pytest
import attr

from mlagents.trainers.tests.simple_test_envs import (
    SimpleEnvironment,
    MemoryEnvironment,
    RecordEnvironment,
)
from mlagents.trainers.demo_loader import write_demo
from mlagents.trainers.settings import (
    NetworkSettings,
    SelfPlaySettings,
    BehavioralCloningSettings,
    GAILSettings,
    RewardSignalType,
    EncoderType,
    FrameworkType,
)
from mlagents_envs.communicator_objects.demonstration_meta_pb2 import (
    DemonstrationMetaProto,
)
from mlagents_envs.communicator_objects.brain_parameters_pb2 import BrainParametersProto
from mlagents_envs.communicator_objects.space_type_pb2 import discrete, continuous
from mlagents.trainers.tests.dummy_config import ppo_dummy_config, sac_dummy_config
from mlagents.trainers.tests.check_env_trains import (
    check_environment_trains,
    default_reward_processor,
)

# Behavior name shared by all of the simple test environments below.
BRAIN_NAME = "1D"

# Baseline PPO and SAC trainer configs, pinned to the PyTorch backend.
PPO_TORCH_CONFIG = attr.evolve(ppo_dummy_config(), framework=FrameworkType.PYTORCH)
SAC_TORCH_CONFIG = attr.evolve(sac_dummy_config(), framework=FrameworkType.PYTORCH)


def test_hybrid_ppo():
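    # Hybrid action space: one continuous and one discrete action branch.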
    env = SimpleEnvironment(
        [BRAIN_NAME], continuous_action_size=1, discrete_action_size=1
    )
    config = attr.evolve(PPO_TORCH_CONFIG)
    check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=1.0)


def test_conthybrid_ppo():
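    # Continuous-only edge case of the hybrid action path (no discrete branch).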
    env = SimpleEnvironment(
        [BRAIN_NAME], continuous_action_size=1, discrete_action_size=0
    )
    config = attr.evolve(PPO_TORCH_CONFIG)
    check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=1.0)


def test_dischybrid_ppo():
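    # Discrete-only edge case of the hybrid action path (no continuous branch).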
    env = SimpleEnvironment(
        [BRAIN_NAME], continuous_action_size=0, discrete_action_size=1
    )
    config = attr.evolve(PPO_TORCH_CONFIG)
    check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=1.0)


@pytest.mark.parametrize("num_visual", [1, 2])
def test_visual_ppo(num_visual):
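    # Hybrid actions driven by visual observations only (num_vector=0),
    # parametrized over one or two camera inputs.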
    env = SimpleEnvironment(
        [BRAIN_NAME],
        num_visual=num_visual,
        num_vector=0,
        continuous_action_size=1,
        discrete_action_size=1,
    )
    new_hyperparams = attr.evolve(
        PPO_TORCH_CONFIG.hyperparameters, learning_rate=3.0e-4
    )
    config = attr.evolve(PPO_TORCH_CONFIG, hyperparameters=new_hyperparams)
    check_environment_trains(env, {BRAIN_NAME: config})


def test_recurrent_ppo():
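    # Hybrid actions with recurrence: MemoryEnvironment requires information
    # from earlier steps, so the network settings enable memory (memory_size=16).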
    env = MemoryEnvironment(
        [BRAIN_NAME], continuous_action_size=1, discrete_action_size=1
    )
    new_network_settings = attr.evolve(
        PPO_TORCH_CONFIG.network_settings,
        memory=NetworkSettings.MemorySettings(memory_size=16),
    )
    new_hyperparams = attr.evolve(
        PPO_TORCH_CONFIG.hyperparameters,
        learning_rate=1.0e-3,
        batch_size=64,
        buffer_size=128,
    )
    config = attr.evolve(
        PPO_TORCH_CONFIG,
        hyperparameters=new_hyperparams,
        network_settings=new_network_settings,
        max_steps=100000,
    )
    check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)


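# The larger hybrid-action variants below are currently disabled (commented out).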
# def test_3cdhybrid_ppo():
#     env = SimpleEnvironment(
#         [BRAIN_NAME], continuous_action_size=2, discrete_action_size=1, step_size=0.8
#     )
#     new_hyperparams = attr.evolve(
#         PPO_TORCH_CONFIG.hyperparameters, batch_size=128, buffer_size=1280, beta=0.01
#     )
#     config = attr.evolve(
#         PPO_TORCH_CONFIG, hyperparameters=new_hyperparams, max_steps=10000
#     )
#     check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=1.0)
#
#
# def test_3ddhybrid_ppo():
#     env = SimpleEnvironment(
#         [BRAIN_NAME], continuous_action_size=1, discrete_action_size=2, step_size=0.8
#     )
#     new_hyperparams = attr.evolve(
#         PPO_TORCH_CONFIG.hyperparameters, batch_size=128, buffer_size=1280, beta=0.01
#     )
#     config = attr.evolve(
#         PPO_TORCH_CONFIG, hyperparameters=new_hyperparams, max_steps=10000
#     )
#     check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=1.0)