import attr
import pytest

from mlagents.trainers.tests.simple_test_envs import (
    SimpleEnvironment,
    MemoryEnvironment,
    RecordEnvironment,
)
from mlagents.trainers.demo_loader import write_demo
from mlagents.trainers.settings import (
    NetworkSettings,
    SelfPlaySettings,
    BehavioralCloningSettings,
    GAILSettings,
    RewardSignalType,
    EncoderType,
    FrameworkType,
)
from mlagents.trainers.tests.dummy_config import ppo_dummy_config
from mlagents_envs.communicator_objects.demonstration_meta_pb2 import (
    DemonstrationMetaProto,
)
from mlagents_envs.communicator_objects.brain_parameters_pb2 import BrainParametersProto
from mlagents_envs.communicator_objects.space_type_pb2 import discrete, continuous
from mlagents.trainers.tests.check_env_trains import (
    check_environment_trains,
    default_reward_processor,
)
|
|
BRAIN_NAME = "1D"

# Torch PPO config shared by all tests below. Assumption: ppo_dummy_config()
# from the tests' dummy_config module supplies the baseline TrainerSettings,
# with only the framework switched to PyTorch.
PPO_TORCH_CONFIG = attr.evolve(ppo_dummy_config(), framework=FrameworkType.PYTORCH)
|
|
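# Hybrid action space: one continuous and one discrete branch in a single policy.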
def test_hybrid_ppo():
    env = SimpleEnvironment([BRAIN_NAME], action_sizes=(1, 1))
    config = attr.evolve(PPO_TORCH_CONFIG)
    check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=1.0)
|
|
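# Degenerate hybrid case: continuous branch only (the discrete branch is empty).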
def test_conthybrid_ppo():
    env = SimpleEnvironment([BRAIN_NAME], action_sizes=(1, 0))
    config = attr.evolve(PPO_TORCH_CONFIG)
    check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=1.0)
|
|
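# Degenerate hybrid case: discrete branch only (the continuous branch is empty).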
def test_dischybrid_ppo():
    env = SimpleEnvironment([BRAIN_NAME], action_sizes=(0, 1))
    config = attr.evolve(PPO_TORCH_CONFIG)
    check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=1.0)
|
|
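# Hybrid actions with visual observations: num_visual camera observations and no
# vector observations. The parametrized wrapper below (test name and values) is
# an assumed reconstruction in the style of the other tests.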
@pytest.mark.parametrize("num_visual", [1, 2])
def test_hybrid_visual_ppo(num_visual):
    env = SimpleEnvironment(
        [BRAIN_NAME], num_visual=num_visual, num_vector=0, action_sizes=(1, 1)
    )
    new_hyperparams = attr.evolve(
        PPO_TORCH_CONFIG.hyperparameters, learning_rate=3.0e-4
    )
    config = attr.evolve(PPO_TORCH_CONFIG, hyperparameters=new_hyperparams)
    check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=1.0)
|
|
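# Hybrid actions with a recurrent policy: MemoryEnvironment requires information
# to be carried across steps, so a memory module is added to the network settings.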
def test_recurrent_ppo():
    env = MemoryEnvironment([BRAIN_NAME], action_sizes=(1, 1))
    new_network_settings = attr.evolve(
        PPO_TORCH_CONFIG.network_settings,
        memory=NetworkSettings.MemorySettings(memory_size=16),
    )
    # NOTE: the learning-rate override below is an assumed value; the config
    # otherwise mirrors the structure of the other tests.
    new_hyperparams = attr.evolve(
        PPO_TORCH_CONFIG.hyperparameters, learning_rate=1.0e-3
    )
    config = attr.evolve(
        PPO_TORCH_CONFIG,
        hyperparameters=new_hyperparams,
        network_settings=new_network_settings,
        max_steps=10000,
    )
    check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=1.0)
|
|
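# Wider hybrid action spaces (two branches on one side). Kept commented out;
# they exercise the same code path with larger batch and buffer sizes.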
# def test_3cdhybrid_ppo():
#     env = SimpleEnvironment(
#         [BRAIN_NAME], action_sizes=(2, 1), step_size=0.8
#     )
#     new_hyperparams = attr.evolve(
#         PPO_TORCH_CONFIG.hyperparameters, batch_size=128, buffer_size=1280, beta=0.01
#     )
#     config = attr.evolve(
#         PPO_TORCH_CONFIG, hyperparameters=new_hyperparams, max_steps=10000
#     )
#     check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=1.0)
#
#
# def test_3ddhybrid_ppo():
#     env = SimpleEnvironment(
#         [BRAIN_NAME], action_sizes=(1, 2), step_size=0.8
#     )
#     new_hyperparams = attr.evolve(
#         PPO_TORCH_CONFIG.hyperparameters, batch_size=128, buffer_size=1280, beta=0.01
#     )
#     config = attr.evolve(
#         PPO_TORCH_CONFIG, hyperparameters=new_hyperparams, max_steps=10000
#     )
#     check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=1.0)