
update simple rl tests

/develop/actionmodel-csharp
Andrew Cohen, 4 years ago
Current commit 601f02a8
1 file changed, 44 insertions and 44 deletions
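The change is mechanical: every active test swaps the TF-era PPO_CONFIG / SAC_CONFIG constants for PPO_TORCH_CONFIG / SAC_TORCH_CONFIG and the private _check_environment_trains helper for check_environment_trains; the commented-out tests only pick up the config rename. A minimal sketch of the resulting test shape, assuming SimpleEnvironment, BRAIN_NAME, PPO_TORCH_CONFIG, and check_environment_trains are in scope the way the rest of this file imports them (the test name and body below are illustrative, not taken verbatim from the commit):

    import attr

    # Illustrative sketch -- the import paths for the test utilities are
    # assumptions and are not shown in this diff.
    # from mlagents.trainers.tests.simple_test_envs import SimpleEnvironment

    def test_2d_ppo_sketch():
        # Toy environment; use_discrete and action_size are kwargs visible in the diff.
        env = SimpleEnvironment([BRAIN_NAME], use_discrete=False, action_size=2)
        # attr.evolve returns a copy of the frozen attrs settings object with only
        # the named fields overridden, so the shared default config is never mutated.
        new_hyperparams = attr.evolve(
            PPO_TORCH_CONFIG.hyperparameters, batch_size=32, buffer_size=1280
        )
        config = attr.evolve(
            PPO_TORCH_CONFIG, hyperparameters=new_hyperparams, max_steps=10000
        )
        check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=1.0)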
ml-agents/mlagents/trainers/tests/torch/test_simple_rl.py  (88 changes: +44 / -44)

# @pytest.mark.parametrize("use_discrete", [True, False])
# def test_simple_ppo(use_discrete):
# env = SimpleEnvironment([BRAIN_NAME], use_discrete=use_discrete)
-# config = attr.evolve(PPO_CONFIG)
+# config = attr.evolve(PPO_TORCH_CONFIG)
# _check_environment_trains(env, {BRAIN_NAME: config})

)
new_hyperparams = attr.evolve(
-PPO_CONFIG.hyperparameters, batch_size=32, buffer_size=1280
+PPO_TORCH_CONFIG.hyperparameters, batch_size=32, buffer_size=1280
-config = attr.evolve(PPO_CONFIG, hyperparameters=new_hyperparams, max_steps=10000)
-_check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=1.0)
+config = attr.evolve(PPO_TORCH_CONFIG, hyperparameters=new_hyperparams, max_steps=10000)
+check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=1.0)
def test_conthybrid_ppo():

-config = attr.evolve(PPO_CONFIG)
-_check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=1.0)
+config = attr.evolve(PPO_TORCH_CONFIG)
+check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=1.0)
def test_dischybrid_ppo():

-config = attr.evolve(PPO_CONFIG)
-_check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=1.0)
+config = attr.evolve(PPO_TORCH_CONFIG)
+check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=1.0)
def test_3chybrid_ppo():

new_hyperparams = attr.evolve(
-PPO_CONFIG.hyperparameters, batch_size=128, buffer_size=1280, beta=0.01
+PPO_TORCH_CONFIG.hyperparameters, batch_size=128, buffer_size=1280, beta=0.01
-config = attr.evolve(PPO_CONFIG, hyperparameters=new_hyperparams, max_steps=10000)
-_check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=1.0)
+config = attr.evolve(PPO_TORCH_CONFIG, hyperparameters=new_hyperparams, max_steps=10000)
+check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=1.0)
def test_3ddhybrid_ppo():

new_hyperparams = attr.evolve(
-PPO_CONFIG.hyperparameters, batch_size=128, buffer_size=1280, beta=0.05
+PPO_TORCH_CONFIG.hyperparameters, batch_size=128, buffer_size=1280, beta=0.05
-config = attr.evolve(PPO_CONFIG, hyperparameters=new_hyperparams, max_steps=10000)
-_check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=1.0)
+config = attr.evolve(PPO_TORCH_CONFIG, hyperparameters=new_hyperparams, max_steps=10000)
+check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=1.0)
# @pytest.mark.parametrize("use_discrete", [True, False])

# )
# new_hyperparams = attr.evolve(
-# PPO_CONFIG.hyperparameters, batch_size=64, buffer_size=640
+# PPO_TORCH_CONFIG.hyperparameters, batch_size=64, buffer_size=640
-# config = attr.evolve(PPO_CONFIG, hyperparameters=new_hyperparams, max_steps=10000)
+# config = attr.evolve(PPO_TORCH_CONFIG, hyperparameters=new_hyperparams, max_steps=10000)
# _check_environment_trains(env, {BRAIN_NAME: config})
#
#

# num_vector=0,
# step_size=0.2,
# )
-# new_hyperparams = attr.evolve(PPO_CONFIG.hyperparameters, learning_rate=3.0e-4)
-# config = attr.evolve(PPO_CONFIG, hyperparameters=new_hyperparams)
+# new_hyperparams = attr.evolve(PPO_TORCH_CONFIG.hyperparameters, learning_rate=3.0e-4)
+# config = attr.evolve(PPO_TORCH_CONFIG, hyperparameters=new_hyperparams)
# _check_environment_trains(env, {BRAIN_NAME: config})
#
#

# vis_obs_size=(5, 5, 5) if vis_encode_type == "match3" else (36, 36, 3),
# )
# new_networksettings = attr.evolve(
-# SAC_CONFIG.network_settings, vis_encode_type=EncoderType(vis_encode_type)
+# SAC_TORCH_CONFIG.network_settings, vis_encode_type=EncoderType(vis_encode_type)
-# new_hyperparams = attr.evolve(PPO_CONFIG.hyperparameters, learning_rate=3.0e-4)
+# new_hyperparams = attr.evolve(PPO_TORCH_CONFIG.hyperparameters, learning_rate=3.0e-4)
-# PPO_CONFIG,
+# PPO_TORCH_CONFIG,
# hyperparameters=new_hyperparams,
# network_settings=new_networksettings,
# max_steps=700,

# def test_recurrent_ppo(use_discrete):
# env = MemoryEnvironment([BRAIN_NAME], use_discrete=use_discrete)
# new_network_settings = attr.evolve(
-# PPO_CONFIG.network_settings,
+# PPO_TORCH_CONFIG.network_settings,
-# PPO_CONFIG.hyperparameters, learning_rate=1.0e-3, batch_size=64, buffer_size=128
+# PPO_TORCH_CONFIG.hyperparameters, learning_rate=1.0e-3, batch_size=64, buffer_size=128
-# PPO_CONFIG,
+# PPO_TORCH_CONFIG,
# hyperparameters=new_hyperparams,
# network_settings=new_network_settings,
# max_steps=5000,

# @pytest.mark.parametrize("use_discrete", [True, False])
# def test_simple_sac(use_discrete):
# env = SimpleEnvironment([BRAIN_NAME], use_discrete=use_discrete)
-# config = attr.evolve(SAC_CONFIG)
+# config = attr.evolve(SAC_TORCH_CONFIG)
# _check_environment_trains(env, {BRAIN_NAME: config})
#
#

# [BRAIN_NAME], use_discrete=use_discrete, action_size=2, step_size=0.8
# )
-# new_hyperparams = attr.evolve(SAC_CONFIG.hyperparameters, buffer_init_steps=2000)
-# config = attr.evolve(SAC_CONFIG, hyperparameters=new_hyperparams, max_steps=10000)
+# new_hyperparams = attr.evolve(SAC_TORCH_CONFIG.hyperparameters, buffer_init_steps=2000)
+# config = attr.evolve(SAC_TORCH_CONFIG, hyperparameters=new_hyperparams, max_steps=10000)
# _check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.8)
#
#

# step_size=0.2,
# )
# new_hyperparams = attr.evolve(
-# SAC_CONFIG.hyperparameters, batch_size=16, learning_rate=3e-4
+# SAC_TORCH_CONFIG.hyperparameters, batch_size=16, learning_rate=3e-4
-# config = attr.evolve(SAC_CONFIG, hyperparameters=new_hyperparams)
+# config = attr.evolve(SAC_TORCH_CONFIG, hyperparameters=new_hyperparams)
# _check_environment_trains(env, {BRAIN_NAME: config})
#
#

# vis_obs_size=(5, 5, 5) if vis_encode_type == "match3" else (36, 36, 3),
# )
# new_networksettings = attr.evolve(
-# SAC_CONFIG.network_settings, vis_encode_type=EncoderType(vis_encode_type)
+# SAC_TORCH_CONFIG.network_settings, vis_encode_type=EncoderType(vis_encode_type)
-# SAC_CONFIG.hyperparameters,
+# SAC_TORCH_CONFIG.hyperparameters,
-# SAC_CONFIG,
+# SAC_TORCH_CONFIG,
# hyperparameters=new_hyperparams,
# network_settings=new_networksettings,
# max_steps=100,

# [BRAIN_NAME], use_discrete=use_discrete, step_size=step_size
# )
# new_networksettings = attr.evolve(
-# SAC_CONFIG.network_settings,
+# SAC_TORCH_CONFIG.network_settings,
-# SAC_CONFIG.hyperparameters,
+# SAC_TORCH_CONFIG.hyperparameters,
# batch_size=128,
# learning_rate=1e-3,
# buffer_init_steps=1000,

-# SAC_CONFIG,
+# SAC_TORCH_CONFIG,
# hyperparameters=new_hyperparams,
# network_settings=new_networksettings,
# max_steps=5000,

# self_play_settings = SelfPlaySettings(
# play_against_latest_model_ratio=1.0, save_steps=2000, swap_steps=2000
# )
-# config = attr.evolve(PPO_CONFIG, self_play=self_play_settings, max_steps=2500)
+# config = attr.evolve(PPO_TORCH_CONFIG, self_play=self_play_settings, max_steps=2500)
# _check_environment_trains(env, {BRAIN_NAME: config})
#
#

# self_play_settings = SelfPlaySettings(
# play_against_latest_model_ratio=1.0, save_steps=2000, swap_steps=4000
# )
-# config = attr.evolve(PPO_CONFIG, self_play=self_play_settings, max_steps=2500)
+# config = attr.evolve(PPO_TORCH_CONFIG, self_play=self_play_settings, max_steps=2500)
# _check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=None)
# processed_rewards = [
# default_reward_processor(rewards) for rewards in env.final_rewards.values()

# swap_steps=10000,
# team_change=400,
# )
-# config = attr.evolve(PPO_CONFIG, self_play=self_play_settings, max_steps=4000)
+# config = attr.evolve(PPO_TORCH_CONFIG, self_play=self_play_settings, max_steps=4000)
# _check_environment_trains(env, {BRAIN_NAME: config, brain_name_opp: config})
#
#

# swap_steps=5000,
# team_change=2000,
# )
-# config = attr.evolve(PPO_CONFIG, self_play=self_play_settings, max_steps=3000)
+# config = attr.evolve(PPO_TORCH_CONFIG, self_play=self_play_settings, max_steps=3000)
# _check_environment_trains(
# env, {BRAIN_NAME: config, brain_name_opp: config}, success_threshold=None
# )

#
#
# @pytest.mark.parametrize("use_discrete", [True, False])
# @pytest.mark.parametrize("trainer_config", [PPO_CONFIG, SAC_CONFIG])
# @pytest.mark.parametrize("trainer_config", [PPO_TORCH_CONFIG, SAC_TORCH_CONFIG])
# def test_gail(simple_record, use_discrete, trainer_config):
# demo_path = simple_record(use_discrete)
# env = SimpleEnvironment([BRAIN_NAME], use_discrete=use_discrete, step_size=0.2)

# reward_signals = {
# RewardSignalType.GAIL: GAILSettings(encoding_size=32, demo_path=demo_path)
# }
-# hyperparams = attr.evolve(PPO_CONFIG.hyperparameters, learning_rate=3e-4)
+# hyperparams = attr.evolve(PPO_TORCH_CONFIG.hyperparameters, learning_rate=3e-4)
-# PPO_CONFIG,
+# PPO_TORCH_CONFIG,
# reward_signals=reward_signals,
# hyperparameters=hyperparams,
# behavioral_cloning=bc_settings,

# RewardSignalType.GAIL: GAILSettings(encoding_size=32, demo_path=demo_path)
# }
# hyperparams = attr.evolve(
-# SAC_CONFIG.hyperparameters, learning_rate=3e-4, batch_size=16
+# SAC_TORCH_CONFIG.hyperparameters, learning_rate=3e-4, batch_size=16
-# SAC_CONFIG,
+# SAC_TORCH_CONFIG,
# reward_signals=reward_signals,
# hyperparameters=hyperparams,
# behavioral_cloning=bc_settings,
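The commented-out self-play tests above follow the same pattern after the rename. A hedged sketch of their shape, with the two-team environment setup elided because it does not appear in this diff (SelfPlaySettings and its fields are taken from the commented lines; everything else is illustrative):

    import attr

    def test_self_play_sketch():
        # Placeholder single-team environment -- the real test builds a
        # two-team environment that is not visible in this diff.
        env = SimpleEnvironment([BRAIN_NAME], use_discrete=True)
        self_play_settings = SelfPlaySettings(
            play_against_latest_model_ratio=1.0, save_steps=2000, swap_steps=2000
        )
        config = attr.evolve(
            PPO_TORCH_CONFIG, self_play=self_play_settings, max_steps=2500
        )
        check_environment_trains(env, {BRAIN_NAME: config})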
