Browse code

recurrent gail tests

/test-recurrent-gail
Andrew Cohen, 5 years ago
Current commit
f6d6e3d0
1 file changed, with 205 insertions and 159 deletions
  1. ml-agents/mlagents/trainers/tests/test_simple_rl.py (364 lines changed)

ml-agents/mlagents/trainers/tests/test_simple_rl.py


    assert all(reward > success_threshold for reward in processed_rewards)


@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_ppo(use_discrete):
    env = Simple1DEnvironment([BRAIN_NAME], use_discrete=use_discrete)
    config = generate_config(PPO_CONFIG)
    _check_environment_trains(env, config)


@pytest.mark.parametrize("use_discrete", [True, False])
@pytest.mark.parametrize("num_visual", [1, 2])
def test_visual_ppo(num_visual, use_discrete):
    env = Simple1DEnvironment(
        [BRAIN_NAME],
        use_discrete=use_discrete,
        num_visual=num_visual,
        num_vector=0,
        step_size=0.2,
    )
    override_vals = {"learning_rate": 3.0e-4}
    config = generate_config(PPO_CONFIG, override_vals)
    _check_environment_trains(env, config)


@pytest.mark.parametrize("num_visual", [1, 2])
@pytest.mark.parametrize("vis_encode_type", ["resnet", "nature_cnn"])
def test_visual_advanced_ppo(vis_encode_type, num_visual):
    env = Simple1DEnvironment(
        [BRAIN_NAME],
        use_discrete=True,
        num_visual=num_visual,
        num_vector=0,
        step_size=0.5,
        vis_obs_size=(36, 36, 3),
    )
    override_vals = {
        "learning_rate": 3.0e-4,
        "vis_encode_type": vis_encode_type,
        "max_steps": 500,
        "summary_freq": 100,
    }
    config = generate_config(PPO_CONFIG, override_vals)
    # The number of steps is pretty small for these encoders
    _check_environment_trains(env, config, success_threshold=0.5)


@pytest.mark.parametrize("use_discrete", [True, False])
def test_recurrent_ppo(use_discrete):
    env = Memory1DEnvironment([BRAIN_NAME], use_discrete=use_discrete)
    override_vals = {
        "max_steps": 3000,
        "batch_size": 64,
        "buffer_size": 128,
        "use_recurrent": True,
    }
    config = generate_config(PPO_CONFIG, override_vals)
    _check_environment_trains(env, config)


@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_sac(use_discrete):
    env = Simple1DEnvironment([BRAIN_NAME], use_discrete=use_discrete)
    config = generate_config(SAC_CONFIG)
    _check_environment_trains(env, config)


@pytest.mark.parametrize("use_discrete", [True, False])
@pytest.mark.parametrize("num_visual", [1, 2])
def test_visual_sac(num_visual, use_discrete):
    env = Simple1DEnvironment(
        [BRAIN_NAME],
        use_discrete=use_discrete,
        num_visual=num_visual,
        num_vector=0,
        step_size=0.2,
    )
    override_vals = {"batch_size": 16, "learning_rate": 3e-4}
    config = generate_config(SAC_CONFIG, override_vals)
    _check_environment_trains(env, config)


@pytest.mark.parametrize("num_visual", [1, 2])
@pytest.mark.parametrize("vis_encode_type", ["resnet", "nature_cnn"])
def test_visual_advanced_sac(vis_encode_type, num_visual):
    env = Simple1DEnvironment(
        [BRAIN_NAME],
        use_discrete=True,
        num_visual=num_visual,
        num_vector=0,
        step_size=0.5,
        vis_obs_size=(36, 36, 3),
    )
    override_vals = {
        "batch_size": 16,
        "learning_rate": 3.0e-4,
        "vis_encode_type": vis_encode_type,
        "buffer_init_steps": 0,
        "max_steps": 100,
    }
    config = generate_config(SAC_CONFIG, override_vals)
    # The number of steps is pretty small for these encoders
    _check_environment_trains(env, config, success_threshold=0.5)


@pytest.mark.parametrize("use_discrete", [True, False])
def test_recurrent_sac(use_discrete):
    env = Memory1DEnvironment([BRAIN_NAME], use_discrete=use_discrete)
    override_vals = {"batch_size": 32, "use_recurrent": True, "max_steps": 2000}
    config = generate_config(SAC_CONFIG, override_vals)
    _check_environment_trains(env, config)


@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_ghost(use_discrete):
    env = Simple1DEnvironment(
        [BRAIN_NAME + "?team=0", BRAIN_NAME + "?team=1"], use_discrete=use_discrete
    )
    override_vals = {
        "max_steps": 2500,
        "self_play": {
            "play_against_current_self_ratio": 1.0,
            "save_steps": 2000,
            "swap_steps": 2000,
        },
    }
    config = generate_config(PPO_CONFIG, override_vals)
    _check_environment_trains(env, config)


@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_ghost_fails(use_discrete):
    env = Simple1DEnvironment(
        [BRAIN_NAME + "?team=0", BRAIN_NAME + "?team=1"], use_discrete=use_discrete
    )
    # This config should fail because the ghosted policy is never swapped with a competent policy.
    # Swap occurs after max step is reached.
    override_vals = {
        "max_steps": 2500,
        "self_play": {
            "play_against_current_self_ratio": 1.0,
            "save_steps": 2000,
            "swap_steps": 4000,
        },
    }
    config = generate_config(PPO_CONFIG, override_vals)
    _check_environment_trains(env, config, success_threshold=None)
    processed_rewards = [
        default_reward_processor(rewards) for rewards in env.final_rewards.values()
    ]
    success_threshold = 0.99
    assert any(reward > success_threshold for reward in processed_rewards) and any(
        reward < success_threshold for reward in processed_rewards
    )
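The assertion at the top of this diff and the tail of test_simple_ghost_fails both funnel the per-brain reward histories through default_reward_processor before comparing them to a threshold. A minimal sketch of what that post-processing is assumed to do follows; the names default_reward_processor, final_rewards, and success_threshold come from the tests above, while the averaging window and the wrapper function are illustrative assumptions, not the actual ml-agents implementation:

from typing import Dict, List

import numpy as np


def default_reward_processor(rewards: List[float], last_n_rewards: int = 5) -> float:
    # Assumed behavior: average the last few episode rewards so a single
    # lucky episode cannot push the run over the success threshold.
    return float(np.mean(rewards[-last_n_rewards:]))


def assert_brains_trained(
    final_rewards: Dict[str, List[float]], success_threshold: float = 0.99
) -> None:
    # Hypothetical wrapper mirroring the assert inside _check_environment_trains.
    processed_rewards = [
        default_reward_processor(rewards) for rewards in final_rewards.values()
    ]
    assert all(reward > success_threshold for reward in processed_rewards)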
# @pytest.mark.parametrize("use_discrete", [True, False])
# def test_simple_ppo(use_discrete):
#     env = Simple1DEnvironment([BRAIN_NAME], use_discrete=use_discrete)
#     config = generate_config(PPO_CONFIG)
#     _check_environment_trains(env, config)
#
#
# @pytest.mark.parametrize("use_discrete", [True, False])
# @pytest.mark.parametrize("num_visual", [1, 2])
# def test_visual_ppo(num_visual, use_discrete):
#     env = Simple1DEnvironment(
#         [BRAIN_NAME],
#         use_discrete=use_discrete,
#         num_visual=num_visual,
#         num_vector=0,
#         step_size=0.2,
#     )
#     override_vals = {"learning_rate": 3.0e-4}
#     config = generate_config(PPO_CONFIG, override_vals)
#     _check_environment_trains(env, config)
#
#
# @pytest.mark.parametrize("num_visual", [1, 2])
# @pytest.mark.parametrize("vis_encode_type", ["resnet", "nature_cnn"])
# def test_visual_advanced_ppo(vis_encode_type, num_visual):
#     env = Simple1DEnvironment(
#         [BRAIN_NAME],
#         use_discrete=True,
#         num_visual=num_visual,
#         num_vector=0,
#         step_size=0.5,
#         vis_obs_size=(36, 36, 3),
#     )
#     override_vals = {
#         "learning_rate": 3.0e-4,
#         "vis_encode_type": vis_encode_type,
#         "max_steps": 500,
#         "summary_freq": 100,
#     }
#     config = generate_config(PPO_CONFIG, override_vals)
#     # The number of steps is pretty small for these encoders
#     _check_environment_trains(env, config, success_threshold=0.5)
#
#
# @pytest.mark.parametrize("use_discrete", [True, False])
# def test_recurrent_ppo(use_discrete):
#     env = Memory1DEnvironment([BRAIN_NAME], use_discrete=use_discrete)
#     override_vals = {
#         "max_steps": 3000,
#         "batch_size": 64,
#         "buffer_size": 128,
#         "use_recurrent": True,
#     }
#     config = generate_config(PPO_CONFIG, override_vals)
#     _check_environment_trains(env, config)
#
#
# @pytest.mark.parametrize("use_discrete", [True, False])
# def test_simple_sac(use_discrete):
#     env = Simple1DEnvironment([BRAIN_NAME], use_discrete=use_discrete)
#     config = generate_config(SAC_CONFIG)
#     _check_environment_trains(env, config)
#
#
# @pytest.mark.parametrize("use_discrete", [True, False])
# @pytest.mark.parametrize("num_visual", [1, 2])
# def test_visual_sac(num_visual, use_discrete):
#     env = Simple1DEnvironment(
#         [BRAIN_NAME],
#         use_discrete=use_discrete,
#         num_visual=num_visual,
#         num_vector=0,
#         step_size=0.2,
#     )
#     override_vals = {"batch_size": 16, "learning_rate": 3e-4}
#     config = generate_config(SAC_CONFIG, override_vals)
#     _check_environment_trains(env, config)
#
#
# @pytest.mark.parametrize("num_visual", [1, 2])
# @pytest.mark.parametrize("vis_encode_type", ["resnet", "nature_cnn"])
# def test_visual_advanced_sac(vis_encode_type, num_visual):
#     env = Simple1DEnvironment(
#         [BRAIN_NAME],
#         use_discrete=True,
#         num_visual=num_visual,
#         num_vector=0,
#         step_size=0.5,
#         vis_obs_size=(36, 36, 3),
#     )
#     override_vals = {
#         "batch_size": 16,
#         "learning_rate": 3.0e-4,
#         "vis_encode_type": vis_encode_type,
#         "buffer_init_steps": 0,
#         "max_steps": 100,
#     }
#     config = generate_config(SAC_CONFIG, override_vals)
#     # The number of steps is pretty small for these encoders
#     _check_environment_trains(env, config, success_threshold=0.5)
#
#
# @pytest.mark.parametrize("use_discrete", [True, False])
# def test_recurrent_sac(use_discrete):
#     env = Memory1DEnvironment([BRAIN_NAME], use_discrete=use_discrete)
#     override_vals = {"batch_size": 32, "use_recurrent": True, "max_steps": 2000}
#     config = generate_config(SAC_CONFIG, override_vals)
#     _check_environment_trains(env, config)
#
#
# @pytest.mark.parametrize("use_discrete", [True, False])
# def test_simple_ghost(use_discrete):
#     env = Simple1DEnvironment(
#         [BRAIN_NAME + "?team=0", BRAIN_NAME + "?team=1"], use_discrete=use_discrete
#     )
#     override_vals = {
#         "max_steps": 2500,
#         "self_play": {
#             "play_against_current_self_ratio": 1.0,
#             "save_steps": 2000,
#             "swap_steps": 2000,
#         },
#     }
#     config = generate_config(PPO_CONFIG, override_vals)
#     _check_environment_trains(env, config)
#
#
# @pytest.mark.parametrize("use_discrete", [True, False])
# def test_simple_ghost_fails(use_discrete):
#     env = Simple1DEnvironment(
#         [BRAIN_NAME + "?team=0", BRAIN_NAME + "?team=1"], use_discrete=use_discrete
#     )
#     # This config should fail because the ghosted policy is never swapped with a competent policy.
#     # Swap occurs after max step is reached.
#     override_vals = {
#         "max_steps": 2500,
#         "self_play": {
#             "play_against_current_self_ratio": 1.0,
#             "save_steps": 2000,
#             "swap_steps": 4000,
#         },
#     }
#     config = generate_config(PPO_CONFIG, override_vals)
#     _check_environment_trains(env, config, success_threshold=None)
#     processed_rewards = [
#         default_reward_processor(rewards) for rewards in env.final_rewards.values()
#     ]
#     success_threshold = 0.99
#     assert any(reward > success_threshold for reward in processed_rewards) and any(
#         reward < success_threshold for reward in processed_rewards
#     )
#
#
    config = generate_config(PPO_CONFIG)
    override_vals = {"max_steps": 5000}
    config = generate_config(PPO_CONFIG, override_vals)
    _check_environment_trains(env, config)
    agent_info_protos = env.demonstration_protos[BRAIN_NAME]
    meta_data_proto = DemonstrationMetaProto()

    write_demo(demo_path, meta_data_proto, brain_param_proto, agent_info_protos)
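Only fragments of the demonstration-recording flow appear in this hunk: the test environment exposes the AgentInfo protos it accumulated, a DemonstrationMetaProto is created, and write_demo serializes everything to a .demo file that the GAIL tests below point at. A rough sketch of how those pieces are assumed to fit together; record_1d_demo, the meta field, and the brain_param_proto source are illustrative assumptions, while the other names are taken from the lines above:

def record_1d_demo(env, config, demo_path):
    # Train first so the recorded behaviour is worth imitating.
    _check_environment_trains(env, config)
    # AgentInfo protos accumulated by the test environment during training.
    agent_info_protos = env.demonstration_protos[BRAIN_NAME]
    meta_data_proto = DemonstrationMetaProto()
    meta_data_proto.number_steps = len(agent_info_protos)  # assumed field
    brain_param_proto = env.brain_param_proto  # assumed attribute on the test env
    write_demo(demo_path, meta_data_proto, brain_param_proto, agent_info_protos)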
# @pytest.mark.parametrize("use_discrete", [True, False])
# @pytest.mark.parametrize("trainer_config", [PPO_CONFIG, SAC_CONFIG])
# def test_gail(use_discrete, trainer_config):
#     env = Simple1DEnvironment([BRAIN_NAME], use_discrete=use_discrete)
#     action_type = "Discrete" if use_discrete else "Continuous"
#     demo_path = "demos/1DTest" + action_type + ".demo"
#     override_vals = {
#         "max_steps": 500,
#         "behavioral_cloning": {"demo_path": demo_path, "strength": 1.0, "steps": 2000},
#         "reward_signals": {
#             "gail": {
#                 "strength": 1.0,
#                 "gamma": 0.99,
#                 "encoding_size": 32,
#                 "demo_path": demo_path,
#             }
#         },
#     }
#     config = generate_config(trainer_config, override_vals)
#     _check_environment_trains(env, config, success_threshold=0.9)
@pytest.mark.parametrize("trainer_config", [PPO_CONFIG, SAC_CONFIG])
def test_gail(use_discrete, trainer_config):
env = Simple1DEnvironment([BRAIN_NAME], use_discrete=use_discrete)
def test_recurrent_gail_ppo(use_discrete):
env = Memory1DEnvironment([BRAIN_NAME], use_discrete=use_discrete)
"max_steps": 1000,
"batch_size": 64,
"buffer_size": 128,
"use_recurrent": True,
"encoding_size": 128,
"encoding_size": 32,
config = generate_config(trainer_config, override_vals)
config = generate_config(PPO_CONFIG, override_vals)
# @pytest.mark.parametrize("use_discrete", [True, False])
# def test_recurrent_sac_gail(use_discrete):
#     env = Memory1DEnvironment([BRAIN_NAME], use_discrete=use_discrete)
#     action_type = "Discrete" if use_discrete else "Continuous"
#     demo_path = "demos/1DTest" + action_type + ".demo"
#     override_vals = {"batch_size": 32, "use_recurrent": True, "max_steps": 1000,
#         "behavioral_cloning": {"demo_path": demo_path, "strength": 1.0, "steps": 2000},
#         "reward_signals": {
#             "gail": {
#                 "strength": 1.0,
#                 "gamma": 0.99,
#                 "encoding_size": 128,
#                 "demo_path": demo_path,
#             }
#         },
#     }
#     config = generate_config(SAC_CONFIG, override_vals)
#     _check_environment_trains(env, config, success_threshold=0.9)
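The uncommented fragments between the commented test_gail and test_recurrent_sac_gail above interleave the removed test_gail lines with the added test_recurrent_gail_ppo lines, and the unchanged context lines were dropped, so the new test only appears in pieces. Reassembled from the visible override values (max_steps 1000, batch_size 64, buffer_size 128, use_recurrent True, encoding_size 128) and the structure of the commented recurrent SAC variant directly above, the new PPO test is assumed to look roughly like this; the demo_path and the behavioral_cloning / reward_signals plumbing are carried over from those commented tests rather than shown in the hunk itself:

@pytest.mark.parametrize("use_discrete", [True, False])
def test_recurrent_gail_ppo(use_discrete):
    env = Memory1DEnvironment([BRAIN_NAME], use_discrete=use_discrete)
    action_type = "Discrete" if use_discrete else "Continuous"
    demo_path = "demos/1DTest" + action_type + ".demo"  # assumed, carried over from test_gail
    override_vals = {
        "max_steps": 1000,
        "batch_size": 64,
        "buffer_size": 128,
        "use_recurrent": True,
        # behavioral_cloning / reward_signals copied from the commented tests above;
        # only encoding_size=128 is actually visible in this hunk.
        "behavioral_cloning": {"demo_path": demo_path, "strength": 1.0, "steps": 2000},
        "reward_signals": {
            "gail": {
                "strength": 1.0,
                "gamma": 0.99,
                "encoding_size": 128,
                "demo_path": demo_path,
            }
        },
    }
    config = generate_config(PPO_CONFIG, override_vals)
    _check_environment_trains(env, config, success_threshold=0.9)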