|
|
|
|
|
|
assert all(reward > success_threshold for reward in processed_rewards) |
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.parametrize("use_discrete", [True, False]) |
|
|
|
def test_simple_ppo(use_discrete): |
|
|
|
env = SimpleEnvironment([BRAIN_NAME], use_discrete=use_discrete) |
|
|
|
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)]) |
|
|
|
def test_simple_ppo(action_sizes): |
|
|
|
env = SimpleEnvironment([BRAIN_NAME], action_sizes=action_sizes) |
|
|
|
@pytest.mark.parametrize("use_discrete", [True, False]) |
|
|
|
def test_2d_ppo(use_discrete): |
|
|
|
env = SimpleEnvironment( |
|
|
|
[BRAIN_NAME], use_discrete=use_discrete, action_size=2, step_size=0.8 |
|
|
|
) |
|
|
|
@pytest.mark.parametrize("action_sizes", [(0, 2), (2, 0)]) |
|
|
|
def test_2d_ppo(action_sizes): |
|
|
|
env = SimpleEnvironment([BRAIN_NAME], action_sizes=action_sizes, step_size=0.8) |
|
|
|
new_hyperparams = attr.evolve( |
|
|
|
PPO_TF_CONFIG.hyperparameters, batch_size=64, buffer_size=640 |
|
|
|
) |
|
|
|
|
|
|
_check_environment_trains(env, {BRAIN_NAME: config}) |
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.parametrize("use_discrete", [True, False]) |
|
|
|
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)]) |
|
|
|
def test_visual_ppo(num_visual, use_discrete): |
|
|
|
def test_visual_ppo(num_visual, action_sizes): |
|
|
|
use_discrete=use_discrete, |
|
|
|
action_sizes=action_sizes, |
|
|
|
num_visual=num_visual, |
|
|
|
num_vector=0, |
|
|
|
step_size=0.2, |
|
|
|
|
|
|
def test_visual_advanced_ppo(vis_encode_type, num_visual): |
|
|
|
env = SimpleEnvironment( |
|
|
|
[BRAIN_NAME], |
|
|
|
use_discrete=True, |
|
|
|
action_sizes=(0, 1), |
|
|
|
num_visual=num_visual, |
|
|
|
num_vector=0, |
|
|
|
step_size=0.5, |
|
|
|
|
|
|
_check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.5) |
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.parametrize("use_discrete", [True, False]) |
|
|
|
def test_recurrent_ppo(use_discrete): |
|
|
|
env = MemoryEnvironment([BRAIN_NAME], use_discrete=use_discrete) |
|
|
|
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)]) |
|
|
|
def test_recurrent_ppo(action_sizes): |
|
|
|
env = MemoryEnvironment([BRAIN_NAME], action_sizes=action_sizes) |
|
|
|
new_network_settings = attr.evolve( |
|
|
|
PPO_TF_CONFIG.network_settings, |
|
|
|
memory=NetworkSettings.MemorySettings(memory_size=16), |
|
|
|
|
|
|
_check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9) |
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.parametrize("use_discrete", [True, False]) |
|
|
|
def test_simple_sac(use_discrete): |
|
|
|
env = SimpleEnvironment([BRAIN_NAME], use_discrete=use_discrete) |
|
|
|
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)]) |
|
|
|
def test_simple_sac(action_sizes): |
|
|
|
env = SimpleEnvironment([BRAIN_NAME], action_sizes=action_sizes) |
|
|
|
@pytest.mark.parametrize("use_discrete", [True, False]) |
|
|
|
def test_2d_sac(use_discrete): |
|
|
|
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)]) |
|
|
|
def test_2d_sac(action_sizes): |
|
|
|
[BRAIN_NAME], use_discrete=use_discrete, action_size=2, step_size=0.8 |
|
|
|
[BRAIN_NAME], action_sizes=action_sizes, action_size=2, step_size=0.8 |
|
|
|
) |
|
|
|
new_hyperparams = attr.evolve(SAC_TF_CONFIG.hyperparameters, buffer_init_steps=2000) |
|
|
|
config = attr.evolve( |
|
|
|
|
|
|
_check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.8) |
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.parametrize("use_discrete", [True, False]) |
|
|
|
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)]) |
|
|
|
def test_visual_sac(num_visual, use_discrete): |
|
|
|
def test_visual_sac(num_visual, action_sizes): |
|
|
|
use_discrete=use_discrete, |
|
|
|
action_sizes=action_sizes, |
|
|
|
num_visual=num_visual, |
|
|
|
num_vector=0, |
|
|
|
step_size=0.2, |
|
|
|
|
|
|
def test_visual_advanced_sac(vis_encode_type, num_visual): |
|
|
|
env = SimpleEnvironment( |
|
|
|
[BRAIN_NAME], |
|
|
|
use_discrete=True, |
|
|
|
action_sizes=(0, 1), |
|
|
|
num_visual=num_visual, |
|
|
|
num_vector=0, |
|
|
|
step_size=0.5, |
|
|
|
|
|
|
_check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.5) |
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.parametrize("use_discrete", [True, False]) |
|
|
|
def test_recurrent_sac(use_discrete): |
|
|
|
step_size = 0.2 if use_discrete else 0.5 |
|
|
|
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)]) |
|
|
|
def test_recurrent_sac(action_sizes): |
|
|
|
step_size = 0.2 if action_sizes else 0.5 |
|
|
|
[BRAIN_NAME], use_discrete=use_discrete, step_size=step_size |
|
|
|
[BRAIN_NAME], action_sizes=action_sizes, step_size=step_size |
|
|
|
) |
|
|
|
new_networksettings = attr.evolve( |
|
|
|
SAC_TF_CONFIG.network_settings, |
|
|
|
|
|
|
_check_environment_trains(env, {BRAIN_NAME: config}) |
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.parametrize("use_discrete", [True, False]) |
|
|
|
def test_simple_ghost(use_discrete): |
|
|
|
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)]) |
|
|
|
def test_simple_ghost(action_sizes): |
|
|
|
[BRAIN_NAME + "?team=0", BRAIN_NAME + "?team=1"], use_discrete=use_discrete |
|
|
|
[BRAIN_NAME + "?team=0", BRAIN_NAME + "?team=1"], action_sizes=action_sizes |
|
|
|
) |
|
|
|
self_play_settings = SelfPlaySettings( |
|
|
|
play_against_latest_model_ratio=1.0, save_steps=2000, swap_steps=2000 |
|
|
|
|
|
|
_check_environment_trains(env, {BRAIN_NAME: config}) |
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.parametrize("use_discrete", [True, False]) |
|
|
|
def test_simple_ghost_fails(use_discrete): |
|
|
|
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)]) |
|
|
|
def test_simple_ghost_fails(action_sizes): |
|
|
|
[BRAIN_NAME + "?team=0", BRAIN_NAME + "?team=1"], use_discrete=use_discrete |
|
|
|
[BRAIN_NAME + "?team=0", BRAIN_NAME + "?team=1"], action_sizes=action_sizes |
|
|
|
) |
|
|
|
# This config should fail because the ghosted policy is never swapped with a competent policy. |
|
|
|
# Swap occurs after max step is reached. |
|
|
|
|
|
|
) |
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.parametrize("use_discrete", [True, False]) |
|
|
|
def test_simple_asymm_ghost(use_discrete): |
|
|
|
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)]) |
|
|
|
def test_simple_asymm_ghost(action_sizes): |
|
|
|
[BRAIN_NAME + "?team=0", brain_name_opp + "?team=1"], use_discrete=use_discrete |
|
|
|
[BRAIN_NAME + "?team=0", brain_name_opp + "?team=1"], action_sizes=action_sizes |
|
|
|
) |
|
|
|
self_play_settings = SelfPlaySettings( |
|
|
|
play_against_latest_model_ratio=1.0, |
|
|
|
|
|
|
_check_environment_trains(env, {BRAIN_NAME: config, brain_name_opp: config}) |
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.parametrize("use_discrete", [True, False]) |
|
|
|
def test_simple_asymm_ghost_fails(use_discrete): |
|
|
|
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)]) |
|
|
|
def test_simple_asymm_ghost_fails(action_sizes): |
|
|
|
[BRAIN_NAME + "?team=0", brain_name_opp + "?team=1"], use_discrete=use_discrete |
|
|
|
[BRAIN_NAME + "?team=0", brain_name_opp + "?team=1"], action_sizes=action_sizes |
|
|
|
) |
|
|
|
# This config should fail because the team that is not learning when both have reached |
|
|
|
# max step should be executing the initial, untrained policy. |
|
|
|
|
|
|
|
|
|
|
@pytest.fixture(scope="session") |
|
|
|
def simple_record(tmpdir_factory): |
|
|
|
def record_demo(use_discrete, num_visual=0, num_vector=1): |
|
|
|
def record_demo(action_sizes, num_visual=0, num_vector=1): |
|
|
|
use_discrete=use_discrete, |
|
|
|
action_sizes=action_sizes, |
|
|
|
num_visual=num_visual, |
|
|
|
num_vector=num_vector, |
|
|
|
n_demos=100, |
|
|
|
|
|
|
env.solve() |
|
|
|
continuous_size, discrete_size = action_sizes |
|
|
|
use_discrete = True if discrete_size > 0 else False |
|
|
|
agent_info_protos = env.demonstration_protos[BRAIN_NAME] |
|
|
|
meta_data_proto = DemonstrationMetaProto() |
|
|
|
brain_param_proto = BrainParametersProto( |
|
|
|
|
|
|
return record_demo |
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.parametrize("use_discrete", [True, False]) |
|
|
|
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)]) |
|
|
|
def test_gail(simple_record, use_discrete, trainer_config): |
|
|
|
demo_path = simple_record(use_discrete) |
|
|
|
env = SimpleEnvironment([BRAIN_NAME], use_discrete=use_discrete, step_size=0.2) |
|
|
|
def test_gail(simple_record, action_sizes, trainer_config): |
|
|
|
demo_path = simple_record(action_sizes) |
|
|
|
env = SimpleEnvironment([BRAIN_NAME], action_sizes=action_sizes, step_size=0.2) |
|
|
|
bc_settings = BehavioralCloningSettings(demo_path=demo_path, steps=1000) |
|
|
|
reward_signals = { |
|
|
|
RewardSignalType.GAIL: GAILSettings(encoding_size=32, demo_path=demo_path) |
|
|
|
|
|
|
_check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9) |
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.parametrize("use_discrete", [True, False]) |
|
|
|
def test_gail_visual_ppo(simple_record, use_discrete): |
|
|
|
demo_path = simple_record(use_discrete, num_visual=1, num_vector=0) |
|
|
|
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)]) |
|
|
|
def test_gail_visual_ppo(simple_record, action_sizes): |
|
|
|
demo_path = simple_record(action_sizes, num_visual=1, num_vector=0) |
|
|
|
use_discrete=use_discrete, |
|
|
|
action_sizes=action_sizes, |
|
|
|
step_size=0.2, |
|
|
|
) |
|
|
|
bc_settings = BehavioralCloningSettings(demo_path=demo_path, steps=1500) |
|
|
|
|
|
|
_check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9) |
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.parametrize("use_discrete", [True, False]) |
|
|
|
def test_gail_visual_sac(simple_record, use_discrete): |
|
|
|
demo_path = simple_record(use_discrete, num_visual=1, num_vector=0) |
|
|
|
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)]) |
|
|
|
def test_gail_visual_sac(simple_record, action_sizes): |
|
|
|
demo_path = simple_record(action_sizes, num_visual=1, num_vector=0) |
|
|
|
use_discrete=use_discrete, |
|
|
|
action_sizes=action_sizes, |
|
|
|
step_size=0.2, |
|
|
|
) |
|
|
|
bc_settings = BehavioralCloningSettings(demo_path=demo_path, steps=1000) |
|
|
|