
[tests] Make end-to-end tests more stable (#3697)

/develop/add-fire
GitHub · 5 years ago
Current commit: 56b75555
2 files changed, with 17 insertions and 9 deletions
  1. ml-agents/mlagents/trainers/tests/simple_test_envs.py (2 changes)
  2. ml-agents/mlagents/trainers/tests/test_simple_rl.py (24 changes)

ml-agents/mlagents/trainers/tests/simple_test_envs.py (2 changes)


VIS_OBS_SIZE = (20, 20, 3)
STEP_SIZE = 0.1
- TIME_PENALTY = 0.001
+ TIME_PENALTY = 0.01
MIN_STEPS = int(1.0 / STEP_SIZE) + 1
SUCCESS_REWARD = 1.0 + MIN_STEPS * TIME_PENALTY
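For reference, evaluating these constants with the new TIME_PENALTY shows where the success bar now sits. The lines below are a worked check of the arithmetic, not additional code from the commit:

# Worked out from the definitions above (illustration only):
STEP_SIZE = 0.1
TIME_PENALTY = 0.01                                # new value; previously 0.001
MIN_STEPS = int(1.0 / STEP_SIZE) + 1               # 11
SUCCESS_REWARD = 1.0 + MIN_STEPS * TIME_PENALTY    # 1.11 (was 1.0 + 11 * 0.001 = 1.011)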

ml-agents/mlagents/trainers/tests/test_simple_rl.py (24 changes)


lambd: 0.95
learning_rate: 5.0e-3
learning_rate_schedule: constant
- max_steps: 2000
+ max_steps: 3000
memory_size: 16
normalize: false
num_epoch: 3

# Custom reward processors should be built within the test function and passed to _check_environment_trains
# Default is average over the last 5 final rewards
def default_reward_processor(rewards, last_n_rewards=5):
    rewards_to_use = rewards[-last_n_rewards:]
    # For debugging tests
    print("Last {} rewards:".format(last_n_rewards), rewards_to_use)
    return np.array(rewards[-last_n_rewards:], dtype=np.float32).mean()

    trainer_config,
    reward_processor=default_reward_processor,
    meta_curriculum=None,
-   success_threshold=0.99,
+   success_threshold=0.9,
    env_manager=None,
):
    # Create controller and begin training.

    if (
        success_threshold is not None
    ):  # For tests where we are just checking setup and not reward
        processed_rewards = [
            reward_processor(rewards) for rewards in env.final_rewards.values()
        ]
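As the comment on default_reward_processor notes, a test can build its own processor and pass it to _check_environment_trains. Below is a minimal sketch of that pattern, written as if it lived in test_simple_rl.py so it can reuse np, pytest, BRAIN_NAME, SimpleEnvironment, SAC_CONFIG, generate_config and _check_environment_trains; the test name and the median-based processor are hypothetical and not part of this commit.

# Hypothetical example of a custom reward processor (illustration only).
def median_reward_processor(rewards, last_n_rewards=5):
    # Median of the last few episode rewards is less sensitive to a single
    # unlucky episode than the default mean.
    return float(np.median(np.array(rewards[-last_n_rewards:], dtype=np.float32)))


@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_sac_median_reward(use_discrete):
    env = SimpleEnvironment([BRAIN_NAME], use_discrete=use_discrete)
    config = generate_config(SAC_CONFIG, {"max_steps": 3000})
    _check_environment_trains(env, config, reward_processor=median_reward_processor)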

@pytest.mark.parametrize("use_discrete", [True, False])
def test_2d_sac(use_discrete):
    env = SimpleEnvironment(
-       [BRAIN_NAME], use_discrete=use_discrete, action_size=2, step_size=0.5
+       [BRAIN_NAME], use_discrete=use_discrete, action_size=2, step_size=0.8
-   override_vals = {"buffer_init_steps": 2000, "max_steps": 3000}
+   override_vals = {"buffer_init_steps": 2000, "max_steps": 4000}
-   _check_environment_trains(env, config)
+   _check_environment_trains(env, config, success_threshold=0.8)
@pytest.mark.parametrize("use_discrete", [True, False])

@pytest.mark.parametrize("use_discrete", [True, False])
def test_recurrent_sac(use_discrete):
    env = MemoryEnvironment([BRAIN_NAME], use_discrete=use_discrete)
-   override_vals = {"batch_size": 32, "use_recurrent": True, "max_steps": 2000}
+   override_vals = {
+       "batch_size": 64,
+       "use_recurrent": True,
+       "max_steps": 3000,
+       "learning_rate": 1e-3,
+       "buffer_init_steps": 500,
+   }
    config = generate_config(SAC_CONFIG, override_vals)
    _check_environment_trains(env, config)
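The override dict is how both SAC tests tweak the base trainer settings. generate_config itself is not shown in this diff; as a rough mental model only (an assumption, the real helper in test_simple_rl.py may differ), it behaves like a dict merge in which the override values win:

# Illustrative stand-in for the override pattern; not the real generate_config.
def merge_overrides(base_config, override_vals=None):
    config = dict(base_config)          # shallow copy of the base settings
    config.update(override_vals or {})  # overrides replace base values
    return config

base = {"batch_size": 32, "max_steps": 2000, "buffer_init_steps": 0}
merged = merge_overrides(base, {"batch_size": 64, "max_steps": 3000})
assert merged == {"batch_size": 64, "max_steps": 3000, "buffer_init_steps": 0}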

    processed_rewards = [
        default_reward_processor(rewards) for rewards in env.final_rewards.values()
    ]
-   success_threshold = 0.99
+   success_threshold = 0.9
    assert any(reward > success_threshold for reward in processed_rewards) and any(
        reward < success_threshold for reward in processed_rewards
    )
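This assertion only passes when the per-agent rewards straddle the (now 0.9) threshold: at least one processed reward must land above it and at least one below. A small self-contained illustration with made-up numbers:

# Made-up reward values, for illustration only.
success_threshold = 0.9

def straddles(processed_rewards, threshold):
    return any(r > threshold for r in processed_rewards) and any(
        r < threshold for r in processed_rewards
    )

assert straddles([1.05, 0.30], success_threshold)      # one above, one below -> passes
assert not straddles([1.05, 0.95], success_threshold)  # both above the bar -> would fail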
