
fixed test_ghost and test_ppo

/internal-policy-ghost
Andrew Cohen, 5 years ago
Current commit: d1bee64b
2 files changed, 28 insertions(+) and 32 deletions(-)
1. ml-agents/mlagents/trainers/tests/test_ghost.py (12 changes)
2. ml-agents/mlagents/trainers/tests/test_ppo.py (48 changes)

ml-agents/mlagents/trainers/tests/test_ghost.py (12 changes)


     )
     # first policy encountered becomes policy trained by wrapped PPO
-    policy = trainer.create_policy(brain_params_team0)
-    trainer.add_policy(parsed_behavior_id0, policy)
+    trainer.add_policy(parsed_behavior_id0, brain_params_team0)
-    policy = trainer.create_policy(brain_params_team1)
-    trainer.add_policy(parsed_behavior_id1, policy)
+    trainer.add_policy(parsed_behavior_id1, brain_params_team1)
     trajectory_queue1 = AgentManagerQueue(brain_params_team1.brain_name)
     trainer.subscribe_trajectory_queue(trajectory_queue1)

     # First policy encountered becomes policy trained by wrapped PPO
     # This queue should remain empty after swap snapshot
-    policy = trainer.create_policy(brain_params_team0)
-    trainer.add_policy(parsed_behavior_id0, policy)
+    trainer.add_policy(parsed_behavior_id0, brain_params_team0)
-    policy = trainer.create_policy(brain_params_team1)
-    trainer.add_policy(parsed_behavior_id1, policy)
+    trainer.add_policy(parsed_behavior_id1, brain_params_team1)
     policy_queue1 = AgentManagerQueue(brain_params_team1.brain_name)
     trainer.publish_policy_queue(policy_queue1)
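Both hunks make the same change: the test no longer builds a policy with create_policy and hands it to add_policy; it passes the brain parameters and lets the wrapped trainer construct the policy itself. A minimal sketch of that contract, with invented stand-in names (SketchTrainer, BrainParams) since the trainer internals are not part of this diff:

from collections import namedtuple

# Hypothetical stand-ins: BrainParams is just a name holder, and
# SketchTrainer only illustrates the new add_policy contract.
BrainParams = namedtuple("BrainParams", ["brain_name"])

class SketchTrainer:
    def __init__(self):
        self._policies = {}

    def create_policy(self, brain_parameters):
        # Stand-in for real policy construction inside the trainer.
        return {"brain": brain_parameters.brain_name, "step": 0}

    def add_policy(self, parsed_behavior_id, brain_parameters):
        # Post-refactor contract: callers hand over parameters,
        # the trainer builds and registers its own policy.
        self._policies[parsed_behavior_id] = self.create_policy(brain_parameters)

    def get_policy(self, parsed_behavior_id):
        return self._policies[parsed_behavior_id]

trainer = SketchTrainer()
trainer.add_policy("team0", BrainParams("test_brain?team=0"))
assert trainer.get_policy("team0")["brain"] == "test_brain?team=0"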

ml-agents/mlagents/trainers/tests/test_ppo.py (48 changes)


     )
+@mock.patch("mlagents.trainers.ppo.trainer.NNPolicy")
-def test_trainer_increment_step(ppo_optimizer, dummy_config):
+def test_trainer_increment_step(ppo_optimizer, nn_policy, dummy_config):
+    mock_policy = mock.Mock()
+    mock_policy.get_current_step = mock.Mock(return_value=0)
+    step_count = (
+        5
+    )  # 10 hacked because this function is no longer called through trainer
+    mock_policy.increment_step = mock.Mock(return_value=step_count)
+    nn_policy.return_value = mock_policy
     brain_params = BrainParameters(
         brain_name="test_brain",

     trainer = PPOTrainer(
         brain_params.brain_name, 0, trainer_params, True, False, 0, "0"
     )
-    policy_mock = mock.Mock(spec=NNPolicy)
-    policy_mock.get_current_step.return_value = 0
-    step_count = (
-        5
-    )  # 10 hacked because this function is no longer called through trainer
-    policy_mock.increment_step = mock.Mock(return_value=step_count)
-    trainer.add_policy("testbehavior", policy_mock)
+    trainer.add_policy("testbehavior", brain_params)
+    policy = trainer.get_policy("testbehavior")

-    policy_mock.increment_step.assert_called_with(5)
+    policy.increment_step.assert_called_with(5)
 @pytest.mark.parametrize("use_discrete", [True, False])
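The new @mock.patch decorator targets NNPolicy in the namespace where the PPO trainer module looks it up, so the policy that add_policy now constructs internally is the test's mock. A self-contained illustration of that patching pattern; Policy and Trainer below are invented names, and only the technique, not the real module layout, is taken from the diff:

from unittest import mock

class Policy:
    def increment_step(self, n):
        return n

class Trainer:
    def add_policy(self, name):
        # Policy is resolved in this module's namespace at call time,
        # which is exactly the lookup mock.patch replaces.
        self.policy = Policy()

with mock.patch(f"{__name__}.Policy") as policy_cls:
    mock_policy = mock.Mock()
    mock_policy.increment_step = mock.Mock(return_value=5)
    policy_cls.return_value = mock_policy

    trainer = Trainer()
    trainer.add_policy("testbehavior")
    # The policy built inside add_policy is our mock.
    trainer.policy.increment_step(5)
    trainer.policy.increment_step.assert_called_with(5)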

     trainer_params["reward_signals"]["curiosity"]["encoding_size"] = 128
     trainer = PPOTrainer(mock_brain.brain_name, 0, trainer_params, True, False, 0, "0")
-    policy = trainer.create_policy(mock_brain)
-    trainer.add_policy(mock_brain.brain_name, policy)
+    trainer.add_policy(mock_brain.brain_name, mock_brain)
     # Test update with sequence length smaller than batch size
     buffer = mb.simulate_rollout(BUFFER_INIT_SAMPLES, mock_brain)
     # Mock out reward signal eval

     dummy_config["summary_path"] = "./summaries/test_trainer_summary"
     dummy_config["model_path"] = "./models/test_trainer_models/TestModel"
     trainer = PPOTrainer(brain_params, 0, dummy_config, True, False, 0, "0")
-    policy = trainer.create_policy(brain_params)
-    trainer.add_policy(brain_params.brain_name, policy)
+    trainer.add_policy(brain_params.brain_name, brain_params)
     trajectory_queue = AgentManagerQueue("testbrain")
     trainer.subscribe_trajectory_queue(trajectory_queue)
     time_horizon = 15
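For context on the queue wiring above: the test subscribes a trajectory queue, pushes trajectories into it, and lets the trainer drain it. A rough stand-in with queue.Queue-like semantics; the real AgentManagerQueue lives in mlagents.trainers.agent_processor, and SketchQueue here is invented for illustration:

import queue

class SketchQueue:
    class Empty(Exception):
        pass

    def __init__(self, behavior_id):
        self.behavior_id = behavior_id
        self._q = queue.Queue()

    def put(self, item):
        self._q.put(item)

    def get_nowait(self):
        # Non-blocking read, as a trainer's update loop would use.
        try:
            return self._q.get_nowait()
        except queue.Empty:
            raise self.Empty

# The shape of the test: push a trajectory, then drain it.
trajectory_queue = SketchQueue("testbrain")
trajectory_queue.put({"steps": 15})  # stand-in trajectory
assert trajectory_queue.get_nowait()["steps"] == 15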

     assert trainer.stats_reporter.get_stats_summaries("Policy/Extrinsic Reward").num > 0

+@mock.patch("mlagents.trainers.ppo.trainer.NNPolicy")
-def test_add_get_policy(ppo_optimizer, dummy_config):
+def test_add_get_policy(ppo_optimizer, nn_policy, dummy_config):
     brain_params = make_brain_parameters(
         discrete_action=False, visual_inputs=0, vec_obs_size=6
     )

+    mock_policy = mock.Mock()
+    mock_policy.get_current_step = mock.Mock(return_value=2000)
+    nn_policy.return_value = mock_policy
+    # nn_policy.get_current_step.return_value = 2000
     trainer = PPOTrainer(brain_params, 0, dummy_config, True, False, 0, "0")
-    policy = mock.Mock(spec=NNPolicy)
-    policy.get_current_step.return_value = 2000
-    trainer.add_policy(brain_params.brain_name, policy)
-    assert trainer.get_policy(brain_params.brain_name) == policy
+    trainer.add_policy(brain_params.brain_name, brain_params)

-    # Test incorrect class of policy
-    policy = mock.Mock()
-    with pytest.raises(RuntimeError):
-        trainer.add_policy(brain_params, policy)
 def test_bad_config(dummy_config):
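The mocked get_current_step returning 2000 suggests the trainer is expected to adopt the step count of the policy it just created (e.g. one restored from a checkpoint). A hypothetical sketch of that bookkeeping; SketchPPOTrainer and its fields are invented, and only the get_current_step call appears in this diff:

from unittest import mock

class SketchPPOTrainer:
    def __init__(self):
        self.step = 0
        self._policies = {}

    def add_policy(self, name, policy):
        self._policies[name] = policy
        # Resume from wherever the policy left off.
        self.step = policy.get_current_step()

    def get_policy(self, name):
        return self._policies[name]

mock_policy = mock.Mock()
mock_policy.get_current_step = mock.Mock(return_value=2000)

trainer = SketchPPOTrainer()
trainer.add_policy("test_brain", mock_policy)
assert trainer.get_policy("test_brain") is mock_policy
assert trainer.step == 2000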
