浏览代码

adjusting tests to expect trainer.add_policy to be called

/develop/tanhsquash
Andrew Cohen 5 年前
当前提交
ef2dfd4c
共有 6 个文件被更改,包括 32 次插入 和 12 次删除
  1. 10
      ml-agents/mlagents/trainers/tests/test_ppo.py
  2. 7
      ml-agents/mlagents/trainers/tests/test_rl_trainer.py
  3. 6
      ml-agents/mlagents/trainers/tests/test_sac.py
  4. 4
      ml-agents/mlagents/trainers/tests/test_trainer_controller.py
  5. 12
      ml-agents/mlagents/trainers/tests/test_trainer_util.py
  6. 5
      ml-agents/mlagents/trainers/trainer_controller.py

10
ml-agents/mlagents/trainers/tests/test_ppo.py


trainer_params["reward_signals"]["curiosity"]["gamma"] = 0.99
trainer_params["reward_signals"]["curiosity"]["encoding_size"] = 128
trainer = PPOTrainer(mock_brain, 0, trainer_params, True, False, 0, "0", False)
trainer = PPOTrainer(
mock_brain.brain_name, 0, trainer_params, True, False, 0, "0", False
)
trainer.add_policy(mock_brain)
# Test update with sequence length smaller than batch size
buffer = mb.simulate_rollout(env, trainer.policy, BUFFER_INIT_SAMPLES)
# Mock out reward signal eval

)
dummy_config["summary_path"] = "./summaries/test_trainer_summary"
dummy_config["model_path"] = "./models/test_trainer_models/TestModel"
trainer = PPOTrainer(brain_params, 0, dummy_config, True, False, 0, "0", False)
trainer = PPOTrainer(
brain_params.brain_name, 0, dummy_config, True, False, 0, "0", False
)
trainer.add_policy(brain_params)
rewardsout = AllRewardsOutput(
reward_signals={
"extrinsic": RewardSignalResult(

7
ml-agents/mlagents/trainers/tests/test_rl_trainer.py


def create_rl_trainer():
mock_brainparams = create_mock_brain()
trainer = RLTrainer(mock_brainparams, dummy_config(), True, 0)
trainer = RLTrainer(mock_brainparams.brain_name, dummy_config(), True, 0)
return trainer

def test_rl_trainer(add_policy_outputs, add_rewards_outputs, num_vis_obs):
trainer = create_rl_trainer()
trainer.policy = create_mock_policy()
fake_id = "fake_behavior_id"
fake_action_outputs = {
"action": [0.1, 0.1],
"value_heads": {},

num_vector_acts=2,
num_vis_observations=num_vis_obs,
)
trainer.add_experiences(mock_braininfo, mock_braininfo, fake_action_outputs)
trainer.add_experiences(
fake_id, mock_braininfo, mock_braininfo, fake_action_outputs
)
# Remove one of the agents
next_mock_braininfo = mb.create_mock_braininfo(

6
ml-agents/mlagents/trainers/tests/test_sac.py


trainer_params["summary_path"] = str(tmpdir)
trainer_params["model_path"] = str(tmpdir)
trainer_params["save_replay_buffer"] = True
trainer = SACTrainer(mock_brain, 1, trainer_params, True, False, 0, 0)
trainer = SACTrainer(mock_brain.brain_name, 1, trainer_params, True, False, 0, 0)
trainer.add_policy(mock_brain)
trainer.update_buffer = mb.simulate_rollout(
env, trainer.policy, BUFFER_INIT_SAMPLES
)

# Wipe Trainer and try to load
trainer2 = SACTrainer(mock_brain, 1, trainer_params, True, True, 0, 0)
trainer2 = SACTrainer(mock_brain.brain_name, 1, trainer_params, True, True, 0, 0)
trainer2.add_policy(mock_brain)
assert trainer2.update_buffer.num_experiences == buffer_len

4
ml-agents/mlagents/trainers/tests/test_trainer_controller.py


env_mock.reset.assert_not_called()
env_mock.step.assert_called_once()
trainer_mock.add_experiences.assert_called_once_with(
brain_name,
brain_name,
new_step_info.previous_all_brain_info[brain_name],
new_step_info.current_all_brain_info[brain_name],
)

env_mock.reset.assert_not_called()
env_mock.step.assert_called_once()
trainer_mock.add_experiences.assert_called_once_with(
brain_name,
brain_name,
new_step_info.previous_all_brain_info[brain_name],
new_step_info.current_all_brain_info[brain_name],
)

12
ml-agents/mlagents/trainers/tests/test_trainer_util.py


external_brains = {"testbrain": brain_params_mock}
def mock_constructor(self, brain, trainer_parameters, training, load, seed, run_id):
assert brain == brain_params_mock
assert brain == brain_params_mock.brain_name
assert trainer_parameters == expected_config
assert training == train_model
assert load == load_model

)
trainers = {}
for _, brain_parameters in external_brains.items():
trainers["testbrain"] = trainer_factory.generate(brain_parameters)
trainers["testbrain"] = trainer_factory.generate(
brain_parameters.brain_name
)
assert "testbrain" in trainers
assert isinstance(trainers["testbrain"], OfflineBCTrainer)

multi_gpu,
):
self.trainer_metrics = TrainerMetrics("", "")
assert brain == brain_params_mock
assert brain == brain_params_mock.brain_name
assert trainer_parameters == expected_config
assert reward_buff_cap == expected_reward_buff_cap
assert training == train_model

)
trainers = {}
for brain_name, brain_parameters in external_brains.items():
trainers[brain_name] = trainer_factory.generate(brain_parameters)
trainers[brain_name] = trainer_factory.generate(brain_parameters.brain_name)
assert "testbrain" in trainers
assert isinstance(trainers["testbrain"], PPOTrainer)

)
trainers = {}
for brain_name, brain_parameters in external_brains.items():
trainers[brain_name] = trainer_factory.generate(brain_parameters)
trainers[brain_name] = trainer_factory.generate(brain_parameters.brain_name)
def test_load_config_missing_file():

5
ml-agents/mlagents/trainers/trainer_controller.py


external_brain_behavior_ids = set(env_manager.external_brains.keys())
new_behavior_ids = external_brain_behavior_ids - last_brain_behavior_ids
for name_behavior_id in new_behavior_ids:
brain_name, _ = name_behavior_id.split("?")
try:
brain_name, _ = name_behavior_id.split("?")
except ValueError:
brain_name = name_behavior_id
# This could be done with a try/except which may improve performance?
try:

正在加载...
取消
保存