浏览代码

[bug-fix] Don't load non-wrapped policy (#4593)

* Always initialize non-wrapped policy

* Load ghosted policy

* Update changelog

* Resume test

* Add test

* Add torch test and fix torch.
/release_2_verified_load_fix
Ruo-Ping Dong 4 年前
当前提交
2ca79207
共有 3 个文件被更改,包括 77 次插入和 6 次删除
  1. 3
      com.unity.ml-agents/CHANGELOG.md
  2. 16
      ml-agents/mlagents/trainers/ghost/trainer.py
  3. 64
      ml-agents/mlagents/trainers/tests/test_ghost.py

3
com.unity.ml-agents/CHANGELOG.md


#### com.unity.ml-agents (C#)
- Fixed a bug with visual observations using .onnx model files and newer versions of Barracuda (1.1.0 or later). (#4533)
#### ml-agents / ml-agents-envs / gym-unity (Python)
- Fixed an issue where runs could not be resumed when using TensorFlow and Ghost Training. (#4593)
## [1.0.5] - 2020-09-23
### Minor Changes
#### com.unity.ml-agents (C#)

16
ml-agents/mlagents/trainers/ghost/trainer.py


@property
def reward_buffer(self) -> Deque[float]:
    """
    Returns the reward buffer. The reward buffer contains the cumulative
    rewards of the most recent episodes completed by agents using this
    trainer.

    :return: the reward buffer.
    """
    # The ghost trainer delegates reward tracking to the wrapped trainer,
    # so simply forward its buffer rather than keeping a second copy.
    return self.trainer.reward_buffer
@property

"""
policy = self.trainer.create_policy(parsed_behavior_id, brain_parameters)
policy.create_tf_graph()
policy.initialize_or_load()
policy.init_load_weights()
team_id = parsed_behavior_id.team_id
self.controller.subscribe_team_id(team_id, self)

self._save_snapshot() # Need to save after trainer initializes policy
self._learning_team = self.controller.get_learning_team
self.wrapped_trainer_team = team_id
else:
# Load the weights of the ghost policy from the wrapped one
policy.load_weights(
self.trainer.get_policy(parsed_behavior_id).get_weights()
)
return policy
def add_policy(

64
ml-agents/mlagents/trainers/tests/test_ghost.py


np.testing.assert_array_equal(w, lw)
def test_resume(dummy_config, tmp_path):
    """A saved GhostTrainer run can be resumed with identical policy weights.

    Builds a GhostTrainer wrapping a fresh PPOTrainer, registers a policy for
    each of two teams, saves the model, then constructs a second GhostTrainer
    with load=True over the same directory and checks that the resumed ghost
    (team 1) policy's weights exactly match the originals.
    """

    def _brain_params(team):
        # Both teams share the same minimal brain spec; only the name differs.
        return BrainParameters(
            brain_name="test_brain?team={}".format(team),
            vector_observation_space_size=1,
            camera_resolutions=[],
            vector_action_space_size=[2],
            vector_action_descriptions=[],
            vector_action_space_type=0,
        )

    team0_params = _brain_params(0)
    team1_params = _brain_params(1)
    brain_name = BehaviorIdentifiers.from_name_behavior_id(
        team0_params.brain_name
    ).brain_name
    tmp_path = tmp_path.as_posix()

    # First run: non-resuming trainer, register both teams, then save.
    ppo_trainer = PPOTrainer(brain_name, 0, dummy_config, True, False, 0, tmp_path)
    controller = GhostController(100)
    trainer = GhostTrainer(
        ppo_trainer, brain_name, controller, 0, dummy_config, True, tmp_path
    )
    behavior_id0 = BehaviorIdentifiers.from_name_behavior_id(team0_params.brain_name)
    behavior_id1 = BehaviorIdentifiers.from_name_behavior_id(team1_params.brain_name)
    team_specs = ((behavior_id0, team0_params), (behavior_id1, team1_params))
    for behavior_id, params in team_specs:
        trainer.add_policy(behavior_id, trainer.create_policy(behavior_id, params))
    trainer.save_model(behavior_id0.behavior_id)

    # Second run: resume=True trainer loads the checkpoint just written.
    ppo_trainer2 = PPOTrainer(brain_name, 0, dummy_config, True, True, 0, tmp_path)
    trainer2 = GhostTrainer(
        ppo_trainer2, brain_name, controller, 0, dummy_config, True, tmp_path
    )
    for behavior_id, params in team_specs:
        trainer2.add_policy(behavior_id, trainer2.create_policy(behavior_id, params))

    # The ghost (team 1) policy must round-trip through save/load unchanged.
    saved_weights = trainer.get_policy(behavior_id1.behavior_id).get_weights()
    resumed_weights = trainer2.get_policy(behavior_id1.behavior_id).get_weights()
    for saved, resumed in zip(saved_weights, resumed_weights):
        np.testing.assert_array_equal(saved, resumed)
def test_process_trajectory(dummy_config):
brain_params_team0 = BrainParameters(
brain_name="test_brain?team=0",

正在加载...
取消
保存