
[bug-fix] When agent isn't training, don't clear update buffer (#5205)

* Don't clear update buffer, but don't append to it either

* Update changelog

* Address comments

* Make experience replay buffer saving more verbose

(cherry picked from commit 63e7ad44d96b7663b91f005ca1d88f4f3b11dd2a)
Branch: release_16_branch
Ervin Teng, 4 years ago
Commit d1c24251
Showing 6 changed files with 63 additions and 19 deletions
  1. com.unity.ml-agents/CHANGELOG.md (5 changes)
  2. ml-agents/mlagents/trainers/poca/trainer.py (5 changes)
  3. ml-agents/mlagents/trainers/ppo/trainer.py (6 changes)
  4. ml-agents/mlagents/trainers/sac/trainer.py (10 changes)
  5. ml-agents/mlagents/trainers/tests/test_rl_trainer.py (39 changes)
  6. ml-agents/mlagents/trainers/trainer/rl_trainer.py (17 changes)

com.unity.ml-agents/CHANGELOG.md (5 changes)


 and this project adheres to
 [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
+## [1.9.1-preview]
+### Bug Fixes
+#### ml-agents / ml-agents-envs / gym-unity (Python)
+- Fixed a bug where the SAC replay buffer would not be saved out at the end of a run, even if `save_replay_buffer` was enabled. (#5205)
 ## [1.9.0-preview] - 2021-03-17
 ### Major Changes
 #### com.unity.ml-agents (C#)

ml-agents/mlagents/trainers/poca/trainer.py (5 changes)


 )
 agent_buffer_trajectory[BufferKey.ADVANTAGES].set(global_advantages)
-# Append to update buffer
-agent_buffer_trajectory.resequence_and_append(
-    self.update_buffer, training_length=self.policy.sequence_length
-)
+self._append_to_update_buffer(agent_buffer_trajectory)
 # If this was a terminal trajectory, append stats and reset reward collection
 if trajectory.done_reached:

ml-agents/mlagents/trainers/ppo/trainer.py (6 changes)


 global_returns = list(np.mean(np.array(tmp_returns, dtype=np.float32), axis=0))
 agent_buffer_trajectory[BufferKey.ADVANTAGES].set(global_advantages)
 agent_buffer_trajectory[BufferKey.DISCOUNTED_RETURNS].set(global_returns)
-# Append to update buffer
-agent_buffer_trajectory.resequence_and_append(
-    self.update_buffer, training_length=self.policy.sequence_length
-)
+self._append_to_update_buffer(agent_buffer_trajectory)
 # If this was a terminal trajectory, append stats and reset reward collection
 if trajectory.done_reached:

ml-agents/mlagents/trainers/sac/trainer.py (10 changes)


 Save the training buffer's update buffer to a pickle file.
 """
 filename = os.path.join(self.artifact_path, "last_replay_buffer.hdf5")
-logger.info(f"Saving Experience Replay Buffer to {filename}")
+logger.info(f"Saving Experience Replay Buffer to {filename}...")
+logger.info(
+    f"Saved Experience Replay Buffer ({os.path.getsize(filename)} bytes)."
+)

 def load_replay_buffer(self) -> None:
 """

 agent_buffer_trajectory[ObsUtil.get_name_at_next(i)][-1] = obs
 agent_buffer_trajectory[BufferKey.DONE][-1] = False
-# Append to update buffer
-agent_buffer_trajectory.resequence_and_append(
-    self.update_buffer, training_length=self.policy.sequence_length
-)
+self._append_to_update_buffer(agent_buffer_trajectory)
 if trajectory.done_reached:
     self._update_end_episode_stats(agent_id, self.optimizer)
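The extra logging follows an announce-then-report pattern: say where the buffer is being written, then confirm the resulting file size once the write finishes. Below is a minimal self-contained sketch of that pattern using only the standard library; the dict buffer, pickle.dump call, and save_buffer name are stand-ins for illustration, not the real SACTrainer or AgentBuffer API.

import logging
import os
import pickle

logger = logging.getLogger(__name__)

def save_buffer(buffer: dict, artifact_path: str) -> None:
    # Announce the target path before the (potentially slow) write starts.
    os.makedirs(artifact_path, exist_ok=True)
    filename = os.path.join(artifact_path, "last_replay_buffer.hdf5")
    logger.info(f"Saving Experience Replay Buffer to {filename}...")
    with open(filename, "wb") as file_object:
        pickle.dump(buffer, file_object)  # stand-in for update_buffer.save_to_file(file_object)
    # Report the on-disk size so users can confirm the buffer was actually written out.
    logger.info(f"Saved Experience Replay Buffer ({os.path.getsize(filename)} bytes).")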

ml-agents/mlagents/trainers/tests/test_rl_trainer.py (39 changes)


 @mock.patch("mlagents.trainers.trainer.trainer.Trainer.save_model")
-@mock.patch("mlagents.trainers.trainer.rl_trainer.RLTrainer._clear_update_buffer")
-def test_advance(mocked_clear_update_buffer, mocked_save_model):
+def test_advance(mocked_save_model):
     trainer = create_rl_trainer()
     mock_policy = mock.Mock()
     trainer.add_policy("TestBrain", mock_policy)

     with pytest.raises(AgentManagerQueue.Empty):
         policy_queue.get_nowait()
-    # Check that the buffer has been cleared
-    assert mocked_clear_update_buffer.call_count > 0
+    # Check that no model has been saved
     assert mocked_save_model.call_count == 0

         for step in checkpoint_range
     ]
     mock_add_checkpoint.assert_has_calls(add_checkpoint_calls)

+def test_update_buffer_append():
+    trainer = create_rl_trainer()
+    mock_policy = mock.Mock()
+    trainer.add_policy("TestBrain", mock_policy)
+    trajectory_queue = AgentManagerQueue("testbrain")
+    policy_queue = AgentManagerQueue("testbrain")
+    trainer.subscribe_trajectory_queue(trajectory_queue)
+    trainer.publish_policy_queue(policy_queue)
+    time_horizon = 10
+    trajectory = mb.make_fake_trajectory(
+        length=time_horizon,
+        observation_specs=create_observation_specs_with_shapes([(1,)]),
+        max_step_complete=True,
+        action_spec=ActionSpec.create_discrete((2,)),
+    )
+    agentbuffer_trajectory = trajectory.to_agentbuffer()
+    assert trainer.update_buffer.num_experiences == 0
+
+    # Check that if we append, our update buffer gets longer.
+    # max_steps = 100
+    for i in range(10):
+        trainer._process_trajectory(trajectory)
+        trainer._append_to_update_buffer(agentbuffer_trajectory)
+        assert trainer.update_buffer.num_experiences == (i + 1) * time_horizon
+
+    # Check that if we append after stopping training, nothing happens.
+    # We process enough trajectories to hit max steps
+    trainer.set_is_policy_updating(False)
+    trainer._process_trajectory(trajectory)
+    trainer._append_to_update_buffer(agentbuffer_trajectory)
+    assert trainer.update_buffer.num_experiences == (i + 1) * time_horizon

 class RLTrainerWarningTest(unittest.TestCase):
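To exercise only the new test locally, a pytest keyword filter works. The invocation below is a sketch that assumes pytest is installed and the command is run from the repository root, using the file path shown in this diff.

import pytest

# Select only the new buffer-append test from the changed test module.
pytest.main(["ml-agents/mlagents/trainers/tests/test_rl_trainer.py", "-k", "test_update_buffer_append"])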

ml-agents/mlagents/trainers/trainer/rl_trainer.py (17 changes)


 if step_after_process >= self._next_summary_step and self.get_step != 0:
     self._write_summary(self._next_summary_step)

+def _append_to_update_buffer(self, agentbuffer_trajectory: AgentBuffer) -> None:
+    """
+    Append an AgentBuffer to the update buffer. If the trainer isn't training,
+    don't update to avoid a memory leak.
+    """
+    if self.should_still_train:
+        seq_len = (
+            self.trainer_settings.network_settings.memory.sequence_length
+            if self.trainer_settings.network_settings.memory is not None
+            else 1
+        )
+        agentbuffer_trajectory.resequence_and_append(
+            self.update_buffer, training_length=seq_len
+        )
+
 def _maybe_save_model(self, step_after_process: int) -> None:
     """
     If processing the trajectory will make the step exceed the next model write,

     for q in self.policy_queues:
         # Get policies that correspond to the policy queue in question
         q.put(self.get_policy(q.behavior_id))
-else:
-    self._clear_update_buffer()
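Taken together, the new helper and the removed else branch mean that once should_still_train goes false the trainer simply stops appending trajectories instead of clearing experiences it has already collected, which is the behavior the changelog entry above ties to the SAC replay buffer not being saved out. Below is a minimal self-contained sketch of that guard; MiniTrainer and its attributes are hypothetical, with a plain Python list standing in for AgentBuffer and resequence_and_append.

from typing import Dict, List

class MiniTrainer:
    """Toy stand-in for RLTrainer: append experiences only while still training."""

    def __init__(self, max_steps: int) -> None:
        self.update_buffer: List[Dict[str, float]] = []  # stand-in for AgentBuffer
        self.step = 0
        self.max_steps = max_steps

    @property
    def should_still_train(self) -> bool:
        return self.step < self.max_steps

    def _append_to_update_buffer(self, trajectory: List[Dict[str, float]]) -> None:
        # Mirrors the new guard: drop trajectories once training has stopped,
        # but never clear what was already collected, so it can still be saved out.
        if self.should_still_train:
            self.update_buffer.extend(trajectory)

trainer = MiniTrainer(max_steps=20)
trajectory = [{"obs": 0.0, "reward": 1.0}] * 10
trainer._append_to_update_buffer(trajectory)  # still training: 10 experiences buffered
trainer.step = trainer.max_steps              # training budget exhausted
trainer._append_to_update_buffer(trajectory)  # dropped: buffer length stays at 10
assert len(trainer.update_buffer) == 10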