浏览代码

Fix "memory leak" during inference (#2722)

* Clear buffer if not training
* Add tests
/develop-gpu-test
GitHub 5 年前
当前提交
5d3e05d1
共有 5 个文件被更改,包括 49 次插入1 次删除
  1. 2
      ml-agents/mlagents/trainers/ppo/trainer.py
  2. 7
      ml-agents/mlagents/trainers/rl_trainer.py
  3. 10
      ml-agents/mlagents/trainers/tests/test_rl_trainer.py
  4. 28
      ml-agents/mlagents/trainers/tests/test_trainer_controller.py
  5. 3
      ml-agents/mlagents/trainers/trainer_controller.py

2
ml-agents/mlagents/trainers/ppo/trainer.py


update_stats = self.policy.bc_module.update()
for stat, val in update_stats.items():
self.stats[stat].append(val)
self.training_buffer.reset_update_buffer()
self.clear_update_buffer()
self.trainer_metrics.end_policy_update()

7
ml-agents/mlagents/trainers/rl_trainer.py


for agent_id in rewards:
rewards[agent_id] = 0
def clear_update_buffer(self) -> None:
    """
    Discard all experiences accumulated in the update buffer during
    inference. When the trainer is not training, call this instead of
    update_policy so the buffer does not grow without bound.
    """
    buffer = self.training_buffer
    buffer.reset_update_buffer()
def add_policy_outputs(
self, take_action_outputs: ActionInfoOutputs, agent_id: str, agent_idx: int
) -> None:

10
ml-agents/mlagents/trainers/tests/test_rl_trainer.py


import mlagents.trainers.tests.mock_brain as mb
import numpy as np
from mlagents.trainers.rl_trainer import RLTrainer
from mlagents.trainers.tests.test_buffer import construct_fake_buffer
@pytest.fixture

for rewards in trainer.collected_rewards.values():
for agent_id in rewards:
assert rewards[agent_id] == 0
def test_clear_update_buffer():
    """
    clear_update_buffer() should empty every field of the trainer's
    update buffer that was populated during inference.
    """
    trainer = create_rl_trainer()
    trainer.training_buffer = construct_fake_buffer()
    trainer.training_buffer.append_update_buffer(2, batch_size=None, training_length=2)
    trainer.clear_update_buffer()
    # Only the buffer contents matter here; iterate .values() instead of
    # .items() with a discarded key (ruff PERF102).
    for arr in trainer.training_buffer.update_buffer.values():
        assert len(arr) == 0

28
ml-agents/mlagents/trainers/tests/test_trainer_controller.py


)
trainer_mock.update_policy.assert_called_once()
trainer_mock.increment_step.assert_called_once()
def test_take_step_if_not_training():
    """
    When train_model is False, advance() must still feed experiences to
    the trainer and must clear the update buffer instead of updating
    the policy, so no stale data accumulates during inference.
    """
    controller, mock_trainer = trainer_controller_with_take_step_mocks()
    controller.train_model = False
    mock_trainer.is_ready_update = MagicMock(return_value=False)

    # Two fake environment steps sharing the same per-brain action info.
    actions_by_brain = {"testbrain": MagicMock()}
    first_step = EnvironmentStep(Mock(), Mock(), actions_by_brain)
    second_step = EnvironmentStep(Mock(), Mock(), actions_by_brain)

    fake_env = MagicMock()
    fake_env.reset.return_value = [first_step]
    fake_env.step.return_value = [second_step]

    controller.advance(fake_env)

    # The environment is stepped exactly once and never reset.
    fake_env.reset.assert_not_called()
    fake_env.step.assert_called_once()
    # Experiences still flow to the trainer while not training.
    mock_trainer.add_experiences.assert_called_once_with(
        second_step.previous_all_brain_info,
        second_step.current_all_brain_info,
        second_step.brain_name_to_action_info["testbrain"].outputs,
    )
    mock_trainer.process_experiences.assert_called_once_with(
        second_step.previous_all_brain_info, second_step.current_all_brain_info
    )
    # Instead of a policy update, the inference buffer must be cleared.
    mock_trainer.clear_update_buffer.assert_called_once()

3
ml-agents/mlagents/trainers/trainer_controller.py


with hierarchical_timer("update_policy"):
trainer.update_policy()
env.set_policy(brain_name, trainer.policy)
else:
# Avoid memory leak during inference
trainer.clear_update_buffer()
return len(new_step_infos)
正在加载...
取消
保存