浏览代码

Fix failure on Academy Done() with parallel envs

When parallel SubprocessUnityEnvironment instances are used together
with Academy Done(), a new step could be taken where a reset should
have been called, because some environments may be done while
others are not (which makes "global done" less useful).

This change manages the reset on `global_done` at the level of the
environment worker, and removes the global reset from
TrainerController.
/develop-generalizationTraining-TrainerController
Jonathan Harper 5 年前
当前提交
d9a7e5b6
共有 4 个文件被更改,包括 32 次插入30 次删除
  1. 5
      ml-agents-envs/mlagents/envs/subprocess_environment.py
  2. 29
      ml-agents-envs/mlagents/envs/tests/test_subprocess_unity_environment.py
  3. 24
      ml-agents/mlagents/trainers/tests/test_trainer_controller.py
  4. 4
      ml-agents/mlagents/trainers/trainer_controller.py

5
ml-agents-envs/mlagents/envs/subprocess_environment.py


cmd: EnvironmentCommand = parent_conn.recv()
if cmd.name == "step":
vector_action, memory, text_action, value = cmd.payload
all_brain_info = env.step(vector_action, memory, text_action, value)
if env.global_done:
all_brain_info = env.reset()
else:
all_brain_info = env.step(vector_action, memory, text_action, value)
_send_response("step", all_brain_info)
elif cmd.name == "external_brains":
_send_response("external_brains", env.external_brains)

29
ml-agents-envs/mlagents/envs/tests/test_subprocess_unity_environment.py


import unittest.mock as mock
from unittest.mock import MagicMock
from unittest.mock import Mock, MagicMock
import unittest
from mlagents.envs.subprocess_environment import *

[[1.0, 2.0], [1.0, 2.0], [3.0, 4.0]],
)
self.assertEqual(combined_braininfo.agents, ["0-1", "0-2", "1-3"])
def test_step_resets_on_global_done(self):
    """A "step" command sent to a worker whose env reports global_done
    must be answered with the data returned by the env's reset()."""
    done_env = Mock()
    done_env.reset = Mock(return_value="reset_data")
    done_env.global_done = True

    def build_done_env(worker_id: int):
        return done_env

    # The worker receives exactly these two commands, in this order.
    queued_commands = [
        EnvironmentCommand("step", (None, None, None, None)),
        EnvironmentCommand("close"),
    ]
    conn = Mock()
    conn.recv = Mock(side_effect=queued_commands)
    conn.send = Mock()

    worker(conn, cloudpickle.dumps(build_done_env), 0)

    # One recv per queued command: the step, then the close.
    self.assertEqual(conn.recv.call_count, 2)
    # The last response sent back carries the reset data.
    expected_response = EnvironmentResponse("step", 0, "reset_data")
    conn.send.assert_called_with(expected_response)

24
ml-agents/mlagents/trainers/tests/test_trainer_controller.py


return tc, trainer_mock
def test_take_step_resets_env_on_global_done():
    """take_step must call env.reset() exactly once when the environment
    reports global_done."""
    tc, trainer_mock = trainer_controller_with_take_step_mocks()
    reset_info = MagicMock()

    # Replace the trainer hooks with fresh mocks.
    for hook_name in (
        "add_experiences",
        "process_experiences",
        "update_policy",
        "write_summary",
    ):
        setattr(trainer_mock, hook_name, MagicMock())
    trainer_mock.trainer.increment_step_and_update_last_reward = MagicMock()
    trainer_mock.get_action = MagicMock(
        return_value=ActionInfo(None, None, None, None, None)
    )

    # Environment double that is already globally done.
    env = MagicMock()
    env.step = MagicMock(return_value=MagicMock())
    env.close = MagicMock()
    env.reset = MagicMock(return_value=reset_info)
    env.global_done = True

    tc.take_step(env, reset_info)

    env.reset.assert_called_once()
def test_take_step_adds_experiences_to_trainer_and_trains():
tc, trainer_mock = trainer_controller_with_take_step_mocks()

4
ml-agents/mlagents/trainers/trainer_controller.py


for brain_name, changed in lessons_incremented.items():
if changed:
self.trainers[brain_name].reward_buffer.clear()
elif env.global_done:
curr_info = self._reset_env(env)
for brain_name, trainer in self.trainers.items():
trainer.end_episode()
# Decide and take an action
take_action_vector = {}

正在加载...
取消
保存