
Remove env creation logic from TrainerController (#1562)

* Remove env creation logic from TrainerController

Currently TrainerController includes logic related to creating the
UnityEnvironment, which causes poor separation of concerns between
the learn.py application script, TrainerController and UnityEnvironment:

* TrainerController must know about the proper way to instantiate the
  UnityEnvironment, which may differ from application to application.
  This also makes mocking or subclassing UnityEnvironment more
  difficult.
* Many arguments are passed by learn.py to TrainerController and passed
  along to UnityEnvironment.

This change moves environment construction logic into learn.py, as part
of the greater refactor to separate trainer logic from actor / environment.
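
For orientation, the new wiring in learn.py reduces to roughly the following sketch (condensed from the diff below; the derivation of model_path, summaries_dir and the CLI options is omitted):

# Condensed sketch of the new learn.py flow; not the verbatim code.
trainer_config = load_config(trainer_config_path)
env = init_environment(env_path, docker_target_name, no_graphics,
                       worker_id + sub_id, fast_simulation, run_seed)
maybe_meta_curriculum = try_create_meta_curriculum(curriculum_folder, env)
external_brains = {name: env.brains[name]
                   for name in env.external_brain_names}
tc = TrainerController(model_path, summaries_dir, run_id + '-' + str(sub_id),
                       save_freq, maybe_meta_curriculum,
                       load_model, train_model,
                       keep_checkpoints, lesson, external_brains, run_seed)
tc.start_learning(env, trainer_config)  # the environment is injected, not created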
Branch: develop-generalizationTraining-TrainerController
GitHub · 6 years ago
Current commit
517e3a0a
5 files changed, 593 insertions and 302 deletions
  1. ml-agents/mlagents/trainers/learn.py (144 changed lines)
  2. ml-agents/mlagents/trainers/trainer_controller.py (315 changed lines)
  3. ml-agents/tests/trainers/test_bc.py (4 changed lines)
  4. ml-agents/tests/trainers/test_trainer_controller.py (358 changed lines)
  5. ml-agents/tests/trainers/test_learn.py (74 changed lines)

ml-agents/mlagents/trainers/learn.py (144 changed lines)


import logging
from multiprocessing import Process, Queue
import os
import glob
import shutil
import yaml
from typing import Optional
from mlagents.trainers import MetaCurriculumError, MetaCurriculum
from mlagents.envs import UnityEnvironment
from mlagents.envs.exception import UnityEnvironmentException
-def run_training(sub_id, run_seed, run_options, process_queue):
+def run_training(sub_id: int, run_seed: int, run_options, process_queue):
"""
Launches training session.
:param process_queue: Queue used to send signal back to main.

"""
# Docker Parameters
docker_target_name = (run_options['--docker-target-name']
                      if run_options['--docker-target-name'] != 'None' else None)
env_path = (run_options['--env']
            if run_options['--env'] != 'None' else None)
run_id = run_options['--run-id']
load_model = run_options['--load']
train_model = run_options['--train']

-curriculum_file = (run_options['--curriculum']
-                   if run_options['--curriculum'] != 'None' else None)
+curriculum_folder = (run_options['--curriculum']
+                     if run_options['--curriculum'] != 'None' else None)
-# Create controller and launch environment.
-tc = TrainerController(env_path, run_id + '-' + str(sub_id),
-                       save_freq, curriculum_file, fast_simulation,
-                       load_model, train_model, worker_id + sub_id,
-                       keep_checkpoints, lesson, run_seed,
-                       docker_target_name, trainer_config_path, no_graphics)
# Recognize and use docker volume if one is passed as an argument
if not docker_target_name:
model_path = './models/{run_id}'.format(run_id=run_id)
summaries_dir = './summaries'
else:
trainer_config_path = \
'/{docker_target_name}/{trainer_config_path}'.format(
docker_target_name=docker_target_name,
trainer_config_path=trainer_config_path)
if curriculum_folder is not None:
curriculum_folder = \
'/{docker_target_name}/{curriculum_folder}'.format(
docker_target_name=docker_target_name,
curriculum_folder=curriculum_folder)
model_path = '/{docker_target_name}/models/{run_id}'.format(
docker_target_name=docker_target_name,
run_id=run_id)
summaries_dir = '/{docker_target_name}/summaries'.format(
docker_target_name=docker_target_name)
trainer_config = load_config(trainer_config_path)
env = init_environment(env_path, docker_target_name, no_graphics, worker_id + sub_id, fast_simulation, run_seed)
maybe_meta_curriculum = try_create_meta_curriculum(curriculum_folder, env)
external_brains = {}
for brain_name in env.external_brain_names:
external_brains[brain_name] = env.brains[brain_name]
+# Create controller and begin training.
+tc = TrainerController(model_path, summaries_dir, run_id + '-' + str(sub_id),
+                       save_freq, maybe_meta_curriculum,
+                       load_model, train_model,
+                       keep_checkpoints, lesson, external_brains, run_seed)
-tc.start_learning()
+tc.start_learning(env, trainer_config)
def try_create_meta_curriculum(curriculum_folder: Optional[str], env: UnityEnvironment) -> Optional[MetaCurriculum]:
if curriculum_folder is None:
return None
else:
meta_curriculum = MetaCurriculum(curriculum_folder, env._resetParameters)
if meta_curriculum:
for brain_name in meta_curriculum.brains_to_curriculums.keys():
if brain_name not in env.external_brain_names:
raise MetaCurriculumError('One of the curricula '
'defined in ' +
curriculum_folder + ' '
'does not have a corresponding '
'Brain. Check that the '
'curriculum file has the same '
'name as the Brain '
'whose curriculum it defines.')
return meta_curriculum
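
To illustrate the check above with a hypothetical name: a curriculum only takes effect when the environment exposes a Brain matching the curriculum file's name.

# Hypothetical example: 'curricula/WalkerBrain.json' defines a curriculum for
# a Brain named 'WalkerBrain'. If 'WalkerBrain' is missing from
# env.external_brain_names, try_create_meta_curriculum raises MetaCurriculumError.
maybe_meta_curriculum = try_create_meta_curriculum('curricula', env)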
def prepare_for_docker_run(docker_target_name, env_path):
for f in glob.glob('/{docker_target_name}/*'.format(
docker_target_name=docker_target_name)):
if env_path in f:
try:
b = os.path.basename(f)
if os.path.isdir(f):
shutil.copytree(f,
'/ml-agents/{b}'.format(b=b))
else:
src_f = '/{docker_target_name}/{b}'.format(
docker_target_name=docker_target_name, b=b)
dst_f = '/ml-agents/{b}'.format(b=b)
shutil.copyfile(src_f, dst_f)
os.chmod(dst_f, 0o775) # Make executable
except Exception as e:
logging.getLogger('mlagents.trainers').info(e)
env_path = '/ml-agents/{env_path}'.format(env_path=env_path)
return env_path
def load_config(trainer_config_path):
try:
with open(trainer_config_path) as data_file:
trainer_config = yaml.load(data_file)
return trainer_config
except IOError:
raise UnityEnvironmentException('Parameter file could not be found '
'at {}.'
.format(trainer_config_path))
except UnicodeDecodeError:
raise UnityEnvironmentException('There was an error decoding '
'Trainer Config from this path : {}'
.format(trainer_config_path))
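
As a usage note, load_config simply parses the trainer-config YAML into a dict keyed by brain name with a 'default' section; a minimal, hypothetical example of what it returns:

import yaml

# Hypothetical minimal trainer config; real configs carry many more keys.
SAMPLE_TRAINER_CONFIG = '''
default:
    trainer: ppo
'''
config = yaml.load(SAMPLE_TRAINER_CONFIG)  # same PyYAML call load_config uses
assert config['default']['trainer'] == 'ppo'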
def init_environment(env_path, docker_target_name, no_graphics, worker_id, fast_simulation, seed):
if env_path is not None:
# Strip out executable extensions if passed
env_path = (env_path.strip()
.replace('.app', '')
.replace('.exe', '')
.replace('.x86_64', '')
.replace('.x86', ''))
docker_training = docker_target_name is not None
if docker_training and env_path is not None:
"""
Comments for future maintenance:
Some OS/VM instances (e.g. COS GCP Image) mount filesystems
with COS flag which prevents execution of the Unity scene,
to get around this, we will copy the executable into the
container.
"""
# Navigate in docker path and find env_path and copy it.
env_path = prepare_for_docker_run(docker_target_name,
env_path)
return UnityEnvironment(
file_name=env_path,
worker_id=worker_id,
seed=seed,
docker_training=docker_training,
no_graphics=no_graphics
)
def main():

ml-agents/mlagents/trainers/trainer_controller.py (315 changed lines)


"""Launches trainers for each External Brains in a Unity Environment."""
import os
import glob
import logging
import shutil
import sys

from typing import *
import yaml
import re
from tensorflow.python.tools import freeze_graph
from mlagents.envs.environment import UnityEnvironment
from mlagents.envs.exception import UnityEnvironmentException
from mlagents.envs import BrainInfo
from mlagents.trainers.exception import MetaCurriculumError
-def __init__(self, env_path, run_id, save_freq, curriculum_folder,
-             fast_simulation, load, train, worker_id, keep_checkpoints,
-             lesson, seed, docker_target_name,
-             trainer_config_path, no_graphics):
+def __init__(self, model_path: str, summaries_dir: str,
+             run_id: str, save_freq: int, meta_curriculum: Optional[MetaCurriculum],
+             load: bool, train: bool, keep_checkpoints: int, lesson: Optional[int],
+             external_brains: Dict[str, BrainInfo], training_seed: int):
-:param env_path: Location to the environment executable to be loaded.
+:param model_path: Path to save the model.
+:param summaries_dir: Folder to save training summaries.
-:param curriculum_folder: Folder containing JSON curriculums for the
-    environment.
-:param fast_simulation: Whether to run the game at training speed.
+:param meta_curriculum: MetaCurriculum object which stores information about all curricula.
-:param worker_id: Number to add to communication port (5005).
-    Used for multi-environment
-:param seed: Random seed used for training.
-:param docker_target_name: Name of docker volume that will contain all
-    data.
-:param trainer_config_path: Fully qualified path to location of trainer
-    configuration file.
-:param no_graphics: Whether to run the Unity simulator in no-graphics
-    mode.
+:param external_brains: dictionary of external brain names to BrainInfo objects.
+:param training_seed: Seed to use for Numpy and Tensorflow random number generation.
-if env_path is not None:
-    # Strip out executable extensions if passed
-    env_path = (env_path.strip()
-                .replace('.app', '')
-                .replace('.exe', '')
-                .replace('.x86_64', '')
-                .replace('.x86', ''))
-# Recognize and use docker volume if one is passed as an argument
-if not docker_target_name:
-    self.docker_training = False
-    self.trainer_config_path = trainer_config_path
-    self.model_path = './models/{run_id}'.format(run_id=run_id)
-    self.curriculum_folder = curriculum_folder
-    self.summaries_dir = './summaries'
-else:
-    self.docker_training = True
-    self.trainer_config_path = \
-        '/{docker_target_name}/{trainer_config_path}'.format(
-            docker_target_name=docker_target_name,
-            trainer_config_path=trainer_config_path)
-    self.model_path = '/{docker_target_name}/models/{run_id}'.format(
-        docker_target_name=docker_target_name,
-        run_id=run_id)
-    if env_path is not None:
-        """
-        Comments for future maintenance:
-        Some OS/VM instances (e.g. COS GCP Image) mount filesystems
-        with COS flag which prevents execution of the Unity scene,
-        to get around this, we will copy the executable into the
-        container.
-        """
-        # Navigate in docker path and find env_path and copy it.
-        env_path = self._prepare_for_docker_run(docker_target_name,
-                                                env_path)
-    if curriculum_folder is not None:
-        self.curriculum_folder = \
-            '/{docker_target_name}/{curriculum_folder}'.format(
-                docker_target_name=docker_target_name,
-                curriculum_folder=curriculum_folder)
-    self.summaries_dir = '/{docker_target_name}/summaries'.format(
-        docker_target_name=docker_target_name)
+self.model_path = model_path
+self.summaries_dir = summaries_dir
+self.external_brains = external_brains
+self.external_brain_names = external_brains.keys()
-self.fast_simulation = fast_simulation
-self.worker_id = worker_id
-self.seed = seed
+self.meta_curriculum = meta_curriculum
+self.seed = training_seed
-self.env = UnityEnvironment(file_name=env_path,
-                            worker_id=self.worker_id,
-                            seed=self.seed,
-                            docker_training=self.docker_training,
-                            no_graphics=no_graphics)
-if env_path is None:
-    self.env_name = 'editor_' + self.env.academy_name
-else:
-    # Extract out name of environment
-    self.env_name = os.path.basename(os.path.normpath(env_path))
-if curriculum_folder is None:
-    self.meta_curriculum = None
-else:
-    self.meta_curriculum = MetaCurriculum(self.curriculum_folder,
-                                          self.env._resetParameters)
-if self.meta_curriculum:
-    for brain_name in self.meta_curriculum.brains_to_curriculums.keys():
-        if brain_name not in self.env.external_brain_names:
-            raise MetaCurriculumError('One of the curriculums '
-                                      'defined in ' + self.curriculum_folder +
-                                      ' does not have a corresponding '
-                                      'Brain. Check that the '
-                                      'curriculum file has the same '
-                                      'name as the Brain '
-                                      'whose curriculum it defines.')
-def _prepare_for_docker_run(self, docker_target_name, env_path):
-    for f in glob.glob('/{docker_target_name}/*'.format(
-            docker_target_name=docker_target_name)):
-        if env_path in f:
-            try:
-                b = os.path.basename(f)
-                if os.path.isdir(f):
-                    shutil.copytree(f, '/ml-agents/{b}'.format(b=b))
-                else:
-                    src_f = '/{docker_target_name}/{b}'.format(
-                        docker_target_name=docker_target_name, b=b)
-                    dst_f = '/ml-agents/{b}'.format(b=b)
-                    shutil.copyfile(src_f, dst_f)
-                    os.chmod(dst_f, 0o775)  # Make executable
-            except Exception as e:
-                self.logger.info(e)
-    env_path = '/ml-agents/{env_name}'.format(env_name=env_path)
-    return env_path
def _get_measure_vals(self):
if self.meta_curriculum:

else:
return None
-def _save_model(self,steps=0):
+def _save_model(self, steps=0):
"""
Saves current model to checkpoint folder.
:param steps: Current number of steps in training process.

for brain_name in self.trainers.keys():
self.trainers[brain_name].export_model()
-def _initialize_trainers(self, trainer_config):
+def initialize_trainers(self, trainer_config):
-    for brain_name in self.env.external_brain_names:
+    for brain_name in self.external_brains:
trainer_parameters = trainer_config['default'].copy()
trainer_parameters['summary_path'] = '{basedir}/{name}'.format(
basedir=self.summaries_dir,

for k in trainer_config[_brain_key]:
trainer_parameters[k] = trainer_config[_brain_key][k]
trainer_parameters_dict[brain_name] = trainer_parameters.copy()
-for brain_name in self.env.external_brain_names:
+for brain_name in self.external_brains:
-    self.env.brains[brain_name],
+    self.external_brains[brain_name],
-    self.env.brains[brain_name],
+    self.external_brains[brain_name],
-    self.env.brains[brain_name],
+    self.external_brains[brain_name],
self.meta_curriculum
.brains_to_curriculums[brain_name]
.min_lesson_length if self.meta_curriculum else 0,

'brain {}'
.format(brain_name))
-def _load_config(self):
-    try:
-        with open(self.trainer_config_path) as data_file:
-            trainer_config = yaml.load(data_file)
-            return trainer_config
-    except IOError:
-        raise UnityEnvironmentException('Parameter file could not be found '
-                                        'at {}.'
-                                        .format(self.trainer_config_path))
-    except UnicodeDecodeError:
-        raise UnityEnvironmentException('There was an error decoding '
-                                        'Trainer Config from this path : {}'
-                                        .format(self.trainer_config_path))
@staticmethod
def _create_model_path(model_path):
try:

'permissions are set correctly.'
.format(model_path))
-def _reset_env(self):
+def _reset_env(self, env):
"""Resets the environment.
Returns:

if self.meta_curriculum is not None:
-    return self.env.reset(config=self.meta_curriculum.get_config(),
-                          train_mode=self.fast_simulation)
+    return env.reset(config=self.meta_curriculum.get_config())
-return self.env.reset(train_mode=self.fast_simulation)
+return env.reset()
-def start_learning(self):
+def start_learning(self, env, trainer_config):
-    trainer_config = self._load_config()
-    self._initialize_trainers(trainer_config)
+    self.initialize_trainers(trainer_config)
-    curr_info = self._reset_env()
+    curr_info = self._reset_env(env)
if self.train_model:
for brain_name, trainer in self.trainers.items():
trainer.write_tensorboard_text('Hyperparameters',

while any([t.get_step <= t.get_max_steps \
for k, t in self.trainers.items()]) \
or not self.train_model:
-    if self.meta_curriculum:
-        # Get the sizes of the reward buffers.
-        reward_buff_sizes = {k: len(t.reward_buffer)
-                             for (k, t) in self.trainers.items()}
-        # Attempt to increment the lessons of the brains who
-        # were ready.
-        lessons_incremented = \
-            self.meta_curriculum.increment_lessons(
-                self._get_measure_vals(),
-                reward_buff_sizes=reward_buff_sizes)
-    # If any lessons were incremented or the environment is
-    # ready to be reset
-    if (self.meta_curriculum
-            and any(lessons_incremented.values())):
-        curr_info = self._reset_env()
-        for brain_name, trainer in self.trainers.items():
-            trainer.end_episode()
-        for brain_name, changed in lessons_incremented.items():
-            if changed:
-                self.trainers[brain_name].reward_buffer.clear()
-    elif self.env.global_done:
-        curr_info = self._reset_env()
-        for brain_name, trainer in self.trainers.items():
-            trainer.end_episode()
-    # Decide and take an action
-    take_action_vector, \
-    take_action_memories, \
-    take_action_text, \
-    take_action_value, \
-    take_action_outputs \
-        = {}, {}, {}, {}, {}
-    for brain_name, trainer in self.trainers.items():
-        (take_action_vector[brain_name],
-         take_action_memories[brain_name],
-         take_action_text[brain_name],
-         take_action_value[brain_name],
-         take_action_outputs[brain_name]) = \
-            trainer.take_action(curr_info)
-    new_info = self.env.step(vector_action=take_action_vector,
-                             memory=take_action_memories,
-                             text_action=take_action_text,
-                             value=take_action_value)
-    for brain_name, trainer in self.trainers.items():
-        trainer.add_experiences(curr_info, new_info,
-                                take_action_outputs[brain_name])
-        trainer.process_experiences(curr_info, new_info)
-        if trainer.is_ready_update() and self.train_model \
-                and trainer.get_step <= trainer.get_max_steps:
-            # Perform gradient descent with experience buffer
-            trainer.update_policy()
-        # Write training statistics to Tensorboard.
-        if self.meta_curriculum is not None:
-            trainer.write_summary(
-                self.global_step,
-                lesson_num=self.meta_curriculum
-                .brains_to_curriculums[brain_name]
-                .lesson_num)
-        else:
-            trainer.write_summary(self.global_step)
-        if self.train_model \
-                and trainer.get_step <= trainer.get_max_steps:
-            trainer.increment_step_and_update_last_reward()
+new_info = self.take_step(env, curr_info)
self.global_step += 1
if self.global_step % self.save_freq == 0 and self.global_step != 0 \
and self.train_model:

if self.train_model:
self._save_model_when_interrupted(steps=self.global_step)
pass
-self.env.close()
+env.close()
def take_step(self, env, curr_info):
if self.meta_curriculum:
# Get the sizes of the reward buffers.
reward_buff_sizes = {k: len(t.reward_buffer) \
for (k, t) in self.trainers.items()}
# Attempt to increment the lessons of the brains who
# were ready.
lessons_incremented = \
self.meta_curriculum.increment_lessons(
self._get_measure_vals(),
reward_buff_sizes=reward_buff_sizes)
# If any lessons were incremented or the environment is
# ready to be reset
if (self.meta_curriculum
and any(lessons_incremented.values())):
curr_info = self._reset_env(env)
for brain_name, trainer in self.trainers.items():
trainer.end_episode()
for brain_name, changed in lessons_incremented.items():
if changed:
self.trainers[brain_name].reward_buffer.clear()
elif env.global_done:
curr_info = self._reset_env(env)
for brain_name, trainer in self.trainers.items():
trainer.end_episode()
# Decide and take an action
take_action_vector, \
take_action_memories, \
take_action_text, \
take_action_value, \
take_action_outputs \
= {}, {}, {}, {}, {}
for brain_name, trainer in self.trainers.items():
(take_action_vector[brain_name],
take_action_memories[brain_name],
take_action_text[brain_name],
take_action_value[brain_name],
take_action_outputs[brain_name]) = \
trainer.take_action(curr_info)
new_info = env.step(vector_action=take_action_vector,
memory=take_action_memories,
text_action=take_action_text,
value=take_action_value)
for brain_name, trainer in self.trainers.items():
trainer.add_experiences(curr_info, new_info,
take_action_outputs[brain_name])
trainer.process_experiences(curr_info, new_info)
if trainer.is_ready_update() and self.train_model \
and trainer.get_step <= trainer.get_max_steps:
# Perform gradient descent with experience buffer
trainer.update_policy()
# Write training statistics to Tensorboard.
if self.meta_curriculum is not None:
trainer.write_summary(
self.global_step,
lesson_num=self.meta_curriculum
.brains_to_curriculums[brain_name]
.lesson_num)
else:
trainer.write_summary(self.global_step)
if self.train_model \
and trainer.get_step <= trainer.get_max_steps:
trainer.increment_step_and_update_last_reward()
return new_info
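
Taken together, start_learning now reduces to roughly this loop (a paraphrase of the diff above, not the verbatim implementation):

# Paraphrased control flow; TensorBoard setup and interrupt handling omitted.
self.initialize_trainers(trainer_config)
curr_info = self._reset_env(env)
while any(t.get_step <= t.get_max_steps for t in self.trainers.values()) \
        or not self.train_model:
    curr_info = self.take_step(env, curr_info)  # steps env, feeds trainers
    self.global_step += 1
    if self.train_model and self.global_step % self.save_freq == 0:
        self._save_model(steps=self.global_step)
env.close()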

ml-agents/tests/trainers/test_bc.py (4 changed lines)


@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
-def test_bc_policy_evaluate(mock_communicator, mock_launcher):
+def test_bc_policy_evaluate(mock_communicator, mock_launcher, dummy_config):
tf.reset_default_graph()
mock_communicator.return_value = MockCommunicator(
discrete_action=False, visual_inputs=0)

-trainer_parameters = dummy_config()
+trainer_parameters = dummy_config
model_path = env.brain_names[0]
trainer_parameters['model_path'] = model_path
trainer_parameters['keep_checkpoints'] = 3

ml-agents/tests/trainers/test_trainer_controller.py (358 changed lines)


import json
import unittest.mock as mock
from unittest.mock import *
import tensorflow as tf
from mlagents.trainers.trainer_controller import TrainerController
from mlagents.trainers.ppo.trainer import PPOTrainer

from tests.mock_communicator import MockCommunicator
@pytest.fixture

curiosity_enc_size: 1
''')
@pytest.fixture
def dummy_offline_bc_config_with_override():
base = dummy_offline_bc_config()
base['testbrain'] = {}
base['testbrain']['normalize'] = False
return base
@pytest.fixture
def dummy_bad_config():

memory_size: 8
''')
@pytest.fixture
def basic_trainer_controller(brain_info):
return TrainerController(
model_path='test_model_path',
summaries_dir='test_summaries_dir',
run_id='test_run_id',
save_freq=100,
meta_curriculum=None,
load=True,
train=True,
keep_checkpoints=False,
lesson=None,
external_brains={'testbrain': brain_info},
training_seed=99
)
-@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
-@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
-def test_initialization(mock_communicator, mock_launcher):
-    mock_communicator.return_value = MockCommunicator(
-        discrete_action=True, visual_inputs=1)
-    tc = TrainerController(' ', ' ', 1, None, True, True, False, 1,
-                           1, 1, 1, '', "tests/test_mlagents.trainers.py", False)
-    assert (tc.env.brain_names[0] == 'RealFakeBrain')
@patch('numpy.random.seed')
@patch('tensorflow.set_random_seed')
def test_initialization_seed(numpy_random_seed, tensorflow_set_seed):
seed = 27
TrainerController('', '', '1', 1, None, True, False, False, None, {}, seed)
numpy_random_seed.assert_called_with(seed)
tensorflow_set_seed.assert_called_with(seed)
def assert_bc_trainer_constructed(trainer_cls, input_config, tc, expected_brain_info, expected_config):
def mock_constructor(self, brain, trainer_params, training, load, seed, run_id):
assert(brain == expected_brain_info)
assert(trainer_params == expected_config)
assert(training == tc.train_model)
assert(load == tc.load_model)
assert(seed == tc.seed)
assert(run_id == tc.run_id)
with patch.object(trainer_cls, "__init__", mock_constructor):
tc.initialize_trainers(input_config)
assert('testbrain' in tc.trainers)
assert(isinstance(tc.trainers['testbrain'], trainer_cls))
def assert_ppo_trainer_constructed(input_config, tc, expected_brain_info,
expected_config, expected_reward_buff_cap=0):
def mock_constructor(self, brain, reward_buff_cap, trainer_parameters, training, load, seed, run_id):
assert(brain == expected_brain_info)
assert(trainer_parameters == expected_config)
assert(reward_buff_cap == expected_reward_buff_cap)
assert(training == tc.train_model)
assert(load == tc.load_model)
assert(seed == tc.seed)
assert(run_id == tc.run_id)
with patch.object(PPOTrainer, "__init__", mock_constructor):
tc.initialize_trainers(input_config)
assert('testbrain' in tc.trainers)
assert(isinstance(tc.trainers['testbrain'], PPOTrainer))
@patch('mlagents.envs.BrainInfo')
def test_initialize_trainer_parameters_uses_defaults(BrainInfoMock):
brain_info_mock = BrainInfoMock()
tc = basic_trainer_controller(brain_info_mock)
full_config = dummy_offline_bc_config()
expected_config = full_config['default']
expected_config['summary_path'] = tc.summaries_dir + '/test_run_id_testbrain'
expected_config['model_path'] = tc.model_path + '/testbrain'
expected_config['keep_checkpoints'] = tc.keep_checkpoints
assert_bc_trainer_constructed(OfflineBCTrainer, full_config, tc, brain_info_mock, expected_config)
@patch('mlagents.envs.BrainInfo')
def test_initialize_trainer_parameters_override_defaults(BrainInfoMock):
brain_info_mock = BrainInfoMock()
tc = basic_trainer_controller(brain_info_mock)
full_config = dummy_offline_bc_config_with_override()
expected_config = full_config['default']
expected_config['summary_path'] = tc.summaries_dir + '/test_run_id_testbrain'
expected_config['model_path'] = tc.model_path + '/testbrain'
expected_config['keep_checkpoints'] = tc.keep_checkpoints
-@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
-@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
-def test_load_config(mock_communicator, mock_launcher, dummy_config):
-    open_name = 'mlagents.trainers.trainer_controller' + '.open'
-    with mock.patch('yaml.load') as mock_load:
-        with mock.patch(open_name, create=True) as _:
-            mock_load.return_value = dummy_config
-            mock_communicator.return_value = MockCommunicator(
-                discrete_action=True, visual_inputs=1)
-            tc = TrainerController(' ', ' ', 1, None, True, True, False, 1,
-                                   1, 1, 1, '', '', False)
-            config = tc._load_config()
-            assert (len(config) == 1)
-            assert (config['default']['trainer'] == "ppo")
# Override value from specific brain config
expected_config['normalize'] = False
assert_bc_trainer_constructed(OfflineBCTrainer, full_config, tc, brain_info_mock, expected_config)
-@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
-@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
-def test_initialize_trainers(mock_communicator, mock_launcher, dummy_config,
-                             dummy_offline_bc_config, dummy_online_bc_config,
-                             dummy_bad_config):
-    open_name = 'mlagents.trainers.trainer_controller' + '.open'
-    with mock.patch('yaml.load') as mock_load:
-        with mock.patch(open_name, create=True) as _:
-            mock_communicator.return_value = MockCommunicator(
-                discrete_action=True, visual_inputs=1)
-            tc = TrainerController(' ', ' ', 1, None, True, False, False, 1, 1,
-                                   1, 1, '', "tests/test_mlagents.trainers.py",
-                                   False)
-            # Test for PPO trainer
-            mock_load.return_value = dummy_config
-            config = tc._load_config()
-            tf.reset_default_graph()
-            tc._initialize_trainers(config)
-            assert (len(tc.trainers) == 1)
-            assert (isinstance(tc.trainers['RealFakeBrain'], PPOTrainer))
@patch('mlagents.envs.BrainInfo')
def test_initialize_online_bc_trainer(BrainInfoMock):
brain_info_mock = BrainInfoMock()
tc = basic_trainer_controller(brain_info_mock)
-            # Test for Online Behavior Cloning Trainer
-            mock_load.return_value = dummy_online_bc_config
-            config = tc._load_config()
-            tf.reset_default_graph()
-            tc._initialize_trainers(config)
-            assert (isinstance(tc.trainers['RealFakeBrain'], OnlineBCTrainer))
full_config = dummy_online_bc_config()
expected_config = full_config['default']
expected_config['summary_path'] = tc.summaries_dir + '/test_run_id_testbrain'
expected_config['model_path'] = tc.model_path + '/testbrain'
expected_config['keep_checkpoints'] = tc.keep_checkpoints
-            # Test for proper exception when trainer name is incorrect
-            mock_load.return_value = dummy_bad_config
-            config = tc._load_config()
-            tf.reset_default_graph()
-            with pytest.raises(UnityEnvironmentException):
-                tc._initialize_trainers(config)
assert_bc_trainer_constructed(OnlineBCTrainer, full_config, tc, brain_info_mock, expected_config)
-@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
-@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
-def test_initialize_offline_trainers(mock_communicator, mock_launcher, dummy_config,
-                                     dummy_offline_bc_config, dummy_online_bc_config,
-                                     dummy_bad_config):
-    open_name = 'mlagents.trainers.trainer_controller' + '.open'
-    with mock.patch('yaml.load') as mock_load:
-        with mock.patch(open_name, create=True) as _:
-            mock_communicator.return_value = MockCommunicator(
-                discrete_action=False, stack=False, visual_inputs=0,
-                brain_name="Ball3DBrain", vec_obs_size=8)
-            tc = TrainerController(' ', ' ', 1, None, True, False, False, 1, 1,
-                                   1, 1, '', "tests/test_mlagents.trainers.py",
-                                   False)
@patch('mlagents.envs.BrainInfo')
def test_initialize_ppo_trainer(BrainInfoMock):
brain_info_mock = BrainInfoMock()
tc = basic_trainer_controller(brain_info_mock)
-            # Test for Offline Behavior Cloning Trainer
-            mock_load.return_value = dummy_offline_bc_config
-            config = tc._load_config()
-            tf.reset_default_graph()
-            tc._initialize_trainers(config)
-            assert (isinstance(tc.trainers['Ball3DBrain'], OfflineBCTrainer))
full_config = dummy_config()
expected_config = full_config['default']
expected_config['summary_path'] = tc.summaries_dir + '/test_run_id_testbrain'
expected_config['model_path'] = tc.model_path + '/testbrain'
expected_config['keep_checkpoints'] = tc.keep_checkpoints
assert_ppo_trainer_constructed(full_config, tc, brain_info_mock, expected_config)
@patch('mlagents.envs.BrainInfo')
def test_initialize_invalid_trainer_raises_exception(BrainInfoMock):
brain_info_mock = BrainInfoMock()
tc = basic_trainer_controller(brain_info_mock)
bad_config = dummy_bad_config()
try:
tc.initialize_trainers(bad_config)
assert False, "Initialize trainers with bad config did not raise an exception."
except UnityEnvironmentException:
pass
def trainer_controller_with_start_learning_mocks():
trainer_mock = MagicMock()
trainer_mock.get_step = 0
trainer_mock.get_max_steps = 5
trainer_mock.parameters = {'some': 'parameter'}
trainer_mock.write_tensorboard_text = MagicMock()
brain_info_mock = MagicMock()
tc = basic_trainer_controller(brain_info_mock)
tc.initialize_trainers = MagicMock()
tc.trainers = {'testbrain': trainer_mock}
tc.take_step = MagicMock()
def take_step_sideeffect(env, curr_info):
tc.trainers['testbrain'].get_step += 1
if tc.trainers['testbrain'].get_step > 10:
raise KeyboardInterrupt
tc.take_step.side_effect = take_step_sideeffect
tc._export_graph = MagicMock()
tc._save_model = MagicMock()
return tc, trainer_mock
@patch('tensorflow.reset_default_graph')
def test_start_learning_trains_forever_if_no_train_model(tf_reset_graph):
tc, trainer_mock = trainer_controller_with_start_learning_mocks()
tc.train_model = False
trainer_config = dummy_config()
tf_reset_graph.return_value = None
env_mock = MagicMock()
env_mock.close = MagicMock()
env_mock.reset = MagicMock()
tc.start_learning(env_mock, trainer_config)
tf_reset_graph.assert_called_once()
tc.initialize_trainers.assert_called_once_with(trainer_config)
env_mock.reset.assert_called_once()
assert (tc.take_step.call_count == 11)
tc._export_graph.assert_not_called()
tc._save_model.assert_not_called()
env_mock.close.assert_called_once()
@patch('tensorflow.reset_default_graph')
def test_start_learning_trains_until_max_steps_then_saves(tf_reset_graph):
tc, trainer_mock = trainer_controller_with_start_learning_mocks()
trainer_config = dummy_config()
tf_reset_graph.return_value = None
brain_info_mock = MagicMock()
env_mock = MagicMock()
env_mock.close = MagicMock()
env_mock.reset = MagicMock(return_value=brain_info_mock)
tc.start_learning(env_mock, trainer_config)
tf_reset_graph.assert_called_once()
tc.initialize_trainers.assert_called_once_with(trainer_config)
env_mock.reset.assert_called_once()
assert(tc.take_step.call_count == trainer_mock.get_max_steps + 1)
env_mock.close.assert_called_once()
tc._save_model.assert_called_once_with(steps=6)
def test_start_learning_updates_meta_curriculum_lesson_number():
tc, trainer_mock = trainer_controller_with_start_learning_mocks()
trainer_config = dummy_config()
brain_info_mock = MagicMock()
env_mock = MagicMock()
env_mock.close = MagicMock()
env_mock.reset = MagicMock(return_value=brain_info_mock)
meta_curriculum_mock = MagicMock()
meta_curriculum_mock.set_all_curriculums_to_lesson_num = MagicMock()
tc.meta_curriculum = meta_curriculum_mock
tc.lesson = 5
tc.start_learning(env_mock, trainer_config)
meta_curriculum_mock.set_all_curriculums_to_lesson_num.assert_called_once_with(tc.lesson)
def trainer_controller_with_take_step_mocks():
trainer_mock = MagicMock()
trainer_mock.get_step = 0
trainer_mock.get_max_steps = 5
trainer_mock.parameters = {'some': 'parameter'}
trainer_mock.write_tensorboard_text = MagicMock()
brain_info_mock = MagicMock()
tc = basic_trainer_controller(brain_info_mock)
tc.trainers = {'testbrain': trainer_mock}
return tc, trainer_mock
def test_take_step_resets_env_on_global_done():
tc, trainer_mock = trainer_controller_with_take_step_mocks()
brain_info_mock = MagicMock()
action_data_mock_out = [None, None, None, None, None]
trainer_mock.take_action = MagicMock(return_value=action_data_mock_out)
trainer_mock.add_experiences = MagicMock()
trainer_mock.process_experiences = MagicMock()
trainer_mock.update_policy = MagicMock()
trainer_mock.write_summary = MagicMock()
trainer_mock.increment_step_and_update_last_reward = MagicMock()
env_mock = MagicMock()
step_data_mock_out = MagicMock()
env_mock.step = MagicMock(return_value=step_data_mock_out)
env_mock.close = MagicMock()
env_mock.reset = MagicMock(return_value=brain_info_mock)
env_mock.global_done = True
tc.take_step(env_mock, brain_info_mock)
env_mock.reset.assert_called_once()
def test_take_step_adds_experiences_to_trainer_and_trains():
tc, trainer_mock = trainer_controller_with_take_step_mocks()
curr_info_mock = MagicMock()
trainer_action_output_mock = [
'action',
'memory',
'actiontext',
'value',
'output',
]
trainer_mock.take_action = MagicMock(return_value=trainer_action_output_mock)
trainer_mock.is_ready_update = MagicMock(return_value=True)
env_mock = MagicMock()
env_step_output_mock = MagicMock()
env_mock.step = MagicMock(return_value=env_step_output_mock)
env_mock.close = MagicMock()
env_mock.reset = MagicMock(return_value=curr_info_mock)
env_mock.global_done = False
tc.take_step(env_mock, curr_info_mock)
env_mock.reset.assert_not_called()
trainer_mock.take_action.assert_called_once_with(curr_info_mock)
env_mock.step.assert_called_once_with(
vector_action={'testbrain': trainer_action_output_mock[0]},
memory={'testbrain': trainer_action_output_mock[1]},
text_action={'testbrain': trainer_action_output_mock[2]},
value={'testbrain': trainer_action_output_mock[3]}
)
trainer_mock.add_experiences.assert_called_once_with(
curr_info_mock, env_step_output_mock, trainer_action_output_mock[4]
)
trainer_mock.process_experiences.assert_called_once_with(curr_info_mock, env_step_output_mock)
trainer_mock.update_policy.assert_called_once()
trainer_mock.write_summary.assert_called_once()
trainer_mock.increment_step_and_update_last_reward.assert_called_once()

ml-agents/tests/trainers/test_learn.py (74 changed lines)


import unittest.mock as mock
import pytest
from unittest.mock import *
from mlagents.trainers import learn, TrainerController
@pytest.fixture
def basic_options():
return {
'--docker-target-name': 'None',
'--env': 'None',
'--run-id': 'ppo',
'--load': False,
'--train': False,
'--save-freq': '50000',
'--keep-checkpoints': '5',
'--worker-id': '0',
'--curriculum': 'None',
'--lesson': '0',
'--slow': False,
'--no-graphics': False,
'<trainer-config-path>': 'basic_path',
}
@patch('mlagents.trainers.learn.init_environment')
@patch('mlagents.trainers.learn.load_config')
def test_run_training(load_config, init_environment):
mock_env = MagicMock()
mock_env.external_brain_names = []
mock_env.academy_name = 'TestAcademyName'
init_environment.return_value = mock_env
trainer_config_mock = MagicMock()
load_config.return_value = trainer_config_mock
mock_init = MagicMock(return_value=None)
with patch.object(TrainerController, "__init__", mock_init):
with patch.object(TrainerController, "start_learning", MagicMock()):
learn.run_training(0, 0, basic_options(), MagicMock())
mock_init.assert_called_once_with(
'./models/ppo',
'./summaries',
'ppo-0',
50000,
None,
False,
False,
5,
0,
{},
0
)
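
For readability, the positional arguments asserted above map onto the new TrainerController signature as follows:

# './models/ppo' -> model_path
# './summaries'  -> summaries_dir
# 'ppo-0'        -> run_id
# 50000          -> save_freq
# None           -> meta_curriculum
# False, False   -> load, train
# 5              -> keep_checkpoints
# 0              -> lesson
# {}             -> external_brains
# 0              -> training_seed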
@patch('mlagents.trainers.learn.init_environment')
@patch('mlagents.trainers.learn.load_config')
def test_docker_target_path(load_config, init_environment):
mock_env = MagicMock()
mock_env.external_brain_names = []
mock_env.academy_name = 'TestAcademyName'
init_environment.return_value = mock_env
trainer_config_mock = MagicMock()
load_config.return_value = trainer_config_mock
options_with_docker_target = basic_options()
options_with_docker_target['--docker-target-name'] = 'dockertarget'
mock_init = MagicMock(return_value=None)
with patch.object(TrainerController, "__init__", mock_init):
with patch.object(TrainerController, "start_learning", MagicMock()):
learn.run_training(0, 0, options_with_docker_target, MagicMock())
mock_init.assert_called_once()
assert(mock_init.call_args[0][0] == '/dockertarget/models/ppo')
assert(mock_init.call_args[0][1] == '/dockertarget/summaries')