Merge pull request #1058 from dericp/develop-trainer-controller-cleanup

Fixing trainer controller line lengths and splitting unitytrainers tests.
7 years ago · 514cd757
--- a/python/unitytrainers/trainer_controller.py
+++ b/python/unitytrainers/trainer_controller.py
 # # Unity ML-Agents Toolkit
 # ## ML-Agent Learning
-# Launches unitytrainers for each External Brains in a Unity Environment
+"""Launches unitytrainers for each External Brain in a Unity Environment."""
-import logging
+import logging
+
-
 import numpy as np
 import tensorflow as tf
 from tensorflow.python.tools import freeze_graph


 class TrainerController(object):
-    def __init__(self, env_path, run_id, save_freq, curriculum_folder, fast_simulation, load, train,
-                 worker_id, keep_checkpoints, lesson, seed, docker_target_name, trainer_config_path,
+    def __init__(self, env_path, run_id, save_freq, curriculum_folder,
+                 fast_simulation, load, train, worker_id, keep_checkpoints,
+                 lesson, seed, docker_target_name, trainer_config_path,
-        :param curriculum_folder: Folder containing JSON curriculums for the env
-        :param fast_simulation: Whether to run the game at training speed
-        :param load: Whether to load the model or randomly initialize
-        :param train: Whether to train model, or only run inference
-        :param worker_id: Number to add to communication port (5005). Used for multi-environment
-        :param keep_checkpoints: How many model checkpoints to keep
-        :param lesson: Start learning from this lesson
+        :param curriculum_folder: Folder containing JSON curriculums for the
+               environment.
+        :param fast_simulation: Whether to run the game at training speed.
+        :param load: Whether to load the model or randomly initialize.
+        :param train: Whether to train model, or only run inference.
+        :param worker_id: Number to add to communication port (5005).
+               Used when running multiple environments.
+        :param keep_checkpoints: How many model checkpoints to keep.
+        :param lesson: Start learning from this lesson.
-        :param docker_target_name: Name of docker volume that will contain all data.
-        :param trainer_config_path: Fully qualified path to location of trainer configuration file
-        :param no_graphics: Whether to run the Unity simulator in no-graphics mode
+        :param docker_target_name: Name of docker volume that will contain all
+               data.
+        :param trainer_config_path: Fully qualified path to location of trainer
+               configuration file.
+        :param no_graphics: Whether to run the Unity simulator in no-graphics
+                            mode.
+            # Strip out executable extensions if passed
-                        .replace('.x86', ''))  # Strip out executable extensions if passed
+                        .replace('.x86', ''))

        # Recognize and use docker volume if one is passed as an argument
        if docker_target_name == '':
                docker_target_name=docker_target_name,
                run_id=run_id)
            if env_path is not None:
-                env_path = '/{docker_target_name}/{env_name}'.format(docker_target_name=docker_target_name,
-                                                                     env_name=env_path)
+                env_path = '/{docker_target_name}/{env_name}'.format(
+                    docker_target_name=docker_target_name, env_name=env_path)
-                self.curriculum_folder = '/{docker_target_name}/{curriculum_file}'.format(
+                self.curriculum_folder = \
+                    '/{docker_target_name}/{curriculum_file}'.format(
-            self.summaries_dir = '/{docker_target_name}/summaries'.format(docker_target_name=docker_target_name)
+            self.summaries_dir = '/{docker_target_name}/summaries'.format(
+                docker_target_name=docker_target_name)

        self.logger = logging.getLogger("unityagents")
        self.run_id = run_id
        self.seed = seed
        np.random.seed(self.seed)
        tf.set_random_seed(self.seed)
-        self.env = UnityEnvironment(file_name=env_path, worker_id=self.worker_id,
-                                    seed=self.seed, docker_training=self.docker_training,
+        self.env = UnityEnvironment(file_name=env_path,
+                                    worker_id=self.worker_id,
+                                    seed=self.seed,
+                                    docker_training=self.docker_training,
-            self.env_name = os.path.basename(os.path.normpath(env_path))  # Extract out name of environment
+            # Extract out name of environment
+            self.env_name = os.path.basename(os.path.normpath(env_path))
-            self.meta_curriculum = MetaCurriculum(self.curriculum_folder, self.env._resetParameters)
+            self.meta_curriculum = MetaCurriculum(self.curriculum_folder,
+                self.env._resetParameters)
-        if self.meta_curriculum is not None:
+        if self.meta_curriculum:
            for brain_name in self.meta_curriculum.brains_to_curriculums.keys():
                if brain_name not in self.env.external_brain_names:
                    raise MetaCurriculumError('One of the curriculums '
                                              'whose curriculum it defines.')

    def _get_progresses(self):
-        if self.meta_curriculum is not None:
+        if self.meta_curriculum:
-            for brain_name, curriculum in self.meta_curriculum.brains_to_curriculums.items():
+            for brain_name, curriculum \
+                in self.meta_curriculum.brains_to_curriculums.items():
-                    progress = self.trainers[brain_name].get_step / self.trainers[brain_name].get_max_steps
+                    progress = (self.trainers[brain_name].get_step /
+                        self.trainers[brain_name].get_max_steps)
                    brain_names_to_progresses[brain_name] = progress
                elif curriculum.measure == "reward":
                    progress = self.trainers[brain_name].get_last_reward
                if scope == '/':
                    scope = ''
                scopes += [scope]
-                if self.trainers[brain_name].parameters["trainer"] == "imitation":
+                if self.trainers[brain_name].parameters["trainer"] \
+                   == "imitation":
-                    nodes += [scope + x for x in ["action", "value_estimate", "action_probs", "value_estimate"]]
+                    nodes += [scope + x for x in ["action", "value_estimate",
+                        "action_probs", "value_estimate"]]
-                    nodes += [scope + x for x in ["recurrent_out", "memory_size"]]
+                    nodes += [scope + x for x in ["recurrent_out",
+                                                  "memory_size"]]
        if len(scopes) > 1:
            self.logger.info("List of available scopes :")
            for scope in scopes:
        """
        last_checkpoint = self.model_path + '/model-' + str(steps) + '.cptk'
        saver.save(sess, last_checkpoint)
-        tf.train.write_graph(sess.graph_def, self.model_path, 'raw_graph_def.pb', as_text=False)
+        tf.train.write_graph(sess.graph_def, self.model_path,
+                             'raw_graph_def.pb', as_text=False)
        self.logger.info("Saved Model")

    def _export_graph(self):
        target_nodes = ','.join(self._process_graph())
        ckpt = tf.train.get_checkpoint_state(self.model_path)
-        freeze_graph.freeze_graph(input_graph=self.model_path + '/raw_graph_def.pb',
-                                  input_binary=True,
-                                  input_checkpoint=ckpt.model_checkpoint_path,
-                                  output_node_names=target_nodes,
-                                  output_graph=self.model_path + '/' + self.env_name + "_" + self.run_id + '.bytes',
-                                  clear_devices=True, initializer_nodes="", input_saver="",
-                                  restore_op_name="save/restore_all", filename_tensor_name="save/Const:0")
+        freeze_graph.freeze_graph(
+            input_graph=self.model_path + '/raw_graph_def.pb',
+            input_binary=True,
+            input_checkpoint=ckpt.model_checkpoint_path,
+            output_node_names=target_nodes,
+            output_graph=(self.model_path + '/' + self.env_name + "_"
+                + self.run_id + '.bytes'),
+            clear_devices=True, initializer_nodes="", input_saver="",
+            restore_op_name="save/restore_all",
+            filename_tensor_name="save/Const:0")

    def _initialize_trainers(self, trainer_config, sess):
        trainer_parameters_dict = {}
            trainer_parameters_dict[brain_name] = trainer_parameters.copy()
        for brain_name in self.env.external_brain_names:
            if trainer_parameters_dict[brain_name]['trainer'] == "imitation":
-                self.trainers[brain_name] = BehavioralCloningTrainer(sess, self.env, brain_name,
-                                                                     trainer_parameters_dict[brain_name],
-                                                                     self.train_model, self.seed, self.run_id)
+                self.trainers[brain_name] = BehavioralCloningTrainer(
+                    sess, self.env, brain_name,
+                    trainer_parameters_dict[brain_name], self.train_model,
+                    self.seed, self.run_id)
-                self.trainers[brain_name] = PPOTrainer(sess, self.env, brain_name, trainer_parameters_dict[brain_name],
-                                                       self.train_model, self.seed, self.run_id)
+                self.trainers[brain_name] = PPOTrainer(
+                    sess, self.env, brain_name,
+                    trainer_parameters_dict[brain_name],
+                    self.train_model, self.seed, self.run_id)
-                raise UnityEnvironmentException("The trainer config contains an unknown trainer type for brain {}"
+                raise UnityEnvironmentException('The trainer config contains '
+                                                'an unknown trainer type for '
+                                                'brain {}'
                                                .format(brain_name))

    def _load_config(self):
                return trainer_config
        except IOError:
-            raise UnityEnvironmentException("""Parameter file could not be found here {}.
-                                            Will use default Hyper parameters"""
+            raise UnityEnvironmentException('Parameter file could not be found '
+                                            'here {}. Will use default Hyper '
+                                            'parameters.'
-            raise UnityEnvironmentException("There was an error decoding Trainer Config from this path : {}"
+            raise UnityEnvironmentException('There was an error decoding '
+                                            'Trainer Config from this path : {}'
                                            .format(self.trainer_config_path))

    @staticmethod
                os.makedirs(model_path)
        except Exception:
-            raise UnityEnvironmentException("The folder {} containing the generated model could not be accessed."
-                                            " Please make sure the permissions are set correctly."
+            raise UnityEnvironmentException('The folder {} containing the '
+                                            'generated model could not be '
+                                            'accessed. Please make sure the '
+                                            'permissions are set correctly.'
+    def _increment_lessons_and_reset_env(self):
+        """Reset the environment, advancing curriculum lessons first if a
+        meta-curriculum is in use.
+
+        Returns:
+            Whatever ``self.env.reset`` returns for the freshly reset
+            environment.
+        """
+        # Without a meta-curriculum there is nothing to advance; a plain
+        # reset is enough.
+        if self.meta_curriculum is None:
+            return self.env.reset(train_mode=self.fast_simulation)
+
+        # Hand the current progresses to the meta-curriculum, then reset
+        # with the configuration it reports afterwards.
+        progresses = self._get_progresses()
+        self.meta_curriculum.increment_lessons(progresses)
+        reset_config = self.meta_curriculum.get_config()
+        return self.env.reset(config=reset_config,
+                              train_mode=self.fast_simulation)
+
-        # TODO: Should be able to start learning at different lesson numbers for each curriculum.
+        # TODO: Should be able to start learning at different lesson numbers
+        # for each curriculum.
        if self.meta_curriculum is not None:
            self.meta_curriculum.set_all_curriculums_to_lesson_num(self.lesson)
        trainer_config = self._load_config()
                self.logger.info('Loading Model...')
                ckpt = tf.train.get_checkpoint_state(self.model_path)
                if ckpt is None:
-                    self.logger.info('The model {0} could not be found. Make sure you specified the right '
-                                     '--run-id'.format(self.model_path))
+                    self.logger.info('The model {0} could not be found. Make '
+                                     'sure you specified the right '
+                                     '--run-id'
+                                     .format(self.model_path))
-            if self.meta_curriculum is not None:
-                self.meta_curriculum.increment_lessons(self._get_progresses())
-                curr_info = self.env.reset(config=self.meta_curriculum.get_config(), train_mode=self.fast_simulation)
-            else:
-                curr_info = self.env.reset(train_mode=self.fast_simulation)
+            curr_info = self._increment_lessons_and_reset_env()
-                    trainer.write_tensorboard_text('Hyperparameters', trainer.parameters)
+                    trainer.write_tensorboard_text('Hyperparameters',
+                                                   trainer.parameters)
-                while any([t.get_step <= t.get_max_steps for k, t in self.trainers.items()]) or not self.train_model:
+                while any([t.get_step <= t.get_max_steps \
+                           for k, t in self.trainers.items()]) \
+                      or not self.train_model:
-                        if self.meta_curriculum is not None:
-                            self.meta_curriculum.increment_lessons(self._get_progresses())
-                            curr_info = self.env.reset(config=self.meta_curriculum.get_config(), train_mode=self.fast_simulation)
-                        else:
-                            curr_info = self.env.reset(train_mode=self.fast_simulation)
+                        curr_info = self._increment_lessons_and_reset_env()
                        for brain_name, trainer in self.trainers.items():
                            trainer.end_episode()
                    # Decide and take an action
                         take_action_memories[brain_name],
                         take_action_text[brain_name],
                         take_action_value[brain_name],
-                         take_action_outputs[brain_name]) = trainer.take_action(curr_info)
-                    new_info = self.env.step(vector_action=take_action_vector, memory=take_action_memories,
-                                             text_action=take_action_text, value=take_action_value)
+                         take_action_outputs[brain_name]) = \
+                            trainer.take_action(curr_info)
+                    new_info = self.env.step(vector_action=take_action_vector,
+                                             memory=take_action_memories,
+                                             text_action=take_action_text,
+                                             value=take_action_value)
-                        trainer.add_experiences(curr_info, new_info, take_action_outputs[brain_name])
+                        trainer.add_experiences(curr_info, new_info,
+                            take_action_outputs[brain_name])
-                        if trainer.is_ready_update() and self.train_model and trainer.get_step <= trainer.get_max_steps:
+                        if trainer.is_ready_update() and self.train_model \
+                           and trainer.get_step <= trainer.get_max_steps:
                            # Perform gradient descent with experience buffer
                            trainer.update_model()
                        # Write training statistics to Tensorboard.
-                                lesson=self.meta_curriculum.brains_to_curriculums[brain_name].lesson_num)
+                                lesson_num=self.meta_curriculum
+                                           .brains_to_curriculums[brain_name]
+                                           .lesson_num)
-                        if self.train_model and trainer.get_step <= trainer.get_max_steps:
+                        if self.train_model \
+                           and trainer.get_step <= trainer.get_max_steps:
-                    global_step += 1
-                    if global_step % self.save_freq == 0 and global_step != 0 and self.train_model:
+                    if self.train_model:
+                        global_step += 1
+                    if global_step % self.save_freq == 0 and global_step != 0 \
+                       and self.train_model:
                        # Save Tensorflow model
                        self._save_model(sess, steps=global_step, saver=saver)
                    curr_info = new_info
            except KeyboardInterrupt:
-                print('--------------------------Now saving model-------------------------')
+                print('--------------------------Now saving model--------------'
+                      '-----------')
-                    self.logger.info("Learning was interrupted. Please wait while the graph is generated.")
+                    self.logger.info('Learning was interrupted. Please wait '
+                                     'while the graph is generated.')
                    self._save_model(sess, steps=global_step, saver=saver)
                pass
        self.env.close()
--- a/python/tests/test_buffer.py
+++ b/python/tests/test_buffer.py
+import json
+import unittest.mock as mock
+
+import yaml
+import pytest
+import numpy as np
+
+from unitytrainers.trainer_controller import TrainerController
+from unitytrainers.buffer import Buffer
+from unitytrainers.ppo.trainer import PPOTrainer
+from unitytrainers.bc.trainer import BehavioralCloningTrainer
+from unitytrainers.curriculum import Curriculum
+from unitytrainers.exception import CurriculumError
+from unityagents.exception import UnityEnvironmentException
+from .mock_communicator import MockCommunicator
+
+
+def assert_array(a, b):
+    """Assert that arrays ``a`` and ``b`` have equal shape and elements.
+
+    The shapes are compared first; the flattened elements are then compared
+    pairwise, so a failure stops at the first differing pair.
+    """
+    assert a.shape == b.shape
+    # Shapes match, so both flattened sequences have the same length and
+    # zip() visits every element.
+    for left, right in zip(a.flatten(), b.flatten()):
+        assert left == right
+
+
+def test_buffer():
+    """Fill a Buffer for four fake agents, then check batch retrieval,
+    agent reset and appending to the update buffer."""
+    buf = Buffer()
+    for agent in range(4):
+        for step in range(9):
+            # Every value encodes its agent (hundreds), step (tens) and
+            # field position (units).
+            base = 100 * agent + 10 * step
+            buf[agent]['vector_observation'].append(
+                [base + 1, base + 2, base + 3])
+            buf[agent]['action'].append([base + 4, base + 5])
+
+    batch = buf[1]['vector_observation'].get_batch(
+        batch_size=2, training_length=1, sequential=True)
+    assert_array(batch, np.array([[171, 172, 173], [181, 182, 183]]))
+
+    batch = buf[2]['vector_observation'].get_batch(
+        batch_size=2, training_length=3, sequential=True)
+    assert_array(batch, np.array([
+        [[231, 232, 233], [241, 242, 243], [251, 252, 253]],
+        [[261, 262, 263], [271, 272, 273], [281, 282, 283]]
+    ]))
+
+    batch = buf[2]['vector_observation'].get_batch(
+        batch_size=2, training_length=3, sequential=False)
+    assert_array(batch, np.array([
+        [[251, 252, 253], [261, 262, 263], [271, 272, 273]],
+        [[261, 262, 263], [271, 272, 273], [281, 282, 283]]
+    ]))
+
+    # Agent 4 was never written to by the loop above.
+    buf[4].reset_agent()
+    assert len(buf[4]) == 0
+
+    buf.append_update_buffer(3, batch_size=None, training_length=2)
+    buf.append_update_buffer(2, batch_size=None, training_length=2)
+    assert len(buf.update_buffer['action']) == 10
+    assert np.array(buf.update_buffer['action']).shape == (10, 2, 2)
--- a/python/tests/test_trainer_controller.py
+++ b/python/tests/test_trainer_controller.py
+import json
+import unittest.mock as mock
+
+import yaml
+import pytest
+import tensorflow as tf
+
+from unitytrainers.trainer_controller import TrainerController
+from unitytrainers.buffer import Buffer
+from unitytrainers.ppo.trainer import PPOTrainer
+from unitytrainers.bc.trainer import BehavioralCloningTrainer
+from unitytrainers.curriculum import Curriculum
+from unitytrainers.exception import CurriculumError
+from unityagents.exception import UnityEnvironmentException
+from .mock_communicator import MockCommunicator
+
+
+@pytest.fixture
+def dummy_start():
+    """Return the encoded JSON text describing a fake academy with a single
+    external brain named ``RealFakeBrain``."""
+    return '''{ "AcademyName": "RealFakeAcademy",
+              "resetParameters": {},
+              "brainNames": ["RealFakeBrain"],
+              "externalBrainNames": ["RealFakeBrain"],
+              "logPath":"RealFakePath",
+              "apiNumber":"API-3",
+              "brainParameters": [{
+                  "vectorObservationSize": 3,
+                  "numStackedVectorObservations" : 2,
+                  "vectorActionSize": 2,
+                  "memorySize": 0,
+                  "cameraResolutions": [],
+                  "vectorActionDescriptions": ["",""],
+                  "vectorActionSpaceType": 1
+                  }]
+            }'''.encode()
+
+
+@pytest.fixture
+def dummy_config():
+    """Return a parsed trainer configuration whose ``default`` section
+    selects the ``ppo`` trainer."""
+    # yaml.load without an explicit Loader is deprecated and rejected by
+    # newer PyYAML releases; the document only holds plain scalars in a
+    # mapping, so safe_load parses it the same way.
+    return yaml.safe_load(
+        '''
+        default:
+            trainer: ppo
+            batch_size: 32
+            beta: 5.0e-3
+            buffer_size: 512
+            epsilon: 0.2
+            gamma: 0.99
+            hidden_units: 128
+            lambd: 0.95
+            learning_rate: 3.0e-4
+            max_steps: 5.0e4
+            normalize: true
+            num_epoch: 5
+            num_layers: 2
+            time_horizon: 64
+            sequence_length: 64
+            summary_freq: 1000
+            use_recurrent: false
+            memory_size: 8
+            use_curiosity: false
+            curiosity_strength: 0.0
+            curiosity_enc_size: 1
+        ''')
+
+@pytest.fixture
+def dummy_bc_config():
+    """Return a parsed trainer configuration whose ``default`` section
+    selects the ``imitation`` trainer."""
+    # yaml.load without an explicit Loader is deprecated and rejected by
+    # newer PyYAML releases; the document only holds plain scalars in a
+    # mapping, so safe_load parses it the same way.
+    return yaml.safe_load(
+        '''
+        default:
+            trainer: imitation
+            brain_to_imitate: ExpertBrain
+            batches_per_epoch: 16
+            batch_size: 32
+            beta: 5.0e-3
+            buffer_size: 512
+            epsilon: 0.2
+            gamma: 0.99
+            hidden_units: 128
+            lambd: 0.95
+            learning_rate: 3.0e-4
+            max_steps: 5.0e4
+            normalize: true
+            num_epoch: 5
+            num_layers: 2
+            time_horizon: 64
+            sequence_length: 64
+            summary_freq: 1000
+            use_recurrent: false
+            memory_size: 8
+            use_curiosity: false
+            curiosity_strength: 0.0
+            curiosity_enc_size: 1
+        ''')
+
+@pytest.fixture
+def dummy_bad_config():
+    """Return a parsed trainer configuration whose ``default`` section names
+    the trainer type ``incorrect_trainer``."""
+    # yaml.load without an explicit Loader is deprecated and rejected by
+    # newer PyYAML releases; the document only holds plain scalars in a
+    # mapping, so safe_load parses it the same way.
+    return yaml.safe_load(
+        '''
+        default:
+            trainer: incorrect_trainer
+            brain_to_imitate: ExpertBrain
+            batches_per_epoch: 16
+            batch_size: 32
+            beta: 5.0e-3
+            buffer_size: 512
+            epsilon: 0.2
+            gamma: 0.99
+            hidden_units: 128
+            lambd: 0.95
+            learning_rate: 3.0e-4
+            max_steps: 5.0e4
+            normalize: true
+            num_epoch: 5
+            num_layers: 2
+            time_horizon: 64
+            sequence_length: 64
+            summary_freq: 1000
+            use_recurrent: false
+            memory_size: 8
+        ''')
+
+
+@mock.patch('unityagents.UnityEnvironment.executable_launcher')
+@mock.patch('unityagents.UnityEnvironment.get_communicator')
+def test_initialization(mock_communicator, mock_launcher):
+    """A controller built against the mock communicator reports
+    ``RealFakeBrain`` as its environment's first brain name."""
+    fake_communicator = MockCommunicator(
+        discrete_action=True, visual_inputs=1)
+    mock_communicator.return_value = fake_communicator
+    controller = TrainerController(
+        ' ', ' ', 1, None, True, True, False, 1, 1, 1, 1, '',
+        "tests/test_unitytrainers.py", False)
+    first_brain = controller.env.brain_names[0]
+    assert first_brain == 'RealFakeBrain'
+
+
+@mock.patch('unityagents.UnityEnvironment.executable_launcher')
+@mock.patch('unityagents.UnityEnvironment.get_communicator')
+def test_load_config(mock_communicator, mock_launcher, dummy_config):
+    """With ``open`` and ``yaml.load`` patched, ``_load_config`` must return
+    a single-section config whose trainer is ``ppo``."""
+    open_name = 'unitytrainers.trainer_controller' + '.open'
+    with mock.patch('yaml.load') as mock_load:
+        # ``open`` is patched inside the controller module so the (empty)
+        # config path is never opened for real.
+        with mock.patch(open_name, create=True):
+            mock_load.return_value = dummy_config
+            mock_communicator.return_value = MockCommunicator(
+                discrete_action=True, visual_inputs=1)
+            tc = TrainerController(' ', ' ', 1, None, True, True, False, 1,
+                                   1, 1, 1, '', '', False)
+            config = tc._load_config()
+            assert len(config) == 1
+            assert config['default']['trainer'] == "ppo"
+
+
+@mock.patch('unityagents.UnityEnvironment.executable_launcher')
+@mock.patch('unityagents.UnityEnvironment.get_communicator')
+def test_initialize_trainers(mock_communicator, mock_launcher, dummy_config,
+                             dummy_bc_config, dummy_bad_config):
+    """Each known trainer type in the config yields the matching trainer
+    class; an unknown type raises ``UnityEnvironmentException``."""
+    open_name = 'unitytrainers.trainer_controller' + '.open'
+    with mock.patch('yaml.load') as mock_load, \
+            mock.patch(open_name, create=True):
+        mock_communicator.return_value = MockCommunicator(
+            discrete_action=True, visual_inputs=1)
+        controller = TrainerController(
+            ' ', ' ', 1, None, True, True, False, 1, 1, 1, 1, '',
+            "tests/test_unitytrainers.py", False)
+
+        def load_config_from(fixture):
+            # Make the patched yaml.load return ``fixture``, reload the
+            # config through the controller, then reset the default graph
+            # before the next session is opened.
+            mock_load.return_value = fixture
+            loaded = controller._load_config()
+            tf.reset_default_graph()
+            return loaded
+
+        # Test for PPO trainer
+        ppo_config = load_config_from(dummy_config)
+        with tf.Session() as sess:
+            controller._initialize_trainers(ppo_config, sess)
+            assert len(controller.trainers) == 1
+            ppo_trainer = controller.trainers['RealFakeBrain']
+            assert isinstance(ppo_trainer, PPOTrainer)
+
+        # Test for Behavior Cloning Trainer
+        bc_config = load_config_from(dummy_bc_config)
+        with tf.Session() as sess:
+            controller._initialize_trainers(bc_config, sess)
+            bc_trainer = controller.trainers['RealFakeBrain']
+            assert isinstance(bc_trainer, BehavioralCloningTrainer)
+
+        # Test for proper exception when trainer name is incorrect
+        bad_config = load_config_from(dummy_bad_config)
+        with tf.Session() as sess:
+            with pytest.raises(UnityEnvironmentException):
+                controller._initialize_trainers(bad_config, sess)
--- a/python/tests/test_unitytrainers.py
+++ b/python/tests/test_unitytrainers.py
-import json
-import yaml
-import unittest.mock as mock
-import pytest
-
-from unitytrainers.trainer_controller import TrainerController
-from unitytrainers.buffer import Buffer
-from unitytrainers.models import *
-from unitytrainers.ppo.trainer import PPOTrainer
-from unitytrainers.bc.trainer import BehavioralCloningTrainer
-from unitytrainers.curriculum import Curriculum
-from unitytrainers.exception import CurriculumError
-from unityagents.exception import UnityEnvironmentException
-from .mock_communicator import MockCommunicator
-
-dummy_start = '''{
-  "AcademyName": "RealFakeAcademy",
-  "resetParameters": {},
-  "brainNames": ["RealFakeBrain"],
-  "externalBrainNames": ["RealFakeBrain"],
-  "logPath":"RealFakePath",
-  "apiNumber":"API-3",
-  "brainParameters": [{
-      "vectorObservationSize": 3,
-      "numStackedVectorObservations" : 2,
-      "vectorActionSize": 2,
-      "memorySize": 0,
-      "cameraResolutions": [],
-      "vectorActionDescriptions": ["",""],
-      "vectorActionSpaceType": 1
-      }]
-}'''.encode()
-
-
-dummy_config = yaml.load('''
-default:
-    trainer: ppo
-    batch_size: 32
-    beta: 5.0e-3
-    buffer_size: 512
-    epsilon: 0.2
-    gamma: 0.99
-    hidden_units: 128
-    lambd: 0.95
-    learning_rate: 3.0e-4
-    max_steps: 5.0e4
-    normalize: true
-    num_epoch: 5
-    num_layers: 2
-    time_horizon: 64
-    sequence_length: 64
-    summary_freq: 1000
-    use_recurrent: false
-    memory_size: 8
-    use_curiosity: false
-    curiosity_strength: 0.0
-    curiosity_enc_size: 1
-''')
-
-dummy_bc_config = yaml.load('''
-default:
-    trainer: imitation
-    brain_to_imitate: ExpertBrain
-    batches_per_epoch: 16
-    batch_size: 32
-    beta: 5.0e-3
-    buffer_size: 512
-    epsilon: 0.2
-    gamma: 0.99
-    hidden_units: 128
-    lambd: 0.95
-    learning_rate: 3.0e-4
-    max_steps: 5.0e4
-    normalize: true
-    num_epoch: 5
-    num_layers: 2
-    time_horizon: 64
-    sequence_length: 64
-    summary_freq: 1000
-    use_recurrent: false
-    memory_size: 8
-    use_curiosity: false
-    curiosity_strength: 0.0
-    curiosity_enc_size: 1
-''')
-
-dummy_bad_config = yaml.load('''
-default:
-    trainer: incorrect_trainer
-    brain_to_imitate: ExpertBrain
-    batches_per_epoch: 16
-    batch_size: 32
-    beta: 5.0e-3
-    buffer_size: 512
-    epsilon: 0.2
-    gamma: 0.99
-    hidden_units: 128
-    lambd: 0.95
-    learning_rate: 3.0e-4
-    max_steps: 5.0e4
-    normalize: true
-    num_epoch: 5
-    num_layers: 2
-    time_horizon: 64
-    sequence_length: 64
-    summary_freq: 1000
-    use_recurrent: false
-    memory_size: 8
-''')
-
-
-@mock.patch('unityagents.UnityEnvironment.executable_launcher')
-@mock.patch('unityagents.UnityEnvironment.get_communicator')
-def test_initialization(mock_communicator, mock_launcher):
-    mock_communicator.return_value = MockCommunicator(
-        discrete_action=True, visual_inputs=1)
-    tc = TrainerController(' ', ' ', 1, None, True, True, False, 1,
-                           1, 1, 1, '', "tests/test_unitytrainers.py", False)
-    assert(tc.env.brain_names[0] == 'RealFakeBrain')
-
-
-@mock.patch('unityagents.UnityEnvironment.executable_launcher')
-@mock.patch('unityagents.UnityEnvironment.get_communicator')
-def test_load_config(mock_communicator, mock_launcher):
-    open_name = 'unitytrainers.trainer_controller' + '.open'
-    with mock.patch('yaml.load') as mock_load:
-        with mock.patch(open_name, create=True) as _:
-            mock_load.return_value = dummy_config
-            mock_communicator.return_value = MockCommunicator(
-                discrete_action=True, visual_inputs=1)
-            mock_load.return_value = dummy_config
-            tc = TrainerController(' ', ' ', 1, None, True, True, False, 1,
-                                       1, 1, 1, '','', False)
-            config = tc._load_config()
-            assert(len(config) == 1)
-            assert(config['default']['trainer'] == "ppo")
-
-
-@mock.patch('unityagents.UnityEnvironment.executable_launcher')
-@mock.patch('unityagents.UnityEnvironment.get_communicator')
-def test_initialize_trainers(mock_communicator, mock_launcher):
-    open_name = 'unitytrainers.trainer_controller' + '.open'
-    with mock.patch('yaml.load') as mock_load:
-        with mock.patch(open_name, create=True) as _:
-            mock_communicator.return_value = MockCommunicator(
-                discrete_action=True, visual_inputs=1)
-            tc = TrainerController(' ', ' ', 1, None, True, True, False, 1,
-                                   1, 1, 1, '', "tests/test_unitytrainers.py", False)
-
-            # Test for PPO trainer
-            mock_load.return_value = dummy_config
-            config = tc._load_config()
-            tf.reset_default_graph()
-            with tf.Session() as sess:
-                tc._initialize_trainers(config, sess)
-                assert(len(tc.trainers) == 1)
-                assert(isinstance(tc.trainers['RealFakeBrain'], PPOTrainer))
-
-            # Test for Behavior Cloning Trainer
-            mock_load.return_value = dummy_bc_config
-            config = tc._load_config()
-            tf.reset_default_graph()
-            with tf.Session() as sess:
-                tc._initialize_trainers(config, sess)
-                assert(isinstance(tc.trainers['RealFakeBrain'], BehavioralCloningTrainer))
-
-            # Test for proper exception when trainer name is incorrect
-            mock_load.return_value = dummy_bad_config
-            config = tc._load_config()
-            tf.reset_default_graph()
-            with tf.Session() as sess:
-                with pytest.raises(UnityEnvironmentException):
-                    tc._initialize_trainers(config, sess)
-
-
-def assert_array(a, b):
-    assert a.shape == b.shape
-    la = list(a.flatten())
-    lb = list(b.flatten())
-    for i in range(len(la)):
-        assert la[i] == lb[i]
-
-
-def test_buffer():
-    b = Buffer()
-    for fake_agent_id in range(4):
-        for step in range(9):
-            b[fake_agent_id]['vector_observation'].append(
-                [100 * fake_agent_id + 10 * step + 1,
-                 100 * fake_agent_id + 10 * step + 2,
-                 100 * fake_agent_id + 10 * step + 3]
-            )
-            b[fake_agent_id]['action'].append([100 * fake_agent_id + 10 * step + 4,
-                                               100 * fake_agent_id + 10 * step + 5])
-    a = b[1]['vector_observation'].get_batch(batch_size=2, training_length=1, sequential=True)
-    assert_array(a, np.array([[171, 172, 173], [181, 182, 183]]))
-    a = b[2]['vector_observation'].get_batch(batch_size=2, training_length=3, sequential=True)
-    assert_array(a, np.array([
-        [[231, 232, 233], [241, 242, 243], [251, 252, 253]],
-        [[261, 262, 263], [271, 272, 273], [281, 282, 283]]
-    ]))
-    a = b[2]['vector_observation'].get_batch(batch_size=2, training_length=3, sequential=False)
-    assert_array(a, np.array([
-        [[251, 252, 253], [261, 262, 263], [271, 272, 273]],
-        [[261, 262, 263], [271, 272, 273], [281, 282, 283]]
-    ]))
-    b[4].reset_agent()
-    assert len(b[4]) == 0
-    b.append_update_buffer(3,
-                           batch_size=None, training_length=2)
-    b.append_update_buffer(2,
-                           batch_size=None, training_length=2)
-    assert len(b.update_buffer['action']) == 10
-    assert np.array(b.update_buffer['action']).shape == (10, 2, 2)
-
-
-if __name__ == '__main__':
-    pytest.main()