Deric Pang
6 年前
当前提交
634280a6
共有 58 个文件被更改,包括 1108 次插入和 108 次删除
-
2docs/Learning-Environment-Executable.md
-
2mlagents/trainers/buffer.py
-
2mlagents/trainers/curriculum.py
-
2mlagents/trainers/exception.py
-
6mlagents/trainers/meta_curriculum.py
-
2mlagents/trainers/models.py
-
6mlagents/trainers/trainer.py
-
16mlagents/trainers/trainer_controller.py
-
4mlagents/trainers/ppo/models.py
-
10mlagents/trainers/ppo/trainer.py
-
2mlagents/trainers/bc/models.py
-
10mlagents/trainers/bc/trainer.py
-
3mlagents/envs/__init__.py
-
55mlagents/envs/brain.py
-
4mlagents/envs/communicator.py
-
4mlagents/envs/environment.py
-
2mlagents/envs/exception.py
-
6mlagents/envs/rpc_communicator.py
-
4mlagents/envs/socket_communicator.py
-
2mlagents/envs/notebooks/getting-started.ipynb
-
6mlagents/envs/communicator_objects/brain_parameters_proto_pb2.py
-
2mlagents/envs/communicator_objects/brain_type_proto_pb2.py
-
2mlagents/envs/communicator_objects/space_type_proto_pb2.py
-
4mlagents/envs/communicator_objects/unity_input_pb2.py
-
6mlagents/envs/communicator_objects/unity_message_pb2.py
-
4mlagents/envs/communicator_objects/unity_output_pb2.py
-
4mlagents/envs/communicator_objects/unity_rl_initialization_output_pb2.py
-
6mlagents/envs/communicator_objects/unity_rl_input_pb2.py
-
2mlagents/envs/communicator_objects/unity_rl_output_pb2.py
-
2mlagents/envs/communicator_objects/unity_to_external_pb2.py
-
2mlagents/envs/communicator_objects/unity_to_external_pb2_grpc.py
-
5tests/mock_communicator.py
-
4tests/trainers/test_curriculum.py
-
36tests/trainers/test_trainer_controller.py
-
0mlagents/__init__.py
-
110mlagents/learn.py
-
203setup.py
-
0tests/__init__.py
-
0tests/envs/__init__.py
-
95tests/envs/test_envs.py
-
0tests/trainers/__init__.py
-
107tests/trainers/test_bc.py
-
56tests/trainers/test_buffer.py
-
109tests/trainers/test_meta_curriculum.py
-
285tests/trainers/test_ppo.py
-
22mlagents/setup.py
-
0/requirements.txt
-
0/mlagents/trainers
-
0/mlagents/envs
-
0/tests/mock_communicator.py
-
0/tests/trainers/test_curriculum.py
-
0/tests/trainers/test_trainer_controller.py
|
|||
from .environment import * |
|||
from .brain import * |
|||
from .exception import * |
|
|||
from typing import Dict |
|||
|
|||
|
|||
class BrainInfo:
    """Snapshot of one simulation step for every agent attached to a brain."""

    def __init__(self, visual_observation, vector_observation, text_observations, memory=None,
                 reward=None, agents=None, local_done=None,
                 vector_action=None, text_action=None, max_reached=None):
        """Store the per-step experience exactly as received from the environment.

        The three observation arguments are required; every other field
        defaults to None when the environment did not report it.
        """
        # Observations, one entry per agent.
        self.visual_observations = visual_observation
        self.vector_observations = vector_observation
        self.text_observations = text_observations
        # Episode bookkeeping.
        self.agents = agents
        self.rewards = reward
        self.local_done = local_done
        self.max_reached = max_reached
        # Recurrent state plus the actions taken on the previous step.
        self.memories = memory
        self.previous_vector_actions = vector_action
        self.previous_text_actions = text_action
|||
|
|||
|
|||
# Mapping from brain name to that brain's current BrainInfo snapshot.
AllBrainInfo = Dict[str, BrainInfo]
|||
|
|||
|
|||
class BrainParameters:
    """Static configuration of a single Unity brain."""

    def __init__(self, brain_name, brain_param):
        """Unpack the brain-parameter dictionary sent by Unity.

        :param brain_name: Name of brain.
        :param brain_param: Dictionary of brain parameters.
        """
        self.brain_name = brain_name
        # Vector observations.
        self.vector_observation_space_size = brain_param["vectorObservationSize"]
        self.num_stacked_vector_observations = brain_param["numStackedVectorObservations"]
        # Visual observations: one resolution entry per camera.
        resolutions = brain_param["cameraResolutions"]
        self.camera_resolutions = resolutions
        self.number_visual_observations = len(resolutions)
        # Actions.
        self.vector_action_space_size = brain_param["vectorActionSize"]
        self.vector_action_descriptions = brain_param["vectorActionDescriptions"]
        # Unity encodes the space type as an index: 0 = discrete, 1 = continuous.
        self.vector_action_space_type = ("discrete", "continuous")[brain_param["vectorActionSpaceType"]]

    def __str__(self):
        return '''Unity brain name: {}
        Number of Visual Observations (per agent): {}
        Vector Observation space size (per agent): {}
        Number of stacked Vector Observation: {}
        Vector Action space type: {}
        Vector Action space size (per agent): {}
        Vector Action descriptions: {}'''.format(self.brain_name,
                                                 str(self.number_visual_observations),
                                                 str(self.vector_observation_space_size),
                                                 str(self.num_stacked_vector_observations),
                                                 self.vector_action_space_type,
                                                 str(self.vector_action_space_size),
                                                 ', '.join(self.vector_action_descriptions))
|
|||
# # Unity ML-Agents Toolkit |
|||
# ## ML-Agent Learning |
|||
|
|||
import logging |
|||
|
|||
import os |
|||
import multiprocessing |
|||
import numpy as np |
|||
from docopt import docopt |
|||
|
|||
|
|||
from mlagents.trainers.trainer_controller import TrainerController |
|||
from mlagents.trainers.exception import TrainerError |
|||
|
|||
def main():
    """Command-line entry point: parse docopt options and launch one or more
    concurrent training sessions (one process each) against a Unity environment.
    """
    print('''

                        ▄▄▄▓▓▓▓
                   ╓▓▓▓▓▓▓█▓▓▓▓▓
              ,▄▄▄m▀▀▀'  ,▓▓▓▀▓▓▄                           ▓▓▓  ▓▓▌
            ▄▓▓▓▀'      ▄▓▓▀  ▓▓▓      ▄▄     ▄▄ ,▄▄ ▄▄▄▄   ,▄▄ ▄▓▓▌▄ ▄▄▄    ,▄▄
          ▄▓▓▓▀        ▄▓▓▀   ▐▓▓▌     ▓▓▌   ▐▓▓ ▐▓▓▓▀▀▀▓▓▌ ▓▓▓ ▀▓▓▌▀ ^▓▓▌  ╒▓▓▌
        ▄▓▓▓▓▓▄▄▄▄▄▄▄▄▓▓▓      ▓▀      ▓▓▌   ▐▓▓ ▐▓▓    ▓▓▓ ▓▓▓  ▓▓▌   ▐▓▓▄ ▓▓▌
        ▀▓▓▓▓▀▀▀▀▀▀▀▀▀▀▓▓▄     ▓▓      ▓▓▌   ▐▓▓ ▐▓▓    ▓▓▓ ▓▓▓  ▓▓▌    ▐▓▓▐▓▓
          ^█▓▓▓        ▀▓▓▄   ▐▓▓▌     ▓▓▓▓▄▓▓▓▓ ▐▓▓    ▓▓▓ ▓▓▓  ▓▓▓▄    ▓▓▓▓`
            '▀▓▓▓▄      ^▓▓▓  ▓▓▓      └▀▀▀▀ ▀▀ ^▀▀    `▀▀ `▀▀   '▀▀    ▐▓▓▌
               ▀▀▀▀▓▄▄▄   ▓▓▓▓▓▓,                                      ▓▓▓▓▀
                   `▀█▓▓▓▓▓▓▓▓▓▌
                        ¬`▀▀▀█▓

        ''')

    logger = logging.getLogger("mlagents.learn")
    # docopt derives the CLI parser from this usage text, so the option lines
    # below (including the [default: ...] markers) are behavior, not just docs.
    _USAGE = '''
    Usage:
      learn (<env>) [options]
      learn [options]
      learn --help

    Options:
      --curriculum=<file>        Curriculum json file for environment [default: None].
      --keep-checkpoints=<n>     How many model checkpoints to keep [default: 5].
      --lesson=<n>               Start learning from this lesson [default: 0].
      --load                     Whether to load the model or randomly initialize [default: False].
      --run-id=<path>            The sub-directory name for model and summary statistics [default: ppo].
      --num-runs=<n>             Number of concurrent training sessions [default: 1].
      --save-freq=<n>            Frequency at which to save model [default: 50000].
      --seed=<n>                 Random seed used for training [default: -1].
      --slow                     Whether to run the game at training speed [default: False].
      --train                    Whether to train model, or only run inference [default: False].
      --worker-id=<n>            Number to add to communication port (5005). Used for multi-environment [default: 0].
      --docker-target-name=<dt>  Docker Volume to store curriculum, executable and model files [default: Empty].
      --no-graphics              Whether to run the Unity simulator in no-graphics mode [default: False].
    '''

    options = docopt(_USAGE)
    logger.info(options)
    # Docker Parameters: the sentinel string 'Empty' means "no docker volume".
    if options['--docker-target-name'] == 'Empty':
        docker_target_name = ''
    else:
        docker_target_name = options['--docker-target-name']

    # General parameters (docopt returns strings; convert numerics explicitly).
    run_id = options['--run-id']
    num_runs = int(options['--num-runs'])
    seed = int(options['--seed'])
    load_model = options['--load']
    train_model = options['--train']
    save_freq = int(options['--save-freq'])
    env_path = options['<env>']
    keep_checkpoints = int(options['--keep-checkpoints'])
    worker_id = int(options['--worker-id'])
    # docopt's default is the literal string "None"; normalize it to Python None.
    curriculum_file = str(options['--curriculum'])
    if curriculum_file == "None":
        curriculum_file = None
    lesson = int(options['--lesson'])
    # --slow inverts into fast_simulation: training speed unless --slow given.
    fast_simulation = not bool(options['--slow'])
    no_graphics = options['--no-graphics']

    # Constants
    # Assumption that this yaml is present in same dir as this file
    base_path = os.path.dirname(__file__)
    TRAINER_CONFIG_PATH = os.path.abspath(os.path.join(base_path, "trainer_config.yaml"))

    def run_training(sub_id, use_seed):
        # Per-process worker: builds a TrainerController with a unique run id
        # ("<run_id>-<sub_id>") and worker port offset, then trains.
        # NOTE(review): a nested function as a multiprocessing target is only
        # picklable under the 'fork' start method — confirm Windows ('spawn')
        # support if that platform is targeted.
        tc = TrainerController(env_path, run_id + "-" + str(sub_id), save_freq, curriculum_file, fast_simulation,
                               load_model, train_model, worker_id + sub_id, keep_checkpoints, lesson, use_seed,
                               docker_target_name, TRAINER_CONFIG_PATH, no_graphics)
        tc.start_learning()

    # Editor training has a single communication port, so only one session fits.
    if env_path is None and num_runs > 1:
        raise TrainerError("It is not possible to launch more than one concurrent training session "
                           "when training from the editor")

    jobs = []
    for i in range(num_runs):
        if seed == -1:
            # np.random.randint's upper bound is exclusive: seeds in [0, 9998].
            use_seed = np.random.randint(0, 9999)
        else:
            use_seed = seed
        p = multiprocessing.Process(target=run_training, args=(i, use_seed))
        jobs.append(p)
        p.start()
|||
|
|||
|
|||
# Allow running this module directly as well as via the `learn` console script.
if __name__ == '__main__':
    main()
|
|||
"""A setuptools based setup module. |
|||
|
|||
See: |
|||
https://packaging.python.org/en/latest/distributing.html |
|||
https://github.com/pypa/sampleproject |
|||
""" |
|||
|
|||
# Always prefer setuptools over distutils |
|||
from setuptools import setup, find_packages |
|||
from os import path |
|||
# io.open is needed for projects that support Python 2.7 |
|||
# It ensures open() defaults to text mode with universal newlines, |
|||
# and accepts an argument to specify the text encoding |
|||
# Python 3 only projects can skip this import |
|||
from io import open |
|||
|
|||
here = path.abspath(path.dirname(__file__))

# Use the README as the PyPI long description.
with open(path.join(here, 'README.md'), encoding='utf-8') as f:
    long_description = f.read()

# Package metadata; fields follow the core-metadata spec:
# https://packaging.python.org/specifications/core-metadata/
setup(
    # PyPI project name; what users `pip install`.
    name='mlagents',  # Required
    # PEP 440 version string.
    version='0.4.0',  # Required
    description='Unity Machine Learning Agents',  # Required
    long_description=long_description,  # Optional
    # The README read above is Markdown.
    long_description_content_type='text/markdown',  # Optional (see note above)
    url='https://github.com/Unity-Technologies/ml-agents',  # Optional
    author='Unity Technologies',  # Optional
    author_email='ML-Agents@unity3d.com',  # Optional
    # Trove classifiers; full list at https://pypi.org/classifiers/
    classifiers=[  # Optional
        'Intended Audience :: Developers',
        'Topic :: Software Development :: Build Tools',
        'License :: OSI Approved :: Apache Software License',
        'Programming Language :: Python :: 3.6'
    ],
    # Ship every package except the test packages.
    packages=find_packages(exclude=['tests', 'tests.*', '*.tests', '*.tests.*']),  # Required
    # Runtime dependencies installed by pip alongside this package.
    # NOTE(review): pytest looks like a test-only dependency — confirm it is
    # intentionally a runtime requirement.
    install_requires=[
        'tensorflow>=1.7.1',
        'Pillow>=4.2.1',
        'matplotlib',
        'numpy>=1.11.0',
        'jupyter',
        'pytest>=3.2.2',
        'docopt',
        'pyyaml',
        'protobuf>=3.6.0',
        'grpcio>=1.11.0'],  # Optional
    # Installs a cross-platform `learn` command that runs mlagents.learn:main.
    entry_points={  # Optional
        'console_scripts': [
            'learn=mlagents.learn:main',
        ],
    },
)
|
|||
import unittest.mock as mock |
|||
import pytest |
|||
import struct |
|||
|
|||
import numpy as np |
|||
|
|||
from mlagents.envs import UnityEnvironment, UnityEnvironmentException, UnityActionException, \ |
|||
BrainInfo |
|||
from tests.mock_communicator import MockCommunicator |
|||
|
|||
|
|||
def test_handles_bad_filename():
    """An unresolvable executable path must raise UnityEnvironmentException."""
    with pytest.raises(UnityEnvironmentException):
        UnityEnvironment(' ')
|||
|
|||
|
|||
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
def test_initialization(mock_communicator, mock_launcher):
    """Construction via a mocked communicator exposes the fake brain, and
    stepping before the first reset raises UnityActionException."""
    mock_communicator.return_value = MockCommunicator(
        discrete_action=False, visual_inputs=0)
    env = UnityEnvironment(' ')
    # Stepping an un-reset environment is an error.
    with pytest.raises(UnityActionException):
        env.step([0])
    assert env.brain_names[0] == 'RealFakeBrain'
    env.close()
|||
|
|||
|
|||
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
def test_reset(mock_communicator, mock_launcher):
    """reset() returns a brain-name -> BrainInfo dict whose observation
    shapes agree with the brain's declared parameters."""
    mock_communicator.return_value = MockCommunicator(
        discrete_action=False, visual_inputs=0)
    env = UnityEnvironment(' ')
    brain = env.brains['RealFakeBrain']
    brain_info = env.reset()
    env.close()
    assert not env.global_done
    assert isinstance(brain_info, dict)
    assert isinstance(brain_info['RealFakeBrain'], BrainInfo)
    assert isinstance(brain_info['RealFakeBrain'].visual_observations, list)
    assert isinstance(brain_info['RealFakeBrain'].vector_observations, np.ndarray)
    assert len(brain_info['RealFakeBrain'].visual_observations) == brain.number_visual_observations
    # One observation row per agent ...
    assert brain_info['RealFakeBrain'].vector_observations.shape[0] == \
        len(brain_info['RealFakeBrain'].agents)
    # ... with stacked observations concatenated along the feature axis.
    assert brain_info['RealFakeBrain'].vector_observations.shape[1] == \
        brain.vector_observation_space_size * brain.num_stacked_vector_observations
|||
|
|||
|
|||
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
def test_step(mock_communicator, mock_launcher):
    """step() accepts correctly sized actions, rejects wrongly sized ones and
    stepping after the episode is globally done, and returns BrainInfo whose
    shapes agree with the brain parameters.

    Fix: removed a leftover debug print of local_done that polluted test output.
    """
    mock_communicator.return_value = MockCommunicator(
        discrete_action=False, visual_inputs=0)
    env = UnityEnvironment(' ')
    brain = env.brains['RealFakeBrain']
    brain_info = env.reset()
    # A flat action vector of size (action size * number of agents) is valid.
    brain_info = env.step([0] * brain.vector_action_space_size[0] * len(brain_info['RealFakeBrain'].agents))
    # A wrongly sized action vector is rejected.
    with pytest.raises(UnityActionException):
        env.step([0])
    brain_info = env.step([-1] * brain.vector_action_space_size[0] * len(brain_info['RealFakeBrain'].agents))
    # The -1 actions end the episode; stepping a done environment is an error.
    with pytest.raises(UnityActionException):
        env.step([0] * brain.vector_action_space_size[0] * len(brain_info['RealFakeBrain'].agents))
    env.close()
    assert env.global_done
    assert isinstance(brain_info, dict)
    assert isinstance(brain_info['RealFakeBrain'], BrainInfo)
    assert isinstance(brain_info['RealFakeBrain'].visual_observations, list)
    assert isinstance(brain_info['RealFakeBrain'].vector_observations, np.ndarray)
    assert len(brain_info['RealFakeBrain'].visual_observations) == brain.number_visual_observations
    # One observation row per agent ...
    assert brain_info['RealFakeBrain'].vector_observations.shape[0] == \
        len(brain_info['RealFakeBrain'].agents)
    # ... with stacked observations concatenated along the feature axis.
    assert brain_info['RealFakeBrain'].vector_observations.shape[1] == \
        brain.vector_observation_space_size * brain.num_stacked_vector_observations

    assert not brain_info['RealFakeBrain'].local_done[0]
    assert brain_info['RealFakeBrain'].local_done[2]
|||
|
|||
|
|||
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
def test_close(mock_communicator, mock_launcher):
    """close() unloads the environment and closes its communicator."""
    comm = MockCommunicator(
        discrete_action=False, visual_inputs=0)
    mock_communicator.return_value = comm
    env = UnityEnvironment(' ')
    assert env._loaded
    env.close()
    assert not env._loaded
    assert comm.has_been_closed
|||
|
|||
|
|||
# Allow running this test module directly without the pytest CLI.
if __name__ == '__main__':
    pytest.main()
|
|||
import unittest.mock as mock |
|||
import pytest |
|||
|
|||
import numpy as np |
|||
import tensorflow as tf |
|||
|
|||
from mlagents.trainers.bc.models import BehavioralCloningModel |
|||
from mlagents.envs import UnityEnvironment |
|||
from tests.mock_communicator import MockCommunicator |
|||
|
|||
|
|||
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
def test_cc_bc_model(mock_communicator, mock_launcher):
    """Smoke-test the continuous-control BC model: build the graph from the
    mocked brain and run one forward pass on vector observations."""
    tf.reset_default_graph()
    with tf.Session() as sess:
        with tf.variable_scope("FakeGraphScope"):
            mock_communicator.return_value = MockCommunicator(
                discrete_action=False, visual_inputs=0)
            env = UnityEnvironment(' ')
            model = BehavioralCloningModel(env.brains["RealFakeBrain"])
            init = tf.global_variables_initializer()
            sess.run(init)

            run_list = [model.sample_action, model.policy]
            # Batch of 2 agents, vector obs of size 3 stacked twice (6 values).
            feed_dict = {model.batch_size: 2,
                         model.sequence_length: 1,
                         model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
                                                    [3, 4, 5, 3, 4, 5]])}
            sess.run(run_list, feed_dict=feed_dict)
            env.close()
|||
|
|||
|
|||
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
def test_dc_bc_model(mock_communicator, mock_launcher):
    """Smoke-test the discrete-control BC model: build the graph from the
    mocked brain and run one forward pass on vector observations."""
    tf.reset_default_graph()
    with tf.Session() as sess:
        with tf.variable_scope("FakeGraphScope"):
            mock_communicator.return_value = MockCommunicator(
                discrete_action=True, visual_inputs=0)
            env = UnityEnvironment(' ')
            model = BehavioralCloningModel(env.brains["RealFakeBrain"])
            init = tf.global_variables_initializer()
            sess.run(init)

            run_list = [model.sample_action, model.action_probs]
            # dropout_rate of 1.0 keeps every unit (inference mode).
            feed_dict = {model.batch_size: 2,
                         model.dropout_rate: 1.0,
                         model.sequence_length: 1,
                         model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
                                                    [3, 4, 5, 3, 4, 5]])}
            sess.run(run_list, feed_dict=feed_dict)
            env.close()
|||
|
|||
|
|||
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
def test_visual_dc_bc_model(mock_communicator, mock_launcher):
    """Smoke-test the discrete-control BC model with two camera inputs in
    addition to vector observations."""
    tf.reset_default_graph()
    with tf.Session() as sess:
        with tf.variable_scope("FakeGraphScope"):
            mock_communicator.return_value = MockCommunicator(
                discrete_action=True, visual_inputs=2)
            env = UnityEnvironment(' ')
            model = BehavioralCloningModel(env.brains["RealFakeBrain"])
            init = tf.global_variables_initializer()
            sess.run(init)

            run_list = [model.sample_action, model.action_probs]
            # Two visual inputs, each a batch of 2 RGB frames of 40x30.
            feed_dict = {model.batch_size: 2,
                         model.dropout_rate: 1.0,
                         model.sequence_length: 1,
                         model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
                                                    [3, 4, 5, 3, 4, 5]]),
                         model.visual_in[0]: np.ones([2, 40, 30, 3]),
                         model.visual_in[1]: np.ones([2, 40, 30, 3])}
            sess.run(run_list, feed_dict=feed_dict)
            env.close()
|||
|
|||
|
|||
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
def test_visual_cc_bc_model(mock_communicator, mock_launcher):
    """Smoke-test the continuous-control BC model with two camera inputs in
    addition to vector observations."""
    tf.reset_default_graph()
    with tf.Session() as sess:
        with tf.variable_scope("FakeGraphScope"):
            mock_communicator.return_value = MockCommunicator(
                discrete_action=False, visual_inputs=2)
            env = UnityEnvironment(' ')
            model = BehavioralCloningModel(env.brains["RealFakeBrain"])
            init = tf.global_variables_initializer()
            sess.run(init)

            run_list = [model.sample_action, model.policy]
            # Two visual inputs, each a batch of 2 RGB frames of 40x30.
            feed_dict = {model.batch_size: 2,
                         model.sequence_length: 1,
                         model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
                                                    [3, 4, 5, 3, 4, 5]]),
                         model.visual_in[0]: np.ones([2, 40, 30, 3]),
                         model.visual_in[1]: np.ones([2, 40, 30, 3])}
            sess.run(run_list, feed_dict=feed_dict)
            env.close()
|||
|
|||
|
|||
# Allow running this test module directly without the pytest CLI.
if __name__ == '__main__':
    pytest.main()
|
|||
import json |
|||
import unittest.mock as mock |
|||
|
|||
import yaml |
|||
import pytest |
|||
import numpy as np |
|||
|
|||
from mlagents.trainers.trainer_controller import TrainerController |
|||
from mlagents.trainers.buffer import Buffer |
|||
from mlagents.trainers.ppo.trainer import PPOTrainer |
|||
from mlagents.trainers.bc.trainer import BehavioralCloningTrainer |
|||
from mlagents.trainers.curriculum import Curriculum |
|||
from mlagents.trainers.exception import CurriculumError |
|||
from mlagents.envs.exception import UnityEnvironmentException |
|||
from tests.mock_communicator import MockCommunicator |
|||
|
|||
|
|||
def assert_array(a, b):
    """Assert that two numpy arrays are identical in shape and contents.

    :param a: First array.
    :param b: Second array.
    :raises AssertionError: If the shapes differ or any element differs.
    """
    assert a.shape == b.shape
    # Pairwise comparison over flattened elements replaces the original
    # index-by-range loop (same semantics, idiomatic iteration).
    for x, y in zip(a.flatten(), b.flatten()):
        assert x == y
|||
|
|||
|
|||
def test_buffer():
    """Exercise Buffer: per-agent appends, batched retrieval (sequential and
    overlapping), agent reset, and accumulation into the update buffer."""
    b = Buffer()
    # Fill 4 fake agents with 9 steps each; each value encodes agent/step/field
    # as 100*agent + 10*step + field so expected batches are easy to read off.
    for fake_agent_id in range(4):
        for step in range(9):
            b[fake_agent_id]['vector_observation'].append(
                [100 * fake_agent_id + 10 * step + 1,
                 100 * fake_agent_id + 10 * step + 2,
                 100 * fake_agent_id + 10 * step + 3]
            )
            b[fake_agent_id]['action'].append([100 * fake_agent_id + 10 * step + 4,
                                               100 * fake_agent_id + 10 * step + 5])
    # NOTE(review): expected values indicate sequential=True returns the most
    # recent steps in order — confirm against Buffer.get_batch.
    a = b[1]['vector_observation'].get_batch(batch_size=2, training_length=1, sequential=True)
    assert_array(a, np.array([[171, 172, 173], [181, 182, 183]]))
    a = b[2]['vector_observation'].get_batch(batch_size=2, training_length=3, sequential=True)
    assert_array(a, np.array([
        [[231, 232, 233], [241, 242, 243], [251, 252, 253]],
        [[261, 262, 263], [271, 272, 273], [281, 282, 283]]
    ]))
    # sequential=False yields overlapping windows (note the shared rows below).
    a = b[2]['vector_observation'].get_batch(batch_size=2, training_length=3, sequential=False)
    assert_array(a, np.array([
        [[251, 252, 253], [261, 262, 263], [271, 272, 273]],
        [[261, 262, 263], [271, 272, 273], [281, 282, 283]]
    ]))
    # Resetting an agent empties its local buffer.
    b[4].reset_agent()
    assert len(b[4]) == 0
    b.append_update_buffer(3,
                           batch_size=None, training_length=2)
    b.append_update_buffer(2,
                           batch_size=None, training_length=2)
    assert len(b.update_buffer['action']) == 10
    assert np.array(b.update_buffer['action']).shape == (10, 2, 2)
|
|||
import pytest |
|||
from unittest.mock import patch, call, Mock |
|||
|
|||
from mlagents.trainers.meta_curriculum import MetaCurriculum |
|||
from mlagents.trainers.exception import MetaCurriculumError |
|||
|
|||
|
|||
class MetaCurriculumTest(MetaCurriculum):
    """This class allows us to test MetaCurriculum objects without calling
    MetaCurriculum's __init__ function.
    """
    def __init__(self, brains_to_curriculums):
        # Bypass the parent's curriculum-folder scanning; tests inject mock
        # curriculums directly.
        self._brains_to_curriculums = brains_to_curriculums
|||
|
|||
|
|||
@pytest.fixture
def default_reset_parameters():
    """Baseline reset-parameter mapping shared by the meta-curriculum tests."""
    return dict(param1=1, param2=2, param3=3)
|||
|
|||
|
|||
@pytest.fixture
def more_reset_parameters():
    """Additional reset parameters used to test config merging."""
    return dict(param4=4, param5=5, param6=6)
|||
|
|||
|
|||
@pytest.fixture
def progresses():
    """Per-brain training progress values for lesson-increment tests."""
    return dict(Brain1=0.2, Brain2=0.3)
|||
|
|||
|
|||
@patch('mlagents.trainers.Curriculum.get_config', return_value={})
@patch('mlagents.trainers.Curriculum.__init__', return_value=None)
@patch('os.listdir', return_value=['Brain1.json', 'Brain2.json'])
def test_init_meta_curriculum_happy_path(listdir, mock_curriculum_init,
                                         mock_curriculum_get_config,
                                         default_reset_parameters):
    """MetaCurriculum builds one Curriculum per json file in the folder,
    keyed by brain name (filename without extension)."""
    meta_curriculum = MetaCurriculum('test/', default_reset_parameters)

    assert len(meta_curriculum.brains_to_curriculums) == 2

    assert 'Brain1' in meta_curriculum.brains_to_curriculums
    assert 'Brain2' in meta_curriculum.brains_to_curriculums

    # Each Curriculum is constructed with its json path and the shared defaults.
    calls = [call('test/Brain1.json', default_reset_parameters),
             call('test/Brain2.json', default_reset_parameters)]

    mock_curriculum_init.assert_has_calls(calls)
|||
|
|||
|
|||
@patch('os.listdir', side_effect=NotADirectoryError())
def test_init_meta_curriculum_bad_curriculum_folder_raises_error(
        listdir, default_reset_parameters):
    """A curriculum folder that cannot be listed surfaces as MetaCurriculumError.

    Fix: `default_reset_parameters` was referenced without being declared as a
    parameter, so the module-level fixture *function* object — not the fixture's
    dict value — was passed to MetaCurriculum. Declaring it lets pytest inject
    the fixture value.
    """
    with pytest.raises(MetaCurriculumError):
        MetaCurriculum('test/', default_reset_parameters)
|||
|
|||
|
|||
@patch('mlagents.trainers.Curriculum')
@patch('mlagents.trainers.Curriculum')
def test_set_lesson_nums(curriculum_a, curriculum_b):
    """Assigning the lesson_nums dict forwards each lesson number to the
    matching brain's curriculum."""
    meta_curriculum = MetaCurriculumTest({'Brain1' : curriculum_a,
                                          'Brain2' : curriculum_b})

    meta_curriculum.lesson_nums = {'Brain1' : 1, 'Brain2' : 3}

    assert curriculum_a.lesson_num == 1
    assert curriculum_b.lesson_num == 3
|||
|
|||
|
|||
|
|||
@patch('mlagents.trainers.Curriculum')
@patch('mlagents.trainers.Curriculum')
def test_increment_lessons(curriculum_a, curriculum_b, progresses):
    """increment_lessons() forwards each brain's progress value to its
    curriculum's increment_lesson()."""
    meta_curriculum = MetaCurriculumTest({'Brain1' : curriculum_a,
                                          'Brain2' : curriculum_b})

    meta_curriculum.increment_lessons(progresses)

    curriculum_a.increment_lesson.assert_called_with(0.2)
    curriculum_b.increment_lesson.assert_called_with(0.3)
|||
|
|||
|
|||
@patch('mlagents.trainers.Curriculum')
@patch('mlagents.trainers.Curriculum')
def test_set_all_curriculums_to_lesson_num(curriculum_a, curriculum_b):
    """set_all_curriculums_to_lesson_num() applies one lesson number to every
    brain's curriculum."""
    meta_curriculum = MetaCurriculumTest({'Brain1' : curriculum_a,
                                          'Brain2' : curriculum_b})

    meta_curriculum.set_all_curriculums_to_lesson_num(2)

    assert curriculum_a.lesson_num == 2
    assert curriculum_b.lesson_num == 2
|||
|
|||
|
|||
@patch('mlagents.trainers.Curriculum')
@patch('mlagents.trainers.Curriculum')
def test_get_config(curriculum_a, curriculum_b, default_reset_parameters,
                    more_reset_parameters):
    """get_config() merges every curriculum's config into one dict; when the
    configs differ, later entries extend the earlier ones."""
    curriculum_a.get_config.return_value = default_reset_parameters
    curriculum_b.get_config.return_value = default_reset_parameters
    meta_curriculum = MetaCurriculumTest({'Brain1' : curriculum_a,
                                          'Brain2' : curriculum_b})

    # Identical configs merge to themselves.
    assert meta_curriculum.get_config() == default_reset_parameters

    curriculum_b.get_config.return_value = more_reset_parameters

    # Differing configs merge to the union of both parameter sets.
    new_reset_parameters = dict(default_reset_parameters)
    new_reset_parameters.update(more_reset_parameters)

    assert meta_curriculum.get_config() == new_reset_parameters
|
|||
import unittest.mock as mock |
|||
import pytest |
|||
|
|||
import numpy as np |
|||
import tensorflow as tf |
|||
|
|||
from mlagents.trainers.ppo.models import PPOModel |
|||
from mlagents.trainers.ppo.trainer import discount_rewards |
|||
from mlagents.envs import UnityEnvironment |
|||
from tests.mock_communicator import MockCommunicator |
|||
|
|||
|
|||
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
def test_ppo_model_cc_vector(mock_communicator, mock_launcher):
    """Builds a continuous-action, vector-observation PPO model against a
    mocked Unity environment and runs one forward pass."""
    tf.reset_default_graph()
    with tf.Session() as sess, tf.variable_scope("FakeGraphScope"):
        mock_communicator.return_value = MockCommunicator(
            discrete_action=False, visual_inputs=0)
        env = UnityEnvironment(' ')

        model = PPOModel(env.brains["RealFakeBrain"])
        sess.run(tf.global_variables_initializer())

        vector_obs = np.array([[1, 2, 3, 1, 2, 3],
                               [3, 4, 5, 3, 4, 5]])
        fetches = [model.output, model.log_probs, model.value,
                   model.entropy, model.learning_rate]
        sess.run(fetches, feed_dict={model.batch_size: 2,
                                     model.sequence_length: 1,
                                     model.vector_in: vector_obs})
        env.close()
|||
|
|||
|
|||
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
def test_ppo_model_cc_visual(mock_communicator, mock_launcher):
    """Builds a continuous-action PPO model with two visual observations
    against a mocked Unity environment and runs one forward pass."""
    tf.reset_default_graph()
    with tf.Session() as sess, tf.variable_scope("FakeGraphScope"):
        mock_communicator.return_value = MockCommunicator(
            discrete_action=False, visual_inputs=2)
        env = UnityEnvironment(' ')

        model = PPOModel(env.brains["RealFakeBrain"])
        sess.run(tf.global_variables_initializer())

        vector_obs = np.array([[1, 2, 3, 1, 2, 3],
                               [3, 4, 5, 3, 4, 5]])
        camera_obs = np.ones([2, 40, 30, 3])
        fetches = [model.output, model.log_probs, model.value,
                   model.entropy, model.learning_rate]
        sess.run(fetches, feed_dict={model.batch_size: 2,
                                     model.sequence_length: 1,
                                     model.vector_in: vector_obs,
                                     model.visual_in[0]: camera_obs,
                                     model.visual_in[1]: camera_obs})
        env.close()
|||
|
|||
|
|||
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
def test_ppo_model_dc_visual(mock_communicator, mock_launcher):
    """Builds a discrete-action PPO model with two visual observations
    against a mocked Unity environment and runs one forward pass."""
    tf.reset_default_graph()
    with tf.Session() as sess, tf.variable_scope("FakeGraphScope"):
        mock_communicator.return_value = MockCommunicator(
            discrete_action=True, visual_inputs=2)
        env = UnityEnvironment(' ')

        model = PPOModel(env.brains["RealFakeBrain"])
        sess.run(tf.global_variables_initializer())

        vector_obs = np.array([[1, 2, 3, 1, 2, 3],
                               [3, 4, 5, 3, 4, 5]])
        camera_obs = np.ones([2, 40, 30, 3])
        fetches = [model.output, model.all_log_probs, model.value,
                   model.entropy, model.learning_rate]
        sess.run(fetches, feed_dict={model.batch_size: 2,
                                     model.sequence_length: 1,
                                     model.vector_in: vector_obs,
                                     model.visual_in[0]: camera_obs,
                                     model.visual_in[1]: camera_obs})
        env.close()
|||
|
|||
|
|||
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
def test_ppo_model_dc_vector(mock_communicator, mock_launcher):
    """Builds a discrete-action, vector-observation PPO model against a
    mocked Unity environment and runs one forward pass."""
    tf.reset_default_graph()
    with tf.Session() as sess, tf.variable_scope("FakeGraphScope"):
        mock_communicator.return_value = MockCommunicator(
            discrete_action=True, visual_inputs=0)
        env = UnityEnvironment(' ')

        model = PPOModel(env.brains["RealFakeBrain"])
        sess.run(tf.global_variables_initializer())

        vector_obs = np.array([[1, 2, 3, 1, 2, 3],
                               [3, 4, 5, 3, 4, 5]])
        fetches = [model.output, model.all_log_probs, model.value,
                   model.entropy, model.learning_rate]
        sess.run(fetches, feed_dict={model.batch_size: 2,
                                     model.sequence_length: 1,
                                     model.vector_in: vector_obs})
        env.close()
|||
|
|||
|
|||
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
def test_ppo_model_dc_vector_rnn(mock_communicator, mock_launcher):
    """Builds a recurrent, discrete-action PPO model and runs one forward
    pass over a length-2 sequence with an all-zero initial memory."""
    tf.reset_default_graph()
    with tf.Session() as sess, tf.variable_scope("FakeGraphScope"):
        mock_communicator.return_value = MockCommunicator(
            discrete_action=True, visual_inputs=0)
        env = UnityEnvironment(' ')

        memory_size = 128
        model = PPOModel(env.brains["RealFakeBrain"],
                         use_recurrent=True, m_size=memory_size)
        sess.run(tf.global_variables_initializer())

        vector_obs = np.array([[1, 2, 3, 1, 2, 3],
                               [3, 4, 5, 3, 4, 5]])
        fetches = [model.output, model.all_log_probs, model.value,
                   model.entropy, model.learning_rate, model.memory_out]
        # One sequence of length 2, with the previous action fed for the
        # discrete recurrent model.
        sess.run(fetches, feed_dict={model.batch_size: 1,
                                     model.sequence_length: 2,
                                     model.prev_action: [[0], [0]],
                                     model.memory_in: np.zeros((1, memory_size)),
                                     model.vector_in: vector_obs})
        env.close()
|||
|
|||
|
|||
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
def test_ppo_model_cc_vector_rnn(mock_communicator, mock_launcher):
    """Builds a recurrent, continuous-action PPO model and runs one forward
    pass over a length-2 sequence with an all-zero initial memory."""
    tf.reset_default_graph()
    with tf.Session() as sess, tf.variable_scope("FakeGraphScope"):
        mock_communicator.return_value = MockCommunicator(
            discrete_action=False, visual_inputs=0)
        env = UnityEnvironment(' ')

        memory_size = 128
        model = PPOModel(env.brains["RealFakeBrain"],
                         use_recurrent=True, m_size=memory_size)
        sess.run(tf.global_variables_initializer())

        vector_obs = np.array([[1, 2, 3, 1, 2, 3],
                               [3, 4, 5, 3, 4, 5]])
        fetches = [model.output, model.all_log_probs, model.value,
                   model.entropy, model.learning_rate, model.memory_out]
        sess.run(fetches, feed_dict={model.batch_size: 1,
                                     model.sequence_length: 2,
                                     model.memory_in: np.zeros((1, memory_size)),
                                     model.vector_in: vector_obs})
        env.close()
|||
|
|||
|
|||
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
def test_ppo_model_dc_vector_curio(mock_communicator, mock_launcher):
    """Builds a discrete-action PPO model with the curiosity module enabled
    and runs one forward pass, fetching the intrinsic reward."""
    tf.reset_default_graph()
    with tf.Session() as sess, tf.variable_scope("FakeGraphScope"):
        mock_communicator.return_value = MockCommunicator(
            discrete_action=True, visual_inputs=0)
        env = UnityEnvironment(' ')

        model = PPOModel(env.brains["RealFakeBrain"], use_curiosity=True)
        sess.run(tf.global_variables_initializer())

        vector_obs = np.array([[1, 2, 3, 1, 2, 3],
                               [3, 4, 5, 3, 4, 5]])
        fetches = [model.output, model.all_log_probs, model.value,
                   model.entropy, model.learning_rate, model.intrinsic_reward]
        # Curiosity needs the next observation and the taken action as well.
        sess.run(fetches, feed_dict={model.batch_size: 2,
                                     model.sequence_length: 1,
                                     model.vector_in: vector_obs,
                                     model.next_vector_in: vector_obs,
                                     model.action_holder: [[0], [0]]})
        env.close()
|||
|
|||
|
|||
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
def test_ppo_model_cc_vector_curio(mock_communicator, mock_launcher):
    """Builds a continuous-action PPO model with the curiosity module
    enabled and runs one forward pass, fetching the intrinsic reward."""
    tf.reset_default_graph()
    with tf.Session() as sess, tf.variable_scope("FakeGraphScope"):
        mock_communicator.return_value = MockCommunicator(
            discrete_action=False, visual_inputs=0)
        env = UnityEnvironment(' ')

        model = PPOModel(env.brains["RealFakeBrain"], use_curiosity=True)
        sess.run(tf.global_variables_initializer())

        vector_obs = np.array([[1, 2, 3, 1, 2, 3],
                               [3, 4, 5, 3, 4, 5]])
        fetches = [model.output, model.all_log_probs, model.value,
                   model.entropy, model.learning_rate, model.intrinsic_reward]
        # For continuous control the sampled action output itself is
        # overridden in the feed instead of an action_holder placeholder.
        sess.run(fetches, feed_dict={model.batch_size: 2,
                                     model.sequence_length: 1,
                                     model.vector_in: vector_obs,
                                     model.next_vector_in: vector_obs,
                                     model.output: [[0.0, 0.0], [0.0, 0.0]]})
        env.close()
|||
|
|||
|
|||
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
def test_ppo_model_dc_visual_curio(mock_communicator, mock_launcher):
    """Builds a discrete-action PPO model with curiosity and two visual
    observations, then runs one forward pass."""
    tf.reset_default_graph()
    with tf.Session() as sess, tf.variable_scope("FakeGraphScope"):
        mock_communicator.return_value = MockCommunicator(
            discrete_action=True, visual_inputs=2)
        env = UnityEnvironment(' ')

        model = PPOModel(env.brains["RealFakeBrain"], use_curiosity=True)
        sess.run(tf.global_variables_initializer())

        vector_obs = np.array([[1, 2, 3, 1, 2, 3],
                               [3, 4, 5, 3, 4, 5]])
        camera_obs = np.ones([2, 40, 30, 3])
        fetches = [model.output, model.all_log_probs, model.value,
                   model.entropy, model.learning_rate, model.intrinsic_reward]
        # Curiosity also consumes the next visual observations.
        sess.run(fetches, feed_dict={model.batch_size: 2,
                                     model.sequence_length: 1,
                                     model.vector_in: vector_obs,
                                     model.next_vector_in: vector_obs,
                                     model.action_holder: [[0], [0]],
                                     model.visual_in[0]: camera_obs,
                                     model.visual_in[1]: camera_obs,
                                     model.next_visual_in[0]: camera_obs,
                                     model.next_visual_in[1]: camera_obs})
        env.close()
|||
|
|||
|
|||
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
def test_ppo_model_cc_visual_curio(mock_communicator, mock_launcher):
    """Builds a continuous-action PPO model with curiosity and two visual
    observations, then runs one forward pass."""
    tf.reset_default_graph()
    with tf.Session() as sess, tf.variable_scope("FakeGraphScope"):
        mock_communicator.return_value = MockCommunicator(
            discrete_action=False, visual_inputs=2)
        env = UnityEnvironment(' ')

        model = PPOModel(env.brains["RealFakeBrain"], use_curiosity=True)
        sess.run(tf.global_variables_initializer())

        vector_obs = np.array([[1, 2, 3, 1, 2, 3],
                               [3, 4, 5, 3, 4, 5]])
        camera_obs = np.ones([2, 40, 30, 3])
        fetches = [model.output, model.all_log_probs, model.value,
                   model.entropy, model.learning_rate, model.intrinsic_reward]
        sess.run(fetches, feed_dict={model.batch_size: 2,
                                     model.sequence_length: 1,
                                     model.vector_in: vector_obs,
                                     model.next_vector_in: vector_obs,
                                     model.output: [[0.0, 0.0], [0.0, 0.0]],
                                     model.visual_in[0]: camera_obs,
                                     model.visual_in[1]: camera_obs,
                                     model.next_visual_in[0]: camera_obs,
                                     model.next_visual_in[1]: camera_obs})
        env.close()
|||
|
|||
|
|||
def test_rl_functions():
    """discount_rewards should return gamma-discounted returns for a
    terminal reward of 1.0 (0.9**3, 0.9**2, 0.9, 1.0)."""
    gamma = 0.9
    rewards = np.array([0.0, 0.0, 0.0, 1.0])
    expected = np.array([0.729, 0.81, 0.9, 1.0])
    returns = discount_rewards(rewards, gamma, 0.0)
    np.testing.assert_array_almost_equal(returns, expected)
|||
|
|||
|
|||
# Allow running this test module directly (outside of a pytest invocation).
if __name__ == '__main__':
    pytest.main()
|
|||
#!/usr/bin/env python |
|||
|
|||
from setuptools import setup, Command, find_packages |
|||
|
|||
|
|||
# Read runtime dependencies from requirements.txt so the install metadata
# stays in sync with the pinned requirements file.
with open('requirements.txt') as f:
    required = f.read().splitlines()

setup(
    name='unityagents',
    version='0.4.0',
    description='Unity Machine Learning Agents',
    license='Apache License 2.0',
    author='Unity Technologies',
    author_email='ML-Agents@unity3d.com',
    url='https://github.com/Unity-Technologies/ml-agents',
    packages=find_packages(),
    install_requires=required,
    long_description=("Unity Machine Learning Agents allows researchers and developers "
                      "to transform games and simulations created using the Unity Editor into environments "
                      "where intelligent agents can be trained using reinforcement learning, evolutionary "
                      "strategies, or other machine learning methods through a simple to use Python API."),
)
撰写
预览
正在加载...
取消
保存
Reference in new issue