
Merge branch 'master' into develop-gym-wrapper

/develop/gym-wrapper
vincentpierre 4 years ago
Current commit
6ddfe74f
50 changed files with 1203 additions and 654 deletions
  1. .github/ISSUE_TEMPLATE/bug_report.md (1 change)
  2. .pre-commit-config.yaml (23 changes)
  3. .yamato/com.unity.ml-agents-test.yml (11 changes)
  4. .yamato/gym-interface-test.yml (22 changes)
  5. .yamato/protobuf-generation-test.yml (14 changes)
  6. .yamato/python-ll-api-test.yml (21 changes)
  7. .yamato/standalone-build-test.yml (20 changes)
  8. .yamato/training-int-tests.yml (25 changes)
  9. Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ModelOverrider.cs (23 changes)
  10. README.md (35 changes)
  11. com.unity.ml-agents/CHANGELOG.md (3 changes)
  12. com.unity.ml-agents/Editor/BrainParametersDrawer.cs (20 changes)
  13. com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs (18 changes)
  14. com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs (13 changes)
  15. docs/Learning-Environment-Create-New.md (5 changes)
  16. docs/Learning-Environment-Design-Agents.md (3 changes)
  17. docs/Learning-Environment-Executable.md (2 changes)
  18. docs/Migrating.md (4 changes)
  19. docs/Python-API.md (22 changes)
  20. docs/Training-ML-Agents.md (338 changes)
  21. docs/Using-Tensorboard.md (8 changes)
  22. gym-unity/gym_unity/envs/__init__.py (21 changes)
  23. gym-unity/gym_unity/tests/test_gym.py (5 changes)
  24. ml-agents-envs/mlagents_envs/base_env.py (50 changes)
  25. ml-agents-envs/mlagents_envs/environment.py (293 changes)
  26. ml-agents-envs/mlagents_envs/tests/test_envs.py (54 changes)
  27. ml-agents-envs/mlagents_envs/tests/test_side_channel.py (118 changes)
  28. ml-agents/mlagents/trainers/learn.py (14 changes)
  29. ml-agents/mlagents/trainers/models.py (25 changes)
  30. ml-agents/mlagents/trainers/policy/tf_policy.py (1 change)
  31. ml-agents/mlagents/trainers/ppo/optimizer.py (34 changes)
  32. ml-agents/mlagents/trainers/ppo/trainer.py (4 changes)
  33. ml-agents/mlagents/trainers/sac/optimizer.py (12 changes)
  34. ml-agents/mlagents/trainers/simple_env_manager.py (8 changes)
  35. ml-agents/mlagents/trainers/subprocess_env_manager.py (10 changes)
  36. ml-agents/mlagents/trainers/tests/simple_test_envs.py (12 changes)
  37. ml-agents/mlagents/trainers/tests/test_learn.py (5 changes)
  38. ml-agents/mlagents/trainers/tests/test_nn_policy.py (4 changes)
  39. ml-agents/mlagents/trainers/tests/test_simple_rl.py (4 changes)
  40. ml-agents/tests/yamato/check_coverage_percent.py (6 changes)
  41. ml-agents/tests/yamato/scripts/run_llapi.py (4 changes)
  42. ml-agents/tests/yamato/training_int_tests.py (21 changes)
  43. ml-agents/tests/yamato/yamato_utils.py (28 changes)
  44. utils/validate_versions.py (6 changes)
  45. com.unity.ml-agents/Tests/Editor/Communicator/GrpcExtensionsTests.cs (37 changes)
  46. docs/Versioning.md (95 changes)
  47. ml-agents-envs/mlagents_envs/env_utils.py (108 changes)
  48. ml-agents-envs/mlagents_envs/side_channel/side_channel_manager.py (81 changes)
  49. ml-agents-envs/mlagents_envs/tests/test_env_utils.py (64 changes)
  50. ml-agents-envs/mlagents_envs/tests/test_steps.py (102 changes)

.github/ISSUE_TEMPLATE/bug_report.md (1 change)


If applicable, add screenshots to help explain your problem.
**Environment (please complete the following information):**
- Unity Version: [e.g. Unity 2020.1f1]
- OS + version: [e.g. Windows 10]
- _ML-Agents version_: (e.g. ML-Agents v0.8, or latest `develop` branch from source)
- _TensorFlow version_: (you can run `pip3 show tensorflow` to get this)

.pre-commit-config.yaml (23 changes)


files: "gym-unity/.*"
args: [--ignore-missing-imports, --disallow-incomplete-defs]
- repo: https://gitlab.com/pycqa/flake8
rev: 3.8.1
hooks:
- id: flake8
exclude: >
(?x)^(
.*_pb2.py|
.*_pb2_grpc.py
)$
# flake8-tidy-imports is used for banned-modules, not actually tidying
additional_dependencies: [flake8-comprehensions==3.2.2, flake8-tidy-imports==4.1.0, flake8-bugbear==20.1.4]
rev: v2.4.0
rev: v2.5.0
hooks:
- id: mixed-line-ending
exclude: >

.*.meta
)$
args: [--fix=lf]
- id: flake8
exclude: >
(?x)^(
.*_pb2.py|
.*_pb2_grpc.py
)$
# flake8-tidy-imports is used for banned-modules, not actually tidying
additional_dependencies: [flake8-comprehensions==3.1.4, flake8-tidy-imports==4.0.0, flake8-bugbear==20.1.2]
- id: trailing-whitespace
name: trailing-whitespace-markdown
types: [markdown]

.yamato/com.unity.ml-agents-test.yml (11 changes)


triggers:
cancel_old_ci: true
{% if platform.name == "mac" %}
changes:
only:
- "com.unity.ml-agents/**"
- "ml-agents/tests/yamato/**"
- ".yamato/com.unity.ml-agents-test.yml"
expression: |
(pull_request.target eq "master" OR
pull_request.target match "release.+") AND
NOT pull_request.draft AND
(pull_request.changes.any match "com.unity.ml-agents/**" OR
pull_request.changes.any match ".yamato/com.unity.ml-agents-test.yml")
{% endif %}
{% endfor %}
{% endfor %}

.yamato/gym-interface-test.yml (22 changes)


- .yamato/standalone-build-test.yml#test_mac_standalone_{{ editor.version }}
triggers:
cancel_old_ci: true
changes:
only:
- "com.unity.ml-agents/**"
- "Project/**"
- "ml-agents/**"
- "ml-agents-envs/**"
- ".yamato/gym-interface-test.yml"
except:
- "*.md"
- "com.unity.ml-agents/*.md"
- "com.unity.ml-agents/**/*.md"
expression: |
(pull_request.target eq "master" OR
pull_request.target match "release.+") AND
NOT pull_request.draft AND
(pull_request.changes.any match "com.unity.ml-agents/**" OR
pull_request.changes.any match "Project/**" OR
pull_request.changes.any match "ml-agents/**" OR
pull_request.changes.any match "ml-agents-envs/**" OR
pull_request.changes.any match "gym-unity/**" OR
pull_request.changes.any match ".yamato/gym-interface-test.yml") AND
NOT pull_request.changes.all match "**/*.md"
{% endfor %}

.yamato/protobuf-generation-test.yml (14 changes)


git diff -- :/ ":(exclude,top)$CS_PROTO_PATH/*.meta" > artifacts/proto.patch; exit $GIT_ERR; }
triggers:
cancel_old_ci: true
changes:
only:
- "protobuf-definitions/**"
- ".yamato/protobuf-generation-test.yml"
except:
- "protobuf-definitions/*.md"
- "protobuf-definitions/**/*.md"
expression: |
(pull_request.target eq "master" OR
pull_request.target match "release.+") AND
NOT pull_request.draft AND
(pull_request.changes.any match "protobuf-definitions/**" OR
pull_request.changes.any match ".yamato/protobuf-generation-test.yml") AND
NOT pull_request.changes.all match "protobuf-definitions/**/*.md"
artifacts:
patch:
paths:

.yamato/python-ll-api-test.yml (21 changes)


- .yamato/standalone-build-test.yml#test_mac_standalone_{{ editor.version }}
triggers:
cancel_old_ci: true
changes:
only:
- "com.unity.ml-agents/**"
- "Project/**"
- "ml-agents/**"
- "ml-agents-envs/**"
- ".yamato/python-ll-api-test.yml"
except:
- "*.md"
- "com.unity.ml-agents/*.md"
- "com.unity.ml-agents/**/*.md"
expression: |
(pull_request.target eq "master" OR
pull_request.target match "release.+") AND
NOT pull_request.draft AND
(pull_request.changes.any match "com.unity.ml-agents/**" OR
pull_request.changes.any match "Project/**" OR
pull_request.changes.any match "ml-agents/**" OR
pull_request.changes.any match "ml-agents-envs/**" OR
pull_request.changes.any match ".yamato/python-ll-api-test.yml") AND
NOT pull_request.changes.all match "**/*.md"
{% endfor %}

.yamato/standalone-build-test.yml (20 changes)


- pip install pyyaml
- python -u -m ml-agents.tests.yamato.standalone_build_tests
- python -u -m ml-agents.tests.yamato.standalone_build_tests --scene=Assets/ML-Agents/Examples/Basic/Scenes/Basic.unity
- python -u -m ml-agents.tests.yamato.standalone_build_tests --scene=Assets/ML-Agents/Examples/Bouncer/Scenes/Bouncer.unity
- python -u -m ml-agents.tests.yamato.standalone_build_tests --scene=Assets/ML-Agents/Examples/WallJump/Scenes/WallJump.unity
- python -u -m ml-agents.tests.yamato.standalone_build_tests --scene=Assets/ML-Agents/Examples/Bouncer/Scenes/Bouncer.unity
- python -u -m ml-agents.tests.yamato.standalone_build_tests --scene=Assets/ML-Agents/Examples/WallJump/Scenes/WallJump.unity
changes:
only:
- "com.unity.ml-agents/**"
- "Project/**"
- ".yamato/standalone-build-test.yml"
except:
- "*.md"
- "com.unity.ml-agents/*.md"
- "com.unity.ml-agents/**/*.md"
expression: |
(pull_request.target eq "master" OR
pull_request.target match "release.+") AND
NOT pull_request.draft AND
(pull_request.changes.any match "com.unity.ml-agents/**" OR
pull_request.changes.any match ".yamato/standalone-build-test.yml") AND
NOT pull_request.changes.all match "**/*.md"
artifacts:
logs:
paths:

.yamato/training-int-tests.yml (25 changes)


# Backwards-compatibility tests.
# If we make a breaking change to the communication protocol, these will need
# to be disabled until the next release.
# - python -u -m ml-agents.tests.yamato.training_int_tests --python=0.15.0
# - python -u -m ml-agents.tests.yamato.training_int_tests --csharp=0.15.0
- python -u -m ml-agents.tests.yamato.training_int_tests --python=0.16.0
- python -u -m ml-agents.tests.yamato.training_int_tests --csharp=1.0.0
changes:
only:
- "com.unity.ml-agents/**"
- "Project/**"
- "ml-agents/**"
- "ml-agents-envs/**"
- ".yamato/training-int-tests.yml"
except:
- "*.md"
- "com.unity.ml-agents/*.md"
- "com.unity.ml-agents/**/*.md"
expression: |
(pull_request.target eq "master" OR
pull_request.target match "release.+") AND
NOT pull_request.draft AND
(pull_request.changes.any match "com.unity.ml-agents/**" OR
pull_request.changes.any match "Project/**" OR
pull_request.changes.any match "ml-agents/**" OR
pull_request.changes.any match "ml-agents-envs/**" OR
pull_request.changes.any match ".yamato/training-int-tests.yml") AND
NOT pull_request.changes.all match "**/*.md"
artifacts:
logs:
paths:

Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ModelOverrider.cs (23 changes)


{
const string k_CommandLineModelOverrideFlag = "--mlagents-override-model";
const string k_CommandLineQuitAfterEpisodesFlag = "--mlagents-quit-after-episodes";
const string k_CommandLineQuitOnLoadFailure = "--mlagents-quit-on-load-failure";
// The attached Agent
Agent m_Agent;

int m_MaxEpisodes;
int m_NumSteps;
bool m_QuitOnLoadFailure;
/// <summary>
/// Get the asset path to use from the commandline arguments.

var maxEpisodes = 0;
var args = Environment.GetCommandLineArgs();
for (var i = 0; i < args.Length - 1; i++)
for (var i = 0; i < args.Length; i++)
{
if (args[i] == k_CommandLineModelOverrideFlag && i < args.Length-2)
{

}
else if (args[i] == k_CommandLineQuitAfterEpisodesFlag)
else if (args[i] == k_CommandLineQuitAfterEpisodesFlag && i < args.Length-1)
}
else if (args[i] == k_CommandLineQuitOnLoadFailure)
{
m_QuitOnLoadFailure = true;
}
}

var behaviorName = bp.BehaviorName;
var nnModel = GetModelForBehaviorName(behaviorName);
Debug.Log($"Overriding behavior {behaviorName} for agent with model {nnModel?.name}");
if (nnModel == null && m_QuitOnLoadFailure)
{
Debug.Log(
$"Didn't find a model for behaviorName {behaviorName}. Make " +
$"sure the behaviorName is set correctly in the commandline " +
$"and that the model file exists"
);
Application.Quit(1);
}
var modelName = nnModel != null ? nnModel.name : "<null>";
Debug.Log($"Overriding behavior {behaviorName} for agent with model {modelName}");
// This might give a null model; that's better because we'll fall back to the Heuristic
m_Agent.SetModel($"Override_{behaviorName}", nnModel);

README.md (35 changes)


## Releases & Documentation
**Our latest, stable release is `Release 1`. Click [here](docs/Readme.md) to
get started with the latest release of ML-Agents.**
**Our latest, stable release is `Release 1`. Click
[here](https://github.com/Unity-Technologies/ml-agents/tree/release_1/docs/Readme.md)
to get started with the latest release of ML-Agents.**
The table below lists all our releases, including our `master` branch which is under active
development and may be unstable. A few helpful guidelines:
* The docs links in the table below include installation and usage instructions specific to each
release. Remember to always use the documentation that corresponds to the release version you're
using.
* See the [GitHub releases](https://github.com/Unity-Technologies/ml-agents/releases) for more
details of the changes between versions.
* If you have used an earlier version of the ML-Agents Toolkit, we strongly recommend our
[guide on migrating from earlier versions](docs/Migrating.md).
The table below lists all our releases, including our `master` branch which is
under active development and may be unstable. A few helpful guidelines:
- The [Versioning page](docs/Versioning.md) overviews how we manage our GitHub
releases and the versioning process for each of the ML-Agents components.
- The [Releases page](https://github.com/Unity-Technologies/ml-agents/releases)
contains details of the changes between releases.
- The [Migration page](docs/Migrating.md) contains details on how to upgrade
from earlier releases of the ML-Agents Toolkit.
- The **Documentation** links in the table below include installation and usage
instructions specific to each release. Remember to always use the
documentation that corresponds to the release version you're using.
| **Version** | **Release Date** | **Source** | **Documentation** | **Download** |
|:-------:|:------:|:-------------:|:-------:|:------------:|

If you use Unity or the ML-Agents Toolkit to conduct research, we ask that you
cite the following paper as a reference:
Juliani, A., Berges, V., Vckay, E., Gao, Y., Henry, H., Mattar, M., Lange, D.
(2018). Unity: A General Platform for Intelligent Agents. _arXiv preprint
arXiv:1809.02627._ https://github.com/Unity-Technologies/ml-agents.
Juliani, A., Berges, V., Teng, E., Cohen, A., Harper, J., Elion, C., Goy, C.,
Gao, Y., Henry, H., Mattar, M., Lange, D. (2020). Unity: A General Platform for
Intelligent Agents. _arXiv preprint
[arXiv:1809.02627](https://arxiv.org/abs/1809.02627)._
https://github.com/Unity-Technologies/ml-agents.
- (May 12, 2020)
[Announcing ML-Agents Unity Package v1.0!](https://blogs.unity3d.com/2020/05/12/announcing-ml-agents-unity-package-v1-0/)
- (February 28, 2020)
[Training intelligent adversaries using self-play with ML-Agents](https://blogs.unity3d.com/2020/02/28/training-intelligent-adversaries-using-self-play-with-ml-agents/)
- (November 11, 2019)

com.unity.ml-agents/CHANGELOG.md (3 changes)


#### com.unity.ml-agents (C#)
#### ml-agents / ml-agents-envs / gym-unity (Python)
- `max_step` in the `TerminalStep` and `TerminalSteps` objects was renamed `interrupted`.
- `beta` and `epsilon` in `PPO` are no longer decayed by default but follow the same schedule as learning rate. (#3940)
- `get_behavior_names()` and `get_behavior_spec()` on UnityEnvironment were replaced by the `behavior_specs` property. (#3946)
### Minor Changes
#### com.unity.ml-agents (C#)
#### ml-agents / ml-agents-envs / gym-unity (Python)

- Unity Player logs are now written out to the results directory. (#3877)
- Run configuration YAML files are written out to the results directory at the end of the run. (#3815)
### Bug Fixes
- An issue was fixed where using `--initialize-from` would resume from the past step count. (#3962)
#### com.unity.ml-agents (C#)
#### ml-agents / ml-agents-envs / gym-unity (Python)

com.unity.ml-agents/Editor/BrainParametersDrawer.cs (20 changes)


static void DrawContinuousVectorAction(Rect position, SerializedProperty property)
{
var vecActionSize = property.FindPropertyRelative(k_ActionSizePropName);
vecActionSize.arraySize = 1;
// This check is here due to:
// https://fogbugz.unity3d.com/f/cases/1246524/
// If this case has been resolved, please remove this if condition.
if (vecActionSize.arraySize != 1)
{
vecActionSize.arraySize = 1;
}
var continuousActionSize =
vecActionSize.GetArrayElementAtIndex(0);
EditorGUI.PropertyField(

static void DrawDiscreteVectorAction(Rect position, SerializedProperty property)
{
var vecActionSize = property.FindPropertyRelative(k_ActionSizePropName);
vecActionSize.arraySize = EditorGUI.IntField(
var newSize = EditorGUI.IntField(
// This check is here due to:
// https://fogbugz.unity3d.com/f/cases/1246524/
// If this case has been resolved, please remove this if condition.
if (newSize != vecActionSize.arraySize)
{
vecActionSize.arraySize = newSize;
}
position.y += k_LineHeight;
position.x += 20;
position.width -= 20;

com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs (18 changes)


{
var agentInfoProto = ai.ToAgentInfoProto();
var agentActionProto = new AgentActionProto
var agentActionProto = new AgentActionProto();
if(ai.storedVectorActions != null)
VectorActions = { ai.storedVectorActions }
};
agentActionProto.VectorActions.AddRange(ai.storedVectorActions);
}
return new AgentInfoActionPairProto
{

var brainParametersProto = new BrainParametersProto
{
VectorActionSize = { bp.VectorActionSize },
VectorActionSpaceType =
(SpaceTypeProto)bp.VectorActionSpaceType,
VectorActionSpaceType = (SpaceTypeProto) bp.VectorActionSpaceType,
brainParametersProto.VectorActionDescriptions.AddRange(bp.VectorActionDescriptions);
if(bp.VectorActionDescriptions != null)
{
brainParametersProto.VectorActionDescriptions.AddRange(bp.VectorActionDescriptions);
}
return brainParametersProto;
}

/// </summary>
public static DemonstrationMetaProto ToProto(this DemonstrationMetaData dm)
{
var demonstrationName = dm.demonstrationName ?? "";
var demoProto = new DemonstrationMetaProto
{
ApiVersion = DemonstrationMetaData.ApiVersion,

DemonstrationName = dm.demonstrationName
DemonstrationName = demonstrationName
};
return demoProto;
}

com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs (13 changes)


{
public Action OnRequestDecision;
ObservationWriter m_ObsWriter = new ObservationWriter();
public void RequestDecision(AgentInfo info, List<ISensor> sensors) {
foreach(var sensor in sensors){
public void RequestDecision(AgentInfo info, List<ISensor> sensors)
{
foreach (var sensor in sensors)
{
sensor.GetObservationProto(m_ObsWriter);
}
OnRequestDecision?.Invoke();

agent1.SetPolicy(policy);
StackingSensor sensor = null;
foreach(ISensor s in agent1.sensors){
if (s is StackingSensor){
foreach (ISensor s in agent1.sensors)
{
if (s is StackingSensor)
{
sensor = s as StackingSensor;
}
}

{
agent1.RequestDecision();
aca.EnvironmentStep();
}
policy.OnRequestDecision = () => SensorTestHelper.CompareObservation(sensor, new[] {18f, 19f, 21f});

docs/Learning-Environment-Create-New.md (5 changes)


learning_rate: 3.0e-4
learning_rate_schedule: linear
max_steps: 5.0e4
memory_size: 128
normalize: false
num_epoch: 3
num_layers: 2

reward_signals:
extrinsic:
strength: 1.0
gamma: 0.99
strength: 1.0
gamma: 0.99
```
Since this example creates a very simple training environment with only a few

docs/Learning-Environment-Design-Agents.md (3 changes)


0, rays will be used instead of spheres. Rays may be more efficient,
especially in complex scenes.
- _Ray Length_ The length of the casts
- _Ray Layer Mask_ The [LayerMask](https://docs.unity3d.com/ScriptReference/LayerMask.html)
passed to the raycast or spherecast. This can be used to ignore certain types
of objects when casting.
- _Observation Stacks_ The number of previous results to "stack" with the cast
results. Note that this can be independent of the "Stacked Vectors" setting in
`Behavior Parameters`.

docs/Learning-Environment-Executable.md (2 changes)


the directory where you installed the ML-Agents Toolkit, run:
```sh
mlagents-learn ../config/ppo/3DBall.yaml --env=3DBall --run-id=firstRun
mlagents-learn config/ppo/3DBall.yaml --env=3DBall --run-id=firstRun
```
And you should see something like

docs/Migrating.md (4 changes)


- Trainer configuration, curriculum configuration, and parameter randomization
configuration have all been moved to a single YAML file. (#3791)
- `max_step` in the `TerminalStep` and `TerminalSteps` objects was renamed `interrupted`.
- On the UnityEnvironment API, `get_behavior_names()` and `get_behavior_specs()` methods were combined into the property `behavior_specs` that contains a mapping from behavior names to behavior spec.
### Steps to Migrate
- Before upgrading, copy your `Behavior Name` sections from `trainer_config.yaml` into

the contents of the sampler config to `parameter_randomization` in the main trainer configuration.
- If you are using `UnityEnvironment` directly, replace `max_step` with `interrupted`
in the `TerminalStep` and `TerminalSteps` objects.
- Replace usage of `get_behavior_names()` and `get_behavior_specs()` in UnityEnvironment with `behavior_specs`.
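For example, the last step above might look like the following sketch (the environment handle and the loop are illustrative only; `behavior_specs` is the new read-only mapping, and the spec field names follow the `BehaviorSpec` shown in `base_env.py` in this change):
```python
from mlagents_envs.environment import UnityEnvironment

# Connect to an Editor session on the default port (a built binary path also works).
env = UnityEnvironment(file_name=None)
env.reset()

# Old (removed): names = env.get_behavior_names(); spec = env.get_behavior_spec(names[0])
# New: a single read-only mapping from behavior name to BehaviorSpec.
for name, spec in env.behavior_specs.items():
    print(name, spec.observation_shapes, spec.action_type)

env.close()
```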
## Migrating from 0.15 to Release 1

data in the new MonoBehaviour instead.
- If the class overrode the virtual methods, create a new MonoBehaviour and
move the logic to it:
- Move the InitializeAcademy code to MonoBehaviour.OnAwake
- Move the InitializeAcademy code to MonoBehaviour.Awake
- Move the AcademyStep code to MonoBehaviour.FixedUpdate
- Move the OnDestroy code to MonoBehaviour.OnDestroy.
- Move the AcademyReset code to a new method and add it to the

docs/Python-API.md (22 changes)


```python
from mlagents_envs.environment import UnityEnvironment
# This is a non-blocking call that only loads the environment.
# Start interacting with the environment.
env.reset()
behavior_names = env.behavior_specs.keys()
...
**NOTE:** Please read [Interacting with a Unity Environment](#interacting-with-a-unity-environment)
to read more about how you can interact with the Unity environment from Python.
- `file_name` is the name of the environment binary (located in the root
directory of the python project).

act.
- **Close : `env.close()`** Sends a shutdown signal to the environment and
terminates the communication.
- **Get Behavior Names : `env.get_behavior_names()`** Returns a list of
`BehaviorName`. Note that the number of groups can change over time in the
simulation if new Agent behaviors are created in the simulation.
- **Get Behavior Spec : `env.get_behavior_spec(behavior_name: str)`** Returns
the `BehaviorSpec` corresponding to the behavior_name given as input. A
`BehaviorSpec` contains information such as the observation shapes, the action
type (multi-discrete or continuous) and the action shape. Note that the
`BehaviorSpec` for a specific group is fixed throughout the simulation.
- **Behavior Specs : `env.behavior_specs`** Returns a Mapping of
`BehaviorName` to `BehaviorSpec` objects (read only).
A `BehaviorSpec` contains information such as the observation shapes, the
action type (multi-discrete or continuous) and the action shape. Note that
the `BehaviorSpec` for a specific group is fixed throughout the simulation.
The number of entries in the Mapping can change over time in the simulation
if new Agent behaviors are created in the simulation.
- **Get Steps : `env.get_steps(behavior_name: str)`** Returns a tuple
`DecisionSteps, TerminalSteps` corresponding to the behavior_name given as
input. The `DecisionSteps` contains information about the state of the agents
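A rough sketch of the interaction loop these calls describe (the binary name `3DBall` and the fixed step count are placeholders; `create_empty_action` is the `BehaviorSpec` helper used elsewhere in this change):
```python
from mlagents_envs.environment import UnityEnvironment

# Placeholder binary name; any built environment works.
env = UnityEnvironment(file_name="3DBall")
env.reset()
behavior_name = list(env.behavior_specs)[0]
spec = env.behavior_specs[behavior_name]

for _ in range(100):
    decision_steps, terminal_steps = env.get_steps(behavior_name)
    # Send an all-zeros action for every agent that requested a decision.
    env.set_actions(behavior_name, spec.create_empty_action(len(decision_steps)))
    env.step()

env.close()
```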

docs/Training-ML-Agents.md (338 changes)


#### Observing Training
Regardless of which training methods, configurations or hyperparameters you
provide, the training process will always generate three artifacts:
provide, the training process will always generate three artifacts, all found
in the `results/<run-identifier>` folder:
1. Summaries (under the `summaries/` folder): these are training metrics that
1. Summaries: these are training metrics that
1. Models (under the `models/` folder): these contain the model checkpoints that
1. Models: these contain the model checkpoints that
1. Timers file (also under the `summaries/` folder): this contains aggregated
1. Timers file (under `results/<run-identifier>/run_logs`): this contains aggregated
metrics on your training process, including time spent on specific code
blocks. See [Profiling in Python](Profiling-Python.md) for more information
on the timers generated.

This section offers a detailed guide into how to manage the different training
set-ups within the toolkit.
More specifically, this section offers a detailed guide on four command-line
More specifically, this section offers a detailed guide on the command-line
Behavior in the scene
- `--curriculum`: defines the set-up for Curriculum Learning
- `--sampler`: defines the set-up for Environment Parameter Randomization
Behavior in the scene, and the set-ups for Curriculum Learning and
Environment Parameter Randomization
- `--num-envs`: number of concurrent Unity instances to use during training
Reminder that a detailed description of all command-line options can be found by

process when the default parameters don't seem to be giving the level of
performance you would like. We provide sample configuration files for our
example environments in the [config/](../config/) directory. The
`config/trainer_config.yaml` was used to train the 3D Balance Ball in the
`config/ppo/3DBall.yaml` was used to train the 3D Balance Ball in the
[Getting Started](Getting-Started.md) guide. That configuration file uses the
PPO trainer, but we also have configuration files for SAC and GAIL.

add typically has its own training configurations or additional configuration
files. For instance:
add typically has its own training configurations. For instance:
- Use PPO or SAC?
- Use Recurrent Neural Networks for adding memory to your agents?

demonstrations.)
- Use self-play? (Assuming your environment includes multiple agents.)
The answers to the above questions will dictate the configuration files and the
parameters within them. The rest of this section breaks down the different
configuration files and explains the possible settings for each.
The trainer config file, `<trainer-config-file>`, determines the features you will
use during training, and the answers to the above questions will dictate its contents.
The rest of this guide breaks down the different sub-sections of the trainer config file
and explains the possible settings for each.
### Trainer Config File
### Behavior Configurations
We begin with the trainer config file, `<trainer-config-file>`, which includes a
set of configurations for each Behavior in your scene. Some of the
The primary section of the trainer config file is a
set of configurations for each Behavior in your scene. These are defined under
the sub-section `behaviors` in your trainer config file. Some of the
curriculum and environment parameter randomization settings are not part of this
file, but their settings live in different files that we'll cover in subsequent
sections.
curriculum and environment parameter randomization settings are not part of the `behaviors`
configuration, but their settings live in different sections that we'll cover subsequently.
BehaviorPPO:
trainer: ppo
behaviors:
BehaviorPPO:
trainer: ppo
# Trainer configs common to PPO/SAC (excluding reward signals)
batch_size: 1024
buffer_size: 10240
hidden_units: 128
learning_rate: 3.0e-4
learning_rate_schedule: linear
max_steps: 5.0e5
normalize: false
num_layers: 2
time_horizon: 64
vis_encoder_type: simple
# Trainer configs common to PPO/SAC (excluding reward signals)
batch_size: 1024
buffer_size: 10240
hidden_units: 128
learning_rate: 3.0e-4
learning_rate_schedule: linear
max_steps: 5.0e5
normalize: false
num_layers: 2
time_horizon: 64
vis_encoder_type: simple
# PPO-specific configs
beta: 5.0e-3
epsilon: 0.2
lambd: 0.95
num_epoch: 3
threaded: true
# PPO-specific configs
beta: 5.0e-3
epsilon: 0.2
lambd: 0.95
num_epoch: 3
threaded: true
# memory
use_recurrent: true
sequence_length: 64
memory_size: 256
# memory
use_recurrent: true
sequence_length: 64
memory_size: 256
# behavior cloning
behavioral_cloning:
demo_path: Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
strength: 0.5
steps: 150000
batch_size: 512
num_epoch: 3
samples_per_update: 0
init_path:
# behavior cloning
behavioral_cloning:
demo_path: Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
strength: 0.5
steps: 150000
batch_size: 512
num_epoch: 3
samples_per_update: 0
init_path:
reward_signals:
# environment reward
extrinsic:
strength: 1.0
gamma: 0.99
reward_signals:
# environment reward
extrinsic:
strength: 1.0
gamma: 0.99
# curiosity module
curiosity:
strength: 0.02
gamma: 0.99
encoding_size: 256
learning_rate: 3e-4
# curiosity module
curiosity:
strength: 0.02
gamma: 0.99
encoding_size: 256
learning_rate: 3e-4
# GAIL
gail:
strength: 0.01
gamma: 0.99
encoding_size: 128
demo_path: Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
learning_rate: 3e-4
use_actions: false
use_vail: false
# GAIL
gail:
strength: 0.01
gamma: 0.99
encoding_size: 128
demo_path: Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
learning_rate: 3e-4
use_actions: false
use_vail: false
# self-play
self_play:
window: 10
play_against_latest_model_ratio: 0.5
save_steps: 50000
swap_steps: 50000
team_change: 100000
# self-play
self_play:
window: 10
play_against_latest_model_ratio: 0.5
save_steps: 50000
swap_steps: 50000
team_change: 100000
```
Here is an equivalent file if we use an SAC trainer instead. Notice that the

```yaml
BehaviorSAC:
trainer: sac
# Trainer configs common to PPO/SAC (excluding reward signals)
# same as PPO config
behaviors:
BehaviorSAC:
trainer: sac
# SAC-specific configs (replaces the "PPO-specific configs" section above)
buffer_init_steps: 0
tau: 0.005
steps_per_update: 1
train_interval: 1
init_entcoef: 1.0
save_replay_buffer: false
# Trainer configs common to PPO/SAC (excluding reward signals)
# same as PPO config
# memory
# same as PPO config
# SAC-specific configs (replaces the "PPO-specific configs" section above)
buffer_init_steps: 0
tau: 0.005
steps_per_update: 1
train_interval: 1
init_entcoef: 1.0
save_replay_buffer: false
# pre-training using behavior cloning
behavioral_cloning:
# memory
reward_signals:
reward_signal_num_update: 1 # only applies to SAC
# pre-training using behavior cloning
behavioral_cloning:
# same as PPO config
reward_signals:
reward_signal_num_update: 1 # only applies to SAC
# environment reward
extrinsic:
# same as PPO config
# environment reward
extrinsic:
# same as PPO config
# curiosity module
curiosity:
# same as PPO config
# curiosity module
curiosity:
# same as PPO config
# GAIL
gail:
# same as PPO config
# GAIL
gail:
# self-play
self_play:
# self-play
self_play:
# same as PPO config
```
We now break apart the components of the configuration file and describe what

### Curriculum Learning
To enable curriculum learning, you need to provide the `--curriculum` CLI option
and point to a YAML file that defines the curriculum. Here is one example file:
To enable curriculum learning, you need to add a sub-section to the corresponding
`behaviors` entry in the trainer config YAML file that defines the curriculum for that
behavior. Here is one example:
BehaviorY:
measure: progress
thresholds: [0.1, 0.3, 0.5]
min_lesson_length: 100
signal_smoothing: true
parameters:
wall_height: [1.5, 2.0, 2.5, 4.0]
behaviors:
BehaviorY:
# < Same as above >
# Add this section
curriculum:
measure: progress
thresholds: [0.1, 0.3, 0.5]
min_lesson_length: 100
signal_smoothing: true
parameters:
wall_height: [1.5, 2.0, 2.5, 4.0]
```
Each group of Agents under the same `Behavior Name` in an environment can have a

In order to define the curricula, the first step is to decide which parameters
of the environment will vary. In the case of the Wall Jump environment, the
height of the wall is what varies. Rather than adjusting it by hand, we will
create a YAML file which describes the structure of the curricula. Within it, we
create a configuration which describes the structure of the curricula. Within it, we
can specify which points in the training process our wall height will change,
either based on the percentage of training steps which have taken place, or what
the average reward the agent has received in the recent past is. Below is an

BigWallJump:
measure: progress
thresholds: [0.1, 0.3, 0.5]
min_lesson_length: 100
signal_smoothing: true
parameters:
big_wall_min_height: [0.0, 4.0, 6.0, 8.0]
big_wall_max_height: [4.0, 7.0, 8.0, 8.0]
SmallWallJump:
measure: progress
thresholds: [0.1, 0.3, 0.5]
min_lesson_length: 100
signal_smoothing: true
parameters:
small_wall_height: [1.5, 2.0, 2.5, 4.0]
behaviors:
BigWallJump:
# < Trainer parameters for BigWallJump >
# Curriculum configuration
curriculum:
measure: progress
thresholds: [0.1, 0.3, 0.5]
min_lesson_length: 100
signal_smoothing: true
parameters:
big_wall_min_height: [0.0, 4.0, 6.0, 8.0]
big_wall_max_height: [4.0, 7.0, 8.0, 8.0]
SmallWallJump:
# < Trainer parameters for BigWallJump >
# Curriculum configuration
curriculum:
measure: progress
thresholds: [0.1, 0.3, 0.5]
min_lesson_length: 100
signal_smoothing: true
parameters:
small_wall_height: [1.5, 2.0, 2.5, 4.0]
```
The curriculum for each Behavior has the following parameters:

#### Training with a Curriculum
Once we have specified our metacurriculum and curricula, we can launch
`mlagents-learn` using the `--curriculum` flag to point to the config file for
`mlagents-learn` using the config file for
mlagents-learn config/trainer_config.yaml --curriculum=config/curricula/wall_jump.yaml --run-id=wall-jump-curriculum
mlagents-learn config/ppo/WallJump_curriculum.yaml --run-id=wall-jump-curriculum
```
We can then keep track of the current lessons and progresses via TensorBoard.

### Environment Parameter Randomization
To enable parameter randomization, you need to provide the `--sampler` CLI
option and point to a YAML file that defines the curriculum. Here is one example
file:
To enable parameter randomization, you need to add a `parameter_randomization` sub-section
to your trainer config YAML file. Here is one example:
resampling-interval: 5000
behaviors:
# < Same as above>
mass:
sampler-type: "uniform"
min_value: 0.5
max_value: 10
parameter_randomization:
resampling-interval: 5000
gravity:
sampler-type: "multirange_uniform"
intervals: [[7, 10], [15, 20]]
mass:
sampler-type: "uniform"
min_value: 0.5
max_value: 10
scale:
sampler-type: "uniform"
min_value: 0.75
max_value: 3
gravity:
sampler-type: "multirange_uniform"
intervals: [[7, 10], [15, 20]]
scale:
sampler-type: "uniform"
min_value: 0.75
max_value: 3
```
Note that `mass`, `gravity` and `scale` are the names of the environment

`interval_2_max`], ...]
- **sub-arguments** - `intervals`
The implementation of the samplers can be found at
`ml-agents-envs/mlagents_envs/sampler_class.py`.
The implementation of the samplers can be found in the
[sampler_class.py file](../ml-agents/mlagents/trainers/sampler_class.py).
#### Defining a New Sampler Type

#### Training with Environment Parameter Randomization
After the sampler YAML file is defined, we proceed by launching `mlagents-learn`
and specify our configured sampler file with the `--sampler` flag. For example,
After the sampler configuration is defined, we proceed by launching `mlagents-learn`
and specify the trainer configuration with `parameter_randomization` defined. For example,
`Environment Parameters` with `config/3dball_randomize.yaml` sampling setup, we
would run
`Environment Parameters` with sampling setup, we would run
mlagents-learn config/trainer_config.yaml --sampler=config/3dball_randomize.yaml
--run-id=3D-Ball-randomize
mlagents-learn config/ppo/3DBall_randomize.yaml --run-id=3D-Ball-randomize
```
We can observe progress and metrics via Tensorboard.

- **Buffer Size** - If you are having trouble getting an agent to train, even
with multiple concurrent Unity instances, you could increase `buffer_size` in
the `config/trainer_config.yaml` file. A common practice is to multiply
the trainer config file. A common practice is to multiply
`buffer_size` by `num-envs`.
- **Resource Constraints** - Invoking concurrent Unity instances is constrained
by the resources on the machine. Please use discretion when setting

docs/Using-Tensorboard.md (8 changes)


[TensorBoard](https://www.tensorflow.org/programmers_guide/summaries_and_tensorboard).
The `mlagents-learn` command saves training statistics to a folder named
`summaries`, organized by the `run-id` value you assign to a training session.
`results`, organized by the `run-id` value you assign to a training session.
In order to observe the training process, either during training or afterward,
start TensorBoard:

the --port option.
**Note:** If you don't assign a `run-id` identifier, `mlagents-learn` uses the
default string, "ppo". All the statistics will be saved to the same sub-folder
and displayed as one session in TensorBoard. After a few runs, the displays can
become difficult to interpret in this situation. You can delete the folders
under the `summaries` directory to clear out old statistics.
default string, "ppo". You can delete the folders under the `results` directory
to clear out old statistics.
On the left side of the TensorBoard window, you can select which of the training
runs you want to display. You can select multiple run-ids to compare statistics.

gym-unity/gym_unity/envs/__init__.py (21 changes)


self._env = unity_env
# Take a single step so that the brain information will be sent over
if not self._env.get_behavior_names():
if not self._env.behavior_specs:
self._n_agents = -1
# Save the step result from the last time all Agents requested decisions.
self._previous_decision_step: DecisionSteps = None

self._allow_multiple_visual_obs = allow_multiple_visual_obs
# Check brain configuration
if len(self._env.get_behavior_names()) != 1:
if len(self._env.behavior_specs) != 1:
self.name = self._env.get_behavior_names()[0]
self.group_spec = self._env.get_behavior_spec(self.name)
self.name = list(self._env.behavior_specs.keys())[0]
self.group_spec = self._env.behavior_specs[self.name]
if use_visual and self._get_n_vis_obs() == 0:
raise UnityGymException(

self._env.step()
decision_step, terminal_step = self._env.get_steps(self.name)
self._check_agents(max(len(decision_step), len(terminal_step)))
if len(terminal_step) != 0:
# The agent is done
self.game_over = True

logger.warning("Could not seed environment %s", self.name)
return
def _check_agents(self, n_agents: int) -> None:
if self._n_agents > 1:
@staticmethod
def _check_agents(n_agents: int) -> None:
if n_agents > 1:
"There can only be one Agent in the environment but {n_agents} were detected."
f"There can only be one Agent in the environment but {n_agents} were detected."
)
@property

@property
def observation_space(self):
return self._observation_space
@property
def number_agents(self):
return self._n_agents
class ActionFlattener:
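A minimal usage sketch of the updated wrapper, assuming a built single-Agent environment binary (named `Basic` here purely for illustration):
```python
from mlagents_envs.environment import UnityEnvironment
from gym_unity.envs import UnityToGymWrapper

# The wrapper now reads env.behavior_specs instead of the removed get_behavior_names().
unity_env = UnityEnvironment(file_name="Basic")
env = UnityToGymWrapper(unity_env, use_visual=False)

obs = env.reset()
done = False
while not done:
    # Random actions, just to drive one episode.
    obs, reward, done, info = env.step(env.action_space.sample())
env.close()
```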

gym-unity/gym_unity/tests/test_gym.py (5 changes)


ActionType,
DecisionSteps,
TerminalSteps,
BehaviorMapping,
)

setup_mock_unityenvironment(
mock_env, mock_spec, mock_decision_step, mock_terminal_step
)
env = UnityToGymWrapper(mock_env, use_visual=False)
assert isinstance(env, UnityToGymWrapper)
assert isinstance(env.reset(), np.ndarray)

:Mock mock_decision: A DecisionSteps object that will be returned at each step and reset.
:Mock mock_termination: A TerminalSteps object that will be returned at each step and reset.
"""
mock_env.get_behavior_names.return_value = ["MockBrain"]
mock_env.get_behavior_spec.return_value = mock_spec
mock_env.behavior_specs = BehaviorMapping({"MockBrain": mock_spec})
mock_env.get_steps.return_value = (mock_decision, mock_termination)

ml-agents-envs/mlagents_envs/base_env.py (50 changes)


from abc import ABC, abstractmethod
from collections.abc import Mapping
from typing import List, NamedTuple, Tuple, Optional, Union, Dict, Iterator, Any
from typing import (
List,
NamedTuple,
Tuple,
Optional,
Union,
Dict,
Iterator,
Any,
Mapping as MappingType,
)
import numpy as np
from enum import Enum

return np.zeros((n_agents, self.action_size), dtype=np.float32)
class BehaviorMapping(Mapping):
def __init__(self, specs: Dict[BehaviorName, BehaviorSpec]):
self._dict = specs
def __len__(self) -> int:
return len(self._dict)
def __getitem__(self, behavior: BehaviorName) -> BehaviorSpec:
return self._dict[behavior]
def __iter__(self) -> Iterator[Any]:
yield from self._dict
class BaseEnv(ABC):
@abstractmethod
def step(self) -> None:

"""
pass
@abstractmethod
def reset(self) -> None:

pass
@abstractmethod
def close(self) -> None:

pass
@property
def get_behavior_names(self) -> List[BehaviorName]:
def behavior_specs(self) -> MappingType[str, BehaviorSpec]:
Returns the list of the behavior names present in the environment.
Returns a Mapping from behavior names to behavior specs.
This list can grow with time as new policies are instantiated.
:return: the list of agent BehaviorName.
Note that new keys can be added to this mapping as new policies are instantiated.
pass
@abstractmethod
def set_actions(self, behavior_name: BehaviorName, action: np.ndarray) -> None:

:param action: A two dimensional np.ndarray corresponding to the action
(either int or float)
"""
pass
@abstractmethod
def set_action_for_agent(

:param action: A one dimensional np.ndarray corresponding to the action
(either int or float)
"""
pass
@abstractmethod
def get_steps(

rewards, agent ids and interrupted flags of the agents that had their
episode terminated last step.
"""
pass
@abstractmethod
def get_behavior_spec(self, behavior_name: BehaviorName) -> BehaviorSpec:
"""
Get the BehaviorSpec corresponding to the behavior name
:param behavior_name: The name of the behavior the agents are part of
:return: A BehaviorSpec corresponding to that behavior
"""
pass
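A small sketch of how the new read-only `BehaviorMapping` behaves (the spec values are illustrative and assume the `BehaviorSpec` NamedTuple defined in this file):
```python
from mlagents_envs.base_env import ActionType, BehaviorMapping, BehaviorSpec

# Hand-built spec purely for illustration: one 3-float observation, 2 continuous actions.
spec = BehaviorSpec(
    observation_shapes=[(3,)], action_type=ActionType.CONTINUOUS, action_shape=2
)
specs = BehaviorMapping({"MyBehavior": spec})

assert len(specs) == 1                       # __len__
assert "MyBehavior" in specs                 # membership via __iter__
assert specs["MyBehavior"].action_size == 2  # __getitem__
# Mapping (not MutableMapping) defines no __setitem__, so the view stays read-only.
```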

ml-agents-envs/mlagents_envs/environment.py (293 changes)


import atexit
from distutils.version import StrictVersion
import glob
import uuid
from typing import Dict, List, Optional, Any, Tuple
from typing import Dict, List, Optional, Tuple, Mapping as MappingType
from mlagents_envs.side_channel.side_channel import SideChannel, IncomingMessage
from mlagents_envs.side_channel.side_channel import SideChannel
from mlagents_envs.side_channel.side_channel_manager import SideChannelManager
from mlagents_envs import env_utils
from mlagents_envs.base_env import (
BaseEnv,

BehaviorName,
AgentId,
BehaviorMapping,
)
from mlagents_envs.timers import timed, hierarchical_timer
from mlagents_envs.exception import (

from mlagents_envs.communicator_objects.unity_input_pb2 import UnityInputProto
from .rpc_communicator import RpcCommunicator
from sys import platform
import struct
SCALAR_ACTION_TYPES = (int, np.int32, np.int64, float, np.float32, np.float64)
SINGLE_BRAIN_ACTION_TYPES = SCALAR_ACTION_TYPES + (list, np.ndarray)
# Communication protocol version.
# When connecting to C#, this must be compatible with Academy.k_ApiVersion.
# We follow semantic versioning on the communication version, so existing

BASE_ENVIRONMENT_PORT = 5005
# Command line argument used to pass the port to the executable environment.
PORT_COMMAND_LINE_ARG = "--mlagents-port"
_PORT_COMMAND_LINE_ARG = "--mlagents-port"
@staticmethod
def _raise_version_exception(unity_com_ver: str) -> None:

)
@staticmethod
def check_communication_compatibility(
def _check_communication_compatibility(
unity_com_ver: str, python_api_version: str, unity_package_version: str
) -> bool:
unity_communicator_version = StrictVersion(unity_com_ver)

return True
@staticmethod
def get_capabilities_proto() -> UnityRLCapabilitiesProto:
def _get_capabilities_proto() -> UnityRLCapabilitiesProto:
def warn_csharp_base_capabitlities(
def _warn_csharp_base_capabilities(
caps: UnityRLCapabilitiesProto, unity_package_ver: str, python_package_ver: str
) -> None:
if not caps.baseRLCapabilities:

:str log_folder: Optional folder to write the Unity Player log file into. Requires absolute path.
"""
atexit.register(self._close)
self.additional_args = additional_args or []
self.no_graphics = no_graphics
self._additional_args = additional_args or []
self._no_graphics = no_graphics
# If base port is not specified, use BASE_ENVIRONMENT_PORT if we have
# an environment, otherwise DEFAULT_EDITOR_PORT
if base_port is None:

self.port = base_port + worker_id
self._port = base_port + worker_id
self.proc1 = None
self.timeout_wait: int = timeout_wait
self.communicator = self.get_communicator(worker_id, base_port, timeout_wait)
self.worker_id = worker_id
self.side_channels: Dict[uuid.UUID, SideChannel] = {}
if side_channels is not None:
for _sc in side_channels:
if _sc.channel_id in self.side_channels:
raise UnityEnvironmentException(
"There cannot be two side channels with the same channel id {0}.".format(
_sc.channel_id
)
)
self.side_channels[_sc.channel_id] = _sc
self.log_folder = log_folder
self._proc1 = None
self._timeout_wait: int = timeout_wait
self._communicator = self._get_communicator(worker_id, base_port, timeout_wait)
self._worker_id = worker_id
self._side_channel_manager = SideChannelManager(side_channels)
self._log_folder = log_folder
# If the environment name is None, a new environment will not be launched
# and the communicator will directly try to connect to an existing unity environment.

"the worker-id must be 0 in order to connect with the Editor."
)
if file_name is not None:
self.executable_launcher(file_name, no_graphics, additional_args)
try:
self._proc1 = env_utils.launch_executable(
file_name, self._executable_args()
)
except UnityEnvironmentException:
self._close(0)
raise
f"Listening on port {self.port}. "
f"Listening on port {self._port}. "
f"Start training by pressing the Play button in the Unity Editor."
)
self._loaded = True

communication_version=self.API_VERSION,
package_version=mlagents_envs.__version__,
capabilities=UnityEnvironment.get_capabilities_proto(),
capabilities=UnityEnvironment._get_capabilities_proto(),
aca_output = self.send_academy_parameters(rl_init_parameters_in)
aca_output = self._send_academy_parameters(rl_init_parameters_in)
if not UnityEnvironment.check_communication_compatibility(
if not UnityEnvironment._check_communication_compatibility(
aca_params.communication_version,
UnityEnvironment.API_VERSION,
aca_params.package_version,

UnityEnvironment.warn_csharp_base_capabitlities(
UnityEnvironment._warn_csharp_base_capabilities(
aca_params.capabilities,
aca_params.package_version,
UnityEnvironment.API_VERSION,

self._update_behavior_specs(aca_output)
@staticmethod
def get_communicator(worker_id, base_port, timeout_wait):
def _get_communicator(worker_id, base_port, timeout_wait):
@staticmethod
def validate_environment_path(env_path: str) -> Optional[str]:
# Strip out executable extensions if passed
env_path = (
env_path.strip()
.replace(".app", "")
.replace(".exe", "")
.replace(".x86_64", "")
.replace(".x86", "")
)
true_filename = os.path.basename(os.path.normpath(env_path))
logger.debug("The true file name is {}".format(true_filename))
if not (glob.glob(env_path) or glob.glob(env_path + ".*")):
return None
cwd = os.getcwd()
launch_string = None
true_filename = os.path.basename(os.path.normpath(env_path))
if platform == "linux" or platform == "linux2":
candidates = glob.glob(os.path.join(cwd, env_path) + ".x86_64")
if len(candidates) == 0:
candidates = glob.glob(os.path.join(cwd, env_path) + ".x86")
if len(candidates) == 0:
candidates = glob.glob(env_path + ".x86_64")
if len(candidates) == 0:
candidates = glob.glob(env_path + ".x86")
if len(candidates) > 0:
launch_string = candidates[0]
elif platform == "darwin":
candidates = glob.glob(
os.path.join(cwd, env_path + ".app", "Contents", "MacOS", true_filename)
)
if len(candidates) == 0:
candidates = glob.glob(
os.path.join(env_path + ".app", "Contents", "MacOS", true_filename)
)
if len(candidates) == 0:
candidates = glob.glob(
os.path.join(cwd, env_path + ".app", "Contents", "MacOS", "*")
)
if len(candidates) == 0:
candidates = glob.glob(
os.path.join(env_path + ".app", "Contents", "MacOS", "*")
)
if len(candidates) > 0:
launch_string = candidates[0]
elif platform == "win32":
candidates = glob.glob(os.path.join(cwd, env_path + ".exe"))
if len(candidates) == 0:
candidates = glob.glob(env_path + ".exe")
if len(candidates) > 0:
launch_string = candidates[0]
return launch_string
def executable_args(self) -> List[str]:
def _executable_args(self) -> List[str]:
if self.no_graphics:
if self._no_graphics:
args += [UnityEnvironment.PORT_COMMAND_LINE_ARG, str(self.port)]
if self.log_folder:
args += [UnityEnvironment._PORT_COMMAND_LINE_ARG, str(self._port)]
if self._log_folder:
self.log_folder, f"Player-{self.worker_id}.log"
self._log_folder, f"Player-{self._worker_id}.log"
args += self.additional_args
args += self._additional_args
def executable_launcher(self, file_name, no_graphics, args):
launch_string = self.validate_environment_path(file_name)
if launch_string is None:
self._close(0)
raise UnityEnvironmentException(
f"Couldn't launch the {file_name} environment. Provided filename does not match any environments."
)
else:
logger.debug("This is the launch string {}".format(launch_string))
# Launch Unity environment
subprocess_args = [launch_string] + self.executable_args()
try:
self.proc1 = subprocess.Popen(
subprocess_args,
# start_new_session=True means that signals to the parent python process
# (e.g. SIGINT from keyboard interrupt) will not be sent to the new process on POSIX platforms.
# This is generally good since we want the environment to have a chance to shutdown,
# but may be undesirable in some cases; if so, we'll add a command-line toggle.
# Note that on Windows, the CTRL_C signal will still be sent.
start_new_session=True,
)
except PermissionError as perm:
# This is likely due to missing read or execute permissions on file.
raise UnityEnvironmentException(
f"Error when trying to launch environment - make sure "
f"permissions are set correctly. For example "
f'"chmod -R 755 {launch_string}"'
) from perm
def _update_behavior_specs(self, output: UnityOutputProto) -> None:
init_output = output.rl_initialization_output
for brain_param in init_output.brain_parameters:

DecisionSteps.empty(self._env_specs[brain_name]),
TerminalSteps.empty(self._env_specs[brain_name]),
)
self._parse_side_channel_message(self.side_channels, output.side_channel)
self._side_channel_manager.process_side_channel_message(output.side_channel)
outputs = self.communicator.exchange(self._generate_reset_input())
outputs = self._communicator.exchange(self._generate_reset_input())
if outputs is None:
raise UnityCommunicatorStoppedException("Communicator has exited.")
self._update_behavior_specs(outputs)

].create_empty_action(n_agents)
step_input = self._generate_step_input(self._env_actions)
with hierarchical_timer("communicator.exchange"):
outputs = self.communicator.exchange(step_input)
outputs = self._communicator.exchange(step_input)
if outputs is None:
raise UnityCommunicatorStoppedException("Communicator has exited.")
self._update_behavior_specs(outputs)

def get_behavior_names(self):
return list(self._env_specs.keys())
@property
def behavior_specs(self) -> MappingType[str, BehaviorSpec]:
return BehaviorMapping(self._env_specs)
def _assert_behavior_exists(self, behavior_name: str) -> None:
if behavior_name not in self._env_specs:

expected_shape = (len(self._env_state[behavior_name][0]), spec.action_size)
if action.shape != expected_shape:
raise UnityActionException(
"The behavior {0} needs an input of dimension {1} but received input of dimension {2}".format(
behavior_name, expected_shape, action.shape
)
"The behavior {0} needs an input of dimension {1} for "
"(<number of agents>, <action size>) but received input of "
"dimension {2}".format(behavior_name, expected_shape, action.shape)
)
if action.dtype != expected_type:
action = action.astype(expected_type)

self._assert_behavior_exists(behavior_name)
return self._env_state[behavior_name]
def get_behavior_spec(self, behavior_name: BehaviorName) -> BehaviorSpec:
self._assert_behavior_exists(behavior_name)
return self._env_specs[behavior_name]
def close(self):
"""
Sends a shutdown signal to the unity environment, and closes the socket connection.

force-killing it. Defaults to `self.timeout_wait`.
"""
if timeout is None:
timeout = self.timeout_wait
timeout = self._timeout_wait
self.communicator.close()
if self.proc1 is not None:
self._communicator.close()
if self._proc1 is not None:
self.proc1.wait(timeout=timeout)
signal_name = self.returncode_to_signal_name(self.proc1.returncode)
self._proc1.wait(timeout=timeout)
signal_name = self._returncode_to_signal_name(self._proc1.returncode)
return_info = f"Environment shut down with return code {self.proc1.returncode}{signal_name}."
return_info = f"Environment shut down with return code {self._proc1.returncode}{signal_name}."
self.proc1.kill()
self._proc1.kill()
self.proc1 = None
@classmethod
def _flatten(cls, arr: Any) -> List[float]:
"""
Converts arrays to list.
:param arr: numpy vector.
:return: flattened list.
"""
if isinstance(arr, cls.SCALAR_ACTION_TYPES):
arr = [float(arr)]
if isinstance(arr, np.ndarray):
arr = arr.tolist()
if len(arr) == 0:
return arr
if isinstance(arr[0], np.ndarray):
# pylint: disable=no-member
arr = [item for sublist in arr for item in sublist.tolist()]
if isinstance(arr[0], list):
# pylint: disable=not-an-iterable
arr = [item for sublist in arr for item in sublist]
arr = [float(x) for x in arr]
return arr
@staticmethod
def _parse_side_channel_message(
side_channels: Dict[uuid.UUID, SideChannel], data: bytes
) -> None:
offset = 0
while offset < len(data):
try:
channel_id = uuid.UUID(bytes_le=bytes(data[offset : offset + 16]))
offset += 16
message_len, = struct.unpack_from("<i", data, offset)
offset = offset + 4
message_data = data[offset : offset + message_len]
offset = offset + message_len
except Exception:
raise UnityEnvironmentException(
"There was a problem reading a message in a SideChannel. "
"Please make sure the version of MLAgents in Unity is "
"compatible with the Python version."
)
if len(message_data) != message_len:
raise UnityEnvironmentException(
"The message received by the side channel {0} was "
"unexpectedly short. Make sure your Unity Environment "
"sending side channel data properly.".format(channel_id)
)
if channel_id in side_channels:
incoming_message = IncomingMessage(message_data)
side_channels[channel_id].on_message_received(incoming_message)
else:
logger.warning(
"Unknown side channel data received. Channel type "
": {0}.".format(channel_id)
)
@staticmethod
def _generate_side_channel_data(
side_channels: Dict[uuid.UUID, SideChannel]
) -> bytearray:
result = bytearray()
for channel_id, channel in side_channels.items():
for message in channel.message_queue:
result += channel_id.bytes_le
result += struct.pack("<i", len(message))
result += message
channel.message_queue = []
return result
self._proc1 = None
@timed
def _generate_step_input(

action = AgentActionProto(vector_actions=vector_action[b][i])
rl_in.agent_actions[b].value.extend([action])
rl_in.command = STEP
rl_in.side_channel = bytes(self._generate_side_channel_data(self.side_channels))
return self.wrap_unity_input(rl_in)
rl_in.side_channel = bytes(
self._side_channel_manager.generate_side_channel_messages()
)
return self._wrap_unity_input(rl_in)
rl_in.side_channel = bytes(self._generate_side_channel_data(self.side_channels))
return self.wrap_unity_input(rl_in)
rl_in.side_channel = bytes(
self._side_channel_manager.generate_side_channel_messages()
)
return self._wrap_unity_input(rl_in)
def send_academy_parameters(
def _send_academy_parameters(
return self.communicator.initialize(inputs)
return self._communicator.initialize(inputs)
def wrap_unity_input(rl_input: UnityRLInputProto) -> UnityInputProto:
def _wrap_unity_input(rl_input: UnityRLInputProto) -> UnityInputProto:
def returncode_to_signal_name(returncode: int) -> Optional[str]:
def _returncode_to_signal_name(returncode: int) -> Optional[str]:
"""
Try to convert return codes into their corresponding signal name.
E.g. returncode_to_signal_name(-2) -> "SIGINT"

ml-agents-envs/mlagents_envs/tests/test_envs.py (54 changes)


from mlagents_envs.mock_communicator import MockCommunicator
@mock.patch("mlagents_envs.environment.UnityEnvironment.get_communicator")
@mock.patch("mlagents_envs.environment.UnityEnvironment._get_communicator")
@mock.patch("mlagents_envs.environment.UnityEnvironment.executable_launcher")
@mock.patch("mlagents_envs.environment.UnityEnvironment.get_communicator")
@mock.patch("mlagents_envs.env_utils.launch_executable")
@mock.patch("mlagents_envs.environment.UnityEnvironment._get_communicator")
assert env.get_behavior_names() == ["RealFakeBrain"]
assert list(env.behavior_specs.keys()) == ["RealFakeBrain"]
env.close()

(None, None, UnityEnvironment.DEFAULT_EDITOR_PORT),
],
)
@mock.patch("mlagents_envs.environment.UnityEnvironment.executable_launcher")
@mock.patch("mlagents_envs.environment.UnityEnvironment.get_communicator")
@mock.patch("mlagents_envs.env_utils.launch_executable")
@mock.patch("mlagents_envs.environment.UnityEnvironment._get_communicator")
def test_port_defaults(
mock_communicator, mock_launcher, base_port, file_name, expected
):

env = UnityEnvironment(file_name=file_name, worker_id=0, base_port=base_port)
assert expected == env.port
assert expected == env._port
@mock.patch("mlagents_envs.environment.UnityEnvironment.executable_launcher")
@mock.patch("mlagents_envs.environment.UnityEnvironment.get_communicator")
@mock.patch("mlagents_envs.env_utils.launch_executable")
@mock.patch("mlagents_envs.environment.UnityEnvironment._get_communicator")
args = env.executable_args()
args = env._executable_args()
@mock.patch("mlagents_envs.environment.UnityEnvironment.executable_launcher")
@mock.patch("mlagents_envs.environment.UnityEnvironment.get_communicator")
@mock.patch("mlagents_envs.env_utils.launch_executable")
@mock.patch("mlagents_envs.environment.UnityEnvironment._get_communicator")
spec = env.get_behavior_spec("RealFakeBrain")
spec = env.behavior_specs["RealFakeBrain"]
env.reset()
decision_steps, terminal_steps = env.get_steps("RealFakeBrain")
env.close()

assert (n_agents,) + shape == obs.shape
@mock.patch("mlagents_envs.environment.UnityEnvironment.executable_launcher")
@mock.patch("mlagents_envs.environment.UnityEnvironment.get_communicator")
@mock.patch("mlagents_envs.env_utils.launch_executable")
@mock.patch("mlagents_envs.environment.UnityEnvironment._get_communicator")
spec = env.get_behavior_spec("RealFakeBrain")
spec = env.behavior_specs["RealFakeBrain"]
env.step()
decision_steps, terminal_steps = env.get_steps("RealFakeBrain")
n_agents = len(decision_steps)

assert 2 in terminal_steps
@mock.patch("mlagents_envs.environment.UnityEnvironment.executable_launcher")
@mock.patch("mlagents_envs.environment.UnityEnvironment.get_communicator")
@mock.patch("mlagents_envs.env_utils.launch_executable")
@mock.patch("mlagents_envs.environment.UnityEnvironment._get_communicator")
def test_close(mock_communicator, mock_launcher):
comm = MockCommunicator(discrete_action=False, visual_inputs=0)
mock_communicator.return_value = comm

unity_ver = "1.0.0"
python_ver = "1.0.0"
unity_package_version = "0.15.0"
assert UnityEnvironment.check_communication_compatibility(
assert UnityEnvironment._check_communication_compatibility(
assert UnityEnvironment.check_communication_compatibility(
assert UnityEnvironment._check_communication_compatibility(
assert not UnityEnvironment.check_communication_compatibility(
assert not UnityEnvironment._check_communication_compatibility(
assert UnityEnvironment.check_communication_compatibility(
assert UnityEnvironment._check_communication_compatibility(
assert not UnityEnvironment.check_communication_compatibility(
assert not UnityEnvironment._check_communication_compatibility(
assert not UnityEnvironment.check_communication_compatibility(
assert not UnityEnvironment._check_communication_compatibility(
assert UnityEnvironment.returncode_to_signal_name(-2) == "SIGINT"
assert UnityEnvironment.returncode_to_signal_name(42) is None
assert UnityEnvironment.returncode_to_signal_name("SIGINT") is None
assert UnityEnvironment._returncode_to_signal_name(-2) == "SIGINT"
assert UnityEnvironment._returncode_to_signal_name(42) is None
assert UnityEnvironment._returncode_to_signal_name("SIGINT") is None
if __name__ == "__main__":

118
ml-agents-envs/mlagents_envs/tests/test_side_channel.py


import uuid
import pytest
from mlagents_envs.side_channel.side_channel_manager import SideChannelManager
from mlagents_envs.environment import UnityEnvironment
from mlagents_envs.side_channel.engine_configuration_channel import (
EngineConfigurationChannel,
EngineConfig,
)
from mlagents_envs.side_channel.environment_parameters_channel import (
EnvironmentParametersChannel,
)
from mlagents_envs.side_channel.stats_side_channel import (
StatsSideChannel,
StatsAggregationMethod,
)
from mlagents_envs.exception import (
UnitySideChannelException,
UnityCommunicationException,
)
class IntChannel(SideChannel):

receiver = IntChannel()
sender.send_int(5)
sender.send_int(6)
data = UnityEnvironment._generate_side_channel_data({sender.channel_id: sender})
UnityEnvironment._parse_side_channel_message({receiver.channel_id: receiver}, data)
data = SideChannelManager([sender]).generate_side_channel_messages()
SideChannelManager([receiver]).process_side_channel_message(data)
assert receiver.list_int[0] == 5
assert receiver.list_int[1] == 6

sender.set_property("prop1", 1.0)
data = UnityEnvironment._generate_side_channel_data({sender.channel_id: sender})
UnityEnvironment._parse_side_channel_message({receiver.channel_id: receiver}, data)
data = SideChannelManager([sender]).generate_side_channel_messages()
SideChannelManager([receiver]).process_side_channel_message(data)
val = receiver.get_property("prop1")
assert val == 1.0

data = UnityEnvironment._generate_side_channel_data({sender.channel_id: sender})
UnityEnvironment._parse_side_channel_message({receiver.channel_id: receiver}, data)
data = SideChannelManager([sender]).generate_side_channel_messages()
SideChannelManager([receiver]).process_side_channel_message(data)
val = receiver.get_property("prop1")
assert val == 1.0

sender.send_raw_data("foo".encode("ascii"))
sender.send_raw_data("bar".encode("ascii"))
data = UnityEnvironment._generate_side_channel_data({sender.channel_id: sender})
UnityEnvironment._parse_side_channel_message({receiver.channel_id: receiver}, data)
data = SideChannelManager([sender]).generate_side_channel_messages()
SideChannelManager([receiver]).process_side_channel_message(data)
messages = receiver.get_and_clear_received_messages()
assert len(messages) == 2

# Test reading with defaults
assert [] == msg_in.read_float32_list()
assert val == msg_in.read_float32_list(default_value=val)
def test_engine_configuration():
sender = EngineConfigurationChannel()
    # We use a raw bytes channel to interpret the data
receiver = RawBytesChannel(sender.channel_id)
config = EngineConfig.default_config()
sender.set_configuration(config)
data = SideChannelManager([sender]).generate_side_channel_messages()
SideChannelManager([receiver]).process_side_channel_message(data)
received_data = receiver.get_and_clear_received_messages()
assert len(received_data) == 5 # 5 different messages one for each setting
sent_time_scale = 4.5
sender.set_configuration_parameters(time_scale=sent_time_scale)
data = SideChannelManager([sender]).generate_side_channel_messages()
SideChannelManager([receiver]).process_side_channel_message(data)
message = IncomingMessage(receiver.get_and_clear_received_messages()[0])
message.read_int32()
time_scale = message.read_float32()
assert time_scale == sent_time_scale
with pytest.raises(UnitySideChannelException):
sender.set_configuration_parameters(width=None, height=42)
with pytest.raises(UnityCommunicationException):
# try to send data to the EngineConfigurationChannel
sender.set_configuration_parameters(time_scale=sent_time_scale)
data = SideChannelManager([sender]).generate_side_channel_messages()
SideChannelManager([sender]).process_side_channel_message(data)
def test_environment_parameters():
sender = EnvironmentParametersChannel()
    # We use a raw bytes channel to interpret the data
receiver = RawBytesChannel(sender.channel_id)
sender.set_float_parameter("param-1", 0.1)
data = SideChannelManager([sender]).generate_side_channel_messages()
SideChannelManager([receiver]).process_side_channel_message(data)
message = IncomingMessage(receiver.get_and_clear_received_messages()[0])
key = message.read_string()
dtype = message.read_int32()
value = message.read_float32()
assert key == "param-1"
assert dtype == EnvironmentParametersChannel.EnvironmentDataTypes.FLOAT
assert value - 0.1 < 1e-8
sender.set_float_parameter("param-1", 0.1)
sender.set_float_parameter("param-2", 0.1)
sender.set_float_parameter("param-3", 0.1)
data = SideChannelManager([sender]).generate_side_channel_messages()
SideChannelManager([receiver]).process_side_channel_message(data)
assert len(receiver.get_and_clear_received_messages()) == 3
with pytest.raises(UnityCommunicationException):
# try to send data to the EngineConfigurationChannel
sender.set_float_parameter("param-1", 0.1)
data = SideChannelManager([sender]).generate_side_channel_messages()
SideChannelManager([sender]).process_side_channel_message(data)
def test_stats_channel():
receiver = StatsSideChannel()
message = OutgoingMessage()
message.write_string("stats-1")
message.write_float32(42.0)
message.write_int32(1) # corresponds to StatsAggregationMethod.MOST_RECENT
receiver.on_message_received(IncomingMessage(message.buffer))
stats = receiver.get_and_reset_stats()
assert len(stats) == 1
val, method = stats["stats-1"]
assert val - 42.0 < 1e-8
assert method == StatsAggregationMethod.MOST_RECENT

14
ml-agents/mlagents/trainers/learn.py


from mlagents.trainers.subprocess_env_manager import SubprocessEnvManager
from mlagents_envs.side_channel.side_channel import SideChannel
from mlagents_envs.side_channel.engine_configuration_channel import EngineConfig
from mlagents_envs.exception import UnityEnvironmentException
from mlagents_envs.timers import (
hierarchical_timer,
get_timer_tree,

os.path.join(base_path, options.run_id) if options.initialize_from else None
)
run_logs_dir = os.path.join(write_path, "run_logs")
port = options.base_port
port: Optional[int] = options.base_port
# Check if directory exists
handle_existing_directories(
write_path, options.resume, options.force, maybe_init_path

StatsReporter.add_writer(console_writer)
if options.env_path is None:
port = UnityEnvironment.DEFAULT_EDITOR_PORT
port = None
env_factory = create_environment_factory(
options.env_path,
options.no_graphics,

env_path: Optional[str],
no_graphics: bool,
seed: int,
start_port: int,
start_port: Optional[int],
if env_path is not None:
launch_string = UnityEnvironment.validate_environment_path(env_path)
if launch_string is None:
raise UnityEnvironmentException(
f"Couldn't launch the {env_path} environment. Provided filename does not match any environments."
)
def create_unity_environment(
worker_id: int, side_channels: List[SideChannel]
) -> UnityEnvironment:

25
ml-agents/mlagents/trainers/models.py


RESNET = "resnet"
class LearningRateSchedule(Enum):
class ScheduleType(Enum):
CONSTANT = "constant"
LINEAR = "linear"

return global_step, increment_step, steps_to_increment
@staticmethod
def create_learning_rate(
lr_schedule: LearningRateSchedule,
lr: float,
def create_schedule(
schedule: ScheduleType,
parameter: float,
min_value: float,
) -> tf.Tensor:
"""
Create a learning rate tensor.

:param max_step: The maximum number of steps in the training run.
:return: A Tensor containing the learning rate.
"""
if lr_schedule == LearningRateSchedule.CONSTANT:
learning_rate = tf.Variable(lr)
elif lr_schedule == LearningRateSchedule.LINEAR:
learning_rate = tf.train.polynomial_decay(
lr, global_step, max_step, 1e-10, power=1.0
if schedule == ScheduleType.CONSTANT:
parameter_rate = tf.Variable(parameter, trainable=False)
elif schedule == ScheduleType.LINEAR:
parameter_rate = tf.train.polynomial_decay(
parameter, global_step, max_step, min_value, power=1.0
raise UnityTrainerException(
"The learning rate schedule {} is invalid.".format(lr_schedule)
)
return learning_rate
raise UnityTrainerException("The schedule {} is invalid.".format(schedule))
return parameter_rate
@staticmethod
def scaled_init(scale):
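For intuition on the new `create_schedule`, the `ScheduleType.LINEAR` branch anneals the parameter toward `min_value` over `max_step` steps via `tf.train.polynomial_decay` with `power=1.0`. A framework-free sketch of that formula (the helper function and the numbers below are illustrative, not part of the codebase):

```python
def linear_schedule(parameter: float, step: int, max_step: int, min_value: float) -> float:
    # Mirrors tf.train.polynomial_decay(..., power=1.0): linear anneal down to min_value.
    frac = min(step, max_step) / max_step
    return (parameter - min_value) * (1.0 - frac) + min_value

# e.g. a learning rate of 3e-4 halfway through a 100k-step run decays to ~1.5e-4
print(linear_schedule(3e-4, 50_000, 100_000, 1e-10))
```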

1
ml-agents/mlagents/trainers/policy/tf_policy.py


)
)
if reset_global_steps:
self._set_step(0)
logger.info(
"Starting training from step 0 and saving to {}.".format(
self.model_path

34
ml-agents/mlagents/trainers/ppo/optimizer.py


import numpy as np
from mlagents.tf_utils import tf
from mlagents_envs.timers import timed
from mlagents.trainers.models import ModelUtils, EncoderType, LearningRateSchedule
from mlagents.trainers.models import ModelUtils, EncoderType, ScheduleType
from mlagents.trainers.policy.tf_policy import TFPolicy
from mlagents.trainers.optimizer.tf_optimizer import TFOptimizer
from mlagents.trainers.buffer import AgentBuffer

super().__init__(policy, trainer_params)
lr = float(trainer_params["learning_rate"])
lr_schedule = LearningRateSchedule(
self._schedule = ScheduleType(
trainer_params.get("learning_rate_schedule", "linear")
)
h_size = int(trainer_params["hidden_units"])

"Losses/Value Loss": "value_loss",
"Losses/Policy Loss": "policy_loss",
"Policy/Learning Rate": "learning_rate",
"Policy/Epsilon": "decay_epsilon",
"Policy/Beta": "decay_beta",
}
if self.policy.use_recurrent:
self.m_size = self.policy.m_size

else:
self._create_dc_critic(h_size, num_layers, vis_encode_type)
self.learning_rate = ModelUtils.create_learning_rate(
lr_schedule, lr, self.policy.global_step, int(max_step)
self.learning_rate = ModelUtils.create_schedule(
self._schedule,
lr,
self.policy.global_step,
int(max_step),
min_value=1e-10,
)
self._create_losses(
self.policy.total_log_probs,

"policy_loss": self.abs_policy_loss,
"update_batch": self.update_batch,
"learning_rate": self.learning_rate,
"decay_epsilon": self.decay_epsilon,
"decay_beta": self.decay_beta,
}
)

)
advantage = tf.expand_dims(self.advantage, -1)
decay_epsilon = tf.train.polynomial_decay(
epsilon, self.policy.global_step, max_step, 0.1, power=1.0
self.decay_epsilon = ModelUtils.create_schedule(
self._schedule, epsilon, self.policy.global_step, max_step, min_value=0.1
decay_beta = tf.train.polynomial_decay(
beta, self.policy.global_step, max_step, 1e-5, power=1.0
self.decay_beta = ModelUtils.create_schedule(
self._schedule, beta, self.policy.global_step, max_step, min_value=1e-5
)
value_losses = []

-decay_epsilon,
decay_epsilon,
-self.decay_epsilon,
self.decay_epsilon,
)
v_opt_a = tf.squared_difference(
self.returns_holders[name], tf.reduce_sum(head, axis=1)

r_theta = tf.exp(probs - old_probs)
p_opt_a = r_theta * advantage
p_opt_b = (
tf.clip_by_value(r_theta, 1.0 - decay_epsilon, 1.0 + decay_epsilon)
tf.clip_by_value(
r_theta, 1.0 - self.decay_epsilon, 1.0 + self.decay_epsilon
)
* advantage
)
self.policy_loss = -tf.reduce_mean(

self.loss = (
self.policy_loss
+ 0.5 * self.value_loss
- decay_beta
- self.decay_beta
* tf.reduce_mean(tf.dynamic_partition(entropy, self.policy.mask, 2)[1])
)

4
ml-agents/mlagents/trainers/ppo/trainer.py


self.update_buffer.shuffle(sequence_length=self.policy.sequence_length)
buffer = self.update_buffer
max_num_batch = buffer_length // batch_size
for l in range(0, max_num_batch * batch_size, batch_size):
for i in range(0, max_num_batch * batch_size, batch_size):
buffer.make_mini_batch(l, l + batch_size), n_sequences
buffer.make_mini_batch(i, i + batch_size), n_sequences
)
for stat_name, value in update_stats.items():
batch_update_stats[stat_name].append(value)

12
ml-agents/mlagents/trainers/sac/optimizer.py


from mlagents_envs.logging_util import get_logger
from mlagents.trainers.sac.network import SACPolicyNetwork, SACTargetNetwork
from mlagents.trainers.models import LearningRateSchedule, EncoderType, ModelUtils
from mlagents.trainers.models import ScheduleType, EncoderType, ModelUtils
from mlagents.trainers.optimizer.tf_optimizer import TFOptimizer
from mlagents.trainers.policy.tf_policy import TFPolicy
from mlagents.trainers.buffer import AgentBuffer

with tf.variable_scope(""):
super().__init__(policy, trainer_params)
lr = float(trainer_params["learning_rate"])
lr_schedule = LearningRateSchedule(
lr_schedule = ScheduleType(
trainer_params.get("learning_rate_schedule", "constant")
)
self.policy = policy

# The optimizer's m_size is 3 times the policy (Q1, Q2, and Value)
self.m_size = 3 * self.policy.m_size
self._create_inputs_and_outputs()
self.learning_rate = ModelUtils.create_learning_rate(
lr_schedule, lr, self.policy.global_step, int(max_step)
self.learning_rate = ModelUtils.create_schedule(
lr_schedule,
lr,
self.policy.global_step,
int(max_step),
min_value=1e-10,
)
self._create_losses(
self.policy_network.q1_heads,

8
ml-agents/mlagents/trainers/simple_env_manager.py


@property
def external_brains(self) -> Dict[BehaviorName, BrainParameters]:
result = {}
for brain_name in self.env.get_behavior_names():
result[brain_name] = behavior_spec_to_brain_parameters(
brain_name, self.env.get_behavior_spec(brain_name)
for behavior_name, behavior_spec in self.env.behavior_specs.items():
result[behavior_name] = behavior_spec_to_brain_parameters(
behavior_name, behavior_spec
)
return result

def _generate_all_results(self) -> AllStepResult:
all_step_result: AllStepResult = {}
for brain_name in self.env.get_behavior_names():
for brain_name in self.env.behavior_specs:
all_step_result[brain_name] = self.env.get_steps(brain_name)
return all_step_result

10
ml-agents/mlagents/trainers/subprocess_env_manager.py


def _generate_all_results() -> AllStepResult:
all_step_result: AllStepResult = {}
for brain_name in env.get_behavior_names():
for brain_name in env.behavior_specs:
for brain_name in env.get_behavior_names():
result[brain_name] = behavior_spec_to_brain_parameters(
brain_name, env.get_behavior_spec(brain_name)
for behavior_name, behavior_specs in env.behavior_specs.items():
result[behavior_name] = behavior_spec_to_brain_parameters(
behavior_name, behavior_specs
)
return result

return self.env_workers[0].recv().payload
def close(self) -> None:
logger.debug(f"SubprocessEnvManager closing.")
logger.debug("SubprocessEnvManager closing.")
self.step_queue.close()
self.step_queue.join_thread()
for env_worker in self.env_workers:

12
ml-agents/mlagents/trainers/tests/simple_test_envs.py


DecisionSteps,
TerminalSteps,
ActionType,
BehaviorMapping,
)
from mlagents_envs.tests.test_rpc_utils import proto_from_steps_and_action
from mlagents_envs.communicator_objects.agent_info_action_pair_pb2 import (

obs.append(np.ones((1,) + self.vis_obs_size, dtype=np.float32) * value)
return obs
def get_behavior_names(self):
return self.names
def get_behavior_spec(self, behavior_name):
return self.behavior_spec
@property
def behavior_specs(self):
behavior_dict = {}
for n in self.names:
behavior_dict[n] = self.behavior_spec
return BehaviorMapping(behavior_dict)
def set_action_for_agent(self, behavior_name, agent_id, action):
pass

5
ml-agents/mlagents/trainers/tests/test_learn.py


def test_bad_env_path():
with pytest.raises(UnityEnvironmentException):
learn.create_environment_factory(
factory = learn.create_environment_factory(
seed=None,
seed=-1,
factory(worker_id=-1, side_channels=[])
@patch("builtins.open", new_callable=mock_open, read_data=MOCK_YAML)

4
ml-agents/mlagents/trainers/tests/test_nn_policy.py


trainer_params["output_path"] = path1
policy = create_policy_mock(trainer_params)
policy.initialize_or_load()
policy._set_step(2000)
policy.save_model(2000)
assert len(os.listdir(tmp_path)) > 0

policy2.initialize_or_load()
_compare_two_policies(policy, policy2)
assert policy2.get_current_step() == 2000
# Try initialize from path 1
trainer_params["model_path"] = path2

_compare_two_policies(policy2, policy3)
# Assert that the steps are 0.
assert policy3.get_current_step() == 0
def _compare_two_policies(policy1: NNPolicy, policy2: NNPolicy) -> None:

4
ml-agents/mlagents/trainers/tests/test_simple_rl.py


step_size=0.2,
)
override_vals = {
"max_steps": 750,
"max_steps": 1000,
"behavioral_cloning": {"demo_path": demo_path, "strength": 1.0, "steps": 1000},
"behavioral_cloning": {"demo_path": demo_path, "strength": 1.0, "steps": 1500},
"reward_signals": {
"gail": {
"strength": 1.0,

6
ml-agents/tests/yamato/check_coverage_percent.py


# Rather than try to parse the XML, just look for a line of the form
# <Linecoverage>73.9</Linecoverage>
lines = f.readlines()
for l in lines:
if "Linecoverage" in l:
pct = l.replace("<Linecoverage>", "").replace("</Linecoverage>", "")
for line in lines:
if "Linecoverage" in line:
pct = line.replace("<Linecoverage>", "").replace("</Linecoverage>", "")
pct = float(pct)
if pct < min_percentage:
print(

4
ml-agents/tests/yamato/scripts/run_llapi.py


env.reset()
# Set the default brain to work with
group_name = env.get_behavior_names()[0]
group_spec = env.get_behavior_spec(group_name)
group_name = list(env.behavior_specs.keys())[0]
group_spec = env.behavior_specs[group_name]
# Set the time scale of the engine
engine_configuration_channel.set_configuration_parameters(time_scale=3.0)

21
ml-agents/tests/yamato/training_int_tests.py


run_standalone_build,
init_venv,
override_config_file,
override_legacy_config_file,
checkout_csharp_version,
undo_git_checkout,
)

print(
f"Running training with python={python_version or latest} and c#={csharp_version or latest}"
)
nn_file_expected = f"./results/{run_id}/3DBall.nn"
output_dir = "models" if python_version else "results"
nn_file_expected = f"./{output_dir}/{run_id}/3DBall.nn"
if os.path.exists(nn_file_expected):
# Should never happen - make sure nothing leftover from an old test.
print("Artifacts from previous build found!")

# Copy the default training config but override the max_steps parameter,
# and reduce the batch_size and buffer_size enough to ensure an update step happens.
override_config_file(
"config/ppo/3DBall.yaml",
"override.yaml",
max_steps=100,
batch_size=10,
buffer_size=10,
)
overrides = {"max_steps": 100, "batch_size": 10, "buffer_size": 10}
yaml_out = "override.yaml"
if python_version:
override_legacy_config_file(
python_version, "config/trainer_config.yaml", yaml_out, **overrides
)
else:
override_config_file("config/ppo/3DBall.yaml", yaml_out, **overrides)
f"mlagents-learn override.yaml --train --env="
f"mlagents-learn {yaml_out} --force --env="
f"{os.path.join(get_base_output_path(), standalone_player_path)} "
f"--run-id={run_id} --no-graphics --env-args -logFile -"
) # noqa

28
ml-agents/tests/yamato/yamato_utils.py


if csharp_version is None:
return
csharp_tag = f"com.unity.ml-agents_{csharp_version}"
subprocess.check_call(
f"git checkout {csharp_version} -- {csharp_dir}", shell=True
)
subprocess.check_call(f"git checkout {csharp_tag} -- {csharp_dir}", shell=True)
def undo_git_checkout():

subprocess.check_call("git reset HEAD .", shell=True)
subprocess.check_call("git checkout -- .", shell=True)
# Ensure the cache isn't polluted with old compiled assemblies.
subprocess.check_call(f"rm -rf Project/Library", shell=True)
subprocess.check_call("rm -rf Project/Library", shell=True)
def override_config_file(src_path, dest_path, **kwargs):

with open(dest_path, "w") as f:
yaml.dump(configs, f)
def override_legacy_config_file(python_version, src_path, dest_path, **kwargs):
"""
Override settings in a trainer config file, using an old version of the src_path. For example,
override_config_file("0.16.0", src_path, dest_path, max_steps=42)
will sync the file at src_path from version 0.16.0, copy it to dest_path, and override the
max_steps field to 42 for all brains.
"""
# Sync the old version of the file
python_tag = f"python-packages_{python_version}"
subprocess.check_call(f"git checkout {python_tag} -- {src_path}", shell=True)
with open(src_path) as f:
configs = yaml.safe_load(f)
for config in configs.values():
config.update(**kwargs)
with open(dest_path, "w") as f:
yaml.dump(configs, f)

6
utils/validate_versions.py


def extract_version_string(filename):
with open(filename) as f:
for l in f.readlines():
if l.startswith(VERSION_LINE_START):
return l.replace(VERSION_LINE_START, "").strip()
for line in f.readlines():
if line.startswith(VERSION_LINE_START):
return line.replace(VERSION_LINE_START, "").strip()
return None

37
com.unity.ml-agents/Tests/Editor/Communicator/GrpcExtensionsTests.cs


using NUnit.Framework;
using UnityEngine;
using Unity.MLAgents.Policies;
using Unity.MLAgents.Demonstrations;
using Unity.MLAgents.Sensors;
namespace Unity.MLAgents.Tests
{
[TestFixture]
public class GrpcExtensionsTests
{
[Test]
public void TestDefaultBrainParametersToProto()
{
// Should be able to convert a default instance to proto.
var brain = new BrainParameters();
brain.ToProto("foo", false);
}
[Test]
public void TestDefaultAgentInfoToProto()
{
// Should be able to convert a default instance to proto.
var agentInfo = new AgentInfo();
agentInfo.ToInfoActionPairProto();
agentInfo.ToAgentInfoProto();
}
[Test]
public void TestDefaultDemonstrationMetaDataToProto()
{
// Should be able to convert a default instance to proto.
var demoMetaData = new DemonstrationMetaData();
demoMetaData.ToProto();
}
}
}

95
docs/Versioning.md


# ML-Agents Versioning
## Context
As the ML-Agents project evolves into a more mature product, we want to clearly communicate the
process we use to version our packages and the data that flows into, through, and out of them.
Our project now has four packages (1 Unity, 3 Python) along with artifacts that are produced as
well as consumed. This document covers the versioning for these packages and artifacts.
## GitHub Releases
Up until now, all packages were in lockstep in terms of versioning. As a result, the GitHub releases
were tagged with the version of all those packages (e.g. v0.15.0, v0.15.1) and labeled accordingly.
With the decoupling of package versions, we now need to revisit our GitHub release tagging.
The proposal is that we move towards an integer release numbering for our repo and each such
release will call out specific version upgrades of each package. For instance, with
[the April 30th release](https://github.com/Unity-Technologies/ml-agents/releases/tag/release_1),
we will have:
- GitHub Release 1 (branch name: *release_1_branch*)
- com.unity.ml-agents release 1.0.0
- ml-agents release 0.16.0
- ml-agents-envs release 0.16.0
- gym-unity release 0.16.0
Our release cadence will not be affected by these versioning changes. We will keep having
monthly releases to fix bugs and release new features.
## Packages
All of the software packages and their generated artifacts will be versioned; any automation
tools will not be versioned.
### Unity package
Package name: com.unity.ml-agents
- Versioned following [Semantic Versioning Guidelines](https://www.semver.org)
- This package consumes an artifact of the training process: the `.nn` file. These files
are integer versioned and currently at version 2. The com.unity.ml-agents package
will need to support the version of `.nn` files which existed at its 1.0.0 release.
For example, consider that com.unity.ml-agents is at version 1.0.0 and the NN files
are at version 2. If the NN files change to version 3, the next release of
com.unity.ml-agents at version 1.1.0 guarantees it will be able to read both of these
formats. If the NN files were to change to version 4 and com.unity.ml-agents to
version 2.0.0, support for NN versions 2 and 3 could be dropped for com.unity.ml-agents
version 2.0.0.
- This package produces one kind of artifact, the `.demo` files. These files will have integer
versioning. This means their version will increment by 1 at each change. The
com.unity.ml-agents package must be backward compatible with version changes
that occur between minor versions.
- To summarize, the artifacts produced and consumed by com.unity.ml-agents are guaranteed
to be supported for 1.x.x versions of com.unity.ml-agents. We intend to provide stability
for our users by moving to a 1.0.0 release of com.unity.ml-agents.
### Python Packages
Package names: ml-agents / ml-agents-envs / gym-unity
- The python packages remain in "Beta." This means that breaking changes to the public
API of the python packages can occur without a major version bump.
Historically, the python and C# packages were in version lockstep. This is no longer
the case. The python packages will remain in lockstep with each other for now, while the
C# package will follow its own versioning as is appropriate. However, the python package
versions may diverge in the future.
- While the python packages will remain in Beta for now, we acknowledge that the most
heavily used portion of our python interface is the `mlagents-learn` CLI and strive
to make this part of our API backward compatible. We are actively working on this and
expect to have a stable CLI in the next few weeks.
## Communicator
Packages which communicate: com.unity.ml-agents / ml-agents-envs
Another entity of the ML-Agents Toolkit that requires versioning is the communication layer
between C# and Python, which will also follow semantic versioning. This guarantees a level of
backward compatibility between different versions of C# and Python packages which communicate.
Any Communicator version 1.x.x of the Unity package should be compatible with any 1.x.x
Communicator version in Python.
An RLCapabilities struct keeps track of which features exist. This struct is passed from C# to
Python, and another from Python to C#. With this feature-level granularity, we can notify users
more specifically about feature limitations based on what's available in both C# and Python.
These notifications will be logged to the python terminal or to the Unity Editor Console.
## Side Channels
The communicator is what manages data transfer between Unity and Python for the core
training loop. Side Channels are another means of data transfer between Unity and Python.
Side Channels are not versioned, but have been designed with backward compatibility in mind.
As of today, we provide 4 side channels:
- FloatProperties: shared float data between Unity - Python (bidirectional)
- RawBytes: raw data that can be sent Unity - Python (bidirectional)
- EngineConfig: a set of numeric fields in a pre-defined order sent from Python to Unity
- Stats: (name, value, agg) messages sent from Unity to Python
Aside from the specific implementations of side channels we provide (and use ourselves),
the Side Channel interface is made available for users to create their own custom side
channels. As such, we guarantee that the built-in SideChannel interface between Unity and
Python is backward compatible in packages that share the same major version.
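To make that contract concrete, here is a minimal sketch of a user-defined side channel on the Python side. The channel id, class name, and string payload are illustrative, the import path is assumed from the package layout shown in this diff, and a channel with the same id must be registered on the Unity side:

```python
import uuid
from mlagents_envs.side_channel import SideChannel, IncomingMessage, OutgoingMessage


class StringLogChannel(SideChannel):
    def __init__(self) -> None:
        # The id must match the id used by the corresponding C# side channel.
        super().__init__(uuid.UUID("621f0a70-4f87-11ea-a6bf-784f4387d1f7"))

    def on_message_received(self, msg: IncomingMessage) -> None:
        # Called for every message Unity sends on this channel.
        print(msg.read_string())

    def send_string(self, data: str) -> None:
        # Queued messages are delivered with the next step() or reset().
        msg = OutgoingMessage()
        msg.write_string(data)
        super().queue_message_to_send(msg)
```

The channel is then passed to the environment at construction time, e.g. `UnityEnvironment(file_name=..., side_channels=[StringLogChannel()])`.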

108
ml-agents-envs/mlagents_envs/env_utils.py


import glob
import os
import subprocess
from sys import platform
from typing import Optional, List
from mlagents_envs.logging_util import get_logger
from mlagents_envs.exception import UnityEnvironmentException
def get_platform():
"""
    Returns the platform of the operating system: linux, darwin, or win32
"""
return platform
def validate_environment_path(env_path: str) -> Optional[str]:
"""
Strip out executable extensions of the env_path
:param env_path: The path to the executable
"""
env_path = (
env_path.strip()
.replace(".app", "")
.replace(".exe", "")
.replace(".x86_64", "")
.replace(".x86", "")
)
true_filename = os.path.basename(os.path.normpath(env_path))
get_logger(__name__).debug("The true file name is {}".format(true_filename))
if not (glob.glob(env_path) or glob.glob(env_path + ".*")):
return None
cwd = os.getcwd()
launch_string = None
true_filename = os.path.basename(os.path.normpath(env_path))
if get_platform() == "linux" or get_platform() == "linux2":
candidates = glob.glob(os.path.join(cwd, env_path) + ".x86_64")
if len(candidates) == 0:
candidates = glob.glob(os.path.join(cwd, env_path) + ".x86")
if len(candidates) == 0:
candidates = glob.glob(env_path + ".x86_64")
if len(candidates) == 0:
candidates = glob.glob(env_path + ".x86")
if len(candidates) > 0:
launch_string = candidates[0]
elif get_platform() == "darwin":
candidates = glob.glob(
os.path.join(cwd, env_path + ".app", "Contents", "MacOS", true_filename)
)
if len(candidates) == 0:
candidates = glob.glob(
os.path.join(env_path + ".app", "Contents", "MacOS", true_filename)
)
if len(candidates) == 0:
candidates = glob.glob(
os.path.join(cwd, env_path + ".app", "Contents", "MacOS", "*")
)
if len(candidates) == 0:
candidates = glob.glob(
os.path.join(env_path + ".app", "Contents", "MacOS", "*")
)
if len(candidates) > 0:
launch_string = candidates[0]
elif get_platform() == "win32":
candidates = glob.glob(os.path.join(cwd, env_path + ".exe"))
if len(candidates) == 0:
candidates = glob.glob(env_path + ".exe")
if len(candidates) > 0:
launch_string = candidates[0]
return launch_string
def launch_executable(file_name: str, args: List[str]) -> subprocess.Popen:
"""
Launches a Unity executable and returns the process handle for it.
:param file_name: the name of the executable
:param args: List of string that will be passed as command line arguments
when launching the executable.
"""
launch_string = validate_environment_path(file_name)
if launch_string is None:
raise UnityEnvironmentException(
f"Couldn't launch the {file_name} environment. Provided filename does not match any environments."
)
else:
get_logger(__name__).debug("This is the launch string {}".format(launch_string))
# Launch Unity environment
subprocess_args = [launch_string] + args
try:
return subprocess.Popen(
subprocess_args,
# start_new_session=True means that signals to the parent python process
# (e.g. SIGINT from keyboard interrupt) will not be sent to the new process on POSIX platforms.
# This is generally good since we want the environment to have a chance to shutdown,
            # but may be undesirable in some cases; if so, we'll add a command-line toggle.
# Note that on Windows, the CTRL_C signal will still be sent.
start_new_session=True,
)
except PermissionError as perm:
# This is likely due to missing read or execute permissions on file.
raise UnityEnvironmentException(
f"Error when trying to launch environment - make sure "
f"permissions are set correctly. For example "
f'"chmod -R 755 {launch_string}"'
) from perm
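For context, this helper is normally invoked indirectly when a UnityEnvironment is constructed from a build; calling it directly is also possible. A hedged usage sketch (the build name "3DBall" is illustrative; `-logFile -` matches the player arguments used in the yamato tests above):

```python
from mlagents_envs.env_utils import launch_executable, validate_environment_path

# Resolve the platform-specific launch path first (returns None if nothing matches).
launch_path = validate_environment_path("3DBall")
if launch_path is not None:
    # Returns the subprocess.Popen handle for the running Unity player.
    proc = launch_executable("3DBall", ["-logFile", "-"])
    proc.terminate()  # shut the player down again (sketch only)
```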

81
ml-agents-envs/mlagents_envs/side_channel/side_channel_manager.py


import uuid
import struct
from typing import Dict, Optional, List
from mlagents_envs.side_channel import SideChannel, IncomingMessage
from mlagents_envs.exception import UnityEnvironmentException
from mlagents_envs.logging_util import get_logger
class SideChannelManager:
    def __init__(self, side_channels: Optional[List[SideChannel]] = None) -> None:
self._side_channels_dict = self._get_side_channels_dict(side_channels)
def process_side_channel_message(self, data: bytes) -> None:
"""
        Separates the data received from Unity into individual messages for each
registered side channel and calls on_message_received on them.
:param data: The packed message sent by Unity
"""
offset = 0
while offset < len(data):
try:
channel_id = uuid.UUID(bytes_le=bytes(data[offset : offset + 16]))
offset += 16
message_len, = struct.unpack_from("<i", data, offset)
offset = offset + 4
message_data = data[offset : offset + message_len]
offset = offset + message_len
except (struct.error, ValueError, IndexError):
raise UnityEnvironmentException(
"There was a problem reading a message in a SideChannel. "
"Please make sure the version of MLAgents in Unity is "
"compatible with the Python version."
)
if len(message_data) != message_len:
raise UnityEnvironmentException(
"The message received by the side channel {0} was "
"unexpectedly short. Make sure your Unity Environment "
"sending side channel data properly.".format(channel_id)
)
if channel_id in self._side_channels_dict:
incoming_message = IncomingMessage(message_data)
self._side_channels_dict[channel_id].on_message_received(
incoming_message
)
else:
get_logger(__name__).warning(
f"Unknown side channel data received. Channel type: {channel_id}."
)
def generate_side_channel_messages(self) -> bytearray:
"""
Gathers the messages that the registered side channels will send to Unity
and combines them into a single message ready to be sent.
"""
result = bytearray()
for channel_id, channel in self._side_channels_dict.items():
for message in channel.message_queue:
result += channel_id.bytes_le
result += struct.pack("<i", len(message))
result += message
channel.message_queue = []
return result
@staticmethod
def _get_side_channels_dict(
side_channels: Optional[List[SideChannel]]
) -> Dict[uuid.UUID, SideChannel]:
"""
Converts a list of side channels into a dictionary of channel_id to SideChannel
:param side_channels: The list of side channels.
"""
side_channels_dict: Dict[uuid.UUID, SideChannel] = {}
if side_channels is not None:
for _sc in side_channels:
if _sc.channel_id in side_channels_dict:
raise UnityEnvironmentException(
f"There cannot be two side channels with "
f"the same channel id {_sc.channel_id}."
)
side_channels_dict[_sc.channel_id] = _sc
return side_channels_dict
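For reference, each queued message is framed as the 16-byte little-endian channel UUID, a little-endian int32 payload length, and the payload itself; `generate_side_channel_messages` concatenates these frames and `process_side_channel_message` walks them back. A standalone sketch of that framing (the id and payload below are made up):

```python
import struct
import uuid

channel_id = uuid.uuid4()  # illustrative channel id
payload = b"hello"         # illustrative message body

# Pack one frame: UUID (bytes_le) + int32 length + payload.
frame = channel_id.bytes_le + struct.pack("<i", len(payload)) + payload

# Parse it back, mirroring process_side_channel_message.
parsed_id = uuid.UUID(bytes_le=bytes(frame[:16]))
(length,) = struct.unpack_from("<i", frame, 16)
body = frame[20 : 20 + length]
assert parsed_id == channel_id and body == payload
```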

64
ml-agents-envs/mlagents_envs/tests/test_env_utils.py


from unittest import mock
import pytest
from mlagents_envs.env_utils import validate_environment_path, launch_executable
from mlagents_envs.exception import UnityEnvironmentException
from mlagents_envs.logging_util import (
set_log_level,
get_logger,
INFO,
ERROR,
FATAL,
CRITICAL,
DEBUG,
)
def mock_glob_method(path):
"""
Given a path input, returns a list of candidates
"""
if ".x86" in path:
return ["linux"]
if ".app" in path:
return ["darwin"]
if ".exe" in path:
return ["win32"]
if "*" in path:
return "Any"
return []
@mock.patch("sys.platform")
@mock.patch("glob.glob")
def test_validate_path_empty(glob_mock, platform_mock):
glob_mock.return_value = None
path = validate_environment_path(" ")
assert path is None
@mock.patch("mlagents_envs.env_utils.get_platform")
@mock.patch("glob.glob")
def test_validate_path(glob_mock, platform_mock):
glob_mock.side_effect = mock_glob_method
for platform in ["linux", "darwin", "win32"]:
platform_mock.return_value = platform
path = validate_environment_path(" ")
assert path == platform
@mock.patch("glob.glob")
@mock.patch("subprocess.Popen")
def test_launch_executable(mock_popen, glob_mock):
with pytest.raises(UnityEnvironmentException):
launch_executable(" ", [])
glob_mock.return_value = ["FakeLaunchPath"]
launch_executable(" ", [])
mock_popen.side_effect = PermissionError("Fake permission error")
with pytest.raises(UnityEnvironmentException):
launch_executable(" ", [])
def test_set_logging_level():
for level in [INFO, ERROR, FATAL, CRITICAL, DEBUG]:
set_log_level(level)
assert get_logger("test").level == level

102
ml-agents-envs/mlagents_envs/tests/test_steps.py


import pytest
import numpy as np
from mlagents_envs.base_env import (
DecisionSteps,
TerminalSteps,
ActionType,
BehaviorSpec,
)
def test_decision_steps():
ds = DecisionSteps(
obs=[np.array(range(12), dtype=np.float32).reshape(3, 4)],
reward=np.array(range(3), dtype=np.float32),
agent_id=np.array(range(10, 13), dtype=np.int32),
action_mask=[np.zeros((3, 4), dtype=np.bool)],
)
assert ds.agent_id_to_index[10] == 0
assert ds.agent_id_to_index[11] == 1
assert ds.agent_id_to_index[12] == 2
with pytest.raises(KeyError):
assert ds.agent_id_to_index[-1] == -1
mask_agent = ds[10].action_mask
assert isinstance(mask_agent, list)
assert len(mask_agent) == 1
assert np.array_equal(mask_agent[0], np.zeros((4), dtype=np.bool))
for agent_id in ds:
assert ds.agent_id_to_index[agent_id] in range(3)
def test_empty_decision_steps():
specs = BehaviorSpec(
observation_shapes=[(3, 2), (5,)],
action_type=ActionType.CONTINUOUS,
action_shape=3,
)
ds = DecisionSteps.empty(specs)
assert len(ds.obs) == 2
assert ds.obs[0].shape == (0, 3, 2)
assert ds.obs[1].shape == (0, 5)
def test_terminal_steps():
ts = TerminalSteps(
obs=[np.array(range(12), dtype=np.float32).reshape(3, 4)],
reward=np.array(range(3), dtype=np.float32),
agent_id=np.array(range(10, 13), dtype=np.int32),
interrupted=np.array([1, 0, 1], dtype=np.bool),
)
assert ts.agent_id_to_index[10] == 0
assert ts.agent_id_to_index[11] == 1
assert ts.agent_id_to_index[12] == 2
assert ts[10].interrupted
assert not ts[11].interrupted
assert ts[12].interrupted
with pytest.raises(KeyError):
assert ts.agent_id_to_index[-1] == -1
for agent_id in ts:
assert ts.agent_id_to_index[agent_id] in range(3)
def test_empty_terminal_steps():
specs = BehaviorSpec(
observation_shapes=[(3, 2), (5,)],
action_type=ActionType.CONTINUOUS,
action_shape=3,
)
ts = TerminalSteps.empty(specs)
assert len(ts.obs) == 2
assert ts.obs[0].shape == (0, 3, 2)
assert ts.obs[1].shape == (0, 5)
def test_specs():
specs = BehaviorSpec(
observation_shapes=[(3, 2), (5,)],
action_type=ActionType.CONTINUOUS,
action_shape=3,
)
assert specs.discrete_action_branches is None
assert specs.action_size == 3
assert specs.create_empty_action(5).shape == (5, 3)
assert specs.create_empty_action(5).dtype == np.float32
specs = BehaviorSpec(
observation_shapes=[(3, 2), (5,)],
action_type=ActionType.DISCRETE,
action_shape=(3,),
)
assert specs.discrete_action_branches == (3,)
assert specs.action_size == 1
assert specs.create_empty_action(5).shape == (5, 1)
assert specs.create_empty_action(5).dtype == np.int32