
Merge pull request #3038 from Unity-Technologies/develop

Merge develop to master
GitHub, 5 years ago
Current commit: 35c995e9
130 files changed, with 1,384 additions and 1,423 deletions
  1. .circleci/config.yml (4 changes)
  2. CONTRIBUTING.md (11 changes)
  3. README.md (3 changes)
  4. UnitySDK/Assets/ML-Agents/Editor/Tests/DemonstrationTests.cs (1 change)
  5. UnitySDK/Assets/ML-Agents/Editor/Tests/EditModeTestInternalBrainTensorGenerator.cs (1 change)
  6. UnitySDK/Assets/ML-Agents/Editor/Tests/MLAgentsEditModeTest.cs (8 changes)
  7. UnitySDK/Assets/ML-Agents/Editor/Tests/TimerTest.cs (9 changes)
  8. UnitySDK/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DAcademy.cs (7 changes)
  9. UnitySDK/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DAgent.cs (8 changes)
  10. UnitySDK/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DHardAgent.cs (8 changes)
  11. UnitySDK/Assets/ML-Agents/Examples/Basic/Scripts/BasicAgent.cs (2 changes)
  12. UnitySDK/Assets/ML-Agents/Examples/Bouncer/Scripts/BouncerAgent.cs (6 changes)
  13. UnitySDK/Assets/ML-Agents/Examples/FoodCollector/Scripts/FoodCollectorAgent.cs (5 changes)
  14. UnitySDK/Assets/ML-Agents/Examples/GridWorld/Scripts/GridAcademy.cs (12 changes)
  15. UnitySDK/Assets/ML-Agents/Examples/GridWorld/Scripts/GridAgent.cs (4 changes)
  16. UnitySDK/Assets/ML-Agents/Examples/GridWorld/Scripts/GridArea.cs (14 changes)
  17. UnitySDK/Assets/ML-Agents/Examples/PushBlock/Scripts/PushAgentBasic.cs (13 changes)
  18. UnitySDK/Assets/ML-Agents/Examples/Reacher/Scripts/ReacherAcademy.cs (3 changes)
  19. UnitySDK/Assets/ML-Agents/Examples/Reacher/Scripts/ReacherAgent.cs (9 changes)
  20. UnitySDK/Assets/ML-Agents/Examples/Soccer/Scripts/SoccerAcademy.cs (4 changes)
  21. UnitySDK/Assets/ML-Agents/Examples/Soccer/Scripts/SoccerFieldArea.cs (2 changes)
  22. UnitySDK/Assets/ML-Agents/Examples/Tennis/Scripts/TennisAcademy.cs (5 changes)
  23. UnitySDK/Assets/ML-Agents/Examples/Tennis/Scripts/TennisAgent.cs (8 changes)
  24. UnitySDK/Assets/ML-Agents/Examples/Walker/Scripts/WalkerAcademy.cs (6 changes)
  25. UnitySDK/Assets/ML-Agents/Examples/Walker/Scripts/WalkerAgent.cs (14 changes)
  26. UnitySDK/Assets/ML-Agents/Examples/WallJump/Scripts/WallJumpAgent.cs (11 changes)
  27. UnitySDK/Assets/ML-Agents/Scripts/Academy.cs (197 changes)
  28. UnitySDK/Assets/ML-Agents/Scripts/Grpc/CommunicatorObjects/UnityRlInitializationOutput.cs (52 changes)
  29. UnitySDK/Assets/ML-Agents/Scripts/Grpc/CommunicatorObjects/UnityRlInput.cs (119 changes)
  30. UnitySDK/Assets/ML-Agents/Scripts/Grpc/CommunicatorObjects/UnityRlOutput.cs (44 changes)
  31. UnitySDK/Assets/ML-Agents/Scripts/Grpc/GrpcExtensions.cs (23 changes)
  32. UnitySDK/Assets/ML-Agents/Scripts/Grpc/RpcCommunicator.cs (126 changes)
  33. UnitySDK/Assets/ML-Agents/Scripts/ICommunicator.cs (34 changes)
  34. UnitySDK/Assets/ML-Agents/Scripts/Timer.cs (70 changes)
  35. UnitySDK/UnitySDK.sln.DotSettings (1 change)
  36. docs/Basic-Guide.md (38 changes)
  37. docs/Getting-Started-with-Balance-Ball.md (23 changes)
  38. docs/Installation-Windows.md (6 changes)
  39. docs/Installation.md (4 changes)
  40. docs/Learning-Environment-Design-Academy.md (12 changes)
  41. docs/Learning-Environment-Design.md (6 changes)
  42. docs/Learning-Environment-Examples.md (28 changes)
  43. docs/Learning-Environment-Executable.md (18 changes)
  44. docs/Migrating.md (13 changes)
  45. docs/Python-API.md (81 changes)
  46. docs/Training-Curriculum-Learning.md (5 changes)
  47. docs/Training-Generalized-Reinforcement-Learning-Agents.md (4 changes)
  48. docs/Training-ML-Agents.md (17 changes)
  49. docs/Training-on-Amazon-Web-Service.md (2 changes)
  50. docs/Using-Virtual-Environment.md (24 changes)
  51. gym-unity/gym_unity/tests/test_gym.py (4 changes)
  52. ml-agents-envs/mlagents/envs/base_unity_environment.py (14 changes)
  53. ml-agents-envs/mlagents/envs/brain.py (21 changes)
  54. ml-agents-envs/mlagents/envs/communicator_objects/unity_rl_initialization_output_pb2.py (17 changes)
  55. ml-agents-envs/mlagents/envs/communicator_objects/unity_rl_initialization_output_pb2.pyi (14 changes)
  56. ml-agents-envs/mlagents/envs/communicator_objects/unity_rl_input_pb2.py (37 changes)
  57. ml-agents-envs/mlagents/envs/communicator_objects/unity_rl_input_pb2.pyi (18 changes)
  58. ml-agents-envs/mlagents/envs/communicator_objects/unity_rl_output_pb2.py (19 changes)
  59. ml-agents-envs/mlagents/envs/communicator_objects/unity_rl_output_pb2.pyi (6 changes)
  60. ml-agents-envs/mlagents/envs/env_manager.py (11 changes)
  61. ml-agents-envs/mlagents/envs/environment.py (124 changes)
  62. ml-agents-envs/mlagents/envs/simple_env_manager.py (29 changes)
  63. ml-agents-envs/mlagents/envs/subprocess_env_manager.py (71 changes)
  64. ml-agents-envs/mlagents/envs/tests/test_brain.py (6 changes)
  65. ml-agents-envs/mlagents/envs/tests/test_subprocess_env_manager.py (35 changes)
  66. ml-agents/mlagents/trainers/bc/policy.py (7 changes)
  67. ml-agents/mlagents/trainers/bc/trainer.py (17 changes)
  68. ml-agents/mlagents/trainers/buffer.py (470 changes)
  69. ml-agents/mlagents/trainers/components/bc/module.py (15 changes)
  70. ml-agents/mlagents/trainers/components/reward_signals/__init__.py (7 changes)
  71. ml-agents/mlagents/trainers/components/reward_signals/extrinsic/signal.py (2 changes)
  72. ml-agents/mlagents/trainers/components/reward_signals/gail/signal.py (5 changes)
  73. ml-agents/mlagents/trainers/curriculum.py (9 changes)
  74. ml-agents/mlagents/trainers/demo_loader.py (100 changes)
  75. ml-agents/mlagents/trainers/learn.py (70 changes)
  76. ml-agents/mlagents/trainers/meta_curriculum.py (8 changes)
  77. ml-agents/mlagents/trainers/ppo/policy.py (7 changes)
  78. ml-agents/mlagents/trainers/ppo/trainer.py (61 changes)
  79. ml-agents/mlagents/trainers/rl_trainer.py (44 changes)
  80. ml-agents/mlagents/trainers/sac/trainer.py (47 changes)
  81. ml-agents/mlagents/trainers/tests/__init__.py (49 changes)
  82. ml-agents/mlagents/trainers/tests/mock_brain.py (48 changes)
  83. ml-agents/mlagents/trainers/tests/test_bc.py (12 changes)
  84. ml-agents/mlagents/trainers/tests/test_buffer.py (71 changes)
  85. ml-agents/mlagents/trainers/tests/test_curriculum.py (8 changes)
  86. ml-agents/mlagents/trainers/tests/test_demo_loader.py (4 changes)
  87. ml-agents/mlagents/trainers/tests/test_learn.py (4 changes)
  88. ml-agents/mlagents/trainers/tests/test_meta_curriculum.py (9 changes)
  89. ml-agents/mlagents/trainers/tests/test_policy.py (6 changes)
  90. ml-agents/mlagents/trainers/tests/test_ppo.py (51 changes)
  91. ml-agents/mlagents/trainers/tests/test_reward_signals.py (4 changes)
  92. ml-agents/mlagents/trainers/tests/test_rl_trainer.py (22 changes)
  93. ml-agents/mlagents/trainers/tests/test_sac.py (63 changes)
  94. ml-agents/mlagents/trainers/tests/test_simple_rl.py (4 changes)
  95. ml-agents/mlagents/trainers/tests/test_trainer_controller.py (2 changes)
  96. ml-agents/mlagents/trainers/tf_policy.py (16 changes)
  97. ml-agents/mlagents/trainers/trainer_controller.py (4 changes)
  98. protobuf-definitions/proto/mlagents/envs/communicator_objects/unity_rl_initialization_output.proto (3 changes)
  99. protobuf-definitions/proto/mlagents/envs/communicator_objects/unity_rl_input.proto (6 changes)
  100. protobuf-definitions/proto/mlagents/envs/communicator_objects/unity_rl_output.proto (1 change)

.circleci/config.yml (4 changes)

executor: << parameters.executor >>
working_directory: ~/repo
# Run additional numpy checks on unit tests
environment:
  TEST_ENFORCE_NUMPY_FLOAT32: 1
steps:
  - checkout

CONTRIBUTING.md (11 changes)

issues with the `contributions welcome` label.
## Git Branches
+ The master branch corresponds to the most recent version of the project.
+ Note that this may be newer than the [latest release](https://github.com/Unity-Technologies/ml-agents/releases/tag/latest_release).
- Starting with v0.3, we adopted the
- [Gitflow Workflow](http://nvie.com/posts/a-successful-git-branching-model/).
- Consequently, the `master` branch corresponds to the latest release of
- the project, while the `develop` branch corresponds to the most recent, stable,
- version of the project.
- Thus, when adding to the project, **please branch off `develop`**
- and make sure that your Pull Request (PR) contains the following:
+ When contributing to the project, please make sure that your Pull Request (PR) contains the following:
* Detailed description of the changes performed
* Corresponding changes to documentation, unit tests and sample environments (if

README.md (3 changes)


[![docs badge](https://img.shields.io/badge/docs-reference-blue.svg)](docs/Readme.md)
[![license badge](https://img.shields.io/badge/license-Apache--2.0-green.svg)](LICENSE)
([latest release](https://github.com/Unity-Technologies/ml-agents/releases/tag/latest_release))
([all releases](https://github.com/Unity-Technologies/ml-agents/releases))
**The Unity Machine Learning Agents Toolkit** (ML-Agents) is an open-source
Unity plugin that enables games and simulations to serve as environments for
training intelligent agents. Agents can be trained using reinforcement learning,

UnitySDK/Assets/ML-Agents/Editor/Tests/DemonstrationTests.cs (1 change)

var acaGo = new GameObject("TestAcademy");
acaGo.AddComponent<TestAcademy>();
var aca = acaGo.GetComponent<TestAcademy>();
- aca.resetParameters = new ResetParameters();
var academyInitializeMethod = typeof(Academy).GetMethod("InitializeEnvironment",
BindingFlags.Instance | BindingFlags.NonPublic);

UnitySDK/Assets/ML-Agents/Editor/Tests/EditModeTestInternalBrainTensorGenerator.cs (1 change)

var acaGo = new GameObject("TestAcademy");
acaGo.AddComponent<TestAcademy>();
var aca = acaGo.GetComponent<TestAcademy>();
- aca.resetParameters = new ResetParameters();
var goA = new GameObject("goA");
var bpA = goA.AddComponent<BehaviorParameters>();

UnitySDK/Assets/ML-Agents/Editor/Tests/MLAgentsEditModeTest.cs (8 changes)

var acaGo = new GameObject("TestAcademy");
acaGo.AddComponent<TestAcademy>();
var aca = acaGo.GetComponent<TestAcademy>();
- aca.resetParameters = new ResetParameters();
Assert.AreEqual(0, aca.initializeAcademyCalls);
Assert.AreEqual(0, aca.GetStepCount());
Assert.AreEqual(0, aca.GetEpisodeCount());

var acaGo = new GameObject("TestAcademy");
acaGo.AddComponent<TestAcademy>();
var aca = acaGo.GetComponent<TestAcademy>();
- aca.resetParameters = new ResetParameters();
Assert.AreEqual(false, agent1.IsDone());
Assert.AreEqual(false, agent2.IsDone());

var acaGo = new GameObject("TestAcademy");
acaGo.AddComponent<TestAcademy>();
var aca = acaGo.GetComponent<TestAcademy>();
- aca.resetParameters = new ResetParameters();
var academyInitializeMethod = typeof(Academy).GetMethod("InitializeEnvironment",
BindingFlags.Instance | BindingFlags.NonPublic);
academyInitializeMethod?.Invoke(aca, new object[] { });

var acaGo = new GameObject("TestAcademy");
acaGo.AddComponent<TestAcademy>();
var aca = acaGo.GetComponent<TestAcademy>();
- aca.resetParameters = new ResetParameters();
var agentEnableMethod = typeof(Agent).GetMethod(

var acaGo = new GameObject("TestAcademy");
acaGo.AddComponent<TestAcademy>();
var aca = acaGo.GetComponent<TestAcademy>();
- aca.resetParameters = new ResetParameters();
var academyInitializeMethod = typeof(Academy).GetMethod(
"InitializeEnvironment", BindingFlags.Instance | BindingFlags.NonPublic);
academyInitializeMethod?.Invoke(aca, new object[] { });

var acaGo = new GameObject("TestAcademy");
acaGo.AddComponent<TestAcademy>();
var aca = acaGo.GetComponent<TestAcademy>();
- aca.resetParameters = new ResetParameters();
var agentEnableMethod = typeof(Agent).GetMethod(

var acaGo = new GameObject("TestAcademy");
acaGo.AddComponent<TestAcademy>();
var aca = acaGo.GetComponent<TestAcademy>();
- aca.resetParameters = new ResetParameters();
var agentEnableMethod = typeof(Agent).GetMethod(

var acaGo = new GameObject("TestAcademy");
acaGo.AddComponent<TestAcademy>();
var aca = acaGo.GetComponent<TestAcademy>();
- aca.resetParameters = new ResetParameters();
var agentEnableMethod = typeof(Agent).GetMethod(

UnitySDK/Assets/ML-Agents/Editor/Tests/TimerTest.cs (9 changes)


using NUnit.Framework;
using UnityEditor.Graphs;
using UnityEngine;
namespace MLAgents.Tests

{
TimerStack myTimer = TimerStack.Instance;
myTimer.Reset();
using (myTimer.Scoped("foo"))
{
for (int i = 0; i < 5; i++)

myTimer.SetGauge("my_gauge", (float)i);
}
}
}

Assert.AreEqual(rootChildren["foo"].NumCalls, 1);
var gauge = myTimer.RootNode.Gauges["my_gauge"];
Assert.NotNull(gauge);
Assert.AreEqual(5, gauge.count);
Assert.AreEqual(0, gauge.minValue);
Assert.AreEqual(4, gauge.maxValue);
Assert.AreEqual(4, gauge.value);
var fooChildren = rootChildren["foo"].Children;
Assert.That(fooChildren, Contains.Key("bar"));
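
The assertions above pin down the TimerStack API that the new Timer.cs exposes. Below is a minimal sketch of the usage pattern the test implies; the nested "bar" scope is inferred from the `Contains.Key("bar")` assertion rather than shown directly in the hunk:

```csharp
using MLAgents;

public static class TimerExample
{
    public static void Run()
    {
        var timer = TimerStack.Instance;
        timer.Reset();

        // Each Scoped() block times a named node; nesting builds a tree,
        // so "bar" ends up as a child of "foo" under timer.RootNode.
        using (timer.Scoped("foo"))
        {
            for (var i = 0; i < 5; i++)
            {
                using (timer.Scoped("bar"))
                {
                    // Gauges record count, min, max and the latest value.
                    timer.SetGauge("my_gauge", (float)i);
                }
            }
        }
    }
}
```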

UnitySDK/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DAcademy.cs (7 changes)

public class Ball3DAcademy : Academy
{
- public override void AcademyReset()
+ public override void InitializeAcademy()
-     Physics.gravity = new Vector3(0, -resetParameters["gravity"], 0);
+     FloatProperties.RegisterCallback("gravity", f => { Physics.gravity = new Vector3(0, -f, 0); });
public override void AcademyStep()
{
}
}
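
This removed/added pair is the template repeated across the example academies: per-reset reads of `resetParameters` become a callback registered once in `InitializeAcademy()`, which then fires whenever the Python side updates the property. A minimal sketch, assuming only the `IFloatProperties.RegisterCallback` signature visible in the diff (the academy class name here is hypothetical):

```csharp
using MLAgents;
using UnityEngine;

public class GravityAcademy : Academy  // hypothetical example academy
{
    public override void InitializeAcademy()
    {
        // Registered once; invoked each time the trainer or a curriculum
        // writes a new "gravity" value over the side channel. This replaces
        // re-reading resetParameters["gravity"] in every AcademyReset().
        FloatProperties.RegisterCallback(
            "gravity", f => { Physics.gravity = new Vector3(0, -f, 0); });
    }
}
```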

UnitySDK/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DAgent.cs (8 changes)

[Header("Specific to Ball3D")]
public GameObject ball;
Rigidbody m_BallRb;
- ResetParameters m_ResetParams;
+ IFloatProperties m_ResetParams;
- m_ResetParams = academy.resetParameters;
+ m_ResetParams = academy.FloatProperties;
SetResetParameters();
}

public void SetBall()
{
//Set the attributes of the ball by fetching the information from the academy
- m_BallRb.mass = m_ResetParams["mass"];
- var scale = m_ResetParams["scale"];
+ m_BallRb.mass = m_ResetParams.GetPropertyWithDefault("mass", 1.0f);
+ var scale = m_ResetParams.GetPropertyWithDefault("scale", 1.0f);
ball.transform.localScale = new Vector3(scale, scale, scale);
}
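
The other half of the migration is the read side: indexer lookups, which threw for unset keys, become `GetPropertyWithDefault` calls with an explicit fallback. A short sketch of that pattern; the component name and the Academy lookup are illustrative, mirroring how the example agents wire up `m_ResetParams`:

```csharp
using MLAgents;
using UnityEngine;

public class BallSettings : MonoBehaviour  // hypothetical helper component
{
    public GameObject ball;
    Rigidbody m_BallRb;
    IFloatProperties m_Props;

    void Start()
    {
        m_BallRb = ball.GetComponent<Rigidbody>();
        m_Props = FindObjectOfType<Academy>().FloatProperties;
    }

    public void SetBall()
    {
        // A missing key no longer throws; the default is used until the
        // Python side supplies a value over the side channel.
        m_BallRb.mass = m_Props.GetPropertyWithDefault("mass", 1.0f);
        var scale = m_Props.GetPropertyWithDefault("scale", 1.0f);
        ball.transform.localScale = new Vector3(scale, scale, scale);
    }
}
```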

UnitySDK/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DHardAgent.cs (8 changes)

[Header("Specific to Ball3DHard")]
public GameObject ball;
Rigidbody m_BallRb;
- ResetParameters m_ResetParams;
+ IFloatProperties m_ResetParams;
- m_ResetParams = academy.resetParameters;
+ m_ResetParams = academy.FloatProperties;
SetResetParameters();
}

public void SetBall()
{
//Set the attributes of the ball by fetching the information from the academy
- m_BallRb.mass = m_ResetParams["mass"];
- var scale = m_ResetParams["scale"];
+ m_BallRb.mass = m_ResetParams.GetPropertyWithDefault("mass", 1.0f);
+ var scale = m_ResetParams.GetPropertyWithDefault("scale", 1.0f);
ball.transform.localScale = new Vector3(scale, scale, scale);
}

UnitySDK/Assets/ML-Agents/Examples/Basic/Scripts/BasicAgent.cs (2 changes)

void WaitTimeInference()
{
- if (!m_Academy.GetIsInference())
+ if (!m_Academy.IsCommunicatorOn)
{
RequestDecision();
}
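
`GetIsInference()` is gone along with the explicit training/inference toggle; the examples now branch on whether a communicator (i.e. a connected trainer) exists. A sketch of the decision-pacing pattern this check supports; the timing fields are assumptions for illustration, not part of this diff:

```csharp
using MLAgents;
using UnityEngine;

public class PacedAgent : Agent  // hypothetical example agent
{
    Academy m_Academy;
    float m_TimeSinceDecision;                             // assumed field
    public float timeBetweenDecisionsAtInference = 0.15f;  // assumed tunable

    public override void InitializeAgent()
    {
        m_Academy = FindObjectOfType<Academy>();
    }

    public void FixedUpdate()
    {
        if (!m_Academy.IsCommunicatorOn)
        {
            // No trainer attached: pace decisions on a wall-clock budget.
            m_TimeSinceDecision += Time.fixedDeltaTime;
            if (m_TimeSinceDecision >= timeBetweenDecisionsAtInference)
            {
                m_TimeSinceDecision = 0f;
                RequestDecision();
            }
        }
        else
        {
            // A trainer is connected: let it drive every step.
            RequestDecision();
        }
    }
}
```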

UnitySDK/Assets/ML-Agents/Examples/Bouncer/Scripts/BouncerAgent.cs (6 changes)

int m_NumberJumps = 20;
int m_JumpLeft = 20;
- ResetParameters m_ResetParams;
+ IFloatProperties m_ResetParams;
public override void InitializeAgent()
{

var academy = FindObjectOfType<Academy>();
- m_ResetParams = academy.resetParameters;
+ m_ResetParams = academy.FloatProperties;
SetResetParameters();
}

public void SetTargetScale()
{
- var targetScale = m_ResetParams["target_scale"];
+ var targetScale = m_ResetParams.GetPropertyWithDefault("target_scale", 1.0f);
target.transform.localScale = new Vector3(targetScale, targetScale, targetScale);
}

UnitySDK/Assets/ML-Agents/Examples/FoodCollector/Scripts/FoodCollectorAgent.cs (5 changes)

public void SetLaserLengths()
{
- m_LaserLength = m_MyAcademy.resetParameters.TryGetValue("laser_length", out m_LaserLength) ? m_LaserLength : 1.0f;
+ m_LaserLength = m_MyAcademy.FloatProperties.GetPropertyWithDefault("laser_length", 1.0f);
- float agentScale;
- agentScale = m_MyAcademy.resetParameters.TryGetValue("agent_scale", out agentScale) ? agentScale : 1.0f;
+ float agentScale = m_MyAcademy.FloatProperties.GetPropertyWithDefault("agent_scale", 1.0f);
gameObject.transform.localScale = new Vector3(agentScale, agentScale, agentScale);
}

UnitySDK/Assets/ML-Agents/Examples/GridWorld/Scripts/GridAcademy.cs (12 changes)

{
public Camera MainCamera;
- public override void AcademyReset()
+ public override void InitializeAcademy()
- MainCamera.transform.position = new Vector3(-((int)resetParameters["gridSize"] - 1) / 2f,
-     (int)resetParameters["gridSize"] * 1.25f,
-     -((int)resetParameters["gridSize"] - 1) / 2f);
- MainCamera.orthographicSize = ((int)resetParameters["gridSize"] + 5f) / 2f;
+ FloatProperties.RegisterCallback("gridSize", f =>
+ {
+     MainCamera.transform.position = new Vector3(-(f - 1) / 2f, f * 1.25f, -(f - 1) / 2f);
+     MainCamera.orthographicSize = (f + 5f) / 2f;
+ });
}
}

UnitySDK/Assets/ML-Agents/Examples/GridWorld/Scripts/GridAgent.cs (4 changes)

// Prevents the agent from picking an action that would make it collide with a wall
var positionX = (int)transform.position.x;
var positionZ = (int)transform.position.z;
- var maxPosition = (int)m_Academy.resetParameters["gridSize"] - 1;
+ var maxPosition = (int)m_Academy.FloatProperties.GetPropertyWithDefault("gridSize", 5f) - 1;
if (positionX == 0)
{

renderCamera.Render();
}
- if (!m_Academy.GetIsInference())
+ if (!m_Academy.IsCommunicatorOn)
{
RequestDecision();
}

UnitySDK/Assets/ML-Agents/Examples/GridWorld/Scripts/GridArea.cs (14 changes)

public GameObject trueAgent;
- ResetParameters m_ResetParameters;
+ IFloatProperties m_ResetParameters;
Camera m_AgentCam;

public void Awake()
{
- m_ResetParameters = FindObjectOfType<Academy>().resetParameters;
+ m_ResetParameters = FindObjectOfType<Academy>().FloatProperties;
m_Objects = new[] { goalPref, pitPref };

public void SetEnvironment()
{
- transform.position = m_InitialPosition * (m_ResetParameters["gridSize"] + 1);
+ transform.position = m_InitialPosition * (m_ResetParameters.GetPropertyWithDefault("gridSize", 5f) + 1);
- for (var i = 0; i < (int)m_ResetParameters["numObstacles"]; i++)
+ for (var i = 0; i < (int)m_ResetParameters.GetPropertyWithDefault("numObstacles", 1); i++)
- for (var i = 0; i < (int)m_ResetParameters["numGoals"]; i++)
+ for (var i = 0; i < (int)m_ResetParameters.GetPropertyWithDefault("numGoals", 1f); i++)
- var gridSize = (int)m_ResetParameters["gridSize"];
+ var gridSize = (int)m_ResetParameters.GetPropertyWithDefault("gridSize", 5f);
m_Plane.transform.localScale = new Vector3(gridSize / 10.0f, 1f, gridSize / 10.0f);
m_Plane.transform.localPosition = new Vector3((gridSize - 1) / 2f, -0.5f, (gridSize - 1) / 2f);
m_Sn.transform.localScale = new Vector3(1, 1, gridSize + 2);

public void AreaReset()
{
- var gridSize = (int)m_ResetParameters["gridSize"];
+ var gridSize = (int)m_ResetParameters.GetPropertyWithDefault("gridSize", 5f);
foreach (var actor in actorObjs)
{
DestroyImmediate(actor);

UnitySDK/Assets/ML-Agents/Examples/PushBlock/Scripts/PushAgentBasic.cs (13 changes)

public void SetGroundMaterialFriction()
{
- var resetParams = m_Academy.resetParameters;
+ var resetParams = m_Academy.FloatProperties;
- groundCollider.material.dynamicFriction = resetParams["dynamic_friction"];
- groundCollider.material.staticFriction = resetParams["static_friction"];
+ groundCollider.material.dynamicFriction = resetParams.GetPropertyWithDefault("dynamic_friction", 0);
+ groundCollider.material.staticFriction = resetParams.GetPropertyWithDefault("static_friction", 0);
- var resetParams = m_Academy.resetParameters;
+ var resetParams = m_Academy.FloatProperties;
- m_BlockRb.transform.localScale = new Vector3(resetParams["block_scale"], 0.75f, resetParams["block_scale"]);
+ var scale = resetParams.GetPropertyWithDefault("block_scale", 2);
+ m_BlockRb.transform.localScale = new Vector3(scale, 0.75f, scale);
- m_BlockRb.drag = resetParams["block_drag"];
+ m_BlockRb.drag = resetParams.GetPropertyWithDefault("block_drag", 0.5f);
}
public void SetResetParameters()

UnitySDK/Assets/ML-Agents/Examples/Reacher/Scripts/ReacherAcademy.cs (3 changes)

{
public override void AcademyReset()
{
- Physics.gravity = new Vector3(0, -resetParameters["gravity"], 0);
+ FloatProperties.RegisterCallback("gravity", f => { Physics.gravity = new Vector3(0, -f, 0); });
}
public override void AcademyStep()

UnitySDK/Assets/ML-Agents/Examples/Reacher/Scripts/ReacherAgent.cs (9 changes)

public void SetResetParameters()
{
- m_GoalSize = m_MyAcademy.resetParameters["goal_size"];
- m_GoalSpeed = Random.Range(-1f, 1f) * m_MyAcademy.resetParameters["goal_speed"];
- m_Deviation = m_MyAcademy.resetParameters["deviation"];
- m_DeviationFreq = m_MyAcademy.resetParameters["deviation_freq"];
+ var fp = m_MyAcademy.FloatProperties;
+ m_GoalSize = fp.GetPropertyWithDefault("goal_size", 5);
+ m_GoalSpeed = Random.Range(-1f, 1f) * fp.GetPropertyWithDefault("goal_speed", 1);
+ m_Deviation = fp.GetPropertyWithDefault("deviation", 0);
+ m_DeviationFreq = fp.GetPropertyWithDefault("deviation_freq", 0);
}
}

UnitySDK/Assets/ML-Agents/Examples/Soccer/Scripts/SoccerAcademy.cs (4 changes)

Physics.gravity *= gravityMultiplier; //for soccer a multiplier of 3 looks good
}
- public override void AcademyReset()
+ public override void InitializeAcademy()
- Physics.gravity = new Vector3(0, -resetParameters["gravity"], 0);
+ FloatProperties.RegisterCallback("gravity", f => { Physics.gravity = new Vector3(0, -f, 0); });
}
public override void AcademyStep()

UnitySDK/Assets/ML-Agents/Examples/Soccer/Scripts/SoccerFieldArea.cs (2 changes)

ballRb.velocity = Vector3.zero;
ballRb.angularVelocity = Vector3.zero;
- var ballScale = m_Academy.resetParameters["ball_scale"];
+ var ballScale = m_Academy.FloatProperties.GetPropertyWithDefault("ball_scale", 0.015f);
ballRb.transform.localScale = new Vector3(ballScale, ballScale, ballScale);
}
}

UnitySDK/Assets/ML-Agents/Examples/Tennis/Scripts/TennisAcademy.cs (5 changes)

public class TennisAcademy : Academy
{
- public override void AcademyReset()
+ public override void InitializeAcademy()
-     Physics.gravity = new Vector3(0, -resetParameters["gravity"], 0);
+     FloatProperties.RegisterCallback("gravity", f => { Physics.gravity = new Vector3(0, -f, 0); });
}
public override void AcademyStep()

UnitySDK/Assets/ML-Agents/Examples/Tennis/Scripts/TennisAgent.cs (8 changes)

Rigidbody m_AgentRb;
Rigidbody m_BallRb;
float m_InvertMult;
- ResetParameters m_ResetParams;
+ IFloatProperties m_ResetParams;
// Looks for the scoreboard based on the name of the gameObjects.
// Do not modify the names of the Score GameObjects

var canvas = GameObject.Find(k_CanvasName);
GameObject scoreBoard;
var academy = FindObjectOfType<Academy>();
- m_ResetParams = academy.resetParameters;
+ m_ResetParams = academy.FloatProperties;
if (invertX)
{
scoreBoard = canvas.transform.Find(k_ScoreBoardBName).gameObject;

public void SetRacket()
{
- angle = m_ResetParams["angle"];
+ angle = m_ResetParams.GetPropertyWithDefault("angle", 55);
gameObject.transform.eulerAngles = new Vector3(
gameObject.transform.eulerAngles.x,
gameObject.transform.eulerAngles.y,

public void SetBall()
{
- scale = m_ResetParams["scale"];
+ scale = m_ResetParams.GetPropertyWithDefault("scale", 1);
ball.transform.localScale = new Vector3(scale, scale, scale);
}

UnitySDK/Assets/ML-Agents/Examples/Walker/Scripts/WalkerAcademy.cs (6 changes)

Physics.defaultSolverVelocityIterations = 12;
Time.fixedDeltaTime = 0.01333f; //(75fps). default is .2 (60fps)
Time.maximumDeltaTime = .15f; // Default is .33
}
public override void AcademyReset()
{
- Physics.gravity = new Vector3(0, -resetParameters["gravity"], 0);
+ FloatProperties.RegisterCallback("gravity", f => { Physics.gravity = new Vector3(0, -f, 0); });
}
public override void AcademyStep()

UnitySDK/Assets/ML-Agents/Examples/Walker/Scripts/WalkerAgent.cs (14 changes)

public class WalkerAgent : Agent
{
- [Header("Specific to Walker")][Header("Target To Walk Towards")][Space(10)]
+ [Header("Specific to Walker")]
+ [Header("Target To Walk Towards")]
+ [Space(10)]
public Transform target;
Vector3 m_DirToTarget;

Rigidbody m_ChestRb;
Rigidbody m_SpineRb;
- ResetParameters m_ResetParams;
+ IFloatProperties m_ResetParams;
public override void InitializeAgent()
{

m_SpineRb = spine.GetComponent<Rigidbody>();
var academy = FindObjectOfType<WalkerAcademy>();
- m_ResetParams = academy.resetParameters;
+ m_ResetParams = academy.FloatProperties;
SetResetParameters();
}

public void SetTorsoMass()
{
- m_ChestRb.mass = m_ResetParams["chest_mass"];
- m_SpineRb.mass = m_ResetParams["spine_mass"];
- m_HipsRb.mass = m_ResetParams["hip_mass"];
+ m_ChestRb.mass = m_ResetParams.GetPropertyWithDefault("chest_mass", 8);
+ m_SpineRb.mass = m_ResetParams.GetPropertyWithDefault("spine_mass", 10);
+ m_HipsRb.mass = m_ResetParams.GetPropertyWithDefault("hip_mass", 15);
}
public void SetResetParameters()
public void SetResetParameters()

UnitySDK/Assets/ML-Agents/Examples/WallJump/Scripts/WallJumpAgent.cs (11 changes)

{
localScale = new Vector3(
    localScale.x,
-     m_Academy.resetParameters["no_wall_height"],
+     m_Academy.FloatProperties.GetPropertyWithDefault("no_wall_height", 0),
    localScale.z);
wall.transform.localScale = localScale;
GiveModel("SmallWallJump", noWallBrain);

localScale = new Vector3(
    localScale.x,
-     m_Academy.resetParameters["small_wall_height"],
+     m_Academy.FloatProperties.GetPropertyWithDefault("small_wall_height", 4),
    localScale.z);
wall.transform.localScale = localScale;
GiveModel("SmallWallJump", smallWallBrain);

- var height =
-     m_Academy.resetParameters["big_wall_min_height"] +
-     Random.value * (m_Academy.resetParameters["big_wall_max_height"] -
-     m_Academy.resetParameters["big_wall_min_height"]);
+ var min = m_Academy.FloatProperties.GetPropertyWithDefault("big_wall_min_height", 8);
+ var max = m_Academy.FloatProperties.GetPropertyWithDefault("big_wall_max_height", 8);
+ var height = min + Random.value * (max - min);
localScale = new Vector3(
    localScale.x,
    height,

UnitySDK/Assets/ML-Agents/Scripts/Academy.cs (197 changes)


namespace MLAgents
{
/// <summary>
/// Wraps the environment-level parameters that are provided within the
/// Editor. These parameters can be provided for training and inference
/// modes separately and represent screen resolution, rendering quality and
/// frame rate.
/// </summary>
[System.Serializable]
public class EnvironmentConfiguration
{
[Tooltip("Width of the environment window in pixels.")]
public int width;
[Tooltip("Height of the environment window in pixels.")]
public int height;
[Tooltip("Rendering quality of environment. (Higher is better quality.)")]
[Range(0, 5)]
public int qualityLevel;
[Tooltip("Speed at which environment is run. (Higher is faster.)")]
[Range(1f, 100f)]
public float timeScale;
[Tooltip("Frames per second (FPS) engine attempts to maintain.")]
public int targetFrameRate;
/// Initializes a new instance of the
/// <see cref="EnvironmentConfiguration"/> class.
/// <param name="width">Width of environment window (pixels).</param>
/// <param name="height">Height of environment window (pixels).</param>
/// <param name="qualityLevel">
/// Rendering quality of environment. Ranges from 0 to 5, with higher.
/// </param>
/// <param name="timeScale">
/// Speed at which environment is run. Ranges from 1 to 100, with higher
/// values representing faster speed.
/// </param>
/// <param name="targetFrameRate">
/// Target frame rate (per second) that the engine tries to maintain.
/// </param>
public EnvironmentConfiguration(
int width, int height, int qualityLevel,
float timeScale, int targetFrameRate)
{
this.width = width;
this.height = height;
this.qualityLevel = qualityLevel;
this.timeScale = timeScale;
this.targetFrameRate = targetFrameRate;
}
}
/// <summary>
/// An Academy is where Agent objects go to train their behaviors.

/// Used to restore original value when deriving Academy modifies it
float m_OriginalMaximumDeltaTime;
// Fields provided in the Inspector
public IFloatProperties FloatProperties;
[FormerlySerializedAs("trainingConfiguration")]
[SerializeField]
[Tooltip("The engine-level settings which correspond to rendering " +
"quality and engine speed during Training.")]
EnvironmentConfiguration m_TrainingConfiguration =
new EnvironmentConfiguration(80, 80, 1, 100.0f, -1);
[FormerlySerializedAs("inferenceConfiguration")]
[SerializeField]
[Tooltip("The engine-level settings which correspond to rendering " +
"quality and engine speed during Inference.")]
EnvironmentConfiguration m_InferenceConfiguration =
new EnvironmentConfiguration(1280, 720, 5, 1.0f, 60);
/// <summary>
/// Contains a mapping from parameter names to float values. They are
/// used in <see cref="AcademyReset"/> and <see cref="AcademyStep"/>
/// to modify elements in the environment at reset time.
/// </summary>
/// <remarks>
/// Default reset parameters are specified in the academy Editor, and can
/// be modified when training by passing a config
/// dictionary at reset.
/// </remarks>
[SerializeField]
[Tooltip("List of custom parameters that can be changed in the " +
"environment when it resets.")]
public ResetParameters resetParameters;
public CommunicatorObjects.CustomResetParametersProto customResetParameters;
// Fields not provided in the Inspector.

get { return Communicator != null; }
}
/// If true, the Academy will use inference settings. This field is
/// initialized in <see cref="Awake"/> depending on the presence
/// or absence of a communicator. Furthermore, it can be modified during
/// training via <see cref="SetIsInference"/>.
bool m_IsInference = true;
/// The number of episodes completed by the environment. Incremented
/// each time the environment is reset.
int m_EpisodeCount;

/// The number of total number of steps completed during the whole simulation. Incremented
/// each time a step is taken in the environment.
int m_TotalStepCount;
/// Flag that indicates whether the inference/training mode of the
/// environment was switched by the training process. This impacts the
/// engine settings at the next environment step.
bool m_ModeSwitched;
/// Pointer to the communicator currently in use by the Academy.
public ICommunicator Communicator;

m_OriginalFixedDeltaTime = Time.fixedDeltaTime;
m_OriginalMaximumDeltaTime = Time.maximumDeltaTime;
var floatProperties = new FloatPropertiesChannel();
FloatProperties = floatProperties;
// Try to launch the communicator by using the arguments passed at launch
try

if (Communicator != null)
{
Communicator.RegisterSideChannel(new EngineConfigurationChannel());
Communicator.RegisterSideChannel(floatProperties);
// We try to exchange the first message with Python. If this fails, it means
// no Python Process is ready to train the environment. In this case, the
//environment must use Inference.

{
version = k_ApiVersion,
name = gameObject.name,
environmentResetParameters = new EnvironmentResetParameters
{
resetParameters = resetParameters,
customResetParameters = customResetParameters
}
});
Random.InitState(unityRLInitParameters.seed);
}

{
Communicator.QuitCommandReceived += OnQuitCommandReceived;
Communicator.ResetCommandReceived += OnResetCommand;
Communicator.RLInputReceived += OnRLInputReceived;
}
}

SetIsInference(!IsCommunicatorOn);
DecideAction += () => { };
DestroyAction += () => { };

AgentAct += () => { };
AgentForceReset += () => { };
ConfigureEnvironment();
}
static void OnQuitCommandReceived()

Application.Quit();
}
void OnResetCommand(EnvironmentResetParameters newResetParameters)
void OnResetCommand()
UpdateResetParameters(newResetParameters);
void OnRLInputReceived(UnityRLInputParameters inputParams)
{
m_IsInference = !inputParams.isTraining;
}
void UpdateResetParameters(EnvironmentResetParameters newResetParameters)
{
if (newResetParameters.resetParameters != null)
{
foreach (var kv in newResetParameters.resetParameters)
{
resetParameters[kv.Key] = kv.Value;
}
}
customResetParameters = newResetParameters.customResetParameters;
}
/// <summary>
/// Configures the environment settings depending on the training/inference
/// mode and the corresponding parameters passed in the Editor.
/// </summary>
void ConfigureEnvironment()
{
if (m_IsInference)
{
ConfigureEnvironmentHelper(m_InferenceConfiguration);
Monitor.SetActive(true);
}
else
{
ConfigureEnvironmentHelper(m_TrainingConfiguration);
Monitor.SetActive(false);
}
}
/// <summary>
/// Helper method for initializing the environment based on the provided
/// configuration.
/// </summary>
/// <param name="config">
/// Environment configuration (specified in the Editor).
/// </param>
static void ConfigureEnvironmentHelper(EnvironmentConfiguration config)
{
Screen.SetResolution(config.width, config.height, false);
QualitySettings.SetQualityLevel(config.qualityLevel, true);
Time.timeScale = config.timeScale;
Time.captureFramerate = 60;
Application.targetFrameRate = config.targetFrameRate;
}
/// <summary>
/// Initializes the academy and environment. Called during the waking-up
/// phase of the environment before any of the scene objects/agents have

{
}
/// <summary>
/// Returns the <see cref="m_IsInference"/> flag.
/// </summary>
/// <returns>
/// <c>true</c>, if current mode is inference, <c>false</c> if training.
/// </returns>
public bool GetIsInference()
{
return m_IsInference;
}
/// <summary>
/// Sets the <see cref="m_IsInference"/> flag to the provided value. If
/// the new flag differs from the current flag value, this signals that
/// the environment configuration needs to be updated.
/// </summary>
/// <param name="isInference">
/// Environment mode, if true then inference, otherwise training.
/// </param>
public void SetIsInference(bool isInference)
{
if (m_IsInference != isInference)
{
m_IsInference = isInference;
// This signals to the academy that at the next environment step
// the engine configurations need updating to the respective mode
// (i.e. training vs inference) configuration.
m_ModeSwitched = true;
}
}
/// <summary>
/// Returns the current episode counter.

/// </summary>
void EnvironmentStep()
{
if (m_ModeSwitched)
{
ConfigureEnvironment();
m_ModeSwitched = false;
}
if (!m_FirstAcademyReset)
{
ForcedFullReset();

UnitySDK/Assets/ML-Agents/Scripts/Grpc/CommunicatorObjects/UnityRlInitializationOutput.cs (52 changes)


"CkdtbGFnZW50cy9lbnZzL2NvbW11bmljYXRvcl9vYmplY3RzL3VuaXR5X3Js",
"X2luaXRpYWxpemF0aW9uX291dHB1dC5wcm90bxIUY29tbXVuaWNhdG9yX29i",
"amVjdHMaOW1sYWdlbnRzL2VudnMvY29tbXVuaWNhdG9yX29iamVjdHMvYnJh",
"aW5fcGFyYW1ldGVycy5wcm90bxo/bWxhZ2VudHMvZW52cy9jb21tdW5pY2F0",
"b3Jfb2JqZWN0cy9lbnZpcm9ubWVudF9wYXJhbWV0ZXJzLnByb3RvIusBCiBV",
"bml0eVJMSW5pdGlhbGl6YXRpb25PdXRwdXRQcm90bxIMCgRuYW1lGAEgASgJ",
"Eg8KB3ZlcnNpb24YAiABKAkSEAoIbG9nX3BhdGgYAyABKAkSRAoQYnJhaW5f",
"cGFyYW1ldGVycxgFIAMoCzIqLmNvbW11bmljYXRvcl9vYmplY3RzLkJyYWlu",
"UGFyYW1ldGVyc1Byb3RvElAKFmVudmlyb25tZW50X3BhcmFtZXRlcnMYBiAB",
"KAsyMC5jb21tdW5pY2F0b3Jfb2JqZWN0cy5FbnZpcm9ubWVudFBhcmFtZXRl",
"cnNQcm90b0IfqgIcTUxBZ2VudHMuQ29tbXVuaWNhdG9yT2JqZWN0c2IGcHJv",
"dG8z"));
"aW5fcGFyYW1ldGVycy5wcm90byKfAQogVW5pdHlSTEluaXRpYWxpemF0aW9u",
"T3V0cHV0UHJvdG8SDAoEbmFtZRgBIAEoCRIPCgd2ZXJzaW9uGAIgASgJEhAK",
"CGxvZ19wYXRoGAMgASgJEkQKEGJyYWluX3BhcmFtZXRlcnMYBSADKAsyKi5j",
"b21tdW5pY2F0b3Jfb2JqZWN0cy5CcmFpblBhcmFtZXRlcnNQcm90b0oECAYQ",
"B0IfqgIcTUxBZ2VudHMuQ29tbXVuaWNhdG9yT2JqZWN0c2IGcHJvdG8z"));
new pbr::FileDescriptor[] { global::MLAgents.CommunicatorObjects.BrainParametersReflection.Descriptor, global::MLAgents.CommunicatorObjects.EnvironmentParametersReflection.Descriptor, },
new pbr::FileDescriptor[] { global::MLAgents.CommunicatorObjects.BrainParametersReflection.Descriptor, },
new pbr::GeneratedClrTypeInfo(typeof(global::MLAgents.CommunicatorObjects.UnityRLInitializationOutputProto), global::MLAgents.CommunicatorObjects.UnityRLInitializationOutputProto.Parser, new[]{ "Name", "Version", "LogPath", "BrainParameters", "EnvironmentParameters" }, null, null, null)
new pbr::GeneratedClrTypeInfo(typeof(global::MLAgents.CommunicatorObjects.UnityRLInitializationOutputProto), global::MLAgents.CommunicatorObjects.UnityRLInitializationOutputProto.Parser, new[]{ "Name", "Version", "LogPath", "BrainParameters" }, null, null, null)
}));
}
#endregion

version_ = other.version_;
logPath_ = other.logPath_;
brainParameters_ = other.brainParameters_.Clone();
EnvironmentParameters = other.environmentParameters_ != null ? other.EnvironmentParameters.Clone() : null;
_unknownFields = pb::UnknownFieldSet.Clone(other._unknownFields);
}

get { return brainParameters_; }
}
/// <summary>Field number for the "environment_parameters" field.</summary>
public const int EnvironmentParametersFieldNumber = 6;
private global::MLAgents.CommunicatorObjects.EnvironmentParametersProto environmentParameters_;
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public global::MLAgents.CommunicatorObjects.EnvironmentParametersProto EnvironmentParameters {
get { return environmentParameters_; }
set {
environmentParameters_ = value;
}
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public override bool Equals(object other) {
return Equals(other as UnityRLInitializationOutputProto);

if (Version != other.Version) return false;
if (LogPath != other.LogPath) return false;
if(!brainParameters_.Equals(other.brainParameters_)) return false;
if (!object.Equals(EnvironmentParameters, other.EnvironmentParameters)) return false;
return Equals(_unknownFields, other._unknownFields);
}

if (Version.Length != 0) hash ^= Version.GetHashCode();
if (LogPath.Length != 0) hash ^= LogPath.GetHashCode();
hash ^= brainParameters_.GetHashCode();
if (environmentParameters_ != null) hash ^= EnvironmentParameters.GetHashCode();
if (_unknownFields != null) {
hash ^= _unknownFields.GetHashCode();
}

output.WriteString(LogPath);
}
brainParameters_.WriteTo(output, _repeated_brainParameters_codec);
if (environmentParameters_ != null) {
output.WriteRawTag(50);
output.WriteMessage(EnvironmentParameters);
}
if (_unknownFields != null) {
_unknownFields.WriteTo(output);
}

size += 1 + pb::CodedOutputStream.ComputeStringSize(LogPath);
}
size += brainParameters_.CalculateSize(_repeated_brainParameters_codec);
if (environmentParameters_ != null) {
size += 1 + pb::CodedOutputStream.ComputeMessageSize(EnvironmentParameters);
}
if (_unknownFields != null) {
size += _unknownFields.CalculateSize();
}

LogPath = other.LogPath;
}
brainParameters_.Add(other.brainParameters_);
if (other.environmentParameters_ != null) {
if (environmentParameters_ == null) {
environmentParameters_ = new global::MLAgents.CommunicatorObjects.EnvironmentParametersProto();
}
EnvironmentParameters.MergeFrom(other.EnvironmentParameters);
}
_unknownFields = pb::UnknownFieldSet.MergeFrom(_unknownFields, other._unknownFields);
}

}
case 42: {
brainParameters_.AddEntriesFrom(input, _repeated_brainParameters_codec);
break;
}
case 50: {
if (environmentParameters_ == null) {
environmentParameters_ = new global::MLAgents.CommunicatorObjects.EnvironmentParametersProto();
}
input.ReadMessage(environmentParameters_);
break;
}
}

UnitySDK/Assets/ML-Agents/Scripts/Grpc/CommunicatorObjects/UnityRlInput.cs (119 changes)


"CjdtbGFnZW50cy9lbnZzL2NvbW11bmljYXRvcl9vYmplY3RzL3VuaXR5X3Js",
"X2lucHV0LnByb3RvEhRjb21tdW5pY2F0b3Jfb2JqZWN0cxo1bWxhZ2VudHMv",
"ZW52cy9jb21tdW5pY2F0b3Jfb2JqZWN0cy9hZ2VudF9hY3Rpb24ucHJvdG8a",
"P21sYWdlbnRzL2VudnMvY29tbXVuaWNhdG9yX29iamVjdHMvZW52aXJvbm1l",
"bnRfcGFyYW1ldGVycy5wcm90bxowbWxhZ2VudHMvZW52cy9jb21tdW5pY2F0",
"b3Jfb2JqZWN0cy9jb21tYW5kLnByb3RvIsMDChFVbml0eVJMSW5wdXRQcm90",
"bxJQCg1hZ2VudF9hY3Rpb25zGAEgAygLMjkuY29tbXVuaWNhdG9yX29iamVj",
"dHMuVW5pdHlSTElucHV0UHJvdG8uQWdlbnRBY3Rpb25zRW50cnkSUAoWZW52",
"aXJvbm1lbnRfcGFyYW1ldGVycxgCIAEoCzIwLmNvbW11bmljYXRvcl9vYmpl",
"Y3RzLkVudmlyb25tZW50UGFyYW1ldGVyc1Byb3RvEhMKC2lzX3RyYWluaW5n",
"GAMgASgIEjMKB2NvbW1hbmQYBCABKA4yIi5jb21tdW5pY2F0b3Jfb2JqZWN0",
"cy5Db21tYW5kUHJvdG8aTQoUTGlzdEFnZW50QWN0aW9uUHJvdG8SNQoFdmFs",
"dWUYASADKAsyJi5jb21tdW5pY2F0b3Jfb2JqZWN0cy5BZ2VudEFjdGlvblBy",
"b3RvGnEKEUFnZW50QWN0aW9uc0VudHJ5EgsKA2tleRgBIAEoCRJLCgV2YWx1",
"ZRgCIAEoCzI8LmNvbW11bmljYXRvcl9vYmplY3RzLlVuaXR5UkxJbnB1dFBy",
"b3RvLkxpc3RBZ2VudEFjdGlvblByb3RvOgI4AUIfqgIcTUxBZ2VudHMuQ29t",
"bXVuaWNhdG9yT2JqZWN0c2IGcHJvdG8z"));
"MG1sYWdlbnRzL2VudnMvY29tbXVuaWNhdG9yX29iamVjdHMvY29tbWFuZC5w",
"cm90byL+AgoRVW5pdHlSTElucHV0UHJvdG8SUAoNYWdlbnRfYWN0aW9ucxgB",
"IAMoCzI5LmNvbW11bmljYXRvcl9vYmplY3RzLlVuaXR5UkxJbnB1dFByb3Rv",
"LkFnZW50QWN0aW9uc0VudHJ5EjMKB2NvbW1hbmQYBCABKA4yIi5jb21tdW5p",
"Y2F0b3Jfb2JqZWN0cy5Db21tYW5kUHJvdG8SFAoMc2lkZV9jaGFubmVsGAUg",
"ASgMGk0KFExpc3RBZ2VudEFjdGlvblByb3RvEjUKBXZhbHVlGAEgAygLMiYu",
"Y29tbXVuaWNhdG9yX29iamVjdHMuQWdlbnRBY3Rpb25Qcm90bxpxChFBZ2Vu",
"dEFjdGlvbnNFbnRyeRILCgNrZXkYASABKAkSSwoFdmFsdWUYAiABKAsyPC5j",
"b21tdW5pY2F0b3Jfb2JqZWN0cy5Vbml0eVJMSW5wdXRQcm90by5MaXN0QWdl",
"bnRBY3Rpb25Qcm90bzoCOAFKBAgCEANKBAgDEARCH6oCHE1MQWdlbnRzLkNv",
"bW11bmljYXRvck9iamVjdHNiBnByb3RvMw=="));
new pbr::FileDescriptor[] { global::MLAgents.CommunicatorObjects.AgentActionReflection.Descriptor, global::MLAgents.CommunicatorObjects.EnvironmentParametersReflection.Descriptor, global::MLAgents.CommunicatorObjects.CommandReflection.Descriptor, },
new pbr::FileDescriptor[] { global::MLAgents.CommunicatorObjects.AgentActionReflection.Descriptor, global::MLAgents.CommunicatorObjects.CommandReflection.Descriptor, },
new pbr::GeneratedClrTypeInfo(typeof(global::MLAgents.CommunicatorObjects.UnityRLInputProto), global::MLAgents.CommunicatorObjects.UnityRLInputProto.Parser, new[]{ "AgentActions", "EnvironmentParameters", "IsTraining", "Command" }, null, null, new pbr::GeneratedClrTypeInfo[] { new pbr::GeneratedClrTypeInfo(typeof(global::MLAgents.CommunicatorObjects.UnityRLInputProto.Types.ListAgentActionProto), global::MLAgents.CommunicatorObjects.UnityRLInputProto.Types.ListAgentActionProto.Parser, new[]{ "Value" }, null, null, null),
new pbr::GeneratedClrTypeInfo(typeof(global::MLAgents.CommunicatorObjects.UnityRLInputProto), global::MLAgents.CommunicatorObjects.UnityRLInputProto.Parser, new[]{ "AgentActions", "Command", "SideChannel" }, null, null, new pbr::GeneratedClrTypeInfo[] { new pbr::GeneratedClrTypeInfo(typeof(global::MLAgents.CommunicatorObjects.UnityRLInputProto.Types.ListAgentActionProto), global::MLAgents.CommunicatorObjects.UnityRLInputProto.Types.ListAgentActionProto.Parser, new[]{ "Value" }, null, null, null),
null, })
}));
}

[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public UnityRLInputProto(UnityRLInputProto other) : this() {
agentActions_ = other.agentActions_.Clone();
EnvironmentParameters = other.environmentParameters_ != null ? other.EnvironmentParameters.Clone() : null;
isTraining_ = other.isTraining_;
sideChannel_ = other.sideChannel_;
_unknownFields = pb::UnknownFieldSet.Clone(other._unknownFields);
}

get { return agentActions_; }
}
/// <summary>Field number for the "environment_parameters" field.</summary>
public const int EnvironmentParametersFieldNumber = 2;
private global::MLAgents.CommunicatorObjects.EnvironmentParametersProto environmentParameters_;
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public global::MLAgents.CommunicatorObjects.EnvironmentParametersProto EnvironmentParameters {
get { return environmentParameters_; }
set {
environmentParameters_ = value;
}
}
/// <summary>Field number for the "is_training" field.</summary>
public const int IsTrainingFieldNumber = 3;
private bool isTraining_;
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public bool IsTraining {
get { return isTraining_; }
set {
isTraining_ = value;
}
}
/// <summary>Field number for the "command" field.</summary>
public const int CommandFieldNumber = 4;
private global::MLAgents.CommunicatorObjects.CommandProto command_ = 0;

}
}
/// <summary>Field number for the "side_channel" field.</summary>
public const int SideChannelFieldNumber = 5;
private pb::ByteString sideChannel_ = pb::ByteString.Empty;
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public pb::ByteString SideChannel {
get { return sideChannel_; }
set {
sideChannel_ = pb::ProtoPreconditions.CheckNotNull(value, "value");
}
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public override bool Equals(object other) {
return Equals(other as UnityRLInputProto);

return true;
}
if (!AgentActions.Equals(other.AgentActions)) return false;
if (!object.Equals(EnvironmentParameters, other.EnvironmentParameters)) return false;
if (IsTraining != other.IsTraining) return false;
if (SideChannel != other.SideChannel) return false;
return Equals(_unknownFields, other._unknownFields);
}

hash ^= AgentActions.GetHashCode();
if (environmentParameters_ != null) hash ^= EnvironmentParameters.GetHashCode();
if (IsTraining != false) hash ^= IsTraining.GetHashCode();
if (SideChannel.Length != 0) hash ^= SideChannel.GetHashCode();
if (_unknownFields != null) {
hash ^= _unknownFields.GetHashCode();
}

[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public void WriteTo(pb::CodedOutputStream output) {
agentActions_.WriteTo(output, _map_agentActions_codec);
if (environmentParameters_ != null) {
output.WriteRawTag(18);
output.WriteMessage(EnvironmentParameters);
}
if (IsTraining != false) {
output.WriteRawTag(24);
output.WriteBool(IsTraining);
}
}
if (SideChannel.Length != 0) {
output.WriteRawTag(42);
output.WriteBytes(SideChannel);
}
if (_unknownFields != null) {
_unknownFields.WriteTo(output);

public int CalculateSize() {
int size = 0;
size += agentActions_.CalculateSize(_map_agentActions_codec);
if (environmentParameters_ != null) {
size += 1 + pb::CodedOutputStream.ComputeMessageSize(EnvironmentParameters);
}
if (IsTraining != false) {
size += 1 + 1;
}
if (SideChannel.Length != 0) {
size += 1 + pb::CodedOutputStream.ComputeBytesSize(SideChannel);
}
if (_unknownFields != null) {
size += _unknownFields.CalculateSize();
}

return;
}
agentActions_.Add(other.agentActions_);
if (other.environmentParameters_ != null) {
if (environmentParameters_ == null) {
environmentParameters_ = new global::MLAgents.CommunicatorObjects.EnvironmentParametersProto();
}
EnvironmentParameters.MergeFrom(other.EnvironmentParameters);
}
if (other.IsTraining != false) {
IsTraining = other.IsTraining;
}
}
if (other.SideChannel.Length != 0) {
SideChannel = other.SideChannel;
}
_unknownFields = pb::UnknownFieldSet.MergeFrom(_unknownFields, other._unknownFields);
}

agentActions_.AddEntriesFrom(input, _map_agentActions_codec);
break;
}
case 18: {
if (environmentParameters_ == null) {
environmentParameters_ = new global::MLAgents.CommunicatorObjects.EnvironmentParametersProto();
}
input.ReadMessage(environmentParameters_);
break;
}
case 24: {
IsTraining = input.ReadBool();
case 32: {
command_ = (global::MLAgents.CommunicatorObjects.CommandProto) input.ReadEnum();
case 32: {
command_ = (global::MLAgents.CommunicatorObjects.CommandProto) input.ReadEnum();
case 42: {
SideChannel = input.ReadBytes();
break;
}
}
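
The regenerated message drops the typed `environment_parameters` and `is_training` fields and adds an opaque `side_channel` bytes field (field 5, hence the new wire tag 42); all side-channel traffic, including the float properties, now rides in that blob. A minimal sketch of populating the new field with the standard Google.Protobuf API; the payload bytes are a placeholder, since in practice they are produced by SideChannel implementations:

```csharp
using Google.Protobuf;
using MLAgents.CommunicatorObjects;

public static class SideChannelProtoDemo
{
    public static byte[] Build()
    {
        var input = new UnityRLInputProto
        {
            Command = CommandProto.Reset,
            // Placeholder payload; real messages are serialized by a
            // SideChannel (e.g. the float-properties channel).
            SideChannel = ByteString.CopyFrom(new byte[] { 1, 2, 3 }),
        };
        return input.ToByteArray();
    }
}
```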

UnitySDK/Assets/ML-Agents/Scripts/Grpc/CommunicatorObjects/UnityRlOutput.cs (44 changes)


string.Concat(
"CjhtbGFnZW50cy9lbnZzL2NvbW11bmljYXRvcl9vYmplY3RzL3VuaXR5X3Js",
"X291dHB1dC5wcm90bxIUY29tbXVuaWNhdG9yX29iamVjdHMaM21sYWdlbnRz",
"L2VudnMvY29tbXVuaWNhdG9yX29iamVjdHMvYWdlbnRfaW5mby5wcm90byKj",
"L2VudnMvY29tbXVuaWNhdG9yX29iamVjdHMvYWdlbnRfaW5mby5wcm90byK5",
"bmZvc0VudHJ5GkkKEkxpc3RBZ2VudEluZm9Qcm90bxIzCgV2YWx1ZRgBIAMo",
"CzIkLmNvbW11bmljYXRvcl9vYmplY3RzLkFnZW50SW5mb1Byb3RvGm4KD0Fn",
"ZW50SW5mb3NFbnRyeRILCgNrZXkYASABKAkSSgoFdmFsdWUYAiABKAsyOy5j",
"b21tdW5pY2F0b3Jfb2JqZWN0cy5Vbml0eVJMT3V0cHV0UHJvdG8uTGlzdEFn",
"ZW50SW5mb1Byb3RvOgI4AUoECAEQAkIfqgIcTUxBZ2VudHMuQ29tbXVuaWNh",
"dG9yT2JqZWN0c2IGcHJvdG8z"));
"bmZvc0VudHJ5EhQKDHNpZGVfY2hhbm5lbBgDIAEoDBpJChJMaXN0QWdlbnRJ",
"bmZvUHJvdG8SMwoFdmFsdWUYASADKAsyJC5jb21tdW5pY2F0b3Jfb2JqZWN0",
"cy5BZ2VudEluZm9Qcm90bxpuCg9BZ2VudEluZm9zRW50cnkSCwoDa2V5GAEg",
"ASgJEkoKBXZhbHVlGAIgASgLMjsuY29tbXVuaWNhdG9yX29iamVjdHMuVW5p",
"dHlSTE91dHB1dFByb3RvLkxpc3RBZ2VudEluZm9Qcm90bzoCOAFKBAgBEAJC",
"H6oCHE1MQWdlbnRzLkNvbW11bmljYXRvck9iamVjdHNiBnByb3RvMw=="));
new pbr::GeneratedClrTypeInfo(typeof(global::MLAgents.CommunicatorObjects.UnityRLOutputProto), global::MLAgents.CommunicatorObjects.UnityRLOutputProto.Parser, new[]{ "AgentInfos" }, null, null, new pbr::GeneratedClrTypeInfo[] { new pbr::GeneratedClrTypeInfo(typeof(global::MLAgents.CommunicatorObjects.UnityRLOutputProto.Types.ListAgentInfoProto), global::MLAgents.CommunicatorObjects.UnityRLOutputProto.Types.ListAgentInfoProto.Parser, new[]{ "Value" }, null, null, null),
new pbr::GeneratedClrTypeInfo(typeof(global::MLAgents.CommunicatorObjects.UnityRLOutputProto), global::MLAgents.CommunicatorObjects.UnityRLOutputProto.Parser, new[]{ "AgentInfos", "SideChannel" }, null, null, new pbr::GeneratedClrTypeInfo[] { new pbr::GeneratedClrTypeInfo(typeof(global::MLAgents.CommunicatorObjects.UnityRLOutputProto.Types.ListAgentInfoProto), global::MLAgents.CommunicatorObjects.UnityRLOutputProto.Types.ListAgentInfoProto.Parser, new[]{ "Value" }, null, null, null),
null, })
}));
}

[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public UnityRLOutputProto(UnityRLOutputProto other) : this() {
agentInfos_ = other.agentInfos_.Clone();
sideChannel_ = other.sideChannel_;
_unknownFields = pb::UnknownFieldSet.Clone(other._unknownFields);
}

get { return agentInfos_; }
}
/// <summary>Field number for the "side_channel" field.</summary>
public const int SideChannelFieldNumber = 3;
private pb::ByteString sideChannel_ = pb::ByteString.Empty;
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public pb::ByteString SideChannel {
get { return sideChannel_; }
set {
sideChannel_ = pb::ProtoPreconditions.CheckNotNull(value, "value");
}
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public override bool Equals(object other) {
return Equals(other as UnityRLOutputProto);

return true;
}
if (!AgentInfos.Equals(other.AgentInfos)) return false;
if (SideChannel != other.SideChannel) return false;
return Equals(_unknownFields, other._unknownFields);
}

hash ^= AgentInfos.GetHashCode();
if (SideChannel.Length != 0) hash ^= SideChannel.GetHashCode();
if (_unknownFields != null) {
hash ^= _unknownFields.GetHashCode();
}

[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public void WriteTo(pb::CodedOutputStream output) {
agentInfos_.WriteTo(output, _map_agentInfos_codec);
if (SideChannel.Length != 0) {
output.WriteRawTag(26);
output.WriteBytes(SideChannel);
}
if (_unknownFields != null) {
_unknownFields.WriteTo(output);
}

public int CalculateSize() {
int size = 0;