
Merge branch 'master' into develop-test-imitation

/test-recurrent-gail
Andrew Cohen, 4 years ago
Current commit
b1cfa74d
139 files changed, with 2,370 additions and 2,438 deletions
  1. Project/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DAgent.cs (10 changes)
  2. Project/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DHardAgent.cs (10 changes)
  3. Project/Assets/ML-Agents/Examples/Basic/Scripts/BasicController.cs (4 changes)
  4. Project/Assets/ML-Agents/Examples/Bouncer/Scripts/BouncerAgent.cs (14 changes)
  5. Project/Assets/ML-Agents/Examples/Crawler/Scripts/CrawlerAgent.cs (6 changes)
  6. Project/Assets/ML-Agents/Examples/FoodCollector/Scripts/FoodCollectorAgent.cs (7 changes)
  7. Project/Assets/ML-Agents/Examples/GridWorld/Demos/ExpertGrid.demo (1001 changes)
  8. Project/Assets/ML-Agents/Examples/GridWorld/Scripts/GridAgent.cs (12 changes)
  9. Project/Assets/ML-Agents/Examples/GridWorld/Scripts/GridArea.cs (2 changes)
  10. Project/Assets/ML-Agents/Examples/Hallway/Scripts/HallwayAgent.cs (9 changes)
  11. Project/Assets/ML-Agents/Examples/PushBlock/Scripts/PushAgentBasic.cs (9 changes)
  12. Project/Assets/ML-Agents/Examples/Pyramids/Scripts/PyramidAgent.cs (9 changes)
  13. Project/Assets/ML-Agents/Examples/Reacher/Scripts/ReacherAgent.cs (6 changes)
  14. Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/GroundContact.cs (2 changes)
  15. Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ModelOverrider.cs (5 changes)
  16. Project/Assets/ML-Agents/Examples/Soccer/Scripts/AgentSoccer.cs (11 changes)
  17. Project/Assets/ML-Agents/Examples/Soccer/Scripts/SoccerFieldArea.cs (2 changes)
  18. Project/Assets/ML-Agents/Examples/Startup/Scripts/Startup.cs (34 changes)
  19. Project/Assets/ML-Agents/Examples/Template/Scripts/TemplateAgent.cs (4 changes)
  20. Project/Assets/ML-Agents/Examples/Tennis/Scripts/HitWall.cs (4 changes)
  21. Project/Assets/ML-Agents/Examples/Tennis/Scripts/TennisAgent.cs (8 changes)
  22. Project/Assets/ML-Agents/Examples/Walker/Scripts/WalkerAgent.cs (8 changes)
  23. Project/Assets/ML-Agents/Examples/WallJump/Scripts/WallJumpAgent.cs (16 changes)
  24. README.md (108 changes)
  25. com.unity.ml-agents/CHANGELOG.md (16 changes)
  26. com.unity.ml-agents/Documentation~/com.unity.ml-agents.md (93 changes)
  27. com.unity.ml-agents/Editor/BehaviorParametersEditor.cs (71 changes)
  28. com.unity.ml-agents/Editor/RayPerceptionSensorComponentBaseEditor.cs (23 changes)
  29. com.unity.ml-agents/Runtime/Academy.cs (2 changes)
  30. com.unity.ml-agents/Runtime/Agent.cs (153 changes)
  31. com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs (12 changes)
  32. com.unity.ml-agents/Runtime/Communicator/RpcCommunicator.cs (61 changes)
  33. com.unity.ml-agents/Runtime/DecisionRequester.cs (9 changes)
  34. com.unity.ml-agents/Runtime/Inference/ModelRunner.cs (2 changes)
  35. com.unity.ml-agents/Runtime/Policies/BehaviorParameters.cs (151 changes)
  36. com.unity.ml-agents/Runtime/Policies/BrainParameters.cs (20 changes)
  37. com.unity.ml-agents/Runtime/Sensors/CameraSensor.cs (19 changes)
  38. com.unity.ml-agents/Runtime/Sensors/CameraSensorComponent.cs (78 changes)
  39. com.unity.ml-agents/Runtime/Sensors/RayPerceptionSensorComponent3D.cs (8 changes)
  40. com.unity.ml-agents/Runtime/Sensors/RayPerceptionSensorComponentBase.cs (49 changes)
  41. com.unity.ml-agents/Runtime/Sensors/RenderTextureSensor.cs (28 changes)
  42. com.unity.ml-agents/Runtime/Sensors/RenderTextureSensorComponent.cs (55 changes)
  43. com.unity.ml-agents/Runtime/SideChannels/EngineConfigurationChannel.cs (31 changes)
  44. com.unity.ml-agents/Runtime/SideChannels/FloatPropertiesChannel.cs (109 changes)
  45. com.unity.ml-agents/Runtime/SideChannels/RawBytesChannel.cs (10 changes)
  46. com.unity.ml-agents/Runtime/SideChannels/SideChannel.cs (13 changes)
  47. com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs (72 changes)
  48. com.unity.ml-agents/Tests/Editor/SideChannelTests.cs (69 changes)
  49. com.unity.ml-agents/package.json (2 changes)
  50. config/sac_trainer_config.yaml (2 changes)
  51. config/trainer_config.yaml (4 changes)
  52. docs/Getting-Started-with-Balance-Ball.md (21 changes)
  53. docs/Installation.md (160 changes)
  54. docs/Learning-Environment-Best-Practices.md (5 changes)
  55. docs/Learning-Environment-Create-New.md (38 changes)
  56. docs/Learning-Environment-Design-Agents.md (47 changes)
  57. docs/Learning-Environment-Design.md (33 changes)
  58. docs/Learning-Environment-Examples.md (4 changes)
  59. docs/Limitations.md (35 changes)
  60. docs/Migrating.md (20 changes)
  61. docs/Python-API.md (236 changes)
  62. docs/Readme.md (3 changes)
  63. docs/Training-Curriculum-Learning.md (2 changes)
  64. docs/Training-ML-Agents.md (1 change)
  65. docs/Using-Docker.md (9 changes)
  66. docs/Using-Virtual-Environment.md (12 changes)
  67. docs/images/unity_package_manager_window.png (951 changes)
  68. docs/localized/KR/docs/Installation.md (2 changes)
  69. docs/localized/zh-CN/docs/Installation.md (2 changes)
  70. ml-agents-envs/mlagents_envs/communicator.py (4 changes)
  71. ml-agents-envs/mlagents_envs/environment.py (7 changes)
  72. ml-agents-envs/mlagents_envs/exception.py (5 changes)
  73. ml-agents-envs/mlagents_envs/rpc_communicator.py (3 changes)
  74. ml-agents-envs/mlagents_envs/rpc_utils.py (3 changes)
  75. ml-agents-envs/mlagents_envs/side_channel/__init__.py (4 changes)
  76. ml-agents-envs/mlagents_envs/side_channel/engine_configuration_channel.py (23 changes)
  77. ml-agents-envs/mlagents_envs/side_channel/float_properties_channel.py (36 changes)
  78. ml-agents-envs/mlagents_envs/side_channel/raw_bytes_channel.py (10 changes)
  79. ml-agents-envs/mlagents_envs/side_channel/side_channel.py (13 changes)
  80. ml-agents-envs/mlagents_envs/tests/test_side_channel.py (68 changes)
  81. ml-agents/mlagents/trainers/brain.py (4 changes)
  82. ml-agents/mlagents/trainers/components/reward_signals/__init__.py (5 changes)
  83. ml-agents/mlagents/trainers/components/reward_signals/curiosity/signal.py (5 changes)
  84. ml-agents/mlagents/trainers/components/reward_signals/extrinsic/signal.py (3 changes)
  85. ml-agents/mlagents/trainers/components/reward_signals/gail/signal.py (19 changes)
  86. ml-agents/mlagents/trainers/components/reward_signals/reward_signal_factory.py (4 changes)
  87. ml-agents/mlagents/trainers/demo_loader.py (4 changes)
  88. ml-agents/mlagents/trainers/distributions.py (23 changes)
  89. ml-agents/mlagents/trainers/ghost/trainer.py (7 changes)
  90. ml-agents/mlagents/trainers/learn.py (155 changes)
  91. ml-agents/mlagents/trainers/models.py (39 changes)
  92. ml-agents/mlagents/trainers/policy/nn_policy.py (5 changes)
  93. ml-agents/mlagents/trainers/policy/tf_policy.py (4 changes)
  94. ml-agents/mlagents/trainers/ppo/optimizer.py (13 changes)
  95. ml-agents/mlagents/trainers/ppo/trainer.py (18 changes)
  96. ml-agents/mlagents/trainers/sac/network.py (6 changes)
  97. ml-agents/mlagents/trainers/sac/optimizer.py (56 changes)
  98. ml-agents/mlagents/trainers/sac/trainer.py (22 changes)
  99. ml-agents/mlagents/trainers/tests/simple_test_envs.py (136 changes)
  100. ml-agents/mlagents/trainers/tests/test_barracuda_converter.py (60 changes)

10
Project/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DAgent.cs


[Header("Specific to Ball3D")]
public GameObject ball;
Rigidbody m_BallRb;
IFloatProperties m_ResetParams;
FloatPropertiesChannel m_ResetParams;
public override void InitializeAgent()
public override void Initialize()
{
m_BallRb = ball.GetComponent<Rigidbody>();
m_ResetParams = Academy.Instance.FloatProperties;

sensor.AddObservation(m_BallRb.velocity);
}
public override void AgentAction(float[] vectorAction)
public override void OnActionReceived(float[] vectorAction)
{
var actionZ = 2f * Mathf.Clamp(vectorAction[0], -1f, 1f);
var actionX = 2f * Mathf.Clamp(vectorAction[1], -1f, 1f);

Mathf.Abs(ball.transform.position.z - gameObject.transform.position.z) > 3f)
{
SetReward(-1f);
Done();
EndEpisode();
}
else
{

public override void AgentReset()
public override void OnEpisodeBegin()
{
gameObject.transform.rotation = new Quaternion(0f, 0f, 0f, 0f);
gameObject.transform.Rotate(new Vector3(1, 0, 0), Random.Range(-10f, 10f));
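As a quick illustration of the `IFloatProperties` to `FloatPropertiesChannel` change above, reading a reset parameter through the new concrete type could look like the sketch below. The component name and the "ball_scale" key are hypothetical; `GetPropertyWithDefault` is the accessor this diff itself uses in WallJumpAgent, and the `MLAgents.SideChannels` using follows the namespace move noted in the CHANGELOG.

```csharp
using UnityEngine;
using MLAgents;
using MLAgents.SideChannels;  // assumed namespace for FloatPropertiesChannel on this branch

public class ResetParamsExample : MonoBehaviour
{
    FloatPropertiesChannel m_ResetParams;

    void Start()
    {
        // The concrete channel type replaces the removed IFloatProperties interface.
        m_ResetParams = Academy.Instance.FloatProperties;

        // "ball_scale" is a hypothetical key; 1.0f is used if the trainer never set it.
        var scale = m_ResetParams.GetPropertyWithDefault("ball_scale", 1.0f);
        transform.localScale = Vector3.one * scale;
    }
}
```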

10
Project/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DHardAgent.cs


[Header("Specific to Ball3DHard")]
public GameObject ball;
Rigidbody m_BallRb;
IFloatProperties m_ResetParams;
FloatPropertiesChannel m_ResetParams;
public override void InitializeAgent()
public override void Initialize()
{
m_BallRb = ball.GetComponent<Rigidbody>();
m_ResetParams = Academy.Instance.FloatProperties;

sensor.AddObservation((ball.transform.position - gameObject.transform.position));
}
public override void AgentAction(float[] vectorAction)
public override void OnActionReceived(float[] vectorAction)
{
var actionZ = 2f * Mathf.Clamp(vectorAction[0], -1f, 1f);
var actionX = 2f * Mathf.Clamp(vectorAction[1], -1f, 1f);

Mathf.Abs(ball.transform.position.z - gameObject.transform.position.z) > 3f)
{
SetReward(-1f);
Done();
EndEpisode();
}
else
{

public override void AgentReset()
public override void OnEpisodeBegin()
{
gameObject.transform.rotation = new Quaternion(0f, 0f, 0f, 0f);
gameObject.transform.Rotate(new Vector3(1, 0, 0), Random.Range(-10f, 10f));

4
Project/Assets/ML-Agents/Examples/Basic/Scripts/BasicController.cs


if (m_Position == k_SmallGoalPosition)
{
m_Agent.AddReward(0.1f);
m_Agent.Done();
m_Agent.EndEpisode();
ResetAgent();
}

m_Agent.Done();
m_Agent.EndEpisode();
ResetAgent();
}
}

14
Project/Assets/ML-Agents/Examples/Bouncer/Scripts/BouncerAgent.cs


int m_NumberJumps = 20;
int m_JumpLeft = 20;
IFloatProperties m_ResetParams;
FloatPropertiesChannel m_ResetParams;
public override void InitializeAgent()
public override void Initialize()
{
m_Rb = gameObject.GetComponent<Rigidbody>();
m_LookDir = Vector3.zero;

sensor.AddObservation(target.transform.localPosition);
}
public override void AgentAction(float[] vectorAction)
public override void OnActionReceived(float[] vectorAction)
{
for (var i = 0; i < vectorAction.Length; i++)
{

m_LookDir = new Vector3(x, y, z);
}
public override void AgentReset()
public override void OnEpisodeBegin()
{
gameObject.transform.localPosition = new Vector3(
(1 - 2 * Random.value) * 5, 2, (1 - 2 * Random.value) * 5);

if (gameObject.transform.position.y < -1)
{
AddReward(-1);
Done();
EndEpisode();
return;
}

AddReward(-1);
Done();
EndEpisode();
Done();
EndEpisode();
}
}

6
Project/Assets/ML-Agents/Examples/Crawler/Scripts/CrawlerAgent.cs


Quaternion m_LookRotation;
Matrix4x4 m_TargetDirMatrix;
public override void InitializeAgent()
public override void Initialize()
{
m_JdController = GetComponent<JointDriveController>();
m_DirToTarget = target.position - body.position;

target.position = newTargetPos + ground.position;
}
public override void AgentAction(float[] vectorAction)
public override void OnActionReceived(float[] vectorAction)
{
// The dictionary with all the body parts in it are in the jdController
var bpDict = m_JdController.bodyPartsDict;

/// <summary>
/// Loop over body parts and reset them to initial conditions.
/// </summary>
public override void AgentReset()
public override void OnEpisodeBegin()
{
if (m_DirToTarget != Vector3.zero)
{

7
Project/Assets/ML-Agents/Examples/FoodCollector/Scripts/FoodCollectorAgent.cs


public bool useVectorObs;
public override void InitializeAgent()
public override void Initialize()
base.InitializeAgent();
m_AgentRb = GetComponent<Rigidbody>();
m_MyArea = area.GetComponent<FoodCollectorArea>();
m_FoodCollecterSettings = FindObjectOfType<FoodCollectorSettings>();

gameObject.GetComponentInChildren<Renderer>().material = normalMaterial;
}
public override void AgentAction(float[] vectorAction)
public override void OnActionReceived(float[] vectorAction)
{
MoveAgent(vectorAction);
}

return action;
}
public override void AgentReset()
public override void OnEpisodeBegin()
{
Unfreeze();
Unpoison();

1001
Project/Assets/ML-Agents/Examples/GridWorld/Demos/ExpertGrid.demo
(File diff too large to display.)

12
Project/Assets/ML-Agents/Examples/GridWorld/Scripts/GridAgent.cs


const int k_Left = 3;
const int k_Right = 4;
public override void InitializeAgent()
{
}
public override void CollectDiscreteActionMasks(DiscreteActionMasker actionMasker)
{
// Mask the necessary actions if selected by the user.

}
// to be implemented by the developer
public override void AgentAction(float[] vectorAction)
public override void OnActionReceived(float[] vectorAction)
{
AddReward(-0.01f);
var action = Mathf.FloorToInt(vectorAction[0]);

if (hit.Where(col => col.gameObject.CompareTag("goal")).ToArray().Length == 1)
{
SetReward(1f);
Done();
EndEpisode();
Done();
EndEpisode();
}
}
}

}
// to be implemented by the developer
public override void AgentReset()
public override void OnEpisodeBegin()
{
area.AreaReset();
}
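For readers unfamiliar with the `CollectDiscreteActionMasks` hook shown above, a hypothetical sketch of masking a discrete action is given below. The wall check is a placeholder, and the `DiscreteActionMasker.SetMask(branch, indices)` call reflects my understanding of the masker API on this branch rather than code from this diff.

```csharp
using UnityEngine;
using MLAgents;

public class MaskingAgent : Agent
{
    const int k_Left = 3;

    public override void CollectDiscreteActionMasks(DiscreteActionMasker actionMasker)
    {
        // Branch 0 is the single movement branch; mask "left" when the agent is
        // (hypothetically) against the left wall so the policy cannot choose it.
        if (IsAgainstLeftWall())
        {
            actionMasker.SetMask(0, new[] { k_Left });
        }
    }

    bool IsAgainstLeftWall()
    {
        // Placeholder for a real position or raycast check.
        return transform.localPosition.x < -4.5f;
    }

    public override void OnActionReceived(float[] vectorAction)
    {
        // Movement logic omitted in this sketch.
    }
}
```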

2
Project/Assets/ML-Agents/Examples/GridWorld/Scripts/GridArea.cs


public GameObject trueAgent;
IFloatProperties m_ResetParameters;
FloatPropertiesChannel m_ResetParameters;
Camera m_AgentCam;

9
Project/Assets/ML-Agents/Examples/Hallway/Scripts/HallwayAgent.cs


HallwaySettings m_HallwaySettings;
int m_Selection;
public override void InitializeAgent()
public override void Initialize()
base.InitializeAgent();
m_HallwaySettings = FindObjectOfType<HallwaySettings>();
m_AgentRb = GetComponent<Rigidbody>();
m_GroundRenderer = ground.GetComponent<Renderer>();

m_AgentRb.AddForce(dirToGo * m_HallwaySettings.agentRunSpeed, ForceMode.VelocityChange);
}
public override void AgentAction(float[] vectorAction)
public override void OnActionReceived(float[] vectorAction)
{
AddReward(-1f / maxStep);
MoveAgent(vectorAction);

SetReward(-0.1f);
StartCoroutine(GoalScoredSwapGroundMaterial(m_HallwaySettings.failMaterial, 0.5f));
}
Done();
EndEpisode();
}
}

return new float[] { 0 };
}
public override void AgentReset()
public override void OnEpisodeBegin()
{
var agentOffset = -15f;
var blockOffset = 0f;

9
Project/Assets/ML-Agents/Examples/PushBlock/Scripts/PushAgentBasic.cs


m_PushBlockSettings = FindObjectOfType<PushBlockSettings>();
}
public override void InitializeAgent()
public override void Initialize()
base.InitializeAgent();
goalDetect = block.GetComponent<GoalDetect>();
goalDetect.agent = this;

AddReward(5f);
// By marking an agent as done AgentReset() will be called automatically.
Done();
EndEpisode();
// Swap ground material for a bit to indicate we scored.
StartCoroutine(GoalScoredSwapGroundMaterial(m_PushBlockSettings.goalScoredMaterial, 0.5f));

/// <summary>
/// Called every step of the engine. Here the agent takes an action.
/// </summary>
public override void AgentAction(float[] vectorAction)
public override void OnActionReceived(float[] vectorAction)
{
// Move the agent using the action.
MoveAgent(vectorAction);

/// In the editor, if "Reset On Done" is checked then AgentReset() will be
/// called automatically anytime we mark done = true in an agent script.
/// </summary>
public override void AgentReset()
public override void OnEpisodeBegin()
{
var rotation = Random.Range(0, 4);
var rotationAngle = rotation * 90f;

9
Project/Assets/ML-Agents/Examples/Pyramids/Scripts/PyramidAgent.cs


public GameObject areaSwitch;
public bool useVectorObs;
public override void InitializeAgent()
public override void Initialize()
base.InitializeAgent();
m_AgentRb = GetComponent<Rigidbody>();
m_MyArea = area.GetComponent<PyramidArea>();
m_SwitchLogic = areaSwitch.GetComponent<PyramidSwitch>();

m_AgentRb.AddForce(dirToGo * 2f, ForceMode.VelocityChange);
}
public override void AgentAction(float[] vectorAction)
public override void OnActionReceived(float[] vectorAction)
{
AddReward(-1f / maxStep);
MoveAgent(vectorAction);

return new float[] { 0 };
}
public override void AgentReset()
public override void OnEpisodeBegin()
{
var enumerable = Enumerable.Range(0, 9).OrderBy(x => Guid.NewGuid()).Take(9);
var items = enumerable.ToArray();

if (collision.gameObject.CompareTag("goal"))
{
SetReward(2f);
Done();
EndEpisode();
}
}
}

6
Project/Assets/ML-Agents/Examples/Reacher/Scripts/ReacherAgent.cs


/// Collect the rigidbodies of the reacher in order to reuse them for
/// observations and actions.
/// </summary>
public override void InitializeAgent()
public override void Initialize()
{
m_RbA = pendulumA.GetComponent<Rigidbody>();
m_RbB = pendulumB.GetComponent<Rigidbody>();

/// <summary>
/// The agent's four actions correspond to torques on each of the two joints.
/// </summary>
public override void AgentAction(float[] vectorAction)
public override void OnActionReceived(float[] vectorAction)
{
m_GoalDegree += m_GoalSpeed;
UpdateGoalPosition();

/// <summary>
/// Resets the position and velocity of the agent and the goal.
/// </summary>
public override void AgentReset()
public override void OnEpisodeBegin()
{
pendulumA.transform.position = new Vector3(0f, -4f, 0f) + transform.position;
pendulumA.transform.rotation = Quaternion.Euler(180f, 0f, 0f);

2
Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/GroundContact.cs


if (agentDoneOnGroundContact)
{
agent.Done();
agent.EndEpisode();
}
}
}

5
Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ModelOverrider.cs


void OverrideModel()
{
m_Agent.LazyInitialize();
var name = m_Agent.BehaviorName;
var bp = m_Agent.GetComponent<BehaviorParameters>();
var name = bp.behaviorName;
m_Agent.GiveModel($"Override_{name}", nnModel);
m_Agent.SetModel($"Override_{name}", nnModel);
}
}

11
Project/Assets/ML-Agents/Examples/Soccer/Scripts/AgentSoccer.cs


[HideInInspector]
public Rigidbody agentRb;
SoccerSettings m_SoccerSettings;
BehaviorParameters m_BehaviorParameters;
public override void InitializeAgent()
public override void Initialize()
base.InitializeAgent();
if (TeamId == (int)Team.Blue)
m_BehaviorParameters = gameObject.GetComponent<BehaviorParameters>();
if (m_BehaviorParameters.TeamId == (int)Team.Blue)
{
team = Team.Blue;
m_Transform = new Vector3(transform.position.x - 4f, .5f, transform.position.z);

ForceMode.VelocityChange);
}
public override void AgentAction(float[] vectorAction)
public override void OnActionReceived(float[] vectorAction)
{
// Existential penalty for strikers.
AddReward(-1f / 3000f);

}
}
public override void AgentReset()
public override void OnEpisodeBegin()
{
if (team == Team.Purple)
{

2
Project/Assets/ML-Agents/Examples/Soccer/Scripts/SoccerFieldArea.cs


{
ps.agentScript.AddReward(-1);
}
ps.agentScript.Done(); //all agents need to be reset
ps.agentScript.EndEpisode(); //all agents need to be reset
if (goalTextUI)
{

34
Project/Assets/ML-Agents/Examples/Startup/Scripts/Startup.cs


internal class Startup : MonoBehaviour
{
const string k_SceneVariableName = "SCENE_NAME";
private const string k_SceneCommandLineFlag = "--mlagents-scene-name";
var sceneName = Environment.GetEnvironmentVariable(k_SceneVariableName);
var sceneName = "";
// Check for the CLI '--scene-name' flag. This will be used if
// no scene environment variable is found.
var args = Environment.GetCommandLineArgs();
Console.WriteLine("Command line arguments passed: " + String.Join(" ", args));
for (int i = 0; i < args.Length; i++) {
if (args [i] == k_SceneCommandLineFlag && i < args.Length - 1) {
sceneName = args[i + 1];
}
}
var sceneEnvironmentVariable = Environment.GetEnvironmentVariable(k_SceneVariableName);
if (!string.IsNullOrEmpty(sceneEnvironmentVariable))
{
sceneName = sceneEnvironmentVariable;
}
SwitchScene(sceneName);
}

{
throw new ArgumentException(
$"You didn't specified the {k_SceneVariableName} environment variable");
Console.WriteLine(
$"You didn't specify the {k_SceneVariableName} environment variable or the {k_SceneCommandLineFlag} command line argument."
);
Application.Quit(22);
return;
throw new ArgumentException(
$"The scene {sceneName} doesn't exist within your build. ");
Console.WriteLine(
$"The scene {sceneName} doesn't exist within your build."
);
Application.Quit(22);
return;
}
SceneManager.LoadSceneAsync(sceneName);
}

4
Project/Assets/ML-Agents/Examples/Template/Scripts/TemplateAgent.cs


{
}
public override void AgentAction(float[] vectorAction)
public override void OnActionReceived(float[] vectorAction)
public override void AgentReset()
public override void OnEpisodeBegin()
{
}
}

4
Project/Assets/ML-Agents/Examples/Tennis/Scripts/HitWall.cs


void Reset()
{
m_AgentA.Done();
m_AgentB.Done();
m_AgentA.EndEpisode();
m_AgentB.EndEpisode();
m_Area.MatchReset();
lastFloorHit = FloorHit.Service;
net = false;

8
Project/Assets/ML-Agents/Examples/Tennis/Scripts/TennisAgent.cs


Rigidbody m_AgentRb;
Rigidbody m_BallRb;
float m_InvertMult;
IFloatProperties m_ResetParams;
FloatPropertiesChannel m_ResetParams;
// Looks for the scoreboard based on the name of the gameObjects.
// Do not modify the names of the Score GameObjects

public override void InitializeAgent()
public override void Initialize()
{
m_AgentRb = GetComponent<Rigidbody>();
m_BallRb = ball.GetComponent<Rigidbody>();

sensor.AddObservation(m_InvertMult * gameObject.transform.rotation.z);
}
public override void AgentAction(float[] vectorAction)
public override void OnActionReceived(float[] vectorAction)
{
var moveX = Mathf.Clamp(vectorAction[0], -1f, 1f) * m_InvertMult;
var moveY = Mathf.Clamp(vectorAction[1], -1f, 1f);

return action;
}
public override void AgentReset()
public override void OnEpisodeBegin()
{
m_InvertMult = invertX ? -1f : 1f;

8
Project/Assets/ML-Agents/Examples/Walker/Scripts/WalkerAgent.cs


Rigidbody m_ChestRb;
Rigidbody m_SpineRb;
IFloatProperties m_ResetParams;
FloatPropertiesChannel m_ResetParams;
public override void InitializeAgent()
public override void Initialize()
{
m_JdController = GetComponent<JointDriveController>();
m_JdController.SetupBodyPart(hips);

}
}
public override void AgentAction(float[] vectorAction)
public override void OnActionReceived(float[] vectorAction)
{
var bpDict = m_JdController.bodyPartsDict;
var i = -1;

/// <summary>
/// Loop over body parts and reset them to initial conditions.
/// </summary>
public override void AgentReset()
public override void OnEpisodeBegin()
{
if (m_DirToTarget != Vector3.zero)
{

16
Project/Assets/ML-Agents/Examples/WallJump/Scripts/WallJumpAgent.cs


Vector3 m_JumpTargetPos;
Vector3 m_JumpStartingPos;
public override void InitializeAgent()
public override void Initialize()
{
m_WallJumpSettings = FindObjectOfType<WallJumpSettings>();
m_Configuration = Random.Range(0, 5);

jumpingTime -= Time.fixedDeltaTime;
}
public override void AgentAction(float[] vectorAction)
public override void OnActionReceived(float[] vectorAction)
{
MoveAgent(vectorAction);
if ((!Physics.Raycast(m_AgentRb.position, Vector3.down, 20))

Done();
EndEpisode();
ResetBlock(m_ShortBlockRb);
StartCoroutine(
GoalScoredSwapGroundMaterial(m_WallJumpSettings.failMaterial, .5f));

if (col.gameObject.CompareTag("goal") && DoGroundCheck(true))
{
SetReward(1f);
Done();
EndEpisode();
StartCoroutine(
GoalScoredSwapGroundMaterial(m_WallJumpSettings.goalScoredMaterial, 2));
}

blockRb.angularVelocity = Vector3.zero;
}
public override void AgentReset()
public override void OnEpisodeBegin()
{
ResetBlock(m_ShortBlockRb);
transform.localPosition = new Vector3(

Academy.Instance.FloatProperties.GetPropertyWithDefault("no_wall_height", 0),
localScale.z);
wall.transform.localScale = localScale;
GiveModel("SmallWallJump", noWallBrain);
SetModel("SmallWallJump", noWallBrain);
}
else if (config == 1)
{

localScale.z);
wall.transform.localScale = localScale;
GiveModel("SmallWallJump", smallWallBrain);
SetModel("SmallWallJump", smallWallBrain);
}
else
{

height,
localScale.z);
wall.transform.localScale = localScale;
GiveModel("BigWallJump", bigWallBrain);
SetModel("BigWallJump", bigWallBrain);
}
}
}

108
README.md


used for multiple purposes, including controlling NPC behavior (in a variety of
settings such as multi-agent and adversarial), automated testing of game builds
and evaluating different game design decisions pre-release. The ML-Agents
toolkit is mutually beneficial for both game developers and AI researchers as it
Toolkit is mutually beneficial for both game developers and AI researchers as it
provides a central platform where advances in AI can be evaluated on Unity’s
rich environments and then made accessible to the wider research and game
developer communities.

* Unity environment control from Python
* 10+ sample Unity environments
* 15+ sample Unity environments
* Two deep reinforcement learning algorithms,
[Proximal Policy Optimization](https://github.com/Unity-Technologies/ml-agents/tree/latest_release/docs/Training-PPO.md)
(PPO) and [Soft Actor-Critic](https://github.com/Unity-Technologies/ml-agents/tree/latest_release/docs/Training-SAC.md)

* Built-in support for Imitation Learning
* Flexible agent control with On Demand Decision Making
* Visualizing network outputs within the environment
* Simplified set-up with Docker
## Documentation
## Releases & Documentation
**Our latest, stable release is 0.14.1. Click
[here](https://github.com/Unity-Technologies/ml-agents/tree/latest_release/docs/Readme.md) to
get started with the latest release of ML-Agents.**
The table below lists all our releases, including our `master` branch which is under active
development and may be unstable. A few helpful guidelines:
* The docs links in the table below include installation and usage instructions specific to each
release. Remember to always use the documentation that corresponds to the release version you're
using.
* See the [GitHub releases](https://github.com/Unity-Technologies/ml-agents/releases) for more
details of the changes between versions.
* If you have used an earlier version of the ML-Agents Toolkit, we strongly recommend our
[guide on migrating from earlier versions](docs/Migrating.md).
* For more information, in addition to installation and usage instructions, see
the [documentation for the latest release](https://github.com/Unity-Technologies/ml-agents/tree/latest_release/docs/Readme.md).
* If you are a researcher interested in a discussion of Unity as an AI platform, see a pre-print of our [reference paper on Unity and the ML-Agents Toolkit](https://arxiv.org/abs/1809.02627). Also, see below for instructions on citing this paper.
* If you have used an earlier version of the ML-Agents toolkit, we strongly
recommend our [guide on migrating from earlier versions](docs/Migrating.md).
| **Version** | **Release Date** | **Source** | **Documentation** | **Download** |
|:-------:|:------:|:-------------:|:-------:|:------------:|
| **master** (unstable) | -- | [source](https://github.com/Unity-Technologies/ml-agents/tree/master) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/master/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/master.zip) |
| **0.14.1** (latest stable release) | February 26, 2020 | **[source](https://github.com/Unity-Technologies/ml-agents/tree/latest_release)** | **[docs](https://github.com/Unity-Technologies/ml-agents/tree/latest_release/docs/Readme.md)** | **[download](https://github.com/Unity-Technologies/ml-agents/archive/latest_release.zip)** |
| **0.14.0** | February 13, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/0.14.0) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/0.14.0/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/0.14.0.zip) |
| **0.13.1** | January 21, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/0.13.1) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/0.13.1/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/0.13.1.zip) |
| **0.13.0** | January 8, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/0.13.0) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/0.13.0/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/0.13.0.zip) |
| **0.12.1** | December 11, 2019 | [source](https://github.com/Unity-Technologies/ml-agents/tree/0.12.1) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/0.12.1/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/0.12.1.zip) |
| **0.12.0** | December 2, 2019 | [source](https://github.com/Unity-Technologies/ml-agents/tree/0.12.0) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/0.12.0/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/0.12.0.zip) |
| **0.11.0** | November 4, 2019 | [source](https://github.com/Unity-Technologies/ml-agents/tree/0.11.0) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/0.11.0/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/0.11.0.zip) |
| **0.10.1** | October 9, 2019 | [source](https://github.com/Unity-Technologies/ml-agents/tree/0.10.1) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/0.10.1/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/0.10.1.zip) |
| **0.10.0** | September 30, 2019 | [source](https://github.com/Unity-Technologies/ml-agents/tree/0.10.0) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/0.10.0/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/0.10.0.zip) |
## Citation
If you are a researcher interested in a discussion of Unity as an AI platform, see a pre-print
of our [reference paper on Unity and the ML-Agents Toolkit](https://arxiv.org/abs/1809.02627).
If you use Unity or the ML-Agents Toolkit to conduct research, we ask that you cite the following
paper as a reference:
Juliani, A., Berges, V., Vckay, E., Gao, Y., Henry, H., Mattar, M., Lange, D. (2018). Unity: A General Platform for Intelligent Agents. *arXiv preprint arXiv:1809.02627.* https://github.com/Unity-Technologies/ml-agents.
* (February 28, 2020) [Training intelligent adversaries using self-play with ML-Agents](https://blogs.unity3d.com/2020/02/28/training-intelligent-adversaries-using-self-play-with-ml-agents/)
* (November 11, 2019) [Training your agents 7 times faster with ML-Agents](https://blogs.unity3d.com/2019/11/11/training-your-agents-7-times-faster-with-ml-agents/)
* (October 21, 2019) [The AI@Unity interns help shape the world](https://blogs.unity3d.com/2019/10/21/the-aiunity-interns-help-shape-the-world/)
* (April 15, 2019) [Unity ML-Agents Toolkit v0.8: Faster training on real games](https://blogs.unity3d.com/2019/04/15/unity-ml-agents-toolkit-v0-8-faster-training-on-real-games/)
* (March 1, 2019) [Unity ML-Agents Toolkit v0.7: A leap towards cross-platform inference](https://blogs.unity3d.com/2019/03/01/unity-ml-agents-toolkit-v0-7-a-leap-towards-cross-platform-inference/)
* (December 17, 2018) [ML-Agents Toolkit v0.6: Improved usability of Brains and Imitation Learning](https://blogs.unity3d.com/2018/12/17/ml-agents-toolkit-v0-6-improved-usability-of-brains-and-imitation-learning/)
* (October 2, 2018) [Puppo, The Corgi: Cuteness Overload with the Unity ML-Agents Toolkit](https://blogs.unity3d.com/2018/10/02/puppo-the-corgi-cuteness-overload-with-the-unity-ml-agents-toolkit/)
* (September 11, 2018) [ML-Agents Toolkit v0.5, new resources for AI researchers available now](https://blogs.unity3d.com/2018/09/11/ml-agents-toolkit-v0-5-new-resources-for-ai-researchers-available-now/)
* (June 26, 2018) [Solving sparse-reward tasks with Curiosity](https://blogs.unity3d.com/2018/06/26/solving-sparse-reward-tasks-with-curiosity/)
* (June 19, 2018) [Unity ML-Agents Toolkit v0.4 and Udacity Deep Reinforcement Learning Nanodegree](https://blogs.unity3d.com/2018/06/19/unity-ml-agents-toolkit-v0-4-and-udacity-deep-reinforcement-learning-nanodegree/)
* (May 24, 2018) [Imitation Learning in Unity: The Workflow](https://blogs.unity3d.com/2018/05/24/imitation-learning-in-unity-the-workflow/)
* (March 15, 2018) [ML-Agents Toolkit v0.3 Beta released: Imitation Learning, feedback-driven features, and more](https://blogs.unity3d.com/2018/03/15/ml-agents-v0-3-beta-released-imitation-learning-feedback-driven-features-and-more/)
* (December 11, 2017) [Using Machine Learning Agents in a real game: a beginner’s guide](https://blogs.unity3d.com/2017/12/11/using-machine-learning-agents-in-a-real-game-a-beginners-guide/)
* (December 8, 2017) [Introducing ML-Agents Toolkit v0.2: Curriculum Learning, new environments, and more](https://blogs.unity3d.com/2017/12/08/introducing-ml-agents-v0-2-curriculum-learning-new-environments-and-more/)
* (September 19, 2017) [Introducing: Unity Machine Learning Agents Toolkit](https://blogs.unity3d.com/2017/09/19/introducing-unity-machine-learning-agents/)
* [Using Machine Learning Agents in a real game: a beginner’s guide](https://blogs.unity3d.com/2017/12/11/using-machine-learning-agents-in-a-real-game-a-beginners-guide/)
* [Post](https://blogs.unity3d.com/2018/02/28/introducing-the-winners-of-the-first-ml-agents-challenge/)
announcing the winners of our
[first ML-Agents Challenge](https://connect.unity.com/challenges/ml-agents-1)
* [Post](https://blogs.unity3d.com/2018/01/23/designing-safer-cities-through-simulations/)
overviewing how Unity can be leveraged as a simulator to design safer cities.
In addition to our own documentation, here are some additional, relevant articles:

## Community and Feedback
The ML-Agents toolkit is an open-source project and we encourage and welcome
The ML-Agents Toolkit is an open-source project and we encourage and welcome
For problems with the installation and setup of the ML-Agents toolkit, or
For problems with the installation and setup of the ML-Agents Toolkit, or
If you run into any other problems using the ML-Agents toolkit, or have a specific
If you run into any other problems using the ML-Agents Toolkit, or have a specific
Your opinion matters a great deal to us. Only by hearing your thoughts on the Unity ML-Agents Toolkit can we continue
to improve and grow. Please take a few minutes to [let us know about it](https://github.com/Unity-Technologies/ml-agents/issues/1454).
Your opinion matters a great deal to us. Only by hearing your thoughts on the Unity ML-Agents
Toolkit can we continue to improve and grow. Please take a few minutes to
[let us know about it](https://github.com/Unity-Technologies/ml-agents/issues/1454).
## Releases
The latest release is 0.14.1. Previous releases can be found below:
| **Version** | **Source** | **Documentation** | **Download** |
|:-------:|:------:|:-------------:|:-------:|
| **0.14.0** | [source](https://github.com/Unity-Technologies/ml-agents/tree/0.14.0) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/0.14.0/docs) | [download](https://github.com/Unity-Technologies/ml-agents/archive/0.14.0.zip) |
| **0.13.1** | [source](https://github.com/Unity-Technologies/ml-agents/tree/0.13.1) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/0.13.1/docs) | [download](https://github.com/Unity-Technologies/ml-agents/archive/0.13.1.zip) |
| **0.13.0** | [source](https://github.com/Unity-Technologies/ml-agents/tree/0.13.0) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/0.13.0/docs) | [download](https://github.com/Unity-Technologies/ml-agents/archive/0.13.0.zip) |
| **0.12.1** | [source](https://github.com/Unity-Technologies/ml-agents/tree/0.12.1) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/0.12.1/docs) | [download](https://github.com/Unity-Technologies/ml-agents/archive/0.12.1.zip) |
| **0.12.0** | [source](https://github.com/Unity-Technologies/ml-agents/tree/0.12.0) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/0.12.0/docs) | [download](https://github.com/Unity-Technologies/ml-agents/archive/0.12.0.zip) |
| **0.11.0** | [source](https://github.com/Unity-Technologies/ml-agents/tree/0.11.0) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/0.11.0/docs) | [download](https://github.com/Unity-Technologies/ml-agents/archive/0.11.0.zip) |
| **0.10.1** | [source](https://github.com/Unity-Technologies/ml-agents/tree/0.10.1) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/0.10.1/docs) | [download](https://github.com/Unity-Technologies/ml-agents/archive/0.10.1.zip) |
| **0.10.0** | [source](https://github.com/Unity-Technologies/ml-agents/tree/0.10.0) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/0.10.0/docs) | [download](https://github.com/Unity-Technologies/ml-agents/archive/0.10.0.zip) |
See the [GitHub releases](https://github.com/Unity-Technologies/ml-agents/releases) for more details of the changes
between versions.
Please note that the `master` branch is under active development, so the documentation there may differ from the code
of a previous release. Always use the documentation that corresponds to the release version you're using.
## Citation
If you use Unity or the ML-Agents Toolkit to conduct research, we ask that you cite the following paper as a reference:
Juliani, A., Berges, V., Vckay, E., Gao, Y., Henry, H., Mattar, M., Lange, D. (2018). Unity: A General Platform for Intelligent Agents. *arXiv preprint arXiv:1809.02627.* https://github.com/Unity-Technologies/ml-agents.

16
com.unity.ml-agents/CHANGELOG.md


- All SideChannel related code has been moved to the namespace `MLAgents.SideChannels`.
- `BrainParameters` and `SpaceType` have been removed from the public API
- `BehaviorParameters` have been removed from the public API.
- The following methods in the `Agent` class have been deprecated and will be removed in a later release:
- `InitializeAgent()` was renamed to `Initialize()`
- `AgentAction()` was renamed to `OnActionReceived()`
- `AgentReset()` was renamed to `OnEpisodeBegin()`
- `Done()` was renamed to `EndEpisode()`
- `GiveModel()` was renamed to `SetModel()`
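A minimal agent written against the renamed API overrides the new entry points instead of the deprecated ones. The class below is a hypothetical sketch rather than one of the example agents; the force scale, reset position, and reward logic are placeholders, and the using directives assume the MLAgents / MLAgents.Sensors namespaces used on this branch.

```csharp
using UnityEngine;
using MLAgents;
using MLAgents.Sensors;  // assumed namespace for VectorSensor on this branch

public class MigratedAgent : Agent
{
    Rigidbody m_Rb;

    // Formerly InitializeAgent()
    public override void Initialize()
    {
        m_Rb = GetComponent<Rigidbody>();
    }

    // Formerly AgentReset()
    public override void OnEpisodeBegin()
    {
        m_Rb.velocity = Vector3.zero;
        transform.localPosition = new Vector3(0f, 0.5f, 0f);
    }

    public override void CollectObservations(VectorSensor sensor)
    {
        sensor.AddObservation(transform.localPosition);
        sensor.AddObservation(m_Rb.velocity);
    }

    // Formerly AgentAction()
    public override void OnActionReceived(float[] vectorAction)
    {
        var force = new Vector3(vectorAction[0], 0f, vectorAction[1]);
        m_Rb.AddForce(force * 10f);

        // Placeholder terminal condition; EndEpisode() replaces Done().
        if (transform.localPosition.y < 0f)
        {
            SetReward(-1f);
            EndEpisode();
        }
    }

    public override float[] Heuristic()
    {
        // Map keyboard input to the two continuous actions for manual testing.
        return new[] { Input.GetAxis("Horizontal"), Input.GetAxis("Vertical") };
    }
}
```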
### Minor Changes
- Monitor.cs was moved to Examples. (#3372)

- Academy.RegisterSideChannel and UnregisterSideChannel methods were added. (#3391)
- A tutorial on adding custom SideChannels was added (#3391)
- The stepping logic for the Agent and the Academy has been simplified (#3448)
- Update Barracuda to 0.6.0-preview
- Update Barracuda to 0.6.1-preview
* The interface for `RayPerceptionSensor.PerceiveStatic()` was changed to take an input class and write to an output class, and the method was renamed to `Perceive()`.
- The checkpoint file suffix was changed from `.cptk` to `.ckpt` (#3470)
- The command-line argument used to determine the port that an environment will listen on was changed from `--port` to `--mlagents-port`.

- The method `GetStepCount()` on the Agent class has been replaced with the property getter `StepCount`
- `RayPerceptionSensorComponent` and related classes now display the debug gizmos whenever the Agent is selected (not just Play mode).
- Most fields on `RayPerceptionSensorComponent` can now be changed while the editor is in Play mode. The exceptions to this are fields that affect the number of observations.
- Most fields on `CameraSensorComponent` and `RenderTextureSensorComponent` were changed to private and replaced by properties with the same name.
- `DecisionRequester` has been made internal (you can still use the DecisionRequesterComponent from the inspector). `RepeatAction` was renamed `TakeActionsBetweenDecisions` for clarity. (#3555)
- The `IFloatProperties` interface has been removed.
- Fix #3579.
- Improved inference performance for models with multiple action branches. (#3598)
- Fixed an issue when using GAIL with less than `batch_size` number of demonstrations. (#3591)
- The interfaces to the `SideChannel` classes (on C# and python) have changed to use new `IncomingMessage` and `OutgoingMessage` classes. These should make reading and writing data to the channel easier. (#3596)
## [0.14.1-preview] - 2020-02-25

93
com.unity.ml-agents/Documentation~/com.unity.ml-agents.md


Please see the [ML-Agents README](https://github.com/Unity-Technologies/ml-agents/blob/master/README.md)
# About ML-Agents package (`com.unity.ml-agents`)
The Unity ML-Agents package contains the C# SDK for the
[Unity ML-Agents Toolkit](https://github.com/Unity-Technologies/ml-agents).
The package provides the ability for any Unity scene to be converted into a learning
environment where character behaviors can be trained using a variety of machine learning
algorithms. Additionally, it enables any trained behavior to be embedded back into the Unity
scene. More specifically, the package provides the following core functionalities:
* Define Agents: entities whose behavior will be learned. Agents are entities
that generate observations (through sensors), take actions and receive rewards from
the environment.
* Define Behaviors: entities that specify how an agent should act. Multiple agents can
share the same Behavior and a scene may have multiple Behaviors.
* Record demonstrations of an agent within the Editor. These demonstrations can be
valuable to train a behavior for that agent.
* Embedding a trained behavior into the scene via the
[Unity Inference Engine](https://docs.unity3d.com/Packages/com.unity.barracuda@latest/index.html).
Thus an Agent can switch from a learning behavior to an inference behavior.
Note that this package does not contain the machine learning algorithms for training
behaviors. It relies on a Python package to orchestrate the training. This package
only enables instrumenting a Unity scene and setting it up for training, and then
embedding the trained model back into your Unity scene.
## Preview package
This package is available as a preview, so it is not ready for production use.
The features and documentation in this package might change before it is verified for release.
## Package contents
The following table describes the package folder structure:
|**Location**|**Description**|
|---|---|
|*Documentation~*|Contains the documentation for the Unity package.|
|*Editor*|Contains utilities for Editor windows and drawers.|
|*Plugins*|Contains third-party DLLs.|
|*Runtime*|Contains core C# APIs for integrating ML-Agents into your Unity scene. |
|*Tests*|Contains the unit tests for the package.|
<a name="Installation"></a>
## Installation
To install this package, follow the instructions in the
[Package Manager documentation](https://docs.unity3d.com/Manual/upm-ui-install.html).
To install the Python package to enable training behaviors, follow the instructions on our
[GitHub repository](https://github.com/Unity-Technologies/ml-agents/blob/latest_release/docs/Installation.md).
## Requirements
This version of the Unity ML-Agents package is compatible with the following versions of the Unity Editor:
* 2018.4 and later (recommended)
## Known limitations
### Headless Mode
If you enable Headless mode, you will not be able to collect visual observations
from your agents.
### Rendering Speed and Synchronization
Currently the speed of the game physics can only be increased to 100x real-time.
The Academy also moves in time with FixedUpdate() rather than Update(), so game
behavior implemented in Update() may be out of sync with the agent decision
making. See
[Execution Order of Event Functions](https://docs.unity3d.com/Manual/ExecutionOrder.html)
for more information.
You can control the frequency of Academy stepping by calling
`Academy.Instance.DisableAutomaticStepping()`, and then calling
`Academy.Instance.EnvironmentStep()`
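A minimal sketch of that manual-stepping pattern, assuming a plain MonoBehaviour drives the loop (the class name is hypothetical; `DisableAutomaticStepping` and `EnvironmentStep` are the calls named above):

```csharp
using UnityEngine;
using MLAgents;

public class ManualAcademyStepper : MonoBehaviour
{
    void Awake()
    {
        // Take over stepping from the Academy's FixedUpdate-driven loop.
        Academy.Instance.DisableAutomaticStepping();
    }

    void FixedUpdate()
    {
        // Advance the ML-Agents step (decision requests, actions, etc.) explicitly.
        Academy.Instance.EnvironmentStep();
    }
}
```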
### Unity Inference Engine Models
Currently, only models created with our trainers are supported for running
ML-Agents with a neural network behavior.
## Helpful links
If you are new to the Unity ML-Agents package, or have a question after reading
the documentation, you can check out our
[GitHub Repository](https://github.com/Unity-Technologies/ml-agents), which
also includes a number of ways to
[connect with us](https://github.com/Unity-Technologies/ml-agents#community-and-feedback)
including our [ML-Agents Forum](https://forum.unity.com/forums/ml-agents.453/).

71
com.unity.ml-agents/Editor/BehaviorParametersEditor.cs


using UnityEditor;
using Barracuda;
using MLAgents.Policies;
using UnityEngine;
namespace MLAgents.Editor
{

{
var so = serializedObject;
so.Update();
bool needPolicyUpdate; // Whether the name, model, inference device, or BehaviorType changed.
EditorGUI.indentLevel++;
EditorGUI.BeginChangeCheck(); // global
EditorGUI.indentLevel++;
EditorGUILayout.PropertyField(so.FindProperty("m_BehaviorName"));
EditorGUILayout.PropertyField(so.FindProperty("m_BrainParameters"), true);
EditorGUILayout.PropertyField(so.FindProperty("m_Model"), true);
EditorGUI.indentLevel++;
EditorGUILayout.PropertyField(so.FindProperty("m_InferenceDevice"), true);
EditorGUI.indentLevel--;
EditorGUILayout.PropertyField(so.FindProperty("m_BehaviorType"));
{
EditorGUILayout.PropertyField(so.FindProperty("m_BehaviorName"));
}
needPolicyUpdate = EditorGUI.EndChangeCheck();
EditorGUI.BeginDisabledGroup(Application.isPlaying);
{
EditorGUILayout.PropertyField(so.FindProperty("m_BrainParameters"), true);
}
EditorGUI.EndDisabledGroup();
EditorGUI.BeginChangeCheck();
{
EditorGUILayout.PropertyField(so.FindProperty("m_Model"), true);
EditorGUI.indentLevel++;
EditorGUILayout.PropertyField(so.FindProperty("m_InferenceDevice"), true);
EditorGUI.indentLevel--;
}
needPolicyUpdate = needPolicyUpdate || EditorGUI.EndChangeCheck();
EditorGUI.BeginChangeCheck();
{
EditorGUILayout.PropertyField(so.FindProperty("m_BehaviorType"));
}
needPolicyUpdate = needPolicyUpdate || EditorGUI.EndChangeCheck();
EditorGUILayout.PropertyField(so.FindProperty("m_UseChildSensors"), true);
// EditorGUILayout.PropertyField(serializedObject.FindProperty("m_Heuristic"), true);
EditorGUI.indentLevel--;
if (EditorGUI.EndChangeCheck())
EditorGUI.BeginDisabledGroup(Application.isPlaying);
m_RequireReload = true;
EditorGUILayout.PropertyField(so.FindProperty("m_UseChildSensors"), true);
EditorGUI.EndDisabledGroup();
EditorGUI.indentLevel--;
m_RequireReload = EditorGUI.EndChangeCheck();
if (needPolicyUpdate)
{
UpdateAgentPolicy();
}
}
/// <summary>

EditorGUILayout.HelpBox(check, MessageType.Warning);
}
}
}
}
void UpdateAgentPolicy()
{
if (Application.isPlaying)
{
var behaviorParameters = (BehaviorParameters)target;
var agent = behaviorParameters.GetComponent<Agent>();
if (agent == null)
{
return;
}
agent.ReloadPolicy();
}
}
}

23
com.unity.ml-agents/Editor/RayPerceptionSensorComponentBaseEditor.cs


EditorGUI.BeginChangeCheck();
EditorGUI.indentLevel++;
EditorGUILayout.PropertyField(so.FindProperty("m_SensorName"), true);
// Because the number of rays and the tags affect the observation shape,
// they are not editable during play mode.
// Don't allow certain fields to be modified during play mode.
// * SensorName affects the ordering of the Agent's observations
// * The number of tags and rays affects the size of the observations.
EditorGUILayout.PropertyField(so.FindProperty("m_SensorName"), true);
EditorGUILayout.PropertyField(so.FindProperty("m_DetectableTags"), true);
EditorGUILayout.PropertyField(so.FindProperty("m_RaysPerDirection"), true);
}

m_RequireSensorUpdate = true;
}
UpdateSensorIfDirty();
UpdateSensorIfDirty();
if (m_RequireSensorUpdate)
{
var sensorComponent = serializedObject.targetObject as RayPerceptionSensorComponentBase;
sensorComponent?.UpdateSensor();
m_RequireSensorUpdate = false;
}
if (m_RequireSensorUpdate)
{
var sensorComponent = serializedObject.targetObject as RayPerceptionSensorComponentBase;
sensorComponent?.UpdateSensor();
m_RequireSensorUpdate = false;
}
}
}

2
com.unity.ml-agents/Runtime/Academy.cs


/// <summary>
/// Collection of float properties (indexed by a string).
/// </summary>
public IFloatProperties FloatProperties;
public FloatPropertiesChannel FloatProperties;
// Fields not provided in the Inspector.

153
com.unity.ml-agents/Runtime/Agent.cs


public int maxStep;
}
public int TeamId {
get {
LazyInitialize();
return m_PolicyFactory.TeamId;
}
}
public string BehaviorName {
get {
LazyInitialize();
return m_PolicyFactory.behaviorName;
}
}
[SerializeField][HideInInspector]
internal AgentParameters agentParameters;
[SerializeField][HideInInspector]

/// </summary>
internal VectorSensor collectObservationsSensor;
void OnEnable()
/// <summary>
/// Called when the attached <see cref="GameObject"/> becomes enabled and active.
/// </summary>
protected virtual void OnEnable()
{
LazyInitialize();
}

Academy.Instance.AgentForceReset += _AgentReset;
m_Brain = m_PolicyFactory.GeneratePolicy(Heuristic);
ResetData();
InitializeAgent();
Initialize();
InitializeSensors();
}

Disabled,
}
void OnDisable()
/// <summary>
/// Called when the attached <see cref="GameObject"/> becomes disabled and inactive.
/// </summary>
protected virtual void OnDisable()
{
DemonstrationWriters.Clear();

m_Brain?.RequestDecision(m_Info, sensors);
// We also have to write to any DemonstrationWriters so that they get the "done" flag.
foreach(var demoWriter in DemonstrationWriters)
foreach (var demoWriter in DemonstrationWriters)
{
demoWriter.Record(m_Info, sensors);
}

m_RequestDecision = false;
}
[Obsolete("GiveModel() has been deprecated, use SetModel() instead.")]
public void GiveModel(
string behaviorName,
NNModel model,
InferenceDevice inferenceDevice = InferenceDevice.CPU)
{
SetModel(behaviorName, model, inferenceDevice);
}
/// <summary>
/// Updates the Model for the agent. Any model currently assigned to the
/// agent will be replaced with the provided one. If the arguments are

/// <param name="model"> The model to use for inference.</param>
/// <param name = "inferenceDevice"> Define on what device the model
/// will be run.</param>
public void GiveModel(
public void SetModel(
m_PolicyFactory.GiveModel(behaviorName, model, inferenceDevice);
if (behaviorName == m_PolicyFactory.behaviorName &&
model == m_PolicyFactory.model &&
inferenceDevice == m_PolicyFactory.inferenceDevice)
{
// If everything is the same, don't make any changes.
return;
}
m_PolicyFactory.model = model;
m_PolicyFactory.inferenceDevice = inferenceDevice;
m_PolicyFactory.behaviorName = behaviorName;
ReloadPolicy();
}
/// <summary>
/// Updates the type of behavior for the agent.
/// </summary>
/// <param name="behaviorType"> The new behaviorType for the Agent.</param>
public void SetBehaviorType(BehaviorType behaviorType)
{
if (m_PolicyFactory.behaviorType == behaviorType)
{
return;
}
m_PolicyFactory.behaviorType = behaviorType;
ReloadPolicy();
}
internal void ReloadPolicy()
{
}
/// <summary>

TimerStack.Instance.SetGauge(gaugeName, GetCumulativeReward());
}
[Obsolete("Done() has been deprecated, use EndEpisode() instead.")]
public void Done()
{
EndEpisode();
}
public void Done()
public void EndEpisode()
{
NotifyAgentDone(DoneReason.DoneCalled);
_AgentReset();

m_RequestAction = true;
}
/// Helper function that resets all the data structures associated with
/// the agent. Typically used when the agent is being initialized or reset
/// at the end of an episode.

// should stay the previous action before the Done(), so that it is properly recorded.
if (m_Action.vectorActions == null)
{
if (param.vectorActionSpaceType == SpaceType.Continuous)
{
m_Action.vectorActions = new float[param.vectorActionSize[0]];
m_Info.storedVectorActions = new float[param.vectorActionSize[0]];
}
else
{
m_Action.vectorActions = new float[param.vectorActionSize.Length];
m_Info.storedVectorActions = new float[param.vectorActionSize.Length];
}
m_Action.vectorActions = new float[param.numActions];
m_Info.storedVectorActions = new float[param.numActions];
}
[Obsolete("InitializeAgent() has been deprecated, use Initialize() instead.")]
public virtual void InitializeAgent()
{
}
/// <summary>

/// One sample use is to store local references to other objects in the
/// scene which would facilitate computing this agents observation.
/// </remarks>
public virtual void InitializeAgent()
public virtual void Initialize()
#pragma warning disable 0618
InitializeAgent();
#pragma warning restore 0618
}
/// <summary>

{
Debug.LogWarning("Heuristic method called but not implemented. Returning placeholder actions.");
var param = m_PolicyFactory.brainParameters;
var actionSize = param.vectorActionSpaceType == SpaceType.Continuous ?
param.vectorActionSize[0] :
param.vectorActionSize.Length;
return new float[actionSize];
return new float[param.numActions];
}
/// <summary>

/// </summary>
void SendInfoToBrain()
{
if (!m_Initialized)
{
throw new UnityAgentsException("Call to SendInfoToBrain when Agent hasn't been initialized." +
"Please ensure that you are calling 'base.OnEnable()' if you have overridden OnEnable.");
}
if (m_Brain == null)
{
return;

m_Brain.RequestDecision(m_Info, sensors);
// If we have any DemonstrationWriters, write the AgentInfo and sensors to them.
foreach(var demoWriter in DemonstrationWriters)
foreach (var demoWriter in DemonstrationWriters)
{
demoWriter.Record(m_Info, sensors);
}

sensor.Update();
}
}
/// <summary>
/// Collects the vector observations of the agent.

{
}
[Obsolete("AgentAction() has been deprecated, use OnActionReceived() instead.")]
public virtual void AgentAction(float[] vectorAction)
{
}
/// <summary>
/// Specifies the agent behavior at every step based on the provided
/// action.

/// will be of length 1.
/// </param>
public virtual void AgentAction(float[] vectorAction)
public virtual void OnActionReceived(float[] vectorAction)
{
#pragma warning disable 0618
AgentAction(m_Action.vectorActions);
#pragma warning restore 0618
}
[Obsolete("AgentReset() has been deprecated, use OnEpisodeBegin() instead.")]
public virtual void AgentReset()
{
}

/// episode).
/// </summary>
public virtual void AgentReset()
public virtual void OnEpisodeBegin()
#pragma warning disable 0618
AgentReset();
#pragma warning restore 0618
}
/// <summary>

}
/// <summary>
/// This method will forcefully reset the agent and will also reset the hasAlreadyReset flag.
/// This way, even if the agent was already in the process of resetting, it will be reset again
/// and will not send a Done flag at the next step.
/// </summary>
void ForceReset()
{
_AgentReset();
}
/// <summary>
/// An internal reset method that updates internal data structures in
/// addition to calling <see cref="AgentReset"/>.
/// </summary>

m_StepCount = 0;
AgentReset();
OnEpisodeBegin();
}
/// <summary>

if ((m_RequestAction) && (m_Brain != null))
{
m_RequestAction = false;
AgentAction(m_Action.vectorActions);
OnActionReceived(m_Action.vectorActions);
}
if ((m_StepCount >= maxStep) && (maxStep > 0))

void DecideAction()
{
m_Action.vectorActions = m_Brain?.DecideAction();
if (m_Action.vectorActions == null){
if (m_Action.vectorActions == null)
{
ResetData();
}
}
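`SetModel`, as shown above, returns early when the behavior name, model, and inference device are all unchanged, so it is safe to call repeatedly. A hypothetical runtime swap between a trained model and the heuristic might look like the sketch below; the field names and the "Frozen" behavior name are placeholders, and the `MLAgents.Policies` using for `InferenceDevice`/`BehaviorType` is an assumption based on this diff.

```csharp
using UnityEngine;
using MLAgents;
using MLAgents.Policies;  // assumed namespace for InferenceDevice and BehaviorType on this branch
using Barracuda;          // NNModel

public class ModelSwapper : MonoBehaviour
{
    public Agent agent;          // assigned in the Inspector (hypothetical wiring)
    public NNModel frozenModel;  // a previously trained model asset

    public void UseFrozenModel()
    {
        // Replaces the deprecated GiveModel(); identical arguments make this a no-op.
        agent.SetModel("Frozen", frozenModel, InferenceDevice.CPU);
    }

    public void UseHeuristic()
    {
        // Force the agent to use its Heuristic() regardless of any attached model.
        agent.SetBehaviorType(BehaviorType.HeuristicOnly);
    }
}
```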

12
com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs


}
else
{
var compressedObs = sensor.GetCompressedObservation();
if (compressedObs == null)
{
throw new UnityAgentsException(
$"GetCompressedObservation() returned null data for sensor named {sensor.GetName()}. " +
"You must return a byte[]. If you don't want to use compressed observations, " +
"return SensorCompressionType.None from GetCompressionType()."
);
}
CompressedData = ByteString.CopyFrom(sensor.GetCompressedObservation()),
CompressedData = ByteString.CopyFrom(compressedObs),
CompressionType = (CompressionTypeProto)sensor.GetCompressionType(),
};
}

61
com.unity.ml-agents/Runtime/Communicator/RpcCommunicator.cs


"Python library version: {2}.",
pythonCommunicationVersion, initParameters.unityCommunicationVersion,
pythonPackageVersion
);
);
}
else
{

"A side channel with type index {0} is already registered. You cannot register multiple " +
"side channels of the same id.", channelId));
}
// Process any messages that we've already received for this channel ID.
var numMessages = m_CachedMessages.Count;
for (int i = 0; i < numMessages; i++)
{
var cachedMessage = m_CachedMessages.Dequeue();
if (channelId == cachedMessage.ChannelId)
{
using (var incomingMsg = new IncomingMessage(cachedMessage.Message))
{
sideChannel.OnMessageReceived(incomingMsg);
}
}
else
{
m_CachedMessages.Enqueue(cachedMessage);
}
}
m_SideChannels.Add(channelId, sideChannel);
}

}
}
private struct CachedSideChannelMessage
{
public Guid ChannelId;
public byte[] Message;
}
private static Queue<CachedSideChannelMessage> m_CachedMessages = new Queue<CachedSideChannelMessage>();
/// <summary>
/// Separates the data received from Python into individual messages for each registered side channel.
/// </summary>

{
while (m_CachedMessages.Count != 0)
{
var cachedMessage = m_CachedMessages.Dequeue();
if (sideChannels.ContainsKey(cachedMessage.ChannelId))
{
using (var incomingMsg = new IncomingMessage(cachedMessage.Message))
{
sideChannels[cachedMessage.ChannelId].OnMessageReceived(incomingMsg);
}
}
else
{
Debug.Log(string.Format(
"Unknown side channel data received. Channel Id is "
+ ": {0}", cachedMessage.ChannelId));
}
}
if (dataReceived.Length == 0)
{
return;

}
if (sideChannels.ContainsKey(channelId))
{
sideChannels[channelId].OnMessageReceived(message);
using (var incomingMsg = new IncomingMessage(message))
{
sideChannels[channelId].OnMessageReceived(incomingMsg);
}
Debug.Log(string.Format(
"Unknown side channel data received. Channel Id is "
+ ": {0}", channelId));
// Don't recognize this ID, but cache it in case the SideChannel that can handle
// it is registered before the next call to ProcessSideChannelData.
m_CachedMessages.Enqueue(new CachedSideChannelMessage
{
ChannelId = channelId,
Message = message
});
}
}
}
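The caching above lets messages that arrive before a channel is registered be replayed once it is. A hypothetical custom channel registered through the Academy (per the changelog's Academy.RegisterSideChannel entry) could look like the sketch below; the GUID, class names, and the `ReadString()` accessor are assumptions on my part, not part of this diff.

```csharp
using System;
using UnityEngine;
using MLAgents;
using MLAgents.SideChannels;  // assumed namespace after the SideChannel move noted in the CHANGELOG

public class StringLogChannel : SideChannel
{
    public StringLogChannel()
    {
        // A fixed, hypothetical GUID identifying this channel on both the C# and Python sides.
        ChannelId = new Guid("12345678-9abc-def0-1234-56789abcdef0");
    }

    public override void OnMessageReceived(IncomingMessage msg)
    {
        // Assumes IncomingMessage exposes ReadString(), as the built-in channels do.
        Debug.Log("Python says: " + msg.ReadString());
    }
}

public class StringLogRegistration : MonoBehaviour
{
    StringLogChannel m_Channel;

    void Awake()
    {
        m_Channel = new StringLogChannel();
        Academy.Instance.RegisterSideChannel(m_Channel);
    }

    void OnDestroy()
    {
        if (Academy.IsInitialized)
        {
            Academy.Instance.UnregisterSideChannel(m_Channel);
        }
    }
}
```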

9
com.unity.ml-agents/Runtime/DecisionRequester.cs


using System.Runtime.CompilerServices;
using UnityEngine.Serialization;
namespace MLAgents
{

/// </summary>
[AddComponentMenu("ML Agents/Decision Requester", (int)MenuGroup.Default)]
public class DecisionRequester : MonoBehaviour
internal class DecisionRequester : MonoBehaviour
{
/// <summary>
/// The frequency with which the agent requests a decision. A DecisionPeriod of 5 means

[Tooltip("Indicates whether or not the agent will take an action during the Academy " +
"steps where it does not request a decision. Has no effect when DecisionPeriod " +
"is set to 1.")]
public bool RepeatAction = true;
[FormerlySerializedAs("RepeatAction")]
public bool TakeActionsBetweenDecisions = true;
/// <summary>
/// Whether or not the Agent decisions should start at an offset (different for each agent).

{
m_Agent?.RequestDecision();
}
if (RepeatAction)
if (TakeActionsBetweenDecisions)
{
m_Agent?.RequestAction();
}

2
com.unity.ml-agents/Runtime/Inference/ModelRunner.cs


m_VisualObservationsInitialized = true;
}
Profiler.BeginSample("LearningBrain.DecideAction");
Profiler.BeginSample("ModelRunner.DecideAction");
Profiler.BeginSample($"MLAgents.{m_Model.name}.GenerateTensors");
// Prepare the input tensors to be fed into the engine

151
com.unity.ml-agents/Runtime/Policies/BehaviorParameters.cs


namespace MLAgents.Policies
{
/// <summary>
/// Defines what type of behavior the Agent will be using
/// </summary>
[Serializable]
public enum BehaviorType
{
/// <summary>
/// The Agent will use the remote process for decision making. If unavailable,
/// it will use inference, and if no model is provided, it will use the heuristic.
/// </summary>
Default,
/// <summary>
/// The Agent will always use its heuristic
/// </summary>
HeuristicOnly,
/// <summary>
/// The Agent will always use inference with the provided
/// neural network model.
/// </summary>
InferenceOnly
}
/// <summary>
internal class BehaviorParameters : MonoBehaviour
public class BehaviorParameters : MonoBehaviour
[Serializable]
enum BehaviorType
[HideInInspector, SerializeField]
BrainParameters m_BrainParameters = new BrainParameters();
/// <summary>
/// The associated <see cref="BrainParameters"/> for this behavior.
/// </summary>
public BrainParameters brainParameters
Default,
HeuristicOnly,
InferenceOnly
get { return m_BrainParameters; }
internal set { m_BrainParameters = value; }
[HideInInspector]
[SerializeField]
BrainParameters m_BrainParameters = new BrainParameters();
[HideInInspector]
[SerializeField]
[HideInInspector, SerializeField]
[HideInInspector]
[SerializeField]
/// <summary>
/// The neural network model used when in inference mode.
/// This cannot be set directly; use <see cref="Agent.GiveModel(string,NNModel,InferenceDevice)"/>
/// to set it.
/// </summary>
public NNModel model
{
get { return m_Model; }
internal set { m_Model = value; }
}
[HideInInspector, SerializeField]
[HideInInspector]
[SerializeField]
/// <summary>
/// How inference is performed for this Agent's model.
/// This cannot be set directly; use <see cref="Agent.GiveModel(string,NNModel,InferenceDevice)"/>
/// to set it.
/// </summary>
public InferenceDevice inferenceDevice
{
get { return m_InferenceDevice; }
internal set { m_InferenceDevice = value; }
}
// Disable warning /com.unity.ml-agents/Runtime/Policy/BehaviorParameters.cs(...):
// warning CS0649: Field 'BehaviorParameters.m_BehaviorType' is never assigned to,
// and will always have its default value
// This field is set in the custom editor.
#pragma warning disable 0649
[HideInInspector, SerializeField]
#pragma warning restore 0649
[HideInInspector]
[SerializeField]
/// <summary>
/// The BehaviorType for the Agent.
/// This cannot be set directly; use <see cref="Agent.SetBehaviorType(BehaviorType)"/>
/// to set it.
/// </summary>
public BehaviorType behaviorType
{
get { return m_BehaviorType; }
internal set { m_BehaviorType = value; }
}
[HideInInspector, SerializeField]
/// The name of this behavior, which is used as a base name. See
/// <see cref="fullyQualifiedBehaviorName"/> for the full name.
/// This cannot be set directly; use <see cref="Agent.GiveModel(string,NNModel,InferenceDevice)"/>
/// to set it.
/// </summary>
public string behaviorName
{
get { return m_BehaviorName; }
internal set { m_BehaviorName = value; }
}
/// <summary>
[HideInInspector]
[SerializeField]
[FormerlySerializedAs("m_TeamID")]
[HideInInspector, SerializeField, FormerlySerializedAs("m_TeamID")]
// TODO properties here instead of Agent
[FormerlySerializedAs("m_useChildSensors")]
[HideInInspector]

/// <summary>
/// The associated <see cref="BrainParameters"/> for this behavior.
/// </summary>
internal BrainParameters brainParameters
{
get { return m_BrainParameters; }
}
/// <summary>
}
/// <summary>
/// The name of this behavior, which is used as a base name. See
/// <see cref="fullyQualifiedBehaviorName"/> for the full name.
/// </summary>
public string behaviorName
{
get { return m_BehaviorName; }
internal set { m_UseChildSensors = value; } // TODO make public, don't allow changes at runtime
}
/// <summary>

get { return m_BehaviorName + "?team=" + TeamId; }
}
public IPolicy GeneratePolicy(Func<float[]> heuristic)
internal IPolicy GeneratePolicy(Func<float[]> heuristic)
{
switch (m_BehaviorType)
{

default:
return new HeuristicPolicy(heuristic);
}
}
/// <summary>
/// Updates the model and related details for this behavior.
/// </summary>
/// <param name="newBehaviorName">New name for the behavior.</param>
/// <param name="model">New neural network model for this behavior.</param>
/// <param name="inferenceDevice">New inference device for this behavior.</param>
public void GiveModel(
string newBehaviorName,
NNModel model,
InferenceDevice inferenceDevice = InferenceDevice.CPU)
{
m_Model = model;
m_InferenceDevice = inferenceDevice;
m_BehaviorName = newBehaviorName;
}
}
}

20
com.unity.ml-agents/Runtime/Policies/BrainParameters.cs


/// <summary>
/// Whether the action space is discrete or continuous.
/// </summary>
internal enum SpaceType
public enum SpaceType
{
/// <summary>
/// Discrete action space: a fixed number of options are available.

/// decision process.
/// </summary>
[Serializable]
internal class BrainParameters
public class BrainParameters
{
/// <summary>
/// If continuous : The length of the float vector that represents the state.

/// Defines if the action is discrete or continuous.
/// </summary>
public SpaceType vectorActionSpaceType = SpaceType.Discrete;
public int numActions
{
get
{
switch (vectorActionSpaceType)
{
case SpaceType.Discrete:
return vectorActionSize.Length;
case SpaceType.Continuous:
return vectorActionSize[0];
default:
return 0;
}
}
}
/// <summary>
/// Deep clones the BrainParameter object.

19
com.unity.ml-agents/Runtime/Sensors/CameraSensor.cs


SensorCompressionType m_CompressionType;
/// <summary>
/// The Camera used for rendering the sensor observations.
/// </summary>
public Camera camera
{
get { return m_Camera; }
set { m_Camera = value; }
}
/// <summary>
/// The compression type used by the sensor.
/// </summary>
public SensorCompressionType compressionType
{
get { return m_CompressionType; }
set { m_CompressionType = value; }
}
/// <summary>
/// Creates and returns the camera sensor.
/// </summary>
/// <param name="camera">Camera object to capture images from.</param>

78
com.unity.ml-agents/Runtime/Sensors/CameraSensorComponent.cs


using UnityEngine;
using UnityEngine.Serialization;
namespace MLAgents.Sensors
{

[AddComponentMenu("ML Agents/Camera Sensor", (int)MenuGroup.Sensors)]
public class CameraSensorComponent : SensorComponent
{
[HideInInspector, SerializeField, FormerlySerializedAs("camera")]
Camera m_Camera;
CameraSensor m_Sensor;
public new Camera camera;
public new Camera camera
{
get { return m_Camera; }
set { m_Camera = value; UpdateSensor(); }
}
[HideInInspector, SerializeField, FormerlySerializedAs("sensorName")]
string m_SensorName = "CameraSensor";
public string sensorName = "CameraSensor";
public string sensorName
{
get { return m_SensorName; }
internal set { m_SensorName = value; }
}
[HideInInspector, SerializeField, FormerlySerializedAs("width")]
int m_Width = 84;
/// Width of the generated image.
/// Width of the generated observation.
public int width = 84;
public int width
{
get { return m_Width; }
internal set { m_Width = value; }
}
[HideInInspector, SerializeField, FormerlySerializedAs("height")]
int m_Height = 84;
/// Height of the generated image.
/// Height of the generated observation.
public int height = 84;
public int height
{
get { return m_Height; }
internal set { m_Height = value; }
}
[HideInInspector, SerializeField, FormerlySerializedAs("grayscale")]
public bool m_Grayscale;
public bool grayscale;
public bool grayscale
{
get { return m_Grayscale; }
internal set { m_Grayscale = value; }
}
[HideInInspector, SerializeField, FormerlySerializedAs("compression")]
SensorCompressionType m_Compression = SensorCompressionType.PNG;
public SensorCompressionType compression = SensorCompressionType.PNG;
public SensorCompressionType compression
{
get { return m_Compression; }
set { m_Compression = value; UpdateSensor(); }
}
/// <summary>
/// Creates the <see cref="CameraSensor"/>

{
return new CameraSensor(camera, width, height, grayscale, sensorName, compression);
m_Sensor = new CameraSensor(m_Camera, m_Width, m_Height, grayscale, m_SensorName, compression);
return m_Sensor;
}
/// <summary>

public override int[] GetObservationShape()
{
return CameraSensor.GenerateShape(width, height, grayscale);
return CameraSensor.GenerateShape(m_Width, m_Height, grayscale);
}
/// <summary>
/// Update fields that are safe to change on the Sensor at runtime.
/// </summary>
internal void UpdateSensor()
{
if (m_Sensor != null)
{
m_Sensor.camera = m_Camera;
m_Sensor.compressionType = m_Compression;
}
}
}
}

8
com.unity.ml-agents/Runtime/Sensors/RayPerceptionSensorComponent3D.cs


[AddComponentMenu("ML Agents/Ray Perception Sensor 3D", (int)MenuGroup.Sensors)]
public class RayPerceptionSensorComponent3D : RayPerceptionSensorComponentBase
{
[HideInInspector]
[SerializeField]
[FormerlySerializedAs("startVerticalOffset")]
[HideInInspector, SerializeField, FormerlySerializedAs("startVerticalOffset")]
[Range(-10f, 10f)]
[Tooltip("Ray start is offset up or down by this amount.")]
float m_StartVerticalOffset;

set { m_StartVerticalOffset = value; UpdateSensor(); }
}
[HideInInspector]
[SerializeField]
[FormerlySerializedAs("endVerticalOffset")]
[HideInInspector, SerializeField, FormerlySerializedAs("endVerticalOffset")]
[Range(-10f, 10f)]
[Tooltip("Ray end is offset up or down by this amount.")]
float m_EndVerticalOffset;

49
com.unity.ml-agents/Runtime/Sensors/RayPerceptionSensorComponentBase.cs


/// </summary>
public abstract class RayPerceptionSensorComponentBase : SensorComponent
{
[HideInInspector]
[SerializeField]
[FormerlySerializedAs("sensorName")]
[HideInInspector, SerializeField, FormerlySerializedAs("sensorName")]
string m_SensorName = "RayPerceptionSensor";
/// <summary>

internal set => m_SensorName = value;
}
[SerializeField]
[FormerlySerializedAs("detectableTags")]
[SerializeField, FormerlySerializedAs("detectableTags")]
[Tooltip("List of tags in the scene to compare against.")]
List<string> m_DetectableTags;

internal set => m_DetectableTags = value;
}
[HideInInspector]
[SerializeField]
[FormerlySerializedAs("raysPerDirection")]
[HideInInspector, SerializeField, FormerlySerializedAs("raysPerDirection")]
[Range(0, 50)]
[Tooltip("Number of rays to the left and right of center.")]
int m_RaysPerDirection = 3;

internal set => m_RaysPerDirection = value;
}
[HideInInspector]
[SerializeField]
[FormerlySerializedAs("maxRayDegrees")]
[HideInInspector, SerializeField, FormerlySerializedAs("maxRayDegrees")]
"Greater than 90 degrees will go backwards.")]
"Greater than 90 degrees will go backwards.")]
float m_MaxRayDegrees = 70;
/// <summary>

set { m_MaxRayDegrees = value; UpdateSensor(); }
}
[HideInInspector]
[SerializeField]
[FormerlySerializedAs("sphereCastRadius")]
[HideInInspector, SerializeField, FormerlySerializedAs("sphereCastRadius")]
[Range(0f, 10f)]
[Tooltip("Radius of sphere to cast. Set to zero for raycasts.")]
float m_SphereCastRadius = 0.5f;

set { m_SphereCastRadius = value; UpdateSensor(); }
}
[HideInInspector]
[SerializeField]
[FormerlySerializedAs("rayLength")]
[HideInInspector, SerializeField, FormerlySerializedAs("rayLength")]
[Range(1, 1000)]
[Tooltip("Length of the rays to cast.")]
float m_RayLength = 20f;

set { m_RayLength = value; UpdateSensor(); }
}
[HideInInspector]
[SerializeField]
[FormerlySerializedAs("rayLayerMask")]
[HideInInspector, SerializeField, FormerlySerializedAs("rayLayerMask")]
[Tooltip("Controls which layers the rays can hit.")]
LayerMask m_RayLayerMask = Physics.DefaultRaycastLayers;

set { m_RayLayerMask = value; UpdateSensor();}
}
[HideInInspector]
[SerializeField]
[FormerlySerializedAs("observationStacks")]
[HideInInspector, SerializeField, FormerlySerializedAs("observationStacks")]
[Range(1, 50)]
[Tooltip("Whether to stack previous observations. Using 1 means no previous observations.")]
int m_ObservationStacks = 1;

RayPerceptionSensor m_RaySensor;
/// <summary>
/// Get the RayPerceptionSensor that was created.
/// </summary>
public RayPerceptionSensor raySensor
{
get => m_RaySensor;
}
/// <summary>
/// Returns the <see cref="RayPerceptionCastType"/> for the associated raycast sensor.
/// </summary>
/// <returns></returns>

return new[] { obsSize * stacks };
}
RayPerceptionInput GetRayPerceptionInput()
/// <summary>
/// Get the RayPerceptionInput that is used by the <see cref="RayPerceptionSensor"/>.
/// </summary>
/// <returns></returns>
public RayPerceptionInput GetRayPerceptionInput()
{
var rayAngles = GetRayAngles(raysPerDirection, maxRayDegrees);

/// <summary>
/// Draw the debug information from the sensor (if available).
/// </summary>
void DrawRaycastGizmos(DebugDisplayInfo.RayInfo rayInfo, float alpha=1.0f)
void DrawRaycastGizmos(DebugDisplayInfo.RayInfo rayInfo, float alpha = 1.0f)
{
var startPositionWorld = rayInfo.worldStart;
var endPositionWorld = rayInfo.worldEnd;

28
com.unity.ml-agents/Runtime/Sensors/RenderTextureSensor.cs


SensorCompressionType m_CompressionType;
/// <summary>
/// The compression type used by the sensor.
/// </summary>
public SensorCompressionType compressionType
{
get { return m_CompressionType; }
set { m_CompressionType = value; }
}
/// <summary>
/// Initializes the sensor.
/// </summary>
/// <param name="renderTexture">The <see cref="RenderTexture"/> instance to wrap.</param>

var texture = ObservationToTexture(m_RenderTexture);
// TODO support more types here, e.g. JPG
var compressed = texture.EncodeToPNG();
UnityEngine.Object.Destroy(texture);
DestroyTexture(texture);
return compressed;
}
}

{
var texture = ObservationToTexture(m_RenderTexture);
var numWritten = Utilities.TextureToTensorProxy(texture, adapter, m_Grayscale);
UnityEngine.Object.Destroy(texture);
DestroyTexture(texture);
return numWritten;
}
}

texture2D.Apply();
RenderTexture.active = prevActiveRt;
return texture2D;
}
static void DestroyTexture(Texture2D texture)
{
if (Application.isEditor)
{
// Edit Mode tests complain if we use Destroy()
// TODO move to extension methods for UnityEngine.Object?
UnityEngine.Object.DestroyImmediate(texture);
}
else
{
UnityEngine.Object.Destroy(texture);
}
}
}
}

55
com.unity.ml-agents/Runtime/Sensors/RenderTextureSensorComponent.cs


using UnityEngine;
using UnityEngine.Serialization;
namespace MLAgents.Sensors
{

[AddComponentMenu("ML Agents/Render Texture Sensor", (int)MenuGroup.Sensors)]
public class RenderTextureSensorComponent : SensorComponent
{
RenderTextureSensor m_Sensor;
public RenderTexture renderTexture;
[HideInInspector, SerializeField, FormerlySerializedAs("renderTexture")]
RenderTexture m_RenderTexture;
public RenderTexture renderTexture
{
get { return m_RenderTexture; }
set { m_RenderTexture = value; }
}
[HideInInspector, SerializeField, FormerlySerializedAs("sensorName")]
string m_SensorName = "RenderTextureSensor";
/// Name of the sensor.
/// Name of the generated <see cref="RenderTextureSensor"/>.
public string sensorName = "RenderTextureSensor";
public string sensorName
{
get { return m_SensorName; }
internal set { m_SensorName = value; }
}
[HideInInspector, SerializeField, FormerlySerializedAs("grayscale")]
public bool m_Grayscale;
public bool grayscale;
public bool grayscale
{
get { return m_Grayscale; }
internal set { m_Grayscale = value; }
}
[HideInInspector, SerializeField, FormerlySerializedAs("compression")]
SensorCompressionType m_Compression = SensorCompressionType.PNG;
public SensorCompressionType compression = SensorCompressionType.PNG;
public SensorCompressionType compression
{
get { return m_Compression; }
set { m_Compression = value; UpdateSensor(); }
}
return new RenderTextureSensor(renderTexture, grayscale, sensorName, compression);
m_Sensor = new RenderTextureSensor(renderTexture, grayscale, sensorName, compression);
return m_Sensor;
}
/// <inheritdoc/>

var height = renderTexture != null ? renderTexture.height : 0;
return new[] { height, width, grayscale ? 1 : 3 };
}
/// <summary>
/// Update fields that are safe to change on the Sensor at runtime.
/// </summary>
internal void UpdateSensor()
{
if (m_Sensor != null)
{
m_Sensor.compressionType = m_Compression;
}
}
}
}

31
com.unity.ml-agents/Runtime/SideChannels/EngineConfigurationChannel.cs


using System.IO;
using System;
using UnityEngine;

}
/// <inheritdoc/>
public override void OnMessageReceived(byte[] data)
public override void OnMessageReceived(IncomingMessage msg)
using (var memStream = new MemoryStream(data))
{
using (var binaryReader = new BinaryReader(memStream))
{
var width = binaryReader.ReadInt32();
var height = binaryReader.ReadInt32();
var qualityLevel = binaryReader.ReadInt32();
var timeScale = binaryReader.ReadSingle();
var targetFrameRate = binaryReader.ReadInt32();
var width = msg.ReadInt32();
var height = msg.ReadInt32();
var qualityLevel = msg.ReadInt32();
var timeScale = msg.ReadFloat32();
var targetFrameRate = msg.ReadInt32();
timeScale = Mathf.Clamp(timeScale, 1, 100);
timeScale = Mathf.Clamp(timeScale, 1, 100);
Screen.SetResolution(width, height, false);
QualitySettings.SetQualityLevel(qualityLevel, true);
Time.timeScale = timeScale;
Time.captureFramerate = 60;
Application.targetFrameRate = targetFrameRate;
}
}
Screen.SetResolution(width, height, false);
QualitySettings.SetQualityLevel(qualityLevel, true);
Time.timeScale = timeScale;
Time.captureFramerate = 60;
Application.targetFrameRate = targetFrameRate;
}
}
}

109
com.unity.ml-agents/Runtime/SideChannels/FloatPropertiesChannel.cs


using System.Collections.Generic;
using System.IO;
using System.Text;
/// Interface for managing a collection of float properties keyed by a string variable.
/// </summary>
public interface IFloatProperties
{
/// <summary>
/// Sets one of the float properties of the environment. This data will be sent to Python.
/// </summary>
/// <param name="key"> The string identifier of the property.</param>
/// <param name="value"> The float value of the property.</param>
void SetProperty(string key, float value);
/// <summary>
/// Get an Environment property with a default value. If there is a value for this property,
/// it will be returned, otherwise, the default value will be returned.
/// </summary>
/// <param name="key"> The string identifier of the property.</param>
/// <param name="defaultValue"> The default value of the property.</param>
/// <returns></returns>
float GetPropertyWithDefault(string key, float defaultValue);
/// <summary>
/// Registers an action to be performed every time the property is changed.
/// </summary>
/// <param name="key"> The string identifier of the property.</param>
/// <param name="action"> The action that ill be performed. Takes a float as input.</param>
void RegisterCallback(string key, Action<float> action);
/// <summary>
/// Returns a list of all the string identifiers of the properties currently present.
/// </summary>
/// <returns> The list of string identifiers </returns>
IList<string> ListProperties();
}
/// <summary>
public class FloatPropertiesChannel : SideChannel, IFloatProperties
public class FloatPropertiesChannel : SideChannel
{
Dictionary<string, float> m_FloatProperties = new Dictionary<string, float>();
Dictionary<string, Action<float>> m_RegisteredActions = new Dictionary<string, Action<float>>();

{
ChannelId = new Guid(k_FloatPropertiesDefaultId);
}
else{
else
{
public override void OnMessageReceived(byte[] data)
public override void OnMessageReceived(IncomingMessage msg)
var kv = DeserializeMessage(data);
m_FloatProperties[kv.Key] = kv.Value;
if (m_RegisteredActions.ContainsKey(kv.Key))
{
m_RegisteredActions[kv.Key].Invoke(kv.Value);
}
var key = msg.ReadString();
var value = msg.ReadFloat32();
m_FloatProperties[key] = value;
Action<float> action;
m_RegisteredActions.TryGetValue(key, out action);
action?.Invoke(value);
}
/// <inheritdoc/>

QueueMessageToSend(SerializeMessage(key, value));
if (m_RegisteredActions.ContainsKey(key))
using (var msgOut = new OutgoingMessage())
m_RegisteredActions[key].Invoke(value);
msgOut.WriteString(key);
msgOut.WriteFloat32(value);
QueueMessageToSend(msgOut);
Action<float> action;
m_RegisteredActions.TryGetValue(key, out action);
action?.Invoke(value);
if (m_FloatProperties.ContainsKey(key))
{
return m_FloatProperties[key];
}
else
{
return defaultValue;
}
float valueOut;
bool hasKey = m_FloatProperties.TryGetValue(key, out valueOut);
return hasKey ? valueOut : defaultValue;
}
/// <inheritdoc/>

public IList<string> ListProperties()
{
return new List<string>(m_FloatProperties.Keys);
}
static KeyValuePair<string, float> DeserializeMessage(byte[] data)
{
using (var memStream = new MemoryStream(data))
{
using (var binaryReader = new BinaryReader(memStream))
{
var keyLength = binaryReader.ReadInt32();
var key = Encoding.ASCII.GetString(binaryReader.ReadBytes(keyLength));
var value = binaryReader.ReadSingle();
return new KeyValuePair<string, float>(key, value);
}
}
}
static byte[] SerializeMessage(string key, float value)
{
using (var memStream = new MemoryStream())
{
using (var binaryWriter = new BinaryWriter(memStream))
{
var stringEncoded = Encoding.ASCII.GetBytes(key);
binaryWriter.Write(stringEncoded.Length);
binaryWriter.Write(stringEncoded);
binaryWriter.Write(value);
return memStream.ToArray();
}
}
}
}
}

10
com.unity.ml-agents/Runtime/SideChannels/RawBytesChannel.cs


}
/// <inheritdoc/>
public override void OnMessageReceived(byte[] data)
public override void OnMessageReceived(IncomingMessage msg)
m_MessagesReceived.Add(data);
m_MessagesReceived.Add(msg.GetRawBytes());
}
/// <summary>

/// <param name="data"> The byte array of data to send to Python.</param>
public void SendRawBytes(byte[] data)
{
QueueMessageToSend(data);
using (var msg = new OutgoingMessage())
{
msg.SetRawBytes(data);
QueueMessageToSend(msg);
}
}
/// <summary>

13
com.unity.ml-agents/Runtime/SideChannels/SideChannel.cs


using System.Collections.Generic;
using System;
using System.IO;
using System.Text;
namespace MLAgents.SideChannels
{

/// of each type. Ensure the Unity side channels will be linked to their Python equivalent.
/// </summary>
/// <returns> The integer identifier of the SideChannel.</returns>
public Guid ChannelId{
public Guid ChannelId
{
get;
protected set;
}

/// Can be called multiple times per simulation step if multiple messages were sent.
/// </summary>
/// <param name="data"> the payload of the message.</param>
public abstract void OnMessageReceived(byte[] data);
/// <param name="msg">The incoming message.</param>
public abstract void OnMessageReceived(IncomingMessage msg);
protected void QueueMessageToSend(byte[] data)
protected void QueueMessageToSend(OutgoingMessage msg)
MessageQueue.Add(data);
MessageQueue.Add(msg.ToByteArray());
}
}
}

72
com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs


using System.CodeDom;
using UnityEngine;
using NUnit.Framework;
using System.Reflection;

internal IPolicy GetPolicy()
{
return (IPolicy) typeof(Agent).GetField("m_Brain", BindingFlags.Instance | BindingFlags.NonPublic).GetValue(this);
return (IPolicy)typeof(Agent).GetField("m_Brain", BindingFlags.Instance | BindingFlags.NonPublic).GetValue(this);
}
public int initializeAgentCalls;

public TestSensor sensor1;
public TestSensor sensor2;
public override void InitializeAgent()
public override void Initialize()
{
initializeAgentCalls += 1;

sensor.AddObservation(0f);
}
public override void AgentAction(float[] vectorAction)
public override void OnActionReceived(float[] vectorAction)
{
agentActionCalls += 1;
agentActionCallsSinceLastReset += 1;

public override void AgentReset()
public override void OnEpisodeBegin()
{
agentResetCalls += 1;
collectObservationsCallsSinceLastReset = 0;

public byte[] GetCompressedObservation()
{
numCompressedCalls++;
return null;
return new byte[] { 0 };
}
public SensorCompressionType GetCompressionType()

// Set agent 1 to done every 11 steps to test behavior
if (i % 11 == 5)
{
agent1.Done();
agent1.EndEpisode();
agent2.Done();
agent2.EndEpisode();
numberAgent2Reset += 1;
agent2StepSinceReset = 0;
}

Assert.AreEqual(numSteps, agent1.heuristicCalls);
Assert.AreEqual(numSteps, agent1.sensor1.numWriteCalls);
Assert.AreEqual(numSteps, agent1.sensor2.numCompressedCalls);
}
}
[TestFixture]
public class TestOnEnableOverride
{
public class OnEnableAgent : Agent
{
public bool callBase;
protected override void OnEnable()
{
if (callBase)
base.OnEnable();
}
}
static void _InnerAgentTestOnEnableOverride(bool callBase = false)
{
var go = new GameObject();
var agent = go.AddComponent<OnEnableAgent>();
agent.callBase = callBase;
var onEnable = typeof(OnEnableAgent).GetMethod("OnEnable", BindingFlags.NonPublic | BindingFlags.Instance);
var sendInfo = typeof(Agent).GetMethod("SendInfoToBrain", BindingFlags.NonPublic | BindingFlags.Instance);
Assert.NotNull(onEnable);
onEnable.Invoke(agent, null);
Assert.NotNull(sendInfo);
if (agent.callBase)
{
Assert.DoesNotThrow(() => sendInfo.Invoke(agent, null));
}
else
{
Assert.Throws<UnityAgentsException>(() =>
{
try
{
sendInfo.Invoke(agent, null);
}
catch (TargetInvocationException e)
{
throw e.GetBaseException();
}
});
}
}
[Test]
public void TestAgentCallBaseOnEnable()
{
_InnerAgentTestOnEnableOverride(true);
}
[Test]
public void TestAgentDontCallBaseOnEnable()
{
_InnerAgentTestOnEnableOverride();
}
}
}

69
com.unity.ml-agents/Tests/Editor/SideChannelTests.cs


{
public List<int> messagesReceived = new List<int>();
public TestSideChannel() {
public TestSideChannel()
{
public override void OnMessageReceived(byte[] data)
public override void OnMessageReceived(IncomingMessage msg)
messagesReceived.Add(BitConverter.ToInt32(data, 0));
messagesReceived.Add(msg.ReadInt32());
public void SendInt(int data)
public void SendInt(int value)
QueueMessageToSend(BitConverter.GetBytes(data));
using (var msg = new OutgoingMessage())
{
msg.WriteInt32(value);
QueueMessageToSend(msg);
}
}
}

fakeData = RpcCommunicator.GetSideChannelMessage(dictSender);
RpcCommunicator.ProcessSideChannelData(dictReceiver, fakeData);
Assert.AreEqual(wasCalled, 1);
var keysA = propA.ListProperties();
Assert.AreEqual(2, keysA.Count);
Assert.IsTrue(keysA.Contains(k1));
Assert.IsTrue(keysA.Contains(k2));
var keysB = propA.ListProperties();
Assert.AreEqual(2, keysB.Count);
Assert.IsTrue(keysB.Contains(k1));
Assert.IsTrue(keysB.Contains(k2));
}
[Test]
public void TestOutgoingMessageRawBytes()
{
// Make sure that SetRawBytes resets the buffer correctly.
// Write 8 bytes (an int and float) then call SetRawBytes with 4 bytes
var msg = new OutgoingMessage();
msg.WriteInt32(42);
msg.WriteFloat32(1.0f);
var data = new byte[] { 1, 2, 3, 4 };
msg.SetRawBytes(data);
var result = msg.ToByteArray();
Assert.AreEqual(data, result);
}
[Test]
public void TestMessageReadWrites()
{
var boolVal = true;
var intVal = 1337;
var floatVal = 4.2f;
var floatListVal = new float[] { 1001, 1002 };
var stringVal = "mlagents!";
IncomingMessage incomingMsg;
using (var outgoingMsg = new OutgoingMessage())
{
outgoingMsg.WriteBoolean(boolVal);
outgoingMsg.WriteInt32(intVal);
outgoingMsg.WriteFloat32(floatVal);
outgoingMsg.WriteString(stringVal);
outgoingMsg.WriteFloatList(floatListVal);
incomingMsg = new IncomingMessage(outgoingMsg.ToByteArray());
}
Assert.AreEqual(boolVal, incomingMsg.ReadBoolean());
Assert.AreEqual(intVal, incomingMsg.ReadInt32());
Assert.AreEqual(floatVal, incomingMsg.ReadFloat32());
Assert.AreEqual(stringVal, incomingMsg.ReadString());
Assert.AreEqual(floatListVal, incomingMsg.ReadFloatList());
}
}
}

2
com.unity.ml-agents/package.json


"unity": "2018.4",
"description": "Add interactivity to your game with Machine Learning Agents trained using Deep Reinforcement Learning.",
"dependencies": {
"com.unity.barracuda": "0.6.0-preview"
"com.unity.barracuda": "0.6.1-preview"
}
}

2
config/sac_trainer_config.yaml


summary_freq: 20000
PushBlock:
max_steps: 1.5e7
max_steps: 2e6
init_entcoef: 0.05
hidden_units: 256
summary_freq: 60000

4
config/trainer_config.yaml


Bouncer:
normalize: true
max_steps: 7.0e6
max_steps: 4.0e6
max_steps: 1.5e7
max_steps: 2.0e6
batch_size: 128
buffer_size: 2048
beta: 1.0e-2

21
docs/Getting-Started-with-Balance-Ball.md


* **Behavior Parameters** — Every Agent must have a Behavior. The Behavior
determines how an Agent makes decisions. More on Behavior Parameters in
the next section.
* **Max Step** — Defines how many simulation steps can occur before the Agent
decides it is done. In 3D Balance Ball, an Agent restarts after 5000 steps.
* **Max Step** — Defines how many simulation steps can occur before the Agent's
episode ends. In 3D Balance Ball, an Agent restarts after 5000 steps.
Perhaps the more interesting aspect of an agent is the Agent subclass
implementation. When you create an Agent, you must extend the base Agent class.
When you create an Agent, you must extend the base Agent class.
* agent.AgentReset() — Called when the Agent resets, including at the beginning
of a session. The Ball3DAgent class uses the reset function to reset the
* `Agent.OnEpisodeBegin()` — Called when the Agent resets, including at the beginning
of the simulation. The Ball3DAgent class uses the reset function to reset the
* agent.CollectObservations(VectorSensor sensor) — Called every simulation step. Responsible for
* `Agent.CollectObservations(VectorSensor sensor)` — Called every simulation step. Responsible for
* agent.AgentAction() — Called every simulation step. Receives the action chosen
* `Agent.OnActionReceived()` — Called every time the Agent receives an action to take. Receives the action chosen
small change in the agent cube's rotation at each step. The `AgentAction()` function
small change in the agent cube's rotation at each step. The `OnActionReceived()` method
negative reward for dropping the ball. An Agent is also marked as done when it
negative reward for dropping the ball. An Agent's episode is also ended when it
* agent.Heuristic() - When the `Use Heuristic` checkbox is checked in the Behavior
* `Agent.Heuristic()` - When the `Behavior Type` is set to `Heuristic Only` in the Behavior
Parameters of the Agent, the Agent will use the `Heuristic()` method to generate
the actions of the Agent. As such, the `Heuristic()` method returns an array of
floats. In the case of the Ball 3D Agent, the `Heuristic()` method converts the
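For reference, a hedged sketch of a minimal Agent written against the methods listed above (the class name, the single observation, and the reward values are illustrative only and are not taken from the Ball3DAgent):

```csharp
using MLAgents;
using MLAgents.Sensors;

public class MinimalAgent : Agent
{
    // Hypothetical piece of state used only for this sketch.
    float m_BallHeight;

    public override void OnEpisodeBegin()
    {
        // Reset episode-specific state at the start of each episode.
        m_BallHeight = 1f;
    }

    public override void CollectObservations(VectorSensor sensor)
    {
        // Observations are added through the VectorSensor passed in.
        sensor.AddObservation(m_BallHeight);
    }

    public override void OnActionReceived(float[] vectorAction)
    {
        // Apply the (hypothetical) action and reward the agent for keeping the ball up.
        m_BallHeight += vectorAction[0] * 0.01f;
        AddReward(0.1f);
        if (m_BallHeight < 0f)
        {
            SetReward(-1f);
            EndEpisode();
        }
    }

    public override float[] Heuristic()
    {
        // Manual control values, used when Behavior Type is set to Heuristic Only.
        return new float[] { 0f, 0f };
    }
}
```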

160
docs/Installation.md


# Installation
To install and use ML-Agents, you need to install Unity, clone this repository and
install Python with additional dependencies. The subsections below walk through
each of these steps, in addition to a Docker set-up.
The ML-Agents Toolkit contains several components:
* Unity package ([`com.unity.ml-agents`](../com.unity.ml-agents/)) contains the Unity C#
SDK that will be integrated into your Unity scene.
* Three Python packages:
* [`mlagents`](../ml-agents/) contains the machine learning algorithms that enable you
to train behaviors in your Unity scene. Most users of ML-Agents will only need to
directly install `mlagents`.
* [`mlagents_envs`](../ml-agents-envs/) contains a Python API to interact with a Unity
scene. It is a foundational layer that facilitates data messaging between the Unity scene
and the Python machine learning algorithms. Consequently, `mlagents` depends on `mlagents_envs`.
* [`gym_unity`](../gym-unity/) provides a Python-wrapper for your Unity scene that
supports the OpenAI Gym interface.
* Unity [Project](../Project/) that contains several
[example environments](Learning-Environment-Examples.md) that highlight the various features
of the toolkit to help you get started.
## Install **Unity 2018.4** or Later
Consequently, to install and use ML-Agents you will need to:
* Install Unity (2018.4 or later)
* Install Python (3.6.1 or higher)
* Clone this repository
* Install the `com.unity.ml-agents` Unity package
* Install the `mlagents` Python package
[Download](https://store.unity.com/download) and install Unity. If you would
like to use our Docker set-up (introduced later), make sure to select the _Linux
Build Support_ component when installing Unity.
### Install **Unity 2018.4** or Later
<p align="center">
<img src="images/unity_linux_build_support.png"
alt="Linux Build Support"
width="500" border="10" />
</p>
[Download](https://unity3d.com/get-unity/download) and install Unity. We strongly recommend
that you install Unity through the Unity Hub as it will enable you to manage multiple Unity
versions.
## Environment Setup
We now support a single mechanism for installing ML-Agents on Mac/Windows/Linux using Virtual
Environments. For more information on Virtual Environments and installation instructions,
follow this [guide](Using-Virtual-Environment.md).
### Install **Python 3.6.1** or Higher
Although we don't support the Anaconda installation path of ML-Agents for Windows, the previous guide
is still in the docs folder. Please refer to [Windows Installation (Deprecated)](Installation-Windows.md).
We recommend [installing](https://www.python.org/downloads/) Python 3.6 or 3.7. If your Python
environment doesn't include `pip3`, see these
[instructions](https://packaging.python.org/guides/installing-using-linux-tools/#installing-pip-setuptools-wheel-with-linux-package-managers)
on installing it.
Although we do not provide support for Anaconda installation on Windows, the previous
[Windows Anaconda Installation (Deprecated) guide](Installation-Anaconda-Windows.md)
is still available.
Once installed, you will want to clone the ML-Agents Toolkit GitHub repository.
Now that you have installed Unity and Python, you will need to clone the
ML-Agents Toolkit GitHub repository to install the Unity package (the Python
packages can be installed directly from PyPi - a Python package registry).
```sh
git clone --branch latest_release https://github.com/Unity-Technologies/ml-agents.git

The `com.unity.ml-agents` subdirectory contains the core code to add to your projects.
The `Project` subdirectory contains many [example environments](Learning-Environment-Examples.md)
to help you get started.
### Package Installation
ML-Agents C# SDK is transitioning to a Unity Package. While we are working on getting into the
official packages list, you can add the `com.unity.ml-agents` package to your project by
navigating to the menu `Window` -> `Package Manager`. In the package manager window click
on the `+` button.
<p align="center">
<img src="images/unity_package_manager_window.png"
alt="Linux Build Support"
width="500" border="10" />
</p>
### Install the `com.unity.ml-agents` Unity package
**NOTE:** In Unity 2018.4 it's on the bottom right of the packages list, and in Unity 2019.3 it's on the top left of the packages list.
Select `Add package from disk...` and navigate into the
The Unity ML-Agents C# SDK is a Unity Package. We are working on getting it added to the
official Unity package registry which will enable you to install the `com.unity.ml-agents` package
[directly from the registry](https://docs.unity3d.com/Manual/upm-ui-install.html) without cloning
this repository. Until then, you will need to
[install it from the local package](https://docs.unity3d.com/Manual/upm-ui-local.html) that you
just cloned. You can add the `com.unity.ml-agents` package to
your project by navigating to the menu `Window` -> `Package Manager`. In the package manager
window click on the `+` button. Select `Add package from disk...` and navigate into the
**NOTE:** In Unity 2018.4 it's on the bottom right of the packages list, and in Unity 2019.3 it's
on the top left of the packages list.
<img src="images/unity_package_manager_window.png"
alt="Unity Package Manager Window"
height="340" border="10" />
alt="Linux Build Support"
width="500" border="10" />
alt="package.json"
height="340" border="10" />
The `ml-agents` subdirectory contains a Python package which provides deep reinforcement
learning trainers to use with Unity environments.
The `ml-agents-envs` subdirectory contains a Python API to interface with Unity, which
the `ml-agents` package depends on.
The `gym-unity` subdirectory contains a package to interface with OpenAI Gym.
### Install Python and mlagents Package
In order to use ML-Agents toolkit, you need Python 3.6.1 or higher.
[Download](https://www.python.org/downloads/) and install the latest version of Python if you do not already have it.
### Install the `mlagents` Python package
If your Python environment doesn't include `pip3`, see these
[instructions](https://packaging.python.org/guides/installing-using-linux-tools/#installing-pip-setuptools-wheel-with-linux-package-managers)
on installing it.
Installing the `mlagents` Python package involves installing other Python packages
that `mlagents` depends on. So you may run into installation issues if your machine
has older versions of any of those dependencies already installed. Consequently, our
supported path for installing `mlagents` is to leverage Python Virtual Environments.
Virtual Environments provide a mechanism for isolating the dependencies for each project
and are supported on Mac / Windows / Linux. We offer a dedicated
[guide on Virtual Environments](Using-Virtual-Environment.md).
To install the `mlagents` Python package, run from the command line:
To install the `mlagents` Python package, activate your virtual environment and run from the
command line:
Note that this will install `ml-agents` from PyPi, _not_ from the cloned repo.
Note that this will install `mlagents` from PyPi, _not_ from the cloned repo.
By installing the `mlagents` package, the dependencies listed in the [setup.py file](../ml-agents/setup.py) are also installed.
Some of the primary dependencies include:
- [TensorFlow](Background-TensorFlow.md) (Requires a CPU w/ AVX support)
- [Jupyter](Background-Jupyter.md)
**Notes:**
- We do not currently support Python 3.5 or lower.
- If you are using Anaconda and are having trouble with TensorFlow, please see
the following
[link](https://www.tensorflow.org/install/pip)
on how to install TensorFlow in an Anaconda environment.
By installing the `mlagents` package, the dependencies listed in the
[setup.py file](../ml-agents/setup.py) are also installed. These include
[TensorFlow](Background-TensorFlow.md) (Requires a CPU w/ AVX support) and
[Jupyter](Background-Jupyter.md).
### Installing for Development
#### Advanced: Installing for Development
If you intend to make modifications to `ml-agents` or `ml-agents-envs`, you should install
If you intend to make modifications to `mlagents` or `mlagents_envs`, you should install
`ml-agents` and `ml-agents-envs` separately. From the repo's root directory, run:
`mlagents` and `mlagents_envs` separately. From the repo's root directory, run:
cd ml-agents-envs
pip3 install -e ./
cd ..
cd ml-agents
pip3 install -e ./
pip3 install -e ./ml-agents-envs
pip3 install -e ./ml-agents
Running pip with the `-e` flag will let you make changes to the Python files directly and have those
reflected when you run `mlagents-learn`. It is important to install these packages in this order as the
`mlagents` package depends on `mlagents_envs`, and installing it in the other
Running pip with the `-e` flag will let you make changes to the Python files directly and have
those reflected when you run `mlagents-learn`. It is important to install these packages in this
order as the `mlagents` package depends on `mlagents_envs`, and installing it in the other
setting up the ML-Agents toolkit within Unity, running a pre-trained model, in
setting up the ML-Agents Toolkit within Unity, running a pre-trained model, in
addition to building and training environments.
## Help

5
docs/Learning-Environment-Best-Practices.md


lessons which progressively increase in difficulty are presented to the agent
([learn more here](Training-Curriculum-Learning.md)).
* When possible, it is often helpful to ensure that you can complete the task by
using a heuristic to control the agent. To do so, check the `Use Heuristic`
checkbox on the Agent and implement the `Heuristic()` method on the Agent.
using a heuristic to control the agent. To do so, set the `Behavior Type`
to `Heuristic Only` on the Agent's Behavior Parameters, and implement the
`Heuristic()` method on the Agent.
* It is often helpful to make many copies of the agent, and give them the same
`Behavior Name`. In this way the learning process can get more feedback
information from all of these agents, which helps it train faster.

38
docs/Learning-Environment-Create-New.md


### Initialization and Resetting the Agent
When the Agent reaches its target, it marks itself done and its Agent reset
function moves the target to a random location. In addition, if the Agent rolls
off the platform, the reset function puts it back onto the floor.
When the Agent reaches its target, its episode ends and the `OnEpisodeBegin()`
method moves the target to a random location. In addition, if the Agent rolls
off the platform, the `OnEpisodeBegin()` method puts it back onto the floor.
To move the target GameObject, we need a reference to its Transform (which
stores a GameObject's position, orientation and scale in the 3D world). To get

}
public Transform Target;
public override void AgentReset()
public override void OnEpisodeBegin()
{
if (this.transform.position.y < 0)
{

}
```
The final part of the Agent code is the `Agent.AgentAction()` method, which
receives the decision from the Brain and assigns the reward.
The final part of the Agent code is the `Agent.OnActionReceived()` method, which
receives the actions from the Brain and assigns the reward.
`AgentAction()` function. The number of elements in this array is determined by
`OnActionReceived()` function. The number of elements in this array is determined by
the `Vector Action` `Space Type` and `Space Size` settings of the
agent's Brain. The RollerAgent uses the continuous vector action space and needs
two continuous control signals from the Brain. Thus, we will set the Brain

### Rewards
Reinforcement learning requires rewards. Assign rewards in the `AgentAction()`
Reinforcement learning requires rewards. Assign rewards in the `OnActionReceived()`
function. The learning algorithm uses the rewards assigned to the Agent during
the simulation and learning process to determine whether it is giving
the Agent the optimal actions. You want to reward an Agent for completing the

The RollerAgent calculates the distance to detect when it reaches the target.
When it does, the code calls the `Agent.SetReward()` method to assign a
reward of 1.0 and marks the agent as finished by calling the `Done()` method
reward of 1.0 and marks the agent as finished by calling the `EndEpisode()` method
on the Agent.
```csharp

if (distanceToTarget < 1.42f)
{
SetReward(1.0f);
Done();
EndEpisode();
Finally, if the Agent falls off the platform, set the Agent to done so that it can reset itself:
Finally, if the Agent falls off the platform, end the episode so that it can reset itself:
Done();
EndEpisode();
### AgentAction()
### OnActionReceived()
`AgentAction()` function looks like:
`OnActionReceived()` function looks like:
public override void AgentAction(float[] vectorAction)
public override void OnActionReceived(float[] vectorAction)
{
// Actions, size = 2
Vector3 controlSignal = Vector3.zero;

if (distanceToTarget < 1.42f)
{
SetReward(1.0f);
Done();
EndEpisode();
Done();
EndEpisode();
}
}

to the values of the "Horizontal" and "Vertical" input axis (which correspond to
the keyboard arrow keys).
In order for the Agent to use the Heuristic, you will need to check the `Use Heuristic`
checkbox in the `Behavior Parameters` of the RollerAgent.
In order for the Agent to use the Heuristic, you will need to set the `Behavior Type`
to `Heuristic Only` in the `Behavior Parameters` of the RollerAgent.
Press **Play** to run the scene and use the arrow keys to move the Agent around
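A sketch of such a `Heuristic()` method, assuming the two continuous actions described above (the array layout is an assumption of this sketch, not quoted from the tutorial):

```csharp
public override float[] Heuristic()
{
    // Map the "Horizontal" and "Vertical" input axes (the arrow keys) onto the
    // two continuous actions consumed by OnActionReceived().
    var action = new float[2];
    action[0] = Input.GetAxis("Horizontal");
    action[1] = Input.GetAxis("Vertical");
    return action;
}
```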

47
docs/Learning-Environment-Design-Agents.md


The Policy class abstracts out the decision making logic from the Agent itself so
that you can use the same Policy in multiple Agents. How a Policy makes its
decisions depends on the kind of Policy it is. You can change the Policy of an
Agent by changing its `Behavior Parameters`. If you check `Use Heuristic`, the
Agent will use its `Heuristic()` method to make decisions which can allow you to
control the Agent manually or write your own Policy. If the Agent has a `Model`
file, its Policy will use the neural network `Model` to take decisions.
Agent by changing its `Behavior Parameters`. If you set `Behavior Type` to
`Heuristic Only`, the Agent will use its `Heuristic()` method to make decisions
which can allow you to control the Agent manually or write your own Policy. If
the Agent has a `Model` file, its Policy will use the neural network `Model` to
take decisions.
## Decisions

An action is an instruction from the Policy that the agent carries out. The
action is passed to the Agent as a parameter when the Academy invokes the
agent's `AgentAction()` function. When you specify that the vector action space
agent's `OnActionReceived()` function. When you specify that the vector action space
is **Continuous**, the action parameter passed to the Agent is an array of
control signals with length equal to the `Vector Action Space Size` property.
When you specify a **Discrete** vector action space type, the action parameter

values themselves mean. The training algorithm simply tries different values for
the action list and observes the effect on the accumulated rewards over time and
many training episodes. Thus, the only place actions are defined for an Agent is
in the `AgentAction()` function. You simply specify the type of vector action
space, and, for the continuous vector action space, the number of values, and
then apply the received values appropriately (and consistently) in
`ActionAct()`.
in the `OnActionReceived()` function.
For example, if you designed an agent to move in two dimensions, you could use
either continuous or the discrete vector actions. In the continuous case, you

### Continuous Action Space
When an Agent uses a Policy set to the **Continuous** vector action space, the
action parameter passed to the Agent's `AgentAction()` function is an array with
action parameter passed to the Agent's `OnActionReceived()` function is an array with
length equal to the `Vector Action Space Size` property value.
The individual values in the array have whatever meanings that you ascribe to
them. If you assign an element in the array as the speed of an Agent, for

These control values are applied as torques to the bodies making up the arm:
```csharp
public override void AgentAction(float[] act)
public override void OnActionReceived(float[] act)
{
float torque_x = Mathf.Clamp(act[0], -1, 1) * 100f;
float torque_z = Mathf.Clamp(act[1], -1, 1) * 100f;

### Discrete Action Space
When an Agent uses a **Discrete** vector action space, the
action parameter passed to the Agent's `AgentAction()` function is an array
action parameter passed to the Agent's `OnActionReceived()` function is an array
containing indices. With the discrete vector action space, `Branches` is an
array of integers, each value corresponds to the number of possibilities for
each branch.

agent be able to move __and__ jump concurrently. We define the first branch to
have 5 possible actions (don't move, go left, go right, go backward, go forward)
and the second one to have 2 possible actions (don't jump, jump). The
AgentAction method would look something like:
`OnActionReceived()` method would look something like:
```csharp
// Get the action index for movement
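// (Hedged sketch of how this example might continue; "act" and the branch
// sizes follow the description above and are not quoted from this page.)
int movement = Mathf.FloorToInt(act[0]);

// Get the action index for jumping
int jump = Mathf.FloorToInt(act[1]);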

Agent's Heuristic to control the Agent while watching how it accumulates rewards.
Allocate rewards to an Agent by calling the `AddReward()` method in the
`AgentAction()` function. The reward assigned between each decision
`OnActionReceived()` function. The reward assigned between each decision
should be in the range [-1,1]. Values outside this range can lead to
unstable training. The `reward` value is reset to zero when the agent receives a
new decision. If there are multiple calls to `AddReward()` for a single agent

### Examples
You can examine the `AgentAction()` functions defined in the [example
You can examine the `OnActionReceived()` functions defined in the [example
environments](Learning-Environment-Examples.md) to see how those projects
allocate rewards.

if (hitObjects.Where(col => col.gameObject.tag == "goal").ToArray().Length == 1)
{
AddReward(1.0f);
Done();
EndEpisode();
Done();
EndEpisode();
}
```

Mathf.Abs(gameObject.transform.position.x - area.transform.position.x) > 8f ||
Mathf.Abs(gameObject.transform.position.z + 5 - area.transform.position.z) > 8)
{
Done();
EndEpisode();
AddReward(-1f);
}
```

platform:
```csharp
if (IsDone() == false)
{
SetReward(0.1f);
}
SetReward(0.1f);
// When ball falls mark Agent as done and give a negative penalty
// When ball falls mark Agent as finished and give a negative penalty
Done();
EndEpisode();
}
```

Note that all of these environments make use of the `Done()` method, which manually
Note that all of these environments make use of the `EndEpisode()` method, which manually
terminates an episode when a termination condition is reached. This can be
called independently of the `Max Step` property.

33
docs/Learning-Environment-Design.md


Training and simulation proceed in steps orchestrated by the ML-Agents Academy
class. The Academy works with Agent objects in the scene to step
through the simulation. When all Agents in the scene are _done_,
one training episode is finished.
through the simulation.
During training, the external Python training process communicates with the
Academy to run a series of episodes while it collects data and optimizes its

The ML-Agents Academy class orchestrates the agent simulation loop as follows:
1. Calls your Academy's `OnEnvironmentReset` delegate.
2. Calls the `AgentReset()` function for each Agent in the scene.
2. Calls the `OnEpisodeBegin()` function for each Agent in the scene.
5. Calls the `AgentAction()` function for each Agent in the scene, passing in
the action chosen by the Agent's Policy. (This function is not called if the
Agent is done.)
6. Calls the Agent's `AgentReset()` function if the Agent has reached its `Max
Step` count or has otherwise marked itself as `done`.
5. Calls the `OnActionReceived()` function for each Agent in the scene, passing in
the action chosen by the Agent's Policy.
6. Calls the Agent's `OnEpisodeBegin()` function if the Agent has reached its `Max
Step` count or has otherwise marked itself as `EndEpisode()`.
implement the above methods. The `Agent.CollectObservations(VectorSensor sensor)` and
`Agent.AgentAction()` functions are required; the other methods are optional —
whether you need to implement them or not depends on your specific scenario.
implement the above methods; whether you need to implement them or not depends on
your specific scenario.
**Note:** The API used by the Python training process to communicate with
and control the Academy during training can be used for other purposes as well.

have appropriate `Behavior Parameters`.
To create an Agent, extend the Agent class and implement the essential
`CollectObservations(VectorSensor sensor)` and `AgentAction()` methods:
`CollectObservations(VectorSensor sensor)` and `OnActionReceived()` methods:
* `AgentAction()` — Carries out the action chosen by the Agent's Policy and
* `OnActionReceived()` — Carries out the action chosen by the Agent's Policy and
assigns a reward to the current state.
Your implementations of these functions determine how the Behavior Parameters

manually set an Agent to done in your `AgentAction()` function when the Agent
has finished (or irrevocably failed) its task by calling the `Done()` function.
manually terminate an Agent episode in your `OnActionReceived()` function when the Agent
has finished (or irrevocably failed) its task by calling the `EndEpisode()` function.
Agent will consider itself done after it has taken that many steps. You can
use the `Agent.AgentReset()` function to prepare the Agent to start again.
Agent will consider the episode over after it has taken that many steps. You can
use the `Agent.OnEpisodeBegin()` function to prepare the Agent to start again.
See [Agents](Learning-Environment-Design-Agents.md) for detailed information
about programming your own Agents.

* The Academy must reset the scene to a valid starting point for each episode of
training.
* A training episode must have a definite end — either using `Max Steps` or by
each Agent setting itself to `done`.
each Agent ending its episode manually with `EndEpisode()`.

4
docs/Learning-Environment-Examples.md


* Goal: The agents must hit the ball so that the opponent cannot hit a valid
return.
* Agents: The environment contains two agents with the same Behavior Parameters.
After training you can check the `Use Heuristic` checkbox on one of the Agents
to play against your trained model.
After training you can set the `Behavior Type` to `Heuristic Only` on one of the Agent's
Behavior Parameters to play against your trained model.
* Agent Reward Function (independent):
* +1.0 To the agent that wins the point. An agent wins a point by preventing
the opponent from hitting a valid return.

35
docs/Limitations.md


# Limitations
## Unity SDK
### Headless Mode
If you enable Headless mode, you will not be able to collect visual observations
from your agents.
### Rendering Speed and Synchronization
Currently the speed of the game physics can only be increased to 100x real-time.
The Academy also moves in time with FixedUpdate() rather than Update(), so game
behavior implemented in Update() may be out of sync with the agent decision
making. See
[Execution Order of Event Functions](https://docs.unity3d.com/Manual/ExecutionOrder.html)
for more information.
You can control the frequency of Academy stepping by calling
`Academy.Instance.DisableAutomaticStepping()`, and then calling
`Academy.Instance.EnvironmentStep()`
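A minimal sketch of that manual-stepping pattern, assuming a MonoBehaviour drives the calls (the component name here is made up):

```csharp
using MLAgents;
using UnityEngine;

public class ManualAcademyStepper : MonoBehaviour
{
    void Awake()
    {
        // Stop the Academy from stepping automatically on FixedUpdate().
        Academy.Instance.DisableAutomaticStepping();
    }

    void FixedUpdate()
    {
        // Step the ML-Agents simulation exactly once per physics update.
        Academy.Instance.EnvironmentStep();
    }
}
```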
### Unity Inference Engine Models
Currently, only models created with our trainers are supported for running
ML-Agents with a neural network behavior.
## Python API
### Python version
As of version 0.3, we no longer support Python 2.
See the package-specific Limitations pages:
* [Unity `com.unity.mlagents` package](../com.unity.ml-agents/Documentation~/com.unity.ml-agents.md)
* [`mlagents` Python package](../ml-agents/README.md)
* [`mlagents_envs` Python package](../ml-agents-envs/README.md)
* [`gym_unity` Python package](../gym-unity/README.md)

20
docs/Migrating.md


* `AgentInfo.actionMasks` has been renamed to `AgentInfo.discreteActionMasks`.
* `BrainParameters` and `SpaceType` have been removed from the public API
* `BehaviorParameters` have been removed from the public API.
* `DecisionRequester` has been made internal (you can still use the DecisionRequesterComponent from the inspector). `RepeatAction` was renamed `TakeActionsBetweenDecisions` for clarity.
* The following methods in the `Agent` class have been renamed. The original method names will be removed in a later release:
* `InitializeAgent()` was renamed to `Initialize()`
* `AgentAction()` was renamed to `OnActionReceived()`
* `AgentReset()` was renamed to `OnEpisodeBegin()`
* `Done()` was renamed to `EndEpisode()`
* `GiveModel()` was renamed to `SetModel()`
* The `IFloatProperties` interface has been removed.
* The interface for SideChannels was changed:
* In C#, `OnMessageReceived` now takes a `IncomingMessage` argument, and `QueueMessageToSend` takes an `OutgoingMessage` argument.
* In python, `on_message_received` now takes a `IncomingMessage` argument, and `queue_message_to_send` takes an `OutgoingMessage` argument.
### Steps to Migrate
* Add the `using MLAgents.Sensors;` in addition to `using MLAgents;` on top of your Agent's script.

iterate through `RayPerceptionOutput.rayOutputs` and call `RayPerceptionOutput.RayOutput.ToFloatArray()`.
* Re-import all of your `*.NN` files to work with the updated Barracuda package.
* We strongly recommend replacing the following methods with their new equivalent as they will be removed in a later release:
* `InitializeAgent()` to `Initialize()`
* `AgentAction()` to `OnActionReceived()`
* `AgentReset()` to `OnEpisodeBegin()`
* `Done()` to `EndEpisode()`
* `GiveModel()` to `SetModel()`
* Replace `IFloatProperties` variables with `FloatPropertiesChannel` variables.
* If you implemented custom `SideChannels`, update the signatures of your methods, and add your data to the `OutgoingMessage` or read it from the `IncomingMessage`.
## Migrating from 0.13 to 0.14

236
docs/Python-API.md


allows you to interact directly with a Unity Environment (`mlagents_envs`) and
an entry point to train (`mlagents-learn`) which allows you to train agents in
Unity Environments using our implementations of reinforcement learning or
imitation learning.
imitation learning. This document describes how to use the `mlagents_envs` API.
For information on using `mlagents-learn`, see [here](Training-ML-Agents.md).
You can use the Python Low Level API to interact directly with your learning
environment, and use it to develop new learning algorithms.
The Python Low Level API can be used to interact directly with your Unity learning environment.
As such, it can serve as the basis for developing and evaluating new learning algorithms.
## mlagents_envs

Python-side communication happens through `UnityEnvironment` which is located in
[`environment.py`](../ml-agents-envs/mlagents_envs/environment.py). To load
a Unity environment from a built binary file, put the file in the same directory
as `envs`. For example, if the filename of your Unity environment is 3DBall.app, in python, run:
as `envs`. For example, if the filename of your Unity environment is `3DBall`, in python, run:
```python
from mlagents_envs.environment import UnityEnvironment
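# Hypothetical usage sketch: load the 3DBall build placed next to the `envs`
# directory by passing its file name to the constructor.
env = UnityEnvironment(file_name="3DBall")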

`discrete_action_branches = (3,2,)`)
### Modifying the environment from Python
The Environment can be modified by using side channels to send data to the
environment. When creating the environment, pass a list of side channels as
`side_channels` argument to the constructor.
### Communicating additional information with the Environment
In addition to the means of communicating between Unity and python described above,
we also provide methods for sharing agent-agnostic information. These
additional methods are referred to as side channels. ML-Agents includes two ready-made
side channels, described below. It is also possible to create custom side channels to
communicate any additional data between a Unity environment and Python. Instructions for
creating custom side channels can be found [here](Custom-SideChannels.md).
Side channels exist as separate classes which are instantiated, and then passed as list to the `side_channels` argument of the constructor of the `UnityEnvironment` class.
```python
channel = MyChannel()
env = UnityEnvironment(side_channels = [channel])
```
__Note__ : A side channel will only send/receive messages when `env.step` is
__Note__ : A side channel will only send/receive messages when `env.step` or `env.reset()` is
An `EngineConfiguration` will allow you to modify the time scale and graphics quality of the Unity engine.
The `EngineConfiguration` side channel allows you to modify the time-scale, resolution, and graphics quality of the environment. This can be useful for adjusting the environment to perform better during training, or be more interpretable during inference.
* `set_configuration_parameters` with arguments
* width: Defines the width of the display. Default 80.
* height: Defines the height of the display. Default 80.
* quality_level: Defines the quality level of the simulation. Default 1.
* time_scale: Defines the multiplier for the deltatime in the simulation. If set to a higher value, time will pass faster in the simulation but the physics might break. Default 20.
* target_frame_rate: Instructs simulation to try to render at a specified frame rate. Default -1.
* `set_configuration_parameters` which takes the following arguments:
* `width`: Defines the width of the display. Default 80.
* `height`: Defines the height of the display. Default 80.
* `quality_level`: Defines the quality level of the simulation. Default 1.
* `time_scale`: Defines the multiplier for the deltatime in the simulation. If set to a higher value, time will pass faster in the simulation but the physics may perform unpredictably. Default 20.
* `target_frame_rate`: Instructs simulation to try to render at a specified frame rate. Default -1.
For example :
For example, the following code would adjust the time-scale of the simulation to be 2x realtime.
```python
from mlagents_envs.environment import UnityEnvironment
from mlagents_envs.side_channel.engine_configuration_channel import EngineConfigurationChannel
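# Minimal sketch of the usage described above: create the channel, pass it to
# the environment, and run the simulation at 2x real-time.
channel = EngineConfigurationChannel()
env = UnityEnvironment(side_channels=[channel])
channel.set_configuration_parameters(time_scale=2.0)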

```
#### FloatPropertiesChannel
A `FloatPropertiesChannel` will allow you to get and set float properties
in the environment. You can call get_property and set_property on the
side channel to read and write properties.
The `FloatPropertiesChannel` will allow you to get and set pre-defined numerical values in the environment. This can be useful for adjusting environment-specific settings, or for reading non-agent related information from the environment. You can call `get_property` and `set_property` on the side channel to read and write properties.
`FloatPropertiesChannel` has three methods:
* `set_property` Sets a property in the Unity Environment.

channel.set_property("parameter_1", 2.0)
i = env.reset()
readout_value = channel.get_property("parameter_2")
...
```

float property1 = sharedProperties.GetPropertyWithDefault("parameter_1", 0.0f);
```
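As a minimal end-to-end sketch of the Python side (assuming the Unity environment reads `parameter_1` and writes `parameter_2`, as in the snippets above), the flow might look like this:

```python
from mlagents_envs.environment import UnityEnvironment
from mlagents_envs.side_channel.float_properties_channel import FloatPropertiesChannel

# Create the channel and pass it to the environment at construction time.
channel = FloatPropertiesChannel()
env = UnityEnvironment(side_channels=[channel])

# Write a property; it is sent to Unity on the next env.reset() or env.step().
channel.set_property("parameter_1", 2.0)
env.reset()

# Read a property written by the Unity side (None if it was never set).
readout_value = channel.get_property("parameter_2")
print(readout_value)

env.close()
```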
#### [Advanced] Create your own SideChannel
You can create your own `SideChannel` in C# and Python and use it to communicate data between the two.
##### Unity side
The side channel will have to implement the `SideChannel` abstract class and the following method.
* `OnMessageReceived(byte[] data)` : You must implement this method to specify what the side channel will be doing
with the data received from Python. The data is a `byte[]` argument.
The side channel must also assign a `ChannelId` property in the constructor. The `ChannelId` is a Guid
(or UUID in Python) used to uniquely identify a side channel. This Guid must be the same on C# and Python.
There can only be one side channel of a certain id during communication.
To send a byte array from C# to Python, call the `base.QueueMessageToSend(data)` method inside the side channel.
The `data` argument must be a `byte[]`.
To register a side channel on the Unity side, call `Academy.Instance.RegisterSideChannel` with the side channel
as its only argument.
##### Python side
The side channel will have to implement the `SideChannel` abstract class. You must implement:
* `on_message_received(self, data: bytes) -> None` : You must implement this method to specify what the
side channel will be doing with the data received from Unity. The data is a `bytes` object.
The side channel must also assign a `channel_id` property in the constructor. The `channel_id` is a UUID
(referred to in C# as a Guid) used to uniquely identify a side channel. This number must be the same on C# and
Python. There can only be one side channel of a certain id during communication.
To assign the `channel_id` call the abstract class constructor with the appropriate `channel_id` as follows:
```python
super().__init__(my_channel_id)
```
To send a byte array from Python to C#, call the `super().queue_message_to_send(bytes_data)` method inside the
side channel. The `bytes_data` argument must be a `bytes` object.
To register a side channel on the Python side, pass the side channel as argument when creating the
`UnityEnvironment` object. One of the arguments of the constructor (`side_channels`) is a list of side channels.
##### Example implementation
Here is a simple implementation of a Side Channel that will exchange strings between C# and Python
(encoded as ASCII).
On the C# side:
Here is an implementation of a `StringLogSideChannel` that will listen to the `UnityEngine.Debug.LogError` calls in
the game:
```csharp
using UnityEngine;
using MLAgents;
using System.Text;
using System;
public class StringLogSideChannel : SideChannel
{
public StringLogSideChannel()
{
ChannelId = new Guid("621f0a70-4f87-11ea-a6bf-784f4387d1f7");
}
public override void OnMessageReceived(byte[] data)
{
var receivedString = Encoding.ASCII.GetString(data);
Debug.Log("From Python : " + receivedString);
}
public void SendDebugStatementToPython(string logString, string stackTrace, LogType type)
{
if (type == LogType.Error)
{
var stringToSend = type.ToString() + ": " + logString + "\n" + stackTrace;
var encodedString = Encoding.ASCII.GetBytes(stringToSend);
base.QueueMessageToSend(encodedString);
}
}
}
```
We also need to register this side channel to the Academy and to the `Application.logMessageReceived` events,
so we write a simple MonoBehaviour for this. (Do not forget to attach it to a GameObject in the scene).
```csharp
using UnityEngine;
using MLAgents;
public class RegisterStringLogSideChannel : MonoBehaviour
{
StringLogSideChannel stringChannel;
public void Awake()
{
// We create the Side Channel
stringChannel = new StringLogSideChannel();
// When a Debug.Log message is created, we send it to the stringChannel
Application.logMessageReceived += stringChannel.SendDebugStatementToPython;
// Just in case the Academy has not yet initialized
Academy.Instance.RegisterSideChannel(stringChannel);
}
public void OnDestroy()
{
// De-register the Debug.Log callback
Application.logMessageReceived -= stringChannel.SendDebugStatementToPython;
if (Academy.IsInitialized){
Academy.Instance.UnregisterSideChannel(stringChannel);
}
}
public void Update()
{
// Optional : If the space bar is pressed, raise an error !
if (Input.GetKeyDown(KeyCode.Space))
{
Debug.LogError("This is a fake error. Space bar was pressed in Unity.");
}
}
}
```
And here is the script on the Python side. This script creates a new Side channel type (`StringLogChannel`) and
launches a `UnityEnvironment` with that side channel.
```python
from mlagents_envs.environment import UnityEnvironment
from mlagents_envs.side_channel.side_channel import SideChannel
import uuid
import numpy as np
# Create the StringLogChannel class
class StringLogChannel(SideChannel):
def __init__(self) -> None:
super().__init__(uuid.UUID("621f0a70-4f87-11ea-a6bf-784f4387d1f7"))
def on_message_received(self, data: bytes) -> None:
"""
Note: We must implement this method of the SideChannel interface to
receive messages from Unity
"""
# We simply print the data received interpreted as ascii
print(data.decode("ascii"))
def send_string(self, data: str) -> None:
# Convert the string to ascii
bytes_data = data.encode("ascii")
# We call this method to queue the data we want to send
super().queue_message_to_send(bytes_data)
# Create the channel
string_log = StringLogChannel()
# We start the communication with the Unity Editor and pass the string_log side channel as input
env = UnityEnvironment(base_port=UnityEnvironment.DEFAULT_EDITOR_PORT, side_channels=[string_log])
env.reset()
string_log.send_string("The environment was reset")
group_name = env.get_agent_groups()[0] # Get the first group_name
for i in range(1000):
step_data = env.get_step_result(group_name)
n_agents = step_data.n_agents() # Get the number of agents
# We send data to Unity: a string with the number of agents at each step
string_log.send_string(
"Step " + str(i) + " occurred with " + str(n_agents) + " agents."
)
env.step() # Move the simulation forward
env.close()
```
Now, if you run this script and press `Play` in the Unity Editor when prompted, the console in the Unity Editor will
display a message at every Python step. Additionally, if you press the Space Bar in the Unity Editor, a message will
appear in the terminal.
For information on how to make custom side channels for sending additional data types, see the documentation [here](Custom-SideChannels.md).

3
docs/Readme.md


* [Using the Monitor](Feature-Monitor.md)
* [Using the Video Recorder](https://github.com/Unity-Technologies/video-recorder)
* [Using an Executable Environment](Learning-Environment-Executable.md)
* [Creating Custom Side Channels](Custom-SideChannels.md)
## Training

* [Training on the Cloud with Amazon Web Services](Training-on-Amazon-Web-Service.md)
* [Training on the Cloud with Microsoft Azure](Training-on-Microsoft-Azure.md)
* [Using Docker](Using-Docker.md)
* [Installation-Windows](Installation-Windows.md)
* [Windows Anaconda Installation](Installation-Anaconda-Windows.md)

2
docs/Training-Curriculum-Learning.md


greater than number of thresholds.
Once our curriculum is defined, we have to use the reset parameters we defined
and modify the environment from the Agent's `AgentReset()` function. See
and modify the environment from the Agent's `OnEpisodeBegin()` function. See
[WallJumpAgent.cs](https://github.com/Unity-Technologies/ml-agents/blob/master/Project/Assets/ML-Agents/Examples/WallJump/Scripts/WallJumpAgent.cs)
for an example.

1
docs/Training-ML-Agents.md


| init_entcoef | How much the agent should explore in the beginning of training. | SAC |
| lambd | The regularization parameter. | PPO |
| learning_rate | The initial learning rate for gradient descent. | PPO, SAC |
| learning_rate_schedule | Determines how learning rate changes over time. | PPO, SAC |
| max_steps | The maximum number of simulation steps to run during a training session. | PPO, SAC |
| memory_size | The size of the memory an agent must keep. Used for training with a recurrent neural network. See [Using Recurrent Neural Networks](Feature-Memory.md). | PPO, SAC |
| normalize | Whether to automatically normalize observations. | PPO, SAC |

9
docs/Using-Docker.md


## Requirements
- Unity _Linux Build Support_ Component
- Unity _Linux Build Support_ Component. Make sure to select the _Linux
Build Support_ component when installing Unity.
<p align="center">
<img src="images/unity_linux_build_support.png"
alt="Linux Build Support"
width="500" border="10" />
</p>
## Setup

12
docs/Using-Virtual-Environment.md


spinning up a new environment and verifying the compatibility of the code with the
different version.
Requirement - Python 3.6 must be installed on the machine you would like
to run ML-Agents on (either local laptop/desktop or remote server). Python 3.6 can be
installed from [here](https://www.python.org/downloads/).
## Python Version Requirement (Required)
This guide has been tested with Python 3.6 and 3.7. Python 3.8 is not supported at this time.

1. To activate the environment execute `$ source ~/python-envs/sample-env/bin/activate`
1. Verify pip version is the same as in the __Installing Pip__ section. In case it is not the latest, upgrade to
the latest pip version using `$ pip3 install --upgrade pip`
1. Install ML-Agents package using `$ pip3 install mlagents`
1. To deactivate the environment execute `$ deactivate`
1. To deactivate the environment execute `$ deactivate` (you can reactivate the environment
using the same `activate` command listed above)
## Ubuntu Setup

1. To activate the environment execute `python-envs\sample-env\Scripts\activate`
1. Verify pip version is the same as in the __Installing Pip__ section. In case it is not the
latest, upgrade to the latest pip version using `pip install --upgrade pip`
1. Install ML-Agents package using `pip install mlagents`
1. To deactivate the environment execute `deactivate`
1. To deactivate the environment execute `deactivate` (you can reactivate the environment
using the same `activate` command listed above)
Note:
- Verify that you are using Python 3.6 or Python 3.7. Launch a command prompt using `cmd` and

951
docs/images/unity_package_manager_window.png


2
docs/localized/KR/docs/Installation.md


</p>
## Windows Users
To set up your environment on Windows, we have written a [detailed guide](Installation-Windows.md) describing the setup steps.
To set up your environment on Windows, we have written a [detailed guide](Installation-Anaconda-Windows.md) describing the setup steps.
For Mac and Linux, please check the following guide.
## Mac or Unix Users

2
docs/localized/zh-CN/docs/Installation.md


### Windows Users
If you are a Windows user new to Python and TensorFlow, please follow [this guide](/docs/Installation-Windows.md) to set up your Python environment.
If you are a Windows user new to Python and TensorFlow, please follow [this guide](/docs/Installation-Anaconda-Windows.md) to set up your Python environment.
### Mac and Unix Users

4
ml-agents-envs/mlagents_envs/communicator.py


import logging
logger = logging.getLogger("mlagents_envs")
class Communicator(object):

7
ml-agents-envs/mlagents_envs/environment.py


from typing import Dict, List, Optional, Any
import mlagents_envs
from mlagents_envs.side_channel.side_channel import SideChannel
from mlagents_envs.side_channel.side_channel import SideChannel, IncomingMessage
from mlagents_envs.base_env import (
BaseEnv,

import signal
import struct
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("mlagents_envs")

"sending side channel data properly.".format(channel_id)
)
if channel_id in side_channels:
side_channels[channel_id].on_message_received(message_data)
incoming_message = IncomingMessage(message_data)
side_channels[channel_id].on_message_received(incoming_message)
else:
logger.warning(
"Unknown side channel data received. Channel type "

5
ml-agents-envs/mlagents_envs/exception.py


import logging
logger = logging.getLogger("mlagents_envs")
class UnityException(Exception):
"""
Any error related to ml-agents environment.

3
ml-agents-envs/mlagents_envs/rpc_communicator.py


import logging
import grpc
from typing import Optional

from mlagents_envs.communicator_objects.unity_input_pb2 import UnityInputProto
from mlagents_envs.communicator_objects.unity_output_pb2 import UnityOutputProto
from .exception import UnityTimeOutException, UnityWorkerInUseException
logger = logging.getLogger("mlagents_envs")
class UnityToExternalServicerImplementation(UnityToExternalProtoServicer):

3
ml-agents-envs/mlagents_envs/rpc_utils.py


NONE as COMPRESSION_NONE,
)
from mlagents_envs.communicator_objects.brain_parameters_pb2 import BrainParametersProto
import logging
logger = logging.getLogger("mlagents_envs")
def agent_group_spec_from_proto(

4
ml-agents-envs/mlagents_envs/side_channel/__init__.py


from mlagents_envs.side_channel.incoming_message import IncomingMessage # noqa
from mlagents_envs.side_channel.outgoing_message import OutgoingMessage # noqa
from mlagents_envs.side_channel.side_channel import SideChannel # noqa

23
ml-agents-envs/mlagents_envs/side_channel/engine_configuration_channel.py


from mlagents_envs.side_channel.side_channel import SideChannel
from mlagents_envs.side_channel import SideChannel, OutgoingMessage, IncomingMessage
import struct
import uuid
from typing import NamedTuple

def __init__(self) -> None:
super().__init__(uuid.UUID("e951342c-4f7e-11ea-b238-784f4387d1f7"))
def on_message_received(self, data: bytes) -> None:
def on_message_received(self, msg: IncomingMessage) -> None:
"""
Is called by the environment to the side channel. Can be called
multiple times per step if multiple messages are meant for that

:param target_frame_rate: Instructs simulation to try to render at a
specified frame rate. Default -1.
"""
data = bytearray()
data += struct.pack("<i", width)
data += struct.pack("<i", height)
data += struct.pack("<i", quality_level)
data += struct.pack("<f", time_scale)
data += struct.pack("<i", target_frame_rate)
super().queue_message_to_send(data)
msg = OutgoingMessage()
msg.write_int32(width)
msg.write_int32(height)
msg.write_int32(quality_level)
msg.write_float32(time_scale)
msg.write_int32(target_frame_rate)
super().queue_message_to_send(msg)
data = bytearray()
data += struct.pack("<iiifi", *config)
super().queue_message_to_send(data)
self.set_configuration_parameters(**config._asdict())

36
ml-agents-envs/mlagents_envs/side_channel/float_properties_channel.py


from mlagents_envs.side_channel.side_channel import SideChannel
import struct
from mlagents_envs.side_channel import SideChannel, IncomingMessage, OutgoingMessage
from typing import Dict, Tuple, Optional, List
from typing import Dict, Optional, List
class FloatPropertiesChannel(SideChannel):

channel_id = uuid.UUID(("60ccf7d0-4f7e-11ea-b238-784f4387d1f7"))
super().__init__(channel_id)
def on_message_received(self, data: bytes) -> None:
def on_message_received(self, msg: IncomingMessage) -> None:
Note that Python should never receive an engine configuration from
Unity
k, v = self.deserialize_float_prop(data)
k = msg.read_string()
v = msg.read_float32()
self._float_properties[k] = v
def set_property(self, key: str, value: float) -> None:

:param value: The float value of the property.
"""
self._float_properties[key] = value
super().queue_message_to_send(self.serialize_float_prop(key, value))
msg = OutgoingMessage()
msg.write_string(key)
msg.write_float32(value)
super().queue_message_to_send(msg)
def get_property(self, key: str) -> Optional[float]:
"""

:return:
"""
return dict(self._float_properties)
@staticmethod
def serialize_float_prop(key: str, value: float) -> bytearray:
result = bytearray()
encoded_key = key.encode("ascii")
result += struct.pack("<i", len(encoded_key))
result += encoded_key
result += struct.pack("<f", value)
return result
@staticmethod
def deserialize_float_prop(data: bytes) -> Tuple[str, float]:
offset = 0
encoded_key_len = struct.unpack_from("<i", data, offset)[0]
offset = offset + 4
key = data[offset : offset + encoded_key_len].decode("ascii")
offset = offset + encoded_key_len
value = struct.unpack_from("<f", data, offset)[0]
return key, value

10
ml-agents-envs/mlagents_envs/side_channel/raw_bytes_channel.py


from mlagents_envs.side_channel.side_channel import SideChannel
from mlagents_envs.side_channel import SideChannel, IncomingMessage, OutgoingMessage
from typing import List
import uuid

self._received_messages: List[bytes] = []
super().__init__(channel_id)
def on_message_received(self, data: bytes) -> None:
def on_message_received(self, msg: IncomingMessage) -> None:
self._received_messages.append(data)
self._received_messages.append(msg.get_raw_bytes())
def get_and_clear_received_messages(self) -> List[bytes]:
"""

Queues a message to be sent by the environment at the next call to
step.
"""
super().queue_message_to_send(data)
msg = OutgoingMessage()
msg.set_raw_bytes(data)
super().queue_message_to_send(msg)

13
ml-agents-envs/mlagents_envs/side_channel/side_channel.py


from abc import ABC, abstractmethod
from typing import List
import uuid
import logging
from mlagents_envs.side_channel import IncomingMessage, OutgoingMessage
logger = logging.getLogger(__name__)
class SideChannel(ABC):

to the Env object at construction.
"""
def __init__(self, channel_id):
def __init__(self, channel_id: uuid.UUID):
def queue_message_to_send(self, data: bytearray) -> None:
def queue_message_to_send(self, msg: OutgoingMessage) -> None:
self.message_queue.append(data)
self.message_queue.append(msg.buffer)
def on_message_received(self, data: bytes) -> None:
def on_message_received(self, msg: IncomingMessage) -> None:
"""
Is called by the environment to the side channel. Can be called
multiple times per step if multiple messages are meant for that

68
ml-agents-envs/mlagents_envs/tests/test_side_channel.py


import struct
from mlagents_envs.side_channel.side_channel import SideChannel
from mlagents_envs.side_channel import SideChannel, IncomingMessage, OutgoingMessage
from mlagents_envs.side_channel.float_properties_channel import FloatPropertiesChannel
from mlagents_envs.side_channel.raw_bytes_channel import RawBytesChannel
from mlagents_envs.environment import UnityEnvironment

self.list_int = []
super().__init__(uuid.UUID("a85ba5c0-4f87-11ea-a517-784f4387d1f7"))
def on_message_received(self, data):
val = struct.unpack_from("<i", data, 0)[0]
def on_message_received(self, msg: IncomingMessage) -> None:
val = msg.read_int32()
data = bytearray()
data += struct.pack("<i", value)
super().queue_message_to_send(data)
msg = OutgoingMessage()
msg.write_int32(value)
super().queue_message_to_send(msg)
def test_int_channel():

messages = receiver.get_and_clear_received_messages()
assert len(messages) == 0
def test_message_bool():
vals = [True, False]
msg_out = OutgoingMessage()
for v in vals:
msg_out.write_bool(v)
msg_in = IncomingMessage(msg_out.buffer)
read_vals = []
for _ in range(len(vals)):
read_vals.append(msg_in.read_bool())
assert vals == read_vals
def test_message_int32():
val = 1337
msg_out = OutgoingMessage()
msg_out.write_int32(val)
msg_in = IncomingMessage(msg_out.buffer)
read_val = msg_in.read_int32()
assert val == read_val
def test_message_float32():
val = 42.0
msg_out = OutgoingMessage()
msg_out.write_float32(val)
msg_in = IncomingMessage(msg_out.buffer)
read_val = msg_in.read_float32()
# These won't be exactly equal in general, since python floats are 64-bit.
assert val == read_val
def test_message_string():
val = "mlagents!"
msg_out = OutgoingMessage()
msg_out.write_string(val)
msg_in = IncomingMessage(msg_out.buffer)
read_val = msg_in.read_string()
assert val == read_val
def test_message_float_list():
val = [1.0, 3.0, 9.0]
msg_out = OutgoingMessage()
msg_out.write_float32_list(val)
msg_in = IncomingMessage(msg_out.buffer)
read_val = msg_in.read_float32_list()
# These won't be exactly equal in general, since python floats are 64-bit.
assert val == read_val

4
ml-agents/mlagents/trainers/brain.py


import logging
logger = logging.getLogger("mlagents.trainers")
class CameraResolution(NamedTuple):

5
ml-agents/mlagents/trainers/components/reward_signals/__init__.py


from mlagents.trainers.exception import UnityTrainerException
from mlagents.trainers.policy.tf_policy import TFPolicy
from mlagents.trainers.buffer import AgentBuffer
logger = logging.getLogger("mlagents.trainers")

self.strength = strength
self.stats_name_to_update_name: Dict[str, str] = {}
def evaluate_batch(self, mini_batch: Dict[str, np.array]) -> RewardSignalResult:
def evaluate_batch(self, mini_batch: AgentBuffer) -> RewardSignalResult:
"""
Evaluates the reward for the data present in the Dict mini_batch. Use this when evaluating a reward
function drawn straight from a Buffer.

)
def prepare_update(
self, policy: TFPolicy, mini_batch: Dict[str, np.ndarray], num_sequences: int
self, policy: TFPolicy, mini_batch: AgentBuffer, num_sequences: int
) -> Dict[tf.Tensor, Any]:
"""
If the reward signal has an internal model (e.g. GAIL or Curiosity), get the feed_dict

5
ml-agents/mlagents/trainers/components/reward_signals/curiosity/signal.py


from mlagents.trainers.components.reward_signals import RewardSignal, RewardSignalResult
from mlagents.trainers.components.reward_signals.curiosity.model import CuriosityModel
from mlagents.trainers.policy.tf_policy import TFPolicy
from mlagents.trainers.buffer import AgentBuffer
class CuriosityRewardSignal(RewardSignal):

}
self.has_updated = False
def evaluate_batch(self, mini_batch: Dict[str, np.array]) -> RewardSignalResult:
def evaluate_batch(self, mini_batch: AgentBuffer) -> RewardSignalResult:
feed_dict: Dict[tf.Tensor, Any] = {
self.policy.batch_size_ph: len(mini_batch["actions"]),
self.policy.sequence_length_ph: self.policy.sequence_length,

super().check_config(config_dict, param_keys)
def prepare_update(
self, policy: TFPolicy, mini_batch: Dict[str, np.ndarray], num_sequences: int
self, policy: TFPolicy, mini_batch: AgentBuffer, num_sequences: int
) -> Dict[tf.Tensor, Any]:
"""
Prepare for update and get feed_dict.

3
ml-agents/mlagents/trainers/components/reward_signals/extrinsic/signal.py


import numpy as np
from mlagents.trainers.components.reward_signals import RewardSignal, RewardSignalResult
from mlagents.trainers.buffer import AgentBuffer
class ExtrinsicRewardSignal(RewardSignal):

param_keys = ["strength", "gamma"]
super().check_config(config_dict, param_keys)
def evaluate_batch(self, mini_batch: Dict[str, np.array]) -> RewardSignalResult:
def evaluate_batch(self, mini_batch: AgentBuffer) -> RewardSignalResult:
env_rews = np.array(mini_batch["environment_rewards"], dtype=np.float32)
return RewardSignalResult(self.strength * env_rews, env_rews)

19
ml-agents/mlagents/trainers/components/reward_signals/gail/signal.py


from typing import Any, Dict, List
import logging
import numpy as np
from mlagents.tf_utils import tf

from mlagents.trainers.demo_loader import demo_to_buffer
LOGGER = logging.getLogger("mlagents.trainers")
from mlagents.trainers.buffer import AgentBuffer
class GAILRewardSignal(RewardSignal):

"Policy/GAIL Expert Estimate": "gail_expert_estimate",
}
def evaluate_batch(self, mini_batch: Dict[str, np.array]) -> RewardSignalResult:
def evaluate_batch(self, mini_batch: AgentBuffer) -> RewardSignalResult:
feed_dict: Dict[tf.Tensor, Any] = {
self.policy.batch_size_ph: len(mini_batch["actions"]),
self.policy.sequence_length_ph: self.policy.sequence_length,

super().check_config(config_dict, param_keys)
def prepare_update(
self, policy: TFPolicy, mini_batch: Dict[str, np.ndarray], num_sequences: int
self, policy: TFPolicy, mini_batch: AgentBuffer, num_sequences: int
) -> Dict[tf.Tensor, Any]:
"""
Prepare inputs for update.

"""
max_num_experiences = min(
len(mini_batch["actions"]), self.demonstration_buffer.num_experiences
)
# If num_sequences is less, we need to shorten the input batch.
for key, element in mini_batch.items():
mini_batch[key] = element[:max_num_experiences]
# Get batch from demo buffer
# Get batch from demo buffer. Even if demo buffer is smaller, we sample with replacement
len(mini_batch["actions"]), 1
mini_batch.num_experiences, 1
)
feed_dict: Dict[tf.Tensor, Any] = {

4
ml-agents/mlagents/trainers/components/reward_signals/reward_signal_factory.py


import logging
from mlagents.trainers.exception import UnityTrainerException
from mlagents.trainers.components.reward_signals import RewardSignal
from mlagents.trainers.components.reward_signals.extrinsic.signal import (

CuriosityRewardSignal,
)
from mlagents.trainers.policy.tf_policy import TFPolicy
logger = logging.getLogger("mlagents.trainers")
NAME_TO_CLASS: Dict[str, Type[RewardSignal]] = {

4
ml-agents/mlagents/trainers/demo_loader.py


import logging
import os
from typing import List, Tuple
import numpy as np

)
from mlagents_envs.timers import timed, hierarchical_timer
from google.protobuf.internal.decoder import _DecodeVarint32 # type: ignore
logger = logging.getLogger("mlagents.trainers")
@timed

23
ml-agents/mlagents/trainers/distributions.py


and 1 for unmasked.
"""
unmasked_log_probs = self._create_policy_branches(logits, act_size)
self._sampled_policy, self._all_probs, action_index = self._get_masked_actions_probs(
unmasked_log_probs, act_size, action_masks
)
(
self._sampled_policy,
self._all_probs,
action_index,
) = self._get_masked_actions_probs(unmasked_log_probs, act_size, action_masks)
self._entropy = self._create_entropy(
self._sampled_onehot, self._all_probs, action_index, act_size
)
self._entropy = self._create_entropy(self._all_probs, action_index, act_size)
self._total_prob = self._get_log_probs(
self._sampled_onehot, self._all_probs, action_index, act_size
)

kernel_initializer=ModelUtils.scaled_init(0.01),
)
)
unmasked_log_probs = tf.concat(policy_branches, axis=1)
return unmasked_log_probs
return policy_branches
unmasked_log_probs: tf.Tensor,
unmasked_log_probs: List[tf.Tensor],
act_size: List[int],
action_masks: tf.Tensor,
) -> Tuple[tf.Tensor, tf.Tensor, np.ndarray]:

return log_probs
def _create_entropy(
self,
all_log_probs: tf.Tensor,
sample_onehot: tf.Tensor,
action_idx: List[int],
act_size: List[int],
self, all_log_probs: tf.Tensor, action_idx: List[int], act_size: List[int]
) -> tf.Tensor:
entropy = tf.reduce_sum(
(

7
ml-agents/mlagents/trainers/ghost/trainer.py


# # Unity ML-Agents Toolkit
# ## ML-Agent Learning (Ghost Trainer)
# import logging
from typing import Deque, Dict, List, Any, cast
import numpy as np

from mlagents.trainers.trajectory import Trajectory
from mlagents.trainers.agent_processor import AgentManagerQueue
LOGGER = logging.getLogger("mlagents.trainers")
logger = logging.getLogger("mlagents.trainers")
class GhostTrainer(Trainer):

Saves training statistics to Tensorboard.
"""
opponents = np.array(self.policy_elos, dtype=np.float32)
LOGGER.info(
logger.info(
" Learning brain {} ELO: {:0.3f}\n"
"Mean Opponent ELO: {:0.3f}"
" Std Opponent ELO: {:0.3f}".format(

x = "current"
self.policy_elos[-1] = self.current_elo
self.current_opponent = -1 if x == "current" else x
LOGGER.debug(
logger.debug(
"Step {}: Swapping snapshot {} to id {} with {} learning".format(
self.get_step, x, name_behavior_id, self.learning_behavior_name
)

155
ml-agents/mlagents/trainers/learn.py


from mlagents_envs.side_channel.side_channel import SideChannel
from mlagents_envs.side_channel.engine_configuration_channel import EngineConfig
from mlagents_envs.exception import UnityEnvironmentException
from mlagents_envs.timers import hierarchical_timer
from mlagents.logging_util import create_logger
def _create_parser():

:param run_seed: Random seed used for training.
:param run_options: Command line arguments for training.
"""
# Recognize and use docker volume if one is passed as an argument
if not options.docker_target_name:
model_path = f"./models/{options.run_id}"
summaries_dir = "./summaries"
else:
model_path = f"/{options.docker_target_name}/models/{options.run_id}"
summaries_dir = f"/{options.docker_target_name}/summaries"
port = options.base_port
with hierarchical_timer("run_training.setup"):
# Recognize and use docker volume if one is passed as an argument
if not options.docker_target_name:
model_path = f"./models/{options.run_id}"
summaries_dir = "./summaries"
else:
model_path = f"/{options.docker_target_name}/models/{options.run_id}"
summaries_dir = f"/{options.docker_target_name}/summaries"
port = options.base_port
# Configure CSV, Tensorboard Writers and StatsReporter
# We assume reward and episode length are needed in the CSV.
csv_writer = CSVWriter(
summaries_dir,
required_fields=["Environment/Cumulative Reward", "Environment/Episode Length"],
)
tb_writer = TensorboardWriter(summaries_dir)
gauge_write = GaugeWriter()
StatsReporter.add_writer(tb_writer)
StatsReporter.add_writer(csv_writer)
StatsReporter.add_writer(gauge_write)
# Configure CSV, Tensorboard Writers and StatsReporter
# We assume reward and episode length are needed in the CSV.
csv_writer = CSVWriter(
summaries_dir,
required_fields=[
"Environment/Cumulative Reward",
"Environment/Episode Length",
],
)
tb_writer = TensorboardWriter(summaries_dir)
gauge_write = GaugeWriter()
StatsReporter.add_writer(tb_writer)
StatsReporter.add_writer(csv_writer)
StatsReporter.add_writer(gauge_write)
if options.env_path is None:
port = UnityEnvironment.DEFAULT_EDITOR_PORT
env_factory = create_environment_factory(
options.env_path,
options.docker_target_name,
options.no_graphics,
run_seed,
port,
options.env_args,
)
engine_config = EngineConfig(
options.width,
options.height,
options.quality_level,
options.time_scale,
options.target_frame_rate,
)
env_manager = SubprocessEnvManager(env_factory, engine_config, options.num_envs)
maybe_meta_curriculum = try_create_meta_curriculum(
options.curriculum_config, env_manager, options.lesson
)
sampler_manager, resampling_interval = create_sampler_manager(
options.sampler_config, run_seed
)
trainer_factory = TrainerFactory(
options.trainer_config,
summaries_dir,
options.run_id,
model_path,
options.keep_checkpoints,
options.train_model,
options.load_model,
run_seed,
maybe_meta_curriculum,
options.multi_gpu,
)
# Create controller and begin training.
tc = TrainerController(
trainer_factory,
model_path,
summaries_dir,
options.run_id,
options.save_freq,
maybe_meta_curriculum,
options.train_model,
run_seed,
sampler_manager,
resampling_interval,
)
if options.env_path is None:
port = UnityEnvironment.DEFAULT_EDITOR_PORT
env_factory = create_environment_factory(
options.env_path,
options.docker_target_name,
options.no_graphics,
run_seed,
port,
options.env_args,
)
engine_config = EngineConfig(
options.width,
options.height,
options.quality_level,
options.time_scale,
options.target_frame_rate,
)
env_manager = SubprocessEnvManager(env_factory, engine_config, options.num_envs)
maybe_meta_curriculum = try_create_meta_curriculum(
options.curriculum_config, env_manager, options.lesson
)
sampler_manager, resampling_interval = create_sampler_manager(
options.sampler_config, run_seed
)
trainer_factory = TrainerFactory(
options.trainer_config,
summaries_dir,
options.run_id,
model_path,
options.keep_checkpoints,
options.train_model,
options.load_model,
run_seed,
maybe_meta_curriculum,
options.multi_gpu,
)
# Create controller and begin training.
tc = TrainerController(
trainer_factory,
model_path,
summaries_dir,
options.run_id,
options.save_freq,
maybe_meta_curriculum,
options.train_model,
run_seed,
sampler_manager,
resampling_interval,
)
# Begin training
try:
tc.start_learning(env_manager)

except Exception:
print("\n\n\tUnity Technologies\n")
print(get_version_string())
trainer_logger = logging.getLogger("mlagents.trainers")
env_logger = logging.getLogger("mlagents_envs")
trainer_logger.setLevel("DEBUG")
env_logger.setLevel("DEBUG")
log_level = logging.DEBUG
trainer_logger.setLevel("INFO")
env_logger.setLevel("INFO")
# disable noisy warnings from tensorflow.
log_level = logging.INFO
# disable noisy warnings from tensorflow
trainer_logger = create_logger("mlagents.trainers", log_level)
trainer_logger.debug("Configuration for this run:")
trainer_logger.debug(json.dumps(options._asdict(), indent=4))

39
ml-agents/mlagents/trainers/models.py


import logging
from enum import Enum
from typing import Callable, Dict, List, Tuple, NamedTuple

from mlagents.trainers.exception import UnityTrainerException
from mlagents.trainers.brain import CameraResolution
logger = logging.getLogger("mlagents.trainers")
ActivationFunction = Callable[[tf.Tensor], tf.Tensor]
EncoderFunction = Callable[

)
@staticmethod
def create_discrete_action_masking_layer(all_logits, action_masks, action_size):
def break_into_branches(
concatenated_logits: tf.Tensor, action_size: List[int]
) -> List[tf.Tensor]:
"""
Takes a concatenated set of logits that represent multiple discrete action branches
and breaks it up into one Tensor per branch.
:param concatenated_logits: Tensor that represents the concatenated action branches
:param action_size: List of ints containing the number of possible actions for each branch.
:return: A List of Tensors containing one tensor per branch.
"""
action_idx = [0] + list(np.cumsum(action_size))
branched_logits = [
concatenated_logits[:, action_idx[i] : action_idx[i + 1]]
for i in range(len(action_size))
]
return branched_logits
@staticmethod
def create_discrete_action_masking_layer(
branches_logits: List[tf.Tensor],
action_masks: tf.Tensor,
action_size: List[int],
) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]:
:param all_logits: The concatenated unnormalized action probabilities for all branches
:param branches_logits: A List of the unnormalized action probabilities for each branch
:param action_masks: The mask for the logits. Must be of dimension [None x total_number_of_action]
:param action_size: A list containing the number of possible actions for each branch
:return: The action output dimension [batch_size, num_branches], the concatenated

action_idx = [0] + list(np.cumsum(action_size))
branches_logits = [
all_logits[:, action_idx[i] : action_idx[i + 1]]
for i in range(len(action_size))
]
branch_masks = [
action_masks[:, action_idx[i] : action_idx[i + 1]]
for i in range(len(action_size))
]
branch_masks = ModelUtils.break_into_branches(action_masks, action_size)
raw_probs = [
tf.multiply(tf.nn.softmax(branches_logits[k]) + EPSILON, branch_masks[k])
for k in range(len(action_size))

5
ml-agents/mlagents/trainers/policy/nn_policy.py


import logging
from mlagents_envs.timers import timed
from mlagents_envs.base_env import BatchedStepResult
from mlagents.trainers.brain import BrainParameters

GaussianDistribution,
MultiCategoricalDistribution,
)
logger = logging.getLogger("mlagents.trainers")
EPSILON = 1e-6 # Small value to avoid divide by zero

4
ml-agents/mlagents/trainers/policy/tf_policy.py


import logging
from typing import Any, Dict, List, Optional
from mlagents_envs.exception import UnityException
from mlagents.trainers.policy import Policy
from mlagents.trainers.action_info import ActionInfo

self.use_recurrent = trainer_parameters["use_recurrent"]
self.memory_dict: Dict[str, np.ndarray] = {}
self.reward_signals: Dict[str, "RewardSignal"] = {}
self.num_branches = len(self.brain.vector_action_space_size)
self.previous_action_dict: Dict[str, np.array] = {}
self.normalize = trainer_parameters.get("normalize", False)

13
ml-agents/mlagents/trainers/ppo/optimizer.py


import logging
import numpy as np
from mlagents.tf_utils import tf
from mlagents_envs.timers import timed

from mlagents.trainers.buffer import AgentBuffer
logger = logging.getLogger("mlagents.trainers")
class PPOOptimizer(TFOptimizer):

dtype=tf.float32,
name="old_probabilities",
)
# Break old log probs into separate branches
old_log_prob_branches = ModelUtils.break_into_branches(
self.all_old_log_probs, self.policy.act_size
)
self.all_old_log_probs, self.policy.action_masks, self.policy.act_size
old_log_prob_branches, self.policy.action_masks, self.policy.act_size
)
action_idx = [0] + list(np.cumsum(self.policy.act_size))

18
ml-agents/mlagents/trainers/ppo/trainer.py


from mlagents.trainers.policy.tf_policy import TFPolicy
from mlagents.trainers.ppo.optimizer import PPOOptimizer
from mlagents.trainers.trajectory import Trajectory
from mlagents.trainers.exception import UnityTrainerException
logger = logging.getLogger("mlagents.trainers")

self.load = load
self.seed = seed
self.policy: NNPolicy = None # type: ignore
def _check_param_keys(self):
super()._check_param_keys()
# Check that batch size is greater than sequence length. Else, throw
# an exception.
if (
self.trainer_parameters["sequence_length"]
> self.trainer_parameters["batch_size"]
and self.trainer_parameters["use_recurrent"]
):
raise UnityTrainerException(
"batch_size must be greater than or equal to sequence_length when use_recurrent is True."
)
def _process_trajectory(self, trajectory: Trajectory) -> None:
"""

"""
if self.policy:
logger.warning(
"add_policy has been called twice. {} is not a multi-agent trainer".format(
"Your environment contains multiple teams, but {} doesn't support adversarial games. Enable self-play to \
train adversarial games.".format(
self.__class__.__name__
)
)

6
ml-agents/mlagents/trainers/sac/network.py


import logging
from mlagents.trainers.models import ModelUtils, EncoderType
LOG_STD_MAX = 2

CONTINUOUS_TARGET_ENTROPY_SCALE = 1.0 # TODO: Make these an optional hyperparam.
LOGGER = logging.getLogger("mlagents.trainers")
POLICY_SCOPE = ""
TARGET_SCOPE = "target_network"

56
ml-agents/mlagents/trainers/sac/optimizer.py


EPSILON = 1e-6 # Small value to avoid divide by zero
LOGGER = logging.getLogger("mlagents.trainers")
logger = logging.getLogger("mlagents.trainers")
POLICY_SCOPE = ""
TARGET_SCOPE = "target_network"

for name in stream_names:
if discrete:
_branched_mpq1 = self._apply_as_branches(
self.policy_network.q1_pheads[name] * discrete_action_probs
_branched_mpq1 = ModelUtils.break_into_branches(
self.policy_network.q1_pheads[name] * discrete_action_probs,
self.act_size,
)
branched_mpq1 = tf.stack(
[

)
_q1_p_mean = tf.reduce_mean(branched_mpq1, axis=0)
_branched_mpq2 = self._apply_as_branches(
self.policy_network.q2_pheads[name] * discrete_action_probs
_branched_mpq2 = ModelUtils.break_into_branches(
self.policy_network.q2_pheads[name] * discrete_action_probs,
self.act_size,
)
branched_mpq2 = tf.stack(
[

if discrete:
# We need to break up the Q functions by branch, and update them individually.
branched_q1_stream = self._apply_as_branches(
self.policy.selected_actions * q1_streams[name]
branched_q1_stream = ModelUtils.break_into_branches(
self.policy.selected_actions * q1_streams[name], self.act_size
branched_q2_stream = self._apply_as_branches(
self.policy.selected_actions * q2_streams[name]
branched_q2_stream = ModelUtils.break_into_branches(
self.policy.selected_actions * q2_streams[name], self.act_size
)
# Reduce each branch into scalar

self.ent_coef = tf.exp(self.log_ent_coef)
if discrete:
# We also have to do a different entropy and target_entropy per branch.
branched_per_action_ent = self._apply_as_branches(per_action_entropy)
branched_per_action_ent = ModelUtils.break_into_branches(
per_action_entropy, self.act_size
)
branched_ent_sums = tf.stack(
[
tf.reduce_sum(_lp, axis=1, keep_dims=True) + _te

# Same with policy loss, we have to do the loss per branch and average them,
# so that larger branches don't get more weight.
# The equivalent KL divergence from Eq 10 of Haarnoja et al. is also pi*log(pi) - Q
branched_q_term = self._apply_as_branches(
discrete_action_probs * self.policy_network.q1_p
branched_q_term = ModelUtils.break_into_branches(
discrete_action_probs * self.policy_network.q1_p, self.act_size
)
branched_policy_loss = tf.stack(

self.entropy = self.policy_network.entropy
def _apply_as_branches(self, concat_logits: tf.Tensor) -> List[tf.Tensor]:
"""
Takes in a concatenated set of logits and breaks it up into a list of non-concatenated logits, one per
action branch
"""
action_idx = [0] + list(np.cumsum(self.act_size))
branches_logits = [
concat_logits[:, action_idx[i] : action_idx[i + 1]]
for i in range(len(self.act_size))
]
return branches_logits
def _create_sac_optimizer_ops(self) -> None:
"""
Creates the Adam optimizers and update ops for SAC, including

self.target_network.value_vars, self.policy_network.value_vars
)
]
LOGGER.debug("value_vars")
logger.debug("value_vars")
LOGGER.debug("targvalue_vars")
logger.debug("targvalue_vars")
LOGGER.debug("critic_vars")
logger.debug("critic_vars")
LOGGER.debug("q_vars")
logger.debug("q_vars")
LOGGER.debug("policy_vars")
logger.debug("policy_vars")
policy_vars = self.policy.get_trainable_variables()
self.print_all_vars(policy_vars)

def print_all_vars(self, variables):
for _var in variables:
LOGGER.debug(_var)
logger.debug(_var)
@timed
def update(self, batch: AgentBuffer, num_sequences: int) -> Dict[str, float]:

return update_stats
def update_reward_signals(
self, reward_signal_minibatches: Mapping[str, Dict], num_sequences: int
self, reward_signal_minibatches: Mapping[str, AgentBuffer], num_sequences: int
) -> Dict[str, float]:
"""
Only update the reward signals.

feed_dict: Dict[tf.Tensor, Any],
update_dict: Dict[str, tf.Tensor],
stats_needed: Dict[str, str],
reward_signal_minibatches: Mapping[str, Dict],
reward_signal_minibatches: Mapping[str, AgentBuffer],
num_sequences: int,
) -> None:
"""

22
ml-agents/mlagents/trainers/sac/trainer.py


from mlagents.trainers.trainer.rl_trainer import RLTrainer
from mlagents.trainers.trajectory import Trajectory, SplitObservations
from mlagents.trainers.brain import BrainParameters
from mlagents.trainers.exception import UnityTrainerException
logger = logging.getLogger("mlagents.trainers")

else False
)
def _check_param_keys(self):
super()._check_param_keys()
# Check that batch size is greater than sequence length. Else, throw
# an exception.
if (
self.trainer_parameters["sequence_length"]
> self.trainer_parameters["batch_size"]
and self.trainer_parameters["use_recurrent"]
):
raise UnityTrainerException(
"batch_size must be greater than or equal to sequence_length when use_recurrent is True."
)
def save_model(self, name_behavior_id: str) -> None:
"""
Saves the model. Overrides the default save_model since we want to save

self.collected_rewards["environment"][agent_id] += np.sum(
agent_buffer_trajectory["environment_rewards"]
)
for name, reward_signal in self.policy.reward_signals.items():
for name, reward_signal in self.optimizer.reward_signals.items():
evaluate_result = reward_signal.evaluate_batch(
agent_buffer_trajectory
).scaled_reward

reparameterize=True,
create_tf_graph=False,
)
for _reward_signal in policy.reward_signals.keys():
self.collected_rewards[_reward_signal] = defaultdict(lambda: 0)
# Load the replay buffer if load
if self.load and self.checkpoint_replay_buffer:
try:

"""
if self.policy:
logger.warning(
"add_policy has been called twice. {} is not a multi-agent trainer".format(
"Your environment contains multiple teams, but {} doesn't support adversarial games. Enable self-play to \
train adversarial games.".format(
self.__class__.__name__
)
)

136
ml-agents/mlagents/trainers/tests/simple_test_envs.py


import random
from typing import Dict, List
from typing import Dict, List, Any, Tuple
import numpy as np
from mlagents_envs.base_env import (

from mlagents_envs.rpc_utils import proto_from_batched_step_result
OBS_SIZE = 1
VIS_OBS_SIZE = (20, 20, 3)
STEP_SIZE = 0.1
TIME_PENALTY = 0.001

it reaches -1. The position is incremented by the action amount (clamped to [-step_size, step_size]).
"""
def __init__(self, brain_names, use_discrete, step_size=STEP_SIZE):
def __init__(
self,
brain_names,
use_discrete,
step_size=STEP_SIZE,
num_visual=0,
num_vector=1,
vis_obs_size=VIS_OBS_SIZE,
vec_obs_size=OBS_SIZE,
):
self.num_visual = num_visual
self.num_vector = num_vector
self.vis_obs_size = vis_obs_size
self.vec_obs_size = vec_obs_size
[(OBS_SIZE,)], action_type, (2,) if use_discrete else 1
self._make_obs_spec(), action_type, (2,) if use_discrete else 1
)
self.names = brain_names
self.position: Dict[str, float] = {}

self.rewards: Dict[str, float] = {}
self.final_rewards: Dict[str, List[float]] = {}
self.step_result: Dict[str, BatchedStepResult] = {}
self.agent_id: Dict[str, int] = {}
self.agent_id[name] = 0
self.goal[name] = self.random.choice([-1, 1])
self.rewards[name] = 0
self.final_rewards[name] = []

def _make_obs_spec(self) -> List[Any]:
obs_spec: List[Any] = []
for _ in range(self.num_vector):
obs_spec.append((self.vec_obs_size,))
for _ in range(self.num_visual):
obs_spec.append(self.vis_obs_size)
return obs_spec
def _make_obs(self, value: float) -> List[np.ndarray]:
obs = []
for _ in range(self.num_vector):
obs.append(np.ones((1, self.vec_obs_size), dtype=np.float32) * value)
for _ in range(self.num_visual):
obs.append(np.ones((1,) + self.vis_obs_size, dtype=np.float32) * value)
return obs
def get_agent_groups(self):
return self.names

delta = 1 if act else -1
else:
delta = self.action[name][0][0]
delta = clamp(delta, -self.step_size, self.step_size)
self.position[name] += delta
self.position[name] = clamp(self.position[name], -1, 1)

def _make_batched_step(
self, name: str, done: bool, reward: float
) -> BatchedStepResult:
m_vector_obs = [np.ones((1, OBS_SIZE), dtype=np.float32) * self.goal[name]]
m_vector_obs = self._make_obs(self.goal[name])
m_agent_id = np.array([0], dtype=np.int32)
m_agent_id = np.array([self.agent_id[name]], dtype=np.int32)
if done:
self._reset_agent(name)
new_vector_obs = self._make_obs(self.goal[name])
(
m_vector_obs,
m_reward,
m_done,
m_agent_id,
action_mask,
) = self._construct_reset_step(
m_vector_obs,
new_vector_obs,
m_reward,
m_done,
m_agent_id,
action_mask,
name,
)
m_vector_obs, m_reward, m_done, m_done, m_agent_id, action_mask
m_vector_obs,
m_reward,
m_done,
np.zeros(m_done.shape, dtype=bool),
m_agent_id,
action_mask,
def _construct_reset_step(
self,
vector_obs: List[np.ndarray],
new_vector_obs: List[np.ndarray],
reward: np.ndarray,
done: np.ndarray,
agent_id: np.ndarray,
action_mask: List[np.ndarray],
name: str,
) -> Tuple[List[np.ndarray], np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
new_reward = np.array([0.0], dtype=np.float32)
new_done = np.array([False], dtype=np.bool)
new_agent_id = np.array([self.agent_id[name]], dtype=np.int32)
new_action_mask = self._generate_mask()
m_vector_obs = [
np.concatenate((old, new), axis=0)
for old, new in zip(vector_obs, new_vector_obs)
]
m_reward = np.concatenate((reward, new_reward), axis=0)
m_done = np.concatenate((done, new_done), axis=0)
m_agent_id = np.concatenate((agent_id, new_agent_id), axis=0)
if action_mask is not None:
action_mask = [
np.concatenate((old, new), axis=0)
for old, new in zip(action_mask, new_action_mask)
]
return m_vector_obs, m_reward, m_done, m_agent_id, action_mask
done = self._take_action(name)
done = self._take_action(name)
if done:
self._reset_agent(name)
def _generate_mask(self):
if self.discrete:

self.step_count[name] = 0
self.final_rewards[name].append(self.rewards[name])
self.rewards[name] = 0
self.agent_id[name] = self.agent_id[name] + 1
def reset(self) -> None: # type: ignore
for name in self.names:

recurrent_obs_val = (
self.goal[name] if self.step_count[name] <= self.num_show_steps else 0
)
m_vector_obs = [np.ones((1, OBS_SIZE), dtype=np.float32) * recurrent_obs_val]
m_vector_obs = self._make_obs(recurrent_obs_val)
m_agent_id = np.array([0], dtype=np.int32)
m_agent_id = np.array([self.agent_id[name]], dtype=np.int32)
if done:
self._reset_agent(name)
recurrent_obs_val = (
self.goal[name] if self.step_count[name] <= self.num_show_steps else 0
)
new_vector_obs = self._make_obs(recurrent_obs_val)
(
m_vector_obs,
m_reward,
m_done,
m_agent_id,
action_mask,
) = self._construct_reset_step(
m_vector_obs,
new_vector_obs,
m_reward,
m_done,
m_agent_id,
action_mask,
name,
)
m_vector_obs, m_reward, m_done, m_done, m_agent_id, action_mask
m_vector_obs,
m_reward,
m_done,
np.zeros(m_done.shape, dtype=bool),
m_agent_id,
action_mask,
)

60
ml-agents/mlagents/trainers/tests/test_barracuda_converter.py


import os
import tempfile
import pytest
import yaml
from mlagents.trainers.tests.test_nn_policy import create_policy_mock
from mlagents.tf_utils import tf
from mlagents.model_serialization import SerializationSettings, export_policy_model
def test_barracuda_converter():

# cleanup
os.remove(tmpfile)
@pytest.fixture
def dummy_config():
return yaml.safe_load(
"""
trainer: ppo
batch_size: 32
beta: 5.0e-3
buffer_size: 512
epsilon: 0.2
hidden_units: 128
lambd: 0.95
learning_rate: 3.0e-4
max_steps: 5.0e4
normalize: true
num_epoch: 5
num_layers: 2
time_horizon: 64
sequence_length: 64
summary_freq: 1000
use_recurrent: false
normalize: true
memory_size: 8
curiosity_strength: 0.0
curiosity_enc_size: 1
summary_path: test
model_path: test
reward_signals:
extrinsic:
strength: 1.0
gamma: 0.99
"""
)
@pytest.mark.parametrize("discrete", [True, False], ids=["discrete", "continuous"])
@pytest.mark.parametrize("visual", [True, False], ids=["visual", "vector"])
@pytest.mark.parametrize("rnn", [True, False], ids=["rnn", "no_rnn"])
def test_policy_conversion(dummy_config, tmpdir, rnn, visual, discrete):
tf.reset_default_graph()
dummy_config["summary_path"] = str(tmpdir)
dummy_config["model_path"] = os.path.join(tmpdir, "test")
policy = create_policy_mock(
dummy_config, use_rnn=rnn, use_discrete=discrete, use_visual=visual
)
policy.save_model(1000)
settings = SerializationSettings(
policy.model_path, os.path.join(tmpdir, policy.brain.brain_name)
)
export_policy_model(settings, policy.graph, policy.sess)
# These checks taken from test_barracuda_converter
assert os.path.isfile(os.path.join(tmpdir, "test.nn"))
assert os.path.getsize(os.path.join(tmpdir, "test.nn")) > 100

Some files were not shown because too many files changed in this diff
