
Merge remote-tracking branch 'origin/master' into release_6-to-master

/release_6_branch
Christopher Goy 4 years ago
Current commit
5a233353
201 changed files with 2,614 additions and 4,261 deletions
  1. 2
      .circleci/config.yml
  2. 9
      DevProject/Assets/ML-Agents/Scripts/Tests/Performance/SensorPerformanceTests.cs
  3. 16
      Project/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DAgent.cs
  4. 9
      Project/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DHardAgent.cs
  5. 9
      Project/Assets/ML-Agents/Examples/Basic/Scripts/BasicController.cs
  6. 30
      Project/Assets/ML-Agents/Examples/Bouncer/Scripts/BouncerAgent.cs
  7. 37
      Project/Assets/ML-Agents/Examples/Crawler/Scripts/CrawlerAgent.cs
  8. 29
      Project/Assets/ML-Agents/Examples/FoodCollector/Scripts/FoodCollectorAgent.cs
  9. 29
      Project/Assets/ML-Agents/Examples/GridWorld/Scripts/GridAgent.cs
  10. 23
      Project/Assets/ML-Agents/Examples/Hallway/Scripts/HallwayAgent.cs
  11. 23
      Project/Assets/ML-Agents/Examples/PushBlock/Scripts/PushAgentBasic.cs
  12. 23
      Project/Assets/ML-Agents/Examples/Pyramids/Scripts/PyramidAgent.cs
  13. 12
      Project/Assets/ML-Agents/Examples/Reacher/Scripts/ReacherAgent.cs
  14. 20
      Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/DirectionIndicator.cs
  15. 31
      Project/Assets/ML-Agents/Examples/Soccer/Scripts/AgentSoccer.cs
  16. 4
      Project/Assets/ML-Agents/Examples/Template/Scripts/TemplateAgent.cs
  17. 20
      Project/Assets/ML-Agents/Examples/Tennis/Scripts/TennisAgent.cs
  18. 977
      Project/Assets/ML-Agents/Examples/Walker/Scenes/WalkerDynamic.unity
  19. 2
      Project/Assets/ML-Agents/Examples/Walker/Scenes/WalkerDynamic.unity.meta
  20. 962
      Project/Assets/ML-Agents/Examples/Walker/Scenes/WalkerStatic.unity
  21. 218
      Project/Assets/ML-Agents/Examples/Walker/Scripts/WalkerAgent.cs
  22. 1001
      Project/Assets/ML-Agents/Examples/Walker/TFModels/WalkerDynamic.nn
  23. 2
      Project/Assets/ML-Agents/Examples/Walker/TFModels/WalkerDynamic.nn.meta
  24. 1001
      Project/Assets/ML-Agents/Examples/Walker/TFModels/WalkerStatic.nn
  25. 2
      Project/Assets/ML-Agents/Examples/Walker/TFModels/WalkerStatic.nn.meta
  26. 31
      Project/Assets/ML-Agents/Examples/WallJump/Scripts/WallJumpAgent.cs
  27. 17
      Project/Assets/ML-Agents/Examples/Worm/Scripts/WormAgent.cs
  28. 2
      Project/ProjectSettings/ProjectVersion.txt
  29. 4
      com.unity.ml-agents.extensions/Editor/Unity.ML-Agents.Extensions.Editor.asmdef
  30. 1
      com.unity.ml-agents.extensions/Runtime/AssemblyInfo.cs
  31. 29
      com.unity.ml-agents.extensions/Runtime/Sensors/ArticulationBodyPoseExtractor.cs
  32. 8
      com.unity.ml-agents.extensions/Runtime/Sensors/ArticulationBodySensorComponent.cs
  33. 54
      com.unity.ml-agents.extensions/Runtime/Sensors/PhysicsBodySensor.cs
  34. 117
      com.unity.ml-agents.extensions/Runtime/Sensors/PoseExtractor.cs
  35. 80
      com.unity.ml-agents.extensions/Runtime/Sensors/RigidBodyPoseExtractor.cs
  36. 68
      com.unity.ml-agents.extensions/Runtime/Sensors/RigidBodySensorComponent.cs
  37. 88
      com.unity.ml-agents.extensions/Tests/Editor/Sensors/PoseExtractorTests.cs
  38. 67
      com.unity.ml-agents.extensions/Tests/Editor/Sensors/RigidBodyPoseExtractorTests.cs
  39. 29
      com.unity.ml-agents/CHANGELOG.md
  40. 201
      com.unity.ml-agents/Runtime/Agent.cs
  41. 14
      com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs
  42. 2
      com.unity.ml-agents/Runtime/Communicator/RpcCommunicator.cs
  43. 6
      com.unity.ml-agents/Runtime/DecisionRequester.cs
  44. 118
      com.unity.ml-agents/Runtime/DiscreteActionMasker.cs
  45. 17
      com.unity.ml-agents/Runtime/Policies/BarracudaPolicy.cs
  46. 37
      com.unity.ml-agents/Runtime/Policies/BehaviorParameters.cs
  47. 20
      com.unity.ml-agents/Runtime/Policies/HeuristicPolicy.cs
  48. 3
      com.unity.ml-agents/Runtime/Policies/IPolicy.cs
  49. 15
      com.unity.ml-agents/Runtime/Policies/RemotePolicy.cs
  50. 3
      com.unity.ml-agents/Tests/Editor/BehaviorParameterTests.cs
  51. 48
      com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs
  52. 7
      com.unity.ml-agents/Tests/Runtime/RuntimeAPITest.cs
  53. 2
      config/ppo/WalkerDynamic.yaml
  54. 2
      config/ppo/WalkerStatic.yaml
  55. 51
      docs/Learning-Environment-Create-New.md
  56. 31
      docs/Learning-Environment-Examples.md
  57. 2
      gym-unity/gym_unity/__init__.py
  58. 2
      ml-agents-envs/mlagents_envs/__init__.py
  59. 8
      ml-agents-envs/mlagents_envs/exception.py
  60. 2
      ml-agents/mlagents/trainers/__init__.py
  61. 2
      ml-agents/mlagents/trainers/ghost/trainer.py
  62. 3
      ml-agents/mlagents/trainers/optimizer/tf_optimizer.py
  63. 15
      ml-agents/mlagents/trainers/policy/policy.py
  64. 115
      ml-agents/mlagents/trainers/policy/tf_policy.py
  65. 10
      ml-agents/mlagents/trainers/ppo/optimizer.py
  66. 17
      ml-agents/mlagents/trainers/ppo/trainer.py
  67. 2
      ml-agents/mlagents/trainers/sac/optimizer.py
  68. 17
      ml-agents/mlagents/trainers/sac/trainer.py
  69. 8
      ml-agents/mlagents/trainers/settings.py
  70. 41
      ml-agents/mlagents/trainers/stats.py
  71. 27
      ml-agents/mlagents/trainers/tests/test_barracuda_converter.py
  72. 10
      ml-agents/mlagents/trainers/tests/test_bcmodule.py
  73. 62
      ml-agents/mlagents/trainers/tests/test_env_param_manager.py
  74. 62
      ml-agents/mlagents/trainers/tests/test_nn_policy.py
  75. 8
      ml-agents/mlagents/trainers/tests/test_ppo.py
  76. 1
      ml-agents/mlagents/trainers/tests/test_reward_signals.py
  77. 21
      ml-agents/mlagents/trainers/tests/test_rl_trainer.py
  78. 6
      ml-agents/mlagents/trainers/tests/test_sac.py
  79. 8
      ml-agents/mlagents/trainers/tests/test_simple_rl.py
  80. 20
      ml-agents/mlagents/trainers/tests/test_tf_policy.py
  81. 26
      ml-agents/mlagents/trainers/trainer/rl_trainer.py
  82. 2
      ml-agents/mlagents/trainers/trainer/trainer.py
  83. 10
      ml-agents/mlagents/trainers/trainer_controller.py
  84. 3
      test_requirements.txt
  85. 2
      utils/validate_release_links.py
  86. 21
      Project/Assets/ML-Agents/Examples/SharedAssets/Prefabs/Targets/DynamicTarget.prefab
  87. 19
      Project/Assets/ML-Agents/Examples/SharedAssets/Prefabs/Targets/StaticTarget.prefab
  88. 82
      Project/Assets/ML-Agents/Examples/Walker/Prefabs/Ragdoll/WalkerRagdollBase.prefab
  89. 523
      Project/Assets/ML-Agents/Examples/SharedAssets/Prefabs/PlatformDynamicTarget.prefab
  90. 7
      Project/Assets/ML-Agents/Examples/SharedAssets/Prefabs/PlatformDynamicTarget.prefab.meta
  91. 8
      Project/Assets/ML-Agents/Examples/SharedAssets/Prefabs/Targets.meta
  92. 10
      Project/Assets/ML-Agents/Examples/Walker/Demos/ExpertWalkerDy.demo.meta
  93. 10
      Project/Assets/ML-Agents/Examples/Walker/Demos/ExpertWalkerDyVS.demo.meta
  94. 10
      Project/Assets/ML-Agents/Examples/Walker/Demos/ExpertWalkerStVS.demo.meta
  95. 10
      Project/Assets/ML-Agents/Examples/Walker/Demos/ExpertWalkerSta.demo.meta

2
.circleci/config.yml


. venv/bin/activate
mkdir test-reports
pip freeze > test-reports/pip_versions.txt
pytest -n 2 --cov=ml-agents --cov=ml-agents-envs --cov=gym-unity --cov-report html --junitxml=test-reports/junit.xml -p no:warnings
pytest --cov=ml-agents --cov=ml-agents-envs --cov=gym-unity --cov-report html --junitxml=test-reports/junit.xml -p no:warnings
- run:
name: Verify there are no hidden/missing metafiles.

9
DevProject/Assets/ML-Agents/Scripts/Tests/Performance/SensorPerformanceTests.cs


using NUnit.Framework;
using Unity.MLAgents;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Policies;
using Unity.MLAgents.Sensors;
using Unity.MLAgents.Sensors.Reflection;

{
}
public override void Heuristic(float[] actionsOut)
public override void Heuristic(in ActionBuffers actionsOut)
{
}
}

sensor.AddObservation(new Quaternion(1, 2, 3, 4));
}
public override void Heuristic(float[] actionsOut)
public override void Heuristic(in ActionBuffers actionsOut)
{
}
}

[Observable]
public Quaternion QuaternionField = new Quaternion(1, 2, 3, 4);
public override void Heuristic(float[] actionsOut)
public override void Heuristic(in ActionBuffers actionsOut)
{
}
}

get { return m_QuaternionField; }
}
public override void Heuristic(float[] actionsOut)
public override void Heuristic(in ActionBuffers actionsOut)
{
}
}

16
Project/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DAgent.cs


using System;
using Unity.MLAgents.Actuators;
using Random = UnityEngine.Random;
public class Ball3DAgent : Agent
{

sensor.AddObservation(m_BallRb.velocity);
}
public override void OnActionReceived(float[] vectorAction)
public override void OnActionReceived(ActionBuffers actionBuffers)
var actionZ = 2f * Mathf.Clamp(vectorAction[0], -1f, 1f);
var actionX = 2f * Mathf.Clamp(vectorAction[1], -1f, 1f);
var actionZ = 2f * Mathf.Clamp(actionBuffers.ContinuousActions[0], -1f, 1f);
var actionX = 2f * Mathf.Clamp(actionBuffers.ContinuousActions[1], -1f, 1f);
if ((gameObject.transform.rotation.z < 0.25f && actionZ > 0f) ||
(gameObject.transform.rotation.z > -0.25f && actionZ < 0f))

SetResetParameters();
}
public override void Heuristic(float[] actionsOut)
public override void Heuristic(in ActionBuffers actionsOut)
actionsOut[0] = -Input.GetAxis("Horizontal");
actionsOut[1] = Input.GetAxis("Vertical");
var continuousActionsOut = actionsOut.ContinuousActions;
continuousActionsOut[0] = -Input.GetAxis("Horizontal");
continuousActionsOut[1] = Input.GetAxis("Vertical");
}
public void SetBall()
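
The Ball3D diffs above show the release_6 action-API migration: OnActionReceived(float[]) becomes OnActionReceived(ActionBuffers) and Heuristic(float[]) becomes Heuristic(in ActionBuffers). The following is a minimal sketch of the new continuous-action pattern end to end; it is not part of this commit, and the class name is illustrative only.

using UnityEngine;
using Unity.MLAgents;
using Unity.MLAgents.Actuators;

public class MinimalContinuousAgent : Agent
{
    public override void OnActionReceived(ActionBuffers actionBuffers)
    {
        // Continuous actions now arrive in a typed ActionSegment<float>
        // instead of a raw float[].
        var continuousActions = actionBuffers.ContinuousActions;
        var actionZ = 2f * Mathf.Clamp(continuousActions[0], -1f, 1f);
        var actionX = 2f * Mathf.Clamp(continuousActions[1], -1f, 1f);
        transform.Rotate(new Vector3(0, 0, 1), actionZ);
        transform.Rotate(new Vector3(1, 0, 0), actionX);
    }

    public override void Heuristic(in ActionBuffers actionsOut)
    {
        // Manual control writes into the ContinuousActions segment of the
        // provided buffers rather than into a float array.
        var continuousActionsOut = actionsOut.ContinuousActions;
        continuousActionsOut[0] = -Input.GetAxis("Horizontal");
        continuousActionsOut[1] = Input.GetAxis("Vertical");
    }
}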

9
Project/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DHardAgent.cs


using UnityEngine;
using Unity.MLAgents;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Sensors;
public class Ball3DHardAgent : Agent

sensor.AddObservation((ball.transform.position - gameObject.transform.position));
}
public override void OnActionReceived(float[] vectorAction)
public override void OnActionReceived(ActionBuffers actionBuffers)
var actionZ = 2f * Mathf.Clamp(vectorAction[0], -1f, 1f);
var actionX = 2f * Mathf.Clamp(vectorAction[1], -1f, 1f);
var continuousActions = actionBuffers.ContinuousActions;
var actionZ = 2f * Mathf.Clamp(continuousActions[0], -1f, 1f);
var actionX = 2f * Mathf.Clamp(continuousActions[1], -1f, 1f);
if ((gameObject.transform.rotation.z < 0.25f && actionZ > 0f) ||
(gameObject.transform.rotation.z > -0.25f && actionZ < 0f))

9
Project/Assets/ML-Agents/Examples/Basic/Scripts/BasicController.cs


using UnityEngine;
using UnityEngine.SceneManagement;
using Unity.MLAgents;
using Unity.MLAgents.Actuators;
using UnityEngine.Serialization;
/// <summary>

/// Controls the movement of the GameObject based on the actions received.
/// </summary>
/// <param name="vectorAction"></param>
public void ApplyAction(float[] vectorAction)
public void ApplyAction(ActionSegment<int> vectorAction)
var movement = (int)vectorAction[0];
var movement = vectorAction[0];
var direction = 0;

if (Academy.Instance.IsCommunicatorOn)
{
// Apply the previous step's actions
ApplyAction(m_Agent.GetAction());
ApplyAction(m_Agent.GetStoredActionBuffers().DiscreteActions);
m_Agent?.RequestDecision();
}
else

// Apply the previous step's actions
ApplyAction(m_Agent.GetAction());
ApplyAction(m_Agent.GetStoredActionBuffers().DiscreteActions);
m_TimeSinceDecision = 0f;
m_Agent?.RequestDecision();
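
The BasicController change above replaces Agent.GetAction() with Agent.GetStoredActionBuffers(), whose DiscreteActions segment is typed as int. Below is a small sketch of that pattern, assuming a controller with an Agent reference assigned in the Inspector; class and field names are illustrative.

using UnityEngine;
using Unity.MLAgents;
using Unity.MLAgents.Actuators;

public class StepController : MonoBehaviour
{
    public Agent m_Agent;  // assigned in the Inspector (assumption)

    void FixedUpdate()
    {
        // Apply the action chosen at the previous decision, then request a new one.
        ApplyAction(m_Agent.GetStoredActionBuffers().DiscreteActions);
        m_Agent?.RequestDecision();
    }

    void ApplyAction(ActionSegment<int> act)
    {
        // Discrete actions are already ints, so no float-to-int cast is needed.
        var movement = act[0];
        var direction = movement == 1 ? -1 : movement == 2 ? 1 : 0;
        transform.position += new Vector3(direction, 0f, 0f);
    }
}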

30
Project/Assets/ML-Agents/Examples/Bouncer/Scripts/BouncerAgent.cs


using UnityEngine;
using Unity.MLAgents;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Sensors;
public class BouncerAgent : Agent

sensor.AddObservation(target.transform.localPosition);
}
public override void OnActionReceived(float[] vectorAction)
public override void OnActionReceived(ActionBuffers actionBuffers)
for (var i = 0; i < vectorAction.Length; i++)
var continuousActions = actionBuffers.ContinuousActions;
for (var i = 0; i < continuousActions.Length; i++)
vectorAction[i] = Mathf.Clamp(vectorAction[i], -1f, 1f);
continuousActions[i] = Mathf.Clamp(continuousActions[i], -1f, 1f);
var x = vectorAction[0];
var y = ScaleAction(vectorAction[1], 0, 1);
var z = vectorAction[2];
var x = continuousActions[0];
var y = ScaleAction(continuousActions[1], 0, 1);
var z = continuousActions[2];
vectorAction[0] * vectorAction[0] +
vectorAction[1] * vectorAction[1] +
vectorAction[2] * vectorAction[2]) / 3f);
continuousActions[0] * continuousActions[0] +
continuousActions[1] * continuousActions[1] +
continuousActions[2] * continuousActions[2]) / 3f);
m_LookDir = new Vector3(x, y, z);
}

}
}
public override void Heuristic(float[] actionsOut)
public override void Heuristic(in ActionBuffers actionsOut)
actionsOut[0] = Input.GetAxis("Horizontal");
actionsOut[1] = Input.GetKey(KeyCode.Space) ? 1.0f : 0.0f;
actionsOut[2] = Input.GetAxis("Vertical");
var continuousActionsOut = actionsOut.ContinuousActions;
continuousActionsOut[0] = Input.GetAxis("Horizontal");
continuousActionsOut[1] = Input.GetKey(KeyCode.Space) ? 1.0f : 0.0f;
continuousActionsOut[2] = Input.GetAxis("Vertical");
}
void Update()

37
Project/Assets/ML-Agents/Examples/Crawler/Scripts/CrawlerAgent.cs


using System;
using UnityEngine;
using Unity.MLAgents;
using Unity.MLAgents.Actuators;
using Unity.MLAgentsExamples;
using Unity.MLAgents.Sensors;
using Random = UnityEngine.Random;

AddReward(1f);
}
public override void OnActionReceived(float[] vectorAction)
public override void OnActionReceived(ActionBuffers actionBuffers)
var continuousActions = actionBuffers.ContinuousActions;
bpDict[leg0Upper].SetJointTargetRotation(vectorAction[++i], vectorAction[++i], 0);
bpDict[leg1Upper].SetJointTargetRotation(vectorAction[++i], vectorAction[++i], 0);
bpDict[leg2Upper].SetJointTargetRotation(vectorAction[++i], vectorAction[++i], 0);
bpDict[leg3Upper].SetJointTargetRotation(vectorAction[++i], vectorAction[++i], 0);
bpDict[leg0Lower].SetJointTargetRotation(vectorAction[++i], 0, 0);
bpDict[leg1Lower].SetJointTargetRotation(vectorAction[++i], 0, 0);
bpDict[leg2Lower].SetJointTargetRotation(vectorAction[++i], 0, 0);
bpDict[leg3Lower].SetJointTargetRotation(vectorAction[++i], 0, 0);
bpDict[leg0Upper].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], 0);
bpDict[leg1Upper].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], 0);
bpDict[leg2Upper].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], 0);
bpDict[leg3Upper].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], 0);
bpDict[leg0Lower].SetJointTargetRotation(continuousActions[++i], 0, 0);
bpDict[leg1Lower].SetJointTargetRotation(continuousActions[++i], 0, 0);
bpDict[leg2Lower].SetJointTargetRotation(continuousActions[++i], 0, 0);
bpDict[leg3Lower].SetJointTargetRotation(continuousActions[++i], 0, 0);
bpDict[leg0Upper].SetJointStrength(vectorAction[++i]);
bpDict[leg1Upper].SetJointStrength(vectorAction[++i]);
bpDict[leg2Upper].SetJointStrength(vectorAction[++i]);
bpDict[leg3Upper].SetJointStrength(vectorAction[++i]);
bpDict[leg0Lower].SetJointStrength(vectorAction[++i]);
bpDict[leg1Lower].SetJointStrength(vectorAction[++i]);
bpDict[leg2Lower].SetJointStrength(vectorAction[++i]);
bpDict[leg3Lower].SetJointStrength(vectorAction[++i]);
bpDict[leg0Upper].SetJointStrength(continuousActions[++i]);
bpDict[leg1Upper].SetJointStrength(continuousActions[++i]);
bpDict[leg2Upper].SetJointStrength(continuousActions[++i]);
bpDict[leg3Upper].SetJointStrength(continuousActions[++i]);
bpDict[leg0Lower].SetJointStrength(continuousActions[++i]);
bpDict[leg1Lower].SetJointStrength(continuousActions[++i]);
bpDict[leg2Lower].SetJointStrength(continuousActions[++i]);
bpDict[leg3Lower].SetJointStrength(continuousActions[++i]);
}
void FixedUpdate()
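
Both the Crawler and Walker diffs consume the ContinuousActions segment with a running prefix-increment index. Because C# evaluates call arguments left to right, each ++i reads the next action in sequence. A condensed sketch of the idiom follows; the starting value of i (-1) and the joint-controller details are assumptions based on the surrounding source, not part of the diff.

var continuousActions = actionBuffers.ContinuousActions;
var i = -1;
// Each ++i consumes the next continuous action, in declaration order:
bpDict[leg0Upper].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], 0); // actions 0 and 1
bpDict[leg0Lower].SetJointTargetRotation(continuousActions[++i], 0, 0);                      // action 2
// ...remaining joints follow the same pattern, then strengths consume the rest...
bpDict[leg0Upper].SetJointStrength(continuousActions[++i]);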

29
Project/Assets/ML-Agents/Examples/FoodCollector/Scripts/FoodCollectorAgent.cs


using System;
using Unity.MLAgents.Actuators;
using Random = UnityEngine.Random;
public class FoodCollectorAgent : Agent
{

return new Color32(r, g, b, 255);
}
public void MoveAgent(float[] act)
public void MoveAgent(ActionSegment<int> act)
{
m_Shoot = false;

gameObject.GetComponentInChildren<Renderer>().material = normalMaterial;
}
public override void OnActionReceived(float[] vectorAction)
public override void OnActionReceived(ActionBuffers actionBuffers)
MoveAgent(vectorAction);
MoveAgent(actionBuffers.DiscreteActions);
public override void Heuristic(float[] actionsOut)
public override void Heuristic(in ActionBuffers actionsOut)
actionsOut[0] = 0f;
actionsOut[1] = 0f;
actionsOut[2] = 0f;
var discreteActionsOut = actionsOut.DiscreteActions;
discreteActionsOut[0] = 0;
discreteActionsOut[1] = 0;
discreteActionsOut[2] = 0;
actionsOut[2] = 2f;
discreteActionsOut[2] = 2;
actionsOut[0] = 1f;
discreteActionsOut[0] = 1;
actionsOut[2] = 1f;
discreteActionsOut[2] = 1;
actionsOut[0] = 2f;
discreteActionsOut[0] = 2;
actionsOut[3] = Input.GetKey(KeyCode.Space) ? 1.0f : 0.0f;
discreteActionsOut[3] = Input.GetKey(KeyCode.Space) ? 1 : 0;
}
public override void OnEpisodeBegin()

29
Project/Assets/ML-Agents/Examples/GridWorld/Scripts/GridAgent.cs


using UnityEngine;
using System.Linq;
using Unity.MLAgents;
using Unity.MLAgents.Actuators;
using UnityEngine.Serialization;
public class GridAgent : Agent

m_ResetParams = Academy.Instance.EnvironmentParameters;
}
public override void CollectDiscreteActionMasks(DiscreteActionMasker actionMasker)
public override void WriteDiscreteActionMask(IDiscreteActionMask actionMask)
{
// Mask the necessary actions if selected by the user.
if (maskActions)

if (positionX == 0)
{
actionMasker.SetMask(0, new []{ k_Left});
actionMask.WriteMask(0, new []{ k_Left});
actionMasker.SetMask(0, new []{k_Right});
actionMask.WriteMask(0, new []{k_Right});
actionMasker.SetMask(0, new []{k_Down});
actionMask.WriteMask(0, new []{k_Down});
actionMasker.SetMask(0, new []{k_Up});
actionMask.WriteMask(0, new []{k_Up});
public override void OnActionReceived(float[] vectorAction)
public override void OnActionReceived(ActionBuffers actionBuffers)
var action = Mathf.FloorToInt(vectorAction[0]);
var action = actionBuffers.DiscreteActions[0];
var targetPos = transform.position;
switch (action)

}
}
public override void Heuristic(float[] actionsOut)
public override void Heuristic(in ActionBuffers actionsOut)
actionsOut[0] = k_NoAction;
var discreteActionsOut = actionsOut.DiscreteActions;
discreteActionsOut[0] = k_NoAction;
actionsOut[0] = k_Right;
discreteActionsOut[0] = k_Right;
actionsOut[0] = k_Up;
discreteActionsOut[0] = k_Up;
actionsOut[0] = k_Left;
discreteActionsOut[0] = k_Left;
actionsOut[0] = k_Down;
discreteActionsOut[0] = k_Down;
}
}
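
The GridAgent diff above also migrates action masking: CollectDiscreteActionMasks(DiscreteActionMasker) becomes WriteDiscreteActionMask(IDiscreteActionMask), and SetMask becomes WriteMask. A condensed sketch of the new discrete-action pattern follows; the movement constants and the edge check are illustrative, not the example's actual logic.

using UnityEngine;
using Unity.MLAgents;
using Unity.MLAgents.Actuators;

public class MaskedGridAgent : Agent
{
    const int k_NoAction = 0;
    const int k_Right = 1;
    const int k_Left = 2;

    public override void WriteDiscreteActionMask(IDiscreteActionMask actionMask)
    {
        // Forbid action k_Left on branch 0 when the agent sits on the left edge.
        if (transform.position.x <= 0f)
        {
            actionMask.WriteMask(0, new[] { k_Left });
        }
    }

    public override void OnActionReceived(ActionBuffers actionBuffers)
    {
        // Discrete actions are read directly as ints; Mathf.FloorToInt is gone.
        var action = actionBuffers.DiscreteActions[0];
        if (action == k_Right)
        {
            transform.position += Vector3.right;
        }
        else if (action == k_Left)
        {
            transform.position += Vector3.left;
        }
    }

    public override void Heuristic(in ActionBuffers actionsOut)
    {
        var discreteActionsOut = actionsOut.DiscreteActions;
        discreteActionsOut[0] = Input.GetKey(KeyCode.RightArrow) ? k_Right : k_NoAction;
    }
}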

23
Project/Assets/ML-Agents/Examples/Hallway/Scripts/HallwayAgent.cs


using System.Collections;
using UnityEngine;
using Unity.MLAgents;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Sensors;
public class HallwayAgent : Agent

m_GroundRenderer.material = m_GroundMaterial;
}
public void MoveAgent(float[] act)
public void MoveAgent(ActionSegment<int> act)
var action = Mathf.FloorToInt(act[0]);
var action = act[0];
switch (action)
{
case 1:

m_AgentRb.AddForce(dirToGo * m_HallwaySettings.agentRunSpeed, ForceMode.VelocityChange);
}
public override void OnActionReceived(float[] vectorAction)
public override void OnActionReceived(ActionBuffers actionBuffers)
MoveAgent(vectorAction);
MoveAgent(actionBuffers.DiscreteActions);
}
void OnCollisionEnter(Collision col)

}
}
public override void Heuristic(float[] actionsOut)
public override void Heuristic(in ActionBuffers actionsOut)
actionsOut[0] = 0;
var discreteActionsOut = actionsOut.DiscreteActions;
discreteActionsOut[0] = 0;
actionsOut[0] = 3;
discreteActionsOut[0] = 3;
actionsOut[0] = 1;
discreteActionsOut[0] = 1;
actionsOut[0] = 4;
discreteActionsOut[0] = 4;
actionsOut[0] = 2;
discreteActionsOut[0] = 2;
}
}

23
Project/Assets/ML-Agents/Examples/PushBlock/Scripts/PushAgentBasic.cs


using System.Collections;
using UnityEngine;
using Unity.MLAgents;
using Unity.MLAgents.Actuators;
public class PushAgentBasic : Agent
{

/// <summary>
/// Moves the agent according to the selected action.
/// </summary>
public void MoveAgent(float[] act)
public void MoveAgent(ActionSegment<int> act)
var action = Mathf.FloorToInt(act[0]);
var action = act[0];
switch (action)
{

/// <summary>
/// Called every step of the engine. Here the agent takes an action.
/// </summary>
public override void OnActionReceived(float[] vectorAction)
public override void OnActionReceived(ActionBuffers actionBuffers)
MoveAgent(vectorAction);
MoveAgent(actionBuffers.DiscreteActions);
public override void Heuristic(float[] actionsOut)
public override void Heuristic(in ActionBuffers actionsOut)
actionsOut[0] = 0;
var discreteActionsOut = actionsOut.DiscreteActions;
discreteActionsOut[0] = 0;
actionsOut[0] = 3;
discreteActionsOut[0] = 3;
actionsOut[0] = 1;
discreteActionsOut[0] = 1;
actionsOut[0] = 4;
discreteActionsOut[0] = 4;
actionsOut[0] = 2;
discreteActionsOut[0] = 2;
}
}

23
Project/Assets/ML-Agents/Examples/Pyramids/Scripts/PyramidAgent.cs


using UnityEngine;
using Random = UnityEngine.Random;
using Unity.MLAgents;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Sensors;
public class PyramidAgent : Agent

}
}
public void MoveAgent(float[] act)
public void MoveAgent(ActionSegment<int> act)
var action = Mathf.FloorToInt(act[0]);
var action = act[0];
switch (action)
{
case 1:

m_AgentRb.AddForce(dirToGo * 2f, ForceMode.VelocityChange);
}
public override void OnActionReceived(float[] vectorAction)
public override void OnActionReceived(ActionBuffers actionBuffers)
MoveAgent(vectorAction);
MoveAgent(actionBuffers.DiscreteActions);
public override void Heuristic(float[] actionsOut)
public override void Heuristic(in ActionBuffers actionsOut)
actionsOut[0] = 0;
var discreteActionsOut = actionsOut.DiscreteActions;
discreteActionsOut[0] = 0;
actionsOut[0] = 3;
discreteActionsOut[0] = 3;
actionsOut[0] = 1;
discreteActionsOut[0] = 1;
actionsOut[0] = 4;
discreteActionsOut[0] = 4;
actionsOut[0] = 2;
discreteActionsOut[0] = 2;
}
}

12
Project/Assets/ML-Agents/Examples/Reacher/Scripts/ReacherAgent.cs


using UnityEngine;
using Unity.MLAgents;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Sensors;
public class ReacherAgent : Agent

/// <summary>
/// The agent's four actions correspond to torques on each of the two joints.
/// </summary>
public override void OnActionReceived(float[] vectorAction)
public override void OnActionReceived(ActionBuffers actionBuffers)
var torqueX = Mathf.Clamp(vectorAction[0], -1f, 1f) * 150f;
var torqueZ = Mathf.Clamp(vectorAction[1], -1f, 1f) * 150f;
var torqueX = Mathf.Clamp(actionBuffers.ContinuousActions[0], -1f, 1f) * 150f;
var torqueZ = Mathf.Clamp(actionBuffers.ContinuousActions[1], -1f, 1f) * 150f;
torqueX = Mathf.Clamp(vectorAction[2], -1f, 1f) * 150f;
torqueZ = Mathf.Clamp(vectorAction[3], -1f, 1f) * 150f;
torqueX = Mathf.Clamp(actionBuffers.ContinuousActions[2], -1f, 1f) * 150f;
torqueZ = Mathf.Clamp(actionBuffers.ContinuousActions[3], -1f, 1f) * 150f;
m_RbB.AddTorque(new Vector3(torqueX, 0f, torqueZ));
}

20
Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/DirectionIndicator.cs


using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using UnityEngine;
public bool updatedByAgent; //should this be updated by the agent? If not, it will use local settings
void OnEnable()
{
m_StartingYPos = transform.position.y;

{
transform.position = new Vector3(transformToFollow.position.x, m_StartingYPos + heightOffset, transformToFollow.position.z);
if (updatedByAgent)
return;
transform.position = new Vector3(transformToFollow.position.x, m_StartingYPos + heightOffset,
transformToFollow.position.z);
}
//Public method to allow an agent to directly update this component
public void MatchOrientation(Transform t)
{
transform.position = new Vector3(t.position.x, m_StartingYPos + heightOffset, t.position.z);
transform.rotation = t.rotation;
}
}
}

31
Project/Assets/ML-Agents/Examples/Soccer/Scripts/AgentSoccer.cs


using System;
using UnityEngine;
using Unity.MLAgents;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Policies;
public class AgentSoccer : Agent

m_ResetParams = Academy.Instance.EnvironmentParameters;
}
public void MoveAgent(float[] act)
public void MoveAgent(ActionSegment<int> act)
{
var dirToGo = Vector3.zero;
var rotateDir = Vector3.zero;

var forwardAxis = (int)act[0];
var rightAxis = (int)act[1];
var rotateAxis = (int)act[2];
var forwardAxis = act[0];
var rightAxis = act[1];
var rotateAxis = act[2];
switch (forwardAxis)
{

ForceMode.VelocityChange);
}
public override void OnActionReceived(float[] vectorAction)
public override void OnActionReceived(ActionBuffers actionBuffers)
{
if (position == Position.Goalie)

// Existential penalty cumulant for Generic
timePenalty -= m_Existential;
}
MoveAgent(vectorAction);
MoveAgent(actionBuffers.DiscreteActions);
public override void Heuristic(float[] actionsOut)
public override void Heuristic(in ActionBuffers actionsOut)
Array.Clear(actionsOut, 0, actionsOut.Length);
var discreteActionsOut = actionsOut.DiscreteActions;
discreteActionsOut.Clear();
actionsOut[0] = 1f;
discreteActionsOut[0] = 1;
actionsOut[0] = 2f;
discreteActionsOut[0] = 2;
actionsOut[2] = 1f;
discreteActionsOut[2] = 1;
actionsOut[2] = 2f;
discreteActionsOut[2] = 2;
actionsOut[1] = 1f;
discreteActionsOut[1] = 1;
actionsOut[1] = 2f;
discreteActionsOut[1] = 2;
}
}
/// <summary>

4
Project/Assets/ML-Agents/Examples/Template/Scripts/TemplateAgent.cs


using UnityEngine;
using Unity.MLAgents;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Sensors;
public class TemplateAgent : Agent

}
public override void OnActionReceived(float[] vectorAction)
public override void OnActionReceived(ActionBuffers actionBuffers)
{
}

20
Project/Assets/ML-Agents/Examples/Tennis/Scripts/TennisAgent.cs


using UnityEngine;
using UnityEngine.UI;
using Unity.MLAgents;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Sensors;
public class TennisAgent : Agent

sensor.AddObservation(m_InvertMult * gameObject.transform.rotation.z);
}
public override void OnActionReceived(float[] vectorAction)
public override void OnActionReceived(ActionBuffers actionBuffers)
var moveX = Mathf.Clamp(vectorAction[0], -1f, 1f) * m_InvertMult;
var moveY = Mathf.Clamp(vectorAction[1], -1f, 1f);
var rotate = Mathf.Clamp(vectorAction[2], -1f, 1f) * m_InvertMult;
var continuousActions = actionBuffers.ContinuousActions;
var moveX = Mathf.Clamp(continuousActions[0], -1f, 1f) * m_InvertMult;
var moveY = Mathf.Clamp(continuousActions[1], -1f, 1f);
var rotate = Mathf.Clamp(continuousActions[2], -1f, 1f) * m_InvertMult;
if (moveY > 0.5 && transform.position.y - transform.parent.transform.position.y < -1.5f)
{

m_TextComponent.text = score.ToString();
}
public override void Heuristic(float[] actionsOut)
public override void Heuristic(in ActionBuffers actionsOut)
actionsOut[0] = Input.GetAxis("Horizontal"); // Racket Movement
actionsOut[1] = Input.GetKey(KeyCode.Space) ? 1f : 0f; // Racket Jumping
actionsOut[2] = Input.GetAxis("Vertical"); // Racket Rotation
var continuousActionsOut = actionsOut.ContinuousActions;
continuousActionsOut[0] = Input.GetAxis("Horizontal"); // Racket Movement
continuousActionsOut[1] = Input.GetKey(KeyCode.Space) ? 1f : 0f; // Racket Jumping
continuousActionsOut[2] = Input.GetAxis("Vertical"); // Racket Rotation
}
public override void OnEpisodeBegin()

977
Project/Assets/ML-Agents/Examples/Walker/Scenes/WalkerDynamic.unity
File diff suppressed because it is too large

2
Project/Assets/ML-Agents/Examples/Walker/Scenes/WalkerDynamic.unity.meta


fileFormatVersion: 2
guid: 79d5d2687bfbe45f5b78bd6c04992e0d
guid: 65c87f50b8c81433d8fd7f6550773467
DefaultImporter:
externalObjects: {}
userData:

962
Project/Assets/ML-Agents/Examples/Walker/Scenes/WalkerStatic.unity
File diff suppressed because it is too large

218
Project/Assets/ML-Agents/Examples/Walker/Scripts/WalkerAgent.cs


using System;
using MLAgentsExamples;
using Unity.MLAgents.Actuators;
using Unity.MLAgentsExamples;
using Unity.MLAgents.Sensors;
using BodyPart = Unity.MLAgentsExamples.BodyPart;

{
public float maximumWalkingSpeed = 999; //The max walk velocity magnitude an agent will be rewarded for
Vector3 m_WalkDir; //Direction to the target
// Quaternion m_WalkDirLookRot; //Will hold the rotation to our target
[Header("Walk Speed")]
[Range(0.1f, 10)]
[SerializeField]
//The walking speed to try and achieve
private float m_TargetWalkingSpeed = 10;
[Header("Target To Walk Towards")] [Space(10)]
public TargetController target; //Target the agent will walk towards.
public float MTargetWalkingSpeed // property
{
get { return m_TargetWalkingSpeed; }
set { m_TargetWalkingSpeed = Mathf.Clamp(value, .1f, m_maxWalkingSpeed); }
}
const float m_maxWalkingSpeed = 10; //The max walking speed
//Should the agent sample a new goal velocity each episode?
//If true, walkSpeed will be randomly set between zero and m_maxWalkingSpeed in OnEpisodeBegin()
//If false, the goal velocity will be walkingSpeed
public bool randomizeWalkSpeedEachEpisode;
//The direction an agent will walk during training.
private Vector3 m_WorldDirToWalk = Vector3.right;
[Header("Target To Walk Towards")] public Transform target; //Target the agent will walk towards during training.
[Header("Body Parts")] [Space(10)] public Transform hips;
[Header("Body Parts")] public Transform hips;
public Transform chest;
public Transform spine;
public Transform head;

public Transform forearmR;
public Transform handR;
[Header("Orientation")] [Space(10)]
public OrientationCubeController orientationCube;
OrientationCubeController m_OrientationCube;
//The indicator graphic gameobject that points towards the target
DirectionIndicator m_DirectionIndicator;
orientationCube.UpdateOrientation(hips, target.transform);
m_OrientationCube = GetComponentInChildren<OrientationCubeController>();
m_DirectionIndicator = GetComponentInChildren<DirectionIndicator>();
//Setup each body part
m_JdController = GetComponent<JointDriveController>();

}
//Random start rotation to help generalize
transform.rotation = Quaternion.Euler(0, Random.Range(0.0f, 360.0f), 0);
hips.rotation = Quaternion.Euler(0, Random.Range(0.0f, 360.0f), 0);
UpdateOrientationObjects();
orientationCube.UpdateOrientation(hips, target.transform);
//Set our goal walking speed
MTargetWalkingSpeed =
randomizeWalkSpeedEachEpisode ? Random.Range(0.1f, m_maxWalkingSpeed) : MTargetWalkingSpeed;
SetResetParameters();
}

//Get velocities in the context of our orientation cube's space
//Note: You can get these velocities in world space as well but it may not train as well.
sensor.AddObservation(orientationCube.transform.InverseTransformDirection(bp.rb.velocity));
sensor.AddObservation(orientationCube.transform.InverseTransformDirection(bp.rb.angularVelocity));
sensor.AddObservation(m_OrientationCube.transform.InverseTransformDirection(bp.rb.velocity));
sensor.AddObservation(m_OrientationCube.transform.InverseTransformDirection(bp.rb.angularVelocity));
sensor.AddObservation(orientationCube.transform.InverseTransformDirection(bp.rb.position - hips.position));
sensor.AddObservation(m_OrientationCube.transform.InverseTransformDirection(bp.rb.position - hips.position));
if (bp.rb.transform != hips && bp.rb.transform != handL && bp.rb.transform != handR)
{

/// </summary>
public override void CollectObservations(VectorSensor sensor)
{
sensor.AddObservation(Quaternion.FromToRotation(hips.forward, orientationCube.transform.forward));
sensor.AddObservation(Quaternion.FromToRotation(head.forward, orientationCube.transform.forward));
var cubeForward = m_OrientationCube.transform.forward;
//velocity we want to match
var velGoal = cubeForward * MTargetWalkingSpeed;
//ragdoll's avg vel
var avgVel = GetAvgVelocity();
//current ragdoll velocity. normalized
sensor.AddObservation(Vector3.Distance(velGoal, avgVel));
//avg body vel relative to cube
sensor.AddObservation(m_OrientationCube.transform.InverseTransformDirection(avgVel));
//vel goal relative to cube
sensor.AddObservation(m_OrientationCube.transform.InverseTransformDirection(velGoal));
//rotation deltas
sensor.AddObservation(Quaternion.FromToRotation(hips.forward, cubeForward));
sensor.AddObservation(Quaternion.FromToRotation(head.forward, cubeForward));
sensor.AddObservation(orientationCube.transform.InverseTransformPoint(target.transform.position));
//Position of target position relative to cube
sensor.AddObservation(m_OrientationCube.transform.InverseTransformPoint(target.transform.position));
foreach (var bodyPart in m_JdController.bodyPartsList)
{

public override void OnActionReceived(float[] vectorAction)
public override void OnActionReceived(ActionBuffers actionBuffers)
bpDict[chest].SetJointTargetRotation(vectorAction[++i], vectorAction[++i], vectorAction[++i]);
bpDict[spine].SetJointTargetRotation(vectorAction[++i], vectorAction[++i], vectorAction[++i]);
var continuousActions = actionBuffers.ContinuousActions;
bpDict[chest].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], continuousActions[++i]);
bpDict[spine].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], continuousActions[++i]);
bpDict[thighL].SetJointTargetRotation(vectorAction[++i], vectorAction[++i], 0);
bpDict[thighR].SetJointTargetRotation(vectorAction[++i], vectorAction[++i], 0);
bpDict[shinL].SetJointTargetRotation(vectorAction[++i], 0, 0);
bpDict[shinR].SetJointTargetRotation(vectorAction[++i], 0, 0);
bpDict[footR].SetJointTargetRotation(vectorAction[++i], vectorAction[++i], vectorAction[++i]);
bpDict[footL].SetJointTargetRotation(vectorAction[++i], vectorAction[++i], vectorAction[++i]);
bpDict[thighL].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], 0);
bpDict[thighR].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], 0);
bpDict[shinL].SetJointTargetRotation(continuousActions[++i], 0, 0);
bpDict[shinR].SetJointTargetRotation(continuousActions[++i], 0, 0);
bpDict[footR].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], continuousActions[++i]);
bpDict[footL].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], continuousActions[++i]);
bpDict[armL].SetJointTargetRotation(vectorAction[++i], vectorAction[++i], 0);
bpDict[armR].SetJointTargetRotation(vectorAction[++i], vectorAction[++i], 0);
bpDict[forearmL].SetJointTargetRotation(vectorAction[++i], 0, 0);
bpDict[forearmR].SetJointTargetRotation(vectorAction[++i], 0, 0);
bpDict[head].SetJointTargetRotation(vectorAction[++i], vectorAction[++i], 0);
bpDict[armL].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], 0);
bpDict[armR].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], 0);
bpDict[forearmL].SetJointTargetRotation(continuousActions[++i], 0, 0);
bpDict[forearmR].SetJointTargetRotation(continuousActions[++i], 0, 0);
bpDict[head].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], 0);
bpDict[chest].SetJointStrength(vectorAction[++i]);
bpDict[spine].SetJointStrength(vectorAction[++i]);
bpDict[head].SetJointStrength(vectorAction[++i]);
bpDict[thighL].SetJointStrength(vectorAction[++i]);
bpDict[shinL].SetJointStrength(vectorAction[++i]);
bpDict[footL].SetJointStrength(vectorAction[++i]);
bpDict[thighR].SetJointStrength(vectorAction[++i]);
bpDict[shinR].SetJointStrength(vectorAction[++i]);
bpDict[footR].SetJointStrength(vectorAction[++i]);
bpDict[armL].SetJointStrength(vectorAction[++i]);
bpDict[forearmL].SetJointStrength(vectorAction[++i]);
bpDict[armR].SetJointStrength(vectorAction[++i]);
bpDict[forearmR].SetJointStrength(vectorAction[++i]);
bpDict[chest].SetJointStrength(continuousActions[++i]);
bpDict[spine].SetJointStrength(continuousActions[++i]);
bpDict[head].SetJointStrength(continuousActions[++i]);
bpDict[thighL].SetJointStrength(continuousActions[++i]);
bpDict[shinL].SetJointStrength(continuousActions[++i]);
bpDict[footL].SetJointStrength(continuousActions[++i]);
bpDict[thighR].SetJointStrength(continuousActions[++i]);
bpDict[shinR].SetJointStrength(continuousActions[++i]);
bpDict[footR].SetJointStrength(continuousActions[++i]);
bpDict[armL].SetJointStrength(continuousActions[++i]);
bpDict[forearmL].SetJointStrength(continuousActions[++i]);
bpDict[armR].SetJointStrength(continuousActions[++i]);
bpDict[forearmR].SetJointStrength(continuousActions[++i]);
}
//Update OrientationCube and DirectionIndicator
void UpdateOrientationObjects()
{
m_WorldDirToWalk = target.position - hips.position;
m_OrientationCube.UpdateOrientation(hips, target);
if (m_DirectionIndicator)
{
m_DirectionIndicator.MatchOrientation(m_OrientationCube.transform);
}
var cubeForward = orientationCube.transform.forward;
orientationCube.UpdateOrientation(hips, target.transform);
UpdateOrientationObjects();
var cubeForward = m_OrientationCube.transform.forward;
// a. Velocity alignment with goal direction.
var moveTowardsTargetReward = Vector3.Dot(cubeForward,
Vector3.ClampMagnitude(m_JdController.bodyPartsDict[hips].rb.velocity, maximumWalkingSpeed));
if (float.IsNaN(moveTowardsTargetReward))
// a. Match target speed
//This reward will approach 1 if it matches perfectly and approach zero as it deviates
var matchSpeedReward = GetMatchingVelocityReward(cubeForward * MTargetWalkingSpeed, GetAvgVelocity());
//Check for NaNs
if (float.IsNaN(matchSpeedReward))
$" cubeForward: {cubeForward}\n"+
$" hips.velocity: {m_JdController.bodyPartsDict[hips].rb.velocity}\n"+
$" maximumWalkingSpeed: {maximumWalkingSpeed}"
$" cubeForward: {cubeForward}\n" +
$" hips.velocity: {m_JdController.bodyPartsDict[hips].rb.velocity}\n" +
$" maximumWalkingSpeed: {m_maxWalkingSpeed}"
// b. Rotation alignment with goal direction.
var lookAtTargetReward = Vector3.Dot(cubeForward, head.forward);
// b. Rotation alignment with target direction.
//This reward will approach 1 if it faces the target direction perfectly and approach zero as it deviates
var lookAtTargetReward = (Vector3.Dot(cubeForward, head.forward) + 1) * .5F;
//Check for NaNs
$" cubeForward: {cubeForward}\n"+
$" cubeForward: {cubeForward}\n" +
// c. Encourage head height. //Should normalize to ~1
var headHeightOverFeetReward =
((head.position.y - footL.position.y) + (head.position.y - footR.position.y) / 10);
if (float.IsNaN(headHeightOverFeetReward))
AddReward(matchSpeedReward * lookAtTargetReward);
}
//Returns the average velocity of all of the body parts
//Using the velocity of the hips only has shown to result in more erratic movement from the limbs, so...
//...using the average helps prevent this erratic movement
Vector3 GetAvgVelocity()
{
Vector3 velSum = Vector3.zero;
Vector3 avgVel = Vector3.zero;
//ALL RBS
int numOfRB = 0;
foreach (var item in m_JdController.bodyPartsList)
throw new ArgumentException(
"NaN in headHeightOverFeetReward.\n" +
$" head.position: {head.position}\n"+
$" footL.position: {footL.position}\n"+
$" footR.position: {footR.position}"
);
numOfRB++;
velSum += item.rb.velocity;
AddReward(
+ 0.02f * moveTowardsTargetReward
+ 0.02f * lookAtTargetReward
+ 0.005f * headHeightOverFeetReward
);
avgVel = velSum / numOfRB;
return avgVel;
}
//normalized value of the difference in avg speed vs goal walking speed.
public float GetMatchingVelocityReward(Vector3 velocityGoal, Vector3 actualVelocity)
{
//distance between our actual velocity and goal velocity
var velDeltaMagnitude = Mathf.Clamp(Vector3.Distance(actualVelocity, velocityGoal), 0, MTargetWalkingSpeed);
//return the value on a declining sigmoid shaped curve that decays from 1 to 0
//This reward will approach 1 if it matches perfectly and approach zero as it deviates
return Mathf.Pow(1 - Mathf.Pow(velDeltaMagnitude / MTargetWalkingSpeed, 2), 2);
}
/// <summary>
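
The new WalkerAgent reward replaces the raw velocity dot product with GetMatchingVelocityReward, which maps the distance between the average body velocity and the goal velocity onto a curve that falls from 1 (perfect match) to 0 (off by the full target speed). A worked example using the formula from the diff above; the wrapper class is illustrative only.

using UnityEngine;

public static class WalkerRewardExample
{
    // Same expression as WalkerAgent.GetMatchingVelocityReward in the diff above.
    public static float GetMatchingVelocityReward(Vector3 velocityGoal, Vector3 actualVelocity, float targetSpeed)
    {
        var velDeltaMagnitude = Mathf.Clamp(Vector3.Distance(actualVelocity, velocityGoal), 0, targetSpeed);
        return Mathf.Pow(1 - Mathf.Pow(velDeltaMagnitude / targetSpeed, 2), 2);
    }
}

// With a target walking speed of 10:
//   goal (10, 0, 0) vs actual (10, 0, 0) -> delta 0  -> reward 1.0
//   goal (10, 0, 0) vs actual (5, 0, 0)  -> delta 5  -> (1 - 0.5^2)^2 = 0.5625
//   goal (10, 0, 0) vs actual (0, 0, 0)  -> delta 10 -> reward 0.0
// The total step reward is then matchSpeedReward * lookAtTargetReward, both in [0, 1].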

1001
Project/Assets/ML-Agents/Examples/Walker/TFModels/WalkerDynamic.nn
File diff suppressed because it is too large

2
Project/Assets/ML-Agents/Examples/Walker/TFModels/WalkerDynamic.nn.meta


fileFormatVersion: 2
guid: e785133c5b0ac461588106642550d1b3
guid: 8cbae6de45ea44d0c97366e252052722
ScriptedImporter:
fileIDToRecycleName:
11400000: main obj

1001
Project/Assets/ML-Agents/Examples/Walker/TFModels/WalkerStatic.nn
File diff suppressed because it is too large

2
Project/Assets/ML-Agents/Examples/Walker/TFModels/WalkerStatic.nn.meta


fileFormatVersion: 2
guid: 8dfd4337ed40e4d48872a4f86919c9da
guid: 185990f76b7804d1e83378e9d4454c6b
ScriptedImporter:
fileIDToRecycleName:
11400000: main obj

31
Project/Assets/ML-Agents/Examples/WallJump/Scripts/WallJumpAgent.cs


using UnityEngine;
using Unity.MLAgents;
using Unity.Barracuda;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Sensors;
using Unity.MLAgentsExamples;

m_GroundRenderer.material = m_GroundMaterial;
}
public void MoveAgent(float[] act)
public void MoveAgent(ActionSegment<int> act)
{
AddReward(-0.0005f);
var smallGrounded = DoGroundCheck(true);

var rotateDir = Vector3.zero;
var dirToGoForwardAction = (int)act[0];
var rotateDirAction = (int)act[1];
var dirToGoSideAction = (int)act[2];
var jumpAction = (int)act[3];
var dirToGoForwardAction = act[0];
var rotateDirAction = act[1];
var dirToGoSideAction = act[2];
var jumpAction = act[3];
if (dirToGoForwardAction == 1)
dirToGo = (largeGrounded ? 1f : 0.5f) * 1f * transform.forward;

jumpingTime -= Time.fixedDeltaTime;
}
public override void OnActionReceived(float[] vectorAction)
public override void OnActionReceived(ActionBuffers actionBuffers)
MoveAgent(vectorAction);
MoveAgent(actionBuffers.DiscreteActions);
if ((!Physics.Raycast(m_AgentRb.position, Vector3.down, 20))
|| (!Physics.Raycast(m_ShortBlockRb.position, Vector3.down, 20)))
{

}
}
public override void Heuristic(float[] actionsOut)
public override void Heuristic(in ActionBuffers actionsOut)
System.Array.Clear(actionsOut, 0, actionsOut.Length);
var discreteActionsOut = actionsOut.DiscreteActions;
discreteActionsOut.Clear();
actionsOut[1] = 2f;
discreteActionsOut[1] = 2;
actionsOut[0] = 1f;
discreteActionsOut[0] = 1;
actionsOut[1] = 1f;
discreteActionsOut[1] = 1;
actionsOut[0] = 2f;
discreteActionsOut[0] = 2;
actionsOut[3] = Input.GetKey(KeyCode.Space) ? 1.0f : 0.0f;
discreteActionsOut[3] = Input.GetKey(KeyCode.Space) ? 1 : 0;
}
// Detect when the agent hits the goal

17
Project/Assets/ML-Agents/Examples/Worm/Scripts/WormAgent.cs


using UnityEngine;
using Unity.MLAgents;
using Unity.MLAgents.Actuators;
using Unity.MLAgentsExamples;
using Unity.MLAgents.Sensors;

AddReward(1f);
}
public override void OnActionReceived(float[] vectorAction)
public override void OnActionReceived(ActionBuffers actionBuffers)
var continuousActions = actionBuffers.ContinuousActions;
bpDict[bodySegment1].SetJointTargetRotation(vectorAction[++i], vectorAction[++i], 0);
bpDict[bodySegment2].SetJointTargetRotation(vectorAction[++i], vectorAction[++i], 0);
bpDict[bodySegment3].SetJointTargetRotation(vectorAction[++i], vectorAction[++i], 0);
bpDict[bodySegment1].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], 0);
bpDict[bodySegment2].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], 0);
bpDict[bodySegment3].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], 0);
bpDict[bodySegment1].SetJointStrength(vectorAction[++i]);
bpDict[bodySegment2].SetJointStrength(vectorAction[++i]);
bpDict[bodySegment3].SetJointStrength(vectorAction[++i]);
bpDict[bodySegment1].SetJointStrength(continuousActions[++i]);
bpDict[bodySegment2].SetJointStrength(continuousActions[++i]);
bpDict[bodySegment3].SetJointStrength(continuousActions[++i]);
// Detect if worm fell off/through platform
if (bodySegment0.position.y < ground.position.y - 2)

2
Project/ProjectSettings/ProjectVersion.txt


m_EditorVersion: 2018.4.17f1
m_EditorVersion: 2018.4.24f1

4
com.unity.ml-agents.extensions/Editor/Unity.ML-Agents.Extensions.Editor.asmdef


{
"name": "Unity.ML-Agents.Extensions.Editor",
"references": [
"Unity.ML-Agents.Extensions"
"Unity.ML-Agents.Extensions",
"Unity.ML-Agents",
"Unity.ML-Agents.Editor"
],
"includePlatforms": [
"Editor"

1
com.unity.ml-agents.extensions/Runtime/AssemblyInfo.cs


using System.Runtime.CompilerServices;
[assembly: InternalsVisibleTo("Unity.ML-Agents.Extensions.EditorTests")]
[assembly: InternalsVisibleTo("Unity.ML-Agents.Extensions.Editor")]

29
com.unity.ml-agents.extensions/Runtime/Sensors/ArticulationBodyPoseExtractor.cs


return new Pose { rotation = t.rotation, position = t.position };
}
/// <inheritdoc/>
protected internal override Object GetObjectAt(int index)
{
return m_Bodies[index];
}
internal IEnumerable<ArticulationBody> GetEnabledArticulationBodies()
{
if (m_Bodies == null)
{
yield break;
}
for (var i = 0; i < m_Bodies.Length; i++)
{
var articBody = m_Bodies[i];
if (articBody == null)
{
// Ignore a virtual root.
continue;
}
if (IsPoseEnabled(i))
{
yield return articBody;
}
}
}
}
}
#endif // UNITY_2020_1_OR_NEWER

8
com.unity.ml-agents.extensions/Runtime/Sensors/ArticulationBodySensorComponent.cs


var poseExtractor = new ArticulationBodyPoseExtractor(RootBody);
var numPoseObservations = poseExtractor.GetNumPoseObservations(Settings);
var numJointObservations = 0;
// Start from i=1 to ignore the root
for (var i = 1; i < poseExtractor.Bodies.Length; i++)
foreach(var articBody in poseExtractor.GetEnabledArticulationBodies())
numJointObservations += ArticulationBodyJointExtractor.NumObservations(
poseExtractor.Bodies[i], Settings
);
numJointObservations += ArticulationBodyJointExtractor.NumObservations(articBody, Settings);
}
return new[] { numPoseObservations + numJointObservations };
}

54
com.unity.ml-agents.extensions/Runtime/Sensors/PhysicsBodySensor.cs


using System.Collections.Generic;
using UnityEngine;
using Unity.MLAgents.Sensors;

string m_SensorName;
PoseExtractor m_PoseExtractor;
IJointExtractor[] m_JointExtractors;
List<IJointExtractor> m_JointExtractors;
/// Construct a new PhysicsBodySensor
/// Construct a new PhysicsBodySensor
/// <param name="rootBody">The root Rigidbody. This has no Joints on it (but other Joints may connect to it).</param>
/// <param name="rootGameObject">Optional GameObject used to find Rigidbodies in the hierarchy.</param>
/// <param name="virtualRoot">Optional GameObject used to determine the root of the poses,
/// <param name="poseExtractor"></param>
Rigidbody rootBody,
GameObject rootGameObject,
GameObject virtualRoot,
RigidBodyPoseExtractor poseExtractor,
string sensorName=null
string sensorName
var poseExtractor = new RigidBodyPoseExtractor(rootBody, rootGameObject, virtualRoot);
m_SensorName = string.IsNullOrEmpty(sensorName) ? $"PhysicsBodySensor:{rootBody?.name}" : sensorName;
m_SensorName = sensorName;
var rigidBodies = poseExtractor.Bodies;
if (rigidBodies != null)
{
m_JointExtractors = new IJointExtractor[rigidBodies.Length - 1]; // skip the root
for (var i = 1; i < rigidBodies.Length; i++)
{
var jointExtractor = new RigidBodyJointExtractor(rigidBodies[i]);
numJointExtractorObservations += jointExtractor.NumObservations(settings);
m_JointExtractors[i - 1] = jointExtractor;
}
}
else
m_JointExtractors = new List<IJointExtractor>(poseExtractor.NumEnabledPoses);
foreach(var rb in poseExtractor.GetEnabledRigidbodies())
m_JointExtractors = new IJointExtractor[0];
var jointExtractor = new RigidBodyJointExtractor(rb);
numJointExtractorObservations += jointExtractor.NumObservations(settings);
m_JointExtractors.Add(jointExtractor);
}
var numTransformObservations = m_PoseExtractor.GetNumPoseObservations(settings);

m_Settings = settings;
var numJointExtractorObservations = 0;
var articBodies = poseExtractor.Bodies;
if (articBodies != null)
m_JointExtractors = new List<IJointExtractor>(poseExtractor.NumEnabledPoses);
foreach(var articBody in poseExtractor.GetEnabledArticulationBodies())
m_JointExtractors = new IJointExtractor[articBodies.Length - 1]; // skip the root
for (var i = 1; i < articBodies.Length; i++)
{
var jointExtractor = new ArticulationBodyJointExtractor(articBodies[i]);
numJointExtractorObservations += jointExtractor.NumObservations(settings);
m_JointExtractors[i - 1] = jointExtractor;
}
}
else
{
m_JointExtractors = new IJointExtractor[0];
var jointExtractor = new ArticulationBodyJointExtractor(articBody);
numJointExtractorObservations += jointExtractor.NumObservations(settings);
m_JointExtractors.Add(jointExtractor);
}
var numTransformObservations = m_PoseExtractor.GetNumPoseObservations(settings);

117
com.unity.ml-agents.extensions/Runtime/Sensors/PoseExtractor.cs


using System;
using Object = UnityEngine.Object;
namespace Unity.MLAgents.Extensions.Sensors
{

{
if (m_ParentIndices == null)
{
return -1;
throw new NullReferenceException("No parent indices set");
}
return m_ParentIndices[index];

public void SetPoseEnabled(int index, bool val)
{
m_PoseEnabled[index] = val;
}
public bool IsPoseEnabled(int index)
{
return m_PoseEnabled[index];
}
/// <summary>

/// <returns></returns>
protected internal abstract Vector3 GetLinearVelocityAt(int index);
/// <summary>
/// Return the underlying object at the given index. This is only
/// used for display in the inspector.
/// </summary>
/// <param name="index"></param>
/// <returns></returns>
protected internal virtual Object GetObjectAt(int index)
{
return null;
}
/// <summary>
/// Update the internal model space transform storage based on the underlying system.

Debug.DrawLine(current.position+offset, current.position+offset+.1f*localRight, Color.blue);
}
}
/// <summary>
/// Simplified representation of the a node in the hierarchy for display.
/// </summary>
internal struct DisplayNode
{
/// <summary>
/// Underlying object in the hierarchy. Pass to EditorGUIUtility.ObjectContent() for display.
/// </summary>
public Object NodeObject;
/// <summary>
/// Whether the poses for the object are enabled.
/// </summary>
public bool Enabled;
/// <summary>
/// Depth in the hierarchy, used for adjusting the indent level.
/// </summary>
public int Depth;
/// <summary>
/// The index of the corresponding object in the PoseExtractor.
/// </summary>
public int OriginalIndex;
}
/// <summary>
/// Get a list of display nodes in depth-first order.
/// </summary>
/// <returns></returns>
internal IList<DisplayNode> GetDisplayNodes()
{
if (NumPoses == 0)
{
return Array.Empty<DisplayNode>();
}
var nodesOut = new List<DisplayNode>(NumPoses);
// List of children for each node
var tree = new Dictionary<int, List<int>>();
for (var i = 0; i < NumPoses; i++)
{
var parent = GetParentIndex(i);
if (i == -1)
{
continue;
}
if (!tree.ContainsKey(parent))
{
tree[parent] = new List<int>();
}
tree[parent].Add(i);
}
// Store (index, depth) in the stack
var stack = new Stack<(int, int)>();
stack.Push((0, 0));
while (stack.Count != 0)
{
var (current, depth) = stack.Pop();
var obj = GetObjectAt(current);
var node = new DisplayNode
{
NodeObject = obj,
Enabled = IsPoseEnabled(current),
OriginalIndex = current,
Depth = depth
};
nodesOut.Add(node);
// Add children
if (tree.ContainsKey(current))
{
// Push to the stack in reverse order
var children = tree[current];
for (var childIdx = children.Count-1; childIdx >= 0; childIdx--)
{
stack.Push((children[childIdx], depth+1));
}
}
// Safety check
// This shouldn't even happen, but in case we have a cycle in the graph
// exit instead of looping forever and eating up all the memory.
if (nodesOut.Count > NumPoses)
{
return nodesOut;
}
}
return nodesOut;
}
}
/// <summary>

80
com.unity.ml-agents.extensions/Runtime/Sensors/RigidBodyPoseExtractor.cs


/// <param name="rootGameObject">Optional GameObject used to find Rigidbodies in the hierarchy.</param>
/// <param name="virtualRoot">Optional GameObject used to determine the root of the poses,
/// separate from the actual Rigidbodies in the hierarchy. For locomotion tasks, with ragdolls, this provides
/// a stabilized refernece frame, which can improve learning.</param>
public RigidBodyPoseExtractor(Rigidbody rootBody, GameObject rootGameObject = null, GameObject virtualRoot = null)
/// a stabilized reference frame, which can improve learning.</param>
/// <param name="enableBodyPoses">Optional mapping of whether a body's pose should be enabled or not.</param>
public RigidBodyPoseExtractor(Rigidbody rootBody, GameObject rootGameObject = null,
GameObject virtualRoot = null, Dictionary<Rigidbody, bool> enableBodyPoses = null)
{
if (rootBody == null)
{

Rigidbody[] rbs;
Joint[] joints;
joints = rootBody.GetComponentsInChildren <Joint>();
joints = rootGameObject.GetComponentsInChildren<Joint>();
}
if (rbs == null || rbs.Length == 0)

}
if (rbs[0] != rootBody)
if (rbs[0] != rootBody)
{
Debug.Log("Expected root body at index 0");
return;

}
}
var joints = rootBody.GetComponentsInChildren <Joint>();
foreach (var j in joints)
{
var parent = j.connectedBody;

// By default, ignore the root
SetPoseEnabled(0, false);
if (enableBodyPoses != null)
{
foreach (var pair in enableBodyPoses)
{
var rb = pair.Key;
if (bodyToIndex.TryGetValue(rb, out var index))
{
SetPoseEnabled(index, pair.Value);
}
}
}
}
/// <inheritdoc/>

return new Pose { rotation = body.rotation, position = body.position };
}
/// <inheritdoc/>
protected internal override Object GetObjectAt(int index)
{
if (index == 0 && m_VirtualRoot != null)
{
return m_VirtualRoot;
}
return m_Bodies[index];
}
/// <summary>
/// Get a dictionary indicating which Rigidbodies' poses are enabled or disabled.
/// </summary>
/// <returns></returns>
internal Dictionary<Rigidbody, bool> GetBodyPosesEnabled()
{
var bodyPosesEnabled = new Dictionary<Rigidbody, bool>(m_Bodies.Length);
for (var i = 0; i < m_Bodies.Length; i++)
{
var rb = m_Bodies[i];
if (rb == null)
{
continue; // skip virtual root
}
bodyPosesEnabled[rb] = IsPoseEnabled(i);
}
return bodyPosesEnabled;
}
internal IEnumerable<Rigidbody> GetEnabledRigidbodies()
{
if (m_Bodies == null)
{
yield break;
}
for (var i = 0; i < m_Bodies.Length; i++)
{
var rb = m_Bodies[i];
if (rb == null)
{
// Ignore a virtual root.
continue;
}
if (IsPoseEnabled(i))
{
yield return rb;
}
}
}
}
}

68
com.unity.ml-agents.extensions/Runtime/Sensors/RigidBodySensorComponent.cs


using System.Collections.Generic;
using UnityEngine;
using Unity.MLAgents.Sensors;

/// <summary>
/// Optional sensor name. This must be unique for each Agent.
/// </summary>
[SerializeField]
[SerializeField]
[HideInInspector]
RigidBodyPoseExtractor m_PoseExtractor;
/// <summary>
/// Creates a PhysicsBodySensor.
/// </summary>

return new PhysicsBodySensor(RootBody, gameObject, VirtualRoot, Settings, sensorName);
var _sensorName = string.IsNullOrEmpty(sensorName) ? $"PhysicsBodySensor:{RootBody?.name}" : sensorName;
return new PhysicsBodySensor(GetPoseExtractor(), Settings, _sensorName);
}
/// <inheritdoc/>

return new[] { 0 };
}
// TODO static method in PhysicsBodySensor?
// TODO only update PoseExtractor when body changes?
var poseExtractor = new RigidBodyPoseExtractor(RootBody, gameObject, VirtualRoot);
var poseExtractor = GetPoseExtractor();
// Start from i=1 to ignore the root
for (var i = 1; i < poseExtractor.Bodies.Length; i++)
foreach(var rb in poseExtractor.GetEnabledRigidbodies())
var body = poseExtractor.Bodies[i];
var joint = body?.GetComponent<Joint>();
numJointObservations += RigidBodyJointExtractor.NumObservations(body, joint, Settings);
var joint = rb.GetComponent<Joint>();
numJointObservations += RigidBodyJointExtractor.NumObservations(rb, joint, Settings);
}
/// <summary>
/// Get the DisplayNodes of the hierarchy.
/// </summary>
/// <returns></returns>
internal IList<PoseExtractor.DisplayNode> GetDisplayNodes()
{
return GetPoseExtractor().GetDisplayNodes();
}
/// <summary>
/// Lazy construction of the PoseExtractor.
/// </summary>
/// <returns></returns>
RigidBodyPoseExtractor GetPoseExtractor()
{
if (m_PoseExtractor == null)
{
ResetPoseExtractor();
}
return m_PoseExtractor;
}
/// <summary>
/// Reset the pose extractor, trying to keep the enabled state of the corresponding poses the same.
/// </summary>
internal void ResetPoseExtractor()
{
// Get the current enabled state of each body, so that we can reinitialize with them.
Dictionary<Rigidbody, bool> bodyPosesEnabled = null;
if (m_PoseExtractor != null)
{
bodyPosesEnabled = m_PoseExtractor.GetBodyPosesEnabled();
}
m_PoseExtractor = new RigidBodyPoseExtractor(RootBody, gameObject, VirtualRoot, bodyPosesEnabled);
}
/// <summary>
/// Toggle the pose at the given index.
/// </summary>
/// <param name="index"></param>
/// <param name="enabled"></param>
internal void SetPoseEnabled(int index, bool enabled)
{
GetPoseExtractor().SetPoseEnabled(index, enabled);
}
}

88
com.unity.ml-agents.extensions/Tests/Editor/Sensors/PoseExtractorTests.cs


using System;
using UnityEngine;
using NUnit.Framework;
using Unity.MLAgents.Extensions.Sensors;

public class PoseExtractorTests
{
class UselessPoseExtractor : PoseExtractor
class BasicPoseExtractor : PoseExtractor
{
protected internal override Pose GetPoseAt(int index)
{

protected internal override Vector3 GetLinearVelocityAt(int index)
protected internal override Vector3 GetLinearVelocityAt(int index)
}
class UselessPoseExtractor : BasicPoseExtractor
{
public void Init(int[] parentIndices)
{
Setup(parentIndices);

poseExtractor.UpdateModelSpacePoses();
Assert.AreEqual(0, poseExtractor.NumPoses);
// Iterating through poses and velocities should be an empty loop
foreach (var pose in poseExtractor.GetEnabledModelSpacePoses())
{
throw new UnityAgentsException("This shouldn't happen");
}
foreach (var pose in poseExtractor.GetEnabledLocalSpacePoses())
{
throw new UnityAgentsException("This shouldn't happen");
}
foreach (var vel in poseExtractor.GetEnabledModelSpaceVelocities())
{
throw new UnityAgentsException("This shouldn't happen");
}
foreach (var vel in poseExtractor.GetEnabledLocalSpaceVelocities())
{
throw new UnityAgentsException("This shouldn't happen");
}
// Getting a parent index should throw an index exception
Assert.Throws<NullReferenceException>(
() => poseExtractor.GetParentIndex(0)
);
// DisplayNodes should be empty
var displayNodes = poseExtractor.GetDisplayNodes();
Assert.AreEqual(0, displayNodes.Count);
}
[Test]

Assert.AreEqual(size, localPoseIndex);
}
class BadPoseExtractor : PoseExtractor
[Test]
public void TestChainDisplayNodes()
{
var size = 4;
var chain = new ChainPoseExtractor(size);
var displayNodes = chain.GetDisplayNodes();
Assert.AreEqual(size, displayNodes.Count);
for (var i = 0; i < size; i++)
{
var displayNode = displayNodes[i];
Assert.AreEqual(i, displayNode.OriginalIndex);
Assert.AreEqual(null, displayNode.NodeObject);
Assert.AreEqual(i, displayNode.Depth);
Assert.AreEqual(true, displayNode.Enabled);
}
}
[Test]
public void TestDisplayNodesLoop()
{
// Degenerate case with a loop
var poseExtractor = new UselessPoseExtractor();
poseExtractor.Init(new[] {-1, 2, 1});
// This just shouldn't blow up
poseExtractor.GetDisplayNodes();
// Self-loop
poseExtractor.Init(new[] {-1, 1});
// This just shouldn't blow up
poseExtractor.GetDisplayNodes();
}
class BadPoseExtractor : BasicPoseExtractor
{
public BadPoseExtractor()
{

}
Setup(parents);
}
protected internal override Pose GetPoseAt(int index)
{
return Pose.identity;
}
protected internal override Vector3 GetLinearVelocityAt(int index)
{
return Vector3.zero;
}
}
[Test]

var bad = new BadPoseExtractor();
});
}
}
public class PoseExtensionTests

67
com.unity.ml-agents.extensions/Tests/Editor/Sensors/RigidBodyPoseExtractorTests.cs


var rootRb = go.AddComponent<Rigidbody>();
var poseExtractor = new RigidBodyPoseExtractor(rootRb);
Assert.AreEqual(1, poseExtractor.NumPoses);
// Also pass the GameObject
poseExtractor = new RigidBodyPoseExtractor(rootRb, go);
Assert.AreEqual(1, poseExtractor.NumPoses);
}
[Test]
public void TestNoBodiesFound()
{
// Check that if we can't find any bodies under the game object, we get an empty extractor
var gameObj = new GameObject();
var rootRb = gameObj.AddComponent<Rigidbody>();
var otherGameObj = new GameObject();
var poseExtractor = new RigidBodyPoseExtractor(rootRb, otherGameObj);
Assert.AreEqual(0, poseExtractor.NumPoses);
// Add an RB under the other GameObject. Constructor will find a rigid body, but not the root.
var otherRb = otherGameObj.AddComponent<Rigidbody>();
poseExtractor = new RigidBodyPoseExtractor(rootRb, otherGameObj);
Assert.AreEqual(0, poseExtractor.NumPoses);
}
[Test]

Assert.AreEqual(rb1.position, poseExtractor.GetPoseAt(0).position);
Assert.IsTrue(rb1.rotation == poseExtractor.GetPoseAt(0).rotation);
Assert.AreEqual(rb1.velocity, poseExtractor.GetLinearVelocityAt(0));
// Check DisplayNodes gives expected results
var displayNodes = poseExtractor.GetDisplayNodes();
Assert.AreEqual(2, displayNodes.Count);
Assert.AreEqual(rb1, displayNodes[0].NodeObject);
Assert.AreEqual(false, displayNodes[0].Enabled);
Assert.AreEqual(rb2, displayNodes[1].NodeObject);
Assert.AreEqual(true, displayNodes[1].Enabled);
}
[Test]

Assert.AreEqual(rb1.position, poseExtractor.GetPoseAt(1).position);
Assert.IsTrue(rb1.rotation == poseExtractor.GetPoseAt(1).rotation);
Assert.AreEqual(rb1.velocity, poseExtractor.GetLinearVelocityAt(1));
}
[Test]
public void TestBodyPosesEnabledDictionary()
{
// * rootObj
// - rb1
// * go2
// - rb2
// - joint
var rootObj = new GameObject();
var rb1 = rootObj.AddComponent<Rigidbody>();
var go2 = new GameObject();
var rb2 = go2.AddComponent<Rigidbody>();
go2.transform.SetParent(rootObj.transform);
var joint = go2.AddComponent<ConfigurableJoint>();
joint.connectedBody = rb1;
var poseExtractor = new RigidBodyPoseExtractor(rb1);
// Expect the root body disabled and the attached one enabled.
Assert.IsFalse(poseExtractor.IsPoseEnabled(0));
Assert.IsTrue(poseExtractor.IsPoseEnabled(1));
var bodyPosesEnabled = poseExtractor.GetBodyPosesEnabled();
Assert.IsFalse(bodyPosesEnabled[rb1]);
Assert.IsTrue(bodyPosesEnabled[rb2]);
// Swap the values
bodyPosesEnabled[rb1] = true;
bodyPosesEnabled[rb2] = false;
var poseExtractor2 = new RigidBodyPoseExtractor(rb1, null, null, bodyPosesEnabled);
Assert.IsTrue(poseExtractor2.IsPoseEnabled(0));
Assert.IsFalse(poseExtractor2.IsPoseEnabled(1));
}
}
}

29
com.unity.ml-agents/CHANGELOG.md


and this project adheres to
[Semantic Versioning](http://semver.org/spec/v2.0.0.html).
## [Unreleased]
### Major Changes
#### com.unity.ml-agents (C#)
#### ml-agents / ml-agents-envs / gym-unity (Python)
### Minor Changes
#### com.unity.ml-agents (C#)
#### ml-agents / ml-agents-envs / gym-unity (Python)
### Bug Fixes
#### com.unity.ml-agents (C#)
#### ml-agents / ml-agents-envs / gym-unity (Python)
## [1.3.0-preview] - 2020-08-12
### Major Changes
#### com.unity.ml-agents (C#)
#### ml-agents / ml-agents-envs / gym-unity (Python)
### Minor Changes
#### com.unity.ml-agents (C#)
- Update Barracuda to 1.0.2.
#### ml-agents / ml-agents-envs / gym-unity (Python)
### Bug Fixes
#### com.unity.ml-agents (C#)
#### ml-agents / ml-agents-envs / gym-unity (Python)
## [1.3.0-preview] - 2020-08-12
### Major Changes

201
com.unity.ml-agents/Runtime/Agent.cs


using System;
using System.Collections.Generic;
using System.Collections.ObjectModel;
using System.Linq;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Sensors;
using Unity.MLAgents.Sensors.Reflection;
using Unity.MLAgents.Demonstrations;

/// to separate between different agents in the environment.
/// </summary>
public int episodeId;
}
/// <summary>
/// Struct that contains the action information sent from the Brain to the
/// Agent.
/// </summary>
internal struct AgentAction
{
public float[] vectorActions;
public void ClearActions()
{
Array.Clear(storedVectorActions, 0, storedVectorActions.Length);
}
public void CopyActions(ActionBuffers actionBuffers)
{
actionBuffers.PackActions(storedVectorActions);
}
}
/// <summary>

/// can only take an action when it touches the ground, so several frames might elapse between
/// one decision and the need for the next.
///
/// Use the <see cref="OnActionReceived"/> function to implement the actions your agent can take,
/// Use the <see cref="OnActionReceived(float[])"/> function to implement the actions your agent can take,
/// such as moving to reach a goal or interacting with its environment.
///
/// When you call <see cref="EndEpisode"/> on an agent or the agent reaches its <see cref="MaxStep"/> count,

"docs/Learning-Environment-Design-Agents.md")]
[Serializable]
[RequireComponent(typeof(BehaviorParameters))]
public class Agent : MonoBehaviour, ISerializationCallbackReceiver
public partial class Agent : MonoBehaviour, ISerializationCallbackReceiver, IActionReceiver
{
IPolicy m_Brain;
BehaviorParameters m_PolicyFactory;

/// Current Agent information (message sent to Brain).
AgentInfo m_Info;
/// Current Agent action (message sent from Brain).
AgentAction m_Action;
/// Represents the reward the agent accumulated during the current step.
/// It is reset to 0 at the beginning of every step.

internal VectorSensor collectObservationsSensor;
/// <summary>
/// List of IActuators that this Agent will delegate actions to if any exist.
/// </summary>
ActuatorManager m_ActuatorManager;
/// <summary>
/// VectorActuator which is used by default if no other sensors exist on this Agent. This VectorSensor will
/// delegate its actions to <see cref="OnActionReceived(float[])"/> by default in order to keep backward compatibility
/// with the current behavior of Agent.
/// </summary>
IActuator m_VectorActuator;
/// <summary>
/// This is used to avoid allocation of a float array every frame if users are still using the old
/// OnActionReceived method.
/// </summary>
float[] m_LegacyActionCache;
/// <summary>
/// Called when the attached [GameObject] becomes enabled and active.
/// [GameObject]: https://docs.unity3d.com/Manual/GameObjects.html
/// </summary>

m_PolicyFactory = GetComponent<BehaviorParameters>();
m_Info = new AgentInfo();
m_Action = new AgentAction();
sensors = new List<ISensor>();
Academy.Instance.AgentIncrementStep += AgentIncrementStep;

InitializeSensors();
}
using (TimerStack.Instance.Scoped("InitializeActuators"))
{
InitializeActuators();
}
m_Info.storedVectorActions = new float[m_ActuatorManager.TotalNumberOfActions];
// The first time the Academy resets, all Agents in the scene will be
// forced to reset through the <see cref="AgentForceReset"/> event.
// To avoid the Agent resetting twice, the Agents will not begin their

/// set the reward assigned to the current step with a specific value rather than
/// increasing or decreasing it.
///
/// Typically, you assign rewards in the Agent subclass's <see cref="OnActionReceived(float[])"/>
/// Typically, you assign rewards in the Agent subclass's <see cref="IActionReceiver.OnActionReceived"/>
/// implementation after carrying out the received action and evaluating its success.
///
/// Rewards are used during reinforcement learning; they are ignored during inference.

/// <remarks>
/// Call `RequestAction()` to repeat the previous action returned by the agent's
/// most recent decision. A new decision is not requested. When you call this function,
/// the Agent instance invokes <seealso cref="OnActionReceived(float[])"/> with the
/// the Agent instance invokes <seealso cref="IActionReceiver.OnActionReceived"/> with the
/// existing action vector.
///
/// You can use `RequestAction()` in situations where an agent must take an action

/// at the end of an episode.
void ResetData()
{
var param = m_PolicyFactory.BrainParameters;
m_ActionMasker = new DiscreteActionMasker(param);
// If we haven't initialized vectorActions, initialize to 0. This should only
// happen during the creation of the Agent. In subsequent episodes, vectorAction
// should stay the previous action before the Done(), so that it is properly recorded.
if (m_Action.vectorActions == null)
{
m_Action.vectorActions = new float[param.NumActions];
m_Info.storedVectorActions = new float[param.NumActions];
}
m_ActuatorManager?.ResetData();
}
/// <summary>

/// control of an agent using keyboard, mouse, or game controller input.
///
/// Your heuristic implementation can use any decision making logic you specify. Assign decision
/// values to the float[] array, <paramref name="actionsOut"/>, passed to your function as a parameter.
/// values to the <see cref="ActionBuffers.ContinuousActions"/> and <see cref="ActionBuffers.DiscreteActions"/>
/// arrays, passed to your function as a parameter.
/// <seealso cref="OnActionReceived(float[])"/> function, which receives this array and
/// <seealso cref="IActionReceiver.OnActionReceived"/> function, which receives this array and
/// implements the corresponding agent behavior. See [Actions] for more information
/// about agent actions.
/// Note: Do not create a new float array of actions in the `Heuristic()` method,

/// You can also use the [Input System package], which provides a more flexible and
/// configurable input system.
/// <code>
/// public override void Heuristic(float[] actionsOut)
/// public override void Heuristic(ActionBuffers actionsOut)
/// actionsOut[0] = Input.GetAxis("Horizontal");
/// actionsOut[1] = Input.GetKey(KeyCode.Space) ? 1.0f : 0.0f;
/// actionsOut[2] = Input.GetAxis("Vertical");
/// actionsOut.ContinuousActions[0] = Input.GetAxis("Horizontal");
/// actionsOut.ContinuousActions[1] = Input.GetKey(KeyCode.Space) ? 1.0f : 0.0f;
/// actionsOut.ContinuousActions[2] = Input.GetAxis("Vertical");
/// <param name="actionsOut">Array for the output actions.</param>
/// <seealso cref="OnActionReceived(float[])"/>
public virtual void Heuristic(float[] actionsOut)
/// <param name="actionsOut">The <see cref="ActionBuffers"/> which contain the continuous and
/// discrete action buffers to write to.</param>
/// <seealso cref="IActionReceiver.OnActionReceived"/>
public virtual void Heuristic(in ActionBuffers actionsOut)
Debug.LogWarning("Heuristic method called but not implemented. Returning placeholder actions.");
Array.Clear(actionsOut, 0, actionsOut.Length);
// For backward compatibility
switch (m_PolicyFactory.BrainParameters.VectorActionSpaceType)
{
case SpaceType.Continuous:
Heuristic(actionsOut.ContinuousActions.Array);
actionsOut.DiscreteActions.Clear();
break;
case SpaceType.Discrete:
var convertedOut = Array.ConvertAll(actionsOut.DiscreteActions.Array, x => (float)x);
Heuristic(convertedOut);
var discreteActionSegment = actionsOut.DiscreteActions;
for (var i = 0; i < actionsOut.DiscreteActions.Length; i++)
{
discreteActionSegment[i] = (int)convertedOut[i];
}
actionsOut.ContinuousActions.Clear();
break;
}
}
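Pulling the doc-comment example above into a self-contained form, a sketch of the new `ActionBuffers`-based heuristic in a user agent; the `CartAgent` name and the choice of three continuous actions are illustrative, mirroring the remarks rather than any file in this diff.

```csharp
using UnityEngine;
using Unity.MLAgents;
using Unity.MLAgents.Actuators;

// Illustrative agent subclass: maps keyboard input into the new
// ActionBuffers-based Heuristic. Axis names are Unity defaults.
public class CartAgent : Agent
{
    public override void Heuristic(in ActionBuffers actionsOut)
    {
        var continuousActions = actionsOut.ContinuousActions;
        continuousActions[0] = Input.GetAxis("Horizontal");
        continuousActions[1] = Input.GetKey(KeyCode.Space) ? 1.0f : 0.0f;
        continuousActions[2] = Input.GetAxis("Vertical");
    }
}
```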
/// <summary>

#if DEBUG
// Make sure the names are actually unique
for (var i = 0; i < sensors.Count - 1; i++)
{
Debug.Assert(

#endif
}
void InitializeActuators()
{
ActuatorComponent[] attachedActuators;
if (m_PolicyFactory.UseChildActuators)
{
attachedActuators = GetComponentsInChildren<ActuatorComponent>();
}
else
{
attachedActuators = GetComponents<ActuatorComponent>();
}
// Support legacy OnActionReceived
var param = m_PolicyFactory.BrainParameters;
m_VectorActuator = new VectorActuator(this, param.VectorActionSize, param.VectorActionSpaceType);
m_ActuatorManager = new ActuatorManager(attachedActuators.Length + 1);
m_LegacyActionCache = new float[m_VectorActuator.TotalNumberOfActions];
m_ActuatorManager.Add(m_VectorActuator);
foreach (var actuatorComponent in attachedActuators)
{
m_ActuatorManager.Add(actuatorComponent.CreateActuator());
}
}
/// <summary>
/// Sends the Agent info to the linked Brain.
/// </summary>

if (m_Info.done)
{
Array.Clear(m_Info.storedVectorActions, 0, m_Info.storedVectorActions.Length);
m_Info.ClearActions();
Array.Copy(m_Action.vectorActions, m_Info.storedVectorActions, m_Action.vectorActions.Length);
m_ActuatorManager.StoredActions.PackActions(m_Info.storedVectorActions);
m_ActionMasker.ResetMask();
UpdateSensors();
using (TimerStack.Instance.Scoped("CollectObservations"))
{

{
if (m_PolicyFactory.BrainParameters.VectorActionSpaceType == SpaceType.Discrete)
{
CollectDiscreteActionMasks(m_ActionMasker);
}
m_ActuatorManager.WriteActionMask();
m_Info.discreteActionMasks = m_ActionMasker.GetMask();
m_Info.discreteActionMasks = m_ActuatorManager.DiscreteActionMask?.GetMask();
m_Info.reward = m_Reward;
m_Info.done = false;
m_Info.maxStepReached = false;

/// <summary>
/// Returns a read-only view of the observations that were generated in
/// <see cref="CollectObservations(VectorSensor)"/>. This is mainly useful inside of a
/// <see cref="Heuristic(float[])"/> method to avoid recomputing the observations.
/// <see cref="Heuristic(float[], int[])"/> method to avoid recomputing the observations.
/// </summary>
/// <returns>A read-only view of the observations list.</returns>
public ReadOnlyCollection<float> GetObservations()

///
/// [Agents - Actions]: https://github.com/Unity-Technologies/ml-agents/blob/release_6_docs/docs/Learning-Environment-Design-Agents.md#actions
/// </remarks>
/// <seealso cref="OnActionReceived(float[])"/>
public virtual void CollectDiscreteActionMasks(DiscreteActionMasker actionMasker)
/// <seealso cref="IActionReceiver.OnActionReceived"/>
public virtual void WriteDiscreteActionMask(IDiscreteActionMask actionMask)
if (m_ActionMasker == null)
{
m_ActionMasker = new DiscreteActionMasker(actionMask);
}
CollectDiscreteActionMasks(m_ActionMasker);
ActionSpec IActionReceiver.ActionSpec { get; }
/// <summary>
/// Implement `OnActionReceived()` to specify agent behavior at every step, based

/// three values in the action array to use as the force components. During
/// training, the agent's policy learns to set those particular elements of
/// the array to maximize the training rewards the agent receives. (Of course,
/// if you implement a <seealso cref="Heuristic"/> function, it must use the same
/// if you implement a <seealso cref="Heuristic(float[], int[])"/> function, it must use the same
/// elements of the action array for the same purpose since there is no learning
/// involved.)
///

///
/// [Agents - Actions]: https://github.com/Unity-Technologies/ml-agents/blob/release_6_docs/docs/Learning-Environment-Design-Agents.md#actions
/// </remarks>
/// <param name="vectorAction">
/// An array containing the action vector. The length of the array is specified
/// by the <see cref="BrainParameters"/> of the agent's associated
/// <see cref="BehaviorParameters"/> component.
/// <param name="actions">
/// Struct containing the buffers of actions to be executed at this step.
public virtual void OnActionReceived(float[] vectorAction) {}
public virtual void OnActionReceived(ActionBuffers actions)
{
actions.PackActions(m_LegacyActionCache);
OnActionReceived(m_LegacyActionCache);
}
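As a usage note, a hedged sketch of overriding the new `OnActionReceived(ActionBuffers)` entry point shown above; the `RollerAgentExample` class, the force multiplier, and the mapping of the first two continuous actions to a force are assumptions for illustration.

```csharp
using UnityEngine;
using Unity.MLAgents;
using Unity.MLAgents.Actuators;

// Illustrative only: applies the first two continuous actions as a force,
// in the spirit of the force-component example in the remarks above.
public class RollerAgentExample : Agent
{
    public float forceMultiplier = 10f;
    Rigidbody m_Body;

    public override void Initialize()
    {
        m_Body = GetComponent<Rigidbody>();
    }

    public override void OnActionReceived(ActionBuffers actions)
    {
        var act = actions.ContinuousActions;
        m_Body.AddForce(new Vector3(act[0], 0f, act[1]) * forceMultiplier);
    }
}
```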
/// <summary>
/// Implement `OnEpisodeBegin()` to set up an Agent instance at the beginning

public virtual void OnEpisodeBegin() {}
/// <summary>
/// Returns the last action that was decided on by the Agent.
/// Gets the last ActionBuffer for this agent.
/// <returns>
/// The last action that was decided by the Agent (or null if no decision has been made).
/// </returns>
/// <seealso cref="OnActionReceived(float[])"/>
public float[] GetAction()
public ActionBuffers GetStoredActionBuffers()
return m_Action.vectorActions;
return m_ActuatorManager.StoredActions;
}
/// <summary>

if ((m_RequestAction) && (m_Brain != null))
{
m_RequestAction = false;
OnActionReceived(m_Action.vectorActions);
m_ActuatorManager.ExecuteActions();
}
if ((m_StepCount >= MaxStep) && (MaxStep > 0))

void DecideAction()
{
if (m_Action.vectorActions == null)
if (m_ActuatorManager.StoredActions.ContinuousActions.Array == null)
var action = m_Brain?.DecideAction();
if (action == null)
{
Array.Clear(m_Action.vectorActions, 0, m_Action.vectorActions.Length);
}
else
{
Array.Copy(action, m_Action.vectorActions, action.Length);
}
var actions = m_Brain?.DecideAction() ?? new ActionBuffers();
m_Info.CopyActions(actions);
m_ActuatorManager.UpdateActions(actions);
}
}
}

14
com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs


}
#region AgentAction
public static AgentAction ToAgentAction(this AgentActionProto aap)
{
return new AgentAction
{
vectorActions = aap.VectorActions.ToArray()
};
}
public static List<AgentAction> ToAgentActionList(this UnityRLInputProto.Types.ListAgentActionProto proto)
public static List<float[]> ToAgentActionList(this UnityRLInputProto.Types.ListAgentActionProto proto)
var agentActions = new List<AgentAction>(proto.Value.Count);
var agentActions = new List<float[]>(proto.Value.Count);
agentActions.Add(ap.ToAgentAction());
agentActions.Add(ap.VectorActions.ToArray());
}
return agentActions;
}

2
com.unity.ml-agents/Runtime/Communicator/RpcCommunicator.cs


var agentId = m_OrderedAgentsRequestingDecisions[brainName][i];
if (m_LastActionsReceived[brainName].ContainsKey(agentId))
{
m_LastActionsReceived[brainName][agentId] = agentAction.vectorActions;
m_LastActionsReceived[brainName][agentId] = agentAction;
}
}
}

6
com.unity.ml-agents/Runtime/DecisionRequester.cs


/// that the Agent will request a decision every 5 Academy steps.
/// </summary>
[Range(1, 20)]
[Tooltip("The frequency with which the agent requests a decision. A DecisionPeriod " +
"of 5 means that the Agent will request a decision every 5 Academy steps.")]
"of 5 means that the Agent will request a decision every 5 Academy steps.")]
public int DecisionPeriod = 5;
/// <summary>

[Tooltip("Indicates whether or not the agent will take an action during the Academy " +
"steps where it does not request a decision. Has no effect when DecisionPeriod " +
"is set to 1.")]
"steps where it does not request a decision. Has no effect when DecisionPeriod " +
"is set to 1.")]
[FormerlySerializedAs("RepeatAction")]
public bool TakeActionsBetweenDecisions = true;
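As a usage note, a hedged sketch of wiring up the component described by these tooltips from code; the `AddDecisionRequester` helper class is hypothetical and assumes the script sits on the same GameObject as an Agent (DecisionRequester requires one).

```csharp
using UnityEngine;
using Unity.MLAgents;

// Illustrative setup: request a decision every 5 Academy steps and keep
// repeating the last action on the steps in between.
public class AddDecisionRequester : MonoBehaviour
{
    void Awake()
    {
        var requester = gameObject.AddComponent<DecisionRequester>();
        requester.DecisionPeriod = 5;
        requester.TakeActionsBetweenDecisions = true;
    }
}
```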

118
com.unity.ml-agents/Runtime/DiscreteActionMasker.cs


using System;
using System.Collections.Generic;
using System.Linq;
using Unity.MLAgents.Policies;
using Unity.MLAgents.Actuators;
namespace Unity.MLAgents
{

/// may be illegal. For example, if an agent is adjacent to a wall or other obstacle
/// you could mask any actions that direct the agent to move into the blocked space.
/// </remarks>
public class DiscreteActionMasker
public class DiscreteActionMasker : IDiscreteActionMask
/// When using discrete control, is the starting indices of the actions
/// when all the branches are concatenated with each other.
int[] m_StartingActionIndices;
bool[] m_CurrentMask;
readonly BrainParameters m_BrainParameters;
IDiscreteActionMask m_Delegate;
internal DiscreteActionMasker(BrainParameters brainParameters)
internal DiscreteActionMasker(IDiscreteActionMask actionMask)
m_BrainParameters = brainParameters;
m_Delegate = actionMask;
}
/// <summary>

/// <param name="actionIndices">The indices of the masked actions.</param>
public void SetMask(int branch, IEnumerable<int> actionIndices)
{
// If the branch does not exist, raise an error
if (branch >= m_BrainParameters.VectorActionSize.Length)
throw new UnityAgentsException(
"Invalid Action Masking : Branch " + branch + " does not exist.");
var totalNumberActions = m_BrainParameters.VectorActionSize.Sum();
// By default, the masks are null. If we want to specify a new mask, we initialize
// the actionMasks with trues.
if (m_CurrentMask == null)
{
m_CurrentMask = new bool[totalNumberActions];
}
// If this is the first time the masked actions are used, we generate the starting
// indices for each branch.
if (m_StartingActionIndices == null)
{
m_StartingActionIndices = Utilities.CumSum(m_BrainParameters.VectorActionSize);
}
// Perform the masking
foreach (var actionIndex in actionIndices)
{
if (actionIndex >= m_BrainParameters.VectorActionSize[branch])
{
throw new UnityAgentsException(
"Invalid Action Masking: Action Mask is too large for specified branch.");
}
m_CurrentMask[actionIndex + m_StartingActionIndices[branch]] = true;
}
}
/// <summary>
/// Get the current mask for an agent.
/// </summary>
/// <returns>A mask for the agent. A boolean array of length equal to the total number of
/// actions.</returns>
internal bool[] GetMask()
{
if (m_CurrentMask != null)
{
AssertMask();
}
return m_CurrentMask;
m_Delegate.WriteMask(branch, actionIndices);
/// <summary>
/// Makes sure that the current mask is usable.
/// </summary>
void AssertMask()
public void WriteMask(int branch, IEnumerable<int> actionIndices)
// Action Masks can only be used in Discrete Control.
if (m_BrainParameters.VectorActionSpaceType != SpaceType.Discrete)
{
throw new UnityAgentsException(
"Invalid Action Masking : Can only set action mask for Discrete Control.");
}
var numBranches = m_BrainParameters.VectorActionSize.Length;
for (var branchIndex = 0; branchIndex < numBranches; branchIndex++)
{
if (AreAllActionsMasked(branchIndex))
{
throw new UnityAgentsException(
"Invalid Action Masking : All the actions of branch " + branchIndex +
" are masked.");
}
}
m_Delegate.WriteMask(branch, actionIndices);
/// <summary>
/// Resets the current mask for an agent.
/// </summary>
internal void ResetMask()
public bool[] GetMask()
if (m_CurrentMask != null)
{
Array.Clear(m_CurrentMask, 0, m_CurrentMask.Length);
}
return m_Delegate.GetMask();
/// <summary>
/// Checks if all the actions in the input branch are masked.
/// </summary>
/// <param name="branch"> The index of the branch to check.</param>
/// <returns> True if all the actions of the branch are masked.</returns>
bool AreAllActionsMasked(int branch)
public void ResetMask()
if (m_CurrentMask == null)
{
return false;
}
var start = m_StartingActionIndices[branch];
var end = m_StartingActionIndices[branch + 1];
for (var i = start; i < end; i++)
{
if (!m_CurrentMask[i])
{
return false;
}
}
return true;
m_Delegate.ResetMask();
}
}
}
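For orientation, a minimal sketch of how an Agent subclass might use the new `IDiscreteActionMask` path this file now delegates to, illustrating the wall-masking remark above; the class name, branch index, and `m_BlockedLeft` flag are assumptions, not part of this diff.

```csharp
using Unity.MLAgents;
using Unity.MLAgents.Actuators;

// Illustrative only: masks the "move left" action (assumed to be index 1 of
// branch 0) whenever the agent is blocked by a wall on that side.
public class GridAgentExample : Agent
{
    bool m_BlockedLeft; // hypothetical flag set elsewhere by collision checks

    public override void WriteDiscreteActionMask(IDiscreteActionMask actionMask)
    {
        if (m_BlockedLeft)
        {
            actionMask.WriteMask(0, new[] { 1 });
        }
    }
}
```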

17
com.unity.ml-agents/Runtime/Policies/BarracudaPolicy.cs


using System;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Inference;
using Unity.MLAgents.Sensors;

internal class BarracudaPolicy : IPolicy
{
protected ModelRunner m_ModelRunner;
ActionBuffers m_LastActionBuffer;
int m_AgentId;

List<int[]> m_SensorShapes;
SpaceType m_SpaceType;
/// <inheritdoc />
public BarracudaPolicy(

{
var modelRunner = Academy.Instance.GetOrCreateModelRunner(model, brainParameters, inferenceDevice);
m_ModelRunner = modelRunner;
m_SpaceType = brainParameters.VectorActionSpaceType;
}
/// <inheritdoc />

}
/// <inheritdoc />
public float[] DecideAction()
public ref readonly ActionBuffers DecideAction()
return m_ModelRunner?.GetAction(m_AgentId);
var actions = m_ModelRunner?.GetAction(m_AgentId);
if (m_SpaceType == SpaceType.Continuous)
{
m_LastActionBuffer = new ActionBuffers(actions, Array.Empty<int>());
return ref m_LastActionBuffer;
}
m_LastActionBuffer = ActionBuffers.FromDiscreteActions(actions);
return ref m_LastActionBuffer;
}
public void Dispose()

37
com.unity.ml-agents/Runtime/Policies/BehaviorParameters.cs


[Tooltip("Use all Sensor components attached to child GameObjects of this Agent.")]
bool m_UseChildSensors = true;
[HideInInspector]
[SerializeField]
[Tooltip("Use all Actuator components attached to child GameObjects of this Agent.")]
bool m_UseChildActuators = true;
/// <summary>
/// Whether or not to use all the sensor components attached to child GameObjects of the agent.
/// Note that changing this after the Agent has been initialized will not have any effect.

set { m_UseChildSensors = value; }
}
/// <summary>
/// Whether or not to use all the actuator components attached to child GameObjects of the agent.
/// Note that changing this after the Agent has been initialized will not have any effect.
/// </summary>
public bool UseChildActuators
{
get { return m_UseChildActuators; }
set { m_UseChildActuators = value; }
}
[HideInInspector, SerializeField]
ObservableAttributeOptions m_ObservableAttributeHandling = ObservableAttributeOptions.Ignore;

switch (m_BehaviorType)
{
case BehaviorType.HeuristicOnly:
return new HeuristicPolicy(heuristic, m_BrainParameters.NumActions);
return GenerateHeuristicPolicy(heuristic);
case BehaviorType.InferenceOnly:
{
if (m_Model == null)

}
else
{
return new HeuristicPolicy(heuristic, m_BrainParameters.NumActions);
return GenerateHeuristicPolicy(heuristic);
return new HeuristicPolicy(heuristic, m_BrainParameters.NumActions);
return GenerateHeuristicPolicy(heuristic);
}
internal IPolicy GenerateHeuristicPolicy(HeuristicPolicy.ActionGenerator heuristic)
{
var numContinuousActions = 0;
var numDiscreteActions = 0;
if (m_BrainParameters.VectorActionSpaceType == SpaceType.Continuous)
{
numContinuousActions = m_BrainParameters.NumActions;
}
else if (m_BrainParameters.VectorActionSpaceType == SpaceType.Discrete)
{
numDiscreteActions = m_BrainParameters.NumActions;
}
return new HeuristicPolicy(heuristic, numContinuousActions, numDiscreteActions);
}
internal void UpdateAgentPolicy()

20
com.unity.ml-agents/Runtime/Policies/HeuristicPolicy.cs


using System.Collections.Generic;
using System;
using System.Collections;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Sensors;
namespace Unity.MLAgents.Policies

/// </summary>
internal class HeuristicPolicy : IPolicy
{
public delegate void ActionGenerator(float[] actionsOut);
public delegate void ActionGenerator(in ActionBuffers actionBuffers);
float[] m_LastDecision;
ActionBuffers m_ActionBuffers;
bool m_Done;
bool m_DecisionRequested;

/// <inheritdoc />
public HeuristicPolicy(ActionGenerator heuristic, int numActions)
public HeuristicPolicy(ActionGenerator heuristic, int numContinuousActions, int numDiscreteActions)
m_LastDecision = new float[numActions];
var continuousDecision = new ActionSegment<float>(new float[numContinuousActions], 0, numContinuousActions);
var discreteDecision = new ActionSegment<int>(new int[numDiscreteActions], 0, numDiscreteActions);
m_ActionBuffers = new ActionBuffers(continuousDecision, discreteDecision);
}
/// <inheritdoc />

m_Done = info.done;
m_DecisionRequested = true;
public float[] DecideAction()
public ref readonly ActionBuffers DecideAction()
m_Heuristic.Invoke(m_LastDecision);
m_Heuristic.Invoke(m_ActionBuffers);
return m_LastDecision;
return ref m_ActionBuffers;
}
public void Dispose()

public float this[int index]
{
get { return 0.0f; }
set { }
set {}
}
}

3
com.unity.ml-agents/Runtime/Policies/IPolicy.cs


using System;
using System.Collections.Generic;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Sensors;
namespace Unity.MLAgents.Policies

/// it must be taken now. The Brain is expected to update the actions
/// of the Agents at this point the latest.
/// </summary>
float[] DecideAction();
ref readonly ActionBuffers DecideAction();
}
}

15
com.unity.ml-agents/Runtime/Policies/RemotePolicy.cs


using UnityEngine;
using System.Collections.Generic;
using System;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Sensors;
namespace Unity.MLAgents.Policies

{
int m_AgentId;
string m_FullyQualifiedBehaviorName;
SpaceType m_SpaceType;
ActionBuffers m_LastActionBuffer;
internal ICommunicator m_Communicator;

{
m_FullyQualifiedBehaviorName = fullyQualifiedBehaviorName;
m_Communicator = Academy.Instance.Communicator;
m_SpaceType = brainParameters.VectorActionSpaceType;
m_Communicator.SubscribeBrain(m_FullyQualifiedBehaviorName, brainParameters);
}

}
/// <inheritdoc />
public float[] DecideAction()
public ref readonly ActionBuffers DecideAction()
return m_Communicator?.GetActions(m_FullyQualifiedBehaviorName, m_AgentId);
var actions = m_Communicator?.GetActions(m_FullyQualifiedBehaviorName, m_AgentId);
if (m_SpaceType == SpaceType.Continuous)
{
m_LastActionBuffer = new ActionBuffers(actions, Array.Empty<int>());
return ref m_LastActionBuffer;
}
m_LastActionBuffer = ActionBuffers.FromDiscreteActions(actions);
return ref m_LastActionBuffer;
}
public void Dispose()

3
com.unity.ml-agents/Tests/Editor/BehaviorParameterTests.cs


using NUnit.Framework;
using Unity.MLAgents.Actuators;
using UnityEngine;
using Unity.MLAgents.Policies;

public class BehaviorParameterTests
{
static void DummyHeuristic(float[] actionsOut)
static void DummyHeuristic(in ActionBuffers actionsOut)
{
// No-op
}

48
com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs


using NUnit.Framework;
using System.Reflection;
using System.Collections.Generic;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Sensors;
using Unity.MLAgents.Sensors.Reflection;
using Unity.MLAgents.Policies;

{
public Action OnRequestDecision;
ObservationWriter m_ObsWriter = new ObservationWriter();
static ActionSpec s_ActionSpec = ActionSpec.MakeContinuous(1);
static ActionBuffers s_EmptyActionBuffers = new ActionBuffers(new float[1], Array.Empty<int>());
public void RequestDecision(AgentInfo info, List<ISensor> sensors)
{
foreach (var sensor in sensors)

OnRequestDecision?.Invoke();
}
public float[] DecideAction() { return new float[0]; }
public ref readonly ActionBuffers DecideAction() { return ref s_EmptyActionBuffers; }
public void Dispose() {}
}

sensor.AddObservation(collectObservationsCallsForEpisode);
}
public override void OnActionReceived(float[] vectorAction)
public override void OnActionReceived(ActionBuffers buffers)
{
agentActionCalls += 1;
agentActionCallsForEpisode += 1;

agentActionCallsForEpisode = 0;
}
public override void Heuristic(float[] actionsOut)
public override void Heuristic(in ActionBuffers actionsOut)
actionsOut[0] = obs[0];
var continuousActions = actionsOut.ContinuousActions;
continuousActions[0] = (int)obs[0];
heuristicCalls++;
}
}

public void TestAgent()
{
var agentGo1 = new GameObject("TestAgent");
var bp1 = agentGo1.AddComponent<BehaviorParameters>();
bp1.BrainParameters.VectorActionSize = new[] { 1 };
bp1.BrainParameters.VectorActionSpaceType = SpaceType.Continuous;
var bp2 = agentGo2.AddComponent<BehaviorParameters>();
bp2.BrainParameters.VectorActionSize = new[] { 1 };
bp2.BrainParameters.VectorActionSpaceType = SpaceType.Continuous;
agentGo2.AddComponent<TestAgent>();
var agent2 = agentGo2.GetComponent<TestAgent>();

public void TestAgent()
{
var agentGo1 = new GameObject("TestAgent");
var bp1 = agentGo1.AddComponent<BehaviorParameters>();
bp1.BrainParameters.VectorActionSize = new[] { 1 };
bp1.BrainParameters.VectorActionSpaceType = SpaceType.Continuous;
var bp2 = agentGo2.AddComponent<BehaviorParameters>();
bp2.BrainParameters.VectorActionSize = new[] { 1 };
bp2.BrainParameters.VectorActionSpaceType = SpaceType.Continuous;
agentGo2.AddComponent<TestAgent>();
var agent2 = agentGo2.GetComponent<TestAgent>();

public void AssertStackingReset()
{
var agentGo1 = new GameObject("TestAgent");
agentGo1.AddComponent<TestAgent>();
var bp1 = agentGo1.AddComponent<BehaviorParameters>();
bp1.BrainParameters.VectorActionSize = new[] { 1 };
bp1.BrainParameters.VectorActionSpaceType = SpaceType.Continuous;
var agent1 = agentGo1.AddComponent<TestAgent>();
var agent1 = agentGo1.GetComponent<TestAgent>();
var aca = Academy.Instance;
agent1.LazyInitialize();
var policy = new TestPolicy();

public void TestCumulativeReward()
{
var agentGo1 = new GameObject("TestAgent");
agentGo1.AddComponent<TestAgent>();
var agent1 = agentGo1.GetComponent<TestAgent>();
var bp1 = agentGo1.AddComponent<BehaviorParameters>();
bp1.BrainParameters.VectorActionSize = new[] { 1 };
bp1.BrainParameters.VectorActionSpaceType = SpaceType.Continuous;
var agent1 = agentGo1.AddComponent<TestAgent>();
agentGo2.AddComponent<TestAgent>();
var agent2 = agentGo2.GetComponent<TestAgent>();
var bp2 = agentGo2.AddComponent<BehaviorParameters>();
bp2.BrainParameters.VectorActionSize = new[] { 1 };
bp2.BrainParameters.VectorActionSpaceType = SpaceType.Continuous;
var agent2 = agentGo2.AddComponent<TestAgent>();
var aca = Academy.Instance;
var decisionRequester = agent1.gameObject.AddComponent<DecisionRequester>();

public void TestMaxStepsReset()
{
var agentGo1 = new GameObject("TestAgent");
var bp1 = agentGo1.AddComponent<BehaviorParameters>();
bp1.BrainParameters.VectorActionSize = new[] { 1 };
bp1.BrainParameters.VectorActionSpaceType = SpaceType.Continuous;
agentGo1.AddComponent<TestAgent>();
var agent1 = agentGo1.GetComponent<TestAgent>();
var aca = Academy.Instance;

{
// Make sure that Agents with HeuristicPolicies step their sensors each Academy step.
var agentGo1 = new GameObject("TestAgent");
var bp1 = agentGo1.AddComponent<BehaviorParameters>();
bp1.BrainParameters.VectorActionSize = new[] { 1 };
bp1.BrainParameters.VectorActionSpaceType = SpaceType.Continuous;
agentGo1.AddComponent<TestAgent>();
var agent1 = agentGo1.GetComponent<TestAgent>();
var aca = Academy.Instance;

7
com.unity.ml-agents/Tests/Runtime/RuntimeAPITest.cs


using Unity.MLAgents.Sensors;
using Unity.MLAgents.Sensors.Reflection;
using NUnit.Framework;
using Unity.MLAgents.Actuators;
using UnityEngine;
using UnityEngine.TestTools;

[Observable]
public float ObservableFloat;
public override void Heuristic(float[] actionsOut)
public override void Heuristic(in ActionBuffers actionsOut)
{
numHeuristicCalls++;
base.Heuristic(actionsOut);

Academy.Instance.EnvironmentStep();
var actions = agent.GetAction();
var actions = agent.GetStoredActionBuffers().DiscreteActions;
Assert.AreEqual(new[] {0.0f, 0.0f}, actions);
Assert.AreEqual(new ActionSegment<int>(new[] {0, 0}), actions);
Assert.AreEqual(1, agent.numHeuristicCalls);
Academy.Instance.EnvironmentStep();

2
config/ppo/WalkerDynamic.yaml


gamma: 0.995
strength: 1.0
keep_checkpoints: 5
max_steps: 20000000
max_steps: 30000000
time_horizon: 1000
summary_freq: 30000
threaded: true

2
config/ppo/WalkerStatic.yaml


gamma: 0.995
strength: 1.0
keep_checkpoints: 5
max_steps: 20000000
max_steps: 30000000
time_horizon: 1000
summary_freq: 30000
threaded: true

51
docs/Learning-Environment-Create-New.md


Note the `forceMultiplier` class variable is defined before the function. Since `forceMultiplier` is
public, you can set the value from the Inspector window.
## Final Editor Setup
Now that all the GameObjects and ML-Agent components are in place, it is time
to connect everything together in the Unity Editor. This involves changing some
of the Agent Component's properties so that they are compatible with our Agent
code.
1. Select the **RollerAgent** GameObject to show its properties in the Inspector
window.
1. Drag the Target GameObject in the Hierarchy into the `Target` field in RollerAgent Script.
1. Add the `Decision Requester` script with the Add Component button from the
RollerAgent Inspector.
1. Change **Decision Period** to `10`. For more information on decisions, see [the Agent documentation](Learning-Environment-Design-Agents.md#decisions)
1. Drag the Target GameObject from the Hierarchy window to the RollerAgent
Target field.
1. Add the `Behavior Parameters` script with the Add Component button from the
RollerAgent Inspector.
1. Modify the Behavior Parameters of the Agent:
- `Behavior Name` to _RollerBall_
- `Vector Observation` > `Space Size` = 8
- `Vector Action` > `Space Type` = **Continuous**
- `Vector Action` > `Space Size` = 2
Now you are ready to test the environment before training.
## Testing the Environment
It is always a good idea to first test your environment by controlling the Agent

Console window and that the Agent resets when it reaches its target or falls
from the platform.
## Final Editor Setup
Now that all the GameObjects and ML-Agent components are in place, it is time
to connect everything together in the Unity Editor. This involves changing some
of the Agent Component's properties so that they are compatible with our Agent
code.
1. Select the **RollerAgent** GameObject to show its properties in the Inspector
window.
1. Add the `Decision Requester` script with the Add Component button from the
RollerAgent Inspector.
1. Change **Decision Period** to `10`. For more information on decisions, see [the Agent documentation](Learning-Environment-Design-Agents.md#decisions)
1. Drag the Target GameObject from the Hierarchy window to the RollerAgent
Target field.
1. Add the `Behavior Parameters` script with the Add Component button from the
RollerAgent Inspector.
1. Modify the Behavior Parameters of the Agent:
- `Behavior Name` to _RollerBall_
- `Vector Observation` > `Space Size` = 8
- `Vector Action` > `Space Type` = **Continuous**
- `Vector Action` > `Space Size` = 2
Now you are ready to test the environment before training.
## Training the Environment
The process is the same as described in the

pass to the `mlagents-learn` program. Create a new `rollerball_config.yaml` file
and include the following hyperparameter values:
under `config/` and include the following hyperparameter values:
```yml
behaviors:
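  # Hedged sketch only: the tutorial's actual hyperparameter values are elided
  # in this diff view. Typical PPO settings for a small environment such as
  # RollerBall look roughly like the following (names and values are indicative).
  RollerBall:
    trainer_type: ppo
    hyperparameters:
      batch_size: 10
      buffer_size: 100
      learning_rate: 3.0e-4
    network_settings:
      hidden_units: 128
      num_layers: 2
    reward_signals:
      extrinsic:
        gamma: 0.99
        strength: 1.0
    max_steps: 500000
    time_horizon: 64
    summary_freq: 10000
```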

31
docs/Learning-Environment-Examples.md


- Set-up: Physics-based Humanoid agents with 26 degrees of freedom. These DOFs
correspond to articulation of the following body-parts: hips, chest, spine,
head, thighs, shins, feet, arms, forearms and hands.
- Goal: The agent must move its body toward the goal direction as quickly as
possible without falling.
- `WalkerStatic` - Goal direction is always forward.
- Goal: The agent must move its body toward the goal direction without falling.
- `WalkerDynamicVariableSpeed` - Goal direction and walking speed are randomized.
- `WalkerStatic` - Goal direction is always forward.
- `WalkerStaticVariableSpeed` - Goal direction is always forward. Walking
speed is randomized.
- +0.02 times body velocity in the goal direction. (run towards target)
- +0.01 times head direction alignment with goal direction. (face towards target)
- +0.005 times head y position - left foot y position. (encourage head height)
- +0.005 times head y position - right foot y position. (encourage head height)
The reward function is now geometric, meaning the reward each step is the product
of all the reward terms rather than their sum; this encourages the agent to keep
every term high instead of maximizing only the easiest rewards (see the short
formula after this list).
- Body velocity matches goal velocity. (normalized between (0,1))
- Head direction alignment with goal direction. (normalized between (0,1))
- Vector Observation space: 236 variables corresponding to position, rotation,
- Vector Observation space: 243 variables corresponding to position, rotation,
velocity, and angular velocities of each limb, along with goal direction.
- Vector Action space: (Continuous) Size of 39, corresponding to target
rotations and strength applicable to the joints.
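A hedged restatement of the "geometric" reward above in formula form; the symbols are introduced here for illustration and do not appear in the source. Assuming each listed term (velocity match, head alignment) is first normalized to (0, 1):

$$
r_t = \prod_i \hat{r}_{i,t}, \qquad \hat{r}_{i,t} \in (0, 1)
$$

Because any single term near zero pulls the whole product toward zero, the agent cannot trade one term off against another, which is the stated intent.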

- Recommended Minimum:
- Recommended Maximum:
- hip_mass: Mass of the hip component of the walker
- Default: 15
- Default: 8
- Recommended Minimum: 7
- Recommended Maximum: 28
- chest_mass: Mass of the chest component of the walker

- spine_mass: Mass of the spine component of the walker
- Default: 10
- Default: 8
- Benchmark Mean Reward for `WalkerStatic`: 1500
- Benchmark Mean Reward for `WalkerDynamic`: 700
- Benchmark Mean Reward for `WalkerDynamic`: 2500
- Benchmark Mean Reward for `WalkerDynamicVariableSpeed`: 2500
- Benchmark Mean Reward for `WalkerStatic`: 3500
- Benchmark Mean Reward for `WalkerStaticVariableSpeed`: 3500
## Pyramids

2
gym-unity/gym_unity/__init__.py


# Version of the library that will be used to upload to pypi
__version__ = "0.19.0"
__version__ = "0.20.0.dev0"
# Git tag that will be checked to determine whether to trigger upload to pypi
__release_tag__ = "release_6"

2
ml-agents-envs/mlagents_envs/__init__.py


# Version of the library that will be used to upload to pypi
__version__ = "0.19.0"
__version__ = "0.20.0.dev0"
# Git tag that will be checked to determine whether to trigger upload to pypi
__release_tag__ = "release_6"

8
ml-agents-envs/mlagents_envs/exception.py


def __init__(self, worker_id):
message = self.MESSAGE_TEMPLATE.format(str(worker_id))
super().__init__(message)
class UnityPolicyException(UnityException):
"""
Related to errors with the Trainer.
"""
pass

2
ml-agents/mlagents/trainers/__init__.py


# Version of the library that will be used to upload to pypi
__version__ = "0.19.0"
__version__ = "0.20.0.dev0"
# Git tag that will be checked to determine whether to trigger upload to pypi
__release_tag__ = "release_6"

2
ml-agents/mlagents/trainers/ghost/trainer.py


"""
policy = self.trainer.create_policy(parsed_behavior_id, behavior_spec)
policy.create_tf_graph()
policy.initialize_or_load()
self.trainer.saver.initialize_or_load(policy)
policy.init_load_weights()
team_id = parsed_behavior_id.team_id
self.controller.subscribe_team_id(team_id, self)

3
ml-agents/mlagents/trainers/optimizer/tf_optimizer.py


self.reward_signals[reward_signal.value].update_dict
)
@classmethod
self, learning_rate: tf.Tensor, name: str = "Adam"
cls, learning_rate: tf.Tensor, name: str = "Adam"
) -> tf.train.Optimizer:
return tf.train.AdamOptimizer(learning_rate=learning_rate, name=name)

15
ml-agents/mlagents/trainers/policy/policy.py


from mlagents_envs.base_env import DecisionSteps
from mlagents_envs.exception import UnityException
from mlagents.model_serialization import SerializationSettings
from mlagents.trainers.action_info import ActionInfo
from mlagents_envs.base_env import BehaviorSpec
from mlagents.trainers.settings import TrainerSettings, NetworkSettings

seed: int,
behavior_spec: BehaviorSpec,
trainer_settings: TrainerSettings,
model_path: str,
load: bool = False,
tanh_squash: bool = False,
reparameterize: bool = False,
condition_sigma_on_obs: bool = True,

self.vis_obs_size = sum(
1 for shape in behavior_spec.observation_shapes if len(shape) == 3
)
self.model_path = model_path
self.initialize_path = self.trainer_settings.init_path
self._keep_checkpoints = self.trainer_settings.keep_checkpoints
self.use_continuous_act = behavior_spec.is_action_continuous()
self.num_branches = self.behavior_spec.action_size
self.previous_action_dict: Dict[str, np.array] = {}

self.load = load
self.h_size = self.network_settings.hidden_units
num_layers = self.network_settings.num_layers
if num_layers < 1:

@abstractmethod
def get_current_step(self):
pass
@abstractmethod
def checkpoint(self, checkpoint_path: str, settings: SerializationSettings) -> None:
pass
@abstractmethod
def save(self, output_filepath: str, settings: SerializationSettings) -> None:
pass
@abstractmethod

115
ml-agents/mlagents/trainers/policy/tf_policy.py


from typing import Any, Dict, List, Optional, Tuple
from typing import Any, Dict, List, Optional, Tuple, Callable
from mlagents.model_serialization import SerializationSettings, export_policy_model
from mlagents.tf_utils import tf
from mlagents import tf_utils
from mlagents_envs.exception import UnityException

GaussianDistribution,
MultiCategoricalDistribution,
)
from mlagents.tf_utils.globals import get_rank
logger = get_logger(__name__)

functions to save/load models and create the input placeholders.
"""
# Callback function used at the start of training to synchronize weights.
# By default, this does nothing.
# If this needs to be used, it should be done from outside ml-agents.
broadcast_global_variables: Callable[[int], None] = lambda root_rank: None
model_path: str,
load: bool = False,
tanh_squash: bool = False,
reparameterize: bool = False,
condition_sigma_on_obs: bool = True,

:param seed: Random seed to use for TensorFlow.
:param brain: The corresponding Brain for this policy.
:param trainer_settings: The trainer parameters.
:param model_path: Where to load/save the model.
:param load: If True, load model from model_path. Otherwise, create new model.
model_path,
load,
tanh_squash,
reparameterize,
condition_sigma_on_obs,

self.sess = tf.Session(
config=tf_utils.generate_session_config(), graph=self.graph
)
self.saver: Optional[tf.Operation] = None
self.rank = get_rank()
if create_tf_graph:
self.create_tf_graph()

# We do an initialize to make the Policy usable out of the box. If an optimizer is needed,
# it will re-load the full graph
self._initialize_graph()
self.initialize()
def _create_encoder(
self,

ver = LooseVersion(version_string)
return tuple(map(int, ver.version[0:3]))
def _check_model_version(self, version: str) -> None:
"""
Checks whether the model being loaded was created with the same version of
ML-Agents, and throws a warning if not.
"""
if self.version_tensors is not None:
loaded_ver = tuple(
num.eval(session=self.sess) for num in self.version_tensors
)
if loaded_ver != TFPolicy._convert_version_string(version):
logger.warning(
f"The model checkpoint you are loading from was saved with ML-Agents version "
f"{loaded_ver[0]}.{loaded_ver[1]}.{loaded_ver[2]} but your current ML-Agents"
f"version is {version}. Model may not behave properly."
)
def _initialize_graph(self):
def initialize(self):
self.saver = tf.train.Saver(max_to_keep=self._keep_checkpoints)
def _load_graph(self, model_path: str, reset_global_steps: bool = False) -> None:
with self.graph.as_default():
self.saver = tf.train.Saver(max_to_keep=self._keep_checkpoints)
logger.info(f"Loading model from {model_path}.")
ckpt = tf.train.get_checkpoint_state(model_path)
if ckpt is None:
raise UnityPolicyException(
"The model {} could not be loaded. Make "
"sure you specified the right "
"--run-id and that the previous run you are loading from had the same "
"behavior names.".format(model_path)
)
try:
self.saver.restore(self.sess, ckpt.model_checkpoint_path)
except tf.errors.NotFoundError:
raise UnityPolicyException(
"The model {} was found but could not be loaded. Make "
"sure the model is from the same version of ML-Agents, has the same behavior parameters, "
"and is using the same trainer configuration as the current run.".format(
model_path
)
)
self._check_model_version(__version__)
if reset_global_steps:
self._set_step(0)
logger.info(
"Starting training from step 0 and saving to {}.".format(
self.model_path
)
)
else:
logger.info(f"Resuming training from step {self.get_current_step()}.")
def initialize_or_load(self):
# If there is an initialize path, load from that. Else, load from the set model path.
# If load is set to True, don't reset steps to 0. Else, do. This allows a user to,
# e.g., resume from an initialize path.
reset_steps = not self.load
if self.initialize_path is not None:
self._load_graph(self.initialize_path, reset_global_steps=reset_steps)
elif self.load:
self._load_graph(self.model_path, reset_global_steps=reset_steps)
else:
self._initialize_graph()
def get_weights(self):
with self.graph.as_default():
_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)

step = self.sess.run(self.global_step)
return step
def _set_step(self, step: int) -> int:
def set_step(self, step: int) -> int:
"""
Sets current model step to step without creating additional ops.
:param step: Step to set the current model step to.

:return:list of update var names
"""
return list(self.update_dict.keys())
def checkpoint(self, checkpoint_path: str, settings: SerializationSettings) -> None:
"""
Checkpoints the policy on disk.
:param checkpoint_path: filepath to write the checkpoint
:param settings: SerializationSettings for exporting the model.
"""
# Save the TF checkpoint and graph definition
with self.graph.as_default():
if self.saver:
self.saver.save(self.sess, f"{checkpoint_path}.ckpt")
tf.train.write_graph(
self.graph, self.model_path, "raw_graph_def.pb", as_text=False
)
# also save the policy so we have optimized model files for each checkpoint
self.save(checkpoint_path, settings)
def save(self, output_filepath: str, settings: SerializationSettings) -> None:
"""
Saves the serialized model, given a path and SerializationSettings
This method will save the policy graph to the given filepath. The path
should be provided without an extension as multiple serialized model formats
may be generated as a result.
:param output_filepath: path (without suffix) for the model file(s)
:param settings: SerializationSettings for how to save the model.
"""
export_policy_model(output_filepath, settings, self.graph, self.sess)
def update_normalization(self, vector_obs: np.ndarray) -> None:
"""

10
ml-agents/mlagents/trainers/ppo/optimizer.py


self.stream_names = list(self.reward_signals.keys())
self.tf_optimizer: Optional[tf.train.AdamOptimizer] = None
self.tf_optimizer_op: Optional[tf.train.Optimizer] = None
self.grads = None
self.update_batch: Optional[tf.Operation] = None

"decay_beta": self.decay_beta,
}
)
self.policy.initialize_or_load()
def _create_cc_critic(
self, h_size: int, num_layers: int, vis_encode_type: EncoderType

)
def _create_ppo_optimizer_ops(self):
self.tf_optimizer = self.create_optimizer_op(self.learning_rate)
self.grads = self.tf_optimizer.compute_gradients(self.loss)
self.update_batch = self.tf_optimizer.minimize(self.loss)
self.tf_optimizer_op = self.create_optimizer_op(self.learning_rate)
self.grads = self.tf_optimizer_op.compute_gradients(self.loss)
self.update_batch = self.tf_optimizer_op.minimize(self.loss)
@timed
def update(self, batch: AgentBuffer, num_sequences: int) -> Dict[str, float]:

17
ml-agents/mlagents/trainers/ppo/trainer.py


:param artifact_path: The directory within which to store artifacts from this trainer.
"""
super().__init__(
brain_name, trainer_settings, training, artifact_path, reward_buff_cap
brain_name, trainer_settings, training, load, artifact_path, reward_buff_cap
self.load = load
self.seed = seed
self.policy: Policy = None # type: ignore

self.seed,
behavior_spec,
self.trainer_settings,
model_path=self.artifact_path,
load=self.load,
def create_ppo_optimizer(self) -> PPOOptimizer:
return PPOOptimizer(cast(TFPolicy, self.policy), self.trainer_settings)
def add_policy(
self, parsed_behavior_id: BehaviorIdentifiers, policy: Policy

)
self.policy = policy
self.policies[parsed_behavior_id.behavior_id] = policy
self.optimizer = PPOOptimizer(
cast(TFPolicy, self.policy), self.trainer_settings
)
self.optimizer = self.create_ppo_optimizer()
self.saver.register(self.policy)
self.saver.register(self.optimizer)
self.saver.initialize_or_load()
# Needed to resume loads properly
self.step = policy.get_current_step()

2
ml-agents/mlagents/trainers/sac/optimizer.py


[self.policy.update_normalization_op, target_update_norm]
)
self.policy.initialize_or_load()
self.stats_name_to_update_name = {
"Losses/Value Loss": "value_loss",
"Losses/Policy Loss": "policy_loss",

17
ml-agents/mlagents/trainers/sac/trainer.py


:param artifact_path: The directory within which to store artifacts from this trainer.
"""
super().__init__(
brain_name, trainer_settings, training, artifact_path, reward_buff_cap
brain_name, trainer_settings, training, load, artifact_path, reward_buff_cap
self.load = load
self.seed = seed
self.policy: Policy = None # type: ignore
self.optimizer: SACOptimizer = None # type: ignore

self.seed,
behavior_spec,
self.trainer_settings,
self.artifact_path,
self.load,
tanh_squash=True,
reparameterize=True,
create_tf_graph=False,

for stat, stat_list in batch_update_stats.items():
self._stats_reporter.add_stat(stat, np.mean(stat_list))
def create_sac_optimizer(self) -> SACOptimizer:
return SACOptimizer(cast(TFPolicy, self.policy), self.trainer_settings)
def add_policy(
self, parsed_behavior_id: BehaviorIdentifiers, policy: Policy
) -> None:

)
self.policy = policy
self.policies[parsed_behavior_id.behavior_id] = policy
self.optimizer = SACOptimizer(
cast(TFPolicy, self.policy), self.trainer_settings
)
self.optimizer = self.create_sac_optimizer()
self.saver.register(self.policy)
self.saver.register(self.optimizer)
self.saver.initialize_or_load()
# Needed to resume loads properly
self.step = policy.get_current_step()
# Assume steps were updated at the correct ratio before

8
ml-agents/mlagents/trainers/settings.py


return {key: cattr.unstructure(val) for key, val in d.items()}
class SerializationSettings:
convert_to_barracuda = True
convert_to_onnx = True
onnx_opset = 9
@attr.s(auto_attribs=True)
class ExportableSettings:
def as_dict(self):

PROGRESS: str = "progress"
REWARD: str = "reward"
behavior: str
behavior: str = attr.ib(default="")
min_lesson_length: int = 0
signal_smoothing: bool = True
threshold: float = attr.ib(default=0.0)

41
ml-agents/mlagents/trainers/stats.py


from mlagents_envs.logging_util import get_logger
from mlagents_envs.timers import set_gauge
from mlagents.tf_utils import tf, generate_session_config
from mlagents.tf_utils.globals import get_rank
logger = get_logger(__name__)

class GaugeWriter(StatsWriter):
"""
Write all stats that we recieve to the timer gauges, so we can track them offline easily
Write all stats that we receive to the timer gauges, so we can track them offline easily
"""
@staticmethod

# If self-play, we want to print ELO as well as reward
self.self_play = False
self.self_play_team = -1
self.rank = get_rank()
def write_stats(
self, category: str, values: Dict[str, StatsSummary], step: int

stats_summary = stats_summary = values["Is Training"]
stats_summary = values["Is Training"]
elapsed_time = time.time() - self.training_start_time
log_info: List[str] = [category]
log_info.append(f"Step: {step}")
log_info.append(f"Time Elapsed: {elapsed_time:0.3f} s")
logger.info(
"{}: Step: {}. "
"Time Elapsed: {:0.3f} s "
"Mean "
"Reward: {:0.3f}"
". Std of Reward: {:0.3f}. {}".format(
category,
step,
time.time() - self.training_start_time,
stats_summary.mean,
stats_summary.std,
is_training,
)
)
if self.rank is not None:
log_info.append(f"Rank: {self.rank}")
log_info.append(f"Mean Reward: {stats_summary.mean:0.3f}")
log_info.append(f"Std of Reward: {stats_summary.std:0.3f}")
log_info.append(is_training)
logger.info(f"{category} ELO: {elo_stats.mean:0.3f}. ")
log_info.append(f"ELO: {elo_stats.mean:0.3f}")
logger.info(
"{}: Step: {}. No episode was completed since last summary. {}".format(
category, step, is_training
)
)
log_info.append("No episode was completed since last summary")
log_info.append(is_training)
logger.info(". ".join(log_info))
def add_property(
self, category: str, property_type: StatsPropertyType, value: Any

27
ml-agents/mlagents/trainers/tests/test_barracuda_converter.py


import os
import tempfile
import pytest
from mlagents.trainers.tests.test_nn_policy import create_policy_mock
from mlagents.trainers.settings import TrainerSettings
from mlagents.tf_utils import tf
from mlagents.model_serialization import SerializationSettings
def test_barracuda_converter():

# cleanup
os.remove(tmpfile)
@pytest.mark.parametrize("discrete", [True, False], ids=["discrete", "continuous"])
@pytest.mark.parametrize("visual", [True, False], ids=["visual", "vector"])
@pytest.mark.parametrize("rnn", [True, False], ids=["rnn", "no_rnn"])
def test_policy_conversion(tmpdir, rnn, visual, discrete):
tf.reset_default_graph()
dummy_config = TrainerSettings()
policy = create_policy_mock(
dummy_config,
use_rnn=rnn,
model_path=os.path.join(tmpdir, "test"),
use_discrete=discrete,
use_visual=visual,
)
settings = SerializationSettings(policy.model_path, "MockBrain")
checkpoint_path = f"{tmpdir}/MockBrain-1"
policy.checkpoint(checkpoint_path, settings)
# These checks taken from test_barracuda_converter
assert os.path.isfile(checkpoint_path + ".nn")
assert os.path.getsize(checkpoint_path + ".nn") > 100
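The new test above stacks three parametrize decorators so the .nn export is exercised for every rnn/visual/discrete combination. A tiny sketch of how stacked parametrization multiplies into eight collected cases:

import pytest


@pytest.mark.parametrize("discrete", [True, False], ids=["discrete", "continuous"])
@pytest.mark.parametrize("visual", [True, False], ids=["visual", "vector"])
@pytest.mark.parametrize("rnn", [True, False], ids=["rnn", "no_rnn"])
def test_combinations(rnn, visual, discrete):
    # pytest collects 2 x 2 x 2 = 8 cases, one per id combination.
    assert isinstance(rnn, bool) and isinstance(visual, bool) and isinstance(discrete, bool)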

10
ml-agents/mlagents/trainers/tests/test_bcmodule.py


NetworkSettings.MemorySettings() if use_rnn else None
)
policy = TFPolicy(
0,
mock_behavior_specs,
trainer_config,
"test",
False,
tanhresample,
tanhresample,
0, mock_behavior_specs, trainer_config, tanhresample, tanhresample
)
with policy.graph.as_default():
bc_module = BCModule(

default_num_epoch=3,
settings=bc_settings,
)
policy.initialize_or_load() # Normally the optimizer calls this after the BCModule is created
policy.initialize() # Normally the optimizer calls this after the BCModule is created
return bc_module

62
ml-agents/mlagents/trainers/tests/test_env_param_manager.py


yaml.safe_load(test_bad_curriculum_all_competion_criteria_config_yaml)
)
param_manager = EnvironmentParameterManager(
run_options.environment_parameters, 1337, False
)
assert param_manager.update_lessons(
trainer_steps={"fake_behavior": 500},
trainer_max_steps={"fake_behavior": 1000},
trainer_reward_buffer={"fake_behavior": [1000] * 101},
) == (True, True)
assert param_manager.update_lessons(
trainer_steps={"fake_behavior": 500},
trainer_max_steps={"fake_behavior": 1000},
trainer_reward_buffer={"fake_behavior": [1000] * 101},
) == (True, True)
assert param_manager.update_lessons(
trainer_steps={"fake_behavior": 500},
trainer_max_steps={"fake_behavior": 1000},
trainer_reward_buffer={"fake_behavior": [1000] * 101},
) == (False, False)
assert param_manager.get_current_lesson_number() == {"param_1": 2}
param_manager = EnvironmentParameterManager(
run_options.environment_parameters, 1337, False
)
assert param_manager.update_lessons(
trainer_steps={"fake_behavior": 500},
trainer_max_steps={"fake_behavior": 1000},
trainer_reward_buffer={"fake_behavior": [1000] * 101},
) == (True, True)
assert param_manager.update_lessons(
trainer_steps={"fake_behavior": 500},
trainer_max_steps={"fake_behavior": 1000},
trainer_reward_buffer={"fake_behavior": [1000] * 101},
) == (True, True)
assert param_manager.update_lessons(
trainer_steps={"fake_behavior": 500},
trainer_max_steps={"fake_behavior": 1000},
trainer_reward_buffer={"fake_behavior": [1000] * 101},
) == (False, False)
assert param_manager.get_current_lesson_number() == {"param_1": 2}
test_everything_config_yaml = """

"param_2": GaussianSettings(seed=1337 + 3, mean=4, st_dev=5),
"param_3": ConstantSettings(seed=1337 + 3 + 1, value=20),
}
test_curriculum_no_behavior_yaml = """
environment_parameters:
param_1:
curriculum:
- name: Lesson1
completion_criteria:
measure: reward
threshold: 30
min_lesson_length: 100
require_reset: true
value: 1
- name: Lesson2
value: 2
"""
def test_curriculum_no_behavior():
with pytest.raises(TypeError):
run_options = RunOptions.from_dict(
yaml.safe_load(test_curriculum_no_behavior_yaml)
)
EnvironmentParameterManager(run_options.environment_parameters, 1337, False)
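The curriculum tests above drive update_lessons() with fake trainer stats and expect a (lesson_updated, reset_needed) tuple back. A loose, illustrative sketch of threshold-based lesson progression; this is not the real EnvironmentParameterManager API:

def update_lesson(lesson_index, lessons, reward_buffer, min_lesson_length=100):
    """Return (updated, reset_needed) in the spirit of the asserts above."""
    if lesson_index >= len(lessons) - 1 or len(reward_buffer) < min_lesson_length:
        return False, False
    lesson = lessons[lesson_index]
    mean_reward = sum(reward_buffer) / len(reward_buffer)
    if mean_reward >= lesson["threshold"]:
        return True, lesson.get("require_reset", False)
    return False, False


lessons = [
    {"threshold": 30, "require_reset": True},
    {"threshold": 60},
    {},  # final lesson: nothing left to advance to
]
print(update_lesson(0, lessons, [1000] * 101))  # (True, True)
print(update_lesson(2, lessons, [1000] * 101))  # (False, False)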

62
ml-agents/mlagents/trainers/tests/test_nn_policy.py


import pytest
import os
import unittest
import tempfile
from mlagents.model_serialization import SerializationSettings
from mlagents.trainers.policy.tf_policy import TFPolicy
from mlagents.trainers.tf.models import ModelUtils, Tensor3DShape
from mlagents.trainers.exception import UnityTrainerException

from mlagents.trainers import __version__
VECTOR_ACTION_SPACE = 2

use_rnn: bool = False,
use_discrete: bool = True,
use_visual: bool = False,
model_path: str = "",
load: bool = False,
seed: int = 0,
) -> TFPolicy:
mock_spec = mb.setup_test_behavior_specs(

trainer_settings.network_settings.memory = (
NetworkSettings.MemorySettings() if use_rnn else None
)
policy = TFPolicy(
seed, mock_spec, trainer_settings, model_path=model_path, load=load
)
policy = TFPolicy(seed, mock_spec, trainer_settings)
def test_load_save(tmp_path):
path1 = os.path.join(tmp_path, "runid1")
path2 = os.path.join(tmp_path, "runid2")
trainer_params = TrainerSettings()
policy = create_policy_mock(trainer_params, model_path=path1)
policy.initialize_or_load()
policy._set_step(2000)
mock_brain_name = "MockBrain"
checkpoint_path = f"{policy.model_path}/{mock_brain_name}-2000"
serialization_settings = SerializationSettings(policy.model_path, mock_brain_name)
policy.checkpoint(checkpoint_path, serialization_settings)
assert len(os.listdir(tmp_path)) > 0
# Try load from this path
policy2 = create_policy_mock(trainer_params, model_path=path1, load=True, seed=1)
policy2.initialize_or_load()
_compare_two_policies(policy, policy2)
assert policy2.get_current_step() == 2000
# Try initialize from path 1
trainer_params.output_path = path2
trainer_params.init_path = path1
policy3 = create_policy_mock(trainer_params, model_path=path1, load=False, seed=2)
policy3.initialize_or_load()
_compare_two_policies(policy2, policy3)
# Assert that the steps are 0.
assert policy3.get_current_step() == 0
class ModelVersionTest(unittest.TestCase):
def test_version_compare(self):
# Test write_stats
with self.assertLogs("mlagents.trainers", level="WARNING") as cm:
path1 = tempfile.mkdtemp()
trainer_params = TrainerSettings()
policy = create_policy_mock(trainer_params, model_path=path1)
policy.initialize_or_load()
policy._check_model_version(
"0.0.0"
) # This is not the right version for sure
# Assert that 1 warning has been thrown with incorrect version
assert len(cm.output) == 1
policy._check_model_version(__version__) # This should be the right version
# Assert that no additional warnings have been thrown wth correct ver
assert len(cm.output) == 1
def _compare_two_policies(policy1: TFPolicy, policy2: TFPolicy) -> None:
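ModelVersionTest above relies on unittest's assertLogs to count the warnings emitted when the policy's model version disagrees with the trainer version. A self-contained sketch of that capture pattern; check_version and the logger name are made up for illustration:

import logging
import unittest


def check_version(model_version, trainer_version, logger):
    if model_version.split(".")[:2] != trainer_version.split(".")[:2]:
        logger.warning("Model version %s does not match trainer version %s",
                       model_version, trainer_version)


class VersionWarningTest(unittest.TestCase):
    def test_version_compare(self):
        logger = logging.getLogger("sketch.trainers")
        with self.assertLogs("sketch.trainers", level="WARNING") as cm:
            check_version("0.0.0", "1.2.0", logger)  # wrong version: one warning
            self.assertEqual(len(cm.output), 1)
            check_version("1.2.0", "1.2.0", logger)  # right version: no new warning
            self.assertEqual(len(cm.output), 1)


if __name__ == "__main__":
    unittest.main()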

8
ml-agents/mlagents/trainers/tests/test_ppo.py


import attr
from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
from mlagents.trainers.trainer.rl_trainer import RLTrainer
from mlagents.trainers.ppo.trainer import PPOTrainer, discount_rewards
from mlagents.trainers.ppo.optimizer import PPOOptimizer
from mlagents.trainers.policy.tf_policy import TFPolicy

0, mock_specs, trainer_settings, "test", False, create_tf_graph=False
)
optimizer = PPOOptimizer(policy, trainer_settings)
policy.initialize()
return optimizer

)
@mock.patch.object(RLTrainer, "create_saver")
def test_trainer_increment_step(ppo_optimizer):
def test_trainer_increment_step(ppo_optimizer, mock_create_saver):
trainer_params = PPO_CONFIG
mock_optimizer = mock.Mock()
mock_optimizer.reward_signals = {}

assert trainer.stats_reporter.get_stats_summaries("Policy/Extrinsic Reward").num > 0
@mock.patch.object(RLTrainer, "create_saver")
def test_add_get_policy(ppo_optimizer, dummy_config):
def test_add_get_policy(ppo_optimizer, mock_create_saver, dummy_config):
mock_optimizer = mock.Mock()
mock_optimizer.reward_signals = {}
ppo_optimizer.return_value = mock_optimizer

1
ml-agents/mlagents/trainers/tests/test_reward_signals.py


optimizer = SACOptimizer(policy, trainer_settings)
else:
optimizer = PPOOptimizer(policy, trainer_settings)
optimizer.policy.initialize()
return optimizer

21
ml-agents/mlagents/trainers/tests/test_rl_trainer.py


import os
from unittest import mock
import pytest
import mlagents.trainers.tests.mock_brain as mb

return self.update_policy
def add_policy(self, mock_behavior_id, mock_policy):
def checkpoint_path(brain_name, step):
return os.path.join(self.saver.model_path, f"{brain_name}-{step}")
mock_saver = mock.Mock()
mock_saver.model_path = self.artifact_path
mock_saver.save_checkpoint.side_effect = checkpoint_path
self.saver = mock_saver
def create_policy(self):
return mock.Mock()

"test_trainer",
TrainerSettings(max_steps=100, checkpoint_interval=10, summary_freq=20),
True,
False,
"mock_model_path",
0,
)
trainer.set_is_policy_updating(True)

def test_advance(mocked_clear_update_buffer, mocked_save_model):
trainer = create_rl_trainer()
mock_policy = mock.Mock()
mock_policy.model_path = "mock_model_path"
trainer.add_policy("TestBrain", mock_policy)
trajectory_queue = AgentManagerQueue("testbrain")
policy_queue = AgentManagerQueue("testbrain")

def test_summary_checkpoint(mock_add_checkpoint, mock_write_summary):
trainer = create_rl_trainer()
mock_policy = mock.Mock()
mock_policy.model_path = "mock_model_path"
trainer.add_policy("TestBrain", mock_policy)
trajectory_queue = AgentManagerQueue("testbrain")
policy_queue = AgentManagerQueue("testbrain")

checkpoint_range = range(
checkpoint_interval, num_trajectories * time_horizon, checkpoint_interval
)
calls = [
mock.call(f"{mock_policy.model_path}/{trainer.brain_name}-{step}", mock.ANY)
for step in checkpoint_range
]
mock_policy.checkpoint.assert_has_calls(calls, any_order=True)
calls = [mock.call(trainer.brain_name, step) for step in checkpoint_range]
trainer.saver.save_checkpoint.assert_has_calls(calls, any_order=True)
add_checkpoint_calls = [
mock.call(

f"{mock_policy.model_path}/{trainer.brain_name}-{step}.nn",
f"{trainer.saver.model_path}/{trainer.brain_name}-{step}.nn",
None,
mock.ANY,
),
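The rl_trainer test fixture above swaps the real saver for a Mock whose save_checkpoint side_effect fabricates the checkpoint path, so the later assert_has_calls checks can verify exact paths. A standalone sketch of that side_effect pattern:

import os
from unittest import mock

artifact_path = "mock_model_path"


def checkpoint_path(brain_name, step):
    return os.path.join(artifact_path, f"{brain_name}-{step}")


mock_saver = mock.Mock()
mock_saver.model_path = artifact_path
mock_saver.save_checkpoint.side_effect = checkpoint_path

print(mock_saver.save_checkpoint("TestBrain", 10))  # mock_model_path/TestBrain-10
mock_saver.save_checkpoint.assert_called_once_with("TestBrain", 10)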

6
ml-agents/mlagents/trainers/tests/test_sac.py


from mlagents.tf_utils import tf
from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
from mlagents.trainers.trainer.rl_trainer import RLTrainer
from mlagents.trainers.sac.trainer import SACTrainer
from mlagents.trainers.sac.optimizer import SACOptimizer
from mlagents.trainers.policy.tf_policy import TFPolicy

0, mock_brain, trainer_settings, "test", False, create_tf_graph=False
)
optimizer = SACOptimizer(policy, trainer_settings)
policy.initialize()
return optimizer

assert trainer2.update_buffer.num_experiences == buffer_len
@mock.patch.object(RLTrainer, "create_saver")
def test_add_get_policy(sac_optimizer, dummy_config):
def test_add_get_policy(sac_optimizer, mock_create_saver, dummy_config):
mock_optimizer = mock.Mock()
mock_optimizer.reward_signals = {}
sac_optimizer.return_value = mock_optimizer

policy = trainer.create_policy(behavior_id, specs)
policy.get_current_step = lambda: 200
trainer.add_policy(behavior_id, policy)
trainer.saver.initialize_or_load(policy)
trainer.optimizer.update = mock.Mock()
trainer.optimizer.update_reward_signals = mock.Mock()
trainer.optimizer.update_reward_signals.return_value = {}

8
ml-agents/mlagents/trainers/tests/test_simple_rl.py


# The reward processor is passed as an argument to _check_environment_trains.
# It is applied to the list pf all final rewards for each brain individually.
# It is applied to the list of all final rewards for each brain individually.
# Custom reward processors shuld be built within the test function and passed to _check_environment_trains
# Custom reward processors should be built within the test function and passed to _check_environment_trains
# Default is average over the last 5 final rewards
def default_reward_processor(rewards, last_n_rewards=5):
rewards_to_use = rewards[-last_n_rewards:]

@pytest.mark.parametrize("use_discrete", [True, False])
def test_recurrent_sac(use_discrete):
step_size = 0.2 if use_discrete else 1.0
step_size = 0.5 if use_discrete else 0.2
env = MemoryEnvironment(
[BRAIN_NAME], use_discrete=use_discrete, step_size=step_size
)

swap_steps=5000,
team_change=2000,
)
config = attr.evolve(PPO_CONFIG, self_play=self_play_settings, max_steps=2000)
config = attr.evolve(PPO_CONFIG, self_play=self_play_settings, max_steps=3000)
_check_environment_trains(
env, {BRAIN_NAME: config, brain_name_opp: config}, success_threshold=None
)

20
ml-agents/mlagents/trainers/tests/test_tf_policy.py


from mlagents.model_serialization import SerializationSettings
from unittest import mock
from mlagents.trainers.settings import TrainerSettings
import numpy as np

# Test dev versions
result = TFPolicy._convert_version_string("200.300.100.dev0")
assert result == (200, 300, 100)
@mock.patch("mlagents.trainers.policy.tf_policy.export_policy_model")
@mock.patch("time.time", mock.MagicMock(return_value=12345))
def test_checkpoint_writes_tf_and_nn_checkpoints(export_policy_model_mock):
mock_brain = basic_mock_brain()
test_seed = 4 # moving up in the world
policy = FakePolicy(test_seed, mock_brain, TrainerSettings(), "output")
n_steps = 5
policy.get_current_step = MagicMock(return_value=n_steps)
policy.saver = MagicMock()
serialization_settings = SerializationSettings("output", mock_brain.brain_name)
checkpoint_path = f"output/{mock_brain.brain_name}-{n_steps}"
policy.checkpoint(checkpoint_path, serialization_settings)
policy.saver.save.assert_called_once_with(policy.sess, f"{checkpoint_path}.ckpt")
export_policy_model_mock.assert_called_once_with(
checkpoint_path, serialization_settings, policy.graph, policy.sess
)

26
ml-agents/mlagents/trainers/trainer/rl_trainer.py


# # Unity ML-Agents Toolkit
import os
from mlagents.model_serialization import SerializationSettings, copy_model_files
from mlagents.trainers.policy.checkpoint_manager import (
NNCheckpoint,
NNCheckpointManager,

from mlagents_envs.timers import hierarchical_timer
from mlagents.trainers.agent_processor import AgentManagerQueue
from mlagents.trainers.trajectory import Trajectory
from mlagents.trainers.settings import TrainerSettings
from mlagents.trainers.saver.saver import BaseSaver
from mlagents.trainers.saver.tf_saver import TFSaver
RewardSignalResults = Dict[str, RewardSignalResult]

)
self._next_save_step = 0
self._next_summary_step = 0
self.saver = self.create_saver(
self.trainer_settings, self.artifact_path, self.load
)
def end_episode(self) -> None:
"""

for agent_id in rewards:
rewards[agent_id] = 0
@staticmethod
def create_saver(
trainer_settings: TrainerSettings, model_path: str, load: bool
) -> BaseSaver:
saver = TFSaver(trainer_settings, model_path, load)
return saver
def _update_end_episode_stats(self, agent_id: str, optimizer: Optimizer) -> None:
for name, rewards in self.collected_rewards.items():
if name == "environment":

logger.warning(
"Trainer has multiple policies, but default behavior only saves the first."
)
policy = list(self.policies.values())[0]
model_path = policy.model_path
settings = SerializationSettings(model_path, self.brain_name)
checkpoint_path = os.path.join(model_path, f"{self.brain_name}-{self.step}")
policy.checkpoint(checkpoint_path, settings)
checkpoint_path = self.saver.save_checkpoint(self.brain_name, self.step)
new_checkpoint = NNCheckpoint(
int(self.step),
f"{checkpoint_path}.nn",

logger.warning(
"Trainer has multiple policies, but default behavior only saves the first."
)
policy = list(self.policies.values())[0]
copy_model_files(model_checkpoint.file_path, f"{policy.model_path}.nn")
self.saver.copy_final_model(model_checkpoint.file_path)
model_checkpoint, file_path=f"{policy.model_path}.nn"
model_checkpoint, file_path=f"{self.saver.model_path}.nn"
)
NNCheckpointManager.track_final_checkpoint(self.brain_name, final_checkpoint)
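In the rl_trainer hunk above, checkpointing and final-model export are delegated to the saver: save_checkpoint returns the checkpoint path used to build the NNCheckpoint record, and copy_final_model writes the exported model next to the saver's model_path. A stand-in sketch of that call order (SketchSaver is not the real TFSaver):

import os


class SketchSaver:
    """Stand-in saver: only the call order mirrors the trainer hunk above."""
    def __init__(self, model_path):
        self.model_path = model_path

    def save_checkpoint(self, brain_name, step):
        return os.path.join(self.model_path, f"{brain_name}-{step}")

    def copy_final_model(self, source_nn_path):
        return f"{self.model_path}.nn"


saver = SketchSaver(os.path.join("results", "run_id", "fake_behavior"))
checkpoint = saver.save_checkpoint("fake_behavior", 5000)   # .../fake_behavior-5000
final_model = saver.copy_final_model(f"{checkpoint}.nn")    # .../fake_behavior.nn
print(checkpoint, final_model)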

2
ml-agents/mlagents/trainers/trainer/trainer.py


brain_name: str,
trainer_settings: TrainerSettings,
training: bool,
load: bool,
artifact_path: str,
reward_buff_cap: int = 1,
):

self._threaded = trainer_settings.threaded
self._stats_reporter = StatsReporter(brain_name)
self.is_training = training
self.load = load
self._reward_buffer: Deque[float] = deque(maxlen=reward_buff_cap)
self.policy_queues: List[AgentManagerQueue[Policy]] = []
self.trajectory_queues: List[AgentManagerQueue[Trajectory]] = []

10
ml-agents/mlagents/trainers/trainer_controller.py


from mlagents.trainers.trainer_util import TrainerFactory
from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
from mlagents.trainers.agent_processor import AgentManager
from mlagents.tf_utils.globals import get_rank
class TrainerController:

self.kill_trainers = False
np.random.seed(training_seed)
tf.set_random_seed(training_seed)
self.rank = get_rank()
@timed
def _save_models(self):

if self.rank is not None and self.rank != 0:
return
for brain_name in self.trainers.keys():
self.trainers[brain_name].save_model()
self.logger.info("Saved Model")

"""
Saves models for all trainers.
"""
if self.rank is not None and self.rank != 0:
return
for brain_name in self.trainers.keys():
self.trainers[brain_name].save_model()
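_save_models above is now guarded by the process rank so that, in a distributed run, only rank 0 writes models. A minimal sketch of that guard; get_rank here is a stub standing in for mlagents.tf_utils.globals.get_rank:

def get_rank():
    # Stand-in: assume the real helper returns the process rank in a
    # distributed run and None otherwise.
    return None


def save_models(trainers, rank=None):
    if rank is not None and rank != 0:
        return  # non-zero ranks skip saving so they do not clobber rank 0's output
    for brain_name, trainer in trainers.items():
        trainer.save_model()


class _FakeTrainer:
    def save_model(self):
        print("saved")


save_models({"fake_behavior": _FakeTrainer()}, rank=get_rank())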

) in self.param_manager.get_current_lesson_number().items():
for trainer in self.trainers.values():
trainer.stats_reporter.set_stat(
f"Environment/Lesson/{param_name}", lesson_number
f"Environment/Lesson Number/{param_name}", lesson_number
)
for trainer in self.trainers.values():

3
test_requirements.txt


pytest-cov==2.6.1
pytest-xdist
# PyTorch tests are here for the time being, before they are used in the codebase.
torch>=1.5.0
# onnx doesn't currently have a wheel for 3.8
tf2onnx>=1.5.5;python_version<'3.8'

2
utils/validate_release_links.py


ALLOW_LIST = {
# Previous release table
"README.md": re.compile(r"\*\*Release [0-9]+\*\*"),
"com.unity.ml-agents/Runtime/Actuators/IActionReceiver.cs": RELEASE_PATTERN,
"com.unity.ml-agents/Runtime/Actuators/IDiscreteActionMask.cs": RELEASE_PATTERN,
"docs/Versioning.md": None,
"com.unity.ml-agents/CHANGELOG.md": None,
"utils/make_readme_table.py": None,

21
Project/Assets/ML-Agents/Examples/SharedAssets/Prefabs/Targets/DynamicTarget.prefab


m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 3840539935788495952}
m_LocalRotation: {x: 0, y: 0, z: 0, w: 1}
m_LocalPosition: {x: 1, y: 1, z: 1}
m_LocalScale: {x: 1.2356956, y: 1.2356961, z: 1.2356961}
m_LocalPosition: {x: 0, y: 1, z: 1}
m_LocalScale: {x: 1, y: 1, z: 1}
m_Children: []
m_Father: {fileID: 0}
m_RootOrder: 0

m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 3840539935788495952}
m_Enabled: 1
m_CastShadows: 0
m_CastShadows: 1
m_ReceiveShadows: 1
m_DynamicOccludee: 1
m_MotionVectors: 1

respawnIfTouched: 1
respawnIfFallsOffPlatform: 1
fallDistance: 5
triggerIsTouching: 0
onTriggerEnterEvent:
m_PersistentCalls:
m_Calls: []

onTriggerExitEvent:
m_PersistentCalls:
m_Calls: []
colliderIsTouching: 0
m_Calls:
- m_Target: {fileID: 0}
m_MethodName: TouchedTarget
m_Mode: 1
m_Arguments:
m_ObjectArgument: {fileID: 0}
m_ObjectArgumentAssemblyTypeName: UnityEngine.Object, UnityEngine
m_IntArgument: 0
m_FloatArgument: 0
m_StringArgument:
m_BoolArgument: 0
m_CallState: 2
m_Calls: []
onCollisionStayEvent:
m_PersistentCalls:
m_Calls: []

19
Project/Assets/ML-Agents/Examples/SharedAssets/Prefabs/Targets/StaticTarget.prefab


m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 3840539935788495952}
m_LocalRotation: {x: -0, y: -0, z: -0, w: 1}
m_LocalPosition: {x: 6.2, y: 1.15, z: 3.824}
m_LocalScale: {x: 1.2356956, y: 1.2356961, z: 1.2356961}
m_LocalRotation: {x: 0, y: 0, z: 0, w: 1}
m_LocalPosition: {x: 1, y: 1, z: 1}
m_LocalScale: {x: 1, y: 1, z: 1}
m_Children: []
m_Father: {fileID: 0}
m_RootOrder: 0

m_IsKinematic: 0
m_Interpolate: 0
m_Constraints: 0
m_CollisionDetection: 0
m_CollisionDetection: 3
--- !u!114 &3631016866778687563
MonoBehaviour:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
tagToDetect: agent
moveTargetToRandomPosIfTouched: 0
targetSpawnRadius: 0
onTrtesiggerEnterEvent:
m_PersistentCalls:
m_Calls: []
triggerIsTouching: 0
spawnRadius: 0
respawnIfTouched: 0
respawnIfFallsOffPlatform: 1
fallDistance: 5
onTriggerEnterEvent:
m_PersistentCalls:
m_Calls: []

onTriggerExitEvent:
m_PersistentCalls:
m_Calls: []
colliderIsTouching: 0
onCollisionEnterEvent:
m_PersistentCalls:
m_Calls: []

82
Project/Assets/ML-Agents/Examples/Walker/Prefabs/Ragdoll/WalkerRagdollBase.prefab


m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 895268871264836243}
m_LocalRotation: {x: 0, y: 0, z: 0, w: 1}
m_LocalPosition: {x: 0, y: 0.15, z: 0}
m_LocalPosition: {x: 0, y: 0, z: 0}
m_LocalScale: {x: 1, y: 1, z: 1}
m_Children:
- {fileID: 895268873051627235}

- component: {fileID: 895268871377934302}
- component: {fileID: 895268871377934301}
m_Layer: 0
m_Name: WalkerRagdoll
m_Name: WalkerRagdollBase
m_TagString: Untagged
m_Icon: {fileID: 0}
m_NavMeshLayer: 0

m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 895268871377934275}
m_LocalRotation: {x: 0, y: 0.7071068, z: 0, w: 0.7071068}
m_LocalPosition: {x: 0, y: 3.07, z: 0}
m_LocalRotation: {x: 0, y: 0, z: 0, w: 1}
m_LocalPosition: {x: 0, y: 3, z: 0}
m_LocalScale: {x: 1, y: 1, z: 1}
m_Children:
- {fileID: 895268871264836332}

m_RootOrder: 0
m_LocalEulerAnglesHint: {x: 0, y: 90, z: 0}
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
--- !u!114 &895268871377934297
MonoBehaviour:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
m_BrainParameters:
VectorObservationSize: 236
VectorObservationSize: 243
m_Model: {fileID: 11400000, guid: 3c6170922a9ad4d9f85261699ca00f5d, type: 3}
m_Model: {fileID: 11400000, guid: f598eaeeef9f94691989a2cfaaafb565, type: 3}
m_InferenceDevice: 0
m_BehaviorType: 0
m_BehaviorName: WalkerDynamic

maxStep: 0
hasUpgradedFromAgentParameters: 1
MaxStep: 5000
maximumWalkingSpeed: 999
targetWalkingSpeed: 10
randomizeWalkSpeedEachEpisode: 1
walkDirectionMethod: 0
worldDirToWalk: {x: 1, y: 0, z: 0}
worldPosToWalkTo: {x: 0, y: 0, z: 0}
target: {fileID: 0}
hips: {fileID: 895268871264836332}
chest: {fileID: 7933235354845945071}

armR: {fileID: 7933235355057813930}
forearmR: {fileID: 7933235353195701980}
handR: {fileID: 7933235354616748502}
orientationCube: {fileID: 7559180363928843817}
--- !u!114 &895268871377934303
MonoBehaviour:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
maxJointSpring: 40000
jointDampen: 3000
maxJointForceLimit: 10000
jointDampen: 5000
maxJointForceLimit: 20000
bodyPartsList: []
--- !u!114 &895268871377934302
MonoBehaviour:

m_Script: {fileID: 11500000, guid: 1513f8a85fedd47efba089213b7c5bde, type: 3}
m_Name:
m_EditorClassIdentifier:
updatedByAgent: 0
transformToFollow: {fileID: 895268871264836332}
targetToLookAt: {fileID: 0}
heightOffset: 0

m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 7933235353030744116}
serializedVersion: 2
m_Mass: 3
m_Mass: 4
m_Drag: 0.05
m_AngularDrag: 0.05
m_UseGravity: 1

m_Anchor: {x: 0.55, y: 0, z: 0}
m_Axis: {x: 0, y: -1, z: 0}
m_AutoConfigureConnectedAnchor: 1
m_ConnectedAnchor: {x: -0.7000002, y: 0, z: 0}
m_ConnectedAnchor: {x: -0.7000001, y: 0.00000011920929, z: 0}
serializedVersion: 2
m_SecondaryAxis: {x: 0, y: 0, z: 1}
m_XMotion: 0

m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 7933235353041637840}
serializedVersion: 2
m_Mass: 1
m_Mass: 2
m_Drag: 0.05
m_AngularDrag: 0.05
m_UseGravity: 1

m_Anchor: {x: 0, y: 0, z: 0}
m_Axis: {x: 1, y: 0, z: 0}
m_AutoConfigureConnectedAnchor: 1
m_ConnectedAnchor: {x: -0.70000064, y: 0, z: 0}
m_ConnectedAnchor: {x: -0.70000017, y: 0.00000011920929, z: 0}
serializedVersion: 2
m_SecondaryAxis: {x: 0, y: 0, z: 1}
m_XMotion: 0

m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 7933235353195701956}
serializedVersion: 2
m_Mass: 2
m_Mass: 3
m_Drag: 0.05
m_AngularDrag: 0.05
m_UseGravity: 1

m_Anchor: {x: -0.5, y: 0, z: 0}
m_Axis: {x: 0, y: 1, z: 0}
m_AutoConfigureConnectedAnchor: 1
m_ConnectedAnchor: {x: 0.5000005, y: 0, z: 0}
m_ConnectedAnchor: {x: 0.5, y: 0.00000011920929, z: 0}
serializedVersion: 2
m_SecondaryAxis: {x: 0, y: 0, z: 1}
m_XMotion: 0

m_Anchor: {x: 0, y: 0.5, z: 0}
m_Axis: {x: -1, y: 0, z: 0}
m_AutoConfigureConnectedAnchor: 1
m_ConnectedAnchor: {x: -0.39999408, y: -0.29999986, z: 0}
m_ConnectedAnchor: {x: -0.39999396, y: -0.29999995, z: 0}
serializedVersion: 2
m_SecondaryAxis: {x: 0, y: 0, z: -1}
m_XMotion: 0

m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 7933235353240438151}
serializedVersion: 2
m_Mass: 2
m_Mass: 3
m_Drag: 0.05
m_AngularDrag: 0.05
m_UseGravity: 1

m_Anchor: {x: 0.5, y: 0, z: 0}
m_Axis: {x: 0, y: -1, z: 0}
m_AutoConfigureConnectedAnchor: 1
m_ConnectedAnchor: {x: -0.5000005, y: 0, z: 0}
m_ConnectedAnchor: {x: -0.5, y: 0.00000011920929, z: 0}
serializedVersion: 2
m_SecondaryAxis: {x: 0, y: 0, z: 1}
m_XMotion: 0

m_Anchor: {x: 0, y: 0.5, z: 0}
m_Axis: {x: -1, y: 0, z: 0}
m_AutoConfigureConnectedAnchor: 1
m_ConnectedAnchor: {x: -0.00000011920929, y: -0.5, z: 0}
m_ConnectedAnchor: {x: 0, y: -0.5, z: 0}
serializedVersion: 2
m_SecondaryAxis: {x: 0, y: 0, z: -1}
m_XMotion: 0

m_Anchor: {x: 0, y: 0, z: -0.1}
m_Axis: {x: 1, y: 0, z: 0}
m_AutoConfigureConnectedAnchor: 1
m_ConnectedAnchor: {x: 0.00000011920929, y: -0.60000014, z: 0}
m_ConnectedAnchor: {x: 0, y: -0.60000014, z: 0}
serializedVersion: 2
m_SecondaryAxis: {x: 0, y: 1, z: 0}
m_XMotion: 0

m_Anchor: {x: 0, y: 0.5, z: 0}
m_Axis: {x: -1, y: 0, z: 0}
m_AutoConfigureConnectedAnchor: 1
m_ConnectedAnchor: {x: 0.00000011920929, y: -0.5, z: 0}
m_ConnectedAnchor: {x: 0, y: -0.5, z: 0}
serializedVersion: 2
m_SecondaryAxis: {x: 0, y: 0, z: -1}
m_XMotion: 0

m_Anchor: {x: 0, y: 0.5, z: 0}
m_Axis: {x: -1, y: 0, z: 0}
m_AutoConfigureConnectedAnchor: 1
m_ConnectedAnchor: {x: 0.39999408, y: -0.29999986, z: 0}
m_ConnectedAnchor: {x: 0.39999396, y: -0.29999995, z: 0}
serializedVersion: 2
m_SecondaryAxis: {x: 0, y: 0, z: -1}
m_XMotion: 0

m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 7933235354074184675}
serializedVersion: 2
m_Mass: 5
m_Mass: 6
m_Drag: 0.05
m_AngularDrag: 0.05
m_UseGravity: 1

m_Anchor: {x: 0, y: -0.85, z: 0}
m_Axis: {x: 1, y: 0, z: 0}
m_AutoConfigureConnectedAnchor: 1
m_ConnectedAnchor: {x: 0, y: 0.5119996, z: 0}
m_ConnectedAnchor: {x: 0, y: 0.5119997, z: 0}
serializedVersion: 2
m_SecondaryAxis: {x: 0, y: 0, z: -1}
m_XMotion: 0

m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 7933235354616748503}
serializedVersion: 2
m_Mass: 1
m_Mass: 2
m_Drag: 0.05
m_AngularDrag: 0.05
m_UseGravity: 1

m_Anchor: {x: 0, y: 0, z: 0}
m_Axis: {x: 1, y: 0, z: 0}
m_AutoConfigureConnectedAnchor: 1
m_ConnectedAnchor: {x: 0.70000064, y: 0, z: 0}
m_ConnectedAnchor: {x: 0.70000017, y: 0.00000011920929, z: 0}
serializedVersion: 2
m_SecondaryAxis: {x: 0, y: 0, z: 1}
m_XMotion: 0

m_Anchor: {x: 0, y: -0.3, z: 0}
m_Axis: {x: 1, y: 0, z: 0}
m_AutoConfigureConnectedAnchor: 1
m_ConnectedAnchor: {x: 0, y: 0.383, z: 0}
m_ConnectedAnchor: {x: 0, y: 0.3829999, z: 0}
serializedVersion: 2
m_SecondaryAxis: {x: 0, y: 0, z: -1}
m_XMotion: 0

m_Anchor: {x: 0, y: -0.5, z: 0}
m_Axis: {x: 1, y: 0, z: 0}
m_AutoConfigureConnectedAnchor: 1
m_ConnectedAnchor: {x: 0, y: 0.3050003, z: 0}
m_ConnectedAnchor: {x: 0, y: 0.30500042, z: 0}
serializedVersion: 2
m_SecondaryAxis: {x: 0, y: 0, z: -1}
m_XMotion: 0

m_Anchor: {x: 0, y: 0, z: -0.1}
m_Axis: {x: 1, y: 0, z: 0}
m_AutoConfigureConnectedAnchor: 1
m_ConnectedAnchor: {x: -0.00000011920929, y: -0.60000014, z: 0}
m_ConnectedAnchor: {x: 0, y: -0.60000014, z: 0}
serializedVersion: 2
m_SecondaryAxis: {x: 0, y: 1, z: 0}
m_XMotion: 0

m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 7933235355057813906}
serializedVersion: 2
m_Mass: 3
m_Mass: 4
m_Drag: 0.05
m_AngularDrag: 0.05
m_UseGravity: 1

m_Anchor: {x: -0.55, y: 0, z: 0}
m_Axis: {x: 0, y: 1, z: 0}
m_AutoConfigureConnectedAnchor: 1
m_ConnectedAnchor: {x: 0.7000002, y: 0, z: 0}
m_ConnectedAnchor: {x: 0.7000001, y: 0.00000011920929, z: 0}
serializedVersion: 2
m_SecondaryAxis: {x: 0, y: 0, z: 1}
m_XMotion: 0

type: 3}
m_PrefabInstance: {fileID: 7597605653427724053}
m_PrefabAsset: {fileID: 0}
--- !u!114 &7559180363928843817 stripped
MonoBehaviour:
m_CorrespondingSourceObject: {fileID: 114705911240010044, guid: 72f745913c5a34df5aaadd5c1f0024cb,
type: 3}
m_PrefabInstance: {fileID: 7597605653427724053}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 0}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 771e78c5e980e440e8cd19716b55075f, type: 3}
m_Name:
m_EditorClassIdentifier:

523
Project/Assets/ML-Agents/Examples/SharedAssets/Prefabs/PlatformDynamicTarget.prefab


%YAML 1.1
%TAG !u! tag:unity3d.com,2011:
--- !u!1 &6907050159044240885
GameObject:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
serializedVersion: 6
m_Component:
- component: {fileID: 6902197503240654641}
- component: {fileID: 6894500521640151429}
- component: {fileID: 6885223417161833361}
- component: {fileID: 6859132155796343735}
m_Layer: 0
m_Name: Wall (1)
m_TagString: Untagged
m_Icon: {fileID: 0}
m_NavMeshLayer: 0
m_StaticEditorFlags: 4294967295
m_IsActive: 1
--- !u!4 &6902197503240654641
Transform:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907050159044240885}
m_LocalRotation: {x: -0, y: -0, z: -0, w: 1}
m_LocalPosition: {x: -50, y: 0, z: 0}
m_LocalScale: {x: 1, y: 5, z: 101}
m_Children: []
m_Father: {fileID: 6902102727328990095}
m_RootOrder: 1
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
--- !u!33 &6894500521640151429
MeshFilter:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907050159044240885}
m_Mesh: {fileID: 10202, guid: 0000000000000000e000000000000000, type: 0}
--- !u!23 &6885223417161833361
MeshRenderer:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907050159044240885}
m_Enabled: 1
m_CastShadows: 1
m_ReceiveShadows: 1
m_DynamicOccludee: 1
m_MotionVectors: 1
m_LightProbeUsage: 1
m_ReflectionProbeUsage: 1
m_RenderingLayerMask: 1
m_RendererPriority: 0
m_Materials:
- {fileID: 2100000, guid: 66163cf35956a4be08e801b750c26f33, type: 2}
m_StaticBatchInfo:
firstSubMesh: 0
subMeshCount: 0
m_StaticBatchRoot: {fileID: 0}
m_ProbeAnchor: {fileID: 0}
m_LightProbeVolumeOverride: {fileID: 0}
m_ScaleInLightmap: 1
m_PreserveUVs: 0
m_IgnoreNormalsForChartDetection: 0
m_ImportantGI: 0
m_StitchLightmapSeams: 0
m_SelectedEditorRenderState: 3
m_MinimumChartSize: 4
m_AutoUVMaxDistance: 0.5
m_AutoUVMaxAngle: 89
m_LightmapParameters: {fileID: 0}
m_SortingLayerID: 0
m_SortingLayer: 0
m_SortingOrder: 0
--- !u!65 &6859132155796343735
BoxCollider:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907050159044240885}
m_Material: {fileID: 0}
m_IsTrigger: 0
m_Enabled: 1
serializedVersion: 2
m_Size: {x: 1, y: 1, z: 1}
m_Center: {x: 0, y: 0, z: 0}
--- !u!1 &6907401236047902865
GameObject:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
serializedVersion: 6
m_Component:
- component: {fileID: 6902265967514060089}
- component: {fileID: 6891025662345346653}
- component: {fileID: 6859036447448677835}
- component: {fileID: 6884684845870454579}
m_Layer: 14
m_Name: Ground
m_TagString: ground
m_Icon: {fileID: 0}
m_NavMeshLayer: 0
m_StaticEditorFlags: 4294967295
m_IsActive: 1
--- !u!4 &6902265967514060089
Transform:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907401236047902865}
m_LocalRotation: {x: 0, y: 0.7071068, z: 0, w: 0.7071068}
m_LocalPosition: {x: 0, y: 0, z: 0}
m_LocalScale: {x: 100, y: 1, z: 100}
m_Children: []
m_Father: {fileID: 6902107422946006027}
m_RootOrder: 1
m_LocalEulerAnglesHint: {x: 0, y: 90, z: 0}
--- !u!33 &6891025662345346653
MeshFilter:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907401236047902865}
m_Mesh: {fileID: 10202, guid: 0000000000000000e000000000000000, type: 0}
--- !u!65 &6859036447448677835
BoxCollider:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907401236047902865}
m_Material: {fileID: 0}
m_IsTrigger: 0
m_Enabled: 1
serializedVersion: 2
m_Size: {x: 1, y: 1, z: 1}
m_Center: {x: 0, y: 0, z: 0}
--- !u!23 &6884684845870454579
MeshRenderer:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907401236047902865}
m_Enabled: 1
m_CastShadows: 1
m_ReceiveShadows: 1
m_DynamicOccludee: 1
m_MotionVectors: 1
m_LightProbeUsage: 1
m_ReflectionProbeUsage: 1
m_RenderingLayerMask: 1
m_RendererPriority: 0
m_Materials:
- {fileID: 2100000, guid: acba6bf2a290a496bb8989b42bf8698d, type: 2}
m_StaticBatchInfo:
firstSubMesh: 0
subMeshCount: 0
m_StaticBatchRoot: {fileID: 0}
m_ProbeAnchor: {fileID: 0}
m_LightProbeVolumeOverride: {fileID: 0}
m_ScaleInLightmap: 1
m_PreserveUVs: 1
m_IgnoreNormalsForChartDetection: 0
m_ImportantGI: 0
m_StitchLightmapSeams: 0
m_SelectedEditorRenderState: 3
m_MinimumChartSize: 4
m_AutoUVMaxDistance: 0.5
m_AutoUVMaxAngle: 89
m_LightmapParameters: {fileID: 0}
m_SortingLayerID: 0
m_SortingLayer: 0
m_SortingOrder: 0
--- !u!1 &6907666814270504157
GameObject:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
serializedVersion: 6
m_Component:
- component: {fileID: 6902102727328990095}
m_Layer: 0
m_Name: Walls
m_TagString: Untagged
m_Icon: {fileID: 0}
m_NavMeshLayer: 0
m_StaticEditorFlags: 4294967295
m_IsActive: 1
--- !u!4 &6902102727328990095
Transform:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907666814270504157}
m_LocalRotation: {x: -0, y: -0, z: -0, w: 1}
m_LocalPosition: {x: 0, y: 2, z: 0}
m_LocalScale: {x: 1, y: 1, z: 1}
m_Children:
- {fileID: 6901873285403999439}
- {fileID: 6902197503240654641}
- {fileID: 6901900959948323433}
- {fileID: 6905948743199606957}
m_Father: {fileID: 6902107422946006027}
m_RootOrder: 0
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
--- !u!1 &6907680617094430597
GameObject:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
serializedVersion: 6
m_Component:
- component: {fileID: 6901873285403999439}
- component: {fileID: 6894618984257886823}
- component: {fileID: 6884854148710353183}
- component: {fileID: 6863062098498978603}
m_Layer: 0
m_Name: Wall
m_TagString: Untagged
m_Icon: {fileID: 0}
m_NavMeshLayer: 0
m_StaticEditorFlags: 4294967295
m_IsActive: 1
--- !u!4 &6901873285403999439
Transform:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907680617094430597}
m_LocalRotation: {x: 0, y: 0, z: 0, w: 1}
m_LocalPosition: {x: 50, y: 0, z: 0}
m_LocalScale: {x: 1, y: 5, z: 101}
m_Children: []
m_Father: {fileID: 6902102727328990095}
m_RootOrder: 0
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
--- !u!33 &6894618984257886823
MeshFilter:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907680617094430597}
m_Mesh: {fileID: 10202, guid: 0000000000000000e000000000000000, type: 0}
--- !u!23 &6884854148710353183
MeshRenderer:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907680617094430597}
m_Enabled: 1
m_CastShadows: 1
m_ReceiveShadows: 1
m_DynamicOccludee: 1
m_MotionVectors: 1
m_LightProbeUsage: 1
m_ReflectionProbeUsage: 1
m_RenderingLayerMask: 1
m_RendererPriority: 0
m_Materials:
- {fileID: 2100000, guid: 66163cf35956a4be08e801b750c26f33, type: 2}
m_StaticBatchInfo:
firstSubMesh: 0
subMeshCount: 0
m_StaticBatchRoot: {fileID: 0}
m_ProbeAnchor: {fileID: 0}
m_LightProbeVolumeOverride: {fileID: 0}
m_ScaleInLightmap: 1
m_PreserveUVs: 0
m_IgnoreNormalsForChartDetection: 0
m_ImportantGI: 0
m_StitchLightmapSeams: 0
m_SelectedEditorRenderState: 3
m_MinimumChartSize: 4
m_AutoUVMaxDistance: 0.5
m_AutoUVMaxAngle: 89
m_LightmapParameters: {fileID: 0}
m_SortingLayerID: 0
m_SortingLayer: 0
m_SortingOrder: 0
--- !u!65 &6863062098498978603
BoxCollider:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907680617094430597}
m_Material: {fileID: 0}
m_IsTrigger: 0
m_Enabled: 1
serializedVersion: 2
m_Size: {x: 1, y: 1, z: 1}
m_Center: {x: 0, y: 0, z: 0}
--- !u!1 &6907740118844148851
GameObject:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
serializedVersion: 6
m_Component:
- component: {fileID: 6902107422946006027}
m_Layer: 0
m_Name: PlatformDynamicTarget
m_TagString: Untagged
m_Icon: {fileID: 0}
m_NavMeshLayer: 0
m_StaticEditorFlags: 0
m_IsActive: 1
--- !u!4 &6902107422946006027
Transform:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907740118844148851}
m_LocalRotation: {x: 0, y: 0, z: 0, w: 1}
m_LocalPosition: {x: 0, y: 0, z: 0}
m_LocalScale: {x: 1, y: 1, z: 1}
m_Children:
- {fileID: 6902102727328990095}
- {fileID: 6902265967514060089}
m_Father: {fileID: 0}
m_RootOrder: 0
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
--- !u!1 &6907828132384848309
GameObject:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
serializedVersion: 6
m_Component:
- component: {fileID: 6905948743199606957}
- component: {fileID: 6894463671975680535}
- component: {fileID: 6884868534516719387}
- component: {fileID: 6859048605259525735}
m_Layer: 0
m_Name: Wall (3)
m_TagString: Untagged
m_Icon: {fileID: 0}
m_NavMeshLayer: 0
m_StaticEditorFlags: 4294967295
m_IsActive: 1
--- !u!4 &6905948743199606957
Transform:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907828132384848309}
m_LocalRotation: {x: -0, y: -0, z: -0, w: 1}
m_LocalPosition: {x: 0, y: 0, z: -50}
m_LocalScale: {x: 100, y: 5, z: 1}
m_Children: []
m_Father: {fileID: 6902102727328990095}
m_RootOrder: 3
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
--- !u!33 &6894463671975680535
MeshFilter:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907828132384848309}
m_Mesh: {fileID: 10202, guid: 0000000000000000e000000000000000, type: 0}
--- !u!23 &6884868534516719387
MeshRenderer:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907828132384848309}
m_Enabled: 1
m_CastShadows: 1
m_ReceiveShadows: 1
m_DynamicOccludee: 1
m_MotionVectors: 1
m_LightProbeUsage: 1
m_ReflectionProbeUsage: 1
m_RenderingLayerMask: 1
m_RendererPriority: 0
m_Materials:
- {fileID: 2100000, guid: 66163cf35956a4be08e801b750c26f33, type: 2}
m_StaticBatchInfo:
firstSubMesh: 0
subMeshCount: 0
m_StaticBatchRoot: {fileID: 0}
m_ProbeAnchor: {fileID: 0}
m_LightProbeVolumeOverride: {fileID: 0}
m_ScaleInLightmap: 1
m_PreserveUVs: 0
m_IgnoreNormalsForChartDetection: 0
m_ImportantGI: 0
m_StitchLightmapSeams: 0
m_SelectedEditorRenderState: 3
m_MinimumChartSize: 4
m_AutoUVMaxDistance: 0.5
m_AutoUVMaxAngle: 89
m_LightmapParameters: {fileID: 0}
m_SortingLayerID: 0
m_SortingLayer: 0
m_SortingOrder: 0
--- !u!65 &6859048605259525735
BoxCollider:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907828132384848309}
m_Material: {fileID: 0}
m_IsTrigger: 0
m_Enabled: 1
serializedVersion: 2
m_Size: {x: 1, y: 1, z: 1}
m_Center: {x: 0, y: 0, z: 0}
--- !u!1 &6907860845836169157
GameObject:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
serializedVersion: 6
m_Component:
- component: {fileID: 6901900959948323433}
- component: {fileID: 6893927248293796423}
- component: {fileID: 6885176866006237333}
- component: {fileID: 6859395915623032135}
m_Layer: 0
m_Name: Wall (2)
m_TagString: Untagged
m_Icon: {fileID: 0}
m_NavMeshLayer: 0
m_StaticEditorFlags: 4294967295
m_IsActive: 1
--- !u!4 &6901900959948323433
Transform:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907860845836169157}
m_LocalRotation: {x: -0, y: -0, z: -0, w: 1}
m_LocalPosition: {x: 0, y: 0, z: 50}
m_LocalScale: {x: 100, y: 5, z: 1}
m_Children: []
m_Father: {fileID: 6902102727328990095}
m_RootOrder: 2
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
--- !u!33 &6893927248293796423
MeshFilter:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907860845836169157}
m_Mesh: {fileID: 10202, guid: 0000000000000000e000000000000000, type: 0}
--- !u!23 &6885176866006237333
MeshRenderer:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907860845836169157}
m_Enabled: 1
m_CastShadows: 1
m_ReceiveShadows: 1
m_DynamicOccludee: 1
m_MotionVectors: 1
m_LightProbeUsage: 1
m_ReflectionProbeUsage: 1
m_RenderingLayerMask: 1
m_RendererPriority: 0
m_Materials:
- {fileID: 2100000, guid: 66163cf35956a4be08e801b750c26f33, type: 2}
m_StaticBatchInfo:
firstSubMesh: 0
subMeshCount: 0
m_StaticBatchRoot: {fileID: 0}
m_ProbeAnchor: {fileID: 0}
m_LightProbeVolumeOverride: {fileID: 0}
m_ScaleInLightmap: 1
m_PreserveUVs: 0
m_IgnoreNormalsForChartDetection: 0
m_ImportantGI: 0
m_StitchLightmapSeams: 0
m_SelectedEditorRenderState: 3
m_MinimumChartSize: 4
m_AutoUVMaxDistance: 0.5
m_AutoUVMaxAngle: 89
m_LightmapParameters: {fileID: 0}
m_SortingLayerID: 0
m_SortingLayer: 0
m_SortingOrder: 0
--- !u!65 &6859395915623032135
BoxCollider:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907860845836169157}
m_Material: {fileID: 0}
m_IsTrigger: 0
m_Enabled: 1
serializedVersion: 2
m_Size: {x: 1, y: 1, z: 1}
m_Center: {x: 0, y: 0, z: 0}

7
Project/Assets/ML-Agents/Examples/SharedAssets/Prefabs/PlatformDynamicTarget.prefab.meta


fileFormatVersion: 2
guid: f0d7741d9e06247f6843b921a206b978
PrefabImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

8
Project/Assets/ML-Agents/Examples/SharedAssets/Prefabs/Targets.meta


fileFormatVersion: 2
guid: 88818c9b63c96424aa8e0fca85552133
folderAsset: yes
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

10
Project/Assets/ML-Agents/Examples/Walker/Demos/ExpertWalkerDy.demo.meta


fileFormatVersion: 2
guid: 9f87b3070a0fd4a1e838131a91399c2f
ScriptedImporter:
fileIDToRecycleName:
11400000: Assets/Demonstrations/ExpertWalkerDy.demo
externalObjects: {}
userData: ' (Unity.MLAgents.Demonstrations.DemonstrationSummary)'
assetBundleName:
assetBundleVariant:
script: {fileID: 11500000, guid: 7bd65ce151aaa4a41a45312543c56be1, type: 3}

10
Project/Assets/ML-Agents/Examples/Walker/Demos/ExpertWalkerDyVS.demo.meta


fileFormatVersion: 2
guid: a4b02e2c382c247919eb63ce72e90a3b
ScriptedImporter:
fileIDToRecycleName:
11400000: Assets/Demonstrations/ExpertWalkerDyVS.demo
externalObjects: {}
userData: ' (Unity.MLAgents.Demonstrations.DemonstrationSummary)'
assetBundleName:
assetBundleVariant:
script: {fileID: 11500000, guid: 7bd65ce151aaa4a41a45312543c56be1, type: 3}

10
Project/Assets/ML-Agents/Examples/Walker/Demos/ExpertWalkerStVS.demo.meta


fileFormatVersion: 2
guid: edcbb505552464c5c829886a4a3817dd
ScriptedImporter:
fileIDToRecycleName:
11400000: Assets/Demonstrations/ExpertWalkerStVS.demo
externalObjects: {}
userData: ' (Unity.MLAgents.Demonstrations.DemonstrationSummary)'
assetBundleName:
assetBundleVariant:
script: {fileID: 11500000, guid: 7bd65ce151aaa4a41a45312543c56be1, type: 3}

10
Project/Assets/ML-Agents/Examples/Walker/Demos/ExpertWalkerSta.demo.meta


fileFormatVersion: 2
guid: 1f3a5d62e6aea4b5eb053ac33f11b06d
ScriptedImporter:
fileIDToRecycleName:
11400000: Assets/Demonstrations/ExpertWalkerSta.demo
externalObjects: {}
userData: ' (Unity.MLAgents.Demonstrations.DemonstrationSummary)'
assetBundleName:
assetBundleVariant:
script: {fileID: 11500000, guid: 7bd65ce151aaa4a41a45312543c56be1, type: 3}

Some files were not shown because too many files changed in this diff
