
Merge branch 'develop-add-fire' into develop-add-fire-memoryclass

Ervin Teng, 4 years ago
Current commit: 42e25b25
150 files changed, with 4,383 insertions and 3,552 deletions
Changed files (total lines changed per file in parentheses):

  1. .yamato/com.unity.ml-agents-performance.yml (2)
  2. Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/DirectionIndicator.cs (20)
  3. Project/Assets/ML-Agents/Examples/Walker/Scenes/WalkerDynamic.unity (977)
  4. Project/Assets/ML-Agents/Examples/Walker/Scenes/WalkerDynamic.unity.meta (2)
  5. Project/Assets/ML-Agents/Examples/Walker/Scenes/WalkerStatic.unity (962)
  6. Project/Assets/ML-Agents/Examples/Walker/Scripts/WalkerAgent.cs (161)
  7. Project/Assets/ML-Agents/Examples/Walker/TFModels/WalkerDynamic.nn (1001)
  8. Project/Assets/ML-Agents/Examples/Walker/TFModels/WalkerDynamic.nn.meta (2)
  9. Project/Assets/ML-Agents/Examples/Walker/TFModels/WalkerStatic.nn (1001)
  10. Project/Assets/ML-Agents/Examples/Walker/TFModels/WalkerStatic.nn.meta (2)
  11. Project/ProjectSettings/EditorBuildSettings.asset (5)
  12. Project/ProjectSettings/UnityConnectSettings.asset (2)
  13. com.unity.ml-agents.extensions/Editor/Unity.ML-Agents.Extensions.Editor.asmdef (4)
  14. com.unity.ml-agents.extensions/Runtime/AssemblyInfo.cs (1)
  15. com.unity.ml-agents.extensions/Runtime/Sensors/ArticulationBodyPoseExtractor.cs (29)
  16. com.unity.ml-agents.extensions/Runtime/Sensors/ArticulationBodySensorComponent.cs (8)
  17. com.unity.ml-agents.extensions/Runtime/Sensors/PhysicsBodySensor.cs (54)
  18. com.unity.ml-agents.extensions/Runtime/Sensors/PoseExtractor.cs (117)
  19. com.unity.ml-agents.extensions/Runtime/Sensors/RigidBodyPoseExtractor.cs (80)
  20. com.unity.ml-agents.extensions/Runtime/Sensors/RigidBodySensorComponent.cs (68)
  21. com.unity.ml-agents.extensions/Tests/Editor/Sensors/PoseExtractorTests.cs (88)
  22. com.unity.ml-agents.extensions/Tests/Editor/Sensors/RigidBodyPoseExtractorTests.cs (67)
  23. com.unity.ml-agents/CHANGELOG.md (5)
  24. com.unity.ml-agents/Runtime/Actuators/ActionSegment.cs (33)
  25. com.unity.ml-agents/Runtime/Actuators/ActionSpec.cs (2)
  26. com.unity.ml-agents/Runtime/Actuators/ActuatorManager.cs (51)
  27. com.unity.ml-agents/Runtime/Actuators/IActionReceiver.cs (72)
  28. com.unity.ml-agents/Runtime/Actuators/IActuator.cs (2)
  29. com.unity.ml-agents/Runtime/Actuators/IDiscreteActionMask.cs (2)
  30. com.unity.ml-agents/Runtime/Actuators/VectorActuator.cs (2)
  31. com.unity.ml-agents/Runtime/Agent.cs (201)
  32. com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs (14)
  33. com.unity.ml-agents/Runtime/Communicator/RpcCommunicator.cs (2)
  34. com.unity.ml-agents/Runtime/DecisionRequester.cs (6)
  35. com.unity.ml-agents/Runtime/DiscreteActionMasker.cs (118)
  36. com.unity.ml-agents/Runtime/Policies/BarracudaPolicy.cs (17)
  37. com.unity.ml-agents/Runtime/Policies/BehaviorParameters.cs (37)
  38. com.unity.ml-agents/Runtime/Policies/HeuristicPolicy.cs (20)
  39. com.unity.ml-agents/Runtime/Policies/IPolicy.cs (3)
  40. com.unity.ml-agents/Runtime/Policies/RemotePolicy.cs (15)
  41. com.unity.ml-agents/Tests/Editor/Actuators/ActuatorManagerTests.cs (46)
  42. com.unity.ml-agents/Tests/Editor/BehaviorParameterTests.cs (3)
  43. com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs (4)
  44. config/ppo/WalkerDynamic.yaml (2)
  45. config/ppo/WalkerStatic.yaml (2)
  46. docs/Learning-Environment-Examples.md (31)
  47. ml-agents-envs/mlagents_envs/exception.py (8)
  48. ml-agents/mlagents/trainers/ghost/trainer.py (2)
  49. ml-agents/mlagents/trainers/optimizer/tf_optimizer.py (3)
  50. ml-agents/mlagents/trainers/policy/policy.py (20)
  51. ml-agents/mlagents/trainers/policy/tf_policy.py (131)
  52. ml-agents/mlagents/trainers/policy/torch_policy.py (72)
  53. ml-agents/mlagents/trainers/ppo/optimizer_tf.py (10)
  54. ml-agents/mlagents/trainers/ppo/optimizer_torch.py (7)
  55. ml-agents/mlagents/trainers/ppo/trainer.py (32)
  56. ml-agents/mlagents/trainers/sac/optimizer.py (2)
  57. ml-agents/mlagents/trainers/sac/optimizer_torch.py (13)
  58. ml-agents/mlagents/trainers/sac/trainer.py (31)
  59. ml-agents/mlagents/trainers/settings.py (8)
  60. ml-agents/mlagents/trainers/stats.py (41)
  61. ml-agents/mlagents/trainers/tests/mock_brain.py (3)
  62. ml-agents/mlagents/trainers/tests/test_barracuda_converter.py (27)
  63. ml-agents/mlagents/trainers/tests/test_bcmodule.py (10)
  64. ml-agents/mlagents/trainers/tests/test_env_param_manager.py (62)
  65. ml-agents/mlagents/trainers/tests/test_nn_policy.py (175)
  66. ml-agents/mlagents/trainers/tests/test_ppo.py (8)
  67. ml-agents/mlagents/trainers/tests/test_reward_signals.py (1)
  68. ml-agents/mlagents/trainers/tests/test_rl_trainer.py (25)
  69. ml-agents/mlagents/trainers/tests/test_sac.py (7)
  70. ml-agents/mlagents/trainers/tests/test_simple_rl.py (8)
  71. ml-agents/mlagents/trainers/tests/test_tf_policy.py (20)
  72. ml-agents/mlagents/trainers/tests/torch/test_bcmodule.py (8)
  73. ml-agents/mlagents/trainers/tests/torch/test_utils.py (6)
  74. ml-agents/mlagents/trainers/tf/models.py (26)
  75. ml-agents/mlagents/trainers/torch/networks.py (43)
  76. ml-agents/mlagents/trainers/torch/utils.py (4)
  77. ml-agents/mlagents/trainers/trainer/rl_trainer.py (43)
  78. ml-agents/mlagents/trainers/trainer/trainer.py (2)
  79. ml-agents/mlagents/trainers/trainer_controller.py (10)
  80. Project/Assets/ML-Agents/Examples/SharedAssets/Prefabs/Targets/DynamicTarget.prefab (21)
  81. Project/Assets/ML-Agents/Examples/SharedAssets/Prefabs/Targets/StaticTarget.prefab (19)
  82. Project/Assets/ML-Agents/Examples/Walker/Prefabs/Ragdoll/WalkerRagdollBase.prefab (82)
  83. Project/Assets/ML-Agents/Examples/SharedAssets/Prefabs/PlatformDynamicTarget.prefab (523)
  84. Project/Assets/ML-Agents/Examples/SharedAssets/Prefabs/PlatformDynamicTarget.prefab.meta (7)
  85. Project/Assets/ML-Agents/Examples/SharedAssets/Prefabs/Targets.meta (8)
  86. Project/Assets/ML-Agents/Examples/Walker/Demos/ExpertWalkerDy.demo.meta (10)
  87. Project/Assets/ML-Agents/Examples/Walker/Demos/ExpertWalkerDyVS.demo.meta (10)
  88. Project/Assets/ML-Agents/Examples/Walker/Demos/ExpertWalkerStVS.demo.meta (10)
  89. Project/Assets/ML-Agents/Examples/Walker/Demos/ExpertWalkerSta.demo.meta (10)
  90. Project/Assets/ML-Agents/Examples/Walker/Prefabs/Platforms.meta (8)
  91. Project/Assets/ML-Agents/Examples/Walker/Prefabs/Ragdoll.meta (8)
  92. Project/Assets/ML-Agents/Examples/Walker/Scenes/WalkerDynamicVariableSpeed.unity (1001)
  93. Project/Assets/ML-Agents/Examples/Walker/Scenes/WalkerDynamicVariableSpeed.unity.meta (7)

.yamato/com.unity.ml-agents-performance.yml (2)


variables:
UNITY_VERSION: {{ editor.version }}
commands:
- python -m pip install unity-downloader-cli --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
- python -m pip install unity-downloader-cli --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple --upgrade
- unity-downloader-cli -u {{ editor.version }} -c editor --wait --fast
- curl -s https://artifactory.internal.unity3d.com/core-automation/tools/utr-standalone/utr --output utr
- chmod +x ./utr

Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/DirectionIndicator.cs (20)


using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using UnityEngine;
public bool updatedByAgent; //should this be updated by the agent? If not, it will use local settings
void OnEnable()
{
m_StartingYPos = transform.position.y;

{
transform.position = new Vector3(transformToFollow.position.x, m_StartingYPos + heightOffset, transformToFollow.position.z);
if (updatedByAgent)
return;
transform.position = new Vector3(transformToFollow.position.x, m_StartingYPos + heightOffset,
transformToFollow.position.z);
}
//Public method to allow an agent to directly update this component
public void MatchOrientation(Transform t)
{
transform.position = new Vector3(t.position.x, m_StartingYPos + heightOffset, t.position.z);
transform.rotation = t.rotation;
}
}
}
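
For context, the new updatedByAgent flag lets the owning agent position the indicator itself through MatchOrientation instead of the indicator following transformToFollow on its own. Below is a minimal, illustrative sketch of the calling side, not part of the diff; the field names m_DirectionIndicator and m_OrientationCube are assumptions standing in for whatever the agent holds:

    // Illustrative caller: drive the indicator from the agent each step
    // when updatedByAgent is checked in the inspector.
    if (m_DirectionIndicator != null)
    {
        m_DirectionIndicator.MatchOrientation(m_OrientationCube.transform);
    }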

Project/Assets/ML-Agents/Examples/Walker/Scenes/WalkerDynamic.unity (977)
File diff too large to display.

Project/Assets/ML-Agents/Examples/Walker/Scenes/WalkerDynamic.unity.meta (2)


fileFormatVersion: 2
guid: 79d5d2687bfbe45f5b78bd6c04992e0d
guid: 65c87f50b8c81433d8fd7f6550773467
DefaultImporter:
externalObjects: {}
userData:

Project/Assets/ML-Agents/Examples/Walker/Scenes/WalkerStatic.unity (962)
File diff too large to display.

Project/Assets/ML-Agents/Examples/Walker/Scripts/WalkerAgent.cs (161)


using System;
using MLAgentsExamples;
using UnityEngine;
using Unity.MLAgents;
using Unity.MLAgentsExamples;

public class WalkerAgent : Agent
{
public float maximumWalkingSpeed = 999; //The max walk velocity magnitude an agent will be rewarded for
Vector3 m_WalkDir; //Direction to the target
// Quaternion m_WalkDirLookRot; //Will hold the rotation to our target
[Header("Walk Speed")]
[Range(0.1f, 10)]
[SerializeField]
//The walking speed to try and achieve
private float m_TargetWalkingSpeed = 10;
public float MTargetWalkingSpeed // property
{
get { return m_TargetWalkingSpeed; }
set { m_TargetWalkingSpeed = Mathf.Clamp(value, .1f, m_maxWalkingSpeed); }
}
const float m_maxWalkingSpeed = 10; //The max walking speed
//Should the agent sample a new goal velocity each episode?
//If true, walkSpeed will be randomly set between zero and m_maxWalkingSpeed in OnEpisodeBegin()
//If false, the goal velocity will be walkingSpeed
public bool randomizeWalkSpeedEachEpisode;
//The direction an agent will walk during training.
private Vector3 m_WorldDirToWalk = Vector3.right;
[Header("Target To Walk Towards")] [Space(10)]
public TargetController target; //Target the agent will walk towards.
[Header("Target To Walk Towards")] public Transform target; //Target the agent will walk towards during training.
[Header("Body Parts")] [Space(10)] public Transform hips;
[Header("Body Parts")] public Transform hips;
public Transform chest;
public Transform spine;
public Transform head;

public Transform forearmR;
public Transform handR;
[Header("Orientation")] [Space(10)]
public OrientationCubeController orientationCube;
OrientationCubeController m_OrientationCube;
//The indicator graphic gameobject that points towards the target
DirectionIndicator m_DirectionIndicator;
orientationCube.UpdateOrientation(hips, target.transform);
m_OrientationCube = GetComponentInChildren<OrientationCubeController>();
m_DirectionIndicator = GetComponentInChildren<DirectionIndicator>();
//Setup each body part
m_JdController = GetComponent<JointDriveController>();

}
//Random start rotation to help generalize
transform.rotation = Quaternion.Euler(0, Random.Range(0.0f, 360.0f), 0);
hips.rotation = Quaternion.Euler(0, Random.Range(0.0f, 360.0f), 0);
UpdateOrientationObjects();
orientationCube.UpdateOrientation(hips, target.transform);
//Set our goal walking speed
MTargetWalkingSpeed =
randomizeWalkSpeedEachEpisode ? Random.Range(0.1f, m_maxWalkingSpeed) : MTargetWalkingSpeed;
SetResetParameters();
}

//Get velocities in the context of our orientation cube's space
//Note: You can get these velocities in world space as well but it may not train as well.
sensor.AddObservation(orientationCube.transform.InverseTransformDirection(bp.rb.velocity));
sensor.AddObservation(orientationCube.transform.InverseTransformDirection(bp.rb.angularVelocity));
sensor.AddObservation(m_OrientationCube.transform.InverseTransformDirection(bp.rb.velocity));
sensor.AddObservation(m_OrientationCube.transform.InverseTransformDirection(bp.rb.angularVelocity));
sensor.AddObservation(orientationCube.transform.InverseTransformDirection(bp.rb.position - hips.position));
sensor.AddObservation(m_OrientationCube.transform.InverseTransformDirection(bp.rb.position - hips.position));
if (bp.rb.transform != hips && bp.rb.transform != handL && bp.rb.transform != handR)
{

/// </summary>
public override void CollectObservations(VectorSensor sensor)
{
sensor.AddObservation(Quaternion.FromToRotation(hips.forward, orientationCube.transform.forward));
sensor.AddObservation(Quaternion.FromToRotation(head.forward, orientationCube.transform.forward));
var cubeForward = m_OrientationCube.transform.forward;
sensor.AddObservation(orientationCube.transform.InverseTransformPoint(target.transform.position));
//velocity we want to match
var velGoal = cubeForward * MTargetWalkingSpeed;
//ragdoll's avg vel
var avgVel = GetAvgVelocity();
//current ragdoll velocity. normalized
sensor.AddObservation(Vector3.Distance(velGoal, avgVel));
//avg body vel relative to cube
sensor.AddObservation(m_OrientationCube.transform.InverseTransformDirection(avgVel));
//vel goal relative to cube
sensor.AddObservation(m_OrientationCube.transform.InverseTransformDirection(velGoal));
//rotation deltas
sensor.AddObservation(Quaternion.FromToRotation(hips.forward, cubeForward));
sensor.AddObservation(Quaternion.FromToRotation(head.forward, cubeForward));
//Position of target position relative to cube
sensor.AddObservation(m_OrientationCube.transform.InverseTransformPoint(target.transform.position));
foreach (var bodyPart in m_JdController.bodyPartsList)
{

bpDict[forearmR].SetJointStrength(vectorAction[++i]);
}
//Update OrientationCube and DirectionIndicator
void UpdateOrientationObjects()
{
m_WorldDirToWalk = target.position - hips.position;
m_OrientationCube.UpdateOrientation(hips, target);
if (m_DirectionIndicator)
{
m_DirectionIndicator.MatchOrientation(m_OrientationCube.transform);
}
}
var cubeForward = orientationCube.transform.forward;
orientationCube.UpdateOrientation(hips, target.transform);
UpdateOrientationObjects();
var cubeForward = m_OrientationCube.transform.forward;
// a. Velocity alignment with goal direction.
var moveTowardsTargetReward = Vector3.Dot(cubeForward,
Vector3.ClampMagnitude(m_JdController.bodyPartsDict[hips].rb.velocity, maximumWalkingSpeed));
if (float.IsNaN(moveTowardsTargetReward))
// a. Match target speed
//This reward will approach 1 if it matches perfectly and approach zero as it deviates
var matchSpeedReward = GetMatchingVelocityReward(cubeForward * MTargetWalkingSpeed, GetAvgVelocity());
//Check for NaNs
if (float.IsNaN(matchSpeedReward))
$" cubeForward: {cubeForward}\n"+
$" hips.velocity: {m_JdController.bodyPartsDict[hips].rb.velocity}\n"+
$" maximumWalkingSpeed: {maximumWalkingSpeed}"
$" cubeForward: {cubeForward}\n" +
$" hips.velocity: {m_JdController.bodyPartsDict[hips].rb.velocity}\n" +
$" maximumWalkingSpeed: {m_maxWalkingSpeed}"
// b. Rotation alignment with goal direction.
var lookAtTargetReward = Vector3.Dot(cubeForward, head.forward);
// b. Rotation alignment with target direction.
//This reward will approach 1 if it faces the target direction perfectly and approach zero as it deviates
var lookAtTargetReward = (Vector3.Dot(cubeForward, head.forward) + 1) * .5F;
//Check for NaNs
$" cubeForward: {cubeForward}\n"+
$" cubeForward: {cubeForward}\n" +
// c. Encourage head height. //Should normalize to ~1
var headHeightOverFeetReward =
((head.position.y - footL.position.y) + (head.position.y - footR.position.y) / 10);
if (float.IsNaN(headHeightOverFeetReward))
AddReward(matchSpeedReward * lookAtTargetReward);
}
//Returns the average velocity of all of the body parts
//Using the velocity of the hips only has shown to result in more erratic movement from the limbs, so...
//...using the average helps prevent this erratic movement
Vector3 GetAvgVelocity()
{
Vector3 velSum = Vector3.zero;
Vector3 avgVel = Vector3.zero;
//ALL RBS
int numOfRB = 0;
foreach (var item in m_JdController.bodyPartsList)
throw new ArgumentException(
"NaN in headHeightOverFeetReward.\n" +
$" head.position: {head.position}\n"+
$" footL.position: {footL.position}\n"+
$" footR.position: {footR.position}"
);
numOfRB++;
velSum += item.rb.velocity;
AddReward(
+ 0.02f * moveTowardsTargetReward
+ 0.02f * lookAtTargetReward
+ 0.005f * headHeightOverFeetReward
);
avgVel = velSum / numOfRB;
return avgVel;
}
//normalized value of the difference in avg speed vs goal walking speed.
public float GetMatchingVelocityReward(Vector3 velocityGoal, Vector3 actualVelocity)
{
//distance between our actual velocity and goal velocity
var velDeltaMagnitude = Mathf.Clamp(Vector3.Distance(actualVelocity, velocityGoal), 0, MTargetWalkingSpeed);
//return the value on a declining sigmoid shaped curve that decays from 1 to 0
//This reward will approach 1 if it matches perfectly and approach zero as it deviates
return Mathf.Pow(1 - Mathf.Pow(velDeltaMagnitude / MTargetWalkingSpeed, 2), 2);
}
/// <summary>
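
For reference, the reward returned by GetMatchingVelocityReward above falls smoothly from 1 (average velocity equals the goal velocity) to 0 (the velocity error reaches the full target speed). A standalone sketch of the same curve, outside the diff and assuming only UnityEngine.Mathf:

    // Illustrative only: reproduces the curve used by GetMatchingVelocityReward.
    // delta is |actualVelocity - velocityGoal|, clamped to targetSpeed.
    static float MatchingVelocityCurve(float delta, float targetSpeed)
    {
        var normalized = Mathf.Clamp(delta, 0f, targetSpeed) / targetSpeed; // 0 when matched, 1 when fully off
        return Mathf.Pow(1f - Mathf.Pow(normalized, 2f), 2f);               // 1 at delta = 0, 0 at delta = targetSpeed
    }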

Project/Assets/ML-Agents/Examples/Walker/TFModels/WalkerDynamic.nn (1001)
File diff too large to display.

Project/Assets/ML-Agents/Examples/Walker/TFModels/WalkerDynamic.nn.meta (2)


fileFormatVersion: 2
guid: e785133c5b0ac461588106642550d1b3
guid: 8cbae6de45ea44d0c97366e252052722
ScriptedImporter:
fileIDToRecycleName:
11400000: main obj

Project/Assets/ML-Agents/Examples/Walker/TFModels/WalkerStatic.nn (1001)
File diff too large to display.

Project/Assets/ML-Agents/Examples/Walker/TFModels/WalkerStatic.nn.meta (2)


fileFormatVersion: 2
guid: 8dfd4337ed40e4d48872a4f86919c9da
guid: 185990f76b7804d1e83378e9d4454c6b
ScriptedImporter:
fileIDToRecycleName:
11400000: main obj

Project/ProjectSettings/EditorBuildSettings.asset (5)


EditorBuildSettings:
m_ObjectHideFlags: 0
serializedVersion: 2
m_Scenes:
- enabled: 1
path: Assets/ML-Agents/Examples/3DBall/Scenes/3DBall.unity
guid: b9ac0cbf961bf4dacbfa0aa9c0d60aaa
m_Scenes: []
m_configObjects: {}

Project/ProjectSettings/UnityConnectSettings.asset (2)


UnityConnectSettings:
m_ObjectHideFlags: 0
serializedVersion: 1
m_Enabled: 0
m_Enabled: 1
m_TestMode: 0
m_EventOldUrl: https://api.uca.cloud.unity3d.com/v1/events
m_EventUrl: https://cdp.cloud.unity3d.com/v1/events

com.unity.ml-agents.extensions/Editor/Unity.ML-Agents.Extensions.Editor.asmdef (4)


{
"name": "Unity.ML-Agents.Extensions.Editor",
"references": [
"Unity.ML-Agents.Extensions"
"Unity.ML-Agents.Extensions",
"Unity.ML-Agents",
"Unity.ML-Agents.Editor"
],
"includePlatforms": [
"Editor"

com.unity.ml-agents.extensions/Runtime/AssemblyInfo.cs (1)


using System.Runtime.CompilerServices;
[assembly: InternalsVisibleTo("Unity.ML-Agents.Extensions.EditorTests")]
[assembly: InternalsVisibleTo("Unity.ML-Agents.Extensions.Editor")]

com.unity.ml-agents.extensions/Runtime/Sensors/ArticulationBodyPoseExtractor.cs (29)


return new Pose { rotation = t.rotation, position = t.position };
}
/// <inheritdoc/>
protected internal override Object GetObjectAt(int index)
{
return m_Bodies[index];
}
internal IEnumerable<ArticulationBody> GetEnabledArticulationBodies()
{
if (m_Bodies == null)
{
yield break;
}
for (var i = 0; i < m_Bodies.Length; i++)
{
var articBody = m_Bodies[i];
if (articBody == null)
{
// Ignore a virtual root.
continue;
}
if (IsPoseEnabled(i))
{
yield return articBody;
}
}
}
}
}
#endif // UNITY_2020_1_OR_NEWER

com.unity.ml-agents.extensions/Runtime/Sensors/ArticulationBodySensorComponent.cs (8)


var poseExtractor = new ArticulationBodyPoseExtractor(RootBody);
var numPoseObservations = poseExtractor.GetNumPoseObservations(Settings);
var numJointObservations = 0;
// Start from i=1 to ignore the root
for (var i = 1; i < poseExtractor.Bodies.Length; i++)
foreach(var articBody in poseExtractor.GetEnabledArticulationBodies())
numJointObservations += ArticulationBodyJointExtractor.NumObservations(
poseExtractor.Bodies[i], Settings
);
numJointObservations += ArticulationBodyJointExtractor.NumObservations(articBody, Settings);
}
return new[] { numPoseObservations + numJointObservations };
}

com.unity.ml-agents.extensions/Runtime/Sensors/PhysicsBodySensor.cs (54)


using System.Collections.Generic;
using UnityEngine;
using Unity.MLAgents.Sensors;

string m_SensorName;
PoseExtractor m_PoseExtractor;
IJointExtractor[] m_JointExtractors;
List<IJointExtractor> m_JointExtractors;
/// Construct a new PhysicsBodySensor
/// Construct a new PhysicsBodySensor
/// <param name="rootBody">The root Rigidbody. This has no Joints on it (but other Joints may connect to it).</param>
/// <param name="rootGameObject">Optional GameObject used to find Rigidbodies in the hierarchy.</param>
/// <param name="virtualRoot">Optional GameObject used to determine the root of the poses,
/// <param name="poseExtractor"></param>
Rigidbody rootBody,
GameObject rootGameObject,
GameObject virtualRoot,
RigidBodyPoseExtractor poseExtractor,
string sensorName=null
string sensorName
var poseExtractor = new RigidBodyPoseExtractor(rootBody, rootGameObject, virtualRoot);
m_SensorName = string.IsNullOrEmpty(sensorName) ? $"PhysicsBodySensor:{rootBody?.name}" : sensorName;
m_SensorName = sensorName;
var rigidBodies = poseExtractor.Bodies;
if (rigidBodies != null)
{
m_JointExtractors = new IJointExtractor[rigidBodies.Length - 1]; // skip the root
for (var i = 1; i < rigidBodies.Length; i++)
{
var jointExtractor = new RigidBodyJointExtractor(rigidBodies[i]);
numJointExtractorObservations += jointExtractor.NumObservations(settings);
m_JointExtractors[i - 1] = jointExtractor;
}
}
else
m_JointExtractors = new List<IJointExtractor>(poseExtractor.NumEnabledPoses);
foreach(var rb in poseExtractor.GetEnabledRigidbodies())
m_JointExtractors = new IJointExtractor[0];
var jointExtractor = new RigidBodyJointExtractor(rb);
numJointExtractorObservations += jointExtractor.NumObservations(settings);
m_JointExtractors.Add(jointExtractor);
}
var numTransformObservations = m_PoseExtractor.GetNumPoseObservations(settings);

m_Settings = settings;
var numJointExtractorObservations = 0;
var articBodies = poseExtractor.Bodies;
if (articBodies != null)
m_JointExtractors = new List<IJointExtractor>(poseExtractor.NumEnabledPoses);
foreach(var articBody in poseExtractor.GetEnabledArticulationBodies())
m_JointExtractors = new IJointExtractor[articBodies.Length - 1]; // skip the root
for (var i = 1; i < articBodies.Length; i++)
{
var jointExtractor = new ArticulationBodyJointExtractor(articBodies[i]);
numJointExtractorObservations += jointExtractor.NumObservations(settings);
m_JointExtractors[i - 1] = jointExtractor;
}
}
else
{
m_JointExtractors = new IJointExtractor[0];
var jointExtractor = new ArticulationBodyJointExtractor(articBody);
numJointExtractorObservations += jointExtractor.NumObservations(settings);
m_JointExtractors.Add(jointExtractor);
}
var numTransformObservations = m_PoseExtractor.GetNumPoseObservations(settings);

com.unity.ml-agents.extensions/Runtime/Sensors/PoseExtractor.cs (117)


using System;
using Object = UnityEngine.Object;
namespace Unity.MLAgents.Extensions.Sensors
{

{
if (m_ParentIndices == null)
{
return -1;
throw new NullReferenceException("No parent indices set");
}
return m_ParentIndices[index];

public void SetPoseEnabled(int index, bool val)
{
m_PoseEnabled[index] = val;
}
public bool IsPoseEnabled(int index)
{
return m_PoseEnabled[index];
}
/// <summary>

/// <returns></returns>
protected internal abstract Vector3 GetLinearVelocityAt(int index);
/// <summary>
/// Return the underlying object at the given index. This is only
/// used for display in the inspector.
/// </summary>
/// <param name="index"></param>
/// <returns></returns>
protected internal virtual Object GetObjectAt(int index)
{
return null;
}
/// <summary>
/// Update the internal model space transform storage based on the underlying system.

Debug.DrawLine(current.position+offset, current.position+offset+.1f*localRight, Color.blue);
}
}
/// <summary>
/// Simplified representation of the a node in the hierarchy for display.
/// </summary>
internal struct DisplayNode
{
/// <summary>
/// Underlying object in the hierarchy. Pass to EditorGUIUtility.ObjectContent() for display.
/// </summary>
public Object NodeObject;
/// <summary>
/// Whether the poses for the object are enabled.
/// </summary>
public bool Enabled;
/// <summary>
/// Depth in the hierarchy, used for adjusting the indent level.
/// </summary>
public int Depth;
/// <summary>
/// The index of the corresponding object in the PoseExtractor.
/// </summary>
public int OriginalIndex;
}
/// <summary>
/// Get a list of display nodes in depth-first order.
/// </summary>
/// <returns></returns>
internal IList<DisplayNode> GetDisplayNodes()
{
if (NumPoses == 0)
{
return Array.Empty<DisplayNode>();
}
var nodesOut = new List<DisplayNode>(NumPoses);
// List of children for each node
var tree = new Dictionary<int, List<int>>();
for (var i = 0; i < NumPoses; i++)
{
var parent = GetParentIndex(i);
if (i == -1)
{
continue;
}
if (!tree.ContainsKey(parent))
{
tree[parent] = new List<int>();
}
tree[parent].Add(i);
}
// Store (index, depth) in the stack
var stack = new Stack<(int, int)>();
stack.Push((0, 0));
while (stack.Count != 0)
{
var (current, depth) = stack.Pop();
var obj = GetObjectAt(current);
var node = new DisplayNode
{
NodeObject = obj,
Enabled = IsPoseEnabled(current),
OriginalIndex = current,
Depth = depth
};
nodesOut.Add(node);
// Add children
if (tree.ContainsKey(current))
{
// Push to the stack in reverse order
var children = tree[current];
for (var childIdx = children.Count-1; childIdx >= 0; childIdx--)
{
stack.Push((children[childIdx], depth+1));
}
}
// Safety check
// This shouldn't even happen, but in case we have a cycle in the graph
// exit instead of looping forever and eating up all the memory.
if (nodesOut.Count > NumPoses)
{
return nodesOut;
}
}
return nodesOut;
}
}
/// <summary>
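
The DisplayNode list above is produced in depth-first order so editor code can draw the pose hierarchy with indentation. A rough sketch of how a consumer might walk it; poseExtractor stands in for any concrete PoseExtractor, and since GetDisplayNodes is internal this would have to live in the same assembly (or one granted InternalsVisibleTo):

    // Illustrative traversal of the depth-first DisplayNode list.
    foreach (var node in poseExtractor.GetDisplayNodes())
    {
        var indent = new string(' ', 2 * node.Depth);
        Debug.Log($"{indent}[{(node.Enabled ? "x" : " ")}] #{node.OriginalIndex} {node.NodeObject}");
    }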

com.unity.ml-agents.extensions/Runtime/Sensors/RigidBodyPoseExtractor.cs (80)


/// <param name="rootGameObject">Optional GameObject used to find Rigidbodies in the hierarchy.</param>
/// <param name="virtualRoot">Optional GameObject used to determine the root of the poses,
/// separate from the actual Rigidbodies in the hierarchy. For locomotion tasks, with ragdolls, this provides
/// a stabilized refernece frame, which can improve learning.</param>
public RigidBodyPoseExtractor(Rigidbody rootBody, GameObject rootGameObject = null, GameObject virtualRoot = null)
/// a stabilized reference frame, which can improve learning.</param>
/// <param name="enableBodyPoses">Optional mapping of whether a body's psoe should be enabled or not.</param>
public RigidBodyPoseExtractor(Rigidbody rootBody, GameObject rootGameObject = null,
GameObject virtualRoot = null, Dictionary<Rigidbody, bool> enableBodyPoses = null)
{
if (rootBody == null)
{

Rigidbody[] rbs;
Joint[] joints;
joints = rootBody.GetComponentsInChildren <Joint>();
joints = rootGameObject.GetComponentsInChildren<Joint>();
}
if (rbs == null || rbs.Length == 0)

}
if (rbs[0] != rootBody)
if (rbs[0] != rootBody)
{
Debug.Log("Expected root body at index 0");
return;

}
}
var joints = rootBody.GetComponentsInChildren <Joint>();
foreach (var j in joints)
{
var parent = j.connectedBody;

// By default, ignore the root
SetPoseEnabled(0, false);
if (enableBodyPoses != null)
{
foreach (var pair in enableBodyPoses)
{
var rb = pair.Key;
if (bodyToIndex.TryGetValue(rb, out var index))
{
SetPoseEnabled(index, pair.Value);
}
}
}
}
/// <inheritdoc/>

return new Pose { rotation = body.rotation, position = body.position };
}
/// <inheritdoc/>
protected internal override Object GetObjectAt(int index)
{
if (index == 0 && m_VirtualRoot != null)
{
return m_VirtualRoot;
}
return m_Bodies[index];
}
/// <summary>
/// Get a dictionary indicating which Rigidbodies' poses are enabled or disabled.
/// </summary>
/// <returns></returns>
internal Dictionary<Rigidbody, bool> GetBodyPosesEnabled()
{
var bodyPosesEnabled = new Dictionary<Rigidbody, bool>(m_Bodies.Length);
for (var i = 0; i < m_Bodies.Length; i++)
{
var rb = m_Bodies[i];
if (rb == null)
{
continue; // skip virtual root
}
bodyPosesEnabled[rb] = IsPoseEnabled(i);
}
return bodyPosesEnabled;
}
internal IEnumerable<Rigidbody> GetEnabledRigidbodies()
{
if (m_Bodies == null)
{
yield break;
}
for (var i = 0; i < m_Bodies.Length; i++)
{
var rb = m_Bodies[i];
if (rb == null)
{
// Ignore a virtual root.
continue;
}
if (IsPoseEnabled(i))
{
yield return rb;
}
}
}
}
}
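
The new enableBodyPoses constructor argument exists so that per-body enabled/disabled flags can survive a rebuild of the extractor, and GetBodyPosesEnabled produces exactly that mapping. A minimal round-trip sketch, assuming rootBody, rootGameObject and virtualRoot are already in scope (both members are internal, so the caller sits in the extensions assembly); this is essentially what ResetPoseExtractor does in the RigidBodySensorComponent hunk below:

    // Illustrative: capture per-body flags, then rebuild the extractor with them preserved.
    var extractor = new RigidBodyPoseExtractor(rootBody, rootGameObject, virtualRoot);
    var enabled = extractor.GetBodyPosesEnabled(); // Dictionary<Rigidbody, bool>
    var rebuilt = new RigidBodyPoseExtractor(rootBody, rootGameObject, virtualRoot, enabled);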

com.unity.ml-agents.extensions/Runtime/Sensors/RigidBodySensorComponent.cs (68)


using System.Collections.Generic;
using UnityEngine;
using Unity.MLAgents.Sensors;

/// <summary>
/// Optional sensor name. This must be unique for each Agent.
/// </summary>
[SerializeField]
[SerializeField]
[HideInInspector]
RigidBodyPoseExtractor m_PoseExtractor;
/// <summary>
/// Creates a PhysicsBodySensor.
/// </summary>

return new PhysicsBodySensor(RootBody, gameObject, VirtualRoot, Settings, sensorName);
var _sensorName = string.IsNullOrEmpty(sensorName) ? $"PhysicsBodySensor:{RootBody?.name}" : sensorName;
return new PhysicsBodySensor(GetPoseExtractor(), Settings, _sensorName);
}
/// <inheritdoc/>

return new[] { 0 };
}
// TODO static method in PhysicsBodySensor?
// TODO only update PoseExtractor when body changes?
var poseExtractor = new RigidBodyPoseExtractor(RootBody, gameObject, VirtualRoot);
var poseExtractor = GetPoseExtractor();
// Start from i=1 to ignore the root
for (var i = 1; i < poseExtractor.Bodies.Length; i++)
foreach(var rb in poseExtractor.GetEnabledRigidbodies())
var body = poseExtractor.Bodies[i];
var joint = body?.GetComponent<Joint>();
numJointObservations += RigidBodyJointExtractor.NumObservations(body, joint, Settings);
var joint = rb.GetComponent<Joint>();
numJointObservations += RigidBodyJointExtractor.NumObservations(rb, joint, Settings);
}
/// <summary>
/// Get the DisplayNodes of the hierarchy.
/// </summary>
/// <returns></returns>
internal IList<PoseExtractor.DisplayNode> GetDisplayNodes()
{
return GetPoseExtractor().GetDisplayNodes();
}
/// <summary>
/// Lazy construction of the PoseExtractor.
/// </summary>
/// <returns></returns>
RigidBodyPoseExtractor GetPoseExtractor()
{
if (m_PoseExtractor == null)
{
ResetPoseExtractor();
}
return m_PoseExtractor;
}
/// <summary>
/// Reset the pose extractor, trying to keep the enabled state of the corresponding poses the same.
/// </summary>
internal void ResetPoseExtractor()
{
// Get the current enabled state of each body, so that we can reinitialize with them.
Dictionary<Rigidbody, bool> bodyPosesEnabled = null;
if (m_PoseExtractor != null)
{
bodyPosesEnabled = m_PoseExtractor.GetBodyPosesEnabled();
}
m_PoseExtractor = new RigidBodyPoseExtractor(RootBody, gameObject, VirtualRoot, bodyPosesEnabled);
}
/// <summary>
/// Toggle the pose at the given index.
/// </summary>
/// <param name="index"></param>
/// <param name="enabled"></param>
internal void SetPoseEnabled(int index, bool enabled)
{
GetPoseExtractor().SetPoseEnabled(index, enabled);
}
}

com.unity.ml-agents.extensions/Tests/Editor/Sensors/PoseExtractorTests.cs (88)


using System;
using UnityEngine;
using NUnit.Framework;
using Unity.MLAgents.Extensions.Sensors;

public class PoseExtractorTests
{
class UselessPoseExtractor : PoseExtractor
class BasicPoseExtractor : PoseExtractor
{
protected internal override Pose GetPoseAt(int index)
{

protected internal override Vector3 GetLinearVelocityAt(int index)
protected internal override Vector3 GetLinearVelocityAt(int index)
}
class UselessPoseExtractor : BasicPoseExtractor
{
public void Init(int[] parentIndices)
{
Setup(parentIndices);

poseExtractor.UpdateModelSpacePoses();
Assert.AreEqual(0, poseExtractor.NumPoses);
// Iterating through poses and velocities should be an empty loop
foreach (var pose in poseExtractor.GetEnabledModelSpacePoses())
{
throw new UnityAgentsException("This shouldn't happen");
}
foreach (var pose in poseExtractor.GetEnabledLocalSpacePoses())
{
throw new UnityAgentsException("This shouldn't happen");
}
foreach (var vel in poseExtractor.GetEnabledModelSpaceVelocities())
{
throw new UnityAgentsException("This shouldn't happen");
}
foreach (var vel in poseExtractor.GetEnabledLocalSpaceVelocities())
{
throw new UnityAgentsException("This shouldn't happen");
}
// Getting a parent index should throw an index exception
Assert.Throws <NullReferenceException>(
() => poseExtractor.GetParentIndex(0)
);
// DisplayNodes should be empty
var displayNodes = poseExtractor.GetDisplayNodes();
Assert.AreEqual(0, displayNodes.Count);
}
[Test]

Assert.AreEqual(size, localPoseIndex);
}
class BadPoseExtractor : PoseExtractor
[Test]
public void TestChainDisplayNodes()
{
var size = 4;
var chain = new ChainPoseExtractor(size);
var displayNodes = chain.GetDisplayNodes();
Assert.AreEqual(size, displayNodes.Count);
for (var i = 0; i < size; i++)
{
var displayNode = displayNodes[i];
Assert.AreEqual(i, displayNode.OriginalIndex);
Assert.AreEqual(null, displayNode.NodeObject);
Assert.AreEqual(i, displayNode.Depth);
Assert.AreEqual(true, displayNode.Enabled);
}
}
[Test]
public void TestDisplayNodesLoop()
{
// Degenerate case with a loop
var poseExtractor = new UselessPoseExtractor();
poseExtractor.Init(new[] {-1, 2, 1});
// This just shouldn't blow up
poseExtractor.GetDisplayNodes();
// Self-loop
poseExtractor.Init(new[] {-1, 1});
// This just shouldn't blow up
poseExtractor.GetDisplayNodes();
}
class BadPoseExtractor : BasicPoseExtractor
{
public BadPoseExtractor()
{

}
Setup(parents);
}
protected internal override Pose GetPoseAt(int index)
{
return Pose.identity;
}
protected internal override Vector3 GetLinearVelocityAt(int index)
{
return Vector3.zero;
}
}
[Test]

var bad = new BadPoseExtractor();
});
}
}
public class PoseExtensionTests

com.unity.ml-agents.extensions/Tests/Editor/Sensors/RigidBodyPoseExtractorTests.cs (67)


var rootRb = go.AddComponent<Rigidbody>();
var poseExtractor = new RigidBodyPoseExtractor(rootRb);
Assert.AreEqual(1, poseExtractor.NumPoses);
// Also pass the GameObject
poseExtractor = new RigidBodyPoseExtractor(rootRb, go);
Assert.AreEqual(1, poseExtractor.NumPoses);
}
[Test]
public void TestNoBodiesFound()
{
// Check that if we can't find any bodies under the game object, we get an empty extractor
var gameObj = new GameObject();
var rootRb = gameObj.AddComponent<Rigidbody>();
var otherGameObj = new GameObject();
var poseExtractor = new RigidBodyPoseExtractor(rootRb, otherGameObj);
Assert.AreEqual(0, poseExtractor.NumPoses);
// Add an RB under the other GameObject. Constructor will find a rigid body, but not the root.
var otherRb = otherGameObj.AddComponent<Rigidbody>();
poseExtractor = new RigidBodyPoseExtractor(rootRb, otherGameObj);
Assert.AreEqual(0, poseExtractor.NumPoses);
}
[Test]

Assert.AreEqual(rb1.position, poseExtractor.GetPoseAt(0).position);
Assert.IsTrue(rb1.rotation == poseExtractor.GetPoseAt(0).rotation);
Assert.AreEqual(rb1.velocity, poseExtractor.GetLinearVelocityAt(0));
// Check DisplayNodes gives expected results
var displayNodes = poseExtractor.GetDisplayNodes();
Assert.AreEqual(2, displayNodes.Count);
Assert.AreEqual(rb1, displayNodes[0].NodeObject);
Assert.AreEqual(false, displayNodes[0].Enabled);
Assert.AreEqual(rb2, displayNodes[1].NodeObject);
Assert.AreEqual(true, displayNodes[1].Enabled);
}
[Test]

Assert.AreEqual(rb1.position, poseExtractor.GetPoseAt(1).position);
Assert.IsTrue(rb1.rotation == poseExtractor.GetPoseAt(1).rotation);
Assert.AreEqual(rb1.velocity, poseExtractor.GetLinearVelocityAt(1));
}
[Test]
public void TestBodyPosesEnabledDictionary()
{
// * rootObj
// - rb1
// * go2
// - rb2
// - joint
var rootObj = new GameObject();
var rb1 = rootObj.AddComponent<Rigidbody>();
var go2 = new GameObject();
var rb2 = go2.AddComponent<Rigidbody>();
go2.transform.SetParent(rootObj.transform);
var joint = go2.AddComponent<ConfigurableJoint>();
joint.connectedBody = rb1;
var poseExtractor = new RigidBodyPoseExtractor(rb1);
// Expect the root body disabled and the attached one enabled.
Assert.IsFalse(poseExtractor.IsPoseEnabled(0));
Assert.IsTrue(poseExtractor.IsPoseEnabled(1));
var bodyPosesEnabled = poseExtractor.GetBodyPosesEnabled();
Assert.IsFalse(bodyPosesEnabled[rb1]);
Assert.IsTrue(bodyPosesEnabled[rb2]);
// Swap the values
bodyPosesEnabled[rb1] = true;
bodyPosesEnabled[rb2] = false;
var poseExtractor2 = new RigidBodyPoseExtractor(rb1, null, null, bodyPosesEnabled);
Assert.IsTrue(poseExtractor2.IsPoseEnabled(0));
Assert.IsFalse(poseExtractor2.IsPoseEnabled(1));
}
}
}

com.unity.ml-agents/CHANGELOG.md (5)


#### com.unity.ml-agents (C#)
#### ml-agents / ml-agents-envs / gym-unity (Python)
## [1.3.0-preview] 2020-08-12
## [1.3.0-preview] - 2020-08-12
### Major Changes
#### com.unity.ml-agents (C#)

Previously, this would result in an infinite loop and cause the editor to hang.
(#4226)
#### ml-agents / ml-agents-envs / gym-unity (Python)
- The algorithm used to normalize observations was introducing NaNs if the initial observations were too large
due to incorrect initialization. The initialization was fixed and is now the observation means from the
first trajectory processed. (#4299)
## [1.2.0-preview] - 2020-07-15

com.unity.ml-agents/Runtime/Actuators/ActionSegment.cs (33)


/// the offset into the original array, and an length.
/// </summary>
/// <typeparam name="T">The type of object stored in the underlying <see cref="Array"/></typeparam>
internal readonly struct ActionSegment<T> : IEnumerable<T>, IEquatable<ActionSegment<T>>
public readonly struct ActionSegment<T> : IEnumerable<T>, IEquatable<ActionSegment<T>>
where T : struct
{
/// <summary>

/// </summary>
public static ActionSegment<T> Empty = new ActionSegment<T>(System.Array.Empty<T>(), 0, 0);
static void CheckParameters(T[] actionArray, int offset, int length)
static void CheckParameters(IReadOnlyCollection<T> actionArray, int offset, int length)
if (offset + length > actionArray.Length)
if (offset + length > actionArray.Count)
$"are out of bounds of actionArray: {actionArray.Length}.");
$"are out of bounds of actionArray: {actionArray.Count}.");
/// Construct an <see cref="ActionSegment{T}"/> with just an actionArray. The <see cref="Offset"/> will
/// be set to 0 and the <see cref="Length"/> will be set to `actionArray.Length`.
/// </summary>
/// <param name="actionArray">The action array to use for the this segment.</param>
public ActionSegment(T[] actionArray) : this(actionArray, 0, actionArray.Length) { }
/// <summary>
/// Construct an <see cref="ActionSegment{T}"/> with an underlying array
/// and offset, and a length.
/// </summary>

public ActionSegment(T[] actionArray, int offset, int length)
{
#if DEBUG
#endif
Array = actionArray;
Offset = offset;
Length = length;

}
return Array[Offset + index];
}
set
{
if (index < 0 || index > Length)
{
throw new IndexOutOfRangeException($"Index out of bounds, expected a number between 0 and {Length}");
}
Array[Offset + index] = value;
}
}
/// <summary>
/// Sets the segment of the backing array to all zeros.
/// </summary>
public void Clear()
{
System.Array.Clear(Array, Offset, Length);
}
/// <inheritdoc cref="IEnumerable{T}.GetEnumerator"/>
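
Since ActionSegment&lt;T&gt; is now public, user code can hold typed views over slices of a shared backing array. A small sketch of the surface shown in this hunk (the offset/length constructor, the indexer, and Clear), assuming a using directive for Unity.MLAgents.Actuators:

    // Illustrative only: two segments over disjoint slices of one backing array.
    var backing = new float[5];
    var left = new ActionSegment<float>(backing, 0, 2);  // views backing[0..1]
    var right = new ActionSegment<float>(backing, 2, 3); // views backing[2..4]
    left[0] = 1f;   // writes backing[0]
    right[1] = 2f;  // writes backing[3]
    right.Clear();  // zeroes backing[2..4], leaves backing[0..1] untouched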

com.unity.ml-agents/Runtime/Actuators/ActionSpec.cs (2)


/// <summary>
/// Defines the structure of an Action Space to be used by the Actuator system.
/// </summary>
internal readonly struct ActionSpec
public readonly struct ActionSpec
{
/// <summary>

com.unity.ml-agents/Runtime/Actuators/ActuatorManager.cs (51)


/// <summary>
/// Returns the previously stored actions for the actuators in this list.
/// </summary>
public float[] StoredContinuousActions { get; private set; }
// public float[] StoredContinuousActions { get; private set; }
public int[] StoredDiscreteActions { get; private set; }
// public int[] StoredDiscreteActions { get; private set; }
public ActionBuffers StoredActions { get; private set; }
/// <summary>
/// Create an ActuatorList with a preset capacity.

// Sort the Actuators by name to ensure determinism
SortActuators();
StoredContinuousActions = numContinuousActions == 0 ? Array.Empty<float>() : new float[numContinuousActions];
StoredDiscreteActions = numDiscreteBranches == 0 ? Array.Empty<int>() : new int[numDiscreteBranches];
var continuousActions = numContinuousActions == 0 ? ActionSegment<float>.Empty :
new ActionSegment<float>(new float[numContinuousActions]);
var discreteActions = numDiscreteBranches == 0 ? ActionSegment<int>.Empty : new ActionSegment<int>(new int[numDiscreteBranches]);
StoredActions = new ActionBuffers(continuousActions, discreteActions);
m_DiscreteActionMask = new ActuatorDiscreteActionMask(actuators, sumOfDiscreteBranches, numDiscreteBranches);
m_ReadyForExecution = true;
}

/// continuous actions for the IActuators in this list.</param>
/// <param name="discreteActionBuffer">The action buffer which contains all of the
/// discrete actions for the IActuators in this list.</param>
public void UpdateActions(float[] continuousActionBuffer, int[] discreteActionBuffer)
public void UpdateActions(ActionBuffers actions)
UpdateActionArray(continuousActionBuffer, StoredContinuousActions);
UpdateActionArray(discreteActionBuffer, StoredDiscreteActions);
UpdateActionArray(actions.ContinuousActions, StoredActions.ContinuousActions);
UpdateActionArray(actions.DiscreteActions, StoredActions.DiscreteActions);
static void UpdateActionArray<T>(T[] sourceActionBuffer, T[] destination)
static void UpdateActionArray<T>(ActionSegment<T> sourceActionBuffer, ActionSegment<T> destination)
where T : struct
if (sourceActionBuffer == null || sourceActionBuffer.Length == 0)
if (sourceActionBuffer.Length <= 0)
Array.Clear(destination, 0, destination.Length);
destination.Clear();
}
else
{

Array.Copy(sourceActionBuffer, destination, destination.Length);
Array.Copy(sourceActionBuffer.Array,
sourceActionBuffer.Offset,
destination.Array,
destination.Offset,
destination.Length);
}
}

for (var i = 0; i < m_Actuators.Count; i++)
{
var actuator = m_Actuators[i];
m_DiscreteActionMask.CurrentBranchOffset = offset;
actuator.WriteDiscreteActionMask(m_DiscreteActionMask);
offset += actuator.ActionSpec.NumDiscreteActions;
if (actuator.ActionSpec.NumDiscreteActions > 0)
{
m_DiscreteActionMask.CurrentBranchOffset = offset;
actuator.WriteDiscreteActionMask(m_DiscreteActionMask);
offset += actuator.ActionSpec.NumDiscreteActions;
}
}
}

var continuousActions = ActionSegment<float>.Empty;
if (numContinuousActions > 0)
{
continuousActions = new ActionSegment<float>(StoredContinuousActions,
continuousActions = new ActionSegment<float>(StoredActions.ContinuousActions.Array,
continuousStart,
numContinuousActions);
}

{
discreteActions = new ActionSegment<int>(StoredDiscreteActions,
discreteActions = new ActionSegment<int>(StoredActions.DiscreteActions.Array,
discreteStart,
numDiscreteActions);
}

}
/// <summary>
/// Resets the <see cref="StoredContinuousActions"/> and <see cref="StoredDiscreteActions"/> buffers to be all
/// Resets the <see cref="ActionBuffers"/> to be all
/// zeros and calls <see cref="IActuator.ResetData"/> on each <see cref="IActuator"/> managed by this object.
/// </summary>
public void ResetData()

return;
}
Array.Clear(StoredContinuousActions, 0, StoredContinuousActions.Length);
Array.Clear(StoredDiscreteActions, 0, StoredDiscreteActions.Length);
StoredActions.Clear();
m_DiscreteActionMask.ResetMask();
}
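
The signature change above replaces the pair of raw action arrays with a single ActionBuffers argument. A sketch of what the call-site migration looks like; manager, continuousArray and discreteArray are placeholder names, not identifiers from the diff:

    // Before this change: manager.UpdateActions(continuousArray, discreteArray);
    // After this change: wrap both arrays in one ActionBuffers struct.
    manager.UpdateActions(new ActionBuffers(continuousArray, discreteArray));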

com.unity.ml-agents/Runtime/Actuators/IActionReceiver.cs (72)


using System;
using System.Linq;
using UnityEngine;
namespace Unity.MLAgents.Actuators
{

/// </summary>
internal readonly struct ActionBuffers
public readonly struct ActionBuffers
{
/// <summary>
/// An empty action buffer.

public ActionSegment<int> DiscreteActions { get; }
/// <summary>
/// Create an <see cref="ActionBuffers"/> instance with discrete actions stored as a float array. This exists
/// to achieve backward compatibility with the former Agent methods which used a float array for both continuous
/// and discrete actions.
/// </summary>
/// <param name="discreteActions">The float array of discrete actions.</param>
/// <returns>An <see cref="ActionBuffers"/> instance initialized with a <see cref="DiscreteActions"/>
/// <see cref="ActionSegment{T}"/> initialized from a float array.</returns>
public static ActionBuffers FromDiscreteActions(float[] discreteActions)
{
return new ActionBuffers(ActionSegment<float>.Empty, discreteActions == null ? ActionSegment<int>.Empty
: new ActionSegment<int>(Array.ConvertAll(discreteActions,
x => (int)x)));
}
public ActionBuffers(float[] continuousActions, int[] discreteActions)
: this(new ActionSegment<float>(continuousActions), new ActionSegment<int>(discreteActions)) { }
/// <summary>
/// Construct an <see cref="ActionBuffers"/> instance with the continuous and discrete actions that will
/// be used.
/// </summary>

DiscreteActions = discreteActions;
}
/// <summary>
/// Clear the <see cref="ContinuousActions"/> and <see cref="DiscreteActions"/> segments to be all zeros.
/// </summary>
public void Clear()
{
ContinuousActions.Clear();
DiscreteActions.Clear();
}
/// <inheritdoc cref="ValueType.Equals(object)"/>
public override bool Equals(object obj)
{

return (ContinuousActions.GetHashCode() * 397) ^ DiscreteActions.GetHashCode();
}
}
/// <summary>
/// Packs the continuous and discrete actions into one float array. The array passed into this method
/// must have a Length that is greater than or equal to the sum of the Lengths of
/// <see cref="ContinuousActions"/> and <see cref="DiscreteActions"/>.
/// </summary>
/// <param name="destination">A float array to pack actions into whose length is greater than or
/// equal to the addition of the Lengths of this objects <see cref="ContinuousActions"/> and
/// <see cref="DiscreteActions"/> segments.</param>
public void PackActions(in float[] destination)
{
Debug.Assert(destination.Length >= ContinuousActions.Length + DiscreteActions.Length,
$"argument '{nameof(destination)}' is not large enough to pack the actions into.\n" +
$"{nameof(destination)}.Length: {destination.Length}\n" +
$"{nameof(ContinuousActions)}.Length + {nameof(DiscreteActions)}.Length: {ContinuousActions.Length + DiscreteActions.Length}");
var start = 0;
if (ContinuousActions.Length > 0)
{
Array.Copy(ContinuousActions.Array,
ContinuousActions.Offset,
destination,
start,
ContinuousActions.Length);
start = ContinuousActions.Length;
}
if (start >= destination.Length)
{
return;
}
if (DiscreteActions.Length > 0)
{
Array.Copy(DiscreteActions.Array,
DiscreteActions.Offset,
destination,
start,
DiscreteActions.Length);
}
}
internal interface IActionReceiver
public interface IActionReceiver
{
/// <summary>
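
PackActions bridges the new ActionBuffers struct back to the legacy single float[] layout: continuous values first, then discrete values widened to float. A minimal sketch assuming only the members shown in this hunk:

    // Illustrative only: pack continuous + discrete actions into one legacy float array.
    var buffers = new ActionBuffers(new[] { 0.5f, -0.25f }, new[] { 1, 0 });
    var legacy = new float[buffers.ContinuousActions.Length + buffers.DiscreteActions.Length];
    buffers.PackActions(legacy); // expected contents: { 0.5f, -0.25f, 1f, 0f }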

com.unity.ml-agents/Runtime/Actuators/IActuator.cs (2)


/// <summary>
/// Abstraction that facilitates the execution of actions.
/// </summary>
internal interface IActuator : IActionReceiver
public interface IActuator : IActionReceiver
{
int TotalNumberOfActions { get; }

com.unity.ml-agents/Runtime/Actuators/IDiscreteActionMask.cs (2)


/// <summary>
/// Interface for writing a mask to disable discrete actions for agents for the next decision.
/// </summary>
internal interface IDiscreteActionMask
public interface IDiscreteActionMask
{
/// <summary>
/// Modifies an action mask for discrete control agents.

com.unity.ml-agents/Runtime/Actuators/VectorActuator.cs (2)


namespace Unity.MLAgents.Actuators
{
internal class VectorActuator : IActuator
public class VectorActuator : IActuator
{
IActionReceiver m_ActionReceiver;

com.unity.ml-agents/Runtime/Agent.cs (201)


using System;
using System.Collections.Generic;
using System.Collections.ObjectModel;
using System.Linq;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Sensors;
using Unity.MLAgents.Sensors.Reflection;
using Unity.MLAgents.Demonstrations;

/// to separate between different agents in the environment.
/// </summary>
public int episodeId;
}
/// <summary>
/// Struct that contains the action information sent from the Brain to the
/// Agent.
/// </summary>
internal struct AgentAction
{
public float[] vectorActions;
public void ClearActions()
{
Array.Clear(storedVectorActions, 0, storedVectorActions.Length);
}
public void CopyActions(ActionBuffers actionBuffers)
{
actionBuffers.PackActions(storedVectorActions);
}
}
/// <summary>

/// can only take an action when it touches the ground, so several frames might elapse between
/// one decision and the need for the next.
///
/// Use the <see cref="OnActionReceived"/> function to implement the actions your agent can take,
/// Use the <see cref="OnActionReceived(float[])"/> function to implement the actions your agent can take,
/// such as moving to reach a goal or interacting with its environment.
///
/// When you call <see cref="EndEpisode"/> on an agent or the agent reaches its <see cref="MaxStep"/> count,

"docs/Learning-Environment-Design-Agents.md")]
[Serializable]
[RequireComponent(typeof(BehaviorParameters))]
public class Agent : MonoBehaviour, ISerializationCallbackReceiver
public partial class Agent : MonoBehaviour, ISerializationCallbackReceiver, IActionReceiver
{
IPolicy m_Brain;
BehaviorParameters m_PolicyFactory;

/// Current Agent information (message sent to Brain).
AgentInfo m_Info;
/// Current Agent action (message sent from Brain).
AgentAction m_Action;
/// Represents the reward the agent accumulated during the current step.
/// It is reset to 0 at the beginning of every step.

internal VectorSensor collectObservationsSensor;
/// <summary>
/// List of IActuators that this Agent will delegate actions to if any exist.
/// </summary>
ActuatorManager m_ActuatorManager;
/// <summary>
/// VectorActuator which is used by default if no other sensors exist on this Agent. This VectorSensor will
/// delegate its actions to <see cref="OnActionReceived(float[])"/> by default in order to keep backward compatibility
/// with the current behavior of Agent.
/// </summary>
IActuator m_VectorActuator;
/// <summary>
/// This is used to avoid allocation of a float array every frame if users are still using the old
/// OnActionReceived method.
/// </summary>
float[] m_LegacyActionCache;
/// <summary>
/// Called when the attached [GameObject] becomes enabled and active.
/// [GameObject]: https://docs.unity3d.com/Manual/GameObjects.html
/// </summary>

m_PolicyFactory = GetComponent<BehaviorParameters>();
m_Info = new AgentInfo();
m_Action = new AgentAction();
sensors = new List<ISensor>();
Academy.Instance.AgentIncrementStep += AgentIncrementStep;

InitializeSensors();
}
using (TimerStack.Instance.Scoped("InitializeActuators"))
{
InitializeActuators();
}
m_Info.storedVectorActions = new float[m_ActuatorManager.TotalNumberOfActions];
// The first time the Academy resets, all Agents in the scene will be
// forced to reset through the <see cref="AgentForceReset"/> event.
// To avoid the Agent resetting twice, the Agents will not begin their

/// set the reward assigned to the current step with a specific value rather than
/// increasing or decreasing it.
///
/// Typically, you assign rewards in the Agent subclass's <see cref="OnActionReceived(float[])"/>
/// Typically, you assign rewards in the Agent subclass's <see cref="IActionReceiver.OnActionReceived"/>
/// implementation after carrying out the received action and evaluating its success.
///
/// Rewards are used during reinforcement learning; they are ignored during inference.

/// <remarks>
/// Call `RequestAction()` to repeat the previous action returned by the agent's
/// most recent decision. A new decision is not requested. When you call this function,
/// the Agent instance invokes <seealso cref="OnActionReceived(float[])"/> with the
/// the Agent instance invokes <seealso cref="IActionReceiver.OnActionReceived"/> with the
/// existing action vector.
///
/// You can use `RequestAction()` in situations where an agent must take an action

/// at the end of an episode.
void ResetData()
{
var param = m_PolicyFactory.BrainParameters;
m_ActionMasker = new DiscreteActionMasker(param);
// If we haven't initialized vectorActions, initialize to 0. This should only
// happen during the creation of the Agent. In subsequent episodes, vectorAction
// should stay the previous action before the Done(), so that it is properly recorded.
if (m_Action.vectorActions == null)
{
m_Action.vectorActions = new float[param.NumActions];
m_Info.storedVectorActions = new float[param.NumActions];
}
m_ActuatorManager?.ResetData();
}
/// <summary>

/// control of an agent using keyboard, mouse, or game controller input.
///
/// Your heuristic implementation can use any decision making logic you specify. Assign decision
/// values to the float[] array, <paramref name="actionsOut"/>, passed to your function as a parameter.
/// values to the <see cref="ActionBuffers.ContinuousActions"/> and <see cref="ActionBuffers.DiscreteActions"/>
/// arrays , passed to your function as a parameter.
/// <seealso cref="OnActionReceived(float[])"/> function, which receives this array and
/// <seealso cref="IActionReceiver.OnActionReceived"/> function, which receives this array and
/// implements the corresponding agent behavior. See [Actions] for more information
/// about agent actions.
/// Note : Do not create a new float array of action in the `Heuristic()` method,

/// You can also use the [Input System package], which provides a more flexible and
/// configurable input system.
/// <code>
/// public override void Heuristic(float[] actionsOut)
/// public override void Heuristic(ActionBuffers actionsOut)
/// actionsOut[0] = Input.GetAxis("Horizontal");
/// actionsOut[1] = Input.GetKey(KeyCode.Space) ? 1.0f : 0.0f;
/// actionsOut[2] = Input.GetAxis("Vertical");
/// actionsOut.ContinuousActions[0] = Input.GetAxis("Horizontal");
/// actionsOut.ContinuousActions[1] = Input.GetKey(KeyCode.Space) ? 1.0f : 0.0f;
/// actionsOut.ContinuousActions[2] = Input.GetAxis("Vertical");
/// <param name="actionsOut">Array for the output actions.</param>
/// <seealso cref="OnActionReceived(float[])"/>
public virtual void Heuristic(float[] actionsOut)
/// <param name="actionsOut">The <see cref="ActionBuffers"/> which contain the continuous and
/// discrete action buffers to write to.</param>
/// <seealso cref="IActionReceiver.OnActionReceived"/>
public virtual void Heuristic(in ActionBuffers actionsOut)
Debug.LogWarning("Heuristic method called but not implemented. Returning placeholder actions.");
Array.Clear(actionsOut, 0, actionsOut.Length);
// For backward compatibility
switch (m_PolicyFactory.BrainParameters.VectorActionSpaceType)
{
case SpaceType.Continuous:
Heuristic(actionsOut.ContinuousActions.Array);
actionsOut.DiscreteActions.Clear();
break;
case SpaceType.Discrete:
var convertedOut = Array.ConvertAll(actionsOut.DiscreteActions.Array, x => (float)x);
Heuristic(convertedOut);
var discreteActionSegment = actionsOut.DiscreteActions;
for (var i = 0; i < actionsOut.DiscreteActions.Length; i++)
{
discreteActionSegment[i] = (int)convertedOut[i];
}
actionsOut.ContinuousActions.Clear();
break;
}
}
/// <summary>

#if DEBUG
// Make sure the names are actually unique
for (var i = 0; i < sensors.Count - 1; i++)
{
Debug.Assert(

#endif
}
void InitializeActuators()
{
ActuatorComponent[] attachedActuators;
if (m_PolicyFactory.UseChildActuators)
{
attachedActuators = GetComponentsInChildren<ActuatorComponent>();
}
else
{
attachedActuators = GetComponents<ActuatorComponent>();
}
// Support legacy OnActionReceived
var param = m_PolicyFactory.BrainParameters;
m_VectorActuator = new VectorActuator(this, param.VectorActionSize, param.VectorActionSpaceType);
m_ActuatorManager = new ActuatorManager(attachedActuators.Length + 1);
m_LegacyActionCache = new float[m_VectorActuator.TotalNumberOfActions];
m_ActuatorManager.Add(m_VectorActuator);
foreach (var actuatorComponent in attachedActuators)
{
m_ActuatorManager.Add(actuatorComponent.CreateActuator());
}
}
/// <summary>
/// Sends the Agent info to the linked Brain.
/// </summary>

if (m_Info.done)
{
Array.Clear(m_Info.storedVectorActions, 0, m_Info.storedVectorActions.Length);
m_Info.ClearActions();
Array.Copy(m_Action.vectorActions, m_Info.storedVectorActions, m_Action.vectorActions.Length);
m_ActuatorManager.StoredActions.PackActions(m_Info.storedVectorActions);
m_ActionMasker.ResetMask();
UpdateSensors();
using (TimerStack.Instance.Scoped("CollectObservations"))
{

{
if (m_PolicyFactory.BrainParameters.VectorActionSpaceType == SpaceType.Discrete)
{
CollectDiscreteActionMasks(m_ActionMasker);
}
m_ActuatorManager.WriteActionMask();
m_Info.discreteActionMasks = m_ActionMasker.GetMask();
m_Info.discreteActionMasks = m_ActuatorManager.DiscreteActionMask?.GetMask();
m_Info.reward = m_Reward;
m_Info.done = false;
m_Info.maxStepReached = false;

/// <summary>
/// Returns a read-only view of the observations that were generated in
/// <see cref="CollectObservations(VectorSensor)"/>. This is mainly useful inside of a
/// <see cref="Heuristic(float[])"/> method to avoid recomputing the observations.
/// <see cref="Heuristic(float[], int[])"/> method to avoid recomputing the observations.
/// </summary>
/// <returns>A read-only view of the observations list.</returns>
public ReadOnlyCollection<float> GetObservations()

///
/// [Agents - Actions]: https://github.com/Unity-Technologies/ml-agents/blob/release_5_docs/docs/Learning-Environment-Design-Agents.md#actions
/// </remarks>
/// <seealso cref="OnActionReceived(float[])"/>
public virtual void CollectDiscreteActionMasks(DiscreteActionMasker actionMasker)
/// <seealso cref="IActionReceiver.OnActionReceived"/>
public virtual void WriteDiscreteActionMask(IDiscreteActionMask actionMask)
if (m_ActionMasker == null)
{
m_ActionMasker = new DiscreteActionMasker(actionMask);
}
CollectDiscreteActionMasks(m_ActionMasker);
ActionSpec IActionReceiver.ActionSpec { get; }
/// <summary>
/// Implement `OnActionReceived()` to specify agent behavior at every step, based

/// three values in the action array to use as the force components. During
/// training, the agent's policy learns to set those particular elements of
/// the array to maximize the training rewards the agent receives. (Of course,
/// if you implement a <seealso cref="Heuristic"/> function, it must use the same
/// if you implement a <seealso cref="Heuristic(float[], int[])"/> function, it must use the same
/// elements of the action array for the same purpose since there is no learning
/// involved.)
///

///
/// [Agents - Actions]: https://github.com/Unity-Technologies/ml-agents/blob/release_5_docs/docs/Learning-Environment-Design-Agents.md#actions
/// </remarks>
/// <param name="vectorAction">
/// An array containing the action vector. The length of the array is specified
/// by the <see cref="BrainParameters"/> of the agent's associated
/// <see cref="BehaviorParameters"/> component.
/// <param name="actions">
/// Struct containing the buffers of actions to be executed at this step.
public virtual void OnActionReceived(float[] vectorAction) {}
public virtual void OnActionReceived(ActionBuffers actions)
{
actions.PackActions(m_LegacyActionCache);
OnActionReceived(m_LegacyActionCache);
}
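As a usage sketch (not part of the diff), an Agent subclass can now read the continuous segment of the buffers directly; the Rigidbody field and force scaling below are hypothetical.

using Unity.MLAgents;
using Unity.MLAgents.Actuators;
using UnityEngine;

// Hypothetical agent that interprets the first three continuous actions as a force.
public class ExampleForceAgent : Agent
{
    public Rigidbody body;
    public float forceScale = 10f;

    public override void OnActionReceived(ActionBuffers actions)
    {
        var act = actions.ContinuousActions;
        body.AddForce(new Vector3(act[0], act[1], act[2]) * forceScale);
    }
}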
/// <summary>
/// Implement `OnEpisodeBegin()` to set up an Agent instance at the beginning

public virtual void OnEpisodeBegin() {}
/// <summary>
/// Returns the last action that was decided on by the Agent.
/// Gets the last ActionBuffer for this agent.
/// <returns>
/// The last action that was decided by the Agent (or null if no decision has been made).
/// </returns>
/// <seealso cref="OnActionReceived(float[])"/>
public float[] GetAction()
public ActionBuffers GetStoredContinuousActions()
return m_Action.vectorActions;
return m_ActuatorManager.StoredActions;
}
/// <summary>

if ((m_RequestAction) && (m_Brain != null))
{
m_RequestAction = false;
OnActionReceived(m_Action.vectorActions);
m_ActuatorManager.ExecuteActions();
}
if ((m_StepCount >= MaxStep) && (MaxStep > 0))

void DecideAction()
{
if (m_Action.vectorActions == null)
if (m_ActuatorManager.StoredActions.ContinuousActions.Array == null)
var action = m_Brain?.DecideAction();
if (action == null)
{
Array.Clear(m_Action.vectorActions, 0, m_Action.vectorActions.Length);
}
else
{
Array.Copy(action, m_Action.vectorActions, action.Length);
}
var actions = m_Brain?.DecideAction() ?? new ActionBuffers();
m_Info.CopyActions(actions);
m_ActuatorManager.UpdateActions(actions);
}
}
}

14
com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs


}
#region AgentAction
public static AgentAction ToAgentAction(this AgentActionProto aap)
{
return new AgentAction
{
vectorActions = aap.VectorActions.ToArray()
};
}
public static List<AgentAction> ToAgentActionList(this UnityRLInputProto.Types.ListAgentActionProto proto)
public static List<float[]> ToAgentActionList(this UnityRLInputProto.Types.ListAgentActionProto proto)
var agentActions = new List<AgentAction>(proto.Value.Count);
var agentActions = new List<float[]>(proto.Value.Count);
agentActions.Add(ap.ToAgentAction());
agentActions.Add(ap.VectorActions.ToArray());
}
return agentActions;
}

2
com.unity.ml-agents/Runtime/Communicator/RpcCommunicator.cs


var agentId = m_OrderedAgentsRequestingDecisions[brainName][i];
if (m_LastActionsReceived[brainName].ContainsKey(agentId))
{
m_LastActionsReceived[brainName][agentId] = agentAction.vectorActions;
m_LastActionsReceived[brainName][agentId] = agentAction;
}
}
}

6
com.unity.ml-agents/Runtime/DecisionRequester.cs


/// that the Agent will request a decision every 5 Academy steps.
/// </summary>
[Range(1, 20)]
[Tooltip("The frequency with which the agent requests a decision. A DecisionPeriod " +
"of 5 means that the Agent will request a decision every 5 Academy steps.")]
"of 5 means that the Agent will request a decision every 5 Academy steps.")]
public int DecisionPeriod = 5;
/// <summary>

[Tooltip("Indicates whether or not the agent will take an action during the Academy " +
"steps where it does not request a decision. Has no effect when DecisionPeriod " +
"is set to 1.")]
"steps where it does not request a decision. Has no effect when DecisionPeriod " +
"is set to 1.")]
[FormerlySerializedAs("RepeatAction")]
public bool TakeActionsBetweenDecisions = true;
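A small sketch of configuring these fields from code rather than the Inspector; it assumes an Agent-derived component is already attached to the same GameObject, and the chosen values are only illustrative.

using Unity.MLAgents;
using UnityEngine;

// Hypothetical setup script: request a decision every 5 Academy steps and keep
// acting with the last decided actions on the steps in between.
public class ExampleDecisionRequesterSetup : MonoBehaviour
{
    void Awake()
    {
        var requester = gameObject.AddComponent<DecisionRequester>();
        requester.DecisionPeriod = 5;
        requester.TakeActionsBetweenDecisions = true;
    }
}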

118
com.unity.ml-agents/Runtime/DiscreteActionMasker.cs


using System;
using System.Collections.Generic;
using System.Linq;
using Unity.MLAgents.Policies;
using Unity.MLAgents.Actuators;
namespace Unity.MLAgents
{

/// may be illegal. For example, if an agent is adjacent to a wall or other obstacle
/// you could mask any actions that direct the agent to move into the blocked space.
/// </remarks>
public class DiscreteActionMasker
public class DiscreteActionMasker : IDiscreteActionMask
/// When using discrete control, these are the starting indices of the actions
/// when all the branches are concatenated with each other.
int[] m_StartingActionIndices;
bool[] m_CurrentMask;
readonly BrainParameters m_BrainParameters;
IDiscreteActionMask m_Delegate;
internal DiscreteActionMasker(BrainParameters brainParameters)
internal DiscreteActionMasker(IDiscreteActionMask actionMask)
m_BrainParameters = brainParameters;
m_Delegate = actionMask;
}
/// <summary>

/// <param name="actionIndices">The indices of the masked actions.</param>
public void SetMask(int branch, IEnumerable<int> actionIndices)
{
// If the branch does not exist, raise an error
if (branch >= m_BrainParameters.VectorActionSize.Length)
throw new UnityAgentsException(
"Invalid Action Masking : Branch " + branch + " does not exist.");
var totalNumberActions = m_BrainParameters.VectorActionSize.Sum();
// By default, the masks are null. If we want to specify a new mask, we initialize
// the actionMasks with trues.
if (m_CurrentMask == null)
{
m_CurrentMask = new bool[totalNumberActions];
}
// If this is the first time the masked actions are used, we generate the starting
// indices for each branch.
if (m_StartingActionIndices == null)
{
m_StartingActionIndices = Utilities.CumSum(m_BrainParameters.VectorActionSize);
}
// Perform the masking
foreach (var actionIndex in actionIndices)
{
if (actionIndex >= m_BrainParameters.VectorActionSize[branch])
{
throw new UnityAgentsException(
"Invalid Action Masking: Action Mask is too large for specified branch.");
}
m_CurrentMask[actionIndex + m_StartingActionIndices[branch]] = true;
}
}
/// <summary>
/// Get the current mask for an agent.
/// </summary>
/// <returns>A mask for the agent. A boolean array of length equal to the total number of
/// actions.</returns>
internal bool[] GetMask()
{
if (m_CurrentMask != null)
{
AssertMask();
}
return m_CurrentMask;
m_Delegate.WriteMask(branch, actionIndices);
/// <summary>
/// Makes sure that the current mask is usable.
/// </summary>
void AssertMask()
public void WriteMask(int branch, IEnumerable<int> actionIndices)
// Action Masks can only be used in Discrete Control.
if (m_BrainParameters.VectorActionSpaceType != SpaceType.Discrete)
{
throw new UnityAgentsException(
"Invalid Action Masking : Can only set action mask for Discrete Control.");
}
var numBranches = m_BrainParameters.VectorActionSize.Length;
for (var branchIndex = 0; branchIndex < numBranches; branchIndex++)
{
if (AreAllActionsMasked(branchIndex))
{
throw new UnityAgentsException(
"Invalid Action Masking : All the actions of branch " + branchIndex +
" are masked.");
}
}
m_Delegate.WriteMask(branch, actionIndices);
/// <summary>
/// Resets the current mask for an agent.
/// </summary>
internal void ResetMask()
public bool[] GetMask()
if (m_CurrentMask != null)
{
Array.Clear(m_CurrentMask, 0, m_CurrentMask.Length);
}
return m_Delegate.GetMask();
/// <summary>
/// Checks if all the actions in the input branch are masked.
/// </summary>
/// <param name="branch"> The index of the branch to check.</param>
/// <returns> True if all the actions of the branch are masked.</returns>
bool AreAllActionsMasked(int branch)
public void ResetMask()
if (m_CurrentMask == null)
{
return false;
}
var start = m_StartingActionIndices[branch];
var end = m_StartingActionIndices[branch + 1];
for (var i = start; i < end; i++)
{
if (!m_CurrentMask[i])
{
return false;
}
}
return true;
m_Delegate.ResetMask();
}
}
}
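Since DiscreteActionMasker now only forwards to an IDiscreteActionMask, agent code is expected to override the new WriteDiscreteActionMask entry point; a minimal sketch follows, where the branch index and masked action are illustrative.

using Unity.MLAgents;
using Unity.MLAgents.Actuators;

// Hypothetical agent that forbids action index 2 of discrete branch 0 this step.
public class ExampleMaskedAgent : Agent
{
    public override void WriteDiscreteActionMask(IDiscreteActionMask actionMask)
    {
        // The policy will never sample the masked index for this decision.
        actionMask.WriteMask(0, new[] { 2 });
    }
}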

17
com.unity.ml-agents/Runtime/Policies/BarracudaPolicy.cs


using System;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Inference;
using Unity.MLAgents.Sensors;

internal class BarracudaPolicy : IPolicy
{
protected ModelRunner m_ModelRunner;
ActionBuffers m_LastActionBuffer;
int m_AgentId;

List<int[]> m_SensorShapes;
SpaceType m_SpaceType;
/// <inheritdoc />
public BarracudaPolicy(

{
var modelRunner = Academy.Instance.GetOrCreateModelRunner(model, brainParameters, inferenceDevice);
m_ModelRunner = modelRunner;
m_SpaceType = brainParameters.VectorActionSpaceType;
}
/// <inheritdoc />

}
/// <inheritdoc />
public float[] DecideAction()
public ref readonly ActionBuffers DecideAction()
return m_ModelRunner?.GetAction(m_AgentId);
var actions = m_ModelRunner?.GetAction(m_AgentId);
if (m_SpaceType == SpaceType.Continuous)
{
m_LastActionBuffer = new ActionBuffers(actions, Array.Empty<int>());
return ref m_LastActionBuffer;
}
m_LastActionBuffer = ActionBuffers.FromDiscreteActions(actions);
return ref m_LastActionBuffer;
}
public void Dispose()
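The two construction paths used above can be shown in isolation; the raw float array below is just a stand-in for whatever the model (or, in RemotePolicy, the communicator) returned.

using System;
using Unity.MLAgents.Actuators;

static class ActionBufferConstructionSketch
{
    static void Demo(float[] rawActions)
    {
        // Continuous space: wrap the floats and leave the discrete segment empty.
        var continuousBuffers = new ActionBuffers(rawActions, Array.Empty<int>());

        // Discrete space: interpret the floats as discrete branch values.
        var discreteBuffers = ActionBuffers.FromDiscreteActions(rawActions);
    }
}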

37
com.unity.ml-agents/Runtime/Policies/BehaviorParameters.cs


[Tooltip("Use all Sensor components attached to child GameObjects of this Agent.")]
bool m_UseChildSensors = true;
[HideInInspector]
[SerializeField]
[Tooltip("Use all Actuator components attached to child GameObjects of this Agent.")]
bool m_UseChildActuators = true;
/// <summary>
/// Whether or not to use all the sensor components attached to child GameObjects of the agent.
/// Note that changing this after the Agent has been initialized will not have any effect.

set { m_UseChildSensors = value; }
}
/// <summary>
/// Whether or not to use all the actuator components attached to child GameObjects of the agent.
/// Note that changing this after the Agent has been initialized will not have any effect.
/// </summary>
public bool UseChildActuators
{
get { return m_UseChildActuators; }
set { m_UseChildActuators = value; }
}
[HideInInspector, SerializeField]
ObservableAttributeOptions m_ObservableAttributeHandling = ObservableAttributeOptions.Ignore;

switch (m_BehaviorType)
{
case BehaviorType.HeuristicOnly:
return new HeuristicPolicy(heuristic, m_BrainParameters.NumActions);
return GenerateHeuristicPolicy(heuristic);
case BehaviorType.InferenceOnly:
{
if (m_Model == null)

}
else
{
return new HeuristicPolicy(heuristic, m_BrainParameters.NumActions);
return GenerateHeuristicPolicy(heuristic);
return new HeuristicPolicy(heuristic, m_BrainParameters.NumActions);
return GenerateHeuristicPolicy(heuristic);
}
internal IPolicy GenerateHeuristicPolicy(HeuristicPolicy.ActionGenerator heuristic)
{
var numContinuousActions = 0;
var numDiscreteActions = 0;
if (m_BrainParameters.VectorActionSpaceType == SpaceType.Continuous)
{
numContinuousActions = m_BrainParameters.NumActions;
}
else if (m_BrainParameters.VectorActionSpaceType == SpaceType.Discrete)
{
numDiscreteActions = m_BrainParameters.NumActions;
}
return new HeuristicPolicy(heuristic, numContinuousActions, numDiscreteActions);
}
internal void UpdateAgentPolicy()

20
com.unity.ml-agents/Runtime/Policies/HeuristicPolicy.cs


using System.Collections.Generic;
using System;
using System.Collections;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Sensors;
namespace Unity.MLAgents.Policies

/// </summary>
internal class HeuristicPolicy : IPolicy
{
public delegate void ActionGenerator(float[] actionsOut);
public delegate void ActionGenerator(in ActionBuffers actionBuffers);
float[] m_LastDecision;
ActionBuffers m_ActionBuffers;
bool m_Done;
bool m_DecisionRequested;

/// <inheritdoc />
public HeuristicPolicy(ActionGenerator heuristic, int numActions)
public HeuristicPolicy(ActionGenerator heuristic, int numContinuousActions, int numDiscreteActions)
m_LastDecision = new float[numActions];
var continuousDecision = new ActionSegment<float>(new float[numContinuousActions], 0, numContinuousActions);
var discreteDecision = new ActionSegment<int>(new int[numDiscreteActions], 0, numDiscreteActions);
m_ActionBuffers = new ActionBuffers(continuousDecision, discreteDecision);
}
/// <inheritdoc />

m_Done = info.done;
m_DecisionRequested = true;
public float[] DecideAction()
public ref readonly ActionBuffers DecideAction()
m_Heuristic.Invoke(m_LastDecision);
m_Heuristic.Invoke(m_ActionBuffers);
return m_LastDecision;
return ref m_ActionBuffers;
}
public void Dispose()

public float this[int index]
{
get { return 0.0f; }
set { }
set {}
}
}

3
com.unity.ml-agents/Runtime/Policies/IPolicy.cs


using System;
using System.Collections.Generic;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Sensors;
namespace Unity.MLAgents.Policies

/// it must be taken now. The Brain is expected to update the actions
/// of the Agents at this point at the latest.
/// </summary>
float[] DecideAction();
ref readonly ActionBuffers DecideAction();
}
}

15
com.unity.ml-agents/Runtime/Policies/RemotePolicy.cs


using UnityEngine;
using System.Collections.Generic;
using System;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Sensors;
namespace Unity.MLAgents.Policies

{
int m_AgentId;
string m_FullyQualifiedBehaviorName;
SpaceType m_SpaceType;
ActionBuffers m_LastActionBuffer;
internal ICommunicator m_Communicator;

{
m_FullyQualifiedBehaviorName = fullyQualifiedBehaviorName;
m_Communicator = Academy.Instance.Communicator;
m_SpaceType = brainParameters.VectorActionSpaceType;
m_Communicator.SubscribeBrain(m_FullyQualifiedBehaviorName, brainParameters);
}

}
/// <inheritdoc />
public float[] DecideAction()
public ref readonly ActionBuffers DecideAction()
return m_Communicator?.GetActions(m_FullyQualifiedBehaviorName, m_AgentId);
var actions = m_Communicator?.GetActions(m_FullyQualifiedBehaviorName, m_AgentId);
if (m_SpaceType == SpaceType.Continuous)
{
m_LastActionBuffer = new ActionBuffers(actions, Array.Empty<int>());
return ref m_LastActionBuffer;
}
m_LastActionBuffer = ActionBuffers.FromDiscreteActions(actions);
return ref m_LastActionBuffer;
}
public void Dispose()

46
com.unity.ml-agents/Tests/Editor/Actuators/ActuatorManagerTests.cs


actuator1ActionSpaceDef.SumOfDiscreteBranchSizes + actuator2ActionSpaceDef.SumOfDiscreteBranchSizes,
actuator1ActionSpaceDef.NumDiscreteActions + actuator2ActionSpaceDef.NumDiscreteActions);
manager.UpdateActions(new[]
{ 0f, 1f, 2f, 3f, 4f, 5f, 6f, 7f, 8f, 9f, 10f, 11f }, Array.Empty<int>());
manager.UpdateActions(new ActionBuffers(new[]
{ 0f, 1f, 2f, 3f, 4f, 5f, 6f, 7f, 8f, 9f, 10f, 11f }, Array.Empty<int>()));
Assert.IsTrue(12 == manager.StoredContinuousActions.Length);
Assert.IsTrue(0 == manager.StoredDiscreteActions.Length);
Assert.IsTrue(12 == manager.StoredActions.ContinuousActions.Length);
Assert.IsTrue(0 == manager.StoredActions.DiscreteActions.Length);
}
[Test]

actuator1ActionSpaceDef.SumOfDiscreteBranchSizes + actuator2ActionSpaceDef.SumOfDiscreteBranchSizes,
actuator1ActionSpaceDef.NumDiscreteActions + actuator2ActionSpaceDef.NumDiscreteActions);
manager.UpdateActions(Array.Empty<float>(),
new[] { 0, 1, 2, 3, 4, 5, 6});
manager.UpdateActions(new ActionBuffers(Array.Empty<float>(),
new[] { 0, 1, 2, 3, 4, 5, 6}));
Assert.IsTrue(0 == manager.StoredContinuousActions.Length);
Assert.IsTrue(7 == manager.StoredDiscreteActions.Length);
Assert.IsTrue(0 == manager.StoredActions.ContinuousActions.Length);
Assert.IsTrue(7 == manager.StoredActions.DiscreteActions.Length);
}
[Test]

manager.Add(actuator2);
var discreteActionBuffer = new[] { 0, 1, 2, 3, 4, 5, 6};
manager.UpdateActions(Array.Empty<float>(),
discreteActionBuffer);
manager.UpdateActions(new ActionBuffers(Array.Empty<float>(),
discreteActionBuffer));
manager.ExecuteActions();
var actuator1Actions = actuator1.LastActionBuffer.DiscreteActions;

manager.Add(actuator2);
var continuousActionBuffer = new[] { 0f, 1f, 2f, 3f, 4f, 5f};
manager.UpdateActions(continuousActionBuffer,
Array.Empty<int>());
manager.UpdateActions(new ActionBuffers(continuousActionBuffer,
Array.Empty<int>()));
manager.ExecuteActions();
var actuator1Actions = actuator1.LastActionBuffer.ContinuousActions;

manager.Add(actuator1);
manager.Add(actuator2);
var continuousActionBuffer = new[] { 0f, 1f, 2f, 3f, 4f, 5f};
manager.UpdateActions(continuousActionBuffer,
Array.Empty<int>());
manager.UpdateActions(new ActionBuffers(continuousActionBuffer,
Array.Empty<int>()));
Assert.IsTrue(manager.StoredContinuousActions.SequenceEqual(continuousActionBuffer));
Assert.IsTrue(manager.StoredActions.ContinuousActions.SequenceEqual(continuousActionBuffer));
}
[Test]

manager.Add(actuator1);
manager.Add(actuator2);
var discreteActionBuffer = new[] { 0, 1, 2, 3, 4, 5};
manager.UpdateActions(Array.Empty<float>(),
discreteActionBuffer);
manager.UpdateActions(new ActionBuffers(Array.Empty<float>(),
discreteActionBuffer));
Debug.Log(manager.StoredDiscreteActions);
Debug.Log(manager.StoredActions.DiscreteActions);
Assert.IsTrue(manager.StoredDiscreteActions.SequenceEqual(discreteActionBuffer));
Assert.IsTrue(manager.StoredActions.DiscreteActions.SequenceEqual(discreteActionBuffer));
}
[Test]

manager.Add(actuator1);
manager.Add(actuator2);
var continuousActionBuffer = new[] { 0f, 1f, 2f, 3f, 4f, 5f};
manager.UpdateActions(continuousActionBuffer,
Array.Empty<int>());
manager.UpdateActions(new ActionBuffers(continuousActionBuffer,
Array.Empty<int>()));
Assert.IsTrue(manager.StoredContinuousActions.SequenceEqual(continuousActionBuffer));
Assert.IsTrue(manager.StoredActions.ContinuousActions.SequenceEqual(continuousActionBuffer));
Assert.IsTrue(manager.StoredContinuousActions.SequenceEqual(new[] { 0f, 0f, 0f, 0f, 0f, 0f}));
Assert.IsTrue(manager.StoredActions.ContinuousActions.SequenceEqual(new[] { 0f, 0f, 0f, 0f, 0f, 0f}));
}
[Test]

3
com.unity.ml-agents/Tests/Editor/BehaviorParameterTests.cs


using NUnit.Framework;
using Unity.MLAgents.Actuators;
using UnityEngine;
using Unity.MLAgents.Policies;

public class BehaviorParameterTests
{
static void DummyHeuristic(float[] actionsOut)
static void DummyHeuristic(in ActionBuffers actionsOut)
{
// No-op
}

4
com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs


using NUnit.Framework;
using System.Reflection;
using System.Collections.Generic;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Sensors;
using Unity.MLAgents.Sensors.Reflection;
using Unity.MLAgents.Policies;

{
public Action OnRequestDecision;
ObservationWriter m_ObsWriter = new ObservationWriter();
static ActionBuffers s_EmptyActionBuffers = new ActionBuffers(Array.Empty<float>(), Array.Empty<int>());
public void RequestDecision(AgentInfo info, List<ISensor> sensors)
{
foreach (var sensor in sensors)

OnRequestDecision?.Invoke();
}
public float[] DecideAction() { return new float[0]; }
public ref readonly ActionBuffers DecideAction() { return ref s_EmptyActionBuffers; }
public void Dispose() {}
}

2
config/ppo/WalkerDynamic.yaml


gamma: 0.995
strength: 1.0
keep_checkpoints: 5
max_steps: 20000000
max_steps: 30000000
time_horizon: 1000
summary_freq: 30000
threaded: true

2
config/ppo/WalkerStatic.yaml


gamma: 0.995
strength: 1.0
keep_checkpoints: 5
max_steps: 20000000
max_steps: 30000000
time_horizon: 1000
summary_freq: 30000
threaded: true

31
docs/Learning-Environment-Examples.md


- Set-up: Physics-based Humanoid agents with 26 degrees of freedom. These DOFs
correspond to articulation of the following body-parts: hips, chest, spine,
head, thighs, shins, feet, arms, forearms and hands.
- Goal: The agents must move its body toward the goal direction as quickly as
possible without falling.
- `WalkerStatic` - Goal direction is always forward.
- Goal: The agent must move its body toward the goal direction without falling.
- `WalkerDynamicVariableSpeed`- Goal direction and walking speed are randomized.
- `WalkerStatic` - Goal direction is always forward.
- `WalkerStaticVariableSpeed` - Goal direction is always forward. Walking
speed is randomized.
- +0.02 times body velocity in the goal direction. (run towards target)
- +0.01 times head direction alignment with goal direction. (face towards target)
- +0.005 times head y position - left foot y position. (encourage head height)
- +0.005 times head y position - right foot y position. (encourage head height)
The reward function is now geometric, meaning the reward at each step is the
product of all the reward terms instead of their sum; this encourages the agent
to maximize every term rather than only the easiest ones (see the sketch after
the benchmark list below).
- Body velocity matches goal velocity. (normalized between (0,1))
- Head direction alignment with goal direction. (normalized between (0,1))
- Vector Observation space: 236 variables corresponding to position, rotation,
- Vector Observation space: 243 variables corresponding to position, rotation,
velocity, and angular velocities of each limb, along with goal direction.
- Vector Action space: (Continuous) Size of 39, corresponding to target
rotations and strength applicable to the joints.

- Recommended Minimum:
- Recommended Maximum:
- hip_mass: Mass of the hip component of the walker
- Default: 15
- Default: 8
- Recommended Minimum: 7
- Recommended Maximum: 28
- chest_mass: Mass of the chest component of the walker

- spine_mass: Mass of the spine component of the walker
- Default: 10
- Default: 8
- Benchmark Mean Reward for `WalkerStatic`: 1500
- Benchmark Mean Reward for `WalkerDynamic`: 700
- Benchmark Mean Reward for `WalkerDynamic`: 2500
- Benchmark Mean Reward for `WalkerDynamicVariableSpeed`: 2500
- Benchmark Mean Reward for `WalkerStatic`: 3500
- Benchmark Mean Reward for `WalkerStaticVariableSpeed`: 3500
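A minimal sketch of the multiplicative reward idea described above, assuming each term has already been normalized into (0, 1); the class, method, and parameter names are placeholders, not the actual WalkerAgent.cs code.

// Illustrative only: a geometric (product) reward instead of a sum.
static class GeometricRewardSketch
{
    // velocityMatch and headAlignment are assumed to be in (0, 1).
    public static float Combine(float velocityMatch, float headAlignment)
    {
        // The product is only large when every term is large, so the agent
        // cannot score well by optimizing just the easiest objective.
        return velocityMatch * headAlignment;
    }
}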
## Pyramids

8
ml-agents-envs/mlagents_envs/exception.py


def __init__(self, worker_id):
message = self.MESSAGE_TEMPLATE.format(str(worker_id))
super().__init__(message)
class UnityPolicyException(UnityException):
"""
Related to errors with the Trainer.
"""
pass

2
ml-agents/mlagents/trainers/ghost/trainer.py


"""
policy = self.trainer.create_policy(parsed_behavior_id, behavior_spec)
policy.create_tf_graph()
policy.initialize_or_load()
self.trainer.saver.initialize_or_load(policy)
policy.init_load_weights()
team_id = parsed_behavior_id.team_id
self.controller.subscribe_team_id(team_id, self)

3
ml-agents/mlagents/trainers/optimizer/tf_optimizer.py


self.reward_signals[reward_signal.value].update_dict
)
@classmethod
self, learning_rate: tf.Tensor, name: str = "Adam"
cls, learning_rate: tf.Tensor, name: str = "Adam"
) -> tf.train.Optimizer:
return tf.train.AdamOptimizer(learning_rate=learning_rate, name=name)

20
ml-agents/mlagents/trainers/policy/policy.py


from mlagents_envs.base_env import DecisionSteps
from mlagents_envs.exception import UnityException
from mlagents.model_serialization import SerializationSettings
from mlagents.trainers.action_info import ActionInfo
from mlagents_envs.base_env import BehaviorSpec
from mlagents.trainers.settings import TrainerSettings, NetworkSettings

seed: int,
behavior_spec: BehaviorSpec,
trainer_settings: TrainerSettings,
model_path: str,
load: bool = False,
tanh_squash: bool = False,
reparameterize: bool = False,
condition_sigma_on_obs: bool = True,

self.vis_obs_size = sum(
1 for shape in behavior_spec.observation_shapes if len(shape) == 3
)
self.model_path = model_path
self.initialize_path = self.trainer_settings.init_path
self._keep_checkpoints = self.trainer_settings.keep_checkpoints
self.vis_obs_shape = (
[shape for shape in behavior_spec.observation_shapes if len(shape) == 3][0]
if self.vis_obs_size > 0
else None
)
self.use_continuous_act = behavior_spec.is_action_continuous()
self.num_branches = self.behavior_spec.action_size
self.previous_action_dict: Dict[str, np.array] = {}

self.load = load
self.h_size = self.network_settings.hidden_units
num_layers = self.network_settings.num_layers
if num_layers < 1:

@abstractmethod
def get_current_step(self):
pass
@abstractmethod
def checkpoint(self, checkpoint_path: str, settings: SerializationSettings) -> None:
pass
@abstractmethod
def save(self, output_filepath: str, settings: SerializationSettings) -> None:
pass
@abstractmethod

131
ml-agents/mlagents/trainers/policy/tf_policy.py


from typing import Any, Dict, List, Optional, Tuple
from typing import Any, Dict, List, Optional, Tuple, Callable
from mlagents.model_serialization import SerializationSettings, export_policy_model
from mlagents.tf_utils import tf
from mlagents import tf_utils
from mlagents_envs.exception import UnityException

GaussianDistribution,
MultiCategoricalDistribution,
)
from mlagents.tf_utils.globals import get_rank
logger = get_logger(__name__)

Contains a learning model, and the necessary
functions to save/load models and create the input placeholders.
"""
# Callback function used at the start of training to synchronize weights.
# By default, this does nothing.
# If this needs to be used, it should be done from outside ml-agents.
broadcast_global_variables: Callable[[int], None] = lambda root_rank: None
def __init__(
self,

model_path: str,
load: bool = False,
tanh_squash: bool = False,
reparameterize: bool = False,
condition_sigma_on_obs: bool = True,

:param seed: Random seed to use for TensorFlow.
:param brain: The corresponding Brain for this policy.
:param trainer_settings: The trainer parameters.
:param model_path: Where to load/save the model.
:param load: If True, load model from model_path. Otherwise, create new model.
model_path,
load,
tanh_squash,
reparameterize,
condition_sigma_on_obs,

self.assign_ops: List[tf.Operation] = []
self.update_dict: Dict[str, tf.Tensor] = {}
self.inference_dict: Dict[str, tf.Tensor] = {}
self.first_normalization_update: bool = False
self.saver: Optional[tf.Operation] = None
self.rank = get_rank()
if create_tf_graph:
self.create_tf_graph()

# We do an initialize to make the Policy usable out of the box. If an optimizer is needed,
# it will re-load the full graph
self._initialize_graph()
self.initialize()
def _create_encoder(
self,

ver = LooseVersion(version_string)
return tuple(map(int, ver.version[0:3]))
def _check_model_version(self, version: str) -> None:
"""
Checks whether the model being loaded was created with the same version of
ML-Agents, and throw a warning if not so.
"""
if self.version_tensors is not None:
loaded_ver = tuple(
num.eval(session=self.sess) for num in self.version_tensors
)
if loaded_ver != TFPolicy._convert_version_string(version):
logger.warning(
f"The model checkpoint you are loading from was saved with ML-Agents version "
f"{loaded_ver[0]}.{loaded_ver[1]}.{loaded_ver[2]} but your current ML-Agents"
f"version is {version}. Model may not behave properly."
)
def _initialize_graph(self):
def initialize(self):
self.saver = tf.train.Saver(max_to_keep=self._keep_checkpoints)
def _load_graph(self, model_path: str, reset_global_steps: bool = False) -> None:
with self.graph.as_default():
self.saver = tf.train.Saver(max_to_keep=self._keep_checkpoints)
logger.info(f"Loading model from {model_path}.")
ckpt = tf.train.get_checkpoint_state(model_path)
if ckpt is None:
raise UnityPolicyException(
"The model {} could not be loaded. Make "
"sure you specified the right "
"--run-id and that the previous run you are loading from had the same "
"behavior names.".format(model_path)
)
try:
self.saver.restore(self.sess, ckpt.model_checkpoint_path)
except tf.errors.NotFoundError:
raise UnityPolicyException(
"The model {} was found but could not be loaded. Make "
"sure the model is from the same version of ML-Agents, has the same behavior parameters, "
"and is using the same trainer configuration as the current run.".format(
model_path
)
)
self._check_model_version(__version__)
if reset_global_steps:
self._set_step(0)
logger.info(
"Starting training from step 0 and saving to {}.".format(
self.model_path
)
)
else:
logger.info(f"Resuming training from step {self.get_current_step()}.")
def initialize_or_load(self):
# If there is an initialize path, load from that. Else, load from the set model path.
# If load is set to True, don't reset steps to 0. Else, do. This allows a user to,
# e.g., resume from an initialize path.
reset_steps = not self.load
if self.initialize_path is not None:
self._load_graph(self.initialize_path, reset_global_steps=reset_steps)
elif self.load:
self._load_graph(self.model_path, reset_global_steps=reset_steps)
else:
self._initialize_graph()
def get_weights(self):
with self.graph.as_default():
_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)

step = self.sess.run(self.global_step)
return step
def _set_step(self, step: int) -> int:
def set_step(self, step: int) -> int:
"""
Sets current model step to step without creating additional ops.
:param step: Step to set the current model step to.

"""
return list(self.update_dict.keys())
def checkpoint(self, checkpoint_path: str, settings: SerializationSettings) -> None:
"""
Checkpoints the policy on disk.
:param checkpoint_path: filepath to write the checkpoint
:param settings: SerializationSettings for exporting the model.
"""
# Save the TF checkpoint and graph definition
with self.graph.as_default():
if self.saver:
self.saver.save(self.sess, f"{checkpoint_path}.ckpt")
tf.train.write_graph(
self.graph, self.model_path, "raw_graph_def.pb", as_text=False
)
# also save the policy so we have optimized model files for each checkpoint
self.save(checkpoint_path, settings)
def save(self, output_filepath: str, settings: SerializationSettings) -> None:
"""
Saves the serialized model, given a path and SerializationSettings
This method will save the policy graph to the given filepath. The path
should be provided without an extension as multiple serialized model formats
may be generated as a result.
:param output_filepath: path (without suffix) for the model file(s)
:param settings: SerializationSettings for how to save the model.
"""
export_policy_model(output_filepath, settings, self.graph, self.sess)
def update_normalization(self, vector_obs: np.ndarray) -> None:
"""
If this policy normalizes vector observations, this will update the norm values in the graph.

self.sess.run(
self.update_normalization_op, feed_dict={self.vector_in: vector_obs}
)
if self.first_normalization_update:
self.sess.run(
self.init_normalization_op, feed_dict={self.vector_in: vector_obs}
)
self.first_normalization_update = False
else:
self.sess.run(
self.update_normalization_op, feed_dict={self.vector_in: vector_obs}
)
@property
def use_vis_obs(self):

self.normalization_steps: Optional[tf.Variable] = None
self.running_mean: Optional[tf.Variable] = None
self.running_variance: Optional[tf.Variable] = None
self.init_normalization_op: Optional[tf.Operation] = None
self.update_normalization_op: Optional[tf.Operation] = None
self.value: Optional[tf.Tensor] = None
self.all_log_probs: tf.Tensor = None

self.behavior_spec.observation_shapes
)
if self.normalize:
self.first_normalization_update = True
self.init_normalization_op = normalization_tensors.init_op
self.normalization_steps = normalization_tensors.steps
self.running_mean = normalization_tensors.running_mean
self.running_variance = normalization_tensors.running_variance

72
ml-agents/mlagents/trainers/policy/torch_policy.py


import numpy as np
import torch
import os
from torch import onnx
from mlagents.model_serialization import SerializationSettings
from mlagents.trainers.policy import Policy
from mlagents_envs.base_env import DecisionSteps, BehaviorSpec
from mlagents_envs.timers import timed

from mlagents.trainers.torch.networks import SharedActorCritic, SeparateActorCritic
from mlagents.trainers.torch.networks import (
SharedActorCritic,
SeparateActorCritic,
GlobalSteps,
)
from mlagents.trainers.torch.utils import ModelUtils
EPSILON = 1e-7 # Small value to avoid divide by zero

seed: int,
behavior_spec: BehaviorSpec,
trainer_settings: TrainerSettings,
model_path: str,
load: bool = False,
tanh_squash: bool = False,
reparameterize: bool = False,
separate_critic: bool = True,

seed,
behavior_spec,
trainer_settings,
model_path,
load,
self.global_step = 0
self.global_step = (
GlobalSteps()
) # could be much simpler if TorchPolicy is nn.Module
if TestingConfiguration.device != "cpu":
torch.set_default_tensor_type(torch.cuda.FloatTensor)
else:

agent_ids=list(decision_requests.agent_id),
)
def checkpoint(self, checkpoint_path: str, settings: SerializationSettings) -> None:
"""
Checkpoints the policy on disk.
:param checkpoint_path: filepath to write the checkpoint
:param settings: SerializationSettings for exporting the model.
"""
if not os.path.exists(self.model_path):
os.makedirs(self.model_path)
torch.save(self.actor_critic.state_dict(), f"{checkpoint_path}.pt")
def save(self, output_filepath: str, settings: SerializationSettings) -> None:
self.export_model(self.global_step)
def load_model(self, step=0): # TODO: this doesn't work
load_path = self.model_path + "/model-" + str(step) + ".pt"
self.actor_critic.load_state_dict(torch.load(load_path))
def export_model(self, step=0):
fake_vec_obs = [torch.zeros([1] + [self.vec_obs_size])]
fake_vis_obs = [torch.zeros([1] + [84, 84, 3])]
fake_masks = torch.ones([1] + self.actor_critic.act_size)
# fake_memories = torch.zeros([1] + [self.m_size])
export_path = "./model-" + str(step) + ".onnx"
output_names = ["action", "action_probs"]
input_names = ["vector_observation", "action_mask"]
dynamic_axes = {"vector_observation": [0], "action": [0], "action_probs": [0]}
onnx.export(
self.actor_critic,
(fake_vec_obs, fake_vis_obs, fake_masks),
export_path,
verbose=True,
opset_version=12,
input_names=input_names,
output_names=output_names,
dynamic_axes=dynamic_axes,
)
@property
def use_vis_obs(self):
return self.vis_obs_size > 0

Gets current model step.
:return: current model step.
"""
step = self.global_step
return self.global_step.current_step
def set_step(self, step: int) -> int:
"""
Sets current model step to step without creating additional ops.
:param step: Step to set the current model step to.
:return: The step the model was set to.
"""
self.global_step.current_step = step
return step
def increment_step(self, n_steps):

self.global_step += n_steps
self.global_step.increment(n_steps)
return self.get_current_step()
def load_weights(self, values: List[np.ndarray]) -> None:

def get_weights(self) -> List[np.ndarray]:
return []
def get_modules(self):
return {"Policy": self.actor_critic, "global_step": self.global_step}

10
ml-agents/mlagents/trainers/ppo/optimizer_tf.py


self.stream_names = list(self.reward_signals.keys())
self.tf_optimizer: Optional[tf.train.AdamOptimizer] = None
self.tf_optimizer_op: Optional[tf.train.Optimizer] = None
self.grads = None
self.update_batch: Optional[tf.Operation] = None

"decay_beta": self.decay_beta,
}
)
self.policy.initialize_or_load()
def _create_cc_critic(
self, h_size: int, num_layers: int, vis_encode_type: EncoderType

)
def _create_ppo_optimizer_ops(self):
self.tf_optimizer = self.create_optimizer_op(self.learning_rate)
self.grads = self.tf_optimizer.compute_gradients(self.loss)
self.update_batch = self.tf_optimizer.minimize(self.loss)
self.tf_optimizer_op = self.create_optimizer_op(self.learning_rate)
self.grads = self.tf_optimizer_op.compute_gradients(self.loss)
self.update_batch = self.tf_optimizer_op.minimize(self.loss)
@timed
def update(self, batch: AgentBuffer, num_sequences: int) -> Dict[str, float]:

7
ml-agents/mlagents/trainers/ppo/optimizer_torch.py


torch.clamp(r_theta, 1.0 - decay_epsilon, 1.0 + decay_epsilon) * advantage
)
policy_loss = -1 * ModelUtils.masked_mean(
torch.min(p_opt_a, p_opt_b).flatten(), loss_masks
torch.min(p_opt_a, p_opt_b), loss_masks
)
return policy_loss

loss = (
policy_loss
+ 0.5 * value_loss
- decay_bet * ModelUtils.masked_mean(entropy.flatten(), loss_masks)
- decay_bet * ModelUtils.masked_mean(entropy, loss_masks)
)
# Set optimizer learning rate

update_stats.update(reward_provider.update(batch))
return update_stats
def get_modules(self):
return {"Optimizer": self.optimizer}

32
ml-agents/mlagents/trainers/ppo/trainer.py


:param artifact_path: The directory within which to store artifacts from this trainer.
"""
super().__init__(
brain_name, trainer_settings, training, artifact_path, reward_buff_cap
brain_name, trainer_settings, training, load, artifact_path, reward_buff_cap
self.load = load
self.seed = seed
if TestingConfiguration.max_steps > 0:
self.trainer_settings.max_steps = TestingConfiguration.max_steps

self.seed,
behavior_spec,
self.trainer_settings,
model_path=self.artifact_path,
load=self.load,
condition_sigma_on_obs=False, # Faster training for PPO
)
return policy

self.seed,
behavior_spec,
self.trainer_settings,
self.artifact_path,
self.load,
def create_ppo_optimizer(self) -> PPOOptimizer:
if self.framework == FrameworkType.PYTORCH:
return TorchPPOOptimizer( # type: ignore
cast(TorchPolicy, self.policy), self.trainer_settings # type: ignore
) # type: ignore
else:
return PPOOptimizer( # type: ignore
cast(TFPolicy, self.policy), self.trainer_settings # type: ignore
) # type: ignore
def add_policy(
self, parsed_behavior_id: BehaviorIdentifiers, policy: Policy
) -> None:

)
self.policy = policy
self.policies[parsed_behavior_id.behavior_id] = policy
if self.framework == FrameworkType.PYTORCH:
self.optimizer = TorchPPOOptimizer( # type: ignore
self.policy, self.trainer_settings # type: ignore
) # type: ignore
else:
self.optimizer = PPOOptimizer( # type: ignore
self.policy, self.trainer_settings # type: ignore
) # type: ignore
self.optimizer = self.create_ppo_optimizer()
self.saver.register(self.policy)
self.saver.register(self.optimizer)
self.saver.initialize_or_load()
# Needed to resume loads properly
self.step = policy.get_current_step()

2
ml-agents/mlagents/trainers/sac/optimizer.py


[self.policy.update_normalization_op, target_update_norm]
)
self.policy.initialize_or_load()
self.stats_name_to_update_name = {
"Losses/Value Loss": "value_loss",
"Losses/Policy Loss": "policy_loss",

13
ml-agents/mlagents/trainers/sac/optimizer_torch.py


memories = None
next_memories = None
# Q network memories are 0'ed out, since we don't have them during inference.
q_memories = torch.zeros_like(next_memories)
q_memories = (
torch.zeros_like(next_memories) if next_memories is not None else None
)
vis_obs: List[torch.Tensor] = []
next_vis_obs: List[torch.Tensor] = []

self, reward_signal_minibatches: Mapping[str, AgentBuffer], num_sequences: int
) -> Dict[str, float]:
return {}
def get_modules(self):
return {
"Optimizer:value_network": self.value_network,
"Optimizer:target_network": self.target_network,
"Optimizer:policy_optimizer": self.policy_optimizer,
"Optimizer:value_optimizer": self.value_optimizer,
"Optimizer:entropy_optimizer": self.entropy_optimizer,
}

31
ml-agents/mlagents/trainers/sac/trainer.py


:param artifact_path: The directory within which to store artifacts from this trainer.
"""
super().__init__(
brain_name, trainer_settings, training, artifact_path, reward_buff_cap
brain_name, trainer_settings, training, load, artifact_path, reward_buff_cap
self.load = load
self.seed = seed
self.policy: Policy = None # type: ignore
self.optimizer: SACOptimizer = None # type: ignore

self.seed,
behavior_spec,
self.trainer_settings,
self.artifact_path,
self.load,
tanh_squash=True,
reparameterize=True,
create_tf_graph=False,

self.seed,
behavior_spec,
self.trainer_settings,
self.artifact_path,
self.load,
condition_sigma_on_obs=True,
tanh_squash=True,
separate_critic=True,

for stat, stat_list in batch_update_stats.items():
self._stats_reporter.add_stat(stat, np.mean(stat_list))
def create_sac_optimizer(self) -> SACOptimizer:
if self.framework == FrameworkType.PYTORCH:
return TorchSACOptimizer( # type: ignore
cast(TorchPolicy, self.policy), self.trainer_settings # type: ignore
) # type: ignore
else:
return SACOptimizer( # type: ignore
cast(TFPolicy, self.policy), self.trainer_settings # type: ignore
) # type: ignore
def add_policy(
self, parsed_behavior_id: BehaviorIdentifiers, policy: Policy
) -> None:

)
self.policy = policy
self.policies[parsed_behavior_id.behavior_id] = policy
if self.framework == FrameworkType.PYTORCH:
self.optimizer = TorchSACOptimizer( # type: ignore
self.policy, self.trainer_settings # type: ignore
) # type: ignore
else:
self.optimizer = SACOptimizer( # type: ignore
self.policy, self.trainer_settings # type: ignore
) # type: ignore
self.optimizer = self.create_sac_optimizer()
self.saver.register(self.policy)
self.saver.register(self.optimizer)
self.saver.initialize_or_load()
# Needed to resume loads properly
self.step = policy.get_current_step()
# Assume steps were updated at the correct ratio before

8
ml-agents/mlagents/trainers/settings.py


device = "cpu"
class SerializationSettings:
convert_to_barracuda = True
convert_to_onnx = True
onnx_opset = 9
@attr.s(auto_attribs=True)
class ExportableSettings:
def as_dict(self):

PROGRESS: str = "progress"
REWARD: str = "reward"
behavior: str
behavior: str = attr.ib(default="")
min_lesson_length: int = 0
signal_smoothing: bool = True
threshold: float = attr.ib(default=0.0)

41
ml-agents/mlagents/trainers/stats.py


from mlagents_envs.logging_util import get_logger
from mlagents_envs.timers import set_gauge
from mlagents.tf_utils import tf, generate_session_config
from mlagents.tf_utils.globals import get_rank
logger = get_logger(__name__)

class GaugeWriter(StatsWriter):
"""
Write all stats that we recieve to the timer gauges, so we can track them offline easily
Write all stats that we receive to the timer gauges, so we can track them offline easily
"""
@staticmethod

# If self-play, we want to print ELO as well as reward
self.self_play = False
self.self_play_team = -1
self.rank = get_rank()
def write_stats(
self, category: str, values: Dict[str, StatsSummary], step: int

stats_summary = stats_summary = values["Is Training"]
stats_summary = values["Is Training"]
elapsed_time = time.time() - self.training_start_time
log_info: List[str] = [category]
log_info.append(f"Step: {step}")
log_info.append(f"Time Elapsed: {elapsed_time:0.3f} s")
logger.info(
"{}: Step: {}. "
"Time Elapsed: {:0.3f} s "
"Mean "
"Reward: {:0.3f}"
". Std of Reward: {:0.3f}. {}".format(
category,
step,
time.time() - self.training_start_time,
stats_summary.mean,
stats_summary.std,
is_training,
)
)
if self.rank is not None:
log_info.append(f"Rank: {self.rank}")
log_info.append(f"Mean Reward: {stats_summary.mean:0.3f}")
log_info.append(f"Std of Reward: {stats_summary.std:0.3f}")
log_info.append(is_training)
logger.info(f"{category} ELO: {elo_stats.mean:0.3f}. ")
log_info.append(f"ELO: {elo_stats.mean:0.3f}")
logger.info(
"{}: Step: {}. No episode was completed since last summary. {}".format(
category, step, is_training
)
)
log_info.append("No episode was completed since last summary")
log_info.append(is_training)
logger.info(". ".join(log_info))
def add_property(
self, category: str, property_type: StatsPropertyType, value: Any

3
ml-agents/mlagents/trainers/tests/mock_brain.py


memory=memory,
)
steps_list.append(experience)
obs = []
for _shape in observation_shapes:
obs.append(np.ones(_shape, dtype=np.float32))
last_experience = AgentExperience(
obs=obs,
reward=reward,

27
ml-agents/mlagents/trainers/tests/test_barracuda_converter.py


import os
import tempfile
import pytest
from mlagents.trainers.tests.test_nn_policy import create_policy_mock
from mlagents.trainers.settings import TrainerSettings
from mlagents.tf_utils import tf
from mlagents.model_serialization import SerializationSettings
def test_barracuda_converter():

# cleanup
os.remove(tmpfile)
@pytest.mark.parametrize("discrete", [True, False], ids=["discrete", "continuous"])
@pytest.mark.parametrize("visual", [True, False], ids=["visual", "vector"])
@pytest.mark.parametrize("rnn", [True, False], ids=["rnn", "no_rnn"])
def test_policy_conversion(tmpdir, rnn, visual, discrete):
tf.reset_default_graph()
dummy_config = TrainerSettings()
policy = create_policy_mock(
dummy_config,
use_rnn=rnn,
model_path=os.path.join(tmpdir, "test"),
use_discrete=discrete,
use_visual=visual,
)
settings = SerializationSettings(policy.model_path, "MockBrain")
checkpoint_path = f"{tmpdir}/MockBrain-1"
policy.checkpoint(checkpoint_path, settings)
# These checks taken from test_barracuda_converter
assert os.path.isfile(checkpoint_path + ".nn")
assert os.path.getsize(checkpoint_path + ".nn") > 100

10
ml-agents/mlagents/trainers/tests/test_bcmodule.py


NetworkSettings.MemorySettings() if use_rnn else None
)
policy = TFPolicy(
0,
mock_behavior_specs,
trainer_config,
"test",
False,
tanhresample,
tanhresample,
0, mock_behavior_specs, trainer_config, tanhresample, tanhresample
)
with policy.graph.as_default():
bc_module = BCModule(

default_num_epoch=3,
settings=bc_settings,
)
policy.initialize_or_load() # Normally the optimizer calls this after the BCModule is created
policy.initialize() # Normally the optimizer calls this after the BCModule is created
return bc_module

62
ml-agents/mlagents/trainers/tests/test_env_param_manager.py


yaml.safe_load(test_bad_curriculum_all_competion_criteria_config_yaml)
)
param_manager = EnvironmentParameterManager(
run_options.environment_parameters, 1337, False
)
assert param_manager.update_lessons(
trainer_steps={"fake_behavior": 500},
trainer_max_steps={"fake_behavior": 1000},
trainer_reward_buffer={"fake_behavior": [1000] * 101},
) == (True, True)
assert param_manager.update_lessons(
trainer_steps={"fake_behavior": 500},
trainer_max_steps={"fake_behavior": 1000},
trainer_reward_buffer={"fake_behavior": [1000] * 101},
) == (True, True)
assert param_manager.update_lessons(
trainer_steps={"fake_behavior": 500},
trainer_max_steps={"fake_behavior": 1000},
trainer_reward_buffer={"fake_behavior": [1000] * 101},
) == (False, False)
assert param_manager.get_current_lesson_number() == {"param_1": 2}
param_manager = EnvironmentParameterManager(
run_options.environment_parameters, 1337, False
)
assert param_manager.update_lessons(
trainer_steps={"fake_behavior": 500},
trainer_max_steps={"fake_behavior": 1000},
trainer_reward_buffer={"fake_behavior": [1000] * 101},
) == (True, True)
assert param_manager.update_lessons(
trainer_steps={"fake_behavior": 500},
trainer_max_steps={"fake_behavior": 1000},
trainer_reward_buffer={"fake_behavior": [1000] * 101},
) == (True, True)
assert param_manager.update_lessons(
trainer_steps={"fake_behavior": 500},
trainer_max_steps={"fake_behavior": 1000},
trainer_reward_buffer={"fake_behavior": [1000] * 101},
) == (False, False)
assert param_manager.get_current_lesson_number() == {"param_1": 2}
test_everything_config_yaml = """

"param_2": GaussianSettings(seed=1337 + 3, mean=4, st_dev=5),
"param_3": ConstantSettings(seed=1337 + 3 + 1, value=20),
}
test_curriculum_no_behavior_yaml = """
environment_parameters:
param_1:
curriculum:
- name: Lesson1
completion_criteria:
measure: reward
threshold: 30
min_lesson_length: 100
require_reset: true
value: 1
- name: Lesson2
value: 2
"""
def test_curriculum_no_behavior():
with pytest.raises(TypeError):
run_options = RunOptions.from_dict(
yaml.safe_load(test_curriculum_no_behavior_yaml)
)
EnvironmentParameterManager(run_options.environment_parameters, 1337, False)

175
ml-agents/mlagents/trainers/tests/test_nn_policy.py


import pytest
import os
import unittest
import tempfile
from mlagents.model_serialization import SerializationSettings
from mlagents.trainers.policy.tf_policy import TFPolicy
from mlagents.trainers.tf.models import ModelUtils, Tensor3DShape

from mlagents.trainers.tests.test_trajectory import make_fake_trajectory
from mlagents.trainers import __version__
VECTOR_ACTION_SPACE = 2

NUM_AGENTS = 12
EPSILON = 1e-7
def create_policy_mock(

use_visual: bool = False,
model_path: str = "",
load: bool = False,
seed: int = 0,
) -> TFPolicy:
mock_spec = mb.setup_test_behavior_specs(

trainer_settings.network_settings.memory = (
NetworkSettings.MemorySettings() if use_rnn else None
)
policy = TFPolicy(
seed, mock_spec, trainer_settings, model_path=model_path, load=load
)
policy = TFPolicy(seed, mock_spec, trainer_settings)
def test_load_save(tmp_path):
path1 = os.path.join(tmp_path, "runid1")
path2 = os.path.join(tmp_path, "runid2")
trainer_params = TrainerSettings()
policy = create_policy_mock(trainer_params, model_path=path1)
policy.initialize_or_load()
policy._set_step(2000)
mock_brain_name = "MockBrain"
checkpoint_path = f"{policy.model_path}/{mock_brain_name}-2000"
serialization_settings = SerializationSettings(policy.model_path, mock_brain_name)
policy.checkpoint(checkpoint_path, serialization_settings)
assert len(os.listdir(tmp_path)) > 0
# Try load from this path
policy2 = create_policy_mock(trainer_params, model_path=path1, load=True, seed=1)
policy2.initialize_or_load()
_compare_two_policies(policy, policy2)
assert policy2.get_current_step() == 2000
# Try initialize from path 1
trainer_params.output_path = path2
trainer_params.init_path = path1
policy3 = create_policy_mock(trainer_params, model_path=path1, load=False, seed=2)
policy3.initialize_or_load()
_compare_two_policies(policy2, policy3)
# Assert that the steps are 0.
assert policy3.get_current_step() == 0
class ModelVersionTest(unittest.TestCase):
def test_version_compare(self):
# Test write_stats
with self.assertLogs("mlagents.trainers", level="WARNING") as cm:
path1 = tempfile.mkdtemp()
trainer_params = TrainerSettings()
policy = create_policy_mock(trainer_params, model_path=path1)
policy.initialize_or_load()
policy._check_model_version(
"0.0.0"
) # This is not the right version for sure
# Assert that 1 warning has been thrown with incorrect version
assert len(cm.output) == 1
policy._check_model_version(__version__) # This should be the right version
# Assert that no additional warnings have been thrown with the correct version
assert len(cm.output) == 1
def _compare_two_policies(policy1: TFPolicy, policy2: TFPolicy) -> None:
"""
Make sure two policies have the same output for the same input.

assert run_out["action"].shape == (NUM_AGENTS, VECTOR_ACTION_SPACE)
def test_large_normalization():
behavior_spec = mb.setup_test_behavior_specs(
use_discrete=True, use_visual=False, vector_action_space=[2], vector_obs_space=1
)
# Taken from Walker seed 3713 which causes NaN without proper initialization
large_obs1 = [
1800.00036621,
1799.96972656,
1800.01245117,
1800.07214355,
1800.02758789,
1799.98303223,
1799.88647461,
1799.89575195,
1800.03479004,
1800.14025879,
1800.17675781,
1800.20581055,
1800.33740234,
1800.36450195,
1800.43457031,
1800.45544434,
1800.44604492,
1800.56713867,
1800.73901367,
]
large_obs2 = [
1799.99975586,
1799.96679688,
1799.92980957,
1799.89550781,
1799.93774414,
1799.95300293,
1799.94067383,
1799.92993164,
1799.84057617,
1799.69873047,
1799.70605469,
1799.82849121,
1799.85095215,
1799.76977539,
1799.78283691,
1799.76708984,
1799.67163086,
1799.59191895,
1799.5135498,
1799.45556641,
1799.3717041,
]
policy = TFPolicy(
0,
behavior_spec,
TrainerSettings(network_settings=NetworkSettings(normalize=True)),
"testdir",
False,
)
time_horizon = len(large_obs1)
trajectory = make_fake_trajectory(
length=time_horizon,
max_step_complete=True,
observation_shapes=[(1,)],
action_space=[2],
)
for i in range(time_horizon):
trajectory.steps[i].obs[0] = np.array([large_obs1[i]], dtype=np.float32)
trajectory_buffer = trajectory.to_agentbuffer()
policy.update_normalization(trajectory_buffer["vector_obs"])
# Check that the running mean and variance is correct
steps, mean, variance = policy.sess.run(
[policy.normalization_steps, policy.running_mean, policy.running_variance]
)
assert mean[0] == pytest.approx(np.mean(large_obs1, dtype=np.float32), abs=0.01)
assert variance[0] / steps == pytest.approx(
np.var(large_obs1, dtype=np.float32), abs=0.01
)
time_horizon = len(large_obs2)
trajectory = make_fake_trajectory(
length=time_horizon,
max_step_complete=True,
observation_shapes=[(1,)],
action_space=[2],
)
for i in range(time_horizon):
trajectory.steps[i].obs[0] = np.array([large_obs2[i]], dtype=np.float32)
trajectory_buffer = trajectory.to_agentbuffer()
policy.update_normalization(trajectory_buffer["vector_obs"])
steps, mean, variance = policy.sess.run(
[policy.normalization_steps, policy.running_mean, policy.running_variance]
)
assert mean[0] == pytest.approx(
np.mean(large_obs1 + large_obs2, dtype=np.float32), abs=0.01
)
assert variance[0] / steps == pytest.approx(
np.var(large_obs1 + large_obs2, dtype=np.float32), abs=0.01
)
time_horizon = 6
trajectory = make_fake_trajectory(
length=time_horizon,

assert steps == 6
assert mean[0] == 0.5
# Note: variance is divided by number of steps, and initialized to 1 to avoid
# divide by 0. The right answer is 0.25
assert (variance[0] - 1) / steps == 0.25
# Note: variance is initialized to the variance of the initial trajectory + EPSILON
# (to avoid divide by 0) and multiplied by the number of steps. The correct answer is 0.25
assert variance[0] / steps == pytest.approx(0.25, abs=0.01)
# Make another update, this time with all 1's
time_horizon = 10
trajectory = make_fake_trajectory(

assert steps == 16
assert mean[0] == 0.8125
assert (variance[0] - 1) / steps == pytest.approx(0.152, abs=0.01)
assert variance[0] / steps == pytest.approx(0.152, abs=0.01)
def test_min_visual_size():

8
ml-agents/mlagents/trainers/tests/test_ppo.py


import attr
from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
from mlagents.trainers.trainer.rl_trainer import RLTrainer
from mlagents.trainers.ppo.trainer import PPOTrainer, discount_rewards
from mlagents.trainers.ppo.optimizer_tf import PPOOptimizer
from mlagents.trainers.policy.tf_policy import TFPolicy

0, mock_specs, trainer_settings, "test", False, create_tf_graph=False
)
optimizer = PPOOptimizer(policy, trainer_settings)
policy.initialize()
return optimizer

)
@mock.patch.object(RLTrainer, "create_saver")
def test_trainer_increment_step(ppo_optimizer):
def test_trainer_increment_step(ppo_optimizer, mock_create_saver):
trainer_params = PPO_CONFIG
mock_optimizer = mock.Mock()
mock_optimizer.reward_signals = {}

assert trainer.stats_reporter.get_stats_summaries("Policy/Extrinsic Reward").num > 0
@mock.patch.object(RLTrainer, "create_saver")
def test_add_get_policy(ppo_optimizer, dummy_config):
def test_add_get_policy(ppo_optimizer, mock_create_saver, dummy_config):
mock_optimizer = mock.Mock()
mock_optimizer.reward_signals = {}
ppo_optimizer.return_value = mock_optimizer

1
ml-agents/mlagents/trainers/tests/test_reward_signals.py


optimizer = SACOptimizer(policy, trainer_settings)
else:
optimizer = PPOOptimizer(policy, trainer_settings)
optimizer.policy.initialize()
return optimizer

25
ml-agents/mlagents/trainers/tests/test_rl_trainer.py


import os
from unittest import mock
import pytest
import mlagents.trainers.tests.mock_brain as mb

return self.update_policy
def add_policy(self, mock_behavior_id, mock_policy):
def checkpoint_path(brain_name, step):
return os.path.join(self.saver.model_path, f"{brain_name}-{step}")
mock_saver = mock.Mock()
mock_saver.model_path = self.artifact_path
mock_saver.save_checkpoint.side_effect = checkpoint_path
self.saver = mock_saver
def create_tf_policy(self):
def create_tf_policy(self, parsed_behavior_id, behavior_spec):
def create_torch_policy(self):
def create_torch_policy(self, parsed_behavior_id, behavior_spec):
return mock.Mock()
def _process_trajectory(self, trajectory):

"test_trainer",
TrainerSettings(max_steps=100, checkpoint_interval=10, summary_freq=20),
True,
False,
"mock_model_path",
0,
)
trainer.set_is_policy_updating(True)

def test_advance(mocked_clear_update_buffer, mocked_save_model):
trainer = create_rl_trainer()
mock_policy = mock.Mock()
mock_policy.model_path = "mock_model_path"
trainer.add_policy("TestBrain", mock_policy)
trajectory_queue = AgentManagerQueue("testbrain")
policy_queue = AgentManagerQueue("testbrain")

def test_summary_checkpoint(mock_add_checkpoint, mock_write_summary):
trainer = create_rl_trainer()
mock_policy = mock.Mock()
mock_policy.model_path = "mock_model_path"
trainer.add_policy("TestBrain", mock_policy)
trajectory_queue = AgentManagerQueue("testbrain")
policy_queue = AgentManagerQueue("testbrain")

checkpoint_range = range(
checkpoint_interval, num_trajectories * time_horizon, checkpoint_interval
)
calls = [
mock.call(f"{mock_policy.model_path}/{trainer.brain_name}-{step}", mock.ANY)
for step in checkpoint_range
]
mock_policy.checkpoint.assert_has_calls(calls, any_order=True)
calls = [mock.call(trainer.brain_name, step) for step in checkpoint_range]
trainer.saver.save_checkpoint.assert_has_calls(calls, any_order=True)
add_checkpoint_calls = [
mock.call(

f"{mock_policy.model_path}/{trainer.brain_name}-{step}.nn",
f"{trainer.saver.model_path}/{trainer.brain_name}-{step}.nn",
None,
mock.ANY,
),

7
ml-agents/mlagents/trainers/tests/test_sac.py


from mlagents.tf_utils import tf
from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
from mlagents.trainers.trainer.rl_trainer import RLTrainer
from mlagents.trainers.sac.trainer import SACTrainer
from mlagents.trainers.sac.optimizer import SACOptimizer
from mlagents.trainers.policy.tf_policy import TFPolicy

0, mock_brain, trainer_settings, "test", False, create_tf_graph=False
)
optimizer = SACOptimizer(policy, trainer_settings)
optimizer.policy.initialize()
return optimizer

assert trainer2.update_buffer.num_experiences == buffer_len
@mock.patch.object(RLTrainer, "create_saver")
def test_add_get_policy(sac_optimizer, dummy_config):
def test_add_get_policy(sac_optimizer, mock_create_saver, dummy_config):
mock_optimizer = mock.Mock()
mock_optimizer.reward_signals = {}
sac_optimizer.return_value = mock_optimizer

policy = trainer.create_policy(behavior_id, specs)
policy.get_current_step = lambda: 200
trainer.add_policy(behavior_id, policy)
trainer.saver.initialize_or_load(policy)
trainer.saver.initialize_or_load(policy)
trainer.optimizer.update_reward_signals = mock.Mock()
trainer.optimizer.update_reward_signals.return_value = {}
trainer.optimizer.update.return_value = {}

8
ml-agents/mlagents/trainers/tests/test_simple_rl.py


# The reward processor is passed as an argument to _check_environment_trains.
# It is applied to the list pf all final rewards for each brain individually.
# It is applied to the list of all final rewards for each brain individually.
# Custom reward processors shuld be built within the test function and passed to _check_environment_trains
# Custom reward processors should be built within the test function and passed to _check_environment_trains
# Default is average over the last 5 final rewards
def default_reward_processor(rewards, last_n_rewards=5):
rewards_to_use = rewards[-last_n_rewards:]

@pytest.mark.parametrize("use_discrete", [True, False])
def test_recurrent_sac(use_discrete):
step_size = 0.2 if use_discrete else 1.0
step_size = 0.5 if use_discrete else 0.2
env = MemoryEnvironment(
[BRAIN_NAME], use_discrete=use_discrete, step_size=step_size
)

swap_steps=5000,
team_change=2000,
)
config = attr.evolve(PPO_CONFIG, self_play=self_play_settings, max_steps=2000)
config = attr.evolve(PPO_CONFIG, self_play=self_play_settings, max_steps=3000)
_check_environment_trains(
env, {BRAIN_NAME: config, brain_name_opp: config}, success_threshold=None
)

20
ml-agents/mlagents/trainers/tests/test_tf_policy.py


from mlagents.model_serialization import SerializationSettings
from unittest import mock
from mlagents.trainers.settings import TrainerSettings
import numpy as np

# Test dev versions
result = TFPolicy._convert_version_string("200.300.100.dev0")
assert result == (200, 300, 100)
@mock.patch("mlagents.trainers.policy.tf_policy.export_policy_model")
@mock.patch("time.time", mock.MagicMock(return_value=12345))
def test_checkpoint_writes_tf_and_nn_checkpoints(export_policy_model_mock):
mock_brain = basic_mock_brain()
test_seed = 4 # moving up in the world
policy = FakePolicy(test_seed, mock_brain, TrainerSettings(), "output")
n_steps = 5
policy.get_current_step = MagicMock(return_value=n_steps)
policy.saver = MagicMock()
serialization_settings = SerializationSettings("output", mock_brain.brain_name)
checkpoint_path = f"output/{mock_brain.brain_name}-{n_steps}"
policy.checkpoint(checkpoint_path, serialization_settings)
policy.saver.save.assert_called_once_with(policy.sess, f"{checkpoint_path}.ckpt")
export_policy_model_mock.assert_called_once_with(
checkpoint_path, serialization_settings, policy.graph, policy.sess
)

8
ml-agents/mlagents/trainers/tests/torch/test_bcmodule.py


NetworkSettings.MemorySettings() if use_rnn else None
)
policy = TorchPolicy(
0,
mock_behavior_specs,
trainer_config,
"test",
False,
tanhresample,
tanhresample,
0, mock_behavior_specs, trainer_config, tanhresample, tanhresample
)
bc_module = BCModule(
policy,

6
ml-agents/mlagents/trainers/tests/torch/test_utils.py


masks = torch.tensor([False, False, False, False, False])
mean = ModelUtils.masked_mean(test_input, masks=masks)
assert mean == 0.0
# Make sure it works with 2d arrays of shape (mask_length, N)
test_input = torch.tensor([1, 2, 3, 4, 5]).repeat(2, 1).T
masks = torch.tensor([False, False, True, True, True])
mean = ModelUtils.masked_mean(test_input, masks=masks)
assert mean == 4.0

26
ml-agents/mlagents/trainers/tf/models.py


class NormalizerTensors(NamedTuple):
init_op: tf.Operation
update_op: tf.Operation
steps: tf.Tensor
running_mean: tf.Tensor

:return: A NormalizerTensors tuple that holds running mean, running variance, number of steps,
and the update operation.
"""
steps = tf.get_variable(
"normalization_steps",
[],

dtype=tf.float32,
initializer=tf.ones_initializer(),
)
update_normalization = ModelUtils.create_normalizer_update(
initialize_normalization, update_normalization = ModelUtils.create_normalizer_update(
update_normalization, steps, running_mean, running_variance
initialize_normalization,
update_normalization,
steps,
running_mean,
running_variance,
)
@staticmethod

running_mean: tf.Tensor,
running_variance: tf.Tensor,
) -> tf.Operation:
) -> Tuple[tf.Operation, tf.Operation]:
"""
Creates the update operation for the normalizer.
:param vector_input: Vector observation to use for updating the running mean and variance.

update_mean = tf.assign(running_mean, new_mean)
update_variance = tf.assign(running_variance, new_variance)
update_norm_step = tf.assign(steps, total_new_steps)
return tf.group([update_mean, update_variance, update_norm_step])
# First mean and variance calculated normally
initial_mean, initial_variance = tf.nn.moments(vector_input, axes=[0])
initialize_mean = tf.assign(running_mean, initial_mean)
# Multiplied by total_new_step because it is divided by total_new_step in the normalization
initialize_variance = tf.assign(
running_variance,
(initial_variance + EPSILON) * tf.cast(total_new_steps, dtype=tf.float32),
)
return (
tf.group([initialize_mean, initialize_variance, update_norm_step]),
tf.group([update_mean, update_variance, update_norm_step]),
)
@staticmethod
def create_vector_observation_encoder(
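The change above splits the normalizer into an initialize op (applied to the first batch, with the variance stored pre-multiplied by the step count) and an incremental update op. A rough numpy sketch of that arithmetic follows, assuming the usual Welford-style batch update; names and the exact update rule are illustrative.

# Rough numpy sketch of the running-normalizer arithmetic the TF ops above
# implement. The variance is kept pre-scaled by the step count, so
# normalization divides it by `steps`. Illustrative assumptions only.
import numpy as np


def init_normalizer(first_batch, eps=1e-8):
    steps = first_batch.shape[0]
    mean = first_batch.mean(axis=0)
    variance = (first_batch.var(axis=0) + eps) * steps   # pre-scaled by steps
    return mean, variance, steps


def update_normalizer(batch, mean, variance, steps):
    # Welford-style batch update of the running statistics.
    new_steps = steps + batch.shape[0]
    to_old_mean = batch - mean
    new_mean = mean + to_old_mean.sum(axis=0) / new_steps
    new_variance = variance + (to_old_mean * (batch - new_mean)).sum(axis=0)
    return new_mean, new_variance, new_steps


def normalize(x, mean, variance, steps, eps=1e-8):
    return (x - mean) / np.sqrt(variance / steps + eps)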

43
ml-agents/mlagents/trainers/torch/networks.py


memories: Optional[torch.Tensor] = None,
sequence_length: int = 1,
) -> Tuple[torch.Tensor, torch.Tensor]:
vec_encodes = []
encodes = []
for idx, encoder in enumerate(self.vector_encoders):
vec_input = vec_inputs[idx]
if actions is not None:

vec_encodes.append(hidden)
encodes.append(hidden)
vis_encodes = []
vis_encodes.append(hidden)
encodes.append(hidden)
if len(vec_encodes) > 0 and len(vis_encodes) > 0:
vec_encodes_tensor = torch.stack(vec_encodes, dim=-1).sum(dim=-1)
vis_encodes_tensor = torch.stack(vis_encodes, dim=-1).sum(dim=-1)
encoding = torch.stack(
[vec_encodes_tensor, vis_encodes_tensor], dim=-1
).sum(dim=-1)
elif len(vec_encodes) > 0:
encoding = torch.stack(vec_encodes, dim=-1).sum(dim=-1)
elif len(vis_encodes) > 0:
encoding = torch.stack(vis_encodes, dim=-1).sum(dim=-1)
else:
if len(encodes) == 0:
# Constants don't work in Barracuda
encoding = encodes[0]
if len(encodes) > 1:
for _enc in encodes[1:]:
encoding += _enc
if self.use_lstm:
# Resize to (batch, sequence length, encoding size)

)
action_list = self.sample_action(dists)
sampled_actions = torch.stack(action_list, dim=-1)
if self.act_type == ActionType.CONTINUOUS:
log_probs = dists[0].log_prob(sampled_actions)
else:
log_probs = dists[0].all_log_prob()
dists[0].pdf(sampled_actions),
log_probs,
self.version_number,
self.memory_size_param,
self.is_continuous_int,

class GlobalSteps(nn.Module):
def __init__(self):
super().__init__()
self.global_step = torch.Tensor([0])
self.__global_step = nn.Parameter(torch.Tensor([0]), requires_grad=False)
@property
def current_step(self):
return int(self.__global_step.item())
@current_step.setter
def current_step(self, value):
self.__global_step[:] = value
self.global_step += value
self.__global_step += value
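The GlobalSteps change above swaps a plain tensor for a non-trainable nn.Parameter so the step count is included in the module's state_dict and survives checkpointing. A minimal sketch of that pattern, with illustrative names:

# Minimal sketch of keeping a step counter as a non-trainable nn.Parameter so
# it is saved and restored with the module's state_dict. Illustrative only.
import torch
from torch import nn


class StepCounter(nn.Module):
    def __init__(self):
        super().__init__()
        self._step = nn.Parameter(torch.zeros(1), requires_grad=False)

    @property
    def current_step(self) -> int:
        return int(self._step.item())

    def increment(self, value: int) -> None:
        self._step += value


counter = StepCounter()
counter.increment(5)
assert "_step" in counter.state_dict()   # persisted alongside model weights
assert counter.current_step == 5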
class LearningRate(nn.Module):

4
ml-agents/mlagents/trainers/torch/utils.py


:param tensor: Tensor which needs mean computation.
:param masks: Boolean tensor of masks with same dimension as tensor.
"""
return (tensor * masks).sum() / torch.clamp(masks.float().sum(), min=1.0)
return (tensor.T * masks).sum() / torch.clamp(
(torch.ones_like(tensor.T) * masks).float().sum(), min=1.0
)
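The replacement masked_mean above broadcasts the mask across a trailing dimension so it also handles tensors of shape (mask_length, N). A small standalone check, mirroring the expectations from the test_utils.py fragment earlier in this diff:

# Small check of a masked mean that works for both 1-D tensors and tensors of
# shape (mask_length, N), mirroring the new implementation above. Sketch only.
import torch


def masked_mean(tensor: torch.Tensor, masks: torch.Tensor) -> torch.Tensor:
    return (tensor.T * masks).sum() / torch.clamp(
        (torch.ones_like(tensor.T) * masks).float().sum(), min=1.0
    )


values_1d = torch.tensor([1.0, 2.0, 3.0, 4.0, 5.0])
values_2d = torch.tensor([1.0, 2.0, 3.0, 4.0, 5.0]).repeat(2, 1).T  # shape (5, 2)
masks = torch.tensor([False, False, True, True, True])

assert masked_mean(values_1d, masks) == 4.0
assert masked_mean(values_2d, masks) == 4.0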

43
ml-agents/mlagents/trainers/trainer/rl_trainer.py


# # Unity ML-Agents Toolkit
import os
from mlagents.model_serialization import SerializationSettings, copy_model_files
from mlagents.trainers.policy.checkpoint_manager import (
NNCheckpoint,
NNCheckpointManager,

from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
from mlagents.trainers.agent_processor import AgentManagerQueue
from mlagents.trainers.trajectory import Trajectory
from mlagents.trainers.settings import TestingConfiguration, FrameworkType
from mlagents.trainers.settings import (
TestingConfiguration,
TrainerSettings,
FrameworkType,
)
from mlagents.trainers.saver.saver import BaseSaver
from mlagents.trainers.saver.torch_saver import TorchSaver
from mlagents.trainers.saver.tf_saver import TFSaver
from mlagents.trainers.exception import UnityTrainerException
try:

self.trainer_settings.max_steps = TestingConfiguration.max_steps
self._next_save_step = 0
self._next_summary_step = 0
self.saver = self.create_saver(
self.framework, self.trainer_settings, self.artifact_path, self.load
)
def end_episode(self) -> None:
"""

"""
pass
@staticmethod
def create_saver(
framework: str, trainer_settings: TrainerSettings, model_path: str, load: bool
) -> BaseSaver:
if framework == FrameworkType.PYTORCH:
saver = TorchSaver( # type: ignore
trainer_settings, model_path, load
)
else:
saver = TFSaver( # type: ignore
trainer_settings, model_path, load
)
return saver
def _policy_mean_reward(self) -> Optional[float]:
""" Returns the mean episode reward for the current policy. """
rewards = self.cumulative_returns_since_policy_update

logger.warning(
"Trainer has multiple policies, but default behavior only saves the first."
)
policy = list(self.policies.values())[0]
model_path = policy.model_path
settings = SerializationSettings(model_path, self.brain_name)
checkpoint_path = os.path.join(model_path, f"{self.brain_name}-{self.step}")
policy.checkpoint(checkpoint_path, settings)
checkpoint_path = self.saver.save_checkpoint(self.brain_name, self.step)
new_checkpoint = NNCheckpoint(
int(self.step),
f"{checkpoint_path}.nn",

elif n_policies == 0:
logger.warning("Trainer has no policies, not saving anything.")
return
policy = list(self.policies.values())[0]
# Copy the checkpointed model files to the final output location
copy_model_files(model_checkpoint.file_path, f"{policy.model_path}.nn")
self.saver.copy_final_model(model_checkpoint.file_path)
model_checkpoint, file_path=f"{policy.model_path}.nn"
model_checkpoint, file_path=f"{self.saver.model_path}.nn"
)
NNCheckpointManager.track_final_checkpoint(self.brain_name, final_checkpoint)
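The rl_trainer.py changes above route checkpointing through a framework-specific saver chosen by create_saver. A hedged sketch of that selection logic with placeholder saver classes; only the factory shape is illustrated, not the repository's saver implementations.

# Hedged sketch of the framework-based saver factory shown above; the saver
# classes are stand-ins, only the selection logic is illustrated.
from enum import Enum


class FrameworkType(str, Enum):
    TENSORFLOW = "tensorflow"
    PYTORCH = "pytorch"


class TorchSaverStub:
    def __init__(self, settings, model_path, load):
        self.model_path = model_path


class TFSaverStub(TorchSaverStub):
    pass


def create_saver(framework, settings, model_path, load):
    # Pick the saver implementation that matches the training framework.
    if framework == FrameworkType.PYTORCH:
        return TorchSaverStub(settings, model_path, load)
    return TFSaverStub(settings, model_path, load)


saver = create_saver(FrameworkType.PYTORCH, None, "results/Walker", False)
assert isinstance(saver, TorchSaverStub) and not isinstance(saver, TFSaverStub)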

2
ml-agents/mlagents/trainers/trainer/trainer.py


brain_name: str,
trainer_settings: TrainerSettings,
training: bool,
load: bool,
artifact_path: str,
reward_buff_cap: int = 1,
):

self._threaded = trainer_settings.threaded
self._stats_reporter = StatsReporter(brain_name)
self.is_training = training
self.load = load
self._reward_buffer: Deque[float] = deque(maxlen=reward_buff_cap)
self.policy_queues: List[AgentManagerQueue[Policy]] = []
self.trajectory_queues: List[AgentManagerQueue[Trajectory]] = []

10
ml-agents/mlagents/trainers/trainer_controller.py


from mlagents.trainers.trainer_util import TrainerFactory
from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
from mlagents.trainers.agent_processor import AgentManager
from mlagents.tf_utils.globals import get_rank
class TrainerController:

self.kill_trainers = False
np.random.seed(training_seed)
tf.set_random_seed(training_seed)
self.rank = get_rank()
@timed
def _save_models(self):

if self.rank is not None and self.rank != 0:
return
for brain_name in self.trainers.keys():
self.trainers[brain_name].save_model()
self.logger.info("Saved Model")

"""
Saves models for all trainers.
"""
if self.rank is not None and self.rank != 0:
return
for brain_name in self.trainers.keys():
self.trainers[brain_name].save_model()
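The rank check above ensures that, in a distributed run, only rank 0 writes model files. A minimal sketch of the same gate, using a mock trainer to verify the behaviour; the function is illustrative, not the repository's code.

# Minimal sketch of rank-gated saving: only rank 0 writes models so parallel
# workers do not overwrite each other's files. Illustrative only.
from unittest import mock


def save_models(trainers, rank=None):
    if rank is not None and rank != 0:
        return  # non-zero ranks skip saving
    for trainer in trainers.values():
        trainer.save_model()


trainer = mock.Mock()
save_models({"Walker": trainer}, rank=1)
trainer.save_model.assert_not_called()
save_models({"Walker": trainer}, rank=0)
trainer.save_model.assert_called_once()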

) in self.param_manager.get_current_lesson_number().items():
for trainer in self.trainers.values():
trainer.stats_reporter.set_stat(
f"Environment/Lesson/{param_name}", lesson_number
f"Environment/Lesson Number/{param_name}", lesson_number
)
for trainer in self.trainers.values():

21
Project/Assets/ML-Agents/Examples/SharedAssets/Prefabs/Targets/DynamicTarget.prefab


m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 3840539935788495952}
m_LocalRotation: {x: 0, y: 0, z: 0, w: 1}
m_LocalPosition: {x: 1, y: 1, z: 1}
m_LocalScale: {x: 1.2356956, y: 1.2356961, z: 1.2356961}
m_LocalPosition: {x: 0, y: 1, z: 1}
m_LocalScale: {x: 1, y: 1, z: 1}
m_Children: []
m_Father: {fileID: 0}
m_RootOrder: 0

m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 3840539935788495952}
m_Enabled: 1
m_CastShadows: 0
m_CastShadows: 1
m_ReceiveShadows: 1
m_DynamicOccludee: 1
m_MotionVectors: 1

respawnIfTouched: 1
respawnIfFallsOffPlatform: 1
fallDistance: 5
triggerIsTouching: 0
onTriggerEnterEvent:
m_PersistentCalls:
m_Calls: []

onTriggerExitEvent:
m_PersistentCalls:
m_Calls: []
colliderIsTouching: 0
m_Calls:
- m_Target: {fileID: 0}
m_MethodName: TouchedTarget
m_Mode: 1
m_Arguments:
m_ObjectArgument: {fileID: 0}
m_ObjectArgumentAssemblyTypeName: UnityEngine.Object, UnityEngine
m_IntArgument: 0
m_FloatArgument: 0
m_StringArgument:
m_BoolArgument: 0
m_CallState: 2
m_Calls: []
onCollisionStayEvent:
m_PersistentCalls:
m_Calls: []

19
Project/Assets/ML-Agents/Examples/SharedAssets/Prefabs/Targets/StaticTarget.prefab


m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 3840539935788495952}
m_LocalRotation: {x: -0, y: -0, z: -0, w: 1}
m_LocalPosition: {x: 6.2, y: 1.15, z: 3.824}
m_LocalScale: {x: 1.2356956, y: 1.2356961, z: 1.2356961}
m_LocalRotation: {x: 0, y: 0, z: 0, w: 1}
m_LocalPosition: {x: 1, y: 1, z: 1}
m_LocalScale: {x: 1, y: 1, z: 1}
m_Children: []
m_Father: {fileID: 0}
m_RootOrder: 0

m_IsKinematic: 0
m_Interpolate: 0
m_Constraints: 0
m_CollisionDetection: 0
m_CollisionDetection: 3
--- !u!114 &3631016866778687563
MonoBehaviour:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
tagToDetect: agent
moveTargetToRandomPosIfTouched: 0
targetSpawnRadius: 0
onTriggerEnterEvent:
m_PersistentCalls:
m_Calls: []
triggerIsTouching: 0
spawnRadius: 0
respawnIfTouched: 0
respawnIfFallsOffPlatform: 1
fallDistance: 5
onTriggerEnterEvent:
m_PersistentCalls:
m_Calls: []

onTriggerExitEvent:
m_PersistentCalls:
m_Calls: []
colliderIsTouching: 0
onCollisionEnterEvent:
m_PersistentCalls:
m_Calls: []

82
Project/Assets/ML-Agents/Examples/Walker/Prefabs/Ragdoll/WalkerRagdollBase.prefab


m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 895268871264836243}
m_LocalRotation: {x: 0, y: 0, z: 0, w: 1}
m_LocalPosition: {x: 0, y: 0.15, z: 0}
m_LocalPosition: {x: 0, y: 0, z: 0}
m_LocalScale: {x: 1, y: 1, z: 1}
m_Children:
- {fileID: 895268873051627235}

- component: {fileID: 895268871377934302}
- component: {fileID: 895268871377934301}
m_Layer: 0
m_Name: WalkerRagdoll
m_Name: WalkerRagdollBase
m_TagString: Untagged
m_Icon: {fileID: 0}
m_NavMeshLayer: 0

m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 895268871377934275}
m_LocalRotation: {x: 0, y: 0.7071068, z: 0, w: 0.7071068}
m_LocalPosition: {x: 0, y: 3.07, z: 0}
m_LocalRotation: {x: 0, y: 0, z: 0, w: 1}
m_LocalPosition: {x: 0, y: 3, z: 0}
m_LocalScale: {x: 1, y: 1, z: 1}
m_Children:
- {fileID: 895268871264836332}

m_RootOrder: 0
m_LocalEulerAnglesHint: {x: 0, y: 90, z: 0}
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
--- !u!114 &895268871377934297
MonoBehaviour:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
m_BrainParameters:
VectorObservationSize: 236
VectorObservationSize: 243
m_Model: {fileID: 11400000, guid: 3c6170922a9ad4d9f85261699ca00f5d, type: 3}
m_Model: {fileID: 11400000, guid: f598eaeeef9f94691989a2cfaaafb565, type: 3}
m_InferenceDevice: 0
m_BehaviorType: 0
m_BehaviorName: WalkerDynamic

maxStep: 0
hasUpgradedFromAgentParameters: 1
MaxStep: 5000
maximumWalkingSpeed: 999
targetWalkingSpeed: 10
randomizeWalkSpeedEachEpisode: 1
walkDirectionMethod: 0
worldDirToWalk: {x: 1, y: 0, z: 0}
worldPosToWalkTo: {x: 0, y: 0, z: 0}
target: {fileID: 0}
hips: {fileID: 895268871264836332}
chest: {fileID: 7933235354845945071}

armR: {fileID: 7933235355057813930}
forearmR: {fileID: 7933235353195701980}
handR: {fileID: 7933235354616748502}
orientationCube: {fileID: 7559180363928843817}
--- !u!114 &895268871377934303
MonoBehaviour:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
maxJointSpring: 40000
jointDampen: 3000
maxJointForceLimit: 10000
jointDampen: 5000
maxJointForceLimit: 20000
bodyPartsList: []
--- !u!114 &895268871377934302
MonoBehaviour:

m_Script: {fileID: 11500000, guid: 1513f8a85fedd47efba089213b7c5bde, type: 3}
m_Name:
m_EditorClassIdentifier:
updatedByAgent: 0
transformToFollow: {fileID: 895268871264836332}
targetToLookAt: {fileID: 0}
heightOffset: 0

m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 7933235353030744116}
serializedVersion: 2
m_Mass: 3
m_Mass: 4
m_Drag: 0.05
m_AngularDrag: 0.05
m_UseGravity: 1

m_Anchor: {x: 0.55, y: 0, z: 0}
m_Axis: {x: 0, y: -1, z: 0}
m_AutoConfigureConnectedAnchor: 1
m_ConnectedAnchor: {x: -0.7000002, y: 0, z: 0}
m_ConnectedAnchor: {x: -0.7000001, y: 0.00000011920929, z: 0}
serializedVersion: 2
m_SecondaryAxis: {x: 0, y: 0, z: 1}
m_XMotion: 0

m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 7933235353041637840}
serializedVersion: 2
m_Mass: 1
m_Mass: 2
m_Drag: 0.05
m_AngularDrag: 0.05
m_UseGravity: 1

m_Anchor: {x: 0, y: 0, z: 0}
m_Axis: {x: 1, y: 0, z: 0}
m_AutoConfigureConnectedAnchor: 1
m_ConnectedAnchor: {x: -0.70000064, y: 0, z: 0}
m_ConnectedAnchor: {x: -0.70000017, y: 0.00000011920929, z: 0}
serializedVersion: 2
m_SecondaryAxis: {x: 0, y: 0, z: 1}
m_XMotion: 0

m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 7933235353195701956}
serializedVersion: 2
m_Mass: 2
m_Mass: 3
m_Drag: 0.05
m_AngularDrag: 0.05
m_UseGravity: 1

m_Anchor: {x: -0.5, y: 0, z: 0}
m_Axis: {x: 0, y: 1, z: 0}
m_AutoConfigureConnectedAnchor: 1
m_ConnectedAnchor: {x: 0.5000005, y: 0, z: 0}
m_ConnectedAnchor: {x: 0.5, y: 0.00000011920929, z: 0}
serializedVersion: 2
m_SecondaryAxis: {x: 0, y: 0, z: 1}
m_XMotion: 0

m_Anchor: {x: 0, y: 0.5, z: 0}
m_Axis: {x: -1, y: 0, z: 0}
m_AutoConfigureConnectedAnchor: 1
m_ConnectedAnchor: {x: -0.39999408, y: -0.29999986, z: 0}
m_ConnectedAnchor: {x: -0.39999396, y: -0.29999995, z: 0}
serializedVersion: 2
m_SecondaryAxis: {x: 0, y: 0, z: -1}
m_XMotion: 0

m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 7933235353240438151}
serializedVersion: 2
m_Mass: 2
m_Mass: 3
m_Drag: 0.05
m_AngularDrag: 0.05
m_UseGravity: 1

m_Anchor: {x: 0.5, y: 0, z: 0}
m_Axis: {x: 0, y: -1, z: 0}
m_AutoConfigureConnectedAnchor: 1
m_ConnectedAnchor: {x: -0.5000005, y: 0, z: 0}
m_ConnectedAnchor: {x: -0.5, y: 0.00000011920929, z: 0}
serializedVersion: 2
m_SecondaryAxis: {x: 0, y: 0, z: 1}
m_XMotion: 0

m_Anchor: {x: 0, y: 0.5, z: 0}
m_Axis: {x: -1, y: 0, z: 0}
m_AutoConfigureConnectedAnchor: 1
m_ConnectedAnchor: {x: -0.00000011920929, y: -0.5, z: 0}
m_ConnectedAnchor: {x: 0, y: -0.5, z: 0}
serializedVersion: 2
m_SecondaryAxis: {x: 0, y: 0, z: -1}
m_XMotion: 0

m_Anchor: {x: 0, y: 0, z: -0.1}
m_Axis: {x: 1, y: 0, z: 0}
m_AutoConfigureConnectedAnchor: 1
m_ConnectedAnchor: {x: 0.00000011920929, y: -0.60000014, z: 0}
m_ConnectedAnchor: {x: 0, y: -0.60000014, z: 0}
serializedVersion: 2
m_SecondaryAxis: {x: 0, y: 1, z: 0}
m_XMotion: 0

m_Anchor: {x: 0, y: 0.5, z: 0}
m_Axis: {x: -1, y: 0, z: 0}
m_AutoConfigureConnectedAnchor: 1
m_ConnectedAnchor: {x: 0.00000011920929, y: -0.5, z: 0}
m_ConnectedAnchor: {x: 0, y: -0.5, z: 0}
serializedVersion: 2
m_SecondaryAxis: {x: 0, y: 0, z: -1}
m_XMotion: 0

m_Anchor: {x: 0, y: 0.5, z: 0}
m_Axis: {x: -1, y: 0, z: 0}
m_AutoConfigureConnectedAnchor: 1
m_ConnectedAnchor: {x: 0.39999408, y: -0.29999986, z: 0}
m_ConnectedAnchor: {x: 0.39999396, y: -0.29999995, z: 0}
serializedVersion: 2
m_SecondaryAxis: {x: 0, y: 0, z: -1}
m_XMotion: 0

m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 7933235354074184675}
serializedVersion: 2
m_Mass: 5
m_Mass: 6
m_Drag: 0.05
m_AngularDrag: 0.05
m_UseGravity: 1

m_Anchor: {x: 0, y: -0.85, z: 0}
m_Axis: {x: 1, y: 0, z: 0}
m_AutoConfigureConnectedAnchor: 1
m_ConnectedAnchor: {x: 0, y: 0.5119996, z: 0}
m_ConnectedAnchor: {x: 0, y: 0.5119997, z: 0}
serializedVersion: 2
m_SecondaryAxis: {x: 0, y: 0, z: -1}
m_XMotion: 0

m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 7933235354616748503}
serializedVersion: 2
m_Mass: 1
m_Mass: 2
m_Drag: 0.05
m_AngularDrag: 0.05
m_UseGravity: 1

m_Anchor: {x: 0, y: 0, z: 0}
m_Axis: {x: 1, y: 0, z: 0}
m_AutoConfigureConnectedAnchor: 1
m_ConnectedAnchor: {x: 0.70000064, y: 0, z: 0}
m_ConnectedAnchor: {x: 0.70000017, y: 0.00000011920929, z: 0}
serializedVersion: 2
m_SecondaryAxis: {x: 0, y: 0, z: 1}
m_XMotion: 0

m_Anchor: {x: 0, y: -0.3, z: 0}
m_Axis: {x: 1, y: 0, z: 0}
m_AutoConfigureConnectedAnchor: 1
m_ConnectedAnchor: {x: 0, y: 0.383, z: 0}
m_ConnectedAnchor: {x: 0, y: 0.3829999, z: 0}
serializedVersion: 2
m_SecondaryAxis: {x: 0, y: 0, z: -1}
m_XMotion: 0

m_Anchor: {x: 0, y: -0.5, z: 0}
m_Axis: {x: 1, y: 0, z: 0}
m_AutoConfigureConnectedAnchor: 1
m_ConnectedAnchor: {x: 0, y: 0.3050003, z: 0}
m_ConnectedAnchor: {x: 0, y: 0.30500042, z: 0}
serializedVersion: 2
m_SecondaryAxis: {x: 0, y: 0, z: -1}
m_XMotion: 0

m_Anchor: {x: 0, y: 0, z: -0.1}
m_Axis: {x: 1, y: 0, z: 0}
m_AutoConfigureConnectedAnchor: 1
m_ConnectedAnchor: {x: -0.00000011920929, y: -0.60000014, z: 0}
m_ConnectedAnchor: {x: 0, y: -0.60000014, z: 0}
serializedVersion: 2
m_SecondaryAxis: {x: 0, y: 1, z: 0}
m_XMotion: 0

m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 7933235355057813906}
serializedVersion: 2
m_Mass: 3
m_Mass: 4
m_Drag: 0.05
m_AngularDrag: 0.05
m_UseGravity: 1

m_Anchor: {x: -0.55, y: 0, z: 0}
m_Axis: {x: 0, y: 1, z: 0}
m_AutoConfigureConnectedAnchor: 1
m_ConnectedAnchor: {x: 0.7000002, y: 0, z: 0}
m_ConnectedAnchor: {x: 0.7000001, y: 0.00000011920929, z: 0}
serializedVersion: 2
m_SecondaryAxis: {x: 0, y: 0, z: 1}
m_XMotion: 0

type: 3}
m_PrefabInstance: {fileID: 7597605653427724053}
m_PrefabAsset: {fileID: 0}
--- !u!114 &7559180363928843817 stripped
MonoBehaviour:
m_CorrespondingSourceObject: {fileID: 114705911240010044, guid: 72f745913c5a34df5aaadd5c1f0024cb,
type: 3}
m_PrefabInstance: {fileID: 7597605653427724053}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 0}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 771e78c5e980e440e8cd19716b55075f, type: 3}
m_Name:
m_EditorClassIdentifier:

523
Project/Assets/ML-Agents/Examples/SharedAssets/Prefabs/PlatformDynamicTarget.prefab


%YAML 1.1
%TAG !u! tag:unity3d.com,2011:
--- !u!1 &6907050159044240885
GameObject:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
serializedVersion: 6
m_Component:
- component: {fileID: 6902197503240654641}
- component: {fileID: 6894500521640151429}
- component: {fileID: 6885223417161833361}
- component: {fileID: 6859132155796343735}
m_Layer: 0
m_Name: Wall (1)
m_TagString: Untagged
m_Icon: {fileID: 0}
m_NavMeshLayer: 0
m_StaticEditorFlags: 4294967295
m_IsActive: 1
--- !u!4 &6902197503240654641
Transform:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907050159044240885}
m_LocalRotation: {x: -0, y: -0, z: -0, w: 1}
m_LocalPosition: {x: -50, y: 0, z: 0}
m_LocalScale: {x: 1, y: 5, z: 101}
m_Children: []
m_Father: {fileID: 6902102727328990095}
m_RootOrder: 1
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
--- !u!33 &6894500521640151429
MeshFilter:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907050159044240885}
m_Mesh: {fileID: 10202, guid: 0000000000000000e000000000000000, type: 0}
--- !u!23 &6885223417161833361
MeshRenderer:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907050159044240885}
m_Enabled: 1
m_CastShadows: 1
m_ReceiveShadows: 1
m_DynamicOccludee: 1
m_MotionVectors: 1
m_LightProbeUsage: 1
m_ReflectionProbeUsage: 1
m_RenderingLayerMask: 1
m_RendererPriority: 0
m_Materials:
- {fileID: 2100000, guid: 66163cf35956a4be08e801b750c26f33, type: 2}
m_StaticBatchInfo:
firstSubMesh: 0
subMeshCount: 0
m_StaticBatchRoot: {fileID: 0}
m_ProbeAnchor: {fileID: 0}
m_LightProbeVolumeOverride: {fileID: 0}
m_ScaleInLightmap: 1
m_PreserveUVs: 0
m_IgnoreNormalsForChartDetection: 0
m_ImportantGI: 0
m_StitchLightmapSeams: 0
m_SelectedEditorRenderState: 3
m_MinimumChartSize: 4
m_AutoUVMaxDistance: 0.5
m_AutoUVMaxAngle: 89
m_LightmapParameters: {fileID: 0}
m_SortingLayerID: 0
m_SortingLayer: 0
m_SortingOrder: 0
--- !u!65 &6859132155796343735
BoxCollider:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907050159044240885}
m_Material: {fileID: 0}
m_IsTrigger: 0
m_Enabled: 1
serializedVersion: 2
m_Size: {x: 1, y: 1, z: 1}
m_Center: {x: 0, y: 0, z: 0}
--- !u!1 &6907401236047902865
GameObject:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
serializedVersion: 6
m_Component:
- component: {fileID: 6902265967514060089}
- component: {fileID: 6891025662345346653}
- component: {fileID: 6859036447448677835}
- component: {fileID: 6884684845870454579}
m_Layer: 14
m_Name: Ground
m_TagString: ground
m_Icon: {fileID: 0}
m_NavMeshLayer: 0
m_StaticEditorFlags: 4294967295
m_IsActive: 1
--- !u!4 &6902265967514060089
Transform:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907401236047902865}
m_LocalRotation: {x: 0, y: 0.7071068, z: 0, w: 0.7071068}
m_LocalPosition: {x: 0, y: 0, z: 0}
m_LocalScale: {x: 100, y: 1, z: 100}
m_Children: []
m_Father: {fileID: 6902107422946006027}
m_RootOrder: 1
m_LocalEulerAnglesHint: {x: 0, y: 90, z: 0}
--- !u!33 &6891025662345346653
MeshFilter:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907401236047902865}
m_Mesh: {fileID: 10202, guid: 0000000000000000e000000000000000, type: 0}
--- !u!65 &6859036447448677835
BoxCollider:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907401236047902865}
m_Material: {fileID: 0}
m_IsTrigger: 0
m_Enabled: 1
serializedVersion: 2
m_Size: {x: 1, y: 1, z: 1}
m_Center: {x: 0, y: 0, z: 0}
--- !u!23 &6884684845870454579
MeshRenderer:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907401236047902865}
m_Enabled: 1
m_CastShadows: 1
m_ReceiveShadows: 1
m_DynamicOccludee: 1
m_MotionVectors: 1
m_LightProbeUsage: 1
m_ReflectionProbeUsage: 1
m_RenderingLayerMask: 1
m_RendererPriority: 0
m_Materials:
- {fileID: 2100000, guid: acba6bf2a290a496bb8989b42bf8698d, type: 2}
m_StaticBatchInfo:
firstSubMesh: 0
subMeshCount: 0
m_StaticBatchRoot: {fileID: 0}
m_ProbeAnchor: {fileID: 0}
m_LightProbeVolumeOverride: {fileID: 0}
m_ScaleInLightmap: 1
m_PreserveUVs: 1
m_IgnoreNormalsForChartDetection: 0
m_ImportantGI: 0
m_StitchLightmapSeams: 0
m_SelectedEditorRenderState: 3
m_MinimumChartSize: 4
m_AutoUVMaxDistance: 0.5
m_AutoUVMaxAngle: 89
m_LightmapParameters: {fileID: 0}
m_SortingLayerID: 0
m_SortingLayer: 0
m_SortingOrder: 0
--- !u!1 &6907666814270504157
GameObject:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
serializedVersion: 6
m_Component:
- component: {fileID: 6902102727328990095}
m_Layer: 0
m_Name: Walls
m_TagString: Untagged
m_Icon: {fileID: 0}
m_NavMeshLayer: 0
m_StaticEditorFlags: 4294967295
m_IsActive: 1
--- !u!4 &6902102727328990095
Transform:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907666814270504157}
m_LocalRotation: {x: -0, y: -0, z: -0, w: 1}
m_LocalPosition: {x: 0, y: 2, z: 0}
m_LocalScale: {x: 1, y: 1, z: 1}
m_Children:
- {fileID: 6901873285403999439}
- {fileID: 6902197503240654641}
- {fileID: 6901900959948323433}
- {fileID: 6905948743199606957}
m_Father: {fileID: 6902107422946006027}
m_RootOrder: 0
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
--- !u!1 &6907680617094430597
GameObject:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
serializedVersion: 6
m_Component:
- component: {fileID: 6901873285403999439}
- component: {fileID: 6894618984257886823}
- component: {fileID: 6884854148710353183}
- component: {fileID: 6863062098498978603}
m_Layer: 0
m_Name: Wall
m_TagString: Untagged
m_Icon: {fileID: 0}
m_NavMeshLayer: 0
m_StaticEditorFlags: 4294967295
m_IsActive: 1
--- !u!4 &6901873285403999439
Transform:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907680617094430597}
m_LocalRotation: {x: 0, y: 0, z: 0, w: 1}
m_LocalPosition: {x: 50, y: 0, z: 0}
m_LocalScale: {x: 1, y: 5, z: 101}
m_Children: []
m_Father: {fileID: 6902102727328990095}
m_RootOrder: 0
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
--- !u!33 &6894618984257886823
MeshFilter:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907680617094430597}
m_Mesh: {fileID: 10202, guid: 0000000000000000e000000000000000, type: 0}
--- !u!23 &6884854148710353183
MeshRenderer:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907680617094430597}
m_Enabled: 1
m_CastShadows: 1
m_ReceiveShadows: 1
m_DynamicOccludee: 1
m_MotionVectors: 1
m_LightProbeUsage: 1
m_ReflectionProbeUsage: 1
m_RenderingLayerMask: 1
m_RendererPriority: 0
m_Materials:
- {fileID: 2100000, guid: 66163cf35956a4be08e801b750c26f33, type: 2}
m_StaticBatchInfo:
firstSubMesh: 0
subMeshCount: 0
m_StaticBatchRoot: {fileID: 0}
m_ProbeAnchor: {fileID: 0}
m_LightProbeVolumeOverride: {fileID: 0}
m_ScaleInLightmap: 1
m_PreserveUVs: 0
m_IgnoreNormalsForChartDetection: 0
m_ImportantGI: 0
m_StitchLightmapSeams: 0
m_SelectedEditorRenderState: 3
m_MinimumChartSize: 4
m_AutoUVMaxDistance: 0.5
m_AutoUVMaxAngle: 89
m_LightmapParameters: {fileID: 0}
m_SortingLayerID: 0
m_SortingLayer: 0
m_SortingOrder: 0
--- !u!65 &6863062098498978603
BoxCollider:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907680617094430597}
m_Material: {fileID: 0}
m_IsTrigger: 0
m_Enabled: 1
serializedVersion: 2
m_Size: {x: 1, y: 1, z: 1}
m_Center: {x: 0, y: 0, z: 0}
--- !u!1 &6907740118844148851
GameObject:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
serializedVersion: 6
m_Component:
- component: {fileID: 6902107422946006027}
m_Layer: 0
m_Name: PlatformDynamicTarget
m_TagString: Untagged
m_Icon: {fileID: 0}
m_NavMeshLayer: 0
m_StaticEditorFlags: 0
m_IsActive: 1
--- !u!4 &6902107422946006027
Transform:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907740118844148851}
m_LocalRotation: {x: 0, y: 0, z: 0, w: 1}
m_LocalPosition: {x: 0, y: 0, z: 0}
m_LocalScale: {x: 1, y: 1, z: 1}
m_Children:
- {fileID: 6902102727328990095}
- {fileID: 6902265967514060089}
m_Father: {fileID: 0}
m_RootOrder: 0
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
--- !u!1 &6907828132384848309
GameObject:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
serializedVersion: 6
m_Component:
- component: {fileID: 6905948743199606957}
- component: {fileID: 6894463671975680535}
- component: {fileID: 6884868534516719387}
- component: {fileID: 6859048605259525735}
m_Layer: 0
m_Name: Wall (3)
m_TagString: Untagged
m_Icon: {fileID: 0}
m_NavMeshLayer: 0
m_StaticEditorFlags: 4294967295
m_IsActive: 1
--- !u!4 &6905948743199606957
Transform:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907828132384848309}
m_LocalRotation: {x: -0, y: -0, z: -0, w: 1}
m_LocalPosition: {x: 0, y: 0, z: -50}
m_LocalScale: {x: 100, y: 5, z: 1}
m_Children: []
m_Father: {fileID: 6902102727328990095}
m_RootOrder: 3
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
--- !u!33 &6894463671975680535
MeshFilter:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907828132384848309}
m_Mesh: {fileID: 10202, guid: 0000000000000000e000000000000000, type: 0}
--- !u!23 &6884868534516719387
MeshRenderer:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907828132384848309}
m_Enabled: 1
m_CastShadows: 1
m_ReceiveShadows: 1
m_DynamicOccludee: 1
m_MotionVectors: 1
m_LightProbeUsage: 1
m_ReflectionProbeUsage: 1
m_RenderingLayerMask: 1
m_RendererPriority: 0
m_Materials:
- {fileID: 2100000, guid: 66163cf35956a4be08e801b750c26f33, type: 2}
m_StaticBatchInfo:
firstSubMesh: 0
subMeshCount: 0
m_StaticBatchRoot: {fileID: 0}
m_ProbeAnchor: {fileID: 0}
m_LightProbeVolumeOverride: {fileID: 0}
m_ScaleInLightmap: 1
m_PreserveUVs: 0
m_IgnoreNormalsForChartDetection: 0
m_ImportantGI: 0
m_StitchLightmapSeams: 0
m_SelectedEditorRenderState: 3
m_MinimumChartSize: 4
m_AutoUVMaxDistance: 0.5
m_AutoUVMaxAngle: 89
m_LightmapParameters: {fileID: 0}
m_SortingLayerID: 0
m_SortingLayer: 0
m_SortingOrder: 0
--- !u!65 &6859048605259525735
BoxCollider:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907828132384848309}
m_Material: {fileID: 0}
m_IsTrigger: 0
m_Enabled: 1
serializedVersion: 2
m_Size: {x: 1, y: 1, z: 1}
m_Center: {x: 0, y: 0, z: 0}
--- !u!1 &6907860845836169157
GameObject:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
serializedVersion: 6
m_Component:
- component: {fileID: 6901900959948323433}
- component: {fileID: 6893927248293796423}
- component: {fileID: 6885176866006237333}
- component: {fileID: 6859395915623032135}
m_Layer: 0
m_Name: Wall (2)
m_TagString: Untagged
m_Icon: {fileID: 0}
m_NavMeshLayer: 0
m_StaticEditorFlags: 4294967295
m_IsActive: 1
--- !u!4 &6901900959948323433
Transform:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907860845836169157}
m_LocalRotation: {x: -0, y: -0, z: -0, w: 1}
m_LocalPosition: {x: 0, y: 0, z: 50}
m_LocalScale: {x: 100, y: 5, z: 1}
m_Children: []
m_Father: {fileID: 6902102727328990095}
m_RootOrder: 2
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
--- !u!33 &6893927248293796423
MeshFilter:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907860845836169157}
m_Mesh: {fileID: 10202, guid: 0000000000000000e000000000000000, type: 0}
--- !u!23 &6885176866006237333
MeshRenderer:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907860845836169157}
m_Enabled: 1
m_CastShadows: 1
m_ReceiveShadows: 1
m_DynamicOccludee: 1
m_MotionVectors: 1
m_LightProbeUsage: 1
m_ReflectionProbeUsage: 1
m_RenderingLayerMask: 1
m_RendererPriority: 0
m_Materials:
- {fileID: 2100000, guid: 66163cf35956a4be08e801b750c26f33, type: 2}
m_StaticBatchInfo:
firstSubMesh: 0
subMeshCount: 0
m_StaticBatchRoot: {fileID: 0}
m_ProbeAnchor: {fileID: 0}
m_LightProbeVolumeOverride: {fileID: 0}
m_ScaleInLightmap: 1
m_PreserveUVs: 0
m_IgnoreNormalsForChartDetection: 0
m_ImportantGI: 0
m_StitchLightmapSeams: 0
m_SelectedEditorRenderState: 3
m_MinimumChartSize: 4
m_AutoUVMaxDistance: 0.5
m_AutoUVMaxAngle: 89
m_LightmapParameters: {fileID: 0}
m_SortingLayerID: 0
m_SortingLayer: 0
m_SortingOrder: 0
--- !u!65 &6859395915623032135
BoxCollider:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907860845836169157}
m_Material: {fileID: 0}
m_IsTrigger: 0
m_Enabled: 1
serializedVersion: 2
m_Size: {x: 1, y: 1, z: 1}
m_Center: {x: 0, y: 0, z: 0}

7
Project/Assets/ML-Agents/Examples/SharedAssets/Prefabs/PlatformDynamicTarget.prefab.meta


fileFormatVersion: 2
guid: f0d7741d9e06247f6843b921a206b978
PrefabImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

8
Project/Assets/ML-Agents/Examples/SharedAssets/Prefabs/Targets.meta


fileFormatVersion: 2
guid: 88818c9b63c96424aa8e0fca85552133
folderAsset: yes
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

10
Project/Assets/ML-Agents/Examples/Walker/Demos/ExpertWalkerDy.demo.meta


fileFormatVersion: 2
guid: 9f87b3070a0fd4a1e838131a91399c2f
ScriptedImporter:
fileIDToRecycleName:
11400000: Assets/Demonstrations/ExpertWalkerDy.demo
externalObjects: {}
userData: ' (Unity.MLAgents.Demonstrations.DemonstrationSummary)'
assetBundleName:
assetBundleVariant:
script: {fileID: 11500000, guid: 7bd65ce151aaa4a41a45312543c56be1, type: 3}

10
Project/Assets/ML-Agents/Examples/Walker/Demos/ExpertWalkerDyVS.demo.meta


fileFormatVersion: 2
guid: a4b02e2c382c247919eb63ce72e90a3b
ScriptedImporter:
fileIDToRecycleName:
11400000: Assets/Demonstrations/ExpertWalkerDyVS.demo
externalObjects: {}
userData: ' (Unity.MLAgents.Demonstrations.DemonstrationSummary)'
assetBundleName:
assetBundleVariant:
script: {fileID: 11500000, guid: 7bd65ce151aaa4a41a45312543c56be1, type: 3}

10
Project/Assets/ML-Agents/Examples/Walker/Demos/ExpertWalkerStVS.demo.meta


fileFormatVersion: 2
guid: edcbb505552464c5c829886a4a3817dd
ScriptedImporter:
fileIDToRecycleName:
11400000: Assets/Demonstrations/ExpertWalkerStVS.demo
externalObjects: {}
userData: ' (Unity.MLAgents.Demonstrations.DemonstrationSummary)'
assetBundleName:
assetBundleVariant:
script: {fileID: 11500000, guid: 7bd65ce151aaa4a41a45312543c56be1, type: 3}

10
Project/Assets/ML-Agents/Examples/Walker/Demos/ExpertWalkerSta.demo.meta


fileFormatVersion: 2
guid: 1f3a5d62e6aea4b5eb053ac33f11b06d
ScriptedImporter:
fileIDToRecycleName:
11400000: Assets/Demonstrations/ExpertWalkerSta.demo
externalObjects: {}
userData: ' (Unity.MLAgents.Demonstrations.DemonstrationSummary)'
assetBundleName:
assetBundleVariant:
script: {fileID: 11500000, guid: 7bd65ce151aaa4a41a45312543c56be1, type: 3}

8
Project/Assets/ML-Agents/Examples/Walker/Prefabs/Platforms.meta


fileFormatVersion: 2
guid: cd296ba30964e4cf086044f1a7618c0b
folderAsset: yes
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

8
Project/Assets/ML-Agents/Examples/Walker/Prefabs/Ragdoll.meta


fileFormatVersion: 2
guid: d64d77dc566364a31896e5da2ac8534b
folderAsset: yes
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

1001
Project/Assets/ML-Agents/Examples/Walker/Scenes/WalkerDynamicVariableSpeed.unity
Diff content too large to display

7
Project/Assets/ML-Agents/Examples/Walker/Scenes/WalkerDynamicVariableSpeed.unity.meta


fileFormatVersion: 2
guid: 2b839ee93e7a4467f9f8b4803c4a239b
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

Some files were not shown because too many files changed in this diff
