
Merging master

/develop/rm-rf-new-models
vincentpierre 4 years ago
Current commit
14378aa5
37 changed files with 510 additions and 459 deletions
  1. com.unity.ml-agents/CHANGELOG.md (8)
  2. com.unity.ml-agents/Editor/BehaviorParametersEditor.cs (29)
  3. com.unity.ml-agents/Editor/BrainParametersDrawer.cs (58)
  4. com.unity.ml-agents/Editor/DemonstrationDrawer.cs (46)
  5. com.unity.ml-agents/Runtime/Actuators/ActionSegment.cs (1)
  6. com.unity.ml-agents/Runtime/Actuators/ActionSpec.cs (26)
  7. com.unity.ml-agents/Runtime/Actuators/ActuatorManager.cs (2)
  8. com.unity.ml-agents/Runtime/Actuators/IActionReceiver.cs (41)
  9. com.unity.ml-agents/Runtime/Actuators/VectorActuator.cs (32)
  10. com.unity.ml-agents/Runtime/Agent.cs (3)
  11. com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs (58)
  12. com.unity.ml-agents/Runtime/Inference/BarracudaModelParamLoader.cs (48)
  13. com.unity.ml-agents/Runtime/Policies/BrainParameters.cs (109)
  14. com.unity.ml-agents/Tests/Editor/Actuators/VectorActuatorTests.cs (10)
  15. com.unity.ml-agents/Tests/Editor/DemonstrationTests.cs (6)
  16. com.unity.ml-agents/Tests/Editor/EditModeTestInternalBrainTensorApplier.cs (4)
  17. com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs (27)
  18. com.unity.ml-agents/Tests/Editor/ModelRunnerTest.cs (2)
  19. com.unity.ml-agents/Tests/Editor/ParameterLoaderTest.cs (121)
  20. com.unity.ml-agents/Tests/Runtime/RuntimeAPITest.cs (3)
  21. com.unity.ml-agents/package.json (2)
  22. docs/Getting-Started.md (5)
  23. docs/Learning-Environment-Design-Agents.md (59)
  24. docs/Learning-Environment-Examples.md (38)
  25. docs/Learning-Environment-Executable.md (3)
  26. docs/ML-Agents-Overview.md (4)
  27. docs/Python-API.md (3)
  28. docs/Training-Configuration-File.md (4)
  29. docs/Training-on-Microsoft-Azure.md (11)
  30. ml-agents-envs/mlagents_envs/base_env.py (4)
  31. ml-agents/mlagents/trainers/cli_utils.py (14)
  32. ml-agents/mlagents/trainers/demo_loader.py (2)
  33. ml-agents/mlagents/trainers/policy/torch_policy.py (4)
  34. ml-agents/mlagents/trainers/tests/mock_brain.py (3)
  35. ml-agents/mlagents/trainers/tests/torch/test_hybrid.py (2)
  36. ml-agents/mlagents/trainers/torch/networks.py (84)
  37. docs/images/monitor.png (93)

8
com.unity.ml-agents/CHANGELOG.md


#### com.unity.ml-agents (C#)
#### ml-agents / ml-agents-envs / gym-unity (Python)
- TensorFlow trainers have been removed; please use the Torch trainers instead. (#4707)
- PyTorch trainers now support training agents with both continuous and discrete action spaces.
Currently, this can only be done with Actuators. Please see
[here](../Project/Assets/ML-Agents/Examples/Basic/Scripts/BasicActuatorComponent.cs) for an
example of how to use Actuators. (#4702)
- PyTorch trainers now support training agents with both continuous and discrete action spaces. (#4702)
- Agents with both continuous and discrete actions are now supported. You can specify
both continuous and discrete action sizes in Behavior Parameters. (#4702, #4718)
#### ml-agents / ml-agents-envs / gym-unity (Python)
- `ActionSpec.validate_action()` now enforces that `UnityEnvironment.set_action_for_agent()` receives a 1D `np.array`.
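
To make the hybrid-action support above concrete, here is a minimal C# sketch of giving a behavior both continuous and discrete action sizes from code, mirroring the editor tests in this change rather than the linked BasicActuatorComponent example. The class and method names are illustrative, and the sketch relies on the public `ActionSpec.MakeContinuous` factory plus the `BranchSizes` field shown in the ActionSpec.cs diff below.

```csharp
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Policies;
using UnityEngine;

public static class HybridActionExample
{
    // Illustrative helper: 2 continuous actions plus one discrete branch with 3 choices.
    public static void ConfigureHybridActions(GameObject agentObject)
    {
        var behavior = agentObject.AddComponent<BehaviorParameters>();

        // Start from a continuous spec and add the discrete branch sizes.
        var spec = ActionSpec.MakeContinuous(2);
        spec.BranchSizes = new[] { 3 };

        // The setter also keeps the deprecated VectorAction* fields in sync
        // (see the BrainParameters.cs changes below).
        behavior.BrainParameters.ActionSpec = spec;
    }
}
```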

29
com.unity.ml-agents/Editor/BehaviorParametersEditor.cs


float m_TimeSinceModelReload;
// Whether or not the model needs to be reloaded
bool m_RequireReload;
const string k_BehaviorName = "m_BehaviorName";
const string k_BrainParametersName = "m_BrainParameters";
const string k_ModelName = "m_Model";
const string k_InferenceDeviceName = "m_InferenceDevice";
const string k_BehaviorTypeName = "m_BehaviorType";
const string k_TeamIdName = "TeamId";
const string k_UseChildSensorsName = "m_UseChildSensors";
const string k_ObservableAttributeHandlingName = "m_ObservableAttributeHandling";
public override void OnInspectorGUI()
{

bool needBrainParametersUpdate; // Whether the brain parameters changed
// Drawing the Behavior Parameters
EditorGUI.indentLevel++;

{
EditorGUILayout.PropertyField(so.FindProperty("m_BehaviorName"));
EditorGUILayout.PropertyField(so.FindProperty(k_BehaviorName));
EditorGUI.BeginChangeCheck();
EditorGUILayout.PropertyField(so.FindProperty("m_BrainParameters"), true);
EditorGUILayout.PropertyField(so.FindProperty(k_BrainParametersName), true);
needBrainParametersUpdate = EditorGUI.EndChangeCheck();
EditorGUILayout.PropertyField(so.FindProperty("m_Model"), true);
EditorGUILayout.PropertyField(so.FindProperty(k_ModelName), true);
EditorGUILayout.PropertyField(so.FindProperty("m_InferenceDevice"), true);
EditorGUILayout.PropertyField(so.FindProperty(k_InferenceDeviceName), true);
EditorGUI.indentLevel--;
}
needPolicyUpdate = needPolicyUpdate || EditorGUI.EndChangeCheck();

EditorGUILayout.PropertyField(so.FindProperty("m_BehaviorType"));
EditorGUILayout.PropertyField(so.FindProperty(k_BehaviorTypeName));
EditorGUILayout.PropertyField(so.FindProperty("TeamId"));
EditorGUILayout.PropertyField(so.FindProperty(k_TeamIdName));
EditorGUILayout.PropertyField(so.FindProperty("m_UseChildSensors"), true);
EditorGUILayout.PropertyField(so.FindProperty("m_ObservableAttributeHandling"), true);
EditorGUILayout.PropertyField(so.FindProperty(k_UseChildSensorsName), true);
EditorGUILayout.PropertyField(so.FindProperty(k_ObservableAttributeHandlingName), true);
}
EditorGUI.EndDisabledGroup();

// Display all failed checks
D.logEnabled = false;
Model barracudaModel = null;
var model = (NNModel)serializedObject.FindProperty("m_Model").objectReferenceValue;
var model = (NNModel)serializedObject.FindProperty(k_ModelName).objectReferenceValue;
var behaviorParameters = (BehaviorParameters)target;
// Grab the sensor components, since we need them to determine the observation sizes.

58
com.unity.ml-agents/Editor/BrainParametersDrawer.cs


// The height of a line in the Unity Inspectors
const float k_LineHeight = 17f;
const int k_VecObsNumLine = 3;
const string k_ActionSizePropName = "VectorActionSize";
const string k_ActionTypePropName = "VectorActionSpaceType";
const string k_ActionSpecName = "m_ActionSpec";
const string k_ContinuousActionSizeName = "m_NumContinuousActions";
const string k_DiscreteBranchSizeName = "BranchSizes";
const string k_ActionDescriptionPropName = "VectorActionDescriptions";
const string k_VecObsPropName = "VectorObservationSize";
const string k_NumVecObsPropName = "NumStackedVectorObservations";

EditorGUI.LabelField(position, "Vector Action");
position.y += k_LineHeight;
EditorGUI.indentLevel++;
var bpVectorActionType = property.FindPropertyRelative(k_ActionTypePropName);
EditorGUI.PropertyField(
position,
bpVectorActionType,
new GUIContent("Space Type",
"Corresponds to whether state vector contains a single integer (Discrete) " +
"or a series of real-valued floats (Continuous)."));
var actionSpecProperty = property.FindPropertyRelative(k_ActionSpecName);
DrawContinuousVectorAction(position, actionSpecProperty);
if (bpVectorActionType.enumValueIndex == 1)
{
DrawContinuousVectorAction(position, property);
}
else
{
DrawDiscreteVectorAction(position, property);
}
DrawDiscreteVectorAction(position, actionSpecProperty);
}
/// <summary>

/// to make the custom GUI for.</param>
static void DrawContinuousVectorAction(Rect position, SerializedProperty property)
{
var vecActionSize = property.FindPropertyRelative(k_ActionSizePropName);
// This check is here due to:
// https://fogbugz.unity3d.com/f/cases/1246524/
// If this case has been resolved, please remove this if condition.
if (vecActionSize.arraySize != 1)
{
vecActionSize.arraySize = 1;
}
var continuousActionSize =
vecActionSize.GetArrayElementAtIndex(0);
var continuousActionSize = property.FindPropertyRelative(k_ContinuousActionSizeName);
new GUIContent("Space Size", "Length of continuous action vector."));
new GUIContent("Continuous Action Size", "Length of continuous action vector."));
}
/// <summary>

/// to make the custom GUI for.</param>
static void DrawDiscreteVectorAction(Rect position, SerializedProperty property)
{
var vecActionSize = property.FindPropertyRelative(k_ActionSizePropName);
var branchSizes = property.FindPropertyRelative(k_DiscreteBranchSizeName);
position, "Branches Size", vecActionSize.arraySize);
position, "Discrete Branch Size", branchSizes.arraySize);
if (newSize != vecActionSize.arraySize)
if (newSize != branchSizes.arraySize)
vecActionSize.arraySize = newSize;
branchSizes.arraySize = newSize;
}
position.y += k_LineHeight;

branchIndex < vecActionSize.arraySize;
branchIndex < branchSizes.arraySize;
vecActionSize.GetArrayElementAtIndex(branchIndex);
branchSizes.GetArrayElementAtIndex(branchIndex);
EditorGUI.PropertyField(
position,

/// <returns>The height of the drawer of the Vector Action.</returns>
static float GetHeightDrawVectorAction(SerializedProperty property)
{
var actionSize = 2 + property.FindPropertyRelative(k_ActionSizePropName).arraySize;
if (property.FindPropertyRelative(k_ActionTypePropName).enumValueIndex == 0)
{
actionSize += 1;
}
return actionSize * k_LineHeight;
var actionSpecProperty = property.FindPropertyRelative(k_ActionSpecName);
var numActionLines = 3 + actionSpecProperty.FindPropertyRelative(k_DiscreteBranchSizeName).arraySize;
return numActionLines * k_LineHeight;
}
}
}

46
com.unity.ml-agents/Editor/DemonstrationDrawer.cs


using System.Text;
using UnityEditor;
using Unity.MLAgents.Demonstrations;
using Unity.MLAgents.Policies;
namespace Unity.MLAgents.Editor

SerializedProperty m_BrainParameters;
SerializedProperty m_DemoMetaData;
SerializedProperty m_ObservationShapes;
const string k_BrainParametersName = "brainParameters";
const string k_MetaDataName = "metaData";
const string k_ObservationSummariesName = "observationSummaries";
const string k_DemonstrationName = "demonstrationName";
const string k_NumberStepsName = "numberSteps";
const string k_NumberEpisodesName = "numberEpisodes";
const string k_MeanRewardName = "meanReward";
const string k_ActionSpecName = "ActionSpec";
const string k_NumContinuousActionsName = "m_NumContinuousActions";
const string k_NumDiscreteActionsName = "m_NumDiscreteActions";
const string k_ShapeName = "shape";
m_BrainParameters = serializedObject.FindProperty("brainParameters");
m_DemoMetaData = serializedObject.FindProperty("metaData");
m_ObservationShapes = serializedObject.FindProperty("observationSummaries");
m_BrainParameters = serializedObject.FindProperty(k_BrainParametersName);
m_DemoMetaData = serializedObject.FindProperty(k_MetaDataName);
m_ObservationShapes = serializedObject.FindProperty(k_ObservationSummariesName);
}
/// <summary>

{
var nameProp = property.FindPropertyRelative("demonstrationName");
var experiencesProp = property.FindPropertyRelative("numberSteps");
var episodesProp = property.FindPropertyRelative("numberEpisodes");
var rewardsProp = property.FindPropertyRelative("meanReward");
var nameProp = property.FindPropertyRelative(k_DemonstrationName);
var experiencesProp = property.FindPropertyRelative(k_NumberStepsName);
var episodesProp = property.FindPropertyRelative(k_NumberEpisodesName);
var rewardsProp = property.FindPropertyRelative(k_MeanRewardName);
var nameLabel = nameProp.displayName + ": " + nameProp.stringValue;
var experiencesLabel = experiencesProp.displayName + ": " + experiencesProp.intValue;

/// </summary>
void MakeActionsProperty(SerializedProperty property)
{
var actSizeProperty = property.FindPropertyRelative("VectorActionSize");
var actSpaceTypeProp = property.FindPropertyRelative("VectorActionSpaceType");
var actSpecProperty = property.FindPropertyRelative(k_ActionSpecName);
var continuousSizeProperty = actSpecProperty.FindPropertyRelative(k_NumContinuousActionsName);
var discreteSizeProperty = actSpecProperty.FindPropertyRelative(k_NumDiscreteActionsName);
var vecActSizeLabel =
actSizeProperty.displayName + ": " + BuildIntArrayLabel(actSizeProperty);
var actSpaceTypeLabel = actSpaceTypeProp.displayName + ": " +
(SpaceType)actSpaceTypeProp.enumValueIndex;
var continuousSizeLabel =
continuousSizeProperty.displayName + ": " + continuousSizeProperty.intValue;
var discreteSizeLabel = discreteSizeProperty.displayName + ": " +
discreteSizeProperty.intValue;
EditorGUILayout.LabelField(vecActSizeLabel);
EditorGUILayout.LabelField(actSpaceTypeLabel);
EditorGUILayout.LabelField(continuousSizeLabel);
EditorGUILayout.LabelField(discreteSizeLabel);
}
/// <summary>

for (var i = 0; i < numObservations; i++)
{
var summary = obsSummariesProperty.GetArrayElementAtIndex(i);
var shapeProperty = summary.FindPropertyRelative("shape");
var shapeProperty = summary.FindPropertyRelative(k_ShapeName);
shapesLabels.Add(BuildIntArrayLabel(shapeProperty));
}

1
com.unity.ml-agents/Runtime/Actuators/ActionSegment.cs


/// <summary>
/// Check if the segment is empty.
/// </summary>
/// <returns>Whether or not the segment is empty.</returns>
public bool IsEmpty()
{
return Array == null || Array.Length == 0;

26
com.unity.ml-agents/Runtime/Actuators/ActionSpec.cs


using System;
using System.Collections.Generic;
using UnityEngine;
namespace Unity.MLAgents.Actuators
{

public readonly struct ActionSpec
[Serializable]
public struct ActionSpec
[SerializeField]
int m_NumContinuousActions;
/// <summary>
/// An array of branch sizes for our action space.
///

///
/// For an IActuator with a Continuous it will be null.
/// </summary>
public readonly int[] BranchSizes;
public int[] BranchSizes;
public int NumContinuousActions { get; }
public int NumContinuousActions { get { return m_NumContinuousActions; } set { m_NumContinuousActions = value; } }
public int NumDiscreteActions { get; }
public int NumDiscreteActions { get { return BranchSizes == null ? 0 : BranchSizes.Length; } }
public int SumOfDiscreteBranchSizes { get; }
public int SumOfDiscreteBranchSizes { get { return BranchSizes == null ? 0 : BranchSizes.Sum(); } }
/// <summary>
/// Creates a Continuous <see cref="ActionSpec"/> with the number of actions available.

public static ActionSpec MakeContinuous(int numActions)
{
var actuatorSpace = new ActionSpec(numActions, 0);
var actuatorSpace = new ActionSpec(numActions, null);
return actuatorSpace;
}

public static ActionSpec MakeDiscrete(params int[] branchSizes)
{
var numActions = branchSizes.Length;
var actuatorSpace = new ActionSpec(0, numActions, branchSizes);
var actuatorSpace = new ActionSpec(0, branchSizes);
internal ActionSpec(int numContinuousActions, int numDiscreteActions, int[] branchSizes = null)
internal ActionSpec(int numContinuousActions, int[] branchSizes = null)
NumContinuousActions = numContinuousActions;
NumDiscreteActions = numDiscreteActions;
m_NumContinuousActions = numContinuousActions;
SumOfDiscreteBranchSizes = branchSizes?.Sum() ?? 0;
}
/// <summary>

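As a quick sketch of the serializable ActionSpec above: the discrete counts are now derived from `BranchSizes` instead of being stored, so the two factory methods each fill in only one side of the spec. The wrapper class below is illustrative.

```csharp
using Unity.MLAgents.Actuators;
using UnityEngine;

static class ActionSpecSketch
{
    static void Show()
    {
        // Discrete-only spec: three branches with 3, 2 and 2 choices.
        var discrete = ActionSpec.MakeDiscrete(3, 2, 2);
        Debug.Log(discrete.NumDiscreteActions);        // 3 (one per branch)
        Debug.Log(discrete.SumOfDiscreteBranchSizes);  // 3 + 2 + 2 = 7
        Debug.Log(discrete.NumContinuousActions);      // 0

        // Continuous-only spec: BranchSizes stays null, so the discrete counts fall back to 0.
        var continuous = ActionSpec.MakeContinuous(4);
        Debug.Log(continuous.NumContinuousActions);    // 4
        Debug.Log(continuous.NumDiscreteActions);      // 0
    }
}
```
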
2
com.unity.ml-agents/Runtime/Actuators/ActuatorManager.cs


}
}
return new ActionSpec(numContinuousActions, numDiscreteActions, combinedBranchSizes);
return new ActionSpec(numContinuousActions, combinedBranchSizes);
}
/// <summary>

41
com.unity.ml-agents/Runtime/Actuators/IActionReceiver.cs


return (ContinuousActions.GetHashCode() * 397) ^ DiscreteActions.GetHashCode();
}
}
/// <summary>
/// Packs the continuous and discrete actions into one float array. The array passed into this method
/// must have a Length that is greater than or equal to the sum of the Lengths of
/// <see cref="ContinuousActions"/> and <see cref="DiscreteActions"/>.
/// </summary>
/// <param name="destination">A float array to pack actions into whose length is greater than or
/// equal to the addition of the Lengths of this objects <see cref="ContinuousActions"/> and
/// <see cref="DiscreteActions"/> segments.</param>
/// [Obsolete("PackActions has been deprecated.")]
public void PackActions(in float[] destination)
{
Debug.Assert(destination.Length >= ContinuousActions.Length + DiscreteActions.Length,
$"argument '{nameof(destination)}' is not large enough to pack the actions into.\n" +
$"{nameof(destination)}.Length: {destination.Length}\n" +
$"{nameof(ContinuousActions)}.Length + {nameof(DiscreteActions)}.Length: {ContinuousActions.Length + DiscreteActions.Length}");
var start = 0;
if (ContinuousActions.Length > 0)
{
Array.Copy(ContinuousActions.Array,
ContinuousActions.Offset,
destination,
start,
ContinuousActions.Length);
start = ContinuousActions.Length;
}
if (start >= destination.Length)
{
return;
}
if (DiscreteActions.Length > 0)
{
Array.Copy(DiscreteActions.Array,
DiscreteActions.Offset,
destination,
start,
DiscreteActions.Length);
}
}
}
/// <summary>

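A usage sketch for the `PackActions` method above: it copies the continuous segment first and the discrete values after it into a single float array. The `ActionSegment` and `ActionBuffers` constructors are assumed from the VectorActuator tests in this change; the wrapper class is illustrative.

```csharp
using Unity.MLAgents.Actuators;

static class PackActionsSketch
{
    static float[] Pack()
    {
        // One continuous action followed by two discrete branch values.
        var continuous = new ActionSegment<float>(new[] { 0.5f });
        var discrete = new ActionSegment<int>(new[] { 1, 2 });
        var buffers = new ActionBuffers(continuous, discrete);

        // Destination must be at least ContinuousActions.Length + DiscreteActions.Length long.
        var packed = new float[3];
        buffers.PackActions(packed);  // packed now holds { 0.5, 1, 2 }
        return packed;
    }
}
```
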
32
com.unity.ml-agents/Runtime/Actuators/VectorActuator.cs


/// Create a VectorActuator that forwards to the provided IActionReceiver.
/// </summary>
/// <param name="actionReceiver">The <see cref="IActionReceiver"/> used for OnActionReceived and WriteDiscreteActionMask.</param>
/// <param name="vectorActionSize">For discrete action spaces, the branch sizes for each action.
/// For continuous action spaces, the number of actions is the 0th element.</param>
/// <param name="spaceType"></param>
/// <param name="actionSpec"></param>
/// <exception cref="ArgumentOutOfRangeException">Thrown for invalid <see cref="SpaceType"/></exception>
int[] vectorActionSize,
SpaceType spaceType,
ActionSpec actionSpec,
ActionSpec = actionSpec;
switch (spaceType)
if (actionSpec.NumContinuousActions == 0)
case SpaceType.Continuous:
ActionSpec = ActionSpec.MakeContinuous(vectorActionSize[0]);
suffix = "-Continuous";
break;
case SpaceType.Discrete:
ActionSpec = ActionSpec.MakeDiscrete(vectorActionSize);
suffix = "-Discrete";
break;
default:
throw new ArgumentOutOfRangeException(nameof(spaceType),
spaceType,
"Unknown enum value.");
suffix = "-Discrete";
}
else if (actionSpec.NumDiscreteActions == 0)
{
suffix = "-Continuous";
}
else
{
suffix = $"-Continuous-{actionSpec.NumContinuousActions}-Discrete-{actionSpec.NumDiscreteActions}";
}
Name = name + suffix;
}

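A short sketch of the ActionSpec-based `VectorActuator` constructor above, including the name suffix it derives from the spec. `NoOpReceiver` is a hypothetical stand-in for a real `IActionReceiver`, and the constructor is called the way VectorActuatorTests.cs does further down (which may require access to the package internals).

```csharp
using Unity.MLAgents.Actuators;

// Hypothetical receiver, only here to satisfy the constructor.
class NoOpReceiver : IActionReceiver
{
    public void OnActionReceived(ActionBuffers actionBuffers) { }
    public void WriteDiscreteActionMask(IDiscreteActionMask actionMask) { }
}

static class VectorActuatorNamingSketch
{
    static void Show()
    {
        var receiver = new NoOpReceiver();

        // Discrete-only spec: the actuator name gets the "-Discrete" suffix.
        var discrete = new VectorActuator(receiver, ActionSpec.MakeDiscrete(1, 2, 3), "agent");

        // Continuous-only spec: the name gets the "-Continuous" suffix instead.
        var continuous = new VectorActuator(receiver, ActionSpec.MakeContinuous(4), "agent");
    }
}
```
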
3
com.unity.ml-agents/Runtime/Agent.cs


using System;
using System.Collections.Generic;
using System.Collections.ObjectModel;
using System.Linq;
using UnityEngine;
using Unity.Barracuda;
using Unity.MLAgents.Actuators;

// Support legacy OnActionReceived
// TODO don't set this up if the sizes are 0?
var param = m_PolicyFactory.BrainParameters;
m_VectorActuator = new VectorActuator(this, param.VectorActionSize, param.VectorActionSpaceType);
m_VectorActuator = new VectorActuator(this, param.ActionSpec);
m_ActuatorManager = new ActuatorManager(attachedActuators.Length + 1);
m_LegacyActionCache = new float[m_VectorActuator.TotalNumberOfActions()];

58
com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs


{
var brainParametersProto = new BrainParametersProto
{
VectorActionSizeDeprecated = { bp.VectorActionSize },
IsTraining = isTraining
IsTraining = isTraining,
ActionSpec = ToActionSpecProto(bp.ActionSpec),
if (bp.VectorActionSize != null)
{
brainParametersProto.VectorActionSizeDeprecated.AddRange(bp.VectorActionSize);
}
if (bp.VectorActionDescriptions != null)
{
brainParametersProto.VectorActionDescriptionsDeprecated.AddRange(bp.VectorActionDescriptions);

var brainParametersProto = new BrainParametersProto
{
BrainName = name,
IsTraining = isTraining
IsTraining = isTraining,
ActionSpec = ToActionSpecProto(actionSpec),
var actionSpecProto = new ActionSpecProto
{
NumContinuousActions = actionSpec.NumContinuousActions,
NumDiscreteActions = actionSpec.NumDiscreteActions,
};
if (actionSpec.BranchSizes != null)
{
actionSpecProto.DiscreteBranchSizes.AddRange(actionSpec.BranchSizes);
}
brainParametersProto.ActionSpec = actionSpecProto;
var supportHybrid = Academy.Instance.TrainerCapabilities == null || Academy.Instance.TrainerCapabilities.HybridActions;
if (!supportHybrid)

{
var bp = new BrainParameters
{
VectorActionSize = bpp.VectorActionSizeDeprecated.ToArray(),
VectorActionSpaceType = (SpaceType)bpp.VectorActionSpaceTypeDeprecated
ActionSpec = ToActionSpec(bpp.ActionSpec),
}
/// <summary>
/// Convert a ActionSpecProto to a ActionSpec struct.
/// </summary>
/// <param name="actionSpecProto">An instance of an action spec protobuf object.</param>
/// <returns>An ActionSpec struct.</returns>
public static ActionSpec ToActionSpec(this ActionSpecProto actionSpecProto)
{
var actionSpec = new ActionSpec(actionSpecProto.NumContinuousActions);
if (actionSpecProto.DiscreteBranchSizes != null)
{
actionSpec.BranchSizes = actionSpecProto.DiscreteBranchSizes.ToArray();
}
return actionSpec;
}
/// <summary>
/// Convert a ActionSpec struct to a ActionSpecProto.
/// </summary>
/// <param name="actionSpecProto">An instance of an action spec struct.</param>
/// <returns>An ActionSpecProto.</returns>
public static ActionSpecProto ToActionSpecProto(this ActionSpec actionSpec)
{
var actionSpecProto = new ActionSpecProto
{
NumContinuousActions = actionSpec.NumContinuousActions,
NumDiscreteActions = actionSpec.NumDiscreteActions,
};
if (actionSpec.BranchSizes != null)
{
actionSpecProto.DiscreteBranchSizes.AddRange(actionSpec.BranchSizes);
}
return actionSpecProto;
}
#endregion

48
com.unity.ml-agents/Runtime/Inference/BarracudaModelParamLoader.cs


BrainParameters brainParameters, TensorProxy tensorProxy,
SensorComponent[] sensorComponents, int observableAttributeTotalSize)
{
// TODO: Update this check after integrating ActionSpec into BrainParameters
var numberActionsBp = brainParameters.VectorActionSize.Length;
var numberActionsBp = brainParameters.ActionSpec.NumDiscreteActions;
var numberActionsT = tensorProxy.shape[tensorProxy.shape.Length - 1];
if (numberActionsBp != numberActionsT)
{

{
var failedModelChecks = new List<string>();
var tensorTester = new Dictionary<string, Func<BrainParameters, ActuatorComponent[], TensorShape?, int, int, string>>();
if (model.HasContinuousOutputs())
// If the model expects an output but it is not in this list
var modelContinuousActionSize = model.ContinuousOutputSize();
var continuousError = CheckContinuousActionOutputShape(brainParameters, actuatorComponents, modelContinuousActionSize);
if (continuousError != null)
tensorTester[model.ContinuousOutputName()] = CheckContinuousActionOutputShape;
failedModelChecks.Add(continuousError);
if (model.HasDiscreteOutputs())
{
tensorTester[model.DiscreteOutputName()] = CheckDiscreteActionOutputShape;
}
// If the model expects an output but it is not in this list
var modelContinuousActionSize = model.ContinuousOutputSize();
foreach (var name in model.outputs)
var discreteError = CheckDiscreteActionOutputShape(brainParameters, actuatorComponents, modelSumDiscreteBranchSizes);
if (discreteError != null)
if (tensorTester.ContainsKey(name))
{
var tester = tensorTester[name];
var error = tester.Invoke(brainParameters, actuatorComponents, model.GetShapeByName(name), modelContinuousActionSize, modelSumDiscreteBranchSizes);
if (error != null)
{
failedModelChecks.Add(error);
}
}
failedModelChecks.Add(discreteError);
}
return failedModelChecks;
}

/// check failed. If the check passed, returns null.
/// </returns>
static string CheckDiscreteActionOutputShape(
BrainParameters brainParameters, ActuatorComponent[] actuatorComponents, TensorShape? shape, int modelContinuousActionSize, int modelSumDiscreteBranchSizes)
BrainParameters brainParameters, ActuatorComponent[] actuatorComponents, int modelSumDiscreteBranchSizes)
var sumOfDiscreteBranchSizes = 0;
if (brainParameters.VectorActionSpaceType == SpaceType.Discrete)
{
sumOfDiscreteBranchSizes += brainParameters.VectorActionSize.Sum();
}
// TODO: check each branch size instead of sum of branch sizes
var sumOfDiscreteBranchSizes = brainParameters.ActionSpec.SumOfDiscreteBranchSizes;
foreach (var actuatorComponent in actuatorComponents)
{

/// <returns>If the Check failed, returns a string containing information about why the
/// check failed. If the check passed, returns null.</returns>
static string CheckContinuousActionOutputShape(
BrainParameters brainParameters, ActuatorComponent[] actuatorComponents, TensorShape? shape, int modelContinuousActionSize, int modelSumDiscreteBranchSizes)
BrainParameters brainParameters, ActuatorComponent[] actuatorComponents, int modelContinuousActionSize)
var numContinuousActions = 0;
if (brainParameters.VectorActionSpaceType == SpaceType.Continuous)
{
numContinuousActions += brainParameters.NumActions;
}
var numContinuousActions = brainParameters.ActionSpec.NumContinuousActions;
foreach (var actuatorComponent in actuatorComponents)
{

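The reworked checks above read the expected sizes from `brainParameters.ActionSpec` instead of the deprecated fields. Below is a hedged sketch of exercising them end to end, modeled on ParameterLoaderTest.cs further down; `BarracudaModelParamLoader.CheckModel` is internal, so this only compiles where the package internals are visible (as in the editor test assembly), and `modelAsset` is a placeholder for a real trained model.

```csharp
using System.Linq;
using Unity.Barracuda;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Inference;
using Unity.MLAgents.Policies;
using Unity.MLAgents.Sensors;

static class ModelCheckSketch
{
    static bool SpecMatchesModel(NNModel modelAsset)
    {
        var bp = new BrainParameters
        {
            VectorObservationSize = 8,
            NumStackedVectorObservations = 1,
        };
        bp.ActionSpec = ActionSpec.MakeContinuous(2);

        var model = ModelLoader.Load(modelAsset);
        var errors = BarracudaModelParamLoader.CheckModel(
            model, bp, new SensorComponent[] { }, new ActuatorComponent[0]);

        // No errors means the ActionSpec and observations match the model's inputs and outputs.
        return !errors.Any();
    }
}
```
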
109
com.unity.ml-agents/Runtime/Policies/BrainParameters.cs


using System;
using UnityEngine;
using UnityEngine.Serialization;
using Unity.MLAgents.Actuators;
namespace Unity.MLAgents.Policies
{

/// [GameObject]: https://docs.unity3d.com/Manual/GameObjects.html
/// </remarks>
[Serializable]
public class BrainParameters
public class BrainParameters : ISerializationCallbackReceiver
{
/// <summary>
/// The number of the observations that are added in

[FormerlySerializedAs("numStackedVectorObservations")]
[Range(1, 50)] public int NumStackedVectorObservations = 1;
[SerializeField]
internal ActionSpec m_ActionSpec = new ActionSpec(0, null);
/// The size of the action space.
/// The specification of the Action space for the BrainParameters.
/// </summary>
public ActionSpec ActionSpec
{
get { return m_ActionSpec; }
set
{
m_ActionSpec.NumContinuousActions = value.NumContinuousActions;
m_ActionSpec.BranchSizes = value.BranchSizes;
SyncDeprecatedActionFields();
}
}
/// <summary>
/// (Deprecated) The size of the action space.
/// </summary>
/// <remarks>The size specified is interpreted differently depending on whether
/// the agent uses the continuous or the discrete action space.</remarks>

/// For the discrete action space: the number of branches in the action space.
/// </value>
/// [Obsolete("VectorActionSize has been deprecated, please use ActionSpec instead.")]
[FormerlySerializedAs("vectorActionSize")]
public int[] VectorActionSize = new[] { 1 };

public string[] VectorActionDescriptions;
/// <summary>
/// Defines if the action is discrete or continuous.
/// (Deprecated) Defines if the action is discrete or continuous.
/// [Obsolete("VectorActionSpaceType has been deprecated, please use ActionSpec instead.")]
[SerializeField]
[HideInInspector]
internal bool hasUpgradedBrainParametersWithActionSpec;
/// The number of actions specified by this Brain.
/// (Deprecated) The number of actions specified by this Brain.
/// [Obsolete("NumActions has been deprecated, please use ActionSpec instead.")]
switch (VectorActionSpaceType)
{
case SpaceType.Discrete:
return VectorActionSize.Length;
case SpaceType.Continuous:
return VectorActionSize[0];
default:
return 0;
}
return ActionSpec.NumContinuousActions > 0 ? ActionSpec.NumContinuousActions : ActionSpec.NumDiscreteActions;
}
}

{
VectorObservationSize = VectorObservationSize,
NumStackedVectorObservations = NumStackedVectorObservations,
VectorActionSize = (int[])VectorActionSize.Clone(),
VectorActionSpaceType = VectorActionSpaceType
ActionSpec = new ActionSpec(ActionSpec.NumContinuousActions, ActionSpec.BranchSizes),
VectorActionSize = (int[])VectorActionSize.Clone(),
VectorActionSpaceType = VectorActionSpaceType,
}
/// <summary>
/// Propagate ActionSpec fields from deprecated fields
/// </summary>
private void UpdateToActionSpec()
{
if (!hasUpgradedBrainParametersWithActionSpec)
{
if (VectorActionSpaceType == SpaceType.Continuous)
{
m_ActionSpec.NumContinuousActions = VectorActionSize[0];
m_ActionSpec.BranchSizes = null;
}
if (VectorActionSpaceType == SpaceType.Discrete)
{
m_ActionSpec.NumContinuousActions = 0;
m_ActionSpec.BranchSizes = (int[])VectorActionSize.Clone();
}
hasUpgradedBrainParametersWithActionSpec = true;
}
}
/// <summary>
/// Sync values in ActionSpec fields to deprecated fields
/// </summary>
private void SyncDeprecatedActionFields()
{
if (m_ActionSpec.NumContinuousActions == 0)
{
VectorActionSize = (int[])ActionSpec.BranchSizes.Clone();
VectorActionSpaceType = SpaceType.Discrete;
}
else if (m_ActionSpec.NumDiscreteActions == 0)
{
VectorActionSize = new[] { m_ActionSpec.NumContinuousActions };
VectorActionSpaceType = SpaceType.Continuous;
}
else
{
VectorActionSize = null;
}
}
/// <summary>
/// Called by Unity immediately before serializing this object.
/// </summary>
public void OnBeforeSerialize()
{
UpdateToActionSpec();
SyncDeprecatedActionFields();
}
/// <summary>
/// Called by Unity immediately after deserializing this object.
/// </summary>
public void OnAfterDeserialize()
{
UpdateToActionSpec();
SyncDeprecatedActionFields();
}
}
}

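Because BrainParameters now implements ISerializationCallbackReceiver and syncs in both directions, assigning the new `ActionSpec` property also rewrites the deprecated `VectorActionSize` and `VectorActionSpaceType` values. A minimal sketch of that setter path (the class and method names are illustrative):

```csharp
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Policies;

static class BrainParametersSyncSketch
{
    static void Show()
    {
        var bp = new BrainParameters();

        // Discrete spec: the deprecated fields become { 2, 2 } and SpaceType.Discrete.
        bp.ActionSpec = ActionSpec.MakeDiscrete(2, 2);

        // Continuous spec: they flip to { 3 } and SpaceType.Continuous.
        bp.ActionSpec = ActionSpec.MakeContinuous(3);
    }
}
```
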
10
com.unity.ml-agents/Tests/Editor/Actuators/VectorActuatorTests.cs


public void TestConstruct()
{
var ar = new TestActionReceiver();
var va = new VectorActuator(ar, new[] { 1, 2, 3 }, SpaceType.Discrete, "name");
var va = new VectorActuator(ar, ActionSpec.MakeDiscrete(1, 2, 3), "name");
var va1 = new VectorActuator(ar, new[] { 4 }, SpaceType.Continuous, "name");
var va1 = new VectorActuator(ar, ActionSpec.MakeContinuous(4), "name");
Assert.IsTrue(va1.ActionSpec.NumContinuousActions == 4);
Assert.IsTrue(va1.ActionSpec.SumOfDiscreteBranchSizes == 0);

public void TestOnActionReceived()
{
var ar = new TestActionReceiver();
var va = new VectorActuator(ar, new[] { 1, 2, 3 }, SpaceType.Discrete, "name");
var va = new VectorActuator(ar, ActionSpec.MakeDiscrete(1, 2, 3), "name");
var discreteActions = new[] { 0, 1, 1 };
var ab = new ActionBuffers(ActionSegment<float>.Empty,

public void TestResetData()
{
var ar = new TestActionReceiver();
var va = new VectorActuator(ar, new[] { 1, 2, 3 }, SpaceType.Discrete, "name");
var va = new VectorActuator(ar, ActionSpec.MakeDiscrete(1, 2, 3), "name");
var discreteActions = new[] { 0, 1, 1 };
var ab = new ActionBuffers(ActionSegment<float>.Empty,

public void TestWriteDiscreteActionMask()
{
var ar = new TestActionReceiver();
var va = new VectorActuator(ar, new[] { 1, 2, 3 }, SpaceType.Discrete, "name");
var va = new VectorActuator(ar, ActionSpec.MakeDiscrete(1, 2, 3), "name");
var bdam = new ActuatorDiscreteActionMask(new[] { va }, 6, 3);
var groundTruthMask = new[] { false, true, false, false, true, true };

6
com.unity.ml-agents/Tests/Editor/DemonstrationTests.cs


bp.BrainParameters.VectorObservationSize = 3;
bp.BrainParameters.NumStackedVectorObservations = 2;
bp.BrainParameters.VectorActionDescriptions = new[] { "TestActionA", "TestActionB" };
bp.BrainParameters.VectorActionSize = new[] { 2, 2 };
bp.BrainParameters.VectorActionSpaceType = SpaceType.Discrete;
bp.BrainParameters.ActionSpec = ActionSpec.MakeDiscrete(2, 2);
gameobj.AddComponent<TestAgent>();

bpA.BrainParameters.VectorObservationSize = 3;
bpA.BrainParameters.NumStackedVectorObservations = 1;
bpA.BrainParameters.VectorActionDescriptions = new[] { "TestActionA", "TestActionB" };
bpA.BrainParameters.VectorActionSize = new[] { 2, 2 };
bpA.BrainParameters.VectorActionSpaceType = SpaceType.Discrete;
bpA.BrainParameters.ActionSpec = ActionSpec.MakeDiscrete(2, 2);
agentGo1.AddComponent<ObservationAgent>();
var agent1 = agentGo1.GetComponent<ObservationAgent>();

4
com.unity.ml-agents/Tests/Editor/EditModeTestInternalBrainTensorApplier.cs


[Test]
public void ApplyDiscreteActionOutput()
{
var actionSpec = ActionSpec.MakeDiscrete(new int[] { 2, 3 });
var actionSpec = ActionSpec.MakeDiscrete(2, 3);
var inputTensor = new TensorProxy()
{
shape = new long[] { 2, 5 },

[Test]
public void ApplyHybridActionOutput()
{
var actionSpec = new ActionSpec(3, 2, new int[] { 2, 3 });
var actionSpec = new ActionSpec(3, new int[] { 2, 3 });
var continuousInputTensor = new TensorProxy()
{
shape = new long[] { 2, 3 },

27
com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs


{
var agentGo1 = new GameObject("TestAgent");
var bp1 = agentGo1.AddComponent<BehaviorParameters>();
bp1.BrainParameters.VectorActionSize = new[] { 1 };
bp1.BrainParameters.VectorActionSpaceType = SpaceType.Continuous;
bp1.BrainParameters.ActionSpec = ActionSpec.MakeContinuous(1);
bp2.BrainParameters.VectorActionSize = new[] { 1 };
bp2.BrainParameters.VectorActionSpaceType = SpaceType.Continuous;
bp2.BrainParameters.ActionSpec = ActionSpec.MakeContinuous(1);
agentGo2.AddComponent<TestAgent>();
var agent2 = agentGo2.GetComponent<TestAgent>();

{
var agentGo1 = new GameObject("TestAgent");
var bp1 = agentGo1.AddComponent<BehaviorParameters>();
bp1.BrainParameters.VectorActionSize = new[] { 1 };
bp1.BrainParameters.VectorActionSpaceType = SpaceType.Continuous;
bp1.BrainParameters.ActionSpec = ActionSpec.MakeContinuous(1);
bp2.BrainParameters.VectorActionSize = new[] { 1 };
bp2.BrainParameters.VectorActionSpaceType = SpaceType.Continuous;
bp2.BrainParameters.ActionSpec = ActionSpec.MakeContinuous(1);
agentGo2.AddComponent<TestAgent>();
var agent2 = agentGo2.GetComponent<TestAgent>();

{
var agentGo1 = new GameObject("TestAgent");
var bp1 = agentGo1.AddComponent<BehaviorParameters>();
bp1.BrainParameters.VectorActionSize = new[] { 1 };
bp1.BrainParameters.VectorActionSpaceType = SpaceType.Continuous;
bp1.BrainParameters.ActionSpec = ActionSpec.MakeContinuous(1);
var agent1 = agentGo1.AddComponent<TestAgent>();
var behaviorParameters = agentGo1.GetComponent<BehaviorParameters>();
behaviorParameters.BrainParameters.NumStackedVectorObservations = 3;

{
var agentGo1 = new GameObject("TestAgent");
var bp1 = agentGo1.AddComponent<BehaviorParameters>();
bp1.BrainParameters.VectorActionSize = new[] { 1 };
bp1.BrainParameters.VectorActionSpaceType = SpaceType.Continuous;
bp1.BrainParameters.ActionSpec = ActionSpec.MakeContinuous(1);
bp2.BrainParameters.VectorActionSize = new[] { 1 };
bp2.BrainParameters.VectorActionSpaceType = SpaceType.Continuous;
bp2.BrainParameters.ActionSpec = ActionSpec.MakeContinuous(1);
var agent2 = agentGo2.AddComponent<TestAgent>();
var aca = Academy.Instance;

{
var agentGo1 = new GameObject("TestAgent");
var bp1 = agentGo1.AddComponent<BehaviorParameters>();
bp1.BrainParameters.VectorActionSize = new[] { 1 };
bp1.BrainParameters.VectorActionSpaceType = SpaceType.Continuous;
bp1.BrainParameters.ActionSpec = ActionSpec.MakeContinuous(1);
agentGo1.AddComponent<TestAgent>();
var agent1 = agentGo1.GetComponent<TestAgent>();
var aca = Academy.Instance;

// Make sure that Agents with HeuristicPolicies step their sensors each Academy step.
var agentGo1 = new GameObject("TestAgent");
var bp1 = agentGo1.AddComponent<BehaviorParameters>();
bp1.BrainParameters.VectorActionSize = new[] { 1 };
bp1.BrainParameters.VectorActionSpaceType = SpaceType.Continuous;
bp1.BrainParameters.ActionSpec = ActionSpec.MakeContinuous(1);
agentGo1.AddComponent<TestAgent>();
var agent1 = agentGo1.GetComponent<TestAgent>();
var aca = Academy.Instance;

2
com.unity.ml-agents/Tests/Editor/ModelRunnerTest.cs


ActionSpec GetHybrid0vis53vec_3c_2dActionSpec()
{
return new ActionSpec(3, 1, new int[] { 2 });
return new ActionSpec(3, new int[] { 2 });
}
[SetUp]

121
com.unity.ml-agents/Tests/Editor/ParameterLoaderTest.cs


{
var validBrainParameters = new BrainParameters();
validBrainParameters.VectorObservationSize = 8;
validBrainParameters.VectorActionSize = new[] { 2 };
validBrainParameters.VectorActionSpaceType = SpaceType.Continuous;
validBrainParameters.ActionSpec = ActionSpec.MakeContinuous(2);
return validBrainParameters;
}

validBrainParameters.VectorObservationSize = 0;
validBrainParameters.VectorActionSize = new[] { 2, 3 };
validBrainParameters.VectorActionSpaceType = SpaceType.Discrete;
validBrainParameters.ActionSpec = ActionSpec.MakeDiscrete(2, 3);
// TODO: update and enable this after integrating action spec into BrainParameters
// BrainParameters GetHybridBrainParameters()
// {
// var validBrainParameters = new BrainParameters();
// validBrainParameters.VectorObservationSize = 53;
// validBrainParameters.VectorActionSize = new[] { 2 };
// validBrainParameters.NumStackedVectorObservations = 1;
// validBrainParameters.VectorActionSpaceType = SpaceType.Discrete;
// return validBrainParameters;
// }
BrainParameters GetHybridBrainParameters()
{
var validBrainParameters = new BrainParameters();
validBrainParameters.VectorObservationSize = 53;
validBrainParameters.NumStackedVectorObservations = 1;
validBrainParameters.ActionSpec = new ActionSpec(3, new int[] { 2 });
return validBrainParameters;
}
[SetUp]
public void SetUp()

Assert.AreEqual(0, errors.Count()); // There should not be any errors
}
// TODO: update and enable this test after integrating action spec into BrainParameters
// [Test]
// public void TestCheckModelValidHybrid()
// {
// var model = ModelLoader.Load(hybridModel);
// var validBrainParameters = GetHybridBrainParameters();
[Test]
public void TestCheckModelValidHybrid()
{
var model = ModelLoader.Load(hybridONNXModel);
var validBrainParameters = GetHybridBrainParameters();
// var errors = BarracudaModelParamLoader.CheckModel(
// model, validBrainParameters,
// new SensorComponent[] { }, new ActuatorComponent[0]
// );
// Assert.AreEqual(0, errors.Count()); // There should not be any errors
// }
var errors = BarracudaModelParamLoader.CheckModel(
model, validBrainParameters,
new SensorComponent[] { }, new ActuatorComponent[0]
);
Assert.AreEqual(0, errors.Count()); // There should not be any errors
}
[TestCase(true)]
[TestCase(false)]

Assert.Greater(errors.Count(), 0);
}
// TODO: update and enable this test after integrating action spec into BrainParameters
// [Test]
// public void TestCheckModelThrowsVectorObservationHybrid()
// {
// var model = ModelLoader.Load(hybridModel);
[Test]
public void TestCheckModelThrowsVectorObservationHybrid()
{
var model = ModelLoader.Load(hybridONNXModel);
// var brainParameters = GetHybridBrainParameters();
// brainParameters.VectorObservationSize = 9; // Invalid observation
// var errors = BarracudaModelParamLoader.CheckModel(
// model, brainParameters,
// new SensorComponent[] { }, new ActuatorComponent[0]
// );
// Assert.Greater(errors.Count(), 0);
var brainParameters = GetHybridBrainParameters();
brainParameters.VectorObservationSize = 9; // Invalid observation
var errors = BarracudaModelParamLoader.CheckModel(
model, brainParameters,
new SensorComponent[] { }, new ActuatorComponent[0]
);
Assert.Greater(errors.Count(), 0);
// brainParameters = GetContinuous2vis8vec2actionBrainParameters();
// brainParameters.NumStackedVectorObservations = 2;// Invalid stacking
// errors = BarracudaModelParamLoader.CheckModel(
// model, brainParameters,
// new SensorComponent[] { }, new ActuatorComponent[0]
// );
// Assert.Greater(errors.Count(), 0);
// }
brainParameters = GetContinuous2vis8vec2actionBrainParameters();
brainParameters.NumStackedVectorObservations = 2;// Invalid stacking
errors = BarracudaModelParamLoader.CheckModel(
model, brainParameters,
new SensorComponent[] { }, new ActuatorComponent[0]
);
Assert.Greater(errors.Count(), 0);
}
[TestCase(true)]
[TestCase(false)]

var brainParameters = GetContinuous2vis8vec2actionBrainParameters();
brainParameters.VectorActionSize = new[] { 3 }; // Invalid action
brainParameters.ActionSpec = ActionSpec.MakeContinuous(3); // Invalid action
brainParameters.VectorActionSpaceType = SpaceType.Discrete;// Invalid SpaceType
brainParameters.ActionSpec = ActionSpec.MakeDiscrete(3); // Invalid SpaceType
errors = BarracudaModelParamLoader.CheckModel(model, brainParameters, new SensorComponent[] { sensor_21_20_3, sensor_20_22_3 }, new ActuatorComponent[0]);
Assert.Greater(errors.Count(), 0);
}

var model = useDeprecatedNNModel ? ModelLoader.Load(discreteNNModel) : ModelLoader.Load(discreteONNXModel);
var brainParameters = GetDiscrete1vis0vec_2_3action_recurrModelBrainParameters();
brainParameters.VectorActionSize = new[] { 3, 3 }; // Invalid action
brainParameters.ActionSpec = ActionSpec.MakeDiscrete(3, 3); // Invalid action
brainParameters.VectorActionSpaceType = SpaceType.Continuous;// Invalid SpaceType
brainParameters.ActionSpec = ActionSpec.MakeContinuous(2); // Invalid SpaceType
// TODO: update and enable this test after integrating action spec into BrainParameters
// [Test]
// public void TestCheckModelThrowsActionHybrid()
// {
// var model = ModelLoader.Load(hybridModel);
[Test]
public void TestCheckModelThrowsActionHybrid()
{
var model = ModelLoader.Load(hybridONNXModel);
// var brainParameters = GetHybridBrainParameters();
// brainParameters.VectorActionSize = new[] { 3 }; // Invalid action
// var errors = BarracudaModelParamLoader.CheckModel(model, brainParameters, new SensorComponent[] { sensor_21_20_3, sensor_20_22_3 }, new ActuatorComponent[0]);
// Assert.Greater(errors.Count(), 0);
var brainParameters = GetHybridBrainParameters();
brainParameters.ActionSpec = new ActionSpec(3, new int[] { 3 }); // Invalid discrete action size
var errors = BarracudaModelParamLoader.CheckModel(model, brainParameters, new SensorComponent[] { sensor_21_20_3, sensor_20_22_3 }, new ActuatorComponent[0]);
Assert.Greater(errors.Count(), 0);
// brainParameters = GetContinuous2vis8vec2actionBrainParameters();
// brainParameters.VectorActionSpaceType = SpaceType.Discrete;// Invalid SpaceType
// errors = BarracudaModelParamLoader.CheckModel(model, brainParameters, new SensorComponent[] { sensor_21_20_3, sensor_20_22_3 }, new ActuatorComponent[0]);
// Assert.Greater(errors.Count(), 0);
// }
brainParameters = GetContinuous2vis8vec2actionBrainParameters();
brainParameters.ActionSpec = ActionSpec.MakeDiscrete(2); // Missing continuous action
errors = BarracudaModelParamLoader.CheckModel(model, brainParameters, new SensorComponent[] { sensor_21_20_3, sensor_20_22_3 }, new ActuatorComponent[0]);
Assert.Greater(errors.Count(), 0);
}
[Test]
public void TestCheckModelThrowsNoModel()

3
com.unity.ml-agents/Tests/Runtime/RuntimeAPITest.cs


behaviorParams.BrainParameters.VectorObservationSize = 3;
behaviorParams.BrainParameters.NumStackedVectorObservations = 2;
behaviorParams.BrainParameters.VectorActionDescriptions = new[] { "TestActionA", "TestActionB" };
behaviorParams.BrainParameters.VectorActionSize = new[] { 2, 2 };
behaviorParams.BrainParameters.VectorActionSpaceType = SpaceType.Discrete;
behaviorParams.BrainParameters.ActionSpec = ActionSpec.MakeDiscrete(2, 2);
behaviorParams.BehaviorName = "TestBehavior";
behaviorParams.TeamId = 42;
behaviorParams.UseChildSensors = true;

2
com.unity.ml-agents/package.json


"unity": "2018.4",
"description": "Use state-of-the-art machine learning to create intelligent character behaviors in any Unity environment (games, robotics, film, etc.).",
"dependencies": {
"com.unity.barracuda": "1.1.2-preview",
"com.unity.barracuda": "1.2.1-preview",
"com.unity.modules.imageconversion": "1.0.0",
"com.unity.modules.jsonserialize": "1.0.0",
"com.unity.modules.physics": "1.0.0",

5
docs/Getting-Started.md


eight elements: the `x` and `z` components of the agent cube's rotation and the
`x`, `y`, and `z` components of the ball's relative position and velocity.
#### Behavior Parameters : Vector Action Space
#### Behavior Parameters : Actions
An Agent is given instructions in the form of actions.
The ML-Agents Toolkit classifies actions into two types: continuous and discrete.

Number of Visual Observations (per agent): 0
Vector Observation space size (per agent): 8
Number of stacked Vector Observation: 1
Vector Action space type: continuous
Vector Action space size (per agent): [2]
Vector Action descriptions: ,
INFO:mlagents_envs:Hyperparameters for the PPO Trainer of brain 3DBallLearning:
batch_size: 64
beta: 0.001

59
docs/Learning-Environment-Design-Agents.md


- [Raycast Observations](#raycast-observations)
- [RayCast Observation Summary & Best Practices](#raycast-observation-summary--best-practices)
- [Actions](#actions)
- [Continuous Action Space](#continuous-action-space)
- [Discrete Action Space](#discrete-action-space)
- [Continuous Actions](#continuous-actions)
- [Discrete Actions](#discrete-actions)
- [Masking Discrete Actions](#masking-discrete-actions)
- [Actions Summary & Best Practices](#actions-summary--best-practices)
- [Rewards](#rewards)

method calls `VectorSensor.AddObservation()` such that vector size adds up to 8,
the Behavior Parameters of the Agent are set with vector observation space
with a state size of 8.
- `Agent.OnActionReceived()` — The vector action spaces result
- `Agent.OnActionReceived()` — The action results
in a small change in the agent cube's rotation at each step. In this example,
an Agent receives a small positive reward for each step it keeps the ball on the
agent cube's head and a larger, negative reward for dropping the ball. An

An action is an instruction from the Policy that the agent carries out. The
action is passed to the Agent as the `ActionBuffers` parameter when the Academy invokes the
agent's `OnActionReceived()` function. There are two types of actions supported:
agent's `OnActionReceived()` function. There are two types of actions that an Agent can use:
**Continuous** and **Discrete**.
Neither the Policy nor the training algorithm know anything about what the

for an Agent is in the `OnActionReceived()` function.
For example, if you designed an agent to move in two dimensions, you could use
either continuous or the discrete vector actions. In the continuous case, you
would set the vector action size to two (one for each dimension), and the
agent's Policy would create an action with two floating point values. In the
either continuous or the discrete actions. In the continuous case, you
would set the action size to two (one for each dimension), and the
agent's Policy would output an action with two floating point values. In the
movement), and the Policy would create an action array containing two elements
with values ranging from zero to one.
movement), and the Policy would output an action array containing two elements
with values ranging from zero to one. You could alternatively use a combination of continuous
and discrete actions e.g., using one continuous action for horizontal movement
and a discrete branch of size two for the vertical movement.
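
As a sketch of the hybrid case just described, the hypothetical agent below reads one continuous action for horizontal movement and one discrete branch of size two for jumping, clipping the continuous value as the best-practice note further down recommends.

```csharp
using Unity.MLAgents;
using Unity.MLAgents.Actuators;
using UnityEngine;

// Hypothetical agent for illustration only.
public class MoveAndJumpAgent : Agent
{
    public override void OnActionReceived(ActionBuffers actionBuffers)
    {
        // Continuous action 0: horizontal movement, clipped to [-1, 1].
        var horizontal = Mathf.Clamp(actionBuffers.ContinuousActions[0], -1f, 1f);
        transform.Translate(horizontal * Time.deltaTime, 0f, 0f);

        // Discrete branch 0 has two possible values: 0 = stay, 1 = jump.
        if (actionBuffers.DiscreteActions[0] == 1)
        {
            // Apply the jump here, e.g. add an upward force to a Rigidbody.
        }
    }
}
```
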
The [3DBall](Learning-Environment-Examples.md#3dball-3d-balance-ball) and
[Area](Learning-Environment-Examples.md#push-block) example environments are set
up to use either the continuous or the discrete vector action spaces.
### Continuous Action Space
### Continuous Actions
is an array with length equal to the `Vector Action Space Size` property value. The
is an array with length equal to the `Continuous Action Size` property value. The
The [Reacher example](Learning-Environment-Examples.md#reacher) defines a
continuous action space with four control values.
The [Reacher example](Learning-Environment-Examples.md#reacher) uses
continuous actions with four control values.
![reacher](images/reacher.png)

```
By default the output from our provided PPO algorithm pre-clamps the values of
`vectorAction` into the [-1, 1] range. It is a best practice to manually clip
`ActionBuffers.ContinuousActions` into the [-1, 1] range. It is a best practice to manually clip
### Discrete Action Space
### Discrete Actions
is an array of integers. When defining the discrete vector action space, `Branches`
is an array of integers with length equal to `Discrete Branch Size`. When defining the discrete actions, `Branches`
is an array of integers, each value corresponds to the number of possibilities for each branch.
For example, if we wanted an Agent that can move in a plane and jump, we could

### Actions Summary & Best Practices
- Agents can either use `Discrete` or `Continuous` actions.
- Agents can use `Discrete` and/or `Continuous` actions.
- In general, smaller action spaces will make for easier learning.
- Be sure to set the Vector Action's Space Size to the number of used Vector
Actions, and not greater, as doing the latter can interfere with the
- In general, fewer actions will make for easier learning.
- Be sure to set the Continuous Action Size and Discrete Branch Size to the desired
number for each type of action, and not greater, as doing the latter can interfere with the
efficiency of the training process.
- Continuous action values should be clipped to an
appropriate range. The provided PPO model automatically clips these values

be stacked and used collectively for decision making. This results in the
effective size of the vector observation being passed to the Policy being:
_Space Size_ x _Stacked Vectors_.
- `Vector Action`
- `Space Type` - Corresponds to whether action vector contains a single
integer (Discrete) or a series of real-valued floats (Continuous).
- `Space Size` (Continuous) - Length of action vector.
- `Branches` (Discrete) - An array of integers, defines multiple concurrent
discrete actions. The values in the `Branches` array correspond to the
number of possible discrete values for each action branch.
- `Actions`
- `Continuous Actions` - The number of concurrent continuous actions that
the Agent can take.
- `Discrete Branches` - An array of integers, defines multiple concurrent
discrete actions. The values in the `Discrete Branches` array correspond
to the number of possible discrete values for each action branch.
- `Model` - The neural network model used for inference (obtained after
training)
- `Inference Device` - Whether to use CPU or GPU to run the model during

38
docs/Learning-Environment-Examples.md


- +1.0 for arriving at optimal state.
- Behavior Parameters:
- Vector Observation space: One variable corresponding to current state.
- Vector Action space: (Discrete) Two possible actions (Move left, move
- Actions: 1 discrete action branch with 3 actions (Move left, do nothing, move
right).
- Visual Observations: None
- Float Properties: None

cube, and position and velocity of ball.
- Vector Observation space (Hard Version): 5 variables corresponding to
rotation of the agent cube and position of ball.
- Vector Action space: (Continuous) Size of 2, with one value corresponding to
- Actions: 2 continuous actions, with one value corresponding to
X-rotation, and the other to Z-rotation.
- Visual Observations: Third-person view from the upper-front of the agent. Use
`Visual3DBall` scene.

- -1.0 if the agent navigates to an obstacle (episode ends).
- Behavior Parameters:
- Vector Observation space: None
- Vector Action space: (Discrete) Size of 4, corresponding to movement in
cardinal directions. Note that for this environment,
- Actions: 1 discrete action branch with 5 actions, corresponding to movement in
cardinal directions or not moving. Note that for this environment,
[action masking](Learning-Environment-Design-Agents.md#masking-discrete-actions)
is turned on by default (this option can be toggled using the `Mask Actions`
checkbox within the `trueAgent` GameObject). The trained model file provided

- Behavior Parameters:
- Vector Observation space: 9 variables corresponding to position, velocity
and orientation of ball and racket.
- Vector Action space: (Continuous) Size of 3, corresponding to movement
- Actions: 3 continuous actions, corresponding to movement
toward net or away from net, jumping and rotation.
- Visual Observations: None
- Float Properties: Three

- Vector Observation space: (Continuous) 70 variables corresponding to 14
ray-casts each detecting one of three possible objects (wall, goal, or
block).
- Vector Action space: (Discrete) Size of 6, corresponding to turn clockwise
and counterclockwise and move along four different face directions.
- Actions: 1 discrete action branch with 7 actions, corresponding to turn clockwise
and counterclockwise, move along four different face directions, or do nothing.
- Visual Observations (Optional): One first-person camera. Use
`VisualPushBlock` scene. **The visual observation version of this
environment does not train with the provided default training parameters.**

- Vector Observation space: Size of 74, corresponding to 14 ray casts each
detecting 4 possible objects, plus the global position of the agent and
whether or not the agent is grounded.
- Vector Action space: (Discrete) 4 Branches:
- Actions: 4 discrete action branches:
- Forward Motion (3 possible actions: Forward, Backwards, No Action)
- Rotation (3 possible actions: Rotate Left, Rotate Right, No Action)
- Side Motion (3 possible actions: Left, Right, No Action)

- Behavior Parameters:
- Vector Observation space: 26 variables corresponding to position, rotation,
velocity, and angular velocities of the two arm rigid bodies.
- Vector Action space: (Continuous) Size of 4, corresponding to torque
- Actions: 4 continuous actions, corresponding to torque
applicable to two joints.
- Visual Observations: None.
- Float Properties: Five

- Vector Observation space: 172 variables corresponding to position, rotation,
velocity, and angular velocities of each limb plus the acceleration and
angular acceleration of the body.
- Vector Action space: (Continuous) Size of 20, corresponding to target
- Actions: 20 continuous actions, corresponding to target
rotations for joints.
- Visual Observations: None
- Float Properties: None

- Vector Observation space: 64 variables corresponding to position, rotation,
velocity, and angular velocities of each limb plus the acceleration and
angular acceleration of the body.
- Vector Action space: (Continuous) Size of 9, corresponding to target
- Actions: 9 continuous actions, corresponding to target
rotations for joints.
- Visual Observations: None
- Float Properties: None

agent is frozen and/or shot its laser (2), plus ray-based perception of
objects around agent's forward direction (49; 7 raycast angles with 7
measurements for each).
- Vector Action space: (Discrete) 4 Branches:
- Actions: 4 discrete action branches:
- Forward Motion (3 possible actions: Forward, Backwards, No Action)
- Side Motion (3 possible actions: Left, Right, No Action)
- Rotation (3 possible actions: Rotate Left, Rotate Right, No Action)

- Behavior Parameters:
- Vector Observation space: 30 corresponding to local ray-casts detecting
objects, goals, and walls.
- Vector Action space: (Discrete) 1 Branch, 4 actions corresponding to agent
- Actions: 1 discrete action Branch, with 4 actions corresponding to agent
rotation and forward/backward movement.
- Visual Observations (Optional): First-person view for the agent. Use
`VisualHallway` scene. **The visual observation version of this environment

- Behavior Parameters:
- Vector Observation space: 6 corresponding to local position of agent and
green cube.
- Vector Action space: (Continuous) 3 corresponding to agent force applied for
- Actions: 3 continuous actions corresponding to agent force applied for
the jump.
- Visual Observations: None
- Float Properties: Two

degrees each detecting 6 possible object types, along with the object's
distance. The forward ray-casts contribute 264 state dimensions and backward
72 state dimensions over three observation stacks.
- Vector Action space: (Discrete) Three branched actions corresponding to
- Actions: 3 discrete branched actions corresponding to
forward, backward, sideways movement, as well as rotation.
- Visual Observations: None
- Float Properties: Two

degrees each detecting 5 possible object types, along with the object's
distance. The forward ray-casts contribute 231 state dimensions and backward
63 state dimensions over three observation stacks.
- Striker Vector Action space: (Discrete) Three branched actions corresponding
- Striker Actions: 3 discrete branched actions corresponding
- Goalie Vector Action space: (Discrete) Three branched actions corresponding
- Goalie Actions: 3 discrete branched actions corresponding
to forward, backward, sideways movement, as well as rotation.
- Visual Observations: None
- Float Properties: Two

- Behavior Parameters:
- Vector Observation space: 243 variables corresponding to position, rotation,
velocity, and angular velocities of each limb, along with goal direction.
- Vector Action space: (Continuous) Size of 39, corresponding to target
- Actions: 39 continuous actions, corresponding to target
rotations and strength applicable to the joints.
- Visual Observations: None
- Float Properties: Four

- Vector Observation space: 148 corresponding to local ray-casts detecting
switch, bricks, golden brick, and walls, plus variable indicating switch
state.
- Vector Action space: (Discrete) 4 corresponding to agent rotation and
- Actions: 1 discrete action branch, with 4 actions corresponding to agent rotation and
forward/backward movement.
- Visual Observations (Optional): First-person camera per-agent. Use
`VisualPyramids` scene. **The visual observation version of this environment

3
docs/Learning-Environment-Executable.md


Number of Visual Observations (per agent): 0
Vector Observation space size (per agent): 8
Number of stacked Vector Observation: 1
Vector Action space type: continuous
Vector Action space size (per agent): [2]
Vector Action descriptions: ,
INFO:mlagents_envs:Hyperparameters for the PPO Trainer of brain Ball3DLearning:
batch_size: 64
beta: 0.001

4
docs/ML-Agents-Overview.md


one in which opposing agents are equal in form, function and objective. Examples
of symmetric games are our Tennis and Soccer example environments. In
reinforcement learning, this means both agents have the same observation and
action spaces and learn from the same reward function and so _they can share the
actions and learn from the same reward function and so _they can share the
have the same observation or action spaces and so sharing policy networks is not
have the same observation or actions and so sharing policy networks is not
necessarily ideal.
With self-play, an agent learns in adversarial games by competing against fixed,

3
docs/Python-API.md


name of the group the Agent belongs to and `agent_id` is the integer
identifier of the Agent. `action` is an `ActionTuple` as described above.
**Note:** If no action is provided for an agent group between two calls to
`env.step()` then the default action will be all zeros (in either discrete or
continuous action space)
`env.step()` then the default action will be all zeros.
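
That default is equivalent to explicitly sending all-zero actions yourself. A minimal sketch, assuming a behavior with 2 continuous actions and 1 discrete branch (the behavior name and sizes are placeholders, and `env` is an already-created `UnityEnvironment`):

```python
import numpy as np
from mlagents_envs.base_env import ActionTuple

behavior_name = "MyBehavior?team=0"  # placeholder name
decision_steps, terminal_steps = env.get_steps(behavior_name)
num_agents = len(decision_steps)

# All-zero actions for every agent that requested a decision:
# here, 2 continuous actions and 1 discrete branch per agent.
action = ActionTuple(
    continuous=np.zeros((num_agents, 2), dtype=np.float32),
    discrete=np.zeros((num_agents, 1), dtype=np.int32),
)
env.set_actions(behavior_name, action)
env.step()
```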
#### DecisionSteps and DecisionStep

4
docs/Training-Configuration-File.md


| `init_path` | (default = None) Initialize trainer from a previously saved model. Note that the prior run should have used the same trainer configurations as the current run, and have been saved with the same version of ML-Agents. <br><br>You should provide the full path to the folder where the checkpoints were saved, e.g. `./models/{run-id}/{behavior_name}`. This option is provided in case you want to initialize different behaviors from different runs; in most cases, it is sufficient to use the `--initialize-from` CLI parameter to initialize all models from the same run. |
| `threaded` | (default = `true`) By default, model updates can happen while the environment is being stepped. This violates the [on-policy](https://spinningup.openai.com/en/latest/user/algorithms.html#the-on-policy-algorithms) assumption of PPO slightly in exchange for a training speedup. To maintain the strict on-policyness of PPO, you can disable parallel updates by setting `threaded` to `false`. There is usually no reason to turn `threaded` off for SAC. |
| `hyperparameters -> learning_rate` | (default = `3e-4`) Initial learning rate for gradient descent. Corresponds to the strength of each gradient descent update step. This should typically be decreased if training is unstable, and the reward does not consistently increase. <br><br>Typical range: `1e-5` - `1e-3` |
| `hyperparameters -> batch_size` | Number of experiences in each iteration of gradient descent. **This should always be multiple times smaller than `buffer_size`**. If you are using a continuous action space, this value should be large (in the order of 1000s). If you are using a discrete action space, this value should be smaller (in order of 10s). <br><br> Typical range: (Continuous - PPO): `512` - `5120`; (Continuous - SAC): `128` - `1024`; (Discrete, PPO & SAC): `32` - `512`. |
| `hyperparameters -> batch_size` | Number of experiences in each iteration of gradient descent. **This should always be multiple times smaller than `buffer_size`**. If you are using continuous actions, this value should be large (on the order of 1000s). If you are using only discrete actions, this value should be smaller (on the order of 10s). <br><br> Typical range: (Continuous - PPO): `512` - `5120`; (Continuous - SAC): `128` - `1024`; (Discrete, PPO & SAC): `32` - `512`. |
| `hyperparameters -> buffer_size` | (default = `10240` for PPO and `50000` for SAC)<br> **PPO:** Number of experiences to collect before updating the policy model. Corresponds to how many experiences should be collected before we do any learning or updating of the model. **This should be multiple times larger than `batch_size`**. Typically a larger `buffer_size` corresponds to more stable training updates. <br> **SAC:** The max size of the experience buffer - on the order of thousands of times longer than your episodes, so that SAC can learn from old as well as new experiences. <br><br>Typical range: PPO: `2048` - `409600`; SAC: `50000` - `1000000` |
| `hyperparameters -> learning_rate_schedule` | (default = `linear` for PPO and `constant` for SAC) Determines how learning rate changes over time. For PPO, we recommend decaying learning rate until max_steps so learning converges more stably. However, for some cases (e.g. training for an unknown amount of time) this feature can be disabled. For SAC, we recommend holding learning rate constant so that the agent can continue to learn until its Q function converges naturally. <br><br>`linear` decays the learning_rate linearly, reaching 0 at max_steps, while `constant` keeps the learning rate constant for the entire training run. |
| `network_settings -> hidden_units` | (default = `128`) Number of units in the hidden layers of the neural network. Corresponds to how many units are in each fully connected layer of the neural network. For simple problems where the correct action is a straightforward combination of the observation inputs, this should be small. For problems where the action is a very complex interaction between the observation variables, this should be larger. <br><br> Typical range: `32` - `512` |

A few considerations when deciding to use memory:
- LSTM does not work well with continuous vector actions. Please use
- LSTM does not work well with continuous actions. Please use
discrete actions for better results.
- Since the memories must be sent back and forth between Python and Unity, using
too large `memory_size` will slow down training.

11
docs/Training-on-Microsoft-Azure.md


1. [Move](https://docs.microsoft.com/en-us/azure/virtual-machines/linux/copy-files-to-linux-vm-using-scp)
the `ml-agents` sub-folder of this ml-agents repo to the remote Azure
instance, and set it as the working directory.
2. Install the required packages with `pip3 install .`.
2. Install the required packages:
Torch: `pip3 install torch==1.7.0 -f https://download.pytorch.org/whl/torch_stable.html` and
MLAgents: `pip3 install mlagents`
## Testing

```python
from mlagents_envs.environment import UnityEnvironment
env = UnityEnvironment(<your_env>)
env = UnityEnvironment(file_name="<your_env>", seed=1, side_channels=[])
Where `<your_env>` corresponds to the path to your environment executable.
Where `<your_env>` corresponds to the path to your environment executable (e.g. `/home/UserName/Build/yourFile`).
**Note:** When running your environment in headless mode, you must append `--no-graphics` to your mlagents-learn command; otherwise training will not run.
You can verify this by aborting a training run and checking whether it reports "Model Saved" or "Aborted", or by checking whether a .onnx file was generated in the results folder.
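
For a quick connectivity check before launching a full training run, a sketch along these lines can help (the build path is a placeholder; `behavior_specs` is only populated after `reset()`):

```python
from mlagents_envs.environment import UnityEnvironment

# Placeholder path; point this at your own Linux build.
env = UnityEnvironment(
    file_name="/home/UserName/Build/yourFile", seed=1, side_channels=[]
)
env.reset()
print("Connected. Behaviors:", list(env.behavior_specs.keys()))
env.close()
```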
## Running Training on your Virtual Machine

4
ml-agents-envs/mlagents_envs/base_env.py


since the last simulation step.
- agent_id is an int and a unique identifier for the corresponding Agent.
- action_mask is an optional list of one-dimensional arrays of booleans.
Only available in multi-discrete action space type.
Only available when using multi-discrete actions.
Each array corresponds to an action branch. Each array contains a mask
for each action of the branch. If true, the action is not available for
the agent during this simulation step.

identifier for the corresponding Agent. This is used to track Agents
across simulation steps.
- action_mask is an optional list of two-dimensional arrays of booleans.
Only available in multi-discrete action space type.
Only available when using multi-discrete actions.
Each array corresponds to an action branch. The first dimension of each
array is the batch size and the second contains a mask for each action of
the branch. If true, the action is not available for the agent during
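
A sketch of how such a per-branch mask from a single DecisionStep might be consumed on the Python side (the branch sizes are made up for illustration):

```python
import numpy as np

# Hypothetical mask for two discrete branches of sizes 3 and 2.
# True means the action is NOT available this step.
action_mask = [
    np.array([False, True, False]),  # branch 0: action 1 is masked out
    np.array([False, False]),        # branch 1: all actions allowed
]

# Sample a random allowed action for each branch.
chosen = [int(np.random.choice(np.flatnonzero(~mask))) for mask in action_mask]
print(chosen)  # e.g. [2, 1]
```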

14
ml-agents/mlagents/trainers/cli_utils.py


logger = logging_util.get_logger(__name__)
class RaiseDeprecationWarning(argparse.Action):
class RaiseRemovedWarning(argparse.Action):
"""
Internal custom Action to raise warning when argument is called.
"""

def __call__(self, arg_parser, namespace, values, option_string=None):
logger.warning(f"The command line argument {option_string} was deprecated")
logger.warning(f"The command line argument {option_string} was removed.")
class DetectDefault(argparse.Action):

argparser.add_argument(
"--torch",
default=False,
action=RaiseDeprecationWarning,
help="(Deprecated) Use the PyTorch framework. Note that this option is not required anymore as PyTorch is the"
"default framework, and will be removed in the next release.",
action=RaiseRemovedWarning,
help="(Removed) Use the PyTorch framework.",
action=RaiseDeprecationWarning,
help="(Deprecated) Use the TensorFlow framework instead of PyTorch. Install TensorFlow "
"before using this option.",
action=RaiseRemovedWarning,
help="(Removed) Use the TensorFlow framework.",
)
eng_conf = argparser.add_argument_group(title="Engine Configuration")

2
ml-agents/mlagents/trainers/demo_loader.py


# check action dimensions in demonstration match
if behavior_spec.action_spec != expected_behavior_spec.action_spec:
raise RuntimeError(
"The action spaces {} in demonstration do not match the policy's {}.".format(
"The actions {} in demonstration do not match the policy's {}.".format(
behavior_spec.action_spec, expected_behavior_spec.action_spec
)
)

4
ml-agents/mlagents/trainers/policy/torch_policy.py


"""
Policy that uses a multilayer perceptron to map the observations to actions. Could
also use a CNN to encode visual input prior to the MLP. Supports discrete and
continuous action spaces, as well as recurrent networks.
continuous actions, as well as recurrent networks.
:param seed: Random seed.
:param behavior_spec: Assigned BehaviorSpec object.
:param trainer_settings: Defined training parameters.

:param seq_len: Sequence length when using RNN.
:return: Tuple of AgentAction, ActionLogProbs, entropies, and output memories.
"""
actions, log_probs, entropies, _, memories = self.actor_critic.get_action_stats_and_value(
actions, log_probs, entropies, memories = self.actor_critic.get_action_stats(
obs, masks, memories, seq_len
)
return (actions, log_probs, entropies, memories)

3
ml-agents/mlagents/trainers/tests/mock_brain.py


:int num_agents: Number of "agents" to imitate.
:List observation_shapes: A List of the observation spaces in your steps
:int num_vector_acts: Number of actions in your action space
:bool discrete: Whether or not action space is discrete
:ActionSpec action_spec: ActionSpec for the agent
:bool done: Whether all the agents in the batch are done
"""
obs_list = []

2
ml-agents/mlagents/trainers/tests/torch/test_hybrid.py


SAC_TORCH_CONFIG,
hyperparameters=new_hyperparams,
network_settings=new_networksettings,
max_steps=4000,
max_steps=3500,
)
check_environment_trains(env, {BRAIN_NAME: config})

84
ml-agents/mlagents/trainers/torch/networks.py


"""
pass
def get_action_stats(
self,
inputs: List[torch.Tensor],
masks: Optional[torch.Tensor] = None,
memories: Optional[torch.Tensor] = None,
sequence_length: int = 1,
) -> Tuple[AgentAction, ActionLogProbs, torch.Tensor, torch.Tensor]:
"""
Returns sampled actions.
If memory is enabled, return the memories as well.
:param inputs: A List of observation inputs (vector and visual) as tensors.
:param masks: If using discrete actions, a Tensor of action masks.
:param memories: If using memory, a Tensor of initial memories.
:param sequence_length: If using memory, the sequence length.
:return: A Tuple of AgentAction, ActionLogProbs, entropies, and memories.
Memories will be None if not using memory.
"""
pass
@abc.abstractmethod
def forward(
self,

AgentAction, ActionLogProbs, torch.Tensor, Dict[str, torch.Tensor], torch.Tensor
]:
"""
Returns distributions, from which actions can be sampled, and value estimates.
Returns sampled actions and value estimates.
If memory is enabled, return the memories as well.
:param inputs: A List of vector inputs as tensors.
:param masks: If using discrete actions, a Tensor of action masks.

def update_normalization(self, buffer: AgentBuffer) -> None:
self.network_body.update_normalization(buffer)
def get_action_stats(
self,
inputs: List[torch.Tensor],
masks: Optional[torch.Tensor] = None,
memories: Optional[torch.Tensor] = None,
sequence_length: int = 1,
) -> Tuple[AgentAction, ActionLogProbs, torch.Tensor, torch.Tensor]:
encoding, memories = self.network_body(
inputs, memories=memories, sequence_length=sequence_length
)
action, log_probs, entropies = self.action_model(encoding, masks)
return action, log_probs, entropies, memories
def forward(
self,

def memory_size(self) -> int:
return self.network_body.memory_size + self.critic.memory_size
def _get_actor_critic_mem(
self, memories: Optional[torch.Tensor] = None
) -> Tuple[Optional[torch.Tensor], Optional[torch.Tensor]]:
if self.use_lstm and memories is not None:
# Use only the back half of memories for critic and actor
actor_mem, critic_mem = torch.split(memories, self.memory_size // 2, dim=-1)
else:
critic_mem = None
actor_mem = None
return actor_mem, critic_mem
def critic_pass(
self,
inputs: List[torch.Tensor],

actor_mem, critic_mem = None, None
if self.use_lstm:
# Use only the back half of memories for critic
actor_mem, critic_mem = torch.split(memories, self.memory_size // 2, -1)
actor_mem, critic_mem = self._get_actor_critic_mem(memories)
value_outputs, critic_mem_out = self.critic(
inputs, memories=critic_mem, sequence_length=sequence_length
)

memories: Optional[torch.Tensor] = None,
sequence_length: int = 1,
) -> Tuple[ActionLogProbs, torch.Tensor, Dict[str, torch.Tensor]]:
if self.use_lstm:
# Use only the back half of memories for critic and actor
actor_mem, critic_mem = torch.split(memories, self.memory_size // 2, dim=-1)
else:
critic_mem = None
actor_mem = None
actor_mem, critic_mem = self._get_actor_critic_mem(memories)
encoding, actor_mem_outs = self.network_body(
inputs, memories=actor_mem, sequence_length=sequence_length
)

return log_probs, entropies, value_outputs
def get_action_stats(
self,
inputs: List[torch.Tensor],
masks: Optional[torch.Tensor] = None,
memories: Optional[torch.Tensor] = None,
sequence_length: int = 1,
) -> Tuple[AgentAction, ActionLogProbs, torch.Tensor, torch.Tensor]:
actor_mem, critic_mem = self._get_actor_critic_mem(memories)
action, log_probs, entropies, actor_mem_out = super().get_action_stats(
inputs, masks=masks, memories=actor_mem, sequence_length=sequence_length
)
if critic_mem is not None:
# Make memories with the actor mem unchanged
memories_out = torch.cat([actor_mem_out, critic_mem], dim=-1)
else:
memories_out = None
return action, log_probs, entropies, memories_out
def get_action_stats_and_value(
self,
inputs: List[torch.Tensor],

) -> Tuple[
AgentAction, ActionLogProbs, torch.Tensor, Dict[str, torch.Tensor], torch.Tensor
]:
if self.use_lstm:
# Use only the back half of memories for critic and actor
actor_mem, critic_mem = torch.split(memories, self.memory_size // 2, dim=-1)
else:
critic_mem = None
actor_mem = None
actor_mem, critic_mem = self._get_actor_critic_mem(memories)
encoding, actor_mem_outs = self.network_body(
inputs, memories=actor_mem, sequence_length=sequence_length
)

93
docs/images/monitor.png
