
Merge branch 'master' into develop-centralizedcritic

/comms-grad
Ervin Teng, 3 years ago
Current commit: 25dfd883
130 files changed, with 2454 insertions and 1515 deletions
Changed files (changed lines per file in parentheses):
  1. .github/workflows/pytest.yml (4)
  2. Project/Assets/ML-Agents/Examples/GridWorld/Scripts/GridAgent.cs (4)
  3. Project/Assets/ML-Agents/Examples/Reacher/Scripts/ReacherAgent.cs (1)
  4. com.unity.ml-agents/CHANGELOG.md (5)
  5. com.unity.ml-agents/Editor/BehaviorParametersEditor.cs (32)
  6. com.unity.ml-agents/Editor/BrainParametersDrawer.cs (58)
  7. com.unity.ml-agents/Editor/DemonstrationDrawer.cs (46)
  8. com.unity.ml-agents/Runtime/Academy.cs (7)
  9. com.unity.ml-agents/Runtime/Actuators/ActionSegment.cs (9)
  10. com.unity.ml-agents/Runtime/Actuators/ActionSpec.cs (36)
  11. com.unity.ml-agents/Runtime/Actuators/ActuatorManager.cs (6)
  12. com.unity.ml-agents/Runtime/Actuators/IActionReceiver.cs (57)
  13. com.unity.ml-agents/Runtime/Actuators/VectorActuator.cs (32)
  14. com.unity.ml-agents/Runtime/Agent.cs (56)
  15. com.unity.ml-agents/Runtime/Agent.deprecated.cs (14)
  16. com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs (113)
  17. com.unity.ml-agents/Runtime/Communicator/ICommunicator.cs (2)
  18. com.unity.ml-agents/Runtime/Communicator/RpcCommunicator.cs (13)
  19. com.unity.ml-agents/Runtime/Communicator/UnityRLCapabilities.cs (13)
  20. com.unity.ml-agents/Runtime/Grpc/CommunicatorObjects/AgentAction.cs (82)
  21. com.unity.ml-agents/Runtime/Grpc/CommunicatorObjects/BrainParameters.cs (348)
  22. com.unity.ml-agents/Runtime/Grpc/CommunicatorObjects/Capabilities.cs (44)
  23. com.unity.ml-agents/Runtime/Inference/ApplierImpl.cs (44)
  24. com.unity.ml-agents/Runtime/Inference/BarracudaModelParamLoader.cs (271)
  25. com.unity.ml-agents/Runtime/Inference/GeneratorImpl.cs (4)
  26. com.unity.ml-agents/Runtime/Inference/ModelRunner.cs (12)
  27. com.unity.ml-agents/Runtime/Inference/TensorApplier.cs (35)
  28. com.unity.ml-agents/Runtime/Inference/TensorGenerator.cs (26)
  29. com.unity.ml-agents/Runtime/Inference/TensorNames.cs (15)
  30. com.unity.ml-agents/Runtime/Policies/BarracudaPolicy.cs (19)
  31. com.unity.ml-agents/Runtime/Policies/BrainParameters.cs (109)
  32. com.unity.ml-agents/Runtime/Policies/RemotePolicy.cs (14)
  33. com.unity.ml-agents/Runtime/Sensors/ObservationWriter.cs (4)
  34. com.unity.ml-agents/Tests/Editor/Actuators/ActuatorManagerTests.cs (12)
  35. com.unity.ml-agents/Tests/Editor/Actuators/VectorActuatorTests.cs (10)
  36. com.unity.ml-agents/Tests/Editor/DemonstrationTests.cs (9)
  37. com.unity.ml-agents/Tests/Editor/EditModeTestInternalBrainTensorApplier.cs (74)
  38. com.unity.ml-agents/Tests/Editor/EditModeTestInternalBrainTensorGenerator.cs (7)
  39. com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs (27)
  40. com.unity.ml-agents/Tests/Editor/ModelRunnerTest.cs (62)
  41. com.unity.ml-agents/Tests/Editor/ParameterLoaderTest.cs (217)
  42. com.unity.ml-agents/Tests/Editor/TestModels/discrete1vis0vec_2_3action_recurr_deprecated.nn.meta (2)
  43. com.unity.ml-agents/Tests/Editor/TestModels/continuous2vis8vec2action_deprecated.nn.meta (2)
  44. com.unity.ml-agents/Tests/Runtime/RuntimeAPITest.cs (3)
  45. docs/Getting-Started.md (8)
  46. docs/Learning-Environment-Create-New.md (15)
  47. docs/Learning-Environment-Design-Agents.md (80)
  48. docs/Learning-Environment-Design.md (4)
  49. docs/Python-API.md (64)
  50. docs/Training-Configuration-File.md (4)
  51. gym-unity/gym_unity/envs/__init__.py (10)
  52. ml-agents-envs/mlagents_envs/base_env.py (148)
  53. ml-agents-envs/mlagents_envs/communicator_objects/agent_action_pb2.py (22)
  54. ml-agents-envs/mlagents_envs/communicator_objects/agent_action_pb2.pyi (12)
  55. ml-agents-envs/mlagents_envs/communicator_objects/brain_parameters_pb2.py (82)
  56. ml-agents-envs/mlagents_envs/communicator_objects/brain_parameters_pb2.pyi (45)
  57. ml-agents-envs/mlagents_envs/communicator_objects/capabilities_pb2.py (13)
  58. ml-agents-envs/mlagents_envs/communicator_objects/capabilities_pb2.pyi (6)
  59. ml-agents-envs/mlagents_envs/environment.py (32)
  60. ml-agents-envs/mlagents_envs/mock_communicator.py (18)
  61. ml-agents-envs/mlagents_envs/rpc_utils.py (27)
  62. ml-agents-envs/mlagents_envs/tests/test_envs.py (6)
  63. ml-agents-envs/mlagents_envs/tests/test_rpc_utils.py (33)
  64. ml-agents-envs/mlagents_envs/tests/test_steps.py (27)
  65. ml-agents/mlagents/trainers/action_info.py (3)
  66. ml-agents/mlagents/trainers/agent_processor.py (23)
  67. ml-agents/mlagents/trainers/demo_loader.py (17)
  68. ml-agents/mlagents/trainers/env_manager.py (1)
  69. ml-agents/mlagents/trainers/optimizer/tf_optimizer.py (4)
  70. ml-agents/mlagents/trainers/policy/policy.py (40)
  71. ml-agents/mlagents/trainers/policy/tf_policy.py (33)
  72. ml-agents/mlagents/trainers/policy/torch_policy.py (84)
  73. ml-agents/mlagents/trainers/ppo/optimizer_tf.py (9)
  74. ml-agents/mlagents/trainers/ppo/optimizer_torch.py (12)
  75. ml-agents/mlagents/trainers/sac/optimizer_tf.py (6)
  76. ml-agents/mlagents/trainers/sac/optimizer_torch.py (278)
  77. ml-agents/mlagents/trainers/simple_env_manager.py (2)
  78. ml-agents/mlagents/trainers/subprocess_env_manager.py (56)
  79. ml-agents/mlagents/trainers/tests/mock_brain.py (24)
  80. ml-agents/mlagents/trainers/tests/simple_test_envs.py (91)
  81. ml-agents/mlagents/trainers/tests/tensorflow/test_ppo.py (66)
  82. ml-agents/mlagents/trainers/tests/tensorflow/test_simple_rl.py (128)
  83. ml-agents/mlagents/trainers/tests/tensorflow/test_tf_policy.py (12)
  84. ml-agents/mlagents/trainers/tests/test_agent_processor.py (41)
  85. ml-agents/mlagents/trainers/tests/test_demo_loader.py (10)
  86. ml-agents/mlagents/trainers/tests/test_subprocess_env_manager.py (33)
  87. ml-agents/mlagents/trainers/tests/test_trajectory.py (7)
  88. ml-agents/mlagents/trainers/tests/torch/saver/test_saver.py (13)
  89. ml-agents/mlagents/trainers/tests/torch/test_distributions.py (2)
  90. ml-agents/mlagents/trainers/tests/torch/test_networks.py (90)
  91. ml-agents/mlagents/trainers/tests/torch/test_policy.py (28)
  92. ml-agents/mlagents/trainers/tests/torch/test_ppo.py (15)
  93. ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_curiosity.py (2)
  94. ml-agents/mlagents/trainers/tests/torch/test_reward_providers/utils.py (11)
  95. ml-agents/mlagents/trainers/tests/torch/test_sac.py (3)
  96. ml-agents/mlagents/trainers/tests/torch/test_simple_rl.py (132)
  97. ml-agents/mlagents/trainers/tests/torch/test_utils.py (44)
  98. ml-agents/mlagents/trainers/tf/components/bc/module.py (6)
  99. ml-agents/mlagents/trainers/tf/components/reward_signals/curiosity/signal.py (10)
  100. ml-agents/mlagents/trainers/tf/components/reward_signals/gail/signal.py (17)

4
.github/workflows/pytest.yml


- 'gym-unity/**'
- 'test_constraints*.txt'
- 'test_requirements.txt'
- '.github/workflows/pytest.yml'
push:
branches: [master]

run: python -c "import sys; print(sys.version)"
- name: Install dependencies
run: |
python -m pip install --upgrade pip
# pin pip to workaround https://github.com/pypa/pip/issues/9180
python -m pip install pip==20.2
python -m pip install --upgrade setuptools
python -m pip install --progress-bar=off -e ./ml-agents-envs -c ${{ matrix.pip_constraints }}
python -m pip install --progress-bar=off -e ./ml-agents -c ${{ matrix.pip_constraints }}

4
Project/Assets/ML-Agents/Examples/GridWorld/Scripts/GridAgent.cs


if (maskActions)
{
// Prevents the agent from picking an action that would make it collide with a wall
var positionX = (int)transform.position.x;
var positionZ = (int)transform.position.z;
var positionX = (int)transform.localPosition.x;
var positionZ = (int)transform.localPosition.z;
var maxPosition = (int)m_ResetParams.GetWithDefault("gridSize", 5f) - 1;
if (positionX == 0)
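The hunk above only changes how the grid coordinates are computed (world position to localPosition); the surrounding masking code is collapsed by the diff view. Below is a minimal sketch of how that math typically feeds the discrete action mask, assuming the CollectDiscreteActionMasks/DiscreteActionMasker.WriteMask API of this release; the k_Left/k_Right indices and the class skeleton are illustrative, not part of the hunk.

```csharp
using Unity.MLAgents;
using UnityEngine;

public class MaskedGridAgent : Agent
{
    // Hypothetical indices for the discrete movement branch (branch 0).
    const int k_Left = 3;
    const int k_Right = 4;

    public bool maskActions = true;
    EnvironmentParameters m_ResetParams;

    public override void Initialize()
    {
        m_ResetParams = Academy.Instance.EnvironmentParameters;
    }

    public override void CollectDiscreteActionMasks(DiscreteActionMasker actionMasker)
    {
        if (!maskActions) return;

        // localPosition keeps the mask correct even when the whole grid is offset in the scene,
        // which is the point of the change in the hunk above.
        var positionX = (int)transform.localPosition.x;
        var maxPosition = (int)m_ResetParams.GetWithDefault("gridSize", 5f) - 1;

        if (positionX == 0)
        {
            actionMasker.WriteMask(0, new[] { k_Left });   // block stepping off the left edge
        }
        if (positionX == maxPosition)
        {
            actionMasker.WriteMask(0, new[] { k_Right });  // block stepping off the right edge
        }
    }
}
```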

1
Project/Assets/ML-Agents/Examples/Reacher/Scripts/ReacherAgent.cs


/// The agent's four actions correspond to torques on each of the two joints.
/// </summary>
public override void OnActionReceived(ActionBuffers actionBuffers)
{
m_GoalDegree += m_GoalSpeed;
UpdateGoalPosition();

5
com.unity.ml-agents/CHANGELOG.md


### Major Changes
#### com.unity.ml-agents (C#)
#### ml-agents / ml-agents-envs / gym-unity (Python)
- PyTorch trainers now support training agents with both continuous and discrete action spaces. (#4702)
- Agent with both continuous and discrete actions is now supported. You can specify
continuous and discrete action sizes respectively in Behavior Parameters. (#4702, #4718)
- `ActionSpec.validate_action()` now enforces that `UnityEnvironment.set_action_for_agent()` receives a 1D `np.array`.
### Bug Fixes
#### com.unity.ml-agents (C#)
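To make the hybrid-actions entries above concrete, here is a minimal sketch of an Agent that consumes both action types through the ActionBuffers API introduced in this merge. The behavior setup (2 continuous actions, one discrete branch of size 3), the Rigidbody usage, and the reward logic are illustrative assumptions.

```csharp
using Unity.MLAgents;
using Unity.MLAgents.Actuators;
using UnityEngine;

// Sketch only: assumes 2 continuous actions and one discrete branch of size 3
// configured on the Behavior Parameters component.
public class HybridActionAgent : Agent
{
    Rigidbody m_Body;

    public override void Initialize()
    {
        m_Body = GetComponent<Rigidbody>();
    }

    public override void OnActionReceived(ActionBuffers actions)
    {
        // Continuous segment, e.g. planar movement forces.
        var move = new Vector3(actions.ContinuousActions[0], 0f, actions.ContinuousActions[1]);
        m_Body.AddForce(move);

        // Discrete segment, e.g. branch 0 picks one of three options.
        var choice = actions.DiscreteActions[0];
        if (choice == 2)
        {
            AddReward(0.1f);
        }
    }
}
```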

32
com.unity.ml-agents/Editor/BehaviorParametersEditor.cs


float m_TimeSinceModelReload;
// Whether or not the model needs to be reloaded
bool m_RequireReload;
const string k_BehaviorName = "m_BehaviorName";
const string k_BrainParametersName = "m_BrainParameters";
const string k_ModelName = "m_Model";
const string k_InferenceDeviceName = "m_InferenceDevice";
const string k_BehaviorTypeName = "m_BehaviorType";
const string k_TeamIdName = "TeamId";
const string k_GroupIdName = "GroupId";
const string k_UseChildSensorsName = "m_UseChildSensors";
const string k_ObservableAttributeHandlingName = "m_ObservableAttributeHandling";
public override void OnInspectorGUI()
{

bool needBrainParametersUpdate; // Whether the brain parameters changed
// Drawing the Behavior Parameters
EditorGUI.indentLevel++;

{
EditorGUILayout.PropertyField(so.FindProperty("m_BehaviorName"));
EditorGUILayout.PropertyField(so.FindProperty(k_BehaviorName));
EditorGUI.BeginChangeCheck();
EditorGUILayout.PropertyField(so.FindProperty("m_BrainParameters"), true);
EditorGUILayout.PropertyField(so.FindProperty(k_BrainParametersName), true);
needBrainParametersUpdate = EditorGUI.EndChangeCheck();
EditorGUILayout.PropertyField(so.FindProperty("m_Model"), true);
EditorGUILayout.PropertyField(so.FindProperty(k_ModelName), true);
EditorGUILayout.PropertyField(so.FindProperty("m_InferenceDevice"), true);
EditorGUILayout.PropertyField(so.FindProperty(k_InferenceDeviceName), true);
EditorGUI.indentLevel--;
}
needPolicyUpdate = needPolicyUpdate || EditorGUI.EndChangeCheck();

EditorGUILayout.PropertyField(so.FindProperty("m_BehaviorType"));
EditorGUILayout.PropertyField(so.FindProperty(k_BehaviorTypeName));
EditorGUILayout.PropertyField(so.FindProperty("TeamId"));
EditorGUILayout.PropertyField(so.FindProperty("GroupId"));
EditorGUILayout.PropertyField(so.FindProperty(k_GroupIdName));
EditorGUILayout.PropertyField(so.FindProperty(k_TeamIdName));
EditorGUILayout.PropertyField(so.FindProperty("m_UseChildSensors"), true);
EditorGUILayout.PropertyField(so.FindProperty("m_ObservableAttributeHandling"), true);
EditorGUILayout.PropertyField(so.FindProperty(k_UseChildSensorsName), true);
EditorGUILayout.PropertyField(so.FindProperty(k_ObservableAttributeHandlingName), true);
}
EditorGUI.EndDisabledGroup();

// Display all failed checks
D.logEnabled = false;
Model barracudaModel = null;
var model = (NNModel)serializedObject.FindProperty("m_Model").objectReferenceValue;
var model = (NNModel)serializedObject.FindProperty(k_ModelName).objectReferenceValue;
var behaviorParameters = (BehaviorParameters)target;
// Grab the sensor components, since we need them to determine the observation sizes.

58
com.unity.ml-agents/Editor/BrainParametersDrawer.cs


// The height of a line in the Unity Inspectors
const float k_LineHeight = 17f;
const int k_VecObsNumLine = 3;
const string k_ActionSizePropName = "VectorActionSize";
const string k_ActionTypePropName = "VectorActionSpaceType";
const string k_ActionSpecName = "m_ActionSpec";
const string k_ContinuousActionSizeName = "m_NumContinuousActions";
const string k_DiscreteBranchSizeName = "BranchSizes";
const string k_ActionDescriptionPropName = "VectorActionDescriptions";
const string k_VecObsPropName = "VectorObservationSize";
const string k_NumVecObsPropName = "NumStackedVectorObservations";

EditorGUI.LabelField(position, "Vector Action");
position.y += k_LineHeight;
EditorGUI.indentLevel++;
var bpVectorActionType = property.FindPropertyRelative(k_ActionTypePropName);
EditorGUI.PropertyField(
position,
bpVectorActionType,
new GUIContent("Space Type",
"Corresponds to whether state vector contains a single integer (Discrete) " +
"or a series of real-valued floats (Continuous)."));
var actionSpecProperty = property.FindPropertyRelative(k_ActionSpecName);
DrawContinuousVectorAction(position, actionSpecProperty);
if (bpVectorActionType.enumValueIndex == 1)
{
DrawContinuousVectorAction(position, property);
}
else
{
DrawDiscreteVectorAction(position, property);
}
DrawDiscreteVectorAction(position, actionSpecProperty);
}
/// <summary>

/// to make the custom GUI for.</param>
static void DrawContinuousVectorAction(Rect position, SerializedProperty property)
{
var vecActionSize = property.FindPropertyRelative(k_ActionSizePropName);
// This check is here due to:
// https://fogbugz.unity3d.com/f/cases/1246524/
// If this case has been resolved, please remove this if condition.
if (vecActionSize.arraySize != 1)
{
vecActionSize.arraySize = 1;
}
var continuousActionSize =
vecActionSize.GetArrayElementAtIndex(0);
var continuousActionSize = property.FindPropertyRelative(k_ContinuousActionSizeName);
new GUIContent("Space Size", "Length of continuous action vector."));
new GUIContent("Continuous Action Size", "Length of continuous action vector."));
}
/// <summary>

/// to make the custom GUI for.</param>
static void DrawDiscreteVectorAction(Rect position, SerializedProperty property)
{
var vecActionSize = property.FindPropertyRelative(k_ActionSizePropName);
var branchSizes = property.FindPropertyRelative(k_DiscreteBranchSizeName);
position, "Branches Size", vecActionSize.arraySize);
position, "Discrete Branch Size", branchSizes.arraySize);
if (newSize != vecActionSize.arraySize)
if (newSize != branchSizes.arraySize)
vecActionSize.arraySize = newSize;
branchSizes.arraySize = newSize;
}
position.y += k_LineHeight;

branchIndex < vecActionSize.arraySize;
branchIndex < branchSizes.arraySize;
vecActionSize.GetArrayElementAtIndex(branchIndex);
branchSizes.GetArrayElementAtIndex(branchIndex);
EditorGUI.PropertyField(
position,

/// <returns>The height of the drawer of the Vector Action.</returns>
static float GetHeightDrawVectorAction(SerializedProperty property)
{
var actionSize = 2 + property.FindPropertyRelative(k_ActionSizePropName).arraySize;
if (property.FindPropertyRelative(k_ActionTypePropName).enumValueIndex == 0)
{
actionSize += 1;
}
return actionSize * k_LineHeight;
var actionSpecProperty = property.FindPropertyRelative(k_ActionSpecName);
var numActionLines = 3 + actionSpecProperty.FindPropertyRelative(k_DiscreteBranchSizeName).arraySize;
return numActionLines * k_LineHeight;
}
}
}

46
com.unity.ml-agents/Editor/DemonstrationDrawer.cs


using System.Text;
using UnityEditor;
using Unity.MLAgents.Demonstrations;
using Unity.MLAgents.Policies;
namespace Unity.MLAgents.Editor

SerializedProperty m_BrainParameters;
SerializedProperty m_DemoMetaData;
SerializedProperty m_ObservationShapes;
const string k_BrainParametersName = "brainParameters";
const string k_MetaDataName = "metaData";
const string k_ObservationSummariesName = "observationSummaries";
const string k_DemonstrationName = "demonstrationName";
const string k_NumberStepsName = "numberSteps";
const string k_NumberEpisodesName = "numberEpisodes";
const string k_MeanRewardName = "meanReward";
const string k_ActionSpecName = "ActionSpec";
const string k_NumContinuousActionsName = "m_NumContinuousActions";
const string k_NumDiscreteActionsName = "m_NumDiscreteActions";
const string k_ShapeName = "shape";
m_BrainParameters = serializedObject.FindProperty("brainParameters");
m_DemoMetaData = serializedObject.FindProperty("metaData");
m_ObservationShapes = serializedObject.FindProperty("observationSummaries");
m_BrainParameters = serializedObject.FindProperty(k_BrainParametersName);
m_DemoMetaData = serializedObject.FindProperty(k_MetaDataName);
m_ObservationShapes = serializedObject.FindProperty(k_ObservationSummariesName);
}
/// <summary>

{
var nameProp = property.FindPropertyRelative("demonstrationName");
var experiencesProp = property.FindPropertyRelative("numberSteps");
var episodesProp = property.FindPropertyRelative("numberEpisodes");
var rewardsProp = property.FindPropertyRelative("meanReward");
var nameProp = property.FindPropertyRelative(k_DemonstrationName);
var experiencesProp = property.FindPropertyRelative(k_NumberStepsName);
var episodesProp = property.FindPropertyRelative(k_NumberEpisodesName);
var rewardsProp = property.FindPropertyRelative(k_MeanRewardName);
var nameLabel = nameProp.displayName + ": " + nameProp.stringValue;
var experiencesLabel = experiencesProp.displayName + ": " + experiencesProp.intValue;

/// </summary>
void MakeActionsProperty(SerializedProperty property)
{
var actSizeProperty = property.FindPropertyRelative("VectorActionSize");
var actSpaceTypeProp = property.FindPropertyRelative("VectorActionSpaceType");
var actSpecProperty = property.FindPropertyRelative(k_ActionSpecName);
var continuousSizeProperty = actSpecProperty.FindPropertyRelative(k_NumContinuousActionsName);
var discreteSizeProperty = actSpecProperty.FindPropertyRelative(k_NumDiscreteActionsName);
var vecActSizeLabel =
actSizeProperty.displayName + ": " + BuildIntArrayLabel(actSizeProperty);
var actSpaceTypeLabel = actSpaceTypeProp.displayName + ": " +
(SpaceType)actSpaceTypeProp.enumValueIndex;
var continuousSizeLabel =
continuousSizeProperty.displayName + ": " + continuousSizeProperty.intValue;
var discreteSizeLabel = discreteSizeProperty.displayName + ": " +
discreteSizeProperty.intValue;
EditorGUILayout.LabelField(vecActSizeLabel);
EditorGUILayout.LabelField(actSpaceTypeLabel);
EditorGUILayout.LabelField(continuousSizeLabel);
EditorGUILayout.LabelField(discreteSizeLabel);
}
/// <summary>

for (var i = 0; i < numObservations; i++)
{
var summary = obsSummariesProperty.GetArrayElementAtIndex(i);
var shapeProperty = summary.FindPropertyRelative("shape");
var shapeProperty = summary.FindPropertyRelative(k_ShapeName);
shapesLabels.Add(BuildIntArrayLabel(shapeProperty));
}

7
com.unity.ml-agents/Runtime/Academy.cs


/// <term>1.2.0</term>
/// <description>Support compression mapping for stacked compressed observations.</description>
/// </item>
/// <item>
/// <term>1.3.0</term>
/// <description>Support action spaces with both continuous and discrete actions.</description>
/// </item>
const string k_ApiVersion = "1.2.0";
const string k_ApiVersion = "1.3.0";
/// <summary>
/// Unity package version of com.unity.ml-agents.

Dispose();
}
}
#endif
/// <summary>

9
com.unity.ml-agents/Runtime/Actuators/ActionSegment.cs


System.Array.Clear(Array, Offset, Length);
}
/// <summary>
/// Check if the segment is empty.
/// </summary>
/// <returns>Whether or not the segment is empty.</returns>
public bool IsEmpty()
{
return Array == null || Array.Length == 0;
}
/// <inheritdoc/>
IEnumerator<T> IEnumerable<T>.GetEnumerator()
{

36
com.unity.ml-agents/Runtime/Actuators/ActionSpec.cs


using System;
using System.Collections.Generic;
using UnityEngine;
namespace Unity.MLAgents.Actuators
{

public readonly struct ActionSpec
[Serializable]
public struct ActionSpec
[SerializeField]
int m_NumContinuousActions;
/// <summary>
/// An array of branch sizes for our action space.

///
/// For an IActuator with a Continuous it will be null.
/// </summary>
public readonly int[] BranchSizes;
public int[] BranchSizes;
public int NumContinuousActions { get; }
public int NumContinuousActions { get { return m_NumContinuousActions; } set { m_NumContinuousActions = value; } }
public int NumDiscreteActions { get; }
public int NumDiscreteActions { get { return BranchSizes == null ? 0 : BranchSizes.Length; } }
public int SumOfDiscreteBranchSizes { get; }
public int SumOfDiscreteBranchSizes { get { return BranchSizes == null ? 0 : BranchSizes.Sum(); } }
/// <summary>
/// Creates a Continuous <see cref="ActionSpec"/> with the number of actions available.

public static ActionSpec MakeContinuous(int numActions)
{
var actuatorSpace = new ActionSpec(numActions, 0);
var actuatorSpace = new ActionSpec(numActions, null);
return actuatorSpace;
}

public static ActionSpec MakeDiscrete(params int[] branchSizes)
{
var numActions = branchSizes.Length;
var actuatorSpace = new ActionSpec(0, numActions, branchSizes);
var actuatorSpace = new ActionSpec(0, branchSizes);
internal ActionSpec(int numContinuousActions, int numDiscreteActions, int[] branchSizes = null)
internal ActionSpec(int numContinuousActions, int[] branchSizes = null)
NumContinuousActions = numContinuousActions;
NumDiscreteActions = numDiscreteActions;
m_NumContinuousActions = numContinuousActions;
SumOfDiscreteBranchSizes = branchSizes?.Sum() ?? 0;
/// Temporary check that the ActionSpec uses either all continuous or all discrete actions.
/// This should be removed once the trainer supports them.
/// Check that the ActionSpec uses either all continuous or all discrete actions.
/// This is only used when connecting to old versions of the trainer that don't support this.
internal void CheckNotHybrid()
internal void CheckAllContinuousOrDiscrete()
throw new UnityAgentsException("ActionSpecs must be all continuous or all discrete.");
throw new UnityAgentsException(
"Action spaces with both continuous and discrete actions are not supported by the trainer. " +
"ActionSpecs must be all continuous or all discrete."
);
}
}
}
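A short sketch of the factory methods and derived properties defined above; since the hybrid constructor is still internal at this point, the example sticks to the public MakeContinuous/MakeDiscrete helpers. The logged values follow directly from the property implementations shown in the hunk.

```csharp
using Unity.MLAgents.Actuators;
using UnityEngine;

public static class ActionSpecExamples
{
    public static void Demo()
    {
        // Purely continuous space with 3 actions.
        var continuous = ActionSpec.MakeContinuous(3);
        Debug.Log(continuous.NumContinuousActions);    // 3
        Debug.Log(continuous.NumDiscreteActions);      // 0 (BranchSizes is null)

        // Purely discrete space with two branches of size 3 and 2.
        var discrete = ActionSpec.MakeDiscrete(3, 2);
        Debug.Log(discrete.NumDiscreteActions);        // 2 (length of BranchSizes)
        Debug.Log(discrete.SumOfDiscreteBranchSizes);  // 5 (3 + 2)
    }
}
```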

6
com.unity.ml-agents/Runtime/Actuators/ActuatorManager.cs


}
}
return new ActionSpec(numContinuousActions, numDiscreteActions, combinedBranchSizes);
return new ActionSpec(numContinuousActions, combinedBranchSizes);
}
/// <summary>

Debug.Assert(
!m_Actuators[i].Name.Equals(m_Actuators[i + 1].Name),
"Actuator names must be unique.");
var first = m_Actuators[i].ActionSpec;
var second = m_Actuators[i + 1].ActionSpec;
Debug.Assert(first.NumContinuousActions > 0 == second.NumContinuousActions > 0,
"Actuators on the same Agent must have the same action SpaceType.");
}
}

57
com.unity.ml-agents/Runtime/Actuators/IActionReceiver.cs


}
/// <summary>
/// Construct an <see cref="ActionBuffers"/> instance with <see cref="ActionSpec"/>. All values are initialized to zeros.
/// /// </summary>
/// <param name="actionSpec">The <see cref="ActionSpec"/> to send to an <see cref="IActionReceiver"/>.</param>
public ActionBuffers(ActionSpec actionSpec)
: this(new ActionSegment<float>(new float[actionSpec.NumContinuousActions]),
new ActionSegment<int>(new int[actionSpec.NumDiscreteActions]))
{ }
/// <summary>
/// Create an <see cref="ActionBuffers"/> instance with ActionSpec and all actions stored as a float array.
/// </summary>
/// <param name="actionSpec"><see cref="ActionSpec"/> of the <see cref="ActionBuffers"/></param>
/// <param name="actions">The float array of all actions, including discrete and continuous actions.</param>
/// <returns>An <see cref="ActionBuffers"/> instance initialized with a <see cref="ActionSpec"/> and a float array.
internal static ActionBuffers FromActionSpec(ActionSpec actionSpec, float[] actions)
{
if (actions == null)
{
return ActionBuffers.Empty;
}
Debug.Assert(actions.Length == actionSpec.NumContinuousActions + actionSpec.NumDiscreteActions,
$"The length of '{nameof(actions)}' does not match the total size of ActionSpec.\n" +
$"{nameof(actions)}.Length: {actions.Length}\n" +
$"{nameof(actionSpec)}: {actionSpec.NumContinuousActions + actionSpec.NumDiscreteActions}");
ActionSegment<float> continuousActionSegment = ActionSegment<float>.Empty;
ActionSegment<int> discreteActionSegment = ActionSegment<int>.Empty;
int offset = 0;
if (actionSpec.NumContinuousActions > 0)
{
continuousActionSegment = new ActionSegment<float>(actions, 0, actionSpec.NumContinuousActions);
offset += actionSpec.NumContinuousActions;
}
if (actionSpec.NumDiscreteActions > 0)
{
int[] discreteActions = new int[actionSpec.NumDiscreteActions];
for (var i = 0; i < actionSpec.NumDiscreteActions; i++)
{
discreteActions[i] = (int)actions[i + offset];
}
discreteActionSegment = new ActionSegment<int>(discreteActions);
}
return new ActionBuffers(continuousActionSegment, discreteActionSegment);
}
/// <summary>
/// Clear the <see cref="ContinuousActions"/> and <see cref="DiscreteActions"/> segments to be all zeros.
/// </summary>
public void Clear()

}
/// <summary>
/// Check if the <see cref="ActionBuffers"/> is empty.
/// </summary>
public bool IsEmpty()
{
return ContinuousActions.IsEmpty() && DiscreteActions.IsEmpty();
}
/// <inheritdoc/>

/// <param name="destination">A float array to pack actions into whose length is greater than or
/// equal to the addition of the Lengths of this objects <see cref="ContinuousActions"/> and
/// <see cref="DiscreteActions"/> segments.</param>
/// [Obsolete("PackActions has been deprecated.")]
public void PackActions(in float[] destination)
{
Debug.Assert(destination.Length >= ContinuousActions.Length + DiscreteActions.Length,
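FromActionSpec above expects a flat float[] laid out as all continuous values followed by one float per discrete branch, which it truncates to int. Because FromActionSpec is internal, this sketch reproduces the same layout with the segment-based constructor; it assumes the ActionSegment constructors used inside FromActionSpec are publicly accessible.

```csharp
using Unity.MLAgents.Actuators;
using UnityEngine;

public static class ActionBuffersLayoutExample
{
    public static void Demo()
    {
        // Spec: 2 continuous actions + 1 discrete branch.
        // Flat layout: [c0, c1, d0] -> ContinuousActions = [c0, c1], DiscreteActions = [(int)d0]
        var flat = new[] { 0.5f, -0.25f, 2f };

        var buffers = new ActionBuffers(
            new ActionSegment<float>(flat, 0, 2),           // continuous slice: offset 0, length 2
            new ActionSegment<int>(new[] { (int)flat[2] })  // discrete value, truncated to int
        );

        Debug.Log(buffers.ContinuousActions[1]);  // -0.25
        Debug.Log(buffers.DiscreteActions[0]);    // 2
    }
}
```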

32
com.unity.ml-agents/Runtime/Actuators/VectorActuator.cs


/// Create a VectorActuator that forwards to the provided IActionReceiver.
/// </summary>
/// <param name="actionReceiver">The <see cref="IActionReceiver"/> used for OnActionReceived and WriteDiscreteActionMask.</param>
/// <param name="vectorActionSize">For discrete action spaces, the branch sizes for each action.
/// For continuous action spaces, the number of actions is the 0th element.</param>
/// <param name="spaceType"></param>
/// <param name="actionSpec"></param>
/// <exception cref="ArgumentOutOfRangeException">Thrown for invalid <see cref="SpaceType"/></exception>
int[] vectorActionSize,
SpaceType spaceType,
ActionSpec actionSpec,
ActionSpec = actionSpec;
switch (spaceType)
if (actionSpec.NumContinuousActions == 0)
case SpaceType.Continuous:
ActionSpec = ActionSpec.MakeContinuous(vectorActionSize[0]);
suffix = "-Continuous";
break;
case SpaceType.Discrete:
ActionSpec = ActionSpec.MakeDiscrete(vectorActionSize);
suffix = "-Discrete";
break;
default:
throw new ArgumentOutOfRangeException(nameof(spaceType),
spaceType,
"Unknown enum value.");
suffix = "-Discrete";
}
else if (actionSpec.NumDiscreteActions == 0)
{
suffix = "-Continuous";
}
else
{
suffix = $"-Continuous-{actionSpec.NumContinuousActions}-Discrete-{actionSpec.NumDiscreteActions}";
}
Name = name + suffix;
}
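The constructor above derives the actuator's Name suffix from the shape of its ActionSpec. The helper below simply mirrors that three-way rule so the resulting names are easy to predict; it is not part of the package.

```csharp
using Unity.MLAgents.Actuators;

static class VectorActuatorNaming
{
    // Mirrors the suffix rule from the constructor above.
    public static string NameFor(string baseName, ActionSpec spec)
    {
        string suffix;
        if (spec.NumContinuousActions == 0)
        {
            suffix = "-Discrete";
        }
        else if (spec.NumDiscreteActions == 0)
        {
            suffix = "-Continuous";
        }
        else
        {
            // Hybrid actuators encode both sizes in the name.
            suffix = $"-Continuous-{spec.NumContinuousActions}-Discrete-{spec.NumDiscreteActions}";
        }
        return baseName + suffix;
    }
}

// e.g. NameFor("VectorActuator", ActionSpec.MakeDiscrete(3, 2)) returns "VectorActuator-Discrete".
```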

56
com.unity.ml-agents/Runtime/Agent.cs


using System;
using System.Collections.Generic;
using System.Collections.ObjectModel;
using System.Linq;
using UnityEngine;
using Unity.Barracuda;
using Unity.MLAgents.Actuators;

/// <summary>
/// Keeps track of the last vector action taken by the Brain.
/// </summary>
public float[] storedVectorActions;
public ActionBuffers storedVectorActions;
/// <summary>
/// For discrete control, specifies the actions that the agent cannot take.

public void ClearActions()
{
Array.Clear(storedVectorActions, 0, storedVectorActions.Length);
storedVectorActions.Clear();
actionBuffers.PackActions(storedVectorActions);
var continuousActions = storedVectorActions.ContinuousActions;
for (var i = 0; i < actionBuffers.ContinuousActions.Length; i++)
{
continuousActions[i] = actionBuffers.ContinuousActions[i];
}
var discreteActions = storedVectorActions.DiscreteActions;
for (var i = 0; i < actionBuffers.DiscreteActions.Length; i++)
{
discreteActions[i] = actionBuffers.DiscreteActions[i];
}
}
}

/// * <see cref="BehaviorType.InferenceOnly"/>: decisions are always made using the trained
/// model specified in the <see cref="BehaviorParameters"/> component.
/// * <see cref="BehaviorType.HeuristicOnly"/>: when a decision is needed, the agent's
/// <see cref="Heuristic"/> function is called. Your implementation is responsible for
/// <see cref="Heuristic(in ActionBuffers)"/> function is called. Your implementation is responsible for
/// providing the appropriate action.
///
/// To trigger an agent decision automatically, you can attach a <see cref="DecisionRequester"/>

/// can only take an action when it touches the ground, so several frames might elapse between
/// one decision and the need for the next.
///
/// Use the <see cref="OnActionReceived(float[])"/> function to implement the actions your agent can take,
/// Use the <see cref="OnActionReceived(ActionBuffers)"/> function to implement the actions your agent can take,
/// such as moving to reach a goal or interacting with its environment.
///
/// When you call <see cref="EndEpisode"/> on an agent or the agent reaches its <see cref="MaxStep"/> count,

/// only use the [MonoBehaviour.Update] function for cosmetic purposes. If you override the [MonoBehaviour]
/// methods, [OnEnable()] or [OnDisable()], always call the base Agent class implementations.
///
/// You can implement the <see cref="Heuristic"/> function to specify agent actions using
/// You can implement the <see cref="Heuristic(in ActionBuffers)"/> function to specify agent actions using
/// your own heuristic algorithm. Implementing a heuristic function can be useful
/// for debugging. For example, you can use keyboard input to select agent actions in
/// order to manually control an agent's behavior.

/// <summary>
/// VectorActuator which is used by default if no other sensors exist on this Agent. This VectorSensor will
/// delegate its actions to <see cref="OnActionReceived(float[])"/> by default in order to keep backward compatibility
/// delegate its actions to <see cref="OnActionReceived(ActionBuffers)"/> by default in order to keep backward compatibility
/// with the current behavior of Agent.
/// </summary>
IActuator m_VectorActuator;

InitializeSensors();
}
m_Info.storedVectorActions = new float[m_ActuatorManager.TotalNumberOfActions];
m_Info.storedVectorActions = new ActionBuffers(
new float[m_ActuatorManager.NumContinuousActions],
new int[m_ActuatorManager.NumDiscreteActions]
);
// The first time the Academy resets, all Agents in the scene will be
// forced to reset through the <see cref="AgentForceReset"/> event.

m_CumulativeReward = 0f;
m_RequestAction = false;
m_RequestDecision = false;
Array.Clear(m_Info.storedVectorActions, 0, m_Info.storedVectorActions.Length);
m_Info.storedVectorActions.Clear();
}
/// <summary>

/// Use <see cref="AddReward(float)"/> to incrementally change the reward rather than
/// overriding it.
///
/// Typically, you assign rewards in the Agent subclass's <see cref="OnActionReceived(float[])"/>
/// Typically, you assign rewards in the Agent subclass's <see cref="OnActionReceived(ActionBuffers)"/>
/// implementation after carrying out the received action and evaluating its success.
///
/// Rewards are used during reinforcement learning; they are ignored during inference.

/// You can also use the [Input System package], which provides a more flexible and
/// configurable input system.
/// <code>
/// public override void Heuristic(ActionBuffers actionsOut)
/// public override void Heuristic(in ActionBuffers actionsOut)
/// actionsOut.ContinuousActions[0] = Input.GetAxis("Horizontal");
/// actionsOut.ContinuousActions[1] = Input.GetKey(KeyCode.Space) ? 1.0f : 0.0f;
/// actionsOut.ContinuousActions[2] = Input.GetAxis("Vertical");
/// var continuousActionsOut = actionsOut.ContinuousActions;
/// continuousActionsOut[0] = Input.GetAxis("Horizontal");
/// continuousActionsOut[1] = Input.GetKey(KeyCode.Space) ? 1.0f : 0.0f;
/// continuousActionsOut[2] = Input.GetAxis("Vertical");
/// }
/// </code>
/// [Input Manager]: https://docs.unity3d.com/Manual/class-InputManager.html

// Support legacy OnActionReceived
// TODO don't set this up if the sizes are 0?
var param = m_PolicyFactory.BrainParameters;
m_VectorActuator = new VectorActuator(this, param.VectorActionSize, param.VectorActionSpaceType);
m_VectorActuator = new VectorActuator(this, param.ActionSpec);
m_ActuatorManager = new ActuatorManager(attachedActuators.Length + 1);
m_LegacyActionCache = new float[m_VectorActuator.TotalNumberOfActions()];

}
else
{
m_ActuatorManager.StoredActions.PackActions(m_Info.storedVectorActions);
m_Info.CopyActions(m_ActuatorManager.StoredActions);
}
UpdateSensors();

/// </param>
public virtual void OnActionReceived(ActionBuffers actions)
{
actions.PackActions(m_LegacyActionCache);
if (!actions.ContinuousActions.IsEmpty())
{
m_LegacyActionCache = actions.ContinuousActions.Array;
}
else
{
m_LegacyActionCache = Array.ConvertAll(actions.DiscreteActions.Array, x => (float)x);
}
OnActionReceived(m_LegacyActionCache);
}

{
OnEpisodeBegin();
}
}
/// <summary>
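The updated doc comment above shows only the continuous half of the new Heuristic signature. Here is a minimal sketch of a heuristic that fills both segments of the ActionBuffers; the key bindings and the meaning of discrete branch 0 are illustrative assumptions.

```csharp
using Unity.MLAgents;
using Unity.MLAgents.Actuators;
using UnityEngine;

public class HybridHeuristicAgent : Agent
{
    public override void Heuristic(in ActionBuffers actionsOut)
    {
        // Continuous segment: write through a local copy of the segment, as in the doc comment.
        var continuousActionsOut = actionsOut.ContinuousActions;
        continuousActionsOut[0] = Input.GetAxis("Horizontal");
        continuousActionsOut[1] = Input.GetAxis("Vertical");

        // Discrete segment: branch 0 as a jump/no-op toggle (illustrative).
        var discreteActionsOut = actionsOut.DiscreteActions;
        discreteActionsOut[0] = Input.GetKey(KeyCode.Space) ? 1 : 0;
    }
}
```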

14
com.unity.ml-agents/Runtime/Agent.deprecated.cs


}
/// <summary>
/// This method passes in a float array that is to be populated with actions.
/// Deprecated, use <see cref="Heuristic(in ActionBuffers)"/> instead.
/// </summary>
/// <param name="actionsOut"></param>
public virtual void Heuristic(float[] actionsOut)

/// <returns>
/// The last action that was decided by the Agent (or null if no decision has been made).
/// </returns>
/// <seealso cref="OnActionReceived(float[])"/>
/// <seealso cref="OnActionReceived(ActionBuffers)"/>
return m_Info.storedVectorActions;
var storedAction = m_Info.storedVectorActions;
if (!storedAction.ContinuousActions.IsEmpty())
{
return storedAction.ContinuousActions.Array;
}
else
{
return Array.ConvertAll(storedAction.DiscreteActions.Array, x => (float)x);
}
}
}
}

113
com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs


var agentInfoProto = ai.ToAgentInfoProto();
var agentActionProto = new AgentActionProto();
if (ai.storedVectorActions != null)
if (!ai.storedVectorActions.IsEmpty())
agentActionProto.VectorActions.AddRange(ai.storedVectorActions);
if (!ai.storedVectorActions.ContinuousActions.IsEmpty())
{
agentActionProto.ContinuousActions.AddRange(ai.storedVectorActions.ContinuousActions.Array);
}
if (!ai.storedVectorActions.DiscreteActions.IsEmpty())
{
agentActionProto.DiscreteActions.AddRange(ai.storedVectorActions.DiscreteActions.Array);
}
}
return new AgentInfoActionPairProto

return summariesOut;
}
#endregion
#region BrainParameters

{
var brainParametersProto = new BrainParametersProto
{
VectorActionSize = { bp.VectorActionSize },
VectorActionSpaceType = (SpaceTypeProto)bp.VectorActionSpaceType,
VectorActionSpaceTypeDeprecated = (SpaceTypeProto)bp.VectorActionSpaceType,
IsTraining = isTraining
IsTraining = isTraining,
ActionSpec = ToActionSpecProto(bp.ActionSpec),
if (bp.VectorActionSize != null)
{
brainParametersProto.VectorActionSizeDeprecated.AddRange(bp.VectorActionSize);
}
brainParametersProto.VectorActionDescriptions.AddRange(bp.VectorActionDescriptions);
brainParametersProto.VectorActionDescriptionsDeprecated.AddRange(bp.VectorActionDescriptions);
}
return brainParametersProto;
}

/// <param name="isTraining">Whether or not the Brain is training.</param>
public static BrainParametersProto ToBrainParametersProto(this ActionSpec actionSpec, string name, bool isTraining)
{
actionSpec.CheckNotHybrid();
IsTraining = isTraining
IsTraining = isTraining,
ActionSpec = ToActionSpecProto(actionSpec),
if (actionSpec.NumContinuousActions > 0)
{
brainParametersProto.VectorActionSize.Add(actionSpec.NumContinuousActions);
brainParametersProto.VectorActionSpaceType = SpaceTypeProto.Continuous;
}
else if (actionSpec.NumDiscreteActions > 0)
var supportHybrid = Academy.Instance.TrainerCapabilities == null || Academy.Instance.TrainerCapabilities.HybridActions;
if (!supportHybrid)
brainParametersProto.VectorActionSize.AddRange(actionSpec.BranchSizes);
brainParametersProto.VectorActionSpaceType = SpaceTypeProto.Discrete;
actionSpec.CheckAllContinuousOrDiscrete();
if (actionSpec.NumContinuousActions > 0)
{
brainParametersProto.VectorActionSizeDeprecated.Add(actionSpec.NumContinuousActions);
brainParametersProto.VectorActionSpaceTypeDeprecated = SpaceTypeProto.Continuous;
}
else if (actionSpec.NumDiscreteActions > 0)
{
brainParametersProto.VectorActionSizeDeprecated.AddRange(actionSpec.BranchSizes);
brainParametersProto.VectorActionSpaceTypeDeprecated = SpaceTypeProto.Discrete;
}
}
// TODO handle ActionDescriptions?

{
var bp = new BrainParameters
{
VectorActionSize = bpp.VectorActionSize.ToArray(),
VectorActionDescriptions = bpp.VectorActionDescriptions.ToArray(),
VectorActionSpaceType = (SpaceType)bpp.VectorActionSpaceType
VectorActionDescriptions = bpp.VectorActionDescriptionsDeprecated.ToArray(),
ActionSpec = ToActionSpec(bpp.ActionSpec),
/// <summary>
/// Convert a ActionSpecProto to a ActionSpec struct.
/// </summary>
/// <param name="actionSpecProto">An instance of an action spec protobuf object.</param>
/// <returns>An ActionSpec struct.</returns>
public static ActionSpec ToActionSpec(this ActionSpecProto actionSpecProto)
{
var actionSpec = new ActionSpec(actionSpecProto.NumContinuousActions);
if (actionSpecProto.DiscreteBranchSizes != null)
{
actionSpec.BranchSizes = actionSpecProto.DiscreteBranchSizes.ToArray();
}
return actionSpec;
}
/// <summary>
/// Convert a ActionSpec struct to a ActionSpecProto.
/// </summary>
/// <param name="actionSpecProto">An instance of an action spec struct.</param>
/// <returns>An ActionSpecProto.</returns>
public static ActionSpecProto ToActionSpecProto(this ActionSpec actionSpec)
{
var actionSpecProto = new ActionSpecProto
{
NumContinuousActions = actionSpec.NumContinuousActions,
NumDiscreteActions = actionSpec.NumDiscreteActions,
};
if (actionSpec.BranchSizes != null)
{
actionSpecProto.DiscreteBranchSizes.AddRange(actionSpec.BranchSizes);
}
return actionSpecProto;
}
#endregion
#region DemonstrationMetaData

}
return dm;
}
#endregion
public static UnityRLInitParameters ToUnityRLInitParameters(this UnityRLInitializationInputProto inputProto)

}
#region AgentAction
public static List<float[]> ToAgentActionList(this UnityRLInputProto.Types.ListAgentActionProto proto)
public static List<ActionBuffers> ToAgentActionList(this UnityRLInputProto.Types.ListAgentActionProto proto)
var agentActions = new List<float[]>(proto.Value.Count);
var agentActions = new List<ActionBuffers>(proto.Value.Count);
agentActions.Add(ap.VectorActions.ToArray());
agentActions.Add(ap.ToActionBuffers());
public static ActionBuffers ToActionBuffers(this AgentActionProto proto)
{
return new ActionBuffers(proto.ContinuousActions.ToArray(), proto.DiscreteActions.ToArray());
}
#endregion
#region Observations

if (!s_HaveWarnedTrainerCapabilitiesMapping)
{
Debug.LogWarning($"The sensor {sensor.GetName()} is using non-trivial mapping and " +
"the attached trainer doesn't support compression mapping. " +
"Switching to uncompressed observations.");
"the attached trainer doesn't support compression mapping. " +
"Switching to uncompressed observations.");
s_HaveWarnedTrainerCapabilitiesMapping = true;
}
compressionType = SensorCompressionType.None;

$"GetCompressedObservation() returned null data for sensor named {sensor.GetName()}. " +
"You must return a byte[]. If you don't want to use compressed observations, " +
"return SensorCompressionType.None from GetCompressionType()."
);
);
}
observationProto = new ObservationProto
{

observationProto.Shape.AddRange(shape);
return observationProto;
}
#endregion
public static UnityRLCapabilities ToRLCapabilities(this UnityRLCapabilitiesProto proto)

BaseRLCapabilities = proto.BaseRLCapabilities,
ConcatenatedPngObservations = proto.ConcatenatedPngObservations,
CompressedChannelMapping = proto.CompressedChannelMapping,
HybridActions = proto.HybridActions,
};
}

BaseRLCapabilities = rlCaps.BaseRLCapabilities,
ConcatenatedPngObservations = rlCaps.ConcatenatedPngObservations,
CompressedChannelMapping = rlCaps.CompressedChannelMapping,
HybridActions = rlCaps.HybridActions,
};
}
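ToActionSpecProto and ToActionSpec above carry the full hybrid spec across the gRPC boundary, while ToBrainParametersProto only fills the deprecated vector_action_* fields when the connected trainer lacks the HybridActions capability. Below is a sketch of that round trip, written as it would appear inside the package (ActionSpecProto and these extensions are internal, so this is not user-facing API, and the concrete sizes are illustrative).

```csharp
using Unity.MLAgents;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.CommunicatorObjects;

internal static class ActionSpecProtoRoundTrip
{
    internal static void Demo()
    {
        // 2 continuous actions plus two discrete branches of size 3 and 2 (internal constructor).
        var spec = new ActionSpec(2, new[] { 3, 2 });

        ActionSpecProto proto = spec.ToActionSpecProto();
        // proto.NumContinuousActions == 2, proto.NumDiscreteActions == 2,
        // proto.DiscreteBranchSizes == { 3, 2 }

        ActionSpec restored = proto.ToActionSpec();
        // restored.NumContinuousActions == 2, restored.BranchSizes == { 3, 2 }
    }
}
```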

2
com.unity.ml-agents/Runtime/Communicator/ICommunicator.cs


/// <param name="key">A key to identify which behavior actions to get.</param>
/// <param name="agentId">A key to identify which Agent actions to get.</param>
/// <returns></returns>
float[] GetActions(string key, int agentId);
ActionBuffers GetActions(string key, int agentId);
}
}

13
com.unity.ml-agents/Runtime/Communicator/RpcCommunicator.cs


UnityRLOutputProto m_CurrentUnityRlOutput =
new UnityRLOutputProto();
Dictionary<string, Dictionary<int, float[]>> m_LastActionsReceived =
new Dictionary<string, Dictionary<int, float[]>>();
Dictionary<string, Dictionary<int, ActionBuffers>> m_LastActionsReceived =
new Dictionary<string, Dictionary<int, ActionBuffers>>();
// Brains that we have sent over the communicator with agents.
HashSet<string> m_SentBrainKeys = new HashSet<string>();

{
return false;
}
}
else if (unityVersion.Major != pythonVersion.Major)
{

}
if (!m_LastActionsReceived.ContainsKey(behaviorName))
{
m_LastActionsReceived[behaviorName] = new Dictionary<int, float[]>();
m_LastActionsReceived[behaviorName] = new Dictionary<int, ActionBuffers>();
m_LastActionsReceived[behaviorName][info.episodeId] = null;
m_LastActionsReceived[behaviorName][info.episodeId] = ActionBuffers.Empty;
if (info.done)
{
m_LastActionsReceived[behaviorName].Remove(info.episodeId);

}
}
public float[] GetActions(string behaviorName, int agentId)
public ActionBuffers GetActions(string behaviorName, int agentId)
{
if (m_LastActionsReceived.ContainsKey(behaviorName))
{

}
}
return null;
return ActionBuffers.Empty;
}
/// <summary>

13
com.unity.ml-agents/Runtime/Communicator/UnityRLCapabilities.cs


public bool BaseRLCapabilities;
public bool ConcatenatedPngObservations;
public bool CompressedChannelMapping;
public bool HybridActions;
public UnityRLCapabilities(bool baseRlCapabilities = true, bool concatenatedPngObservations = true, bool compressedChannelMapping = true)
public UnityRLCapabilities(
bool baseRlCapabilities = true,
bool concatenatedPngObservations = true,
bool compressedChannelMapping = true,
bool hybridActions = true)
HybridActions = hybridActions;
}
/// <summary>

return false;
}
Debug.LogWarning("Unity has connected to a Training process that does not support" +
"Base Reinforcement Learning Capabilities. Please make sure you have the" +
" latest training codebase installed for this version of the ML-Agents package.");
"Base Reinforcement Learning Capabilities. Please make sure you have the" +
" latest training codebase installed for this version of the ML-Agents package.");
}
}

82
com.unity.ml-agents/Runtime/Grpc/CommunicatorObjects/AgentAction.cs


byte[] descriptorData = global::System.Convert.FromBase64String(
string.Concat(
"CjVtbGFnZW50c19lbnZzL2NvbW11bmljYXRvcl9vYmplY3RzL2FnZW50X2Fj",
"dGlvbi5wcm90bxIUY29tbXVuaWNhdG9yX29iamVjdHMiSwoQQWdlbnRBY3Rp",
"b25Qcm90bxIWCg52ZWN0b3JfYWN0aW9ucxgBIAMoAhINCgV2YWx1ZRgEIAEo",
"AkoECAIQA0oECAMQBEoECAUQBkIlqgIiVW5pdHkuTUxBZ2VudHMuQ29tbXVu",
"aWNhdG9yT2JqZWN0c2IGcHJvdG8z"));
"dGlvbi5wcm90bxIUY29tbXVuaWNhdG9yX29iamVjdHMijAEKEEFnZW50QWN0",
"aW9uUHJvdG8SIQoZdmVjdG9yX2FjdGlvbnNfZGVwcmVjYXRlZBgBIAMoAhIN",
"CgV2YWx1ZRgEIAEoAhIaChJjb250aW51b3VzX2FjdGlvbnMYBiADKAISGAoQ",
"ZGlzY3JldGVfYWN0aW9ucxgHIAMoBUoECAIQA0oECAMQBEoECAUQBkIlqgIi",
"VW5pdHkuTUxBZ2VudHMuQ29tbXVuaWNhdG9yT2JqZWN0c2IGcHJvdG8z"));
new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.AgentActionProto), global::Unity.MLAgents.CommunicatorObjects.AgentActionProto.Parser, new[]{ "VectorActions", "Value" }, null, null, null)
new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.AgentActionProto), global::Unity.MLAgents.CommunicatorObjects.AgentActionProto.Parser, new[]{ "VectorActionsDeprecated", "Value", "ContinuousActions", "DiscreteActions" }, null, null, null)
}));
}
#endregion

[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public AgentActionProto(AgentActionProto other) : this() {
vectorActions_ = other.vectorActions_.Clone();
vectorActionsDeprecated_ = other.vectorActionsDeprecated_.Clone();
continuousActions_ = other.continuousActions_.Clone();
discreteActions_ = other.discreteActions_.Clone();
_unknownFields = pb::UnknownFieldSet.Clone(other._unknownFields);
}

}
/// <summary>Field number for the "vector_actions" field.</summary>
public const int VectorActionsFieldNumber = 1;
private static readonly pb::FieldCodec<float> _repeated_vectorActions_codec
/// <summary>Field number for the "vector_actions_deprecated" field.</summary>
public const int VectorActionsDeprecatedFieldNumber = 1;
private static readonly pb::FieldCodec<float> _repeated_vectorActionsDeprecated_codec
private readonly pbc::RepeatedField<float> vectorActions_ = new pbc::RepeatedField<float>();
private readonly pbc::RepeatedField<float> vectorActionsDeprecated_ = new pbc::RepeatedField<float>();
/// <summary>
/// mark as deprecated in communicator v1.3.0
/// </summary>
public pbc::RepeatedField<float> VectorActions {
get { return vectorActions_; }
public pbc::RepeatedField<float> VectorActionsDeprecated {
get { return vectorActionsDeprecated_; }
}
/// <summary>Field number for the "value" field.</summary>

}
}
/// <summary>Field number for the "continuous_actions" field.</summary>
public const int ContinuousActionsFieldNumber = 6;
private static readonly pb::FieldCodec<float> _repeated_continuousActions_codec
= pb::FieldCodec.ForFloat(50);
private readonly pbc::RepeatedField<float> continuousActions_ = new pbc::RepeatedField<float>();
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public pbc::RepeatedField<float> ContinuousActions {
get { return continuousActions_; }
}
/// <summary>Field number for the "discrete_actions" field.</summary>
public const int DiscreteActionsFieldNumber = 7;
private static readonly pb::FieldCodec<int> _repeated_discreteActions_codec
= pb::FieldCodec.ForInt32(58);
private readonly pbc::RepeatedField<int> discreteActions_ = new pbc::RepeatedField<int>();
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public pbc::RepeatedField<int> DiscreteActions {
get { return discreteActions_; }
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public override bool Equals(object other) {
return Equals(other as AgentActionProto);

if (ReferenceEquals(other, this)) {
return true;
}
if(!vectorActions_.Equals(other.vectorActions_)) return false;
if(!vectorActionsDeprecated_.Equals(other.vectorActionsDeprecated_)) return false;
if(!continuousActions_.Equals(other.continuousActions_)) return false;
if(!discreteActions_.Equals(other.discreteActions_)) return false;
return Equals(_unknownFields, other._unknownFields);
}

hash ^= vectorActions_.GetHashCode();
hash ^= vectorActionsDeprecated_.GetHashCode();
hash ^= continuousActions_.GetHashCode();
hash ^= discreteActions_.GetHashCode();
if (_unknownFields != null) {
hash ^= _unknownFields.GetHashCode();
}

[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public void WriteTo(pb::CodedOutputStream output) {
vectorActions_.WriteTo(output, _repeated_vectorActions_codec);
vectorActionsDeprecated_.WriteTo(output, _repeated_vectorActionsDeprecated_codec);
continuousActions_.WriteTo(output, _repeated_continuousActions_codec);
discreteActions_.WriteTo(output, _repeated_discreteActions_codec);
if (_unknownFields != null) {
_unknownFields.WriteTo(output);
}

public int CalculateSize() {
int size = 0;
size += vectorActions_.CalculateSize(_repeated_vectorActions_codec);
size += vectorActionsDeprecated_.CalculateSize(_repeated_vectorActionsDeprecated_codec);
size += continuousActions_.CalculateSize(_repeated_continuousActions_codec);
size += discreteActions_.CalculateSize(_repeated_discreteActions_codec);
if (_unknownFields != null) {
size += _unknownFields.CalculateSize();
}

if (other == null) {
return;
}
vectorActions_.Add(other.vectorActions_);
vectorActionsDeprecated_.Add(other.vectorActionsDeprecated_);
continuousActions_.Add(other.continuousActions_);
discreteActions_.Add(other.discreteActions_);
_unknownFields = pb::UnknownFieldSet.MergeFrom(_unknownFields, other._unknownFields);
}

break;
case 10:
case 13: {
vectorActions_.AddEntriesFrom(input, _repeated_vectorActions_codec);
vectorActionsDeprecated_.AddEntriesFrom(input, _repeated_vectorActionsDeprecated_codec);
break;
}
case 50:
case 53: {
continuousActions_.AddEntriesFrom(input, _repeated_continuousActions_codec);
break;
}
case 58:
case 56: {
discreteActions_.AddEntriesFrom(input, _repeated_discreteActions_codec);
break;
}
}

348
com.unity.ml-agents/Runtime/Grpc/CommunicatorObjects/BrainParameters.cs


"CjltbGFnZW50c19lbnZzL2NvbW11bmljYXRvcl9vYmplY3RzL2JyYWluX3Bh",
"cmFtZXRlcnMucHJvdG8SFGNvbW11bmljYXRvcl9vYmplY3RzGjNtbGFnZW50",
"c19lbnZzL2NvbW11bmljYXRvcl9vYmplY3RzL3NwYWNlX3R5cGUucHJvdG8i",
"2QEKFEJyYWluUGFyYW1ldGVyc1Byb3RvEhoKEnZlY3Rvcl9hY3Rpb25fc2l6",
"ZRgDIAMoBRIiChp2ZWN0b3JfYWN0aW9uX2Rlc2NyaXB0aW9ucxgFIAMoCRJG",
"Chh2ZWN0b3JfYWN0aW9uX3NwYWNlX3R5cGUYBiABKA4yJC5jb21tdW5pY2F0",
"b3Jfb2JqZWN0cy5TcGFjZVR5cGVQcm90bxISCgpicmFpbl9uYW1lGAcgASgJ",
"EhMKC2lzX3RyYWluaW5nGAggASgISgQIARACSgQIAhADSgQIBBAFQiWqAiJV",
"bml0eS5NTEFnZW50cy5Db21tdW5pY2F0b3JPYmplY3RzYgZwcm90bzM="));
"iwEKD0FjdGlvblNwZWNQcm90bxIeChZudW1fY29udGludW91c19hY3Rpb25z",
"GAEgASgFEhwKFG51bV9kaXNjcmV0ZV9hY3Rpb25zGAIgASgFEh0KFWRpc2Ny",
"ZXRlX2JyYW5jaF9zaXplcxgDIAMoBRIbChNhY3Rpb25fZGVzY3JpcHRpb25z",
"GAQgAygJIrYCChRCcmFpblBhcmFtZXRlcnNQcm90bxIlCh12ZWN0b3JfYWN0",
"aW9uX3NpemVfZGVwcmVjYXRlZBgDIAMoBRItCiV2ZWN0b3JfYWN0aW9uX2Rl",
"c2NyaXB0aW9uc19kZXByZWNhdGVkGAUgAygJElEKI3ZlY3Rvcl9hY3Rpb25f",
"c3BhY2VfdHlwZV9kZXByZWNhdGVkGAYgASgOMiQuY29tbXVuaWNhdG9yX29i",
"amVjdHMuU3BhY2VUeXBlUHJvdG8SEgoKYnJhaW5fbmFtZRgHIAEoCRITCgtp",
"c190cmFpbmluZxgIIAEoCBI6CgthY3Rpb25fc3BlYxgJIAEoCzIlLmNvbW11",
"bmljYXRvcl9vYmplY3RzLkFjdGlvblNwZWNQcm90b0oECAEQAkoECAIQA0oE",
"CAQQBUIlqgIiVW5pdHkuTUxBZ2VudHMuQ29tbXVuaWNhdG9yT2JqZWN0c2IG",
"cHJvdG8z"));
new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.BrainParametersProto), global::Unity.MLAgents.CommunicatorObjects.BrainParametersProto.Parser, new[]{ "VectorActionSize", "VectorActionDescriptions", "VectorActionSpaceType", "BrainName", "IsTraining" }, null, null, null)
new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.ActionSpecProto), global::Unity.MLAgents.CommunicatorObjects.ActionSpecProto.Parser, new[]{ "NumContinuousActions", "NumDiscreteActions", "DiscreteBranchSizes", "ActionDescriptions" }, null, null, null),
new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.BrainParametersProto), global::Unity.MLAgents.CommunicatorObjects.BrainParametersProto.Parser, new[]{ "VectorActionSizeDeprecated", "VectorActionDescriptionsDeprecated", "VectorActionSpaceTypeDeprecated", "BrainName", "IsTraining", "ActionSpec" }, null, null, null)
}));
}
#endregion

internal sealed partial class ActionSpecProto : pb::IMessage<ActionSpecProto> {
private static readonly pb::MessageParser<ActionSpecProto> _parser = new pb::MessageParser<ActionSpecProto>(() => new ActionSpecProto());
private pb::UnknownFieldSet _unknownFields;
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public static pb::MessageParser<ActionSpecProto> Parser { get { return _parser; } }
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public static pbr::MessageDescriptor Descriptor {
get { return global::Unity.MLAgents.CommunicatorObjects.BrainParametersReflection.Descriptor.MessageTypes[0]; }
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
pbr::MessageDescriptor pb::IMessage.Descriptor {
get { return Descriptor; }
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public ActionSpecProto() {
OnConstruction();
}
partial void OnConstruction();
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public ActionSpecProto(ActionSpecProto other) : this() {
numContinuousActions_ = other.numContinuousActions_;
numDiscreteActions_ = other.numDiscreteActions_;
discreteBranchSizes_ = other.discreteBranchSizes_.Clone();
actionDescriptions_ = other.actionDescriptions_.Clone();
_unknownFields = pb::UnknownFieldSet.Clone(other._unknownFields);
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public ActionSpecProto Clone() {
return new ActionSpecProto(this);
}
/// <summary>Field number for the "num_continuous_actions" field.</summary>
public const int NumContinuousActionsFieldNumber = 1;
private int numContinuousActions_;
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public int NumContinuousActions {
get { return numContinuousActions_; }
set {
numContinuousActions_ = value;
}
}
/// <summary>Field number for the "num_discrete_actions" field.</summary>
public const int NumDiscreteActionsFieldNumber = 2;
private int numDiscreteActions_;
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public int NumDiscreteActions {
get { return numDiscreteActions_; }
set {
numDiscreteActions_ = value;
}
}
/// <summary>Field number for the "discrete_branch_sizes" field.</summary>
public const int DiscreteBranchSizesFieldNumber = 3;
private static readonly pb::FieldCodec<int> _repeated_discreteBranchSizes_codec
= pb::FieldCodec.ForInt32(26);
private readonly pbc::RepeatedField<int> discreteBranchSizes_ = new pbc::RepeatedField<int>();
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public pbc::RepeatedField<int> DiscreteBranchSizes {
get { return discreteBranchSizes_; }
}
/// <summary>Field number for the "action_descriptions" field.</summary>
public const int ActionDescriptionsFieldNumber = 4;
private static readonly pb::FieldCodec<string> _repeated_actionDescriptions_codec
= pb::FieldCodec.ForString(34);
private readonly pbc::RepeatedField<string> actionDescriptions_ = new pbc::RepeatedField<string>();
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public pbc::RepeatedField<string> ActionDescriptions {
get { return actionDescriptions_; }
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public override bool Equals(object other) {
return Equals(other as ActionSpecProto);
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public bool Equals(ActionSpecProto other) {
if (ReferenceEquals(other, null)) {
return false;
}
if (ReferenceEquals(other, this)) {
return true;
}
if (NumContinuousActions != other.NumContinuousActions) return false;
if (NumDiscreteActions != other.NumDiscreteActions) return false;
if(!discreteBranchSizes_.Equals(other.discreteBranchSizes_)) return false;
if(!actionDescriptions_.Equals(other.actionDescriptions_)) return false;
return Equals(_unknownFields, other._unknownFields);
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public override int GetHashCode() {
int hash = 1;
if (NumContinuousActions != 0) hash ^= NumContinuousActions.GetHashCode();
if (NumDiscreteActions != 0) hash ^= NumDiscreteActions.GetHashCode();
hash ^= discreteBranchSizes_.GetHashCode();
hash ^= actionDescriptions_.GetHashCode();
if (_unknownFields != null) {
hash ^= _unknownFields.GetHashCode();
}
return hash;
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public override string ToString() {
return pb::JsonFormatter.ToDiagnosticString(this);
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public void WriteTo(pb::CodedOutputStream output) {
if (NumContinuousActions != 0) {
output.WriteRawTag(8);
output.WriteInt32(NumContinuousActions);
}
if (NumDiscreteActions != 0) {
output.WriteRawTag(16);
output.WriteInt32(NumDiscreteActions);
}
discreteBranchSizes_.WriteTo(output, _repeated_discreteBranchSizes_codec);
actionDescriptions_.WriteTo(output, _repeated_actionDescriptions_codec);
if (_unknownFields != null) {
_unknownFields.WriteTo(output);
}
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public int CalculateSize() {
int size = 0;
if (NumContinuousActions != 0) {
size += 1 + pb::CodedOutputStream.ComputeInt32Size(NumContinuousActions);
}
if (NumDiscreteActions != 0) {
size += 1 + pb::CodedOutputStream.ComputeInt32Size(NumDiscreteActions);
}
size += discreteBranchSizes_.CalculateSize(_repeated_discreteBranchSizes_codec);
size += actionDescriptions_.CalculateSize(_repeated_actionDescriptions_codec);
if (_unknownFields != null) {
size += _unknownFields.CalculateSize();
}
return size;
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public void MergeFrom(ActionSpecProto other) {
if (other == null) {
return;
}
if (other.NumContinuousActions != 0) {
NumContinuousActions = other.NumContinuousActions;
}
if (other.NumDiscreteActions != 0) {
NumDiscreteActions = other.NumDiscreteActions;
}
discreteBranchSizes_.Add(other.discreteBranchSizes_);
actionDescriptions_.Add(other.actionDescriptions_);
_unknownFields = pb::UnknownFieldSet.MergeFrom(_unknownFields, other._unknownFields);
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public void MergeFrom(pb::CodedInputStream input) {
uint tag;
while ((tag = input.ReadTag()) != 0) {
switch(tag) {
default:
_unknownFields = pb::UnknownFieldSet.MergeFieldFrom(_unknownFields, input);
break;
case 8: {
NumContinuousActions = input.ReadInt32();
break;
}
case 16: {
NumDiscreteActions = input.ReadInt32();
break;
}
case 26:
case 24: {
discreteBranchSizes_.AddEntriesFrom(input, _repeated_discreteBranchSizes_codec);
break;
}
case 34: {
actionDescriptions_.AddEntriesFrom(input, _repeated_actionDescriptions_codec);
break;
}
}
}
}
}
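As a point of reference, a minimal sketch of how an ActionSpecProto describing a hybrid action space could be populated from the fields generated above (the class is internal to the runtime assembly; the concrete sizes and descriptions are illustrative only):
// Hypothetical hybrid spec: 3 continuous actions plus two discrete branches of sizes 2 and 3.
var actionSpecProto = new Unity.MLAgents.CommunicatorObjects.ActionSpecProto
{
    NumContinuousActions = 3,
    NumDiscreteActions = 2,
};
actionSpecProto.DiscreteBranchSizes.Add(new[] { 2, 3 });
actionSpecProto.ActionDescriptions.Add(new[] { "Jump", "Shoot" });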
internal sealed partial class BrainParametersProto : pb::IMessage<BrainParametersProto> {
private static readonly pb::MessageParser<BrainParametersProto> _parser = new pb::MessageParser<BrainParametersProto>(() => new BrainParametersProto());
private pb::UnknownFieldSet _unknownFields;

[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public static pbr::MessageDescriptor Descriptor {
get { return global::Unity.MLAgents.CommunicatorObjects.BrainParametersReflection.Descriptor.MessageTypes[0]; }
get { return global::Unity.MLAgents.CommunicatorObjects.BrainParametersReflection.Descriptor.MessageTypes[1]; }
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]

[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public BrainParametersProto(BrainParametersProto other) : this() {
vectorActionSize_ = other.vectorActionSize_.Clone();
vectorActionDescriptions_ = other.vectorActionDescriptions_.Clone();
vectorActionSpaceType_ = other.vectorActionSpaceType_;
vectorActionSizeDeprecated_ = other.vectorActionSizeDeprecated_.Clone();
vectorActionDescriptionsDeprecated_ = other.vectorActionDescriptionsDeprecated_.Clone();
vectorActionSpaceTypeDeprecated_ = other.vectorActionSpaceTypeDeprecated_;
ActionSpec = other.actionSpec_ != null ? other.ActionSpec.Clone() : null;
_unknownFields = pb::UnknownFieldSet.Clone(other._unknownFields);
}

}
/// <summary>Field number for the "vector_action_size" field.</summary>
public const int VectorActionSizeFieldNumber = 3;
private static readonly pb::FieldCodec<int> _repeated_vectorActionSize_codec
/// <summary>Field number for the "vector_action_size_deprecated" field.</summary>
public const int VectorActionSizeDeprecatedFieldNumber = 3;
private static readonly pb::FieldCodec<int> _repeated_vectorActionSizeDeprecated_codec
private readonly pbc::RepeatedField<int> vectorActionSize_ = new pbc::RepeatedField<int>();
private readonly pbc::RepeatedField<int> vectorActionSizeDeprecated_ = new pbc::RepeatedField<int>();
/// <summary>
/// marked as deprecated in communicator v1.3.0
/// </summary>
public pbc::RepeatedField<int> VectorActionSize {
get { return vectorActionSize_; }
public pbc::RepeatedField<int> VectorActionSizeDeprecated {
get { return vectorActionSizeDeprecated_; }
/// <summary>Field number for the "vector_action_descriptions" field.</summary>
public const int VectorActionDescriptionsFieldNumber = 5;
private static readonly pb::FieldCodec<string> _repeated_vectorActionDescriptions_codec
/// <summary>Field number for the "vector_action_descriptions_deprecated" field.</summary>
public const int VectorActionDescriptionsDeprecatedFieldNumber = 5;
private static readonly pb::FieldCodec<string> _repeated_vectorActionDescriptionsDeprecated_codec
private readonly pbc::RepeatedField<string> vectorActionDescriptions_ = new pbc::RepeatedField<string>();
private readonly pbc::RepeatedField<string> vectorActionDescriptionsDeprecated_ = new pbc::RepeatedField<string>();
/// <summary>
/// marked as deprecated in communicator v1.3.0
/// </summary>
public pbc::RepeatedField<string> VectorActionDescriptions {
get { return vectorActionDescriptions_; }
public pbc::RepeatedField<string> VectorActionDescriptionsDeprecated {
get { return vectorActionDescriptionsDeprecated_; }
/// <summary>Field number for the "vector_action_space_type" field.</summary>
public const int VectorActionSpaceTypeFieldNumber = 6;
private global::Unity.MLAgents.CommunicatorObjects.SpaceTypeProto vectorActionSpaceType_ = 0;
/// <summary>Field number for the "vector_action_space_type_deprecated" field.</summary>
public const int VectorActionSpaceTypeDeprecatedFieldNumber = 6;
private global::Unity.MLAgents.CommunicatorObjects.SpaceTypeProto vectorActionSpaceTypeDeprecated_ = 0;
/// <summary>
/// marked as deprecated in communicator v1.3.0
/// </summary>
public global::Unity.MLAgents.CommunicatorObjects.SpaceTypeProto VectorActionSpaceType {
get { return vectorActionSpaceType_; }
public global::Unity.MLAgents.CommunicatorObjects.SpaceTypeProto VectorActionSpaceTypeDeprecated {
get { return vectorActionSpaceTypeDeprecated_; }
vectorActionSpaceType_ = value;
vectorActionSpaceTypeDeprecated_ = value;
}
}

}
}
/// <summary>Field number for the "action_spec" field.</summary>
public const int ActionSpecFieldNumber = 9;
private global::Unity.MLAgents.CommunicatorObjects.ActionSpecProto actionSpec_;
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public global::Unity.MLAgents.CommunicatorObjects.ActionSpecProto ActionSpec {
get { return actionSpec_; }
set {
actionSpec_ = value;
}
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public override bool Equals(object other) {
return Equals(other as BrainParametersProto);

if (ReferenceEquals(other, this)) {
return true;
}
if(!vectorActionSize_.Equals(other.vectorActionSize_)) return false;
if(!vectorActionDescriptions_.Equals(other.vectorActionDescriptions_)) return false;
if (VectorActionSpaceType != other.VectorActionSpaceType) return false;
if(!vectorActionSizeDeprecated_.Equals(other.vectorActionSizeDeprecated_)) return false;
if(!vectorActionDescriptionsDeprecated_.Equals(other.vectorActionDescriptionsDeprecated_)) return false;
if (VectorActionSpaceTypeDeprecated != other.VectorActionSpaceTypeDeprecated) return false;
if (!object.Equals(ActionSpec, other.ActionSpec)) return false;
return Equals(_unknownFields, other._unknownFields);
}

hash ^= vectorActionSize_.GetHashCode();
hash ^= vectorActionDescriptions_.GetHashCode();
if (VectorActionSpaceType != 0) hash ^= VectorActionSpaceType.GetHashCode();
hash ^= vectorActionSizeDeprecated_.GetHashCode();
hash ^= vectorActionDescriptionsDeprecated_.GetHashCode();
if (VectorActionSpaceTypeDeprecated != 0) hash ^= VectorActionSpaceTypeDeprecated.GetHashCode();
if (actionSpec_ != null) hash ^= ActionSpec.GetHashCode();
if (_unknownFields != null) {
hash ^= _unknownFields.GetHashCode();
}

[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public void WriteTo(pb::CodedOutputStream output) {
vectorActionSize_.WriteTo(output, _repeated_vectorActionSize_codec);
vectorActionDescriptions_.WriteTo(output, _repeated_vectorActionDescriptions_codec);
if (VectorActionSpaceType != 0) {
vectorActionSizeDeprecated_.WriteTo(output, _repeated_vectorActionSizeDeprecated_codec);
vectorActionDescriptionsDeprecated_.WriteTo(output, _repeated_vectorActionDescriptionsDeprecated_codec);
if (VectorActionSpaceTypeDeprecated != 0) {
output.WriteEnum((int) VectorActionSpaceType);
output.WriteEnum((int) VectorActionSpaceTypeDeprecated);
}
if (BrainName.Length != 0) {
output.WriteRawTag(58);

output.WriteRawTag(64);
output.WriteBool(IsTraining);
}
if (actionSpec_ != null) {
output.WriteRawTag(74);
output.WriteMessage(ActionSpec);
}
if (_unknownFields != null) {
_unknownFields.WriteTo(output);
}

public int CalculateSize() {
int size = 0;
size += vectorActionSize_.CalculateSize(_repeated_vectorActionSize_codec);
size += vectorActionDescriptions_.CalculateSize(_repeated_vectorActionDescriptions_codec);
if (VectorActionSpaceType != 0) {
size += 1 + pb::CodedOutputStream.ComputeEnumSize((int) VectorActionSpaceType);
size += vectorActionSizeDeprecated_.CalculateSize(_repeated_vectorActionSizeDeprecated_codec);
size += vectorActionDescriptionsDeprecated_.CalculateSize(_repeated_vectorActionDescriptionsDeprecated_codec);
if (VectorActionSpaceTypeDeprecated != 0) {
size += 1 + pb::CodedOutputStream.ComputeEnumSize((int) VectorActionSpaceTypeDeprecated);
}
if (BrainName.Length != 0) {
size += 1 + pb::CodedOutputStream.ComputeStringSize(BrainName);

}
if (actionSpec_ != null) {
size += 1 + pb::CodedOutputStream.ComputeMessageSize(ActionSpec);
}
if (_unknownFields != null) {
size += _unknownFields.CalculateSize();

if (other == null) {
return;
}
vectorActionSize_.Add(other.vectorActionSize_);
vectorActionDescriptions_.Add(other.vectorActionDescriptions_);
if (other.VectorActionSpaceType != 0) {
VectorActionSpaceType = other.VectorActionSpaceType;
vectorActionSizeDeprecated_.Add(other.vectorActionSizeDeprecated_);
vectorActionDescriptionsDeprecated_.Add(other.vectorActionDescriptionsDeprecated_);
if (other.VectorActionSpaceTypeDeprecated != 0) {
VectorActionSpaceTypeDeprecated = other.VectorActionSpaceTypeDeprecated;
}
if (other.BrainName.Length != 0) {
BrainName = other.BrainName;

}
if (other.actionSpec_ != null) {
if (actionSpec_ == null) {
actionSpec_ = new global::Unity.MLAgents.CommunicatorObjects.ActionSpecProto();
}
ActionSpec.MergeFrom(other.ActionSpec);
}
_unknownFields = pb::UnknownFieldSet.MergeFrom(_unknownFields, other._unknownFields);
}

break;
case 26:
case 24: {
vectorActionSize_.AddEntriesFrom(input, _repeated_vectorActionSize_codec);
vectorActionSizeDeprecated_.AddEntriesFrom(input, _repeated_vectorActionSizeDeprecated_codec);
vectorActionDescriptions_.AddEntriesFrom(input, _repeated_vectorActionDescriptions_codec);
vectorActionDescriptionsDeprecated_.AddEntriesFrom(input, _repeated_vectorActionDescriptionsDeprecated_codec);
vectorActionSpaceType_ = (global::Unity.MLAgents.CommunicatorObjects.SpaceTypeProto) input.ReadEnum();
vectorActionSpaceTypeDeprecated_ = (global::Unity.MLAgents.CommunicatorObjects.SpaceTypeProto) input.ReadEnum();
break;
}
case 58: {

case 64: {
IsTraining = input.ReadBool();
break;
}
case 74: {
if (actionSpec_ == null) {
actionSpec_ = new global::Unity.MLAgents.CommunicatorObjects.ActionSpecProto();
}
input.ReadMessage(actionSpec_);
break;
}
}
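A hedged sketch of how a BrainParametersProto might be filled so that the new action_spec field and the deprecated vector_action_*_deprecated fields stay consistent for older trainers (property names are taken from the generated code above, actionSpecProto is the hybrid spec sketched earlier, and all values are illustrative):
var bpProto = new Unity.MLAgents.CommunicatorObjects.BrainParametersProto
{
    BrainName = "MyBehavior",          // illustrative behavior name
    IsTraining = true,
    ActionSpec = actionSpecProto,      // new, hybrid-capable action description
};
// Deprecated mirror for pre-v1.3.0 trainers; only meaningful for purely discrete or continuous specs.
bpProto.VectorActionSizeDeprecated.Add(new[] { 2, 3 });
bpProto.VectorActionSpaceTypeDeprecated = Unity.MLAgents.CommunicatorObjects.SpaceTypeProto.Discrete;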

44
com.unity.ml-agents/Runtime/Grpc/CommunicatorObjects/Capabilities.cs


byte[] descriptorData = global::System.Convert.FromBase64String(
string.Concat(
"CjVtbGFnZW50c19lbnZzL2NvbW11bmljYXRvcl9vYmplY3RzL2NhcGFiaWxp",
"dGllcy5wcm90bxIUY29tbXVuaWNhdG9yX29iamVjdHMifQoYVW5pdHlSTENh",
"cGFiaWxpdGllc1Byb3RvEhoKEmJhc2VSTENhcGFiaWxpdGllcxgBIAEoCBIj",
"Chtjb25jYXRlbmF0ZWRQbmdPYnNlcnZhdGlvbnMYAiABKAgSIAoYY29tcHJl",
"c3NlZENoYW5uZWxNYXBwaW5nGAMgASgIQiWqAiJVbml0eS5NTEFnZW50cy5D",
"b21tdW5pY2F0b3JPYmplY3RzYgZwcm90bzM="));
"dGllcy5wcm90bxIUY29tbXVuaWNhdG9yX29iamVjdHMilAEKGFVuaXR5UkxD",
"YXBhYmlsaXRpZXNQcm90bxIaChJiYXNlUkxDYXBhYmlsaXRpZXMYASABKAgS",
"IwobY29uY2F0ZW5hdGVkUG5nT2JzZXJ2YXRpb25zGAIgASgIEiAKGGNvbXBy",
"ZXNzZWRDaGFubmVsTWFwcGluZxgDIAEoCBIVCg1oeWJyaWRBY3Rpb25zGAQg",
"ASgIQiWqAiJVbml0eS5NTEFnZW50cy5Db21tdW5pY2F0b3JPYmplY3RzYgZw",
"cm90bzM="));
new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.UnityRLCapabilitiesProto), global::Unity.MLAgents.CommunicatorObjects.UnityRLCapabilitiesProto.Parser, new[]{ "BaseRLCapabilities", "ConcatenatedPngObservations", "CompressedChannelMapping" }, null, null, null)
new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.UnityRLCapabilitiesProto), global::Unity.MLAgents.CommunicatorObjects.UnityRLCapabilitiesProto.Parser, new[]{ "BaseRLCapabilities", "ConcatenatedPngObservations", "CompressedChannelMapping", "HybridActions" }, null, null, null)
}));
}
#endregion

baseRLCapabilities_ = other.baseRLCapabilities_;
concatenatedPngObservations_ = other.concatenatedPngObservations_;
compressedChannelMapping_ = other.compressedChannelMapping_;
hybridActions_ = other.hybridActions_;
_unknownFields = pb::UnknownFieldSet.Clone(other._unknownFields);
}

}
}
/// <summary>Field number for the "hybridActions" field.</summary>
public const int HybridActionsFieldNumber = 4;
private bool hybridActions_;
/// <summary>
/// support for hybrid action spaces (discrete + continuous)
/// </summary>
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public bool HybridActions {
get { return hybridActions_; }
set {
hybridActions_ = value;
}
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public override bool Equals(object other) {
return Equals(other as UnityRLCapabilitiesProto);

if (BaseRLCapabilities != other.BaseRLCapabilities) return false;
if (ConcatenatedPngObservations != other.ConcatenatedPngObservations) return false;
if (CompressedChannelMapping != other.CompressedChannelMapping) return false;
if (HybridActions != other.HybridActions) return false;
return Equals(_unknownFields, other._unknownFields);
}

if (BaseRLCapabilities != false) hash ^= BaseRLCapabilities.GetHashCode();
if (ConcatenatedPngObservations != false) hash ^= ConcatenatedPngObservations.GetHashCode();
if (CompressedChannelMapping != false) hash ^= CompressedChannelMapping.GetHashCode();
if (HybridActions != false) hash ^= HybridActions.GetHashCode();
if (_unknownFields != null) {
hash ^= _unknownFields.GetHashCode();
}

output.WriteRawTag(24);
output.WriteBool(CompressedChannelMapping);
}
if (HybridActions != false) {
output.WriteRawTag(32);
output.WriteBool(HybridActions);
}
if (_unknownFields != null) {
_unknownFields.WriteTo(output);
}

size += 1 + 1;
}
if (CompressedChannelMapping != false) {
size += 1 + 1;
}
if (HybridActions != false) {
size += 1 + 1;
}
if (_unknownFields != null) {

if (other.CompressedChannelMapping != false) {
CompressedChannelMapping = other.CompressedChannelMapping;
}
if (other.HybridActions != false) {
HybridActions = other.HybridActions;
}
_unknownFields = pb::UnknownFieldSet.MergeFrom(_unknownFields, other._unknownFields);
}

}
case 24: {
CompressedChannelMapping = input.ReadBool();
break;
}
case 32: {
HybridActions = input.ReadBool();
break;
}
}
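The new hybridActions capability is advertised the same way as the existing booleans; a minimal sketch using only the properties generated above:
var caps = new Unity.MLAgents.CommunicatorObjects.UnityRLCapabilitiesProto
{
    BaseRLCapabilities = true,
    ConcatenatedPngObservations = true,
    CompressedChannelMapping = true,
    HybridActions = true,   // new in this change: hybrid (continuous + discrete) action support
};
// A trainer that does not know field 4 leaves HybridActions at its default of false,
// so the C# side can tell that hybrid action models are not supported by that trainer.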

44
com.unity.ml-agents/Runtime/Inference/ApplierImpl.cs


using System.Collections.Generic;
using System.Linq;
using Unity.MLAgents.Inference.Utils;
using Unity.MLAgents.Actuators;
using Unity.Barracuda;
using UnityEngine;

/// </summary>
internal class ContinuousActionOutputApplier : TensorApplier.IApplier
{
public void Apply(TensorProxy tensorProxy, IEnumerable<int> actionIds, Dictionary<int, float[]> lastActions)
readonly ActionSpec m_ActionSpec;
public ContinuousActionOutputApplier(ActionSpec actionSpec)
{
m_ActionSpec = actionSpec;
}
public void Apply(TensorProxy tensorProxy, IEnumerable<int> actionIds, Dictionary<int, ActionBuffers> lastActions)
{
var actionSize = tensorProxy.shape[tensorProxy.shape.Length - 1];
var agentIndex = 0;

{
var actionValue = lastActions[agentId];
if (actionValue == null)
var actionBuffer = lastActions[agentId];
if (actionBuffer.IsEmpty())
actionValue = new float[actionSize];
lastActions[agentId] = actionValue;
actionBuffer = new ActionBuffers(m_ActionSpec);
lastActions[agentId] = actionBuffer;
var continuousBuffer = actionBuffer.ContinuousActions;
actionValue[j] = tensorProxy.data[agentIndex, j];
continuousBuffer[j] = tensorProxy.data[agentIndex, j];
}
}
agentIndex++;

readonly int[] m_ActionSize;
readonly Multinomial m_Multinomial;
readonly ITensorAllocator m_Allocator;
readonly ActionSpec m_ActionSpec;
public DiscreteActionOutputApplier(int[] actionSize, int seed, ITensorAllocator allocator)
public DiscreteActionOutputApplier(ActionSpec actionSpec, int seed, ITensorAllocator allocator)
m_ActionSize = actionSize;
m_ActionSize = actionSpec.BranchSizes;
m_ActionSpec = actionSpec;
public void Apply(TensorProxy tensorProxy, IEnumerable<int> actionIds, Dictionary<int, float[]> lastActions)
public void Apply(TensorProxy tensorProxy, IEnumerable<int> actionIds, Dictionary<int, ActionBuffers> lastActions)
{
//var tensorDataProbabilities = tensorProxy.Data as float[,];
var idActionPairList = actionIds as List<int> ?? actionIds.ToList();

{
if (lastActions.ContainsKey(agentId))
{
var actionVal = lastActions[agentId];
if (actionVal == null)
var actionBuffer = lastActions[agentId];
if (actionBuffer.IsEmpty())
actionVal = new float[m_ActionSize.Length];
lastActions[agentId] = actionVal;
actionBuffer = new ActionBuffers(m_ActionSpec);
lastActions[agentId] = actionBuffer;
var discreteBuffer = actionBuffer.DiscreteActions;
actionVal[j] = actionValues[agentIndex, j];
discreteBuffer[j] = (int)actionValues[agentIndex, j];
}
}
agentIndex++;

m_Memories = memories;
}
public void Apply(TensorProxy tensorProxy, IEnumerable<int> actionIds, Dictionary<int, float[]> lastActions)
public void Apply(TensorProxy tensorProxy, IEnumerable<int> actionIds, Dictionary<int, ActionBuffers> lastActions)
{
var agentIndex = 0;
var memorySize = (int)tensorProxy.shape[tensorProxy.shape.Length - 1];

m_Memories = memories;
}
public void Apply(TensorProxy tensorProxy, IEnumerable<int> actionIds, Dictionary<int, float[]> lastActions)
public void Apply(TensorProxy tensorProxy, IEnumerable<int> actionIds, Dictionary<int, ActionBuffers> lastActions)
{
var agentIndex = 0;
var memorySize = (int)tensorProxy.shape[tensorProxy.shape.Length - 1];
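The appliers above now write into ActionBuffers rather than raw float arrays. A minimal sketch of that pattern (types come from Unity.MLAgents.Actuators; lastActions and agentId stand in for the applier's dictionary and agent id, and the hybrid spec and written values are illustrative):
var actionSpec = new ActionSpec(3, new[] { 2, 3 });   // 3 continuous actions, branches of 2 and 3
var actionBuffer = new ActionBuffers(actionSpec);     // zero-initialized continuous + discrete segments
var continuousBuffer = actionBuffer.ContinuousActions;
var discreteBuffer = actionBuffer.DiscreteActions;
continuousBuffer[0] = 0.5f;                           // what ContinuousActionOutputApplier writes
discreteBuffer[1] = 2;                                // what DiscreteActionOutputApplier writes
// lastActions maps agentId -> ActionBuffers; ActionBuffers.Empty marks "no action computed yet".
lastActions[agentId] = actionBuffer;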

271
com.unity.ml-agents/Runtime/Inference/BarracudaModelParamLoader.cs


/// </summary>
internal class BarracudaModelParamLoader
{
enum ModelActionType
{
Unknown,
Discrete,
Continuous
}
/// Generates the Tensor inputs that are expected to be present in the Model.
/// </summary>
/// <param name="model">
/// The Barracuda engine model for loading static parameters.
/// </param>
/// <returns>TensorProxy IEnumerable with the expected Tensor inputs.</returns>
public static IReadOnlyList<TensorProxy> GetInputTensors(Model model)
{
var tensors = new List<TensorProxy>();
if (model == null)
return tensors;
foreach (var input in model.inputs)
{
tensors.Add(new TensorProxy
{
name = input.name,
valueType = TensorProxy.TensorType.FloatingPoint,
data = null,
shape = input.shape.Select(i => (long)i).ToArray()
});
}
foreach (var mem in model.memories)
{
tensors.Add(new TensorProxy
{
name = mem.input,
valueType = TensorProxy.TensorType.FloatingPoint,
data = null,
shape = TensorUtils.TensorShapeFromBarracuda(mem.shape)
});
}
tensors.Sort((el1, el2) => el1.name.CompareTo(el2.name));
return tensors;
}
public static int GetNumVisualInputs(Model model)
{
var count = 0;
if (model == null)
return count;
foreach (var input in model.inputs)
{
if (input.shape.Length == 4)
{
if (input.name.StartsWith(TensorNames.VisualObservationPlaceholderPrefix))
{
count++;
}
}
}
return count;
}
/// <summary>
/// Gets the names of the Tensor outputs that are expected to be present in the Model.
/// </summary>
/// <param name="model">
/// The Barracuda engine model for loading static parameters
/// </param>
/// <returns>Array of the expected Tensor output names.</returns>
public static string[] GetOutputNames(Model model)
{
var names = new List<string>();
if (model == null)
{
return names.ToArray();
}
names.Add(TensorNames.ActionOutput);
var memory = (int)model.GetTensorByName(TensorNames.MemorySize)[0];
if (memory > 0)
{
foreach (var mem in model.memories)
{
names.Add(mem.output);
}
}
names.Sort();
return names.ToArray();
}
/// <summary>
/// Factory for the ModelParamLoader: creates a ModelParamLoader and runs the checks
/// on it.
/// </summary>

return failedModelChecks;
}
foreach (var constantName in TensorNames.RequiredConstants)
var hasExpectedTensors = model.CheckExpectedTensors(failedModelChecks);
if (!hasExpectedTensors)
var tensor = model.GetTensorByName(constantName);
if (tensor == null)
{
failedModelChecks.Add($"Required constant \"{constantName}\" was not found in the model file.");
return failedModelChecks;
}
return failedModelChecks;
var memorySize = (int)model.GetTensorByName(TensorNames.MemorySize)[0];
var isContinuousInt = (int)model.GetTensorByName(TensorNames.IsContinuousControl)[0];
var isContinuous = GetActionType(isContinuousInt);
var actionSize = (int)model.GetTensorByName(TensorNames.ActionOutputShape)[0];
if (modelApiVersion == -1)
{
failedModelChecks.Add(

return failedModelChecks;
}
var modelDiscreteActionSize = isContinuous == ModelActionType.Discrete ? actionSize : 0;
var modelContinuousActionSize = isContinuous == ModelActionType.Continuous ? actionSize : 0;
var memorySize = (int)model.GetTensorByName(TensorNames.MemorySize)[0];
if (memorySize == -1)
{
failedModelChecks.Add($"Missing node in the model provided : {TensorNames.MemorySize}");
return failedModelChecks;
}
CheckIntScalarPresenceHelper(new Dictionary<string, int>()
{
{TensorNames.MemorySize, memorySize},
{TensorNames.IsContinuousControl, isContinuousInt},
{TensorNames.ActionOutputShape, actionSize}
})
CheckInputTensorPresence(model, brainParameters, memorySize, sensorComponents)
CheckInputTensorPresence(model, brainParameters, memorySize, isContinuous, sensorComponents)
CheckOutputTensorPresence(model, memorySize)
CheckOutputTensorPresence(model, memorySize))
;
failedModelChecks.AddRange(
CheckOutputTensorShape(model, brainParameters, actuatorComponents, isContinuous, modelContinuousActionSize, modelDiscreteActionSize)
CheckOutputTensorShape(model, brainParameters, actuatorComponents)
/// Converts the integer value in the model corresponding to the type of control to a
/// ModelActionType.
/// </summary>
/// <param name="isContinuousInt">
/// The integer value in the model indicating the type of control
/// </param>
/// <returns>The equivalent ModelActionType</returns>
static ModelActionType GetActionType(int isContinuousInt)
{
ModelActionType isContinuous;
switch (isContinuousInt)
{
case 0:
isContinuous = ModelActionType.Discrete;
break;
case 1:
isContinuous = ModelActionType.Continuous;
break;
default:
isContinuous = ModelActionType.Unknown;
break;
}
return isContinuous;
}
/// <summary>
/// Given a Dictionary of node names to int values, generates a failed check for each
/// entry whose value is the invalid sentinel -1.
/// </summary>
/// <param name="requiredScalarFields"> Mapping from node names to int values</param>
/// <returns>The list the error messages of the checks that failed</returns>
static IEnumerable<string> CheckIntScalarPresenceHelper(
Dictionary<string, int> requiredScalarFields)
{
var failedModelChecks = new List<string>();
foreach (var field in requiredScalarFields)
{
if (field.Value == -1)
{
failedModelChecks.Add($"Missing node in the model provided : {field.Key}");
}
}
return failedModelChecks;
}
/// <summary>
/// Generates failed checks that correspond to inputs expected by the model that are not
/// present in the BrainParameters.
/// </summary>

Model model,
BrainParameters brainParameters,
int memory,
ModelActionType isContinuous,
var tensorsNames = GetInputTensors(model).Select(x => x.name).ToList();
var tensorsNames = model.GetInputNames();
// If there is no Vector Observation Input but the Brain Parameters expect one.
if ((brainParameters.VectorObservationSize != 0) &&

"The model does not contain a Vector Observation Placeholder Input. " +
"The model does not contain a Vector Observation Placeholder Input. " +
"You must set the Vector Observation Space Size to 0.");
}

visObsIndex++;
}
var expectedVisualObs = GetNumVisualInputs(model);
var expectedVisualObs = model.GetNumVisualInputs();
// Check if there are not enough visual sensors (too many would be handled above)
if (expectedVisualObs > visObsIndex)
{

}
// If the model uses discrete control but does not have an input for action masks
if (isContinuous == ModelActionType.Discrete)
if (model.HasDiscreteOutputs())
{
if (!tensorsNames.Contains(TensorNames.ActionMaskPlaceholder))
{

static IEnumerable<string> CheckOutputTensorPresence(Model model, int memory)
{
var failedModelChecks = new List<string>();
// If there is no Action Output.
if (!model.outputs.Contains(TensorNames.ActionOutput))
{
failedModelChecks.Add("The model does not contain an Action Output Node.");
}
// If there is no Recurrent Output but the model is Recurrent.
if (memory > 0)

}
// If the model expects an input but it is not in this list
foreach (var tensor in GetInputTensors(model))
foreach (var tensor in model.GetInputTensors())
{
if (!tensorTester.ContainsKey(tensor.name))
{

BrainParameters brainParameters, TensorProxy tensorProxy,
SensorComponent[] sensorComponents, int observableAttributeTotalSize)
{
var numberActionsBp = brainParameters.VectorActionSize.Length;
var numberActionsBp = brainParameters.ActionSpec.NumDiscreteActions;
var numberActionsT = tensorProxy.shape[tensorProxy.shape.Length - 1];
if (numberActionsBp != numberActionsT)
{

static IEnumerable<string> CheckOutputTensorShape(
Model model,
BrainParameters brainParameters,
ActuatorComponent[] actuatorComponents,
ModelActionType isContinuous,
int modelContinuousActionSize, int modelSumDiscreteBranchSizes)
ActuatorComponent[] actuatorComponents)
if (isContinuous == ModelActionType.Unknown)
{
failedModelChecks.Add("Cannot infer type of Control from the provided model.");
return failedModelChecks;
}
if (isContinuous == ModelActionType.Continuous &&
brainParameters.VectorActionSpaceType != SpaceType.Continuous)
{
failedModelChecks.Add(
"Model has been trained using Continuous Control but the Brain Parameters " +
"suggest Discrete Control.");
return failedModelChecks;
}
if (isContinuous == ModelActionType.Discrete &&
brainParameters.VectorActionSpaceType != SpaceType.Discrete)
{
failedModelChecks.Add(
"Model has been trained using Discrete Control but the Brain Parameters " +
"suggest Continuous Control.");
return failedModelChecks;
}
var tensorTester = new Dictionary<string, Func<BrainParameters, ActuatorComponent[], TensorShape?, int, int, string>>();
// This will need to change a bit for hybrid action spaces.
if (isContinuous == ModelActionType.Continuous)
// If the model expects an output but it is not in this list
var modelContinuousActionSize = model.ContinuousOutputSize();
var continuousError = CheckContinuousActionOutputShape(brainParameters, actuatorComponents, modelContinuousActionSize);
if (continuousError != null)
tensorTester[TensorNames.ActionOutput] = CheckContinuousActionOutputShape;
failedModelChecks.Add(continuousError);
else
var modelSumDiscreteBranchSizes = model.DiscreteOutputSize();
var discreteError = CheckDiscreteActionOutputShape(brainParameters, actuatorComponents, modelSumDiscreteBranchSizes);
if (discreteError != null)
tensorTester[TensorNames.ActionOutput] = CheckDiscreteActionOutputShape;
}
// If the model expects an output but it is not in this list
foreach (var name in model.outputs)
{
if (tensorTester.ContainsKey(name))
{
var tester = tensorTester[name];
var error = tester.Invoke(brainParameters, actuatorComponents, model.GetShapeByName(name), modelContinuousActionSize, modelSumDiscreteBranchSizes);
if (error != null)
{
failedModelChecks.Add(error);
}
}
failedModelChecks.Add(discreteError);
}
return failedModelChecks;
}
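CheckOutputTensorShape now reads the expected action sizes off the model itself (ContinuousOutputSize / DiscreteOutputSize) instead of inferring them from is_continuous_control. A hedged sketch of the continuous-side comparison, ignoring additional ActuatorComponents for brevity (brainParameters, model and failedModelChecks come from the surrounding method; the error text is illustrative):
var expectedContinuous = brainParameters.ActionSpec.NumContinuousActions;
var modelContinuous = model.ContinuousOutputSize();
if (modelContinuous != expectedContinuous)
{
    failedModelChecks.Add(
        $"Continuous action size mismatch: the model outputs {modelContinuous} values " +
        $"but the BrainParameters expect {expectedContinuous}.");
}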

/// check failed. If the check passed, returns null.
/// </returns>
static string CheckDiscreteActionOutputShape(
BrainParameters brainParameters, ActuatorComponent[] actuatorComponents, TensorShape? shape, int modelContinuousActionSize, int modelSumDiscreteBranchSizes)
BrainParameters brainParameters, ActuatorComponent[] actuatorComponents, int modelSumDiscreteBranchSizes)
var sumOfDiscreteBranchSizes = 0;
if (brainParameters.VectorActionSpaceType == SpaceType.Discrete)
{
sumOfDiscreteBranchSizes += brainParameters.VectorActionSize.Sum();
}
// TODO: check each branch size instead of sum of branch sizes
var sumOfDiscreteBranchSizes = brainParameters.ActionSpec.SumOfDiscreteBranchSizes;
foreach (var actuatorComponent in actuatorComponents)
{

/// <returns>If the Check failed, returns a string containing information about why the
/// check failed. If the check passed, returns null.</returns>
static string CheckContinuousActionOutputShape(
BrainParameters brainParameters, ActuatorComponent[] actuatorComponents, TensorShape? shape, int modelContinuousActionSize, int modelSumDiscreteBranchSizes)
BrainParameters brainParameters, ActuatorComponent[] actuatorComponents, int modelContinuousActionSize)
var numContinuousActions = 0;
if (brainParameters.VectorActionSpaceType == SpaceType.Continuous)
{
numContinuousActions += brainParameters.NumActions;
}
var numContinuousActions = brainParameters.ActionSpec.NumContinuousActions;
foreach (var actuatorComponent in actuatorComponents)
{

4
com.unity.ml-agents/Runtime/Inference/GeneratorImpl.cs


foreach (var infoSensorPair in infos)
{
var info = infoSensorPair.agentInfo;
var pastAction = info.storedVectorActions;
if (pastAction != null)
var pastAction = info.storedVectorActions.DiscreteActions;
if (!pastAction.IsEmpty())
{
for (var j = 0; j < actionSize; j++)
{

12
com.unity.ml-agents/Runtime/Inference/ModelRunner.cs


internal class ModelRunner
{
List<AgentInfoSensorsPair> m_Infos = new List<AgentInfoSensorsPair>();
Dictionary<int, float[]> m_LastActionsReceived = new Dictionary<int, float[]>();
Dictionary<int, ActionBuffers> m_LastActionsReceived = new Dictionary<int, ActionBuffers>();
List<int> m_OrderedAgentsRequestingDecisions = new List<int>();
ITensorAllocator m_TensorAllocator;

m_Engine = null;
}
m_InferenceInputs = BarracudaModelParamLoader.GetInputTensors(barracudaModel);
m_OutputNames = BarracudaModelParamLoader.GetOutputNames(barracudaModel);
m_InferenceInputs = barracudaModel.GetInputTensors();
m_OutputNames = barracudaModel.GetOutputNames();
m_TensorGenerator = new TensorGenerator(
seed, m_TensorAllocator, m_Memories, barracudaModel);
m_TensorApplier = new TensorApplier(

if (!m_LastActionsReceived.ContainsKey(info.episodeId))
{
m_LastActionsReceived[info.episodeId] = null;
m_LastActionsReceived[info.episodeId] = ActionBuffers.Empty;
}
if (info.done)
{

return m_Model == other && m_InferenceDevice == otherInferenceDevice;
}
public float[] GetAction(int agentId)
public ActionBuffers GetAction(int agentId)
return null;
return ActionBuffers.Empty;
}
}
}
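GetAction now returns an ActionBuffers value instead of a nullable float array, with ActionBuffers.Empty standing in for "no decision yet". A minimal usage sketch (modelRunner and agentId are assumed to exist in the calling code):
modelRunner.DecideBatch();
var action = modelRunner.GetAction(agentId);
if (action.Equals(ActionBuffers.Empty))
{
    // No observations were put in for this agent, so there is nothing to apply.
}
else
{
    var firstDiscrete = action.DiscreteActions[0];   // or action.ContinuousActions for continuous models
}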

35
com.unity.ml-agents/Runtime/Inference/TensorApplier.cs


/// </param>
/// <param name="actionIds"> List of Agents Ids that will be updated using the tensor's data</param>
/// <param name="lastActions"> Dictionary of AgentId to Actions to be updated</param>
void Apply(TensorProxy tensorProxy, IEnumerable<int> actionIds, Dictionary<int, float[]> lastActions);
void Apply(TensorProxy tensorProxy, IEnumerable<int> actionIds, Dictionary<int, ActionBuffers> lastActions);
}
readonly Dictionary<string, IApplier> m_Dict = new Dictionary<string, IApplier>();

Dictionary<int, List<float>> memories,
object barracudaModel = null)
{
actionSpec.CheckNotHybrid();
// If model is null, no inference to run and exception is thrown before reaching here.
if (barracudaModel == null)
{
return;
}
var model = (Model)barracudaModel;
if (!model.SupportsContinuousAndDiscrete())
{
actionSpec.CheckAllContinuousOrDiscrete();
}
m_Dict[TensorNames.ActionOutput] = new ContinuousActionOutputApplier();
var tensorName = model.ContinuousOutputName();
m_Dict[tensorName] = new ContinuousActionOutputApplier(actionSpec);
else
if (actionSpec.NumDiscreteActions > 0)
m_Dict[TensorNames.ActionOutput] =
new DiscreteActionOutputApplier(actionSpec.BranchSizes, seed, allocator);
var tensorName = model.DiscreteOutputName();
m_Dict[tensorName] = new DiscreteActionOutputApplier(actionSpec, seed, allocator);
if (barracudaModel != null)
for (var i = 0; i < model?.memories.Count; i++)
var model = (Model)barracudaModel;
for (var i = 0; i < model?.memories.Count; i++)
{
m_Dict[model.memories[i].output] =
new BarracudaMemoryOutputApplier(model.memories.Count, i, memories);
}
m_Dict[model.memories[i].output] =
new BarracudaMemoryOutputApplier(model.memories.Count, i, memories);
}
}

/// <exception cref="UnityAgentsException"> One of the tensors does not have an
/// associated applier.</exception>
public void ApplyTensors(
IEnumerable<TensorProxy> tensors, IEnumerable<int> actionIds, Dictionary<int, float[]> lastActions)
IEnumerable<TensorProxy> tensors, IEnumerable<int> actionIds, Dictionary<int, ActionBuffers> lastActions)
{
foreach (var tensor in tensors)
{

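ApplyTensors keeps its previous shape but now threads the shared agentId -> ActionBuffers dictionary through every registered IApplier. A short sketch of the call site, assuming the inference outputs and ordered agent ids are already available from the ModelRunner:
// lastActions is shared with the ModelRunner and read back by the policies.
var lastActions = new Dictionary<int, ActionBuffers>();
tensorApplier.ApplyTensors(inferenceOutputs, orderedAgentIds, lastActions);
// Afterwards each agent's entry holds the continuous and/or discrete actions its appliers filled in.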
26
com.unity.ml-agents/Runtime/Inference/TensorGenerator.cs


Dictionary<int, List<float>> memories,
object barracudaModel = null)
{
// If model is null, no inference to run and exception is thrown before reaching here.
if (barracudaModel == null)
{
return;
}
var model = (Model)barracudaModel;
// Generator for Inputs
m_Dict[TensorNames.BatchSizePlaceholder] =
new BatchSizeGenerator(allocator);

new RecurrentInputGenerator(allocator, memories);
if (barracudaModel != null)
for (var i = 0; i < model.memories.Count; i++)
var model = (Model)barracudaModel;
for (var i = 0; i < model.memories.Count; i++)
{
m_Dict[model.memories[i].input] =
new BarracudaRecurrentInputGenerator(i, allocator, memories);
}
m_Dict[model.memories[i].input] =
new BarracudaRecurrentInputGenerator(i, allocator, memories);
}
m_Dict[TensorNames.PreviousActionPlaceholder] =

// Generators for Outputs
m_Dict[TensorNames.ActionOutput] = new BiDimensionalOutputGenerator(allocator);
if (model.HasContinuousOutputs())
{
m_Dict[model.ContinuousOutputName()] = new BiDimensionalOutputGenerator(allocator);
}
if (model.HasDiscreteOutputs())
{
m_Dict[model.DiscreteOutputName()] = new BiDimensionalOutputGenerator(allocator);
}
m_Dict[TensorNames.RecurrentOutput] = new BiDimensionalOutputGenerator(allocator);
m_Dict[TensorNames.ValueEstimateOutput] = new BiDimensionalOutputGenerator(allocator);
}

15
com.unity.ml-agents/Runtime/Inference/TensorNames.cs


public const string recurrentOutputC = "recurrent_out_c";
public const string MemorySize = "memory_size";
public const string VersionNumber = "version_number";
public const string IsContinuousControl = "is_continuous_control";
public const string ActionOutputShape = "action_output_shape";
public const string ActionOutput = "action";
public const string ContinuousActionOutputShape = "continuous_action_output_shape";
public const string DiscreteActionOutputShape = "discrete_action_output_shape";
public const string ContinuousActionOutput = "continuous_actions";
public const string DiscreteActionOutput = "discrete_actions";
public static readonly string[] RequiredConstants =
{
VersionNumber, MemorySize, IsContinuousControl, ActionOutputShape
};
// Deprecated TensorNames entries for backward compatibility
public const string IsContinuousControlDeprecated = "is_continuous_control";
public const string ActionOutputDeprecated = "action";
public const string ActionOutputShapeDeprecated = "action_output_shape";
}
}
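Older .nn models only expose the deprecated single action output, so callers have to fall back to the old names. A hedged illustration of that selection, roughly what the ContinuousOutputName()/DiscreteOutputName() model extensions referenced elsewhere in this change provide (model is an already-loaded Barracuda Model; this is not the actual implementation):
var continuousName = model.outputs.Contains(TensorNames.ContinuousActionOutput)
    ? TensorNames.ContinuousActionOutput
    : TensorNames.ActionOutputDeprecated;
var discreteName = model.outputs.Contains(TensorNames.DiscreteActionOutput)
    ? TensorNames.DiscreteActionOutput
    : TensorNames.ActionOutputDeprecated;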

19
com.unity.ml-agents/Runtime/Policies/BarracudaPolicy.cs


/// Sensor shapes for the associated Agents. All Agents must have the same shapes for their Sensors.
/// </summary>
List<int[]> m_SensorShapes;
SpaceType m_SpaceType;
ActionSpec m_ActionSpec;
/// <inheritdoc />
public BarracudaPolicy(

{
var modelRunner = Academy.Instance.GetOrCreateModelRunner(model, actionSpec, inferenceDevice);
m_ModelRunner = modelRunner;
actionSpec.CheckNotHybrid();
m_SpaceType = actionSpec.NumContinuousActions > 0 ? SpaceType.Continuous : SpaceType.Discrete;
m_ActionSpec = actionSpec;
}
/// <inheritdoc />

/// <inheritdoc />
public ref readonly ActionBuffers DecideAction()
{
m_ModelRunner?.DecideBatch();
var actions = m_ModelRunner?.GetAction(m_AgentId);
if (m_SpaceType == SpaceType.Continuous)
if (m_ModelRunner == null)
{
m_LastActionBuffer = ActionBuffers.Empty;
}
else
m_LastActionBuffer = new ActionBuffers(actions, Array.Empty<int>());
return ref m_LastActionBuffer;
m_ModelRunner?.DecideBatch();
m_LastActionBuffer = m_ModelRunner.GetAction(m_AgentId);
m_LastActionBuffer = ActionBuffers.FromDiscreteActions(actions);
return ref m_LastActionBuffer;
}

109
com.unity.ml-agents/Runtime/Policies/BrainParameters.cs


using System;
using UnityEngine;
using UnityEngine.Serialization;
using Unity.MLAgents.Actuators;
namespace Unity.MLAgents.Policies
{

/// [GameObject]: https://docs.unity3d.com/Manual/GameObjects.html
/// </remarks>
[Serializable]
public class BrainParameters
public class BrainParameters : ISerializationCallbackReceiver
{
/// <summary>
/// The number of the observations that are added in

[FormerlySerializedAs("numStackedVectorObservations")]
[Range(1, 50)] public int NumStackedVectorObservations = 1;
[SerializeField]
internal ActionSpec m_ActionSpec = new ActionSpec(0, null);
/// The size of the action space.
/// The specification of the Action space for the BrainParameters.
/// </summary>
public ActionSpec ActionSpec
{
get { return m_ActionSpec; }
set
{
m_ActionSpec.NumContinuousActions = value.NumContinuousActions;
m_ActionSpec.BranchSizes = value.BranchSizes;
SyncDeprecatedActionFields();
}
}
/// <summary>
/// (Deprecated) The size of the action space.
/// </summary>
/// <remarks>The size specified is interpreted differently depending on whether
/// the agent uses the continuous or the discrete action space.</remarks>

/// For the discrete action space: the number of branches in the action space.
/// </value>
/// [Obsolete("VectorActionSize has been deprecated, please use ActionSpec instead.")]
[FormerlySerializedAs("vectorActionSize")]
public int[] VectorActionSize = new[] { 1 };

public string[] VectorActionDescriptions;
/// <summary>
/// Defines if the action is discrete or continuous.
/// (Deprecated) Defines if the action is discrete or continuous.
/// [Obsolete("VectorActionSpaceType has been deprecated, please use ActionSpec instead.")]
[SerializeField]
[HideInInspector]
internal bool hasUpgradedBrainParametersWithActionSpec;
/// The number of actions specified by this Brain.
/// (Deprecated) The number of actions specified by this Brain.
/// [Obsolete("NumActions has been deprecated, please use ActionSpec instead.")]
switch (VectorActionSpaceType)
{
case SpaceType.Discrete:
return VectorActionSize.Length;
case SpaceType.Continuous:
return VectorActionSize[0];
default:
return 0;
}
return ActionSpec.NumContinuousActions > 0 ? ActionSpec.NumContinuousActions : ActionSpec.NumDiscreteActions;
}
}

{
VectorObservationSize = VectorObservationSize,
NumStackedVectorObservations = NumStackedVectorObservations,
VectorActionSize = (int[])VectorActionSize.Clone(),
VectorActionSpaceType = VectorActionSpaceType
ActionSpec = new ActionSpec(ActionSpec.NumContinuousActions, ActionSpec.BranchSizes),
VectorActionSize = (int[])VectorActionSize.Clone(),
VectorActionSpaceType = VectorActionSpaceType,
}
/// <summary>
/// Propagate ActionSpec fields from the deprecated fields
/// </summary>
private void UpdateToActionSpec()
{
if (!hasUpgradedBrainParametersWithActionSpec)
{
if (VectorActionSpaceType == SpaceType.Continuous)
{
m_ActionSpec.NumContinuousActions = VectorActionSize[0];
m_ActionSpec.BranchSizes = null;
}
if (VectorActionSpaceType == SpaceType.Discrete)
{
m_ActionSpec.NumContinuousActions = 0;
m_ActionSpec.BranchSizes = VectorActionSize;
}
hasUpgradedBrainParametersWithActionSpec = true;
}
}
/// <summary>
/// Sync values in ActionSpec fields to deprecated fields
/// </summary>
private void SyncDeprecatedActionFields()
{
if (m_ActionSpec.NumContinuousActions == 0)
{
VectorActionSize = ActionSpec.BranchSizes;
VectorActionSpaceType = SpaceType.Discrete;
}
else if (m_ActionSpec.NumDiscreteActions == 0)
{
VectorActionSize = new[] { m_ActionSpec.NumContinuousActions };
VectorActionSpaceType = SpaceType.Continuous;
}
else
{
VectorActionSize = null;
}
}
/// <summary>
/// Called by Unity immediately before serializing this object.
/// </summary>
public void OnBeforeSerialize()
{
UpdateToActionSpec();
SyncDeprecatedActionFields();
}
/// <summary>
/// Called by Unity immediately after deserializing this object.
/// </summary>
public void OnAfterDeserialize()
{
UpdateToActionSpec();
SyncDeprecatedActionFields();
}
}
}
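With the serialization callbacks above, user code only needs to assign ActionSpec; the deprecated fields are upgraded on load by UpdateToActionSpec and kept in sync by SyncDeprecatedActionFields. A minimal sketch using the factory methods that appear in the tests later in this change:
var bp = new BrainParameters();
bp.VectorObservationSize = 8;
bp.ActionSpec = ActionSpec.MakeDiscrete(2, 3);    // two discrete branches of sizes 2 and 3
// After the setter runs, the deprecated mirrors describe the same space:
//   bp.VectorActionSize      -> { 2, 3 }
//   bp.VectorActionSpaceType -> SpaceType.Discrete
bp.ActionSpec = ActionSpec.MakeContinuous(4);     // a purely continuous spec syncs the same way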

14
com.unity.ml-agents/Runtime/Policies/RemotePolicy.cs


{
int m_AgentId;
string m_FullyQualifiedBehaviorName;
SpaceType m_SpaceType;
ActionSpec m_ActionSpec;
ActionBuffers m_LastActionBuffer;
internal ICommunicator m_Communicator;

m_FullyQualifiedBehaviorName = fullyQualifiedBehaviorName;
m_Communicator = Academy.Instance.Communicator;
m_Communicator.SubscribeBrain(m_FullyQualifiedBehaviorName, actionSpec);
actionSpec.CheckNotHybrid();
m_SpaceType = actionSpec.NumContinuousActions > 0 ? SpaceType.Continuous : SpaceType.Discrete;
m_ActionSpec = actionSpec;
}
/// <inheritdoc />

{
m_Communicator?.DecideBatch();
var actions = m_Communicator?.GetActions(m_FullyQualifiedBehaviorName, m_AgentId);
// TODO figure out how to handle this with multiple space types.
if (m_SpaceType == SpaceType.Continuous)
{
m_LastActionBuffer = new ActionBuffers(actions, Array.Empty<int>());
return ref m_LastActionBuffer;
}
m_LastActionBuffer = ActionBuffers.FromDiscreteActions(actions);
m_LastActionBuffer = actions == null ? ActionBuffers.Empty : (ActionBuffers)actions;
return ref m_LastActionBuffer;
}

4
com.unity.ml-agents/Runtime/Sensors/ObservationWriter.cs


{
m_TensorShape = new TensorShape(m_Batch, shape[0]);
}
else if (shape.Length == 2)
{
m_TensorShape = new TensorShape(new int[] { m_Batch, 1, shape[0], shape[1] });
}
else
{
m_TensorShape = new TensorShape(m_Batch, shape[0], shape[1], shape[2]);

12
com.unity.ml-agents/Tests/Editor/Actuators/ActuatorManagerTests.cs


}
[Test]
public void TestFailOnMixedActionSpace()
{
var manager = new ActuatorManager();
var actuator1 = new TestActuator(ActionSpec.MakeDiscrete(new[] { 1, 2, 3, 4 }), "actuator1");
var actuator2 = new TestActuator(ActionSpec.MakeContinuous(3), "actuator2");
manager.Add(actuator1);
manager.Add(actuator2);
LogAssert.Expect(LogType.Assert, "Actuators on the same Agent must have the same action SpaceType.");
manager.ReadyActuatorsForExecution(new[] { actuator1, actuator2 }, 3, 10, 4);
}
[Test]
public void TestFailOnSameActuatorName()
{
var manager = new ActuatorManager();

10
com.unity.ml-agents/Tests/Editor/Actuators/VectorActuatorTests.cs


public void TestConstruct()
{
var ar = new TestActionReceiver();
var va = new VectorActuator(ar, new[] { 1, 2, 3 }, SpaceType.Discrete, "name");
var va = new VectorActuator(ar, ActionSpec.MakeDiscrete(1, 2, 3), "name");
var va1 = new VectorActuator(ar, new[] { 4 }, SpaceType.Continuous, "name");
var va1 = new VectorActuator(ar, ActionSpec.MakeContinuous(4), "name");
Assert.IsTrue(va1.ActionSpec.NumContinuousActions == 4);
Assert.IsTrue(va1.ActionSpec.SumOfDiscreteBranchSizes == 0);

public void TestOnActionReceived()
{
var ar = new TestActionReceiver();
var va = new VectorActuator(ar, new[] { 1, 2, 3 }, SpaceType.Discrete, "name");
var va = new VectorActuator(ar, ActionSpec.MakeDiscrete(1, 2, 3), "name");
var discreteActions = new[] { 0, 1, 1 };
var ab = new ActionBuffers(ActionSegment<float>.Empty,

public void TestResetData()
{
var ar = new TestActionReceiver();
var va = new VectorActuator(ar, new[] { 1, 2, 3 }, SpaceType.Discrete, "name");
var va = new VectorActuator(ar, ActionSpec.MakeDiscrete(1, 2, 3), "name");
var discreteActions = new[] { 0, 1, 1 };
var ab = new ActionBuffers(ActionSegment<float>.Empty,

public void TestWriteDiscreteActionMask()
{
var ar = new TestActionReceiver();
var va = new VectorActuator(ar, new[] { 1, 2, 3 }, SpaceType.Discrete, "name");
var va = new VectorActuator(ar, ActionSpec.MakeDiscrete(1, 2, 3), "name");
var bdam = new ActuatorDiscreteActionMask(new[] { va }, 6, 3);
var groundTruthMask = new[] { false, true, false, false, true, true };

9
com.unity.ml-agents/Tests/Editor/DemonstrationTests.cs


using UnityEngine;
using System.IO.Abstractions.TestingHelpers;
using System.Reflection;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.CommunicatorObjects;
using Unity.MLAgents.Sensors;
using Unity.MLAgents.Demonstrations;

bp.BrainParameters.VectorObservationSize = 3;
bp.BrainParameters.NumStackedVectorObservations = 2;
bp.BrainParameters.VectorActionDescriptions = new[] { "TestActionA", "TestActionB" };
bp.BrainParameters.VectorActionSize = new[] { 2, 2 };
bp.BrainParameters.VectorActionSpaceType = SpaceType.Discrete;
bp.BrainParameters.ActionSpec = ActionSpec.MakeDiscrete(2, 2);
gameobj.AddComponent<TestAgent>();

done = true,
episodeId = 5,
maxStepReached = true,
storedVectorActions = new[] { 0f, 1f },
storedVectorActions = new ActionBuffers(null, new int[] { 0, 1 }),
};

bpA.BrainParameters.VectorObservationSize = 3;
bpA.BrainParameters.NumStackedVectorObservations = 1;
bpA.BrainParameters.VectorActionDescriptions = new[] { "TestActionA", "TestActionB" };
bpA.BrainParameters.VectorActionSize = new[] { 2, 2 };
bpA.BrainParameters.VectorActionSpaceType = SpaceType.Discrete;
bpA.BrainParameters.ActionSpec = ActionSpec.MakeDiscrete(2, 2);
agentGo1.AddComponent<ObservationAgent>();
var agent1 = agentGo1.GetComponent<ObservationAgent>();

74
com.unity.ml-agents/Tests/Editor/EditModeTestInternalBrainTensorApplier.cs


using Unity.Barracuda;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Inference;
using Unity.MLAgents.Policies;
namespace Unity.MLAgents.Tests
{

[Test]
public void ApplyContinuousActionOutput()
{
var actionSpec = ActionSpec.MakeContinuous(3);
var inputTensor = new TensorProxy()
{
shape = new long[] { 2, 3 },

var applier = new ContinuousActionOutputApplier();
var applier = new ContinuousActionOutputApplier(actionSpec);
var actionDict = new Dictionary<int, float[]>() { { 0, null }, { 1, null } };
var actionDict = new Dictionary<int, ActionBuffers>() { { 0, ActionBuffers.Empty }, { 1, ActionBuffers.Empty } };
Assert.AreEqual(actionDict[0][0], 1);
Assert.AreEqual(actionDict[0][1], 2);
Assert.AreEqual(actionDict[0][2], 3);
Assert.AreEqual(actionDict[0].ContinuousActions[0], 1);
Assert.AreEqual(actionDict[0].ContinuousActions[1], 2);
Assert.AreEqual(actionDict[0].ContinuousActions[2], 3);
Assert.AreEqual(actionDict[1][0], 4);
Assert.AreEqual(actionDict[1][1], 5);
Assert.AreEqual(actionDict[1][2], 6);
Assert.AreEqual(actionDict[1].ContinuousActions[0], 4);
Assert.AreEqual(actionDict[1].ContinuousActions[1], 5);
Assert.AreEqual(actionDict[1].ContinuousActions[2], 6);
var actionSpec = ActionSpec.MakeDiscrete(2, 3);
var inputTensor = new TensorProxy()
{
shape = new long[] { 2, 5 },

new[] { 0.5f, 22.5f, 0.1f, 5f, 1f, 4f, 5f, 6f, 7f, 8f })
};
var alloc = new TensorCachingAllocator();
var applier = new DiscreteActionOutputApplier(new[] { 2, 3 }, 0, alloc);
var applier = new DiscreteActionOutputApplier(actionSpec, 0, alloc);
var actionDict = new Dictionary<int, float[]>() { { 0, null }, { 1, null } };
var actionDict = new Dictionary<int, ActionBuffers>() { { 0, ActionBuffers.Empty }, { 1, ActionBuffers.Empty } };
Assert.AreEqual(actionDict[0][0], 1);
Assert.AreEqual(actionDict[0][1], 1);
Assert.AreEqual(actionDict[0].DiscreteActions[0], 1);
Assert.AreEqual(actionDict[0].DiscreteActions[1], 1);
Assert.AreEqual(actionDict[1][0], 1);
Assert.AreEqual(actionDict[1][1], 2);
Assert.AreEqual(actionDict[1].DiscreteActions[0], 1);
Assert.AreEqual(actionDict[1].DiscreteActions[1], 2);
alloc.Dispose();
}
[Test]
public void ApplyHybridActionOutput()
{
var actionSpec = new ActionSpec(3, new int[] { 2, 3 });
var continuousInputTensor = new TensorProxy()
{
shape = new long[] { 2, 3 },
data = new Tensor(2, 3, new float[] { 1, 2, 3, 4, 5, 6 })
};
var discreteInputTensor = new TensorProxy()
{
shape = new long[] { 2, 8 },
data = new Tensor(
2,
5,
new[] { 0.5f, 22.5f, 0.1f, 5f, 1f, 4f, 5f, 6f, 7f, 8f })
};
var continuousApplier = new ContinuousActionOutputApplier(actionSpec);
var alloc = new TensorCachingAllocator();
var discreteApplier = new DiscreteActionOutputApplier(actionSpec, 0, alloc);
var agentIds = new List<int>() { 0, 1 };
// Dictionary from AgentId to Action
var actionDict = new Dictionary<int, ActionBuffers>() { { 0, ActionBuffers.Empty }, { 1, ActionBuffers.Empty } };
continuousApplier.Apply(continuousInputTensor, agentIds, actionDict);
discreteApplier.Apply(discreteInputTensor, agentIds, actionDict);
Assert.AreEqual(actionDict[0].ContinuousActions[0], 1);
Assert.AreEqual(actionDict[0].ContinuousActions[1], 2);
Assert.AreEqual(actionDict[0].ContinuousActions[2], 3);
Assert.AreEqual(actionDict[0].DiscreteActions[0], 1);
Assert.AreEqual(actionDict[0].DiscreteActions[1], 1);
Assert.AreEqual(actionDict[1].ContinuousActions[0], 4);
Assert.AreEqual(actionDict[1].ContinuousActions[1], 5);
Assert.AreEqual(actionDict[1].ContinuousActions[2], 6);
Assert.AreEqual(actionDict[1].DiscreteActions[0], 1);
Assert.AreEqual(actionDict[1].DiscreteActions[1], 2);
alloc.Dispose();
}
}

7
com.unity.ml-agents/Tests/Editor/EditModeTestInternalBrainTensorGenerator.cs


using Unity.Barracuda;
using NUnit.Framework;
using UnityEngine;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Inference;
using Unity.MLAgents.Policies;
using Unity.MLAgents.Sensors.Reflection;

var infoA = new AgentInfo
{
storedVectorActions = new[] { 1f, 2f },
discreteActionMasks = null
storedVectorActions = new ActionBuffers(null, new[] { 1, 2 }),
discreteActionMasks = null,
storedVectorActions = new[] { 3f, 4f },
storedVectorActions = new ActionBuffers(null, new[] { 3, 4 }),
discreteActionMasks = new[] { true, false, false, false, false },
};

27
com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs


{
var agentGo1 = new GameObject("TestAgent");
var bp1 = agentGo1.AddComponent<BehaviorParameters>();
bp1.BrainParameters.VectorActionSize = new[] { 1 };
bp1.BrainParameters.VectorActionSpaceType = SpaceType.Continuous;
bp1.BrainParameters.ActionSpec = ActionSpec.MakeContinuous(1);
bp2.BrainParameters.VectorActionSize = new[] { 1 };
bp2.BrainParameters.VectorActionSpaceType = SpaceType.Continuous;
bp2.BrainParameters.ActionSpec = ActionSpec.MakeContinuous(1);
agentGo2.AddComponent<TestAgent>();
var agent2 = agentGo2.GetComponent<TestAgent>();

{
var agentGo1 = new GameObject("TestAgent");
var bp1 = agentGo1.AddComponent<BehaviorParameters>();
bp1.BrainParameters.VectorActionSize = new[] { 1 };
bp1.BrainParameters.VectorActionSpaceType = SpaceType.Continuous;
bp1.BrainParameters.ActionSpec = ActionSpec.MakeContinuous(1);
bp2.BrainParameters.VectorActionSize = new[] { 1 };
bp2.BrainParameters.VectorActionSpaceType = SpaceType.Continuous;
bp2.BrainParameters.ActionSpec = ActionSpec.MakeContinuous(1);
agentGo2.AddComponent<TestAgent>();
var agent2 = agentGo2.GetComponent<TestAgent>();

{
var agentGo1 = new GameObject("TestAgent");
var bp1 = agentGo1.AddComponent<BehaviorParameters>();
bp1.BrainParameters.VectorActionSize = new[] { 1 };
bp1.BrainParameters.VectorActionSpaceType = SpaceType.Continuous;
bp1.BrainParameters.ActionSpec = ActionSpec.MakeContinuous(1);
var agent1 = agentGo1.AddComponent<TestAgent>();
var behaviorParameters = agentGo1.GetComponent<BehaviorParameters>();
behaviorParameters.BrainParameters.NumStackedVectorObservations = 3;

{
var agentGo1 = new GameObject("TestAgent");
var bp1 = agentGo1.AddComponent<BehaviorParameters>();
bp1.BrainParameters.VectorActionSize = new[] { 1 };
bp1.BrainParameters.VectorActionSpaceType = SpaceType.Continuous;
bp1.BrainParameters.ActionSpec = ActionSpec.MakeContinuous(1);
bp2.BrainParameters.VectorActionSize = new[] { 1 };
bp2.BrainParameters.VectorActionSpaceType = SpaceType.Continuous;
bp2.BrainParameters.ActionSpec = ActionSpec.MakeContinuous(1);
var agent2 = agentGo2.AddComponent<TestAgent>();
var aca = Academy.Instance;

{
var agentGo1 = new GameObject("TestAgent");
var bp1 = agentGo1.AddComponent<BehaviorParameters>();
bp1.BrainParameters.VectorActionSize = new[] { 1 };
bp1.BrainParameters.VectorActionSpaceType = SpaceType.Continuous;
bp1.BrainParameters.ActionSpec = ActionSpec.MakeContinuous(1);
agentGo1.AddComponent<TestAgent>();
var agent1 = agentGo1.GetComponent<TestAgent>();
var aca = Academy.Instance;

// Make sure that Agents with HeuristicPolicies step their sensors each Academy step.
var agentGo1 = new GameObject("TestAgent");
var bp1 = agentGo1.AddComponent<BehaviorParameters>();
bp1.BrainParameters.VectorActionSize = new[] { 1 };
bp1.BrainParameters.VectorActionSpaceType = SpaceType.Continuous;
bp1.BrainParameters.ActionSpec = ActionSpec.MakeContinuous(1);
agentGo1.AddComponent<TestAgent>();
var agent1 = agentGo1.GetComponent<TestAgent>();
var aca = Academy.Instance;

62
com.unity.ml-agents/Tests/Editor/ModelRunnerTest.cs


using Unity.Barracuda;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Inference;
using Unity.MLAgents.Sensors;
using Unity.MLAgents.Policies;
namespace Unity.MLAgents.Tests

{
const string k_continuous2vis8vec2actionPath = "Packages/com.unity.ml-agents/Tests/Editor/TestModels/continuous2vis8vec2action.nn";
const string k_discrete1vis0vec_2_3action_recurrModelPath = "Packages/com.unity.ml-agents/Tests/Editor/TestModels/discrete1vis0vec_2_3action_recurr.nn";
NNModel continuous2vis8vec2actionModel;
NNModel discrete1vis0vec_2_3action_recurrModel;
const string k_continuousONNXPath = "Packages/com.unity.ml-agents/Tests/Editor/TestModels/continuous2vis8vec2action.onnx";
const string k_discreteONNXPath = "Packages/com.unity.ml-agents/Tests/Editor/TestModels/discrete1vis0vec_2_3action_recurr.onnx";
const string k_hybridONNXPath = "Packages/com.unity.ml-agents/Tests/Editor/TestModels/hybrid0vis53vec_3c_2daction.onnx";
const string k_continuousNNPath = "Packages/com.unity.ml-agents/Tests/Editor/TestModels/continuous2vis8vec2action_deprecated.nn";
const string k_discreteNNPath = "Packages/com.unity.ml-agents/Tests/Editor/TestModels/discrete1vis0vec_2_3action_recurr_deprecated.nn";
NNModel continuousONNXModel;
NNModel discreteONNXModel;
NNModel hybridONNXModel;
NNModel continuousNNModel;
NNModel discreteNNModel;
Test3DSensorComponent sensor_21_20_3;
Test3DSensorComponent sensor_20_22_3;

return ActionSpec.MakeDiscrete(2, 3);
}
ActionSpec GetHybrid0vis53vec_3c_2dActionSpec()
{
return new ActionSpec(3, new int[] { 2 });
}
continuous2vis8vec2actionModel = (NNModel)AssetDatabase.LoadAssetAtPath(k_continuous2vis8vec2actionPath, typeof(NNModel));
discrete1vis0vec_2_3action_recurrModel = (NNModel)AssetDatabase.LoadAssetAtPath(k_discrete1vis0vec_2_3action_recurrModelPath, typeof(NNModel));
continuousONNXModel = (NNModel)AssetDatabase.LoadAssetAtPath(k_continuousONNXPath, typeof(NNModel));
discreteONNXModel = (NNModel)AssetDatabase.LoadAssetAtPath(k_discreteONNXPath, typeof(NNModel));
hybridONNXModel = (NNModel)AssetDatabase.LoadAssetAtPath(k_hybridONNXPath, typeof(NNModel));
continuousNNModel = (NNModel)AssetDatabase.LoadAssetAtPath(k_continuousNNPath, typeof(NNModel));
discreteNNModel = (NNModel)AssetDatabase.LoadAssetAtPath(k_discreteNNPath, typeof(NNModel));
var go = new GameObject("SensorA");
sensor_21_20_3 = go.AddComponent<Test3DSensorComponent>();
sensor_21_20_3.Sensor = new Test3DSensor("SensorA", 21, 20, 3);

[Test]
public void TestModelExist()
{
Assert.IsNotNull(continuous2vis8vec2actionModel);
Assert.IsNotNull(discrete1vis0vec_2_3action_recurrModel);
Assert.IsNotNull(continuousONNXModel);
Assert.IsNotNull(discreteONNXModel);
Assert.IsNotNull(hybridONNXModel);
Assert.IsNotNull(continuousNNModel);
Assert.IsNotNull(discreteNNModel);
var modelRunner = new ModelRunner(continuous2vis8vec2actionModel, GetContinuous2vis8vec2actionActionSpec());
var modelRunner = new ModelRunner(continuousONNXModel, GetContinuous2vis8vec2actionActionSpec());
modelRunner = new ModelRunner(discrete1vis0vec_2_3action_recurrModel, GetDiscrete1vis0vec_2_3action_recurrModelActionSpec());
modelRunner = new ModelRunner(discreteONNXModel, GetDiscrete1vis0vec_2_3action_recurrModelActionSpec());
modelRunner.Dispose();
modelRunner = new ModelRunner(hybridONNXModel, GetHybrid0vis53vec_3c_2dActionSpec());
modelRunner.Dispose();
modelRunner = new ModelRunner(continuousNNModel, GetContinuous2vis8vec2actionActionSpec());
modelRunner.Dispose();
modelRunner = new ModelRunner(discreteNNModel, GetDiscrete1vis0vec_2_3action_recurrModelActionSpec());
modelRunner.Dispose();
}

var modelRunner = new ModelRunner(continuous2vis8vec2actionModel, GetContinuous2vis8vec2actionActionSpec(), InferenceDevice.CPU);
Assert.True(modelRunner.HasModel(continuous2vis8vec2actionModel, InferenceDevice.CPU));
Assert.False(modelRunner.HasModel(continuous2vis8vec2actionModel, InferenceDevice.GPU));
Assert.False(modelRunner.HasModel(discrete1vis0vec_2_3action_recurrModel, InferenceDevice.CPU));
var modelRunner = new ModelRunner(continuousONNXModel, GetContinuous2vis8vec2actionActionSpec(), InferenceDevice.CPU);
Assert.True(modelRunner.HasModel(continuousONNXModel, InferenceDevice.CPU));
Assert.False(modelRunner.HasModel(continuousONNXModel, InferenceDevice.GPU));
Assert.False(modelRunner.HasModel(discreteONNXModel, InferenceDevice.CPU));
modelRunner.Dispose();
}

var actionSpec = GetDiscrete1vis0vec_2_3action_recurrModelActionSpec();
var modelRunner = new ModelRunner(discrete1vis0vec_2_3action_recurrModel, actionSpec);
var modelRunner = new ModelRunner(discreteONNXModel, actionSpec);
var info1 = new AgentInfo();
info1.episodeId = 1;
modelRunner.PutObservations(info1, new[] { sensor_21_20_3.CreateSensor() }.ToList());

modelRunner.DecideBatch();
Assert.IsNotNull(modelRunner.GetAction(1));
Assert.IsNotNull(modelRunner.GetAction(2));
Assert.IsNull(modelRunner.GetAction(3));
Assert.AreEqual(actionSpec.NumDiscreteActions, modelRunner.GetAction(1).Count());
Assert.IsFalse(modelRunner.GetAction(1).Equals(ActionBuffers.Empty));
Assert.IsFalse(modelRunner.GetAction(2).Equals(ActionBuffers.Empty));
Assert.IsTrue(modelRunner.GetAction(3).Equals(ActionBuffers.Empty));
Assert.AreEqual(actionSpec.NumDiscreteActions, modelRunner.GetAction(1).DiscreteActions.Length);
modelRunner.Dispose();
}
}

217
com.unity.ml-agents/Tests/Editor/ParameterLoaderTest.cs


[TestFixture]
public class ParameterLoaderTest
{
const string k_continuous2vis8vec2actionPath = "Packages/com.unity.ml-agents/Tests/Editor/TestModels/continuous2vis8vec2action.nn";
const string k_discrete1vis0vec_2_3action_recurrModelPath = "Packages/com.unity.ml-agents/Tests/Editor/TestModels/discrete1vis0vec_2_3action_recurr.nn";
NNModel continuous2vis8vec2actionModel;
NNModel discrete1vis0vec_2_3action_recurrModel;
// ONNX models with continuous/discrete action outputs (support hybrid actions)
const string k_continuousONNXPath = "Packages/com.unity.ml-agents/Tests/Editor/TestModels/continuous2vis8vec2action.onnx";
const string k_discreteONNXPath = "Packages/com.unity.ml-agents/Tests/Editor/TestModels/discrete1vis0vec_2_3action_recurr.onnx";
const string k_hybridONNXPath = "Packages/com.unity.ml-agents/Tests/Editor/TestModels/hybrid0vis53vec_3c_2daction.onnx";
// NN models with a single action output (deprecated; do not support hybrid actions).
// Same BrainParameters settings as the corresponding ONNX models.
const string k_continuousNNPath = "Packages/com.unity.ml-agents/Tests/Editor/TestModels/continuous2vis8vec2action_deprecated.nn";
const string k_discreteNNPath = "Packages/com.unity.ml-agents/Tests/Editor/TestModels/discrete1vis0vec_2_3action_recurr_deprecated.nn";
NNModel continuousONNXModel;
NNModel discreteONNXModel;
NNModel hybridONNXModel;
NNModel continuousNNModel;
NNModel discreteNNModel;
Test3DSensorComponent sensor_21_20_3;
Test3DSensorComponent sensor_20_22_3;

validBrainParameters.VectorObservationSize = 8;
validBrainParameters.VectorActionSize = new[] { 2 };
validBrainParameters.VectorActionSpaceType = SpaceType.Continuous;
validBrainParameters.ActionSpec = ActionSpec.MakeContinuous(2);
return validBrainParameters;
}

validBrainParameters.VectorObservationSize = 0;
validBrainParameters.VectorActionSize = new[] { 2, 3 };
validBrainParameters.NumStackedVectorObservations = 1;
validBrainParameters.ActionSpec = ActionSpec.MakeDiscrete(2, 3);
return validBrainParameters;
}
BrainParameters GetHybridBrainParameters()
{
var validBrainParameters = new BrainParameters();
validBrainParameters.VectorObservationSize = 53;
validBrainParameters.VectorActionSpaceType = SpaceType.Discrete;
validBrainParameters.ActionSpec = new ActionSpec(3, new int[] { 2 });
return validBrainParameters;
}

continuous2vis8vec2actionModel = (NNModel)AssetDatabase.LoadAssetAtPath(k_continuous2vis8vec2actionPath, typeof(NNModel));
discrete1vis0vec_2_3action_recurrModel = (NNModel)AssetDatabase.LoadAssetAtPath(k_discrete1vis0vec_2_3action_recurrModelPath, typeof(NNModel));
continuousONNXModel = (NNModel)AssetDatabase.LoadAssetAtPath(k_continuousONNXPath, typeof(NNModel));
discreteONNXModel = (NNModel)AssetDatabase.LoadAssetAtPath(k_discreteONNXPath, typeof(NNModel));
hybridONNXModel = (NNModel)AssetDatabase.LoadAssetAtPath(k_hybridONNXPath, typeof(NNModel));
continuousNNModel = (NNModel)AssetDatabase.LoadAssetAtPath(k_continuousNNPath, typeof(NNModel));
discreteNNModel = (NNModel)AssetDatabase.LoadAssetAtPath(k_discreteNNPath, typeof(NNModel));
var go = new GameObject("SensorA");
sensor_21_20_3 = go.AddComponent<Test3DSensorComponent>();
sensor_21_20_3.Sensor = new Test3DSensor("SensorA", 21, 20, 3);

[Test]
public void TestModelExist()
{
Assert.IsNotNull(continuous2vis8vec2actionModel);
Assert.IsNotNull(discrete1vis0vec_2_3action_recurrModel);
Assert.IsNotNull(continuousONNXModel);
Assert.IsNotNull(discreteONNXModel);
Assert.IsNotNull(hybridONNXModel);
Assert.IsNotNull(continuousNNModel);
Assert.IsNotNull(discreteNNModel);
[Test]
public void TestGetInputTensors1()
[TestCase(true)]
[TestCase(false)]
public void TestGetInputTensorsContinuous(bool useDeprecatedNNModel)
var model = ModelLoader.Load(continuous2vis8vec2actionModel);
var inputTensors = BarracudaModelParamLoader.GetInputTensors(model);
var inputNames = inputTensors.Select(x => x.name).ToList();
var model = useDeprecatedNNModel ? ModelLoader.Load(continuousNNModel) : ModelLoader.Load(continuousONNXModel);
var inputNames = model.GetInputNames();
Assert.AreEqual(3, inputNames.Count);
Assert.AreEqual(3, inputNames.Count());
Assert.AreEqual(2, BarracudaModelParamLoader.GetNumVisualInputs(model));
Assert.AreEqual(2, model.GetNumVisualInputs());
Assert.AreEqual(0, BarracudaModelParamLoader.GetInputTensors(null).Count);
Assert.AreEqual(0, BarracudaModelParamLoader.GetNumVisualInputs(null));
model = null;
Assert.AreEqual(0, model.GetInputTensors().Count);
Assert.AreEqual(0, model.GetNumVisualInputs());
[Test]
public void TestGetInputTensors2()
[TestCase(true)]
[TestCase(false)]
public void TestGetInputTensorsDiscrete(bool useDeprecatedNNModel)
var model = ModelLoader.Load(discrete1vis0vec_2_3action_recurrModel);
var inputTensors = BarracudaModelParamLoader.GetInputTensors(model);
var inputNames = inputTensors.Select(x => x.name).ToList();
var model = useDeprecatedNNModel ? ModelLoader.Load(discreteNNModel) : ModelLoader.Load(discreteONNXModel);
var inputNames = model.GetInputNames();
// Model should contain 2 inputs : recurrent and visual 1
Assert.Contains(TensorNames.VisualObservationPlaceholderPrefix + "0", inputNames);

[Test]
public void TestGetOutputTensors1()
public void TestGetInputTensorsHybrid()
var model = ModelLoader.Load(continuous2vis8vec2actionModel);
var outputNames = BarracudaModelParamLoader.GetOutputNames(model);
Assert.Contains(TensorNames.ActionOutput, outputNames);
var model = ModelLoader.Load(hybridONNXModel);
var inputNames = model.GetInputNames();
Assert.Contains(TensorNames.VectorObservationPlaceholder, inputNames);
}
[TestCase(true)]
[TestCase(false)]
public void TestGetOutputTensorsContinuous(bool useDeprecatedNNModel)
{
var model = useDeprecatedNNModel ? ModelLoader.Load(continuousNNModel) : ModelLoader.Load(continuousONNXModel);
var outputNames = model.GetOutputNames();
var actionOutputName = useDeprecatedNNModel ? TensorNames.ActionOutputDeprecated : TensorNames.ContinuousActionOutput;
Assert.Contains(actionOutputName, outputNames);
Assert.AreEqual(0, BarracudaModelParamLoader.GetOutputNames(null).Count());
model = null;
Assert.AreEqual(0, model.GetOutputNames().Count());
[Test]
public void TestGetOutputTensors2()
[TestCase(true)]
[TestCase(false)]
public void TestGetOutputTensorsDiscrete(bool useDeprecatedNNModel)
var model = ModelLoader.Load(discrete1vis0vec_2_3action_recurrModel);
var outputNames = BarracudaModelParamLoader.GetOutputNames(model);
Assert.Contains(TensorNames.ActionOutput, outputNames);
var model = useDeprecatedNNModel ? ModelLoader.Load(discreteNNModel) : ModelLoader.Load(discreteONNXModel);
var outputNames = model.GetOutputNames();
var actionOutputName = useDeprecatedNNModel ? TensorNames.ActionOutputDeprecated : TensorNames.DiscreteActionOutput;
Assert.Contains(actionOutputName, outputNames);
public void TestCheckModelValid1()
public void TestGetOutputTensorsHybrid()
{
var model = ModelLoader.Load(hybridONNXModel);
var outputNames = model.GetOutputNames();
Assert.AreEqual(2, outputNames.Count());
Assert.Contains(TensorNames.ContinuousActionOutput, outputNames);
Assert.Contains(TensorNames.DiscreteActionOutput, outputNames);
model = null;
Assert.AreEqual(0, model.GetOutputNames().Count());
}
[TestCase(true)]
[TestCase(false)]
public void TestCheckModelValidContinuous(bool useDeprecatedNNModel)
var model = ModelLoader.Load(continuous2vis8vec2actionModel);
var model = useDeprecatedNNModel ? ModelLoader.Load(continuousNNModel) : ModelLoader.Load(continuousONNXModel);
var validBrainParameters = GetContinuous2vis8vec2actionBrainParameters();
var errors = BarracudaModelParamLoader.CheckModel(

Assert.AreEqual(0, errors.Count()); // There should not be any errors
}
[Test]
public void TestCheckModelValid2()
[TestCase(true)]
[TestCase(false)]
public void TestCheckModelValidDiscrete(bool useDeprecatedNNModel)
var model = ModelLoader.Load(discrete1vis0vec_2_3action_recurrModel);
var model = useDeprecatedNNModel ? ModelLoader.Load(discreteNNModel) : ModelLoader.Load(discreteONNXModel);
var validBrainParameters = GetDiscrete1vis0vec_2_3action_recurrModelBrainParameters();
var errors = BarracudaModelParamLoader.CheckModel(

}
[Test]
public void TestCheckModelThrowsVectorObservation1()
public void TestCheckModelValidHybrid()
var model = ModelLoader.Load(continuous2vis8vec2actionModel);
var model = ModelLoader.Load(hybridONNXModel);
var validBrainParameters = GetHybridBrainParameters();
var errors = BarracudaModelParamLoader.CheckModel(
model, validBrainParameters,
new SensorComponent[] { }, new ActuatorComponent[0]
);
Assert.AreEqual(0, errors.Count()); // There should not be any errors
}
[TestCase(true)]
[TestCase(false)]
public void TestCheckModelThrowsVectorObservationContinuous(bool useDeprecatedNNModel)
{
var model = useDeprecatedNNModel ? ModelLoader.Load(continuousNNModel) : ModelLoader.Load(continuousONNXModel);
var brainParameters = GetContinuous2vis8vec2actionBrainParameters();
brainParameters.VectorObservationSize = 9; // Invalid observation

Assert.Greater(errors.Count(), 0);
}
[Test]
public void TestCheckModelThrowsVectorObservation2()
[TestCase(true)]
[TestCase(false)]
public void TestCheckModelThrowsVectorObservationDiscrete(bool useDeprecatedNNModel)
var model = ModelLoader.Load(discrete1vis0vec_2_3action_recurrModel);
var model = useDeprecatedNNModel ? ModelLoader.Load(discreteNNModel) : ModelLoader.Load(discreteONNXModel);
var brainParameters = GetDiscrete1vis0vec_2_3action_recurrModelBrainParameters();
brainParameters.VectorObservationSize = 1; // Invalid observation

[Test]
public void TestCheckModelThrowsAction1()
public void TestCheckModelThrowsVectorObservationHybrid()
var model = ModelLoader.Load(continuous2vis8vec2actionModel);
var model = ModelLoader.Load(hybridONNXModel);
var brainParameters = GetHybridBrainParameters();
brainParameters.VectorObservationSize = 9; // Invalid observation
var errors = BarracudaModelParamLoader.CheckModel(
model, brainParameters,
new SensorComponent[] { }, new ActuatorComponent[0]
);
Assert.Greater(errors.Count(), 0);
brainParameters = GetContinuous2vis8vec2actionBrainParameters();
brainParameters.NumStackedVectorObservations = 2;// Invalid stacking
errors = BarracudaModelParamLoader.CheckModel(
model, brainParameters,
new SensorComponent[] { }, new ActuatorComponent[0]
);
Assert.Greater(errors.Count(), 0);
}
[TestCase(true)]
[TestCase(false)]
public void TestCheckModelThrowsActionContinuous(bool useDeprecatedNNModel)
{
var model = useDeprecatedNNModel ? ModelLoader.Load(continuousNNModel) : ModelLoader.Load(continuousONNXModel);
brainParameters.VectorActionSize = new[] { 3 }; // Invalid action
brainParameters.ActionSpec = ActionSpec.MakeContinuous(3); // Invalid action
brainParameters.VectorActionSpaceType = SpaceType.Discrete;// Invalid SpaceType
brainParameters.ActionSpec = ActionSpec.MakeDiscrete(3); // Invalid SpaceType
[Test]
public void TestCheckModelThrowsAction2()
[TestCase(true)]
[TestCase(false)]
public void TestCheckModelThrowsActionDiscrete(bool useDeprecatedNNModel)
var model = ModelLoader.Load(discrete1vis0vec_2_3action_recurrModel);
var model = useDeprecatedNNModel ? ModelLoader.Load(discreteNNModel) : ModelLoader.Load(discreteONNXModel);
brainParameters.VectorActionSize = new[] { 3, 3 }; // Invalid action
brainParameters.ActionSpec = ActionSpec.MakeDiscrete(3, 3); // Invalid action
brainParameters.VectorActionSpaceType = SpaceType.Continuous;// Invalid SpaceType
brainParameters.ActionSpec = ActionSpec.MakeContinuous(2); // Invalid SpaceType
Assert.Greater(errors.Count(), 0);
}
[Test]
public void TestCheckModelThrowsActionHybrid()
{
var model = ModelLoader.Load(hybridONNXModel);
var brainParameters = GetHybridBrainParameters();
brainParameters.ActionSpec = new ActionSpec(3, new int[] { 3 }); // Invalid discrete action size
var errors = BarracudaModelParamLoader.CheckModel(model, brainParameters, new SensorComponent[] { sensor_21_20_3, sensor_20_22_3 }, new ActuatorComponent[0]);
Assert.Greater(errors.Count(), 0);
brainParameters = GetContinuous2vis8vec2actionBrainParameters();
brainParameters.ActionSpec = ActionSpec.MakeDiscrete(2); // Missing continuous action
errors = BarracudaModelParamLoader.CheckModel(model, brainParameters, new SensorComponent[] { sensor_21_20_3, sensor_20_22_3 }, new ActuatorComponent[0]);
Assert.Greater(errors.Count(), 0);
}

2
com.unity.ml-agents/Tests/Editor/TestModels/discrete1vis0vec_2_3action_recurr_deprecated.nn.meta


fileFormatVersion: 2
guid: 8a92fbcd96caa4ef5a93dd55c0c36705
guid: 6d6040ad621454dd5b713beb5483e347
ScriptedImporter:
fileIDToRecycleName:
11400000: main obj

2
com.unity.ml-agents/Tests/Editor/TestModels/continuous2vis8vec2action_deprecated.nn.meta


fileFormatVersion: 2
guid: a75582ff670094ff2996c1c4ab9dfd15
guid: bf4543cc3c6944794bbba065bdf90079
ScriptedImporter:
fileIDToRecycleName:
11400000: main obj

3
com.unity.ml-agents/Tests/Runtime/RuntimeAPITest.cs


behaviorParams.BrainParameters.VectorObservationSize = 3;
behaviorParams.BrainParameters.NumStackedVectorObservations = 2;
behaviorParams.BrainParameters.VectorActionDescriptions = new[] { "TestActionA", "TestActionB" };
behaviorParams.BrainParameters.VectorActionSize = new[] { 2, 2 };
behaviorParams.BrainParameters.VectorActionSpaceType = SpaceType.Discrete;
behaviorParams.BrainParameters.ActionSpec = ActionSpec.MakeDiscrete(2, 2);
behaviorParams.BehaviorName = "TestBehavior";
behaviorParams.TeamId = 42;
behaviorParams.UseChildSensors = true;

8
docs/Getting-Started.md


#### Behavior Parameters : Vector Action Space
An Agent is given instructions in the form of a float array of _actions_.
An Agent is given instructions in the form of actions.
The 3D Balance Ball example is programmed to use continuous action space which
is a a vector of numbers that can vary continuously. More specifically, it uses
a `Space Size` of 2 to control the amount of `x` and `z` rotations to apply to
The 3D Balance Ball example is programmed to use continuous actions, which
are a vector of floating-point numbers that can vary continuously. More specifically,
it uses a `Space Size` of 2 to control the amount of `x` and `z` rotations to apply to
itself to keep the ball balanced on its head.
## Running a pre-trained model

15
docs/Learning-Environment-Create-New.md


- `OnEpisodeBegin()`
- `CollectObservations(VectorSensor sensor)`
- `OnActionReceived(float[] vectorAction)`
- `OnActionReceived(ActionBuffers actionBuffers)`
We overview each of these in more detail in the dedicated subsections below.

```csharp
public float forceMultiplier = 10;
public override void OnActionReceived(float[] vectorAction)
public override void OnActionReceived(ActionBuffers actionBuffers)
controlSignal.x = vectorAction[0];
controlSignal.z = vectorAction[1];
controlSignal.x = actionBuffers.ContinuousActions[0];
controlSignal.z = actionBuffers.ContinuousActions[1];
rBody.AddForce(controlSignal * forceMultiplier);
// Rewards

(which correspond to the keyboard arrow keys):
```csharp
public override void Heuristic(float[] actionsOut)
public override void Heuristic(in ActionBuffers actionsOut)
actionsOut[0] = Input.GetAxis("Horizontal");
actionsOut[1] = Input.GetAxis("Vertical");
var continuousActionsOut = actionsOut.ContinuousActions;
continuousActionsOut[0] = Input.GetAxis("Horizontal");
continuousActionsOut[1] = Input.GetAxis("Vertical");
}
```

80
docs/Learning-Environment-Design-Agents.md


## Actions
An action is an instruction from the Policy that the agent carries out. The
action is passed to the Agent as a parameter when the Academy invokes the
agent's `OnActionReceived()` function. Actions for an agent can take one of two
forms, either **Continuous** or **Discrete**.
When you specify that the vector action space is **Continuous**, the action
parameter passed to the Agent is an array of floating point numbers with length
equal to the `Vector Action Space Size` property. When you specify a
**Discrete** vector action space type, the action parameter is an array
containing integers. Each integer is an index into a list or table of commands.
In the **Discrete** vector action space type, the action parameter is an array
of indices. The number of indices in the array is determined by the number of
branches defined in the `Branches Size` property. Each branch corresponds to an
action table, you can specify the size of each table by modifying the `Branches`
property.
action is passed to the Agent as the `ActionBuffers` parameter when the Academy invokes the
agent's `OnActionReceived()` function. There are two types of actions supported:
**Continuous** and **Discrete**.
Neither the Policy nor the training algorithm know anything about what the
action values themselves mean. The training algorithm simply tries different

### Continuous Action Space
When an Agent uses a Policy set to the **Continuous** vector action space, the
action parameter passed to the Agent's `OnActionReceived()` function is an array
with length equal to the `Vector Action Space Size` property value. The
When an Agent's Policy has **Continuous** actions, the
`ActionBuffers.ContinuousActions` passed to the Agent's `OnActionReceived()` function
is an array with length equal to the `Vector Action Space Size` property value. The
individual values in the array have whatever meanings that you ascribe to them.
If you assign an element in the array as the speed of an Agent, for example, the
training process learns to control the speed of the Agent through this

These control values are applied as torques to the bodies making up the arm:
```csharp
public override void OnActionReceived(float[] act)
{
float torque_x = Mathf.Clamp(act[0], -1, 1) * 100f;
float torque_z = Mathf.Clamp(act[1], -1, 1) * 100f;
rbA.AddTorque(new Vector3(torque_x, 0f, torque_z));
public override void OnActionReceived(ActionBuffers actionBuffers)
{
var torqueX = Mathf.Clamp(actionBuffers.ContinuousActions[0], -1f, 1f) * 150f;
var torqueZ = Mathf.Clamp(actionBuffers.ContinuousActions[1], -1f, 1f) * 150f;
m_RbA.AddTorque(new Vector3(torqueX, 0f, torqueZ));
torque_x = Mathf.Clamp(act[2], -1, 1) * 100f;
torque_z = Mathf.Clamp(act[3], -1, 1) * 100f;
rbB.AddTorque(new Vector3(torque_x, 0f, torque_z));
}
torqueX = Mathf.Clamp(actionBuffers.ContinuousActions[2], -1f, 1f) * 150f;
torqueZ = Mathf.Clamp(actionBuffers.ContinuousActions[3], -1f, 1f) * 150f;
m_RbB.AddTorque(new Vector3(torqueX, 0f, torqueZ));
}
```
By default the output from our provided PPO algorithm pre-clamps the values of

### Discrete Action Space
When an Agent uses a **Discrete** vector action space, the action parameter
passed to the Agent's `OnActionReceived()` function is an array containing
indices. With the discrete vector action space, `Branches` is an array of
integers, each value corresponds to the number of possibilities for each branch.
When an Agent's Policy uses **discrete** actions, the
`ActionBuffers.DiscreteActions` passed to the Agent's `OnActionReceived()` function
is an array of integers. When defining the discrete vector action space, `Branches`
is an array of integers; each value corresponds to the number of possibilities for that branch.
For example, if we wanted an Agent that can move in a plane and jump, we could
define two branches (one for motion and one for jumping) because we want our

```csharp
// Get the action index for movement
int movement = Mathf.FloorToInt(act[0]);
int movement = actionBuffers.DiscreteActions[0];
int jump = Mathf.FloorToInt(act[1]);
int jump = actionBuffers.DiscreteActions[1];
// Look up the index in the movement action list:
if (movement == 1) { directionX = -1; }

directionX * 40f, directionY * 300f, directionZ * 40f));
```
Note that the above code example is a simplified extract from the AreaAgent
class, which provides alternate implementations for both the discrete and the
continuous action spaces.
#### Masking Discrete Actions
When using Discrete Actions, it is possible to specify that some actions are

decide to perform the masked action. In order to mask an action, override the
`Agent.CollectDiscreteActionMasks()` virtual method, and call
`DiscreteActionMasker.SetMask()` in it:
`Agent.WriteDiscreteActionMask()` virtual method, and call
`WriteMask()` on the provided `IDiscreteActionMask`:
public override void CollectDiscreteActionMasks(DiscreteActionMasker actionMasker){
actionMasker.SetMask(branch, actionIndices)
public override void WriteDiscreteActionMask(IDiscreteActionMask actionMask)
{
actionMask.WriteMask(branch, actionIndices)
}
```

the action
- `actionIndices` is a list of `int` corresponding to the indices of the actions
that the Agent cannot perform.
that the Agent **cannot** perform.
For example, if you have an Agent with 2 branches and on the first branch
(branch 0) there are 4 possible actions : _"do nothing"_, _"jump"_, _"shoot"_

```csharp
SetMask(0, new int[2]{1,2})
WriteMask(0, new int[2]{1,2})
- You can call `SetMask` multiple times if you want to put masks on multiple
- You can call `WriteMask` multiple times if you want to put masks on multiple
branches.
- You cannot mask all the actions of a branch.
- You cannot mask actions in continuous control.

- Actions can either use `Discrete` or `Continuous` spaces.
- When using `Discrete` it is possible to assign multiple action branches, and
to mask certain actions.
- Agents can either use `Discrete` or `Continuous` actions.
- Discrete actions can have multiple action branches, and it's possible to mask
certain actions so that they won't be taken.
- When using continuous control, action values should be clipped to an
- Continuous action values should be clipped to an
appropriate range. The provided PPO model automatically clips these values
between -1 and 1, but third party training systems may not do so.
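For illustration only (not part of this change), a third-party Python trainer could apply the clipping itself before sending actions. The sketch below assumes a connected `UnityEnvironment` named `env`, a valid `behavior_name` with continuous actions only, and a hypothetical `raw_continuous` array holding the trainer's unclipped output:

```python
import numpy as np
from mlagents_envs.base_env import ActionTuple

# Sketch: clip a custom trainer's raw continuous output to [-1, 1] before
# handing it to the environment. `raw_continuous` has shape (n_agents, continuous_size).
clipped = np.clip(raw_continuous, -1.0, 1.0).astype(np.float32)
env.set_actions(behavior_name, ActionTuple(continuous=clipped))
```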

4
docs/Learning-Environment-Design.md


important to ensure that your environment parameters are updated at each step to
the correct values. To enable this, we expose an `EnvironmentParameters` C# class
that you can use to retrieve the values of the parameters defined in the
training configurations for both of those features.
training configurations for both of those features. Please see our
[documentation](Training-ML-Agents.md#environment-parameters)
for curriculum learning and environment parameter randomization for details.
We recommend modifying the environment from the Agent's `OnEpisodeBegin()`
function by leveraging `Academy.Instance.EnvironmentParameters`. See the

64
docs/Python-API.md


terminates the communication.
- **Behavior Specs : `env.behavior_specs`** Returns a Mapping of
`BehaviorName` to `BehaviorSpec` objects (read only).
A `BehaviorSpec` contains information such as the observation shapes, the
action type (multi-discrete or continuous) and the action shape. Note that
A `BehaviorSpec` contains the observation shapes and the
`ActionSpec` (which defines the action shape). Note that
the `BehaviorSpec` for a specific group is fixed throughout the simulation.
The number of entries in the Mapping can change over time in the simulation
if new Agent behaviors are created in the simulation.
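As a quick illustration (not part of this change set), the mapping and its new `ActionSpec` field could be inspected like this, assuming `env` is an already-connected `UnityEnvironment`:

```python
# Sketch: print each behavior's observation shapes and action spec.
for behavior_name, behavior_spec in env.behavior_specs.items():
    action_spec = behavior_spec.action_spec
    print(behavior_name)
    print("  observation shapes:", behavior_spec.observation_shapes)
    print("  continuous actions:", action_spec.continuous_size)
    print("  discrete branches: ", action_spec.discrete_branches)
```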

number of agents is not guaranteed to remain constant during the simulation
and it is not unusual to have either `DecisionSteps` or `TerminalSteps`
contain no Agents at all.
- **Set Actions :`env.set_actions(behavior_name: str, action: np.array)`** Sets
the actions for a whole agent group. `action` is a 2D `np.array` of
`dtype=np.int32` in the discrete action case and `dtype=np.float32` in the
continuous action case. The first dimension of `action` is the number of
agents that requested a decision since the last call to `env.step()`. The
second dimension is the number of discrete actions in multi-discrete action
type and the number of actions in continuous action type.
- **Set Actions :`env.set_actions(behavior_name: str, action: ActionTuple)`** Sets
the actions for a whole agent group. `action` is an `ActionTuple`, which
is made up of a 2D `np.array` of `dtype=np.int32` for discrete actions, and
`dtype=np.float32` for continuous actions. The first dimension of `np.array`
in the tuple is the number of agents that requested a decision since the
last call to `env.step()`. The second dimension is the number of discrete or
continuous actions for the corresponding array.
`env.set_action_for_agent(agent_group: str, agent_id: int, action: np.array)`**
`env.set_action_for_agent(agent_group: str, agent_id: int, action: ActionTuple)`**
identifier of the Agent. Action is a 1D array of type `dtype=np.int32` and
size equal to the number of discrete actions in multi-discrete action type and
of type `dtype=np.float32` and size equal to the number of actions in
continuous action type.
identifier of the Agent. `action` is an `ActionTuple` as described above.
**Note:** If no action is provided for an agent group between two calls to
`env.step()` then the default action will be all zeros (in either discrete or
continuous action space)
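A minimal sketch of the new call pattern (illustrative only), assuming `env` is a connected `UnityEnvironment`, `behavior_name` is a valid behavior, and at least one agent has requested a decision:

```python
import numpy as np
from mlagents_envs.base_env import ActionTuple

action_spec = env.behavior_specs[behavior_name].action_spec
decision_steps, terminal_steps = env.get_steps(behavior_name)
n_agents = len(decision_steps)

# One row per agent that requested a decision; zero-sized arrays are fine
# when the behavior has no continuous (or no discrete) actions.
actions = ActionTuple(
    continuous=np.zeros((n_agents, action_spec.continuous_size), dtype=np.float32),
    discrete=np.zeros((n_agents, action_spec.discrete_size), dtype=np.int32),
)
env.set_actions(behavior_name, actions)

# Per-agent variant: the leading dimension is 1 because it targets a single agent.
agent_id = decision_steps.agent_id[0]
env.set_action_for_agent(behavior_name, agent_id, action_spec.random_action(1))

env.step()
```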

- `agent_id` is an int vector of length batch size containing the unique identifier
of the corresponding Agent. This is used to track Agents across simulation
steps.
- `action_mask` is an optional list of two dimensional array of booleans. Only
available in multi-discrete action space type. Each array corresponds to an
- `action_mask` is an optional list of two dimensional arrays of booleans which is only
available when using multi-discrete actions. Each array corresponds to an
action branch. The first dimension of each array is the batch size and the
second contains a mask for each action of the branch. If true, the action is
not available for the agent during this simulation step.

- `reward` is a float. Corresponds to the rewards collected by the agent since
the last simulation step.
- `agent_id` is an int and a unique identifier for the corresponding Agent.
- `action_mask` is an optional list of one dimensional array of booleans. Only
available in multi-discrete action space type. Each array corresponds to an
- `action_mask` is an optional list of one dimensional arrays of booleans which is only
available when using multi-discrete actions. Each array corresponds to an
action branch. Each array contains a mask for each action of the branch. If
true, the action is not available for the agent during this simulation step.
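For reference only (assuming `env` and `behavior_name` as above), the per-branch masks of a multi-discrete behavior could be checked like this:

```python
# Sketch: inspect the per-branch action masks returned with the decision steps.
decision_steps, terminal_steps = env.get_steps(behavior_name)
if decision_steps.action_mask is not None:
    for branch_index, branch_mask in enumerate(decision_steps.action_mask):
        # branch_mask has shape (n_agents, branch_size); True means the action
        # is NOT available for that agent at this step.
        print("branch", branch_index, "masked entries:", int(branch_mask.sum()))
```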

#### BehaviorSpec
An Agent behavior can either have discrete or continuous actions. To check which
type it is, use `spec.is_action_discrete()` or `spec.is_action_continuous()` to
see which one it is. If discrete, the action tensors are expected to be
`np.int32`. If continuous, the actions are expected to be `np.float32`.
A `BehaviorSpec` has the following fields :
- `observation_shapes` is a List of Tuples of int : Each Tuple corresponds to an

- `action_type` is the type of data of the action. it can be discrete or
continuous. If discrete, the action tensors are expected to be `np.int32`. If
continuous, the actions are expected to be `np.float32`.
- `action_size` is an `int` corresponding to the expected dimension of the
action array.
- In continuous action space it is the number of floats that constitute the
action.
- In discrete action space (same as multi-discrete) it corresponds to the
number of branches (the number of independent actions)
- `discrete_action_branches` is a Tuple of int only for discrete action space.
Each int corresponds to the number of different options for each branch of the
action. For example : In a game direction input (no movement, left, right) and
- `action_spec` is an `ActionSpec` namedtuple that defines the number and types
of actions for the Agent.
An `ActionSpec` has the following fields and properties:
- `continuous_size` is the number of floats that constitute the continuous actions.
- `discrete_size` is the number of branches (the number of independent actions) that
constitute the multi-discrete actions.
- `discrete_branches` is a Tuple of ints. Each int corresponds to the number of
different options for each branch of the action. For example:
In a game direction input (no movement, left, right) and
the first one with 3 options and the second with 2 options. (`action_size = 2`
the first one with 3 options and the second with 2 options. (`discrete_size = 2`
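A short sketch of how these fields fit together; the spec below (2 continuous actions plus two discrete branches of sizes 3 and 2) is a made-up example, not one of the shipped test models:

```python
from mlagents_envs.base_env import ActionSpec

# ActionSpec(continuous_size, discrete_branches)
hybrid_spec = ActionSpec(2, (3, 2))
print(hybrid_spec.continuous_size)    # 2
print(hybrid_spec.discrete_size)      # 2 (number of branches)
print(hybrid_spec.discrete_branches)  # (3, 2)

# Both helpers now return an ActionTuple rather than a bare np.ndarray.
zeros = hybrid_spec.empty_action(n_agents=4)
sample = hybrid_spec.random_action(n_agents=4)
print(zeros.continuous.shape, zeros.discrete.shape)  # (4, 2) (4, 2)
```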
### Communicating additional information with the Environment

4
docs/Training-Configuration-File.md


A few considerations when deciding to use memory:
- LSTM does not work well with continuous vector action space. Please use
discrete vector action space for better results.
- LSTM does not work well with continuous vector actions. Please use
discrete actions for better results.
- Since the memories must be sent back and forth between Python and Unity, using
too large `memory_size` will slow down training.
- Adding a recurrent layer increases the complexity of the neural network, it is

10
gym-unity/gym_unity/envs/__init__.py


import gym
from gym import error, spaces
from mlagents_envs.base_env import BaseEnv
from mlagents_envs.base_env import ActionTuple, BaseEnv
from mlagents_envs.base_env import DecisionSteps, TerminalSteps
from mlagents_envs import logging_util

action = self._flattener.lookup_action(action)
action = np.array(action).reshape((1, self.action_size))
self._env.set_actions(self.name, action)
action_tuple = ActionTuple()
if self.group_spec.action_spec.is_continuous():
action_tuple.add_continuous(action)
else:
action_tuple.add_discrete(action)
self._env.set_actions(self.name, action_tuple)
self._env.step()
decision_step, terminal_step = self._env.get_steps(self.name)

148
ml-agents-envs/mlagents_envs/base_env.py


)
class _ActionTupleBase(ABC):
"""
An object whose fields correspond to action data of continuous and discrete
spaces. Dimensions are of (n_agents, continuous_size) and (n_agents, discrete_size),
respectively. Note, this also holds when continuous or discrete size is
zero.
"""
def __init__(
self,
continuous: Optional[np.ndarray] = None,
discrete: Optional[np.ndarray] = None,
):
self._continuous: Optional[np.ndarray] = None
self._discrete: Optional[np.ndarray] = None
if continuous is not None:
self.add_continuous(continuous)
if discrete is not None:
self.add_discrete(discrete)
@property
def continuous(self) -> np.ndarray:
return self._continuous
@property
def discrete(self) -> np.ndarray:
return self._discrete
def add_continuous(self, continuous: np.ndarray) -> None:
if continuous.dtype != np.float32:
continuous = continuous.astype(np.float32, copy=False)
if self._discrete is None:
self._discrete = np.zeros(
(continuous.shape[0], 0), dtype=self.discrete_dtype
)
self._continuous = continuous
def add_discrete(self, discrete: np.ndarray) -> None:
if discrete.dtype != self.discrete_dtype:
discrete = discrete.astype(self.discrete_dtype, copy=False)
if self._continuous is None:
self._continuous = np.zeros((discrete.shape[0], 0), dtype=np.float32)
self._discrete = discrete
@property
@abstractmethod
def discrete_dtype(self) -> np.dtype:
pass
class ActionTuple(_ActionTupleBase):
"""
An object whose fields correspond to actions of different types.
Continuous and discrete actions are numpy arrays of type float32 and
int32, respectively and are type checked on construction.
Dimensions are of (n_agents, continuous_size) and (n_agents, discrete_size),
respectively. Note, this also holds when continuous or discrete size is
zero.
"""
@property
def discrete_dtype(self) -> np.dtype:
"""
The dtype of a discrete action.
"""
return np.int32
class ActionSpec(NamedTuple):
"""
A NamedTuple containing utility functions and information about the action spaces

"""
return len(self.discrete_branches)
def empty_action(self, n_agents: int) -> np.ndarray:
def empty_action(self, n_agents: int) -> ActionTuple:
Generates a numpy array corresponding to an empty action (all zeros)
Generates an ActionTuple corresponding to an empty action (all zeros)
if self.is_continuous():
return np.zeros((n_agents, self.continuous_size), dtype=np.float32)
return np.zeros((n_agents, self.discrete_size), dtype=np.int32)
_continuous = np.zeros((n_agents, self.continuous_size), dtype=np.float32)
_discrete = np.zeros((n_agents, self.discrete_size), dtype=np.int32)
return ActionTuple(continuous=_continuous, discrete=_discrete)
def random_action(self, n_agents: int) -> np.ndarray:
def random_action(self, n_agents: int) -> ActionTuple:
Generates a numpy array corresponding to a random action (either discrete
Generates an ActionTuple corresponding to a random action (either discrete
if self.is_continuous():
action = np.random.uniform(
low=-1.0, high=1.0, size=(n_agents, self.continuous_size)
).astype(np.float32)
else:
branch_size = self.discrete_branches
action = np.column_stack(
_continuous = np.random.uniform(
low=-1.0, high=1.0, size=(n_agents, self.continuous_size)
)
_discrete = np.zeros((n_agents, self.discrete_size), dtype=np.int32)
if self.discrete_size > 0:
_discrete = np.column_stack(
branch_size[i], # type: ignore
self.discrete_branches[i], # type: ignore
size=(n_agents),
dtype=np.int32,
)

return action
return ActionTuple(continuous=_continuous, discrete=_discrete)
self, actions: np.ndarray, n_agents: int, name: str
) -> np.ndarray:
self, actions: ActionTuple, n_agents: Optional[int], name: str
) -> ActionTuple:
if self.continuous_size > 0:
_size = self.continuous_size
else:
_size = self.discrete_size
_expected_shape = (n_agents, _size)
if actions.shape != _expected_shape:
_expected_shape = (
(n_agents, self.continuous_size)
if n_agents is not None
else (self.continuous_size,)
)
if actions.continuous.shape != _expected_shape:
f"The behavior {name} needs an input of dimension "
f"The behavior {name} needs a continuous input of dimension "
f"received input of dimension {actions.shape}"
f"received input of dimension {actions.continuous.shape}"
_expected_type = np.float32 if self.is_continuous() else np.int32
if actions.dtype != _expected_type:
actions = actions.astype(_expected_type)
_expected_shape = (
(n_agents, self.discrete_size)
if n_agents is not None
else (self.discrete_size,)
)
if actions.discrete.shape != _expected_shape:
raise UnityActionException(
f"The behavior {name} needs a discrete input of dimension "
f"{_expected_shape} for (<number of agents>, <action size>) but "
f"received input of dimension {actions.discrete.shape}"
)
return actions
@staticmethod

"""
@abstractmethod
def set_actions(self, behavior_name: BehaviorName, action: np.ndarray) -> None:
def set_actions(self, behavior_name: BehaviorName, action: ActionTuple) -> None:
:param action: A two dimensional np.ndarray corresponding to the action
(either int or float)
:param action: ActionTuple of continuous and/or discrete actions.
Actions are np.arrays with dimensions (n_agents, continuous_size) and
(n_agents, discrete_size), respectively.
self, behavior_name: BehaviorName, agent_id: AgentId, action: np.ndarray
self, behavior_name: BehaviorName, agent_id: AgentId, action: ActionTuple
) -> None:
"""
Sets the action for one of the agents in the simulation for the next

:param action: A one dimensional np.ndarray corresponding to the action
(either int or float)
:param action: ActionTuple of continuous and/or discrete actions.
Actions are np.arrays with dimensions (1, continuous_size) and
(1, discrete_size), respectively. Note: the initial dimension of 1 is because
this action is meant for a single agent.
"""
@abstractmethod

22
ml-agents-envs/mlagents_envs/communicator_objects/agent_action_pb2.py


name='mlagents_envs/communicator_objects/agent_action.proto',
package='communicator_objects',
syntax='proto3',
serialized_pb=_b('\n5mlagents_envs/communicator_objects/agent_action.proto\x12\x14\x63ommunicator_objects\"K\n\x10\x41gentActionProto\x12\x16\n\x0evector_actions\x18\x01 \x03(\x02\x12\r\n\x05value\x18\x04 \x01(\x02J\x04\x08\x02\x10\x03J\x04\x08\x03\x10\x04J\x04\x08\x05\x10\x06\x42%\xaa\x02\"Unity.MLAgents.CommunicatorObjectsb\x06proto3')
serialized_pb=_b('\n5mlagents_envs/communicator_objects/agent_action.proto\x12\x14\x63ommunicator_objects\"\x8c\x01\n\x10\x41gentActionProto\x12!\n\x19vector_actions_deprecated\x18\x01 \x03(\x02\x12\r\n\x05value\x18\x04 \x01(\x02\x12\x1a\n\x12\x63ontinuous_actions\x18\x06 \x03(\x02\x12\x18\n\x10\x64iscrete_actions\x18\x07 \x03(\x05J\x04\x08\x02\x10\x03J\x04\x08\x03\x10\x04J\x04\x08\x05\x10\x06\x42%\xaa\x02\"Unity.MLAgents.CommunicatorObjectsb\x06proto3')
)

containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='vector_actions', full_name='communicator_objects.AgentActionProto.vector_actions', index=0,
name='vector_actions_deprecated', full_name='communicator_objects.AgentActionProto.vector_actions_deprecated', index=0,
number=1, type=2, cpp_type=6, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,

message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='continuous_actions', full_name='communicator_objects.AgentActionProto.continuous_actions', index=2,
number=6, type=2, cpp_type=6, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='discrete_actions', full_name='communicator_objects.AgentActionProto.discrete_actions', index=3,
number=7, type=5, cpp_type=1, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
],
extensions=[
],

extension_ranges=[],
oneofs=[
],
serialized_start=79,
serialized_end=154,
serialized_start=80,
serialized_end=220,
)
DESCRIPTOR.message_types_by_name['AgentActionProto'] = _AGENTACTIONPROTO

12
ml-agents-envs/mlagents_envs/communicator_objects/agent_action_pb2.pyi


class AgentActionProto(google___protobuf___message___Message):
DESCRIPTOR: google___protobuf___descriptor___Descriptor = ...
vector_actions = ... # type: google___protobuf___internal___containers___RepeatedScalarFieldContainer[builtin___float]
vector_actions_deprecated = ... # type: google___protobuf___internal___containers___RepeatedScalarFieldContainer[builtin___float]
continuous_actions = ... # type: google___protobuf___internal___containers___RepeatedScalarFieldContainer[builtin___float]
discrete_actions = ... # type: google___protobuf___internal___containers___RepeatedScalarFieldContainer[builtin___int]
vector_actions : typing___Optional[typing___Iterable[builtin___float]] = None,
vector_actions_deprecated : typing___Optional[typing___Iterable[builtin___float]] = None,
continuous_actions : typing___Optional[typing___Iterable[builtin___float]] = None,
discrete_actions : typing___Optional[typing___Iterable[builtin___int]] = None,
) -> None: ...
@classmethod
def FromString(cls, s: builtin___bytes) -> AgentActionProto: ...

def ClearField(self, field_name: typing_extensions___Literal[u"value",u"vector_actions"]) -> None: ...
def ClearField(self, field_name: typing_extensions___Literal[u"continuous_actions",u"discrete_actions",u"value",u"vector_actions_deprecated"]) -> None: ...
def ClearField(self, field_name: typing_extensions___Literal[u"value",b"value",u"vector_actions",b"vector_actions"]) -> None: ...
def ClearField(self, field_name: typing_extensions___Literal[u"continuous_actions",b"continuous_actions",u"discrete_actions",b"discrete_actions",u"value",b"value",u"vector_actions_deprecated",b"vector_actions_deprecated"]) -> None: ...

82
ml-agents-envs/mlagents_envs/communicator_objects/brain_parameters_pb2.py


name='mlagents_envs/communicator_objects/brain_parameters.proto',
package='communicator_objects',
syntax='proto3',
serialized_pb=_b('\n9mlagents_envs/communicator_objects/brain_parameters.proto\x12\x14\x63ommunicator_objects\x1a\x33mlagents_envs/communicator_objects/space_type.proto\"\xd9\x01\n\x14\x42rainParametersProto\x12\x1a\n\x12vector_action_size\x18\x03 \x03(\x05\x12\"\n\x1avector_action_descriptions\x18\x05 \x03(\t\x12\x46\n\x18vector_action_space_type\x18\x06 \x01(\x0e\x32$.communicator_objects.SpaceTypeProto\x12\x12\n\nbrain_name\x18\x07 \x01(\t\x12\x13\n\x0bis_training\x18\x08 \x01(\x08J\x04\x08\x01\x10\x02J\x04\x08\x02\x10\x03J\x04\x08\x04\x10\x05\x42%\xaa\x02\"Unity.MLAgents.CommunicatorObjectsb\x06proto3')
serialized_pb=_b('\n9mlagents_envs/communicator_objects/brain_parameters.proto\x12\x14\x63ommunicator_objects\x1a\x33mlagents_envs/communicator_objects/space_type.proto\"\x8b\x01\n\x0f\x41\x63tionSpecProto\x12\x1e\n\x16num_continuous_actions\x18\x01 \x01(\x05\x12\x1c\n\x14num_discrete_actions\x18\x02 \x01(\x05\x12\x1d\n\x15\x64iscrete_branch_sizes\x18\x03 \x03(\x05\x12\x1b\n\x13\x61\x63tion_descriptions\x18\x04 \x03(\t\"\xb6\x02\n\x14\x42rainParametersProto\x12%\n\x1dvector_action_size_deprecated\x18\x03 \x03(\x05\x12-\n%vector_action_descriptions_deprecated\x18\x05 \x03(\t\x12Q\n#vector_action_space_type_deprecated\x18\x06 \x01(\x0e\x32$.communicator_objects.SpaceTypeProto\x12\x12\n\nbrain_name\x18\x07 \x01(\t\x12\x13\n\x0bis_training\x18\x08 \x01(\x08\x12:\n\x0b\x61\x63tion_spec\x18\t \x01(\x0b\x32%.communicator_objects.ActionSpecProtoJ\x04\x08\x01\x10\x02J\x04\x08\x02\x10\x03J\x04\x08\x04\x10\x05\x42%\xaa\x02\"Unity.MLAgents.CommunicatorObjectsb\x06proto3')
,
dependencies=[mlagents__envs_dot_communicator__objects_dot_space__type__pb2.DESCRIPTOR,])

_ACTIONSPECPROTO = _descriptor.Descriptor(
name='ActionSpecProto',
full_name='communicator_objects.ActionSpecProto',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='num_continuous_actions', full_name='communicator_objects.ActionSpecProto.num_continuous_actions', index=0,
number=1, type=5, cpp_type=1, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='num_discrete_actions', full_name='communicator_objects.ActionSpecProto.num_discrete_actions', index=1,
number=2, type=5, cpp_type=1, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='discrete_branch_sizes', full_name='communicator_objects.ActionSpecProto.discrete_branch_sizes', index=2,
number=3, type=5, cpp_type=1, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='action_descriptions', full_name='communicator_objects.ActionSpecProto.action_descriptions', index=3,
number=4, type=9, cpp_type=9, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
],
extensions=[
],
nested_types=[],
enum_types=[
],
options=None,
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[
],
serialized_start=137,
serialized_end=276,
)
_BRAINPARAMETERSPROTO = _descriptor.Descriptor(
name='BrainParametersProto',
full_name='communicator_objects.BrainParametersProto',

fields=[
_descriptor.FieldDescriptor(
name='vector_action_size', full_name='communicator_objects.BrainParametersProto.vector_action_size', index=0,
name='vector_action_size_deprecated', full_name='communicator_objects.BrainParametersProto.vector_action_size_deprecated', index=0,
number=3, type=5, cpp_type=1, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,

name='vector_action_descriptions', full_name='communicator_objects.BrainParametersProto.vector_action_descriptions', index=1,
name='vector_action_descriptions_deprecated', full_name='communicator_objects.BrainParametersProto.vector_action_descriptions_deprecated', index=1,
number=5, type=9, cpp_type=9, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,

name='vector_action_space_type', full_name='communicator_objects.BrainParametersProto.vector_action_space_type', index=2,
name='vector_action_space_type_deprecated', full_name='communicator_objects.BrainParametersProto.vector_action_space_type_deprecated', index=2,
number=6, type=14, cpp_type=8, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,

message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='action_spec', full_name='communicator_objects.BrainParametersProto.action_spec', index=5,
number=9, type=11, cpp_type=10, label=1,
has_default_value=False, default_value=None,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
],
extensions=[
],

extension_ranges=[],
oneofs=[
],
serialized_start=137,
serialized_end=354,
serialized_start=279,
serialized_end=589,
_BRAINPARAMETERSPROTO.fields_by_name['vector_action_space_type'].enum_type = mlagents__envs_dot_communicator__objects_dot_space__type__pb2._SPACETYPEPROTO
_BRAINPARAMETERSPROTO.fields_by_name['vector_action_space_type_deprecated'].enum_type = mlagents__envs_dot_communicator__objects_dot_space__type__pb2._SPACETYPEPROTO
_BRAINPARAMETERSPROTO.fields_by_name['action_spec'].message_type = _ACTIONSPECPROTO
DESCRIPTOR.message_types_by_name['ActionSpecProto'] = _ACTIONSPECPROTO
ActionSpecProto = _reflection.GeneratedProtocolMessageType('ActionSpecProto', (_message.Message,), dict(
DESCRIPTOR = _ACTIONSPECPROTO,
__module__ = 'mlagents_envs.communicator_objects.brain_parameters_pb2'
# @@protoc_insertion_point(class_scope:communicator_objects.ActionSpecProto)
))
_sym_db.RegisterMessage(ActionSpecProto)
BrainParametersProto = _reflection.GeneratedProtocolMessageType('BrainParametersProto', (_message.Message,), dict(
DESCRIPTOR = _BRAINPARAMETERSPROTO,

45
ml-agents-envs/mlagents_envs/communicator_objects/brain_parameters_pb2.pyi


builtin___int = int
class ActionSpecProto(google___protobuf___message___Message):
DESCRIPTOR: google___protobuf___descriptor___Descriptor = ...
num_continuous_actions = ... # type: builtin___int
num_discrete_actions = ... # type: builtin___int
discrete_branch_sizes = ... # type: google___protobuf___internal___containers___RepeatedScalarFieldContainer[builtin___int]
action_descriptions = ... # type: google___protobuf___internal___containers___RepeatedScalarFieldContainer[typing___Text]
def __init__(self,
*,
num_continuous_actions : typing___Optional[builtin___int] = None,
num_discrete_actions : typing___Optional[builtin___int] = None,
discrete_branch_sizes : typing___Optional[typing___Iterable[builtin___int]] = None,
action_descriptions : typing___Optional[typing___Iterable[typing___Text]] = None,
) -> None: ...
@classmethod
def FromString(cls, s: builtin___bytes) -> ActionSpecProto: ...
def MergeFrom(self, other_msg: google___protobuf___message___Message) -> None: ...
def CopyFrom(self, other_msg: google___protobuf___message___Message) -> None: ...
if sys.version_info >= (3,):
def ClearField(self, field_name: typing_extensions___Literal[u"action_descriptions",u"discrete_branch_sizes",u"num_continuous_actions",u"num_discrete_actions"]) -> None: ...
else:
def ClearField(self, field_name: typing_extensions___Literal[u"action_descriptions",b"action_descriptions",u"discrete_branch_sizes",b"discrete_branch_sizes",u"num_continuous_actions",b"num_continuous_actions",u"num_discrete_actions",b"num_discrete_actions"]) -> None: ...
vector_action_size = ... # type: google___protobuf___internal___containers___RepeatedScalarFieldContainer[builtin___int]
vector_action_descriptions = ... # type: google___protobuf___internal___containers___RepeatedScalarFieldContainer[typing___Text]
vector_action_space_type = ... # type: mlagents_envs___communicator_objects___space_type_pb2___SpaceTypeProto
vector_action_size_deprecated = ... # type: google___protobuf___internal___containers___RepeatedScalarFieldContainer[builtin___int]
vector_action_descriptions_deprecated = ... # type: google___protobuf___internal___containers___RepeatedScalarFieldContainer[typing___Text]
vector_action_space_type_deprecated = ... # type: mlagents_envs___communicator_objects___space_type_pb2___SpaceTypeProto
@property
def action_spec(self) -> ActionSpecProto: ...
vector_action_size : typing___Optional[typing___Iterable[builtin___int]] = None,
vector_action_descriptions : typing___Optional[typing___Iterable[typing___Text]] = None,
vector_action_space_type : typing___Optional[mlagents_envs___communicator_objects___space_type_pb2___SpaceTypeProto] = None,
vector_action_size_deprecated : typing___Optional[typing___Iterable[builtin___int]] = None,
vector_action_descriptions_deprecated : typing___Optional[typing___Iterable[typing___Text]] = None,
vector_action_space_type_deprecated : typing___Optional[mlagents_envs___communicator_objects___space_type_pb2___SpaceTypeProto] = None,
action_spec : typing___Optional[ActionSpecProto] = None,
) -> None: ...
@classmethod
def FromString(cls, s: builtin___bytes) -> BrainParametersProto: ...

def ClearField(self, field_name: typing_extensions___Literal[u"brain_name",u"is_training",u"vector_action_descriptions",u"vector_action_size",u"vector_action_space_type"]) -> None: ...
def HasField(self, field_name: typing_extensions___Literal[u"action_spec"]) -> builtin___bool: ...
def ClearField(self, field_name: typing_extensions___Literal[u"action_spec",u"brain_name",u"is_training",u"vector_action_descriptions_deprecated",u"vector_action_size_deprecated",u"vector_action_space_type_deprecated"]) -> None: ...
def ClearField(self, field_name: typing_extensions___Literal[u"brain_name",b"brain_name",u"is_training",b"is_training",u"vector_action_descriptions",b"vector_action_descriptions",u"vector_action_size",b"vector_action_size",u"vector_action_space_type",b"vector_action_space_type"]) -> None: ...
def HasField(self, field_name: typing_extensions___Literal[u"action_spec",b"action_spec"]) -> builtin___bool: ...
def ClearField(self, field_name: typing_extensions___Literal[u"action_spec",b"action_spec",u"brain_name",b"brain_name",u"is_training",b"is_training",u"vector_action_descriptions_deprecated",b"vector_action_descriptions_deprecated",u"vector_action_size_deprecated",b"vector_action_size_deprecated",u"vector_action_space_type_deprecated",b"vector_action_space_type_deprecated"]) -> None: ...

13
ml-agents-envs/mlagents_envs/communicator_objects/capabilities_pb2.py


name='mlagents_envs/communicator_objects/capabilities.proto',
package='communicator_objects',
syntax='proto3',
serialized_pb=_b('\n5mlagents_envs/communicator_objects/capabilities.proto\x12\x14\x63ommunicator_objects\"}\n\x18UnityRLCapabilitiesProto\x12\x1a\n\x12\x62\x61seRLCapabilities\x18\x01 \x01(\x08\x12#\n\x1b\x63oncatenatedPngObservations\x18\x02 \x01(\x08\x12 \n\x18\x63ompressedChannelMapping\x18\x03 \x01(\x08\x42%\xaa\x02\"Unity.MLAgents.CommunicatorObjectsb\x06proto3')
serialized_pb=_b('\n5mlagents_envs/communicator_objects/capabilities.proto\x12\x14\x63ommunicator_objects\"\x94\x01\n\x18UnityRLCapabilitiesProto\x12\x1a\n\x12\x62\x61seRLCapabilities\x18\x01 \x01(\x08\x12#\n\x1b\x63oncatenatedPngObservations\x18\x02 \x01(\x08\x12 \n\x18\x63ompressedChannelMapping\x18\x03 \x01(\x08\x12\x15\n\rhybridActions\x18\x04 \x01(\x08\x42%\xaa\x02\"Unity.MLAgents.CommunicatorObjectsb\x06proto3')
)

message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='hybridActions', full_name='communicator_objects.UnityRLCapabilitiesProto.hybridActions', index=3,
number=4, type=8, cpp_type=7, label=1,
has_default_value=False, default_value=False,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
],
extensions=[
],

extension_ranges=[],
oneofs=[
],
serialized_start=79,
serialized_end=204,
serialized_start=80,
serialized_end=228,
)
DESCRIPTOR.message_types_by_name['UnityRLCapabilitiesProto'] = _UNITYRLCAPABILITIESPROTO

6
ml-agents-envs/mlagents_envs/communicator_objects/capabilities_pb2.pyi


baseRLCapabilities = ... # type: builtin___bool
concatenatedPngObservations = ... # type: builtin___bool
compressedChannelMapping = ... # type: builtin___bool
hybridActions = ... # type: builtin___bool
def __init__(self,
*,

hybridActions : typing___Optional[builtin___bool] = None,
) -> None: ...
@classmethod
def FromString(cls, s: builtin___bytes) -> UnityRLCapabilitiesProto: ...

def ClearField(self, field_name: typing_extensions___Literal[u"baseRLCapabilities",u"compressedChannelMapping",u"concatenatedPngObservations"]) -> None: ...
def ClearField(self, field_name: typing_extensions___Literal[u"baseRLCapabilities",u"compressedChannelMapping",u"concatenatedPngObservations",u"hybridActions"]) -> None: ...
def ClearField(self, field_name: typing_extensions___Literal[u"baseRLCapabilities",b"baseRLCapabilities",u"compressedChannelMapping",b"compressedChannelMapping",u"concatenatedPngObservations",b"concatenatedPngObservations"]) -> None: ...
def ClearField(self, field_name: typing_extensions___Literal[u"baseRLCapabilities",b"baseRLCapabilities",u"compressedChannelMapping",b"compressedChannelMapping",u"concatenatedPngObservations",b"concatenatedPngObservations",u"hybridActions",b"hybridActions"]) -> None: ...

32
ml-agents-envs/mlagents_envs/environment.py


DecisionSteps,
TerminalSteps,
BehaviorSpec,
ActionTuple,
BehaviorName,
AgentId,
BehaviorMapping,

# * 1.0.0 - initial version
# * 1.1.0 - support concatenated PNGs for compressed observations.
# * 1.2.0 - support compression mapping for stacked compressed observations.
API_VERSION = "1.2.0"
# * 1.3.0 - support action spaces with both continuous and discrete actions.
API_VERSION = "1.3.0"
# Default port that the editor listens on. If an environment executable
# isn't specified, this port will be used.

capabilities.baseRLCapabilities = True
capabilities.concatenatedPngObservations = True
capabilities.compressedChannelMapping = True
capabilities.hybridActions = True
return capabilities
@staticmethod

self._env_state: Dict[str, Tuple[DecisionSteps, TerminalSteps]] = {}
self._env_specs: Dict[str, BehaviorSpec] = {}
self._env_actions: Dict[str, np.ndarray] = {}
self._env_actions: Dict[str, ActionTuple] = {}
self._is_first_message = True
self._update_behavior_specs(aca_output)

f"agent group in the environment"
)
def set_actions(self, behavior_name: BehaviorName, action: np.ndarray) -> None:
def set_actions(self, behavior_name: BehaviorName, action: ActionTuple) -> None:
self._assert_behavior_exists(behavior_name)
if behavior_name not in self._env_state:
return

self._env_actions[behavior_name] = action
def set_action_for_agent(
self, behavior_name: BehaviorName, agent_id: AgentId, action: np.ndarray
self, behavior_name: BehaviorName, agent_id: AgentId, action: ActionTuple
) -> None:
self._assert_behavior_exists(behavior_name)
if behavior_name not in self._env_state:

action = action_spec._validate_action(action, num_agents, behavior_name)
action = action_spec._validate_action(action, None, behavior_name)
if behavior_name not in self._env_actions:
self._env_actions[behavior_name] = action_spec.empty_action(num_agents)
try:

agent_id
)
) from ie
self._env_actions[behavior_name][index] = action
if action_spec.continuous_size > 0:
self._env_actions[behavior_name].continuous[index] = action.continuous[0, :]
if action_spec.discrete_size > 0:
self._env_actions[behavior_name].discrete[index] = action.discrete[0, :]
def get_steps(
self, behavior_name: BehaviorName

@timed
def _generate_step_input(
self, vector_action: Dict[str, np.ndarray]
self, vector_action: Dict[str, ActionTuple]
) -> UnityInputProto:
rl_in = UnityRLInputProto()
for b in vector_action:

for i in range(n_agents):
action = AgentActionProto(vector_actions=vector_action[b][i])
action = AgentActionProto()
if vector_action[b].continuous is not None:
action.vector_actions_deprecated.extend(
vector_action[b].continuous[i]
)
action.continuous_actions.extend(vector_action[b].continuous[i])
if vector_action[b].discrete is not None:
action.vector_actions_deprecated.extend(
vector_action[b].discrete[i]
)
action.discrete_actions.extend(vector_action[b].discrete[i])
rl_in.agent_actions[b].value.extend([action])
rl_in.command = STEP
rl_in.side_channel = bytes(

18
ml-agents-envs/mlagents_envs/mock_communicator.py


from .communicator import Communicator
from .environment import UnityEnvironment
from mlagents_envs.communicator_objects.unity_rl_output_pb2 import UnityRLOutputProto
from mlagents_envs.communicator_objects.brain_parameters_pb2 import BrainParametersProto
from mlagents_envs.communicator_objects.brain_parameters_pb2 import (
BrainParametersProto,
ActionSpecProto,
)
from mlagents_envs.communicator_objects.unity_rl_initialization_output_pb2 import (
UnityRLInitializationOutputProto,
)

NONE as COMPRESSION_TYPE_NONE,
PNG as COMPRESSION_TYPE_PNG,
)
from mlagents_envs.communicator_objects.space_type_pb2 import discrete, continuous
class MockCommunicator(Communicator):

self.vec_obs_size = vec_obs_size
def initialize(self, inputs: UnityInputProto) -> UnityOutputProto:
if self.is_discrete:
action_spec = ActionSpecProto(
num_discrete_actions=2, discrete_branch_sizes=[3, 2]
)
else:
action_spec = ActionSpecProto(num_continuous_actions=2)
vector_action_size=[2],
vector_action_descriptions=["", ""],
vector_action_space_type=discrete if self.is_discrete else continuous,
brain_name=self.brain_name,
is_training=True,
brain_name=self.brain_name, is_training=True, action_spec=action_spec
)
rl_init = UnityRLInitializationOutputProto(
name="RealFakeAcademy",

27
ml-agents-envs/mlagents_envs/rpc_utils.py


from mlagents_envs.base_env import (
BehaviorSpec,

DecisionSteps,
TerminalSteps,
)

:return: BehaviorSpec object.
"""
observation_shape = [tuple(obs.shape) for obs in agent_info.observations]
if brain_param_proto.vector_action_space_type == 1:
action_spec = ActionSpec(brain_param_proto.vector_action_size[0], ())
# proto from communicator < v1.3 does not set action spec, use deprecated fields instead
if (
brain_param_proto.action_spec.num_continuous_actions == 0
and brain_param_proto.action_spec.num_discrete_actions == 0
):
if brain_param_proto.vector_action_space_type_deprecated == 1:
action_spec = ActionSpec(
brain_param_proto.vector_action_size_deprecated[0], ()
)
else:
action_spec = ActionSpec(
0, tuple(brain_param_proto.vector_action_size_deprecated)
)
action_spec = ActionSpec(0, tuple(brain_param_proto.vector_action_size))
action_spec_proto = brain_param_proto.action_spec
action_spec = ActionSpec(
action_spec_proto.num_continuous_actions,
tuple(branch for branch in action_spec_proto.discrete_branch_sizes),
)
return BehaviorSpec(observation_shape, action_spec)

], # pylint: disable=unsubscriptable-object
) -> np.ndarray:
if len(agent_info_list) == 0:
return np.zeros((0, shape[0]), dtype=np.float32)
return np.zeros((0,) + shape, dtype=np.float32)
np_obs = np.array(
[
agent_obs.observations[obs_index].float_data.data

)
).reshape((len(agent_info_list),) + shape)
_raise_on_nan_and_inf(np_obs, "observations")
return np_obs
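
The fallback above, written out as a standalone helper for readability: protos from communicators older than v1.3 leave action_spec empty, so the deprecated vector-action fields are consulted instead. This is a sketch of just the action-spec branch; the real behavior_spec_from_proto also builds the observation shapes.

```python
from mlagents_envs.base_env import ActionSpec

def action_spec_from_brain_proto(brain_param_proto) -> ActionSpec:
    proto_spec = brain_param_proto.action_spec
    if proto_spec.num_continuous_actions == 0 and proto_spec.num_discrete_actions == 0:
        # Peer < v1.3: only the deprecated fields are populated.
        if brain_param_proto.vector_action_space_type_deprecated == 1:  # continuous
            return ActionSpec(brain_param_proto.vector_action_size_deprecated[0], ())
        return ActionSpec(0, tuple(brain_param_proto.vector_action_size_deprecated))
    return ActionSpec(
        proto_spec.num_continuous_actions,
        tuple(proto_spec.discrete_branch_sizes),
    )
```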

6
ml-agents-envs/mlagents_envs/tests/test_envs.py


import pytest
from mlagents_envs.environment import UnityEnvironment
from mlagents_envs.base_env import DecisionSteps, TerminalSteps
from mlagents_envs.base_env import DecisionSteps, TerminalSteps, ActionTuple
from mlagents_envs.exception import UnityEnvironmentException, UnityActionException
from mlagents_envs.mock_communicator import MockCommunicator

env.set_actions("RealFakeBrain", spec.action_spec.empty_action(n_agents - 1))
decision_steps, terminal_steps = env.get_steps("RealFakeBrain")
n_agents = len(decision_steps)
env.set_actions("RealFakeBrain", spec.action_spec.empty_action(n_agents) - 1)
_empty_act = spec.action_spec.empty_action(n_agents)
next_action = ActionTuple(_empty_act.continuous - 1, _empty_act.discrete - 1)
env.set_actions("RealFakeBrain", next_action)
env.step()
env.close()

33
ml-agents-envs/mlagents_envs/tests/test_rpc_utils.py


return agent_info_protos
# The arguments here are the DecisionSteps, TerminalSteps and actions for a single agent name
# The arguments here are the DecisionSteps, TerminalSteps and continuous/discrete actions for a single agent name
decision_steps: DecisionSteps, terminal_steps: TerminalSteps, actions: np.ndarray
decision_steps: DecisionSteps,
terminal_steps: TerminalSteps,
continuous_actions: np.ndarray,
discrete_actions: np.ndarray,
agent_action_protos = [
AgentActionProto(vector_actions=action) for action in actions
]
agent_action_protos = []
num_agents = (
len(continuous_actions)
if continuous_actions is not None
else len(discrete_actions)
)
for i in range(num_agents):
proto = AgentActionProto()
if continuous_actions is not None:
proto.continuous_actions.extend(continuous_actions[i])
proto.vector_actions_deprecated.extend(continuous_actions[i])
if discrete_actions is not None:
proto.discrete_actions.extend(discrete_actions[i])
proto.vector_actions_deprecated.extend(discrete_actions[i])
agent_action_protos.append(proto)
agent_info_action_pair_protos = [
AgentInfoActionPairProto(agent_info=agent_info_proto, action_info=action_proto)
for agent_info_proto, action_proto in zip(

def test_agent_behavior_spec_from_proto():
agent_proto = generate_list_agent_proto(1, [(3,), (4,)])[0]
bp = BrainParametersProto()
bp.vector_action_size.extend([5, 4])
bp.vector_action_space_type = 0
bp.vector_action_size_deprecated.extend([5, 4])
bp.vector_action_space_type_deprecated = 0
behavior_spec = behavior_spec_from_proto(bp, agent_proto)
assert behavior_spec.action_spec.is_discrete()
assert not behavior_spec.action_spec.is_continuous()

bp = BrainParametersProto()
bp.vector_action_size.extend([6])
bp.vector_action_space_type = 1
bp.vector_action_size_deprecated.extend([6])
bp.vector_action_space_type_deprecated = 1
behavior_spec = behavior_spec_from_proto(bp, agent_proto)
assert not behavior_spec.action_spec.is_discrete()
assert behavior_spec.action_spec.is_continuous()

27
ml-agents-envs/mlagents_envs/tests/test_steps.py


assert specs.discrete_branches == ()
assert specs.discrete_size == 0
assert specs.continuous_size == 3
assert specs.empty_action(5).shape == (5, 3)
assert specs.empty_action(5).dtype == np.float32
assert specs.empty_action(5).continuous.shape == (5, 3)
assert specs.empty_action(5).continuous.dtype == np.float32
assert specs.empty_action(5).shape == (5, 1)
assert specs.empty_action(5).dtype == np.int32
assert specs.empty_action(5).discrete.shape == (5, 1)
assert specs.empty_action(5).discrete.dtype == np.int32
specs = ActionSpec(3, (3,))
assert specs.continuous_size == 3
assert specs.discrete_branches == (3,)
assert specs.discrete_size == 1
assert specs.empty_action(5).continuous.shape == (5, 3)
assert specs.empty_action(5).continuous.dtype == np.float32
assert specs.empty_action(5).discrete.shape == (5, 1)
assert specs.empty_action(5).discrete.dtype == np.int32
def test_action_generator():

zero_action = specs.empty_action(4)
zero_action = specs.empty_action(4).continuous
random_action = specs.random_action(4)
print(specs.random_action(4))
random_action = specs.random_action(4).continuous
print(random_action)
assert random_action.dtype == np.float32
assert random_action.shape == (4, action_len)
assert np.min(random_action) >= -1

action_shape = (10, 20, 30)
specs = ActionSpec.create_discrete(action_shape)
zero_action = specs.empty_action(4)
zero_action = specs.empty_action(4).discrete
random_action = specs.random_action(4)
random_action = specs.random_action(4).discrete
assert random_action.dtype == np.int32
assert random_action.shape == (4, len(action_shape))
assert np.min(random_action) >= 0
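
A runnable recap of the shape/dtype contract exercised above, for a hybrid spec with 3 continuous actions plus one 3-way discrete branch:

```python
import numpy as np
from mlagents_envs.base_env import ActionSpec

spec = ActionSpec(3, (3,))
empty = spec.empty_action(5)                 # ActionTuple, one row per agent
assert empty.continuous.shape == (5, 3) and empty.continuous.dtype == np.float32
assert empty.discrete.shape == (5, 1) and empty.discrete.dtype == np.int32

rand = spec.random_action(5)
assert rand.continuous.shape == (5, 3)
assert rand.discrete.shape == (5, 1)
assert rand.discrete.min() >= 0 and rand.discrete.max() < 3  # branch of size 3
```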

3
ml-agents/mlagents/trainers/action_info.py


class ActionInfo(NamedTuple):
action: Any
env_action: Any
value: Any
outputs: ActionInfoOutputs
agent_ids: List[AgentId]

return ActionInfo([], [], {}, [])
return ActionInfo([], [], [], {}, [])

23
ml-agents/mlagents/trainers/agent_processor.py


import queue
from mlagents_envs.base_env import (
ActionTuple,
DecisionSteps,
DecisionStep,
TerminalSteps,

from mlagents.trainers.trajectory import Trajectory, AgentExperience
from mlagents.trainers.policy import Policy
from mlagents.trainers.action_info import ActionInfo, ActionInfoOutputs
from mlagents.trainers.torch.action_log_probs import LogProbsTuple
from mlagents.trainers.stats import StatsReporter
from mlagents.trainers.behavior_id_utils import get_global_agent_id

done = terminated # Since this is an ongoing step
interrupted = step.interrupted if terminated else False
# Add the outputs of the last eval
action = stored_take_action_outputs["action"][idx]
if self.policy.use_continuous_act:
action_pre = stored_take_action_outputs["pre_action"][idx]
else:
action_pre = None
action_probs = stored_take_action_outputs["log_probs"][idx]
stored_actions = stored_take_action_outputs["action"]
action_tuple = ActionTuple(
continuous=stored_actions.continuous[idx],
discrete=stored_actions.discrete[idx],
)
stored_action_probs = stored_take_action_outputs["log_probs"]
log_probs_tuple = LogProbsTuple(
continuous=stored_action_probs.continuous[idx],
discrete=stored_action_probs.discrete[idx],
)
action_mask = stored_decision_step.action_mask
prev_action = self.policy.retrieve_previous_action([global_id])[0, :]
experience = AgentExperience(

done=done,
action=action,
action_probs=action_probs,
action_pre=action_pre,
action=action_tuple,
action_probs=log_probs_tuple,
action_mask=action_mask,
prev_action=prev_action,
interrupted=interrupted,
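
Sketch of the per-agent slicing that now feeds AgentExperience: the stored policy outputs are batched ActionTuple/LogProbsTuple objects, and one row is taken per agent id. The zero arrays below are stand-ins for stored_take_action_outputs entries.

```python
import numpy as np
from mlagents_envs.base_env import ActionTuple
from mlagents.trainers.torch.action_log_probs import LogProbsTuple

stored_actions = ActionTuple(
    continuous=np.zeros((4, 2), dtype=np.float32),
    discrete=np.zeros((4, 1), dtype=np.int32),
)
stored_log_probs = LogProbsTuple(
    continuous=np.zeros((4, 2), dtype=np.float32),
    discrete=np.zeros((4, 1), dtype=np.float32),
)
idx = 0  # position of this agent in the batched outputs
action_tuple = ActionTuple(
    continuous=stored_actions.continuous[idx],
    discrete=stored_actions.discrete[idx],
)
log_probs_tuple = LogProbsTuple(
    continuous=stored_log_probs.continuous[idx],
    discrete=stored_log_probs.discrete[idx],
)
```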

17
ml-agents/mlagents/trainers/demo_loader.py


[next_pair_info.agent_info], behavior_spec
)
previous_action = (
np.array(pair_infos[idx].action_info.vector_actions, dtype=np.float32) * 0
np.array(
pair_infos[idx].action_info.vector_actions_deprecated, dtype=np.float32
)
* 0
pair_infos[idx - 1].action_info.vector_actions, dtype=np.float32
pair_infos[idx - 1].action_info.vector_actions_deprecated,
dtype=np.float32,
)
next_done = len(next_terminal_step) == 1

demo_raw_buffer["rewards"].append(next_reward)
demo_raw_buffer["obs"].append(current_obs)
demo_raw_buffer["actions"].append(current_pair_info.action_info.vector_actions)
# TODO: update the demonstration files and read from the new proto format
if behavior_spec.action_spec.continuous_size > 0:
demo_raw_buffer["continuous_action"].append(
current_pair_info.action_info.vector_actions_deprecated
)
if behavior_spec.action_spec.discrete_size > 0:
demo_raw_buffer["discrete_action"].append(
current_pair_info.action_info.vector_actions_deprecated
)
demo_raw_buffer["prev_action"].append(previous_action)
if next_done:
demo_raw_buffer.resequence_and_append(

1
ml-agents/mlagents/trainers/env_manager.py


from abc import ABC, abstractmethod
from typing import List, Dict, NamedTuple, Iterable, Tuple
from mlagents_envs.base_env import (
DecisionSteps,

4
ml-agents/mlagents/trainers/optimizer/tf_optimizer.py


[self.value_heads, self.policy.memory_out, self.memory_out], feed_dict
)
prev_action = (
batch["actions"][-1] if not self.policy.use_continuous_act else None
batch["discrete_action"][-1]
if not self.policy.use_continuous_act
else None
)
else:
value_estimates = self.sess.run(self.value_heads, feed_dict)

40
ml-agents/mlagents/trainers/policy/policy.py


from typing import Dict, List, Optional
import numpy as np
from mlagents_envs.base_env import DecisionSteps
from mlagents_envs.base_env import ActionTuple, BehaviorSpec, DecisionSteps
from mlagents_envs.base_env import BehaviorSpec
from mlagents.trainers.settings import TrainerSettings, NetworkSettings

self.trainer_settings = trainer_settings
self.network_settings: NetworkSettings = trainer_settings.network_settings
self.seed = seed
if (
self.behavior_spec.action_spec.continuous_size > 0
and self.behavior_spec.action_spec.discrete_size > 0
):
raise UnityPolicyException("Trainers do not support mixed action spaces.")
self.act_size = (
list(self.behavior_spec.action_spec.discrete_branches)
if self.behavior_spec.action_spec.is_discrete()

1 for shape in behavior_spec.observation_shapes if len(shape) == 3
)
self.use_continuous_act = self.behavior_spec.action_spec.is_continuous()
# This line will be removed in the ActionBuffer change
self.num_branches = (
self.behavior_spec.action_spec.continuous_size
+ self.behavior_spec.action_spec.discrete_size
)
self.previous_action_dict: Dict[str, np.array] = {}
self.previous_action_dict: Dict[str, np.ndarray] = {}
self.memory_dict: Dict[str, np.ndarray] = {}
self.normalize = trainer_settings.network_settings.normalize
self.use_recurrent = self.network_settings.memory is not None

) -> None:
if memory_matrix is None:
return
for index, agent_id in enumerate(agent_ids):
self.memory_dict[agent_id] = memory_matrix[index, :]

if agent_id in self.memory_dict:
self.memory_dict.pop(agent_id)
def make_empty_previous_action(self, num_agents):
def make_empty_previous_action(self, num_agents: int) -> np.ndarray:
return np.zeros((num_agents, self.num_branches), dtype=np.int)
return np.zeros(
(num_agents, self.behavior_spec.action_spec.discrete_size), dtype=np.int32
)
self, agent_ids: List[str], action_matrix: Optional[np.ndarray]
self, agent_ids: List[str], action_tuple: ActionTuple
if action_matrix is None:
return
self.previous_action_dict[agent_id] = action_matrix[index, :]
self.previous_action_dict[agent_id] = action_tuple.discrete[index, :]
action_matrix = np.zeros((len(agent_ids), self.num_branches), dtype=np.int)
action_matrix = self.make_empty_previous_action(len(agent_ids))
for index, agent_id in enumerate(agent_ids):
if agent_id in self.previous_action_dict:
action_matrix[index, :] = self.previous_action_dict[agent_id]

raise NotImplementedError
@staticmethod
def check_nan_action(action: Optional[np.ndarray]) -> None:
def check_nan_action(action: Optional[ActionTuple]) -> None:
d = np.sum(action)
d = np.sum(action.continuous)
raise RuntimeError("NaN action detected.")
raise RuntimeError("Continuous NaN action detected.")
d = np.sum(action.discrete)
has_nan = np.isnan(d)
if has_nan:
raise RuntimeError("Discrete NaN action detected.")
@abstractmethod
def update_normalization(self, vector_obs: np.ndarray) -> None:
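
Standalone sketch of the two-part NaN check introduced above; np.sum over an empty half is 0.0, so the same check covers continuous-only, discrete-only and hybrid specs.

```python
import numpy as np
from mlagents_envs.base_env import ActionTuple

def check_nan_action(action: ActionTuple) -> None:
    if np.isnan(np.sum(action.continuous)):
        raise RuntimeError("Continuous NaN action detected.")
    if np.isnan(np.sum(action.discrete)):
        raise RuntimeError("Discrete NaN action detected.")

check_nan_action(
    ActionTuple(
        continuous=np.zeros((1, 2), dtype=np.float32),
        discrete=np.zeros((1, 1), dtype=np.int32),
    )
)
```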

33
ml-agents/mlagents/trainers/policy/tf_policy.py


from mlagents.tf_utils import tf
from mlagents import tf_utils
from mlagents_envs.exception import UnityException
from mlagents_envs.base_env import BehaviorSpec
from mlagents.trainers.torch.action_log_probs import LogProbsTuple
from mlagents_envs.base_env import DecisionSteps
from mlagents_envs.base_env import DecisionSteps, ActionTuple, BehaviorSpec
from mlagents.trainers.tf.models import ModelUtils
from mlagents.trainers.settings import TrainerSettings, EncoderType
from mlagents.trainers import __version__

reparameterize,
condition_sigma_on_obs,
)
if (
self.behavior_spec.action_spec.continuous_size > 0
and self.behavior_spec.action_spec.discrete_size > 0
):
raise UnityPolicyException(
"TensorFlow does not support mixed action spaces. Please run with the Torch framework."
)
# for ghost trainer save/load snapshots
self.assign_phs: List[tf.Tensor] = []
self.assign_ops: List[tf.Operation] = []

feed_dict[self.prev_action] = self.retrieve_previous_action(
global_agent_ids
)
feed_dict[self.memory_in] = self.retrieve_memories(global_agent_ids)
feed_dict = self.fill_eval_dict(feed_dict, decision_requests)
run_out = self._execute_model(feed_dict, self.inference_dict)

)
self.save_memories(global_agent_ids, run_out.get("memory_out"))
# For compatibility with buffer changes for hybrid action support
if "log_probs" in run_out:
log_probs_tuple = LogProbsTuple()
if self.behavior_spec.action_spec.is_continuous():
log_probs_tuple.add_continuous(run_out["log_probs"])
else:
log_probs_tuple.add_discrete(run_out["log_probs"])
run_out["log_probs"] = log_probs_tuple
if "action" in run_out:
action_tuple = ActionTuple()
env_action_tuple = ActionTuple()
if self.behavior_spec.action_spec.is_continuous():
action_tuple.add_continuous(run_out["pre_action"])
env_action_tuple.add_continuous(run_out["action"])
else:
action_tuple.add_discrete(run_out["action"])
env_action_tuple.add_discrete(run_out["action"])
run_out["action"] = action_tuple
run_out["env_action"] = env_action_tuple
env_action=run_out.get("env_action"),
value=run_out.get("value"),
outputs=run_out,
agent_ids=decision_requests.agent_id,

84
ml-agents/mlagents/trainers/policy/torch_policy.py


SeparateActorCritic,
GlobalSteps,
)
from mlagents.trainers.torch.agent_action import AgentAction
from mlagents.trainers.torch.action_log_probs import ActionLogProbs
EPSILON = 1e-7 # Small value to avoid divide by zero

conditional_sigma=self.condition_sigma_on_obs,
tanh_squash=tanh_squash,
)
self._clip_action = not tanh_squash
# Save the m_size needed for export
self._export_m_size = self.m_size
# m_size needed for training is determined by network, not trainer settings

self._clip_action = not tanh_squash
@property
def export_memory_size(self) -> int:

) -> Tuple[SplitObservations, np.ndarray]:
obs = ModelUtils.list_to_tensor_list(decision_requests.obs)
mask = None
if not self.use_continuous_act:
if self.behavior_spec.action_spec.discrete_size > 0:
mask = torch.ones([len(decision_requests), np.sum(self.act_size)])
if decision_requests.action_mask is not None:
mask = torch.as_tensor(

masks: Optional[torch.Tensor] = None,
memories: Optional[torch.Tensor] = None,
seq_len: int = 1,
all_log_probs: bool = False,
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
critic_obs: Optional[List[List[torch.Tensor]]] = None,
) -> Tuple[AgentAction, ActionLogProbs, torch.Tensor, torch.Tensor]:
"""
:param vec_obs: List of vector observations.
:param vis_obs: List of visual observations.

:param all_log_probs: Returns (for discrete actions) a tensor of log probs, one for each action.
:return: Tuple of actions, actions clipped to -1, 1, log probabilities (dependent on all_log_probs),
entropies, and output memories, all as Torch Tensors.
:return: Tuple of AgentAction, ActionLogProbs, entropies, and output memories.
if memories is None:
dists, memories = self.actor_critic.get_dists(obs, masks, memories, seq_len)
else:
# If we're using LSTM, we need to execute the values to get the critic memories
dists, _, memories = self.actor_critic.get_dist_and_value(
obs, masks, memories, seq_len
)
action_list = self.actor_critic.sample_action(dists)
log_probs, entropies, all_logs = ModelUtils.get_probs_and_entropy(
action_list, dists
)
actions = torch.stack(action_list, dim=-1)
if self.use_continuous_act:
actions = actions[:, :, 0]
else:
actions = actions[:, 0, :]
# Use the sum of entropy across actions, not the mean
entropy_sum = torch.sum(entropies, dim=1)
if self._clip_action and self.use_continuous_act:
clipped_action = torch.clamp(actions, -3, 3) / 3
else:
clipped_action = actions
return (
actions,
clipped_action,
all_logs if all_log_probs else log_probs,
entropy_sum,
memories,
actions, log_probs, entropies, memories = self.actor_critic.get_action_stats(
obs, masks, memories, seq_len
return (actions, log_probs, entropies, memories)
actions: torch.Tensor,
actions: AgentAction,
) -> Tuple[torch.Tensor, torch.Tensor, Dict[str, torch.Tensor]]:
dists, value_heads, _ = self.actor_critic.get_dist_and_value(
obs, masks, memories, seq_len, critic_obs
) -> Tuple[ActionLogProbs, torch.Tensor, Dict[str, torch.Tensor]]:
log_probs, entropies, value_heads = self.actor_critic.get_stats_and_value(
obs, actions, masks, memories, seq_len, critic_obs
action_list = [actions[..., i] for i in range(actions.shape[-1])]
log_probs, entropies, _ = ModelUtils.get_probs_and_entropy(action_list, dists)
# Use the sum of entropy across actions, not the mean
entropy_sum = torch.sum(entropies, dim=1)
return log_probs, entropy_sum, value_heads
return log_probs, entropies, value_heads
@timed
def evaluate(

:return: Outputs from network as defined by self.inference_dict.
"""
obs, masks = self._split_decision_step(decision_requests)
memories = torch.as_tensor(self.retrieve_memories(global_agent_ids)).unsqueeze(
0
) if self.use_recurrent else None
memories = (
torch.as_tensor(self.retrieve_memories(global_agent_ids)).unsqueeze(0)
if self.use_recurrent
else None
)
action, clipped_action, log_probs, entropy, memories = self.sample_actions(
action, log_probs, entropy, memories = self.sample_actions(
run_out["pre_action"] = ModelUtils.to_numpy(action)
run_out["action"] = ModelUtils.to_numpy(clipped_action)
# Todo - make pre_action difference
run_out["log_probs"] = ModelUtils.to_numpy(log_probs)
action_tuple = action.to_action_tuple()
run_out["action"] = action_tuple
# This is the clipped action which is not saved to the buffer
# but is exclusively sent to the environment.
env_action_tuple = action.to_action_tuple(clip=self._clip_action)
run_out["env_action"] = env_action_tuple
run_out["log_probs"] = log_probs.to_log_probs_tuple()
run_out["entropy"] = ModelUtils.to_numpy(entropy)
run_out["learning_rate"] = 0.0
if self.use_recurrent:

self.check_nan_action(run_out.get("action"))
return ActionInfo(
action=run_out.get("action"),
env_action=run_out.get("env_action"),
value=run_out.get("value"),
outputs=run_out,
agent_ids=list(decision_requests.agent_id),
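
The unclipped sample is what gets written to the buffer, while the env-facing copy is clipped (per the hunk above, via AgentAction.to_action_tuple(clip=self._clip_action)). A tiny torch sketch of the clipping rule itself, matching the clamp the old code path used:

```python
import torch

def clip_continuous(actions: torch.Tensor, clip: bool) -> torch.Tensor:
    # Squash sampled continuous actions into [-1, 1] for the environment.
    return torch.clamp(actions, -3, 3) / 3 if clip else actions

sampled = torch.tensor([[4.0, -0.6]])
print(clip_continuous(sampled, clip=True))  # tensor([[ 1.0000, -0.2000]])
```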

9
ml-agents/mlagents/trainers/ppo/optimizer_tf.py


self.policy.sequence_length_ph: self.policy.sequence_length,
self.policy.mask_input: mini_batch["masks"] * burn_in_mask,
self.advantage: mini_batch["advantages"],
self.all_old_log_probs: mini_batch["action_probs"],
if self.policy.output_pre is not None and "actions_pre" in mini_batch:
feed_dict[self.policy.output_pre] = mini_batch["actions_pre"]
if self.policy.use_continuous_act: # For hybrid action buffer support
feed_dict[self.all_old_log_probs] = mini_batch["continuous_log_probs"]
feed_dict[self.policy.output_pre] = mini_batch["continuous_action"]
feed_dict[self.policy.output] = mini_batch["actions"]
feed_dict[self.all_old_log_probs] = mini_batch["discrete_log_probs"]
feed_dict[self.policy.output] = mini_batch["discrete_action"]
if self.policy.use_recurrent:
feed_dict[self.policy.prev_action] = mini_batch["prev_action"]
feed_dict[self.policy.action_masks] = mini_batch["action_mask"]

12
ml-agents/mlagents/trainers/ppo/optimizer_torch.py


from mlagents.trainers.policy.torch_policy import TorchPolicy
from mlagents.trainers.optimizer.torch_optimizer import TorchOptimizer
from mlagents.trainers.settings import TrainerSettings, PPOSettings
from mlagents.trainers.torch.agent_action import AgentAction
from mlagents.trainers.torch.action_log_probs import ActionLogProbs
from mlagents.trainers.torch.utils import ModelUtils

advantage = advantages.unsqueeze(-1)
decay_epsilon = self.hyperparameters.epsilon
r_theta = torch.exp(log_probs - old_log_probs)
p_opt_a = r_theta * advantage
p_opt_b = (

]
act_masks = ModelUtils.list_to_tensor(batch["action_mask"])
if self.policy.use_continuous_act:
actions = ModelUtils.list_to_tensor(batch["actions_pre"]).unsqueeze(-1)
else:
actions = ModelUtils.list_to_tensor(batch["actions"], dtype=torch.long)
actions = AgentAction.from_dict(batch)
memories = [
ModelUtils.list_to_tensor(batch["memory"][i])

critic_obs=critic_obs,
seq_len=self.policy.sequence_length,
)
old_log_probs = ActionLogProbs.from_dict(batch).flatten()
log_probs = log_probs.flatten()
loss_masks = ModelUtils.list_to_tensor(batch["masks"], dtype=torch.bool)
value_loss = self.ppo_value_loss(
values, old_values, returns, decay_eps, loss_masks

log_probs,
ModelUtils.list_to_tensor(batch["action_probs"]),
old_log_probs,
loss_masks,
)
loss = (
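
With actions and log probs now read back through AgentAction.from_dict / ActionLogProbs.from_dict and flattened, the surrogate objective itself is unchanged. A generic sketch of that clipped-surrogate step over already-flattened tensors (the real loss also applies the loss masks shown above):

```python
import torch

def ppo_clipped_surrogate(log_probs, old_log_probs, advantages, epsilon=0.2):
    advantage = advantages.unsqueeze(-1)
    r_theta = torch.exp(log_probs - old_log_probs)
    p_opt_a = r_theta * advantage
    p_opt_b = torch.clamp(r_theta, 1.0 - epsilon, 1.0 + epsilon) * advantage
    return -torch.mean(torch.min(p_opt_a, p_opt_b))
```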

6
ml-agents/mlagents/trainers/sac/optimizer_tf.py


feed_dict[self.rewards_holders[name]] = batch[f"{name}_rewards"]
if self.policy.use_continuous_act:
feed_dict[self.policy_network.external_action_in] = batch["actions"]
feed_dict[self.policy_network.external_action_in] = batch[
"continuous_action"
]
feed_dict[policy.output] = batch["actions"]
feed_dict[policy.output] = batch["discrete_action"]
if self.policy.use_recurrent:
feed_dict[policy.prev_action] = batch["prev_action"]
feed_dict[policy.action_masks] = batch["action_mask"]

278
ml-agents/mlagents/trainers/sac/optimizer_torch.py


import numpy as np
from typing import Dict, List, Mapping, cast, Tuple, Optional
from typing import Dict, List, Mapping, NamedTuple, cast, Tuple, Optional
from mlagents_envs.base_env import ActionSpec
from mlagents.trainers.torch.agent_action import AgentAction
from mlagents.trainers.torch.action_log_probs import ActionLogProbs
from mlagents_envs.base_env import ActionSpec
from mlagents.trainers.exception import UnityTrainerException
from mlagents.trainers.settings import TrainerSettings, SACSettings
from contextlib import ExitStack

action_spec: ActionSpec,
):
super().__init__()
self.action_spec = action_spec
if self.action_spec.is_continuous():
self.act_size = self.action_spec.continuous_size
num_value_outs = 1
num_action_ins = self.act_size
num_value_outs = max(sum(action_spec.discrete_branches), 1)
num_action_ins = int(action_spec.continuous_size)
else:
self.act_size = self.action_spec.discrete_branches
num_value_outs = sum(self.act_size)
num_action_ins = 0
self.q1_network = ValueNetwork(
stream_names,
observation_shapes,

)
return q1_out, q2_out
class TargetEntropy(NamedTuple):
discrete: List[float] = [] # One per branch
continuous: float = 0.0
class LogEntCoef(nn.Module):
def __init__(self, discrete, continuous):
super().__init__()
self.discrete = discrete
self.continuous = continuous
def __init__(self, policy: TorchPolicy, trainer_params: TrainerSettings):
super().__init__(policy, trainer_params)
hyperparameters: SACSettings = cast(SACSettings, trainer_params.hyperparameters)

self.policy = policy
self.act_size = policy.act_size
policy_network_settings = policy.network_settings
self.tau = hyperparameters.tau

name: int(not self.reward_signals[name].ignore_done)
for name in self.stream_names
}
self._action_spec = self.policy.behavior_spec.action_spec
self.policy.behavior_spec.action_spec,
self._action_spec,
)
self.target_network = ValueNetwork(

self.policy.actor_critic.critic, self.target_network, 1.0
)
self._log_ent_coef = torch.nn.Parameter(
torch.log(torch.as_tensor([self.init_entcoef] * len(self.act_size))),
# We create one entropy coefficient per action, whether discrete or continuous.
_disc_log_ent_coef = torch.nn.Parameter(
torch.log(
torch.as_tensor(
[self.init_entcoef] * len(self._action_spec.discrete_branches)
)
),
if self.policy.use_continuous_act:
self.target_entropy = torch.as_tensor(
-1
* self.continuous_target_entropy_scale
* np.prod(self.act_size[0]).astype(np.float32)
)
else:
self.target_entropy = [
self.discrete_target_entropy_scale * np.log(i).astype(np.float32)
for i in self.act_size
]
_cont_log_ent_coef = torch.nn.Parameter(
torch.log(torch.as_tensor([self.init_entcoef])), requires_grad=True
)
self._log_ent_coef = TorchSACOptimizer.LogEntCoef(
discrete=_disc_log_ent_coef, continuous=_cont_log_ent_coef
)
_cont_target = (
-1
* self.continuous_target_entropy_scale
* np.prod(self._action_spec.continuous_size).astype(np.float32)
)
_disc_target = [
self.discrete_target_entropy_scale * np.log(i).astype(np.float32)
for i in self._action_spec.discrete_branches
]
self.target_entropy = TorchSACOptimizer.TargetEntropy(
continuous=_cont_target, discrete=_disc_target
)
self.policy.actor_critic.distribution.parameters()
self.policy.actor_critic.action_model.parameters()
)
value_params = list(self.value_network.parameters()) + list(
self.policy.actor_critic.critic.parameters()

value_params, lr=hyperparameters.learning_rate
)
self.entropy_optimizer = torch.optim.Adam(
[self._log_ent_coef], lr=hyperparameters.learning_rate
self._log_ent_coef.parameters(), lr=hyperparameters.learning_rate
)
self._move_to_device(default_device())

def sac_value_loss(
self,
log_probs: torch.Tensor,
log_probs: ActionLogProbs,
discrete: bool,
_ent_coef = torch.exp(self._log_ent_coef)
_cont_ent_coef = self._log_ent_coef.continuous.exp()
_disc_ent_coef = self._log_ent_coef.discrete.exp()
if not discrete:
if self._action_spec.discrete_size <= 0:
action_probs = log_probs.exp()
disc_action_probs = log_probs.all_discrete_tensor.exp()
q1p_out[name] * action_probs, self.act_size
q1p_out[name] * disc_action_probs,
self._action_spec.discrete_branches,
q2p_out[name] * action_probs, self.act_size
q2p_out[name] * disc_action_probs,
self._action_spec.discrete_branches,
)
_q1p_mean = torch.mean(
torch.stack(

min_policy_qs[name] = torch.min(_q1p_mean, _q2p_mean)
value_losses = []
if not discrete:
if self._action_spec.discrete_size <= 0:
_ent_coef * log_probs, dim=1
_cont_ent_coef * log_probs.continuous_tensor, dim=1
)
value_loss = 0.5 * ModelUtils.masked_mean(
torch.nn.functional.mse_loss(values[name], v_backup), loss_masks

disc_log_probs = log_probs.all_discrete_tensor
log_probs * log_probs.exp(), self.act_size
disc_log_probs * disc_log_probs.exp(),
self._action_spec.discrete_branches,
torch.sum(_ent_coef[i] * _lp, dim=1, keepdim=True)
torch.sum(_disc_ent_coef[i] * _lp, dim=1, keepdim=True)
for i, _lp in enumerate(branched_per_action_ent)
]
)

branched_ent_bonus, axis=0
)
# Add continuous entropy bonus to minimum Q
if self._action_spec.continuous_size > 0:
v_backup += torch.sum(
_cont_ent_coef * log_probs.continuous_tensor,
dim=1,
keepdim=True,
)
value_loss = 0.5 * ModelUtils.masked_mean(
torch.nn.functional.mse_loss(values[name], v_backup.squeeze()),
loss_masks,

def sac_policy_loss(
self,
log_probs: torch.Tensor,
log_probs: ActionLogProbs,
discrete: bool,
_ent_coef = torch.exp(self._log_ent_coef)
_cont_ent_coef, _disc_ent_coef = (
self._log_ent_coef.continuous,
self._log_ent_coef.discrete,
)
_cont_ent_coef = _cont_ent_coef.exp()
_disc_ent_coef = _disc_ent_coef.exp()
if not discrete:
mean_q1 = mean_q1.unsqueeze(1)
batch_policy_loss = torch.mean(_ent_coef * log_probs - mean_q1, dim=1)
policy_loss = ModelUtils.masked_mean(batch_policy_loss, loss_masks)
else:
action_probs = log_probs.exp()
batch_policy_loss = 0
if self._action_spec.discrete_size > 0:
disc_log_probs = log_probs.all_discrete_tensor
disc_action_probs = disc_log_probs.exp()
log_probs * action_probs, self.act_size
disc_log_probs * disc_action_probs, self._action_spec.discrete_branches
mean_q1 * action_probs, self.act_size
mean_q1 * disc_action_probs, self._action_spec.discrete_branches
torch.sum(_ent_coef[i] * _lp - _qt, dim=1, keepdim=True)
torch.sum(_disc_ent_coef[i] * _lp - _qt, dim=1, keepdim=False)
for i, (_lp, _qt) in enumerate(
zip(branched_per_action_ent, branched_q_term)
)

batch_policy_loss = torch.squeeze(branched_policy_loss)
policy_loss = ModelUtils.masked_mean(batch_policy_loss, loss_masks)
batch_policy_loss += torch.sum(branched_policy_loss, dim=1)
all_mean_q1 = torch.sum(disc_action_probs * mean_q1, dim=1)
else:
all_mean_q1 = mean_q1
if self._action_spec.continuous_size > 0:
cont_log_probs = log_probs.continuous_tensor
batch_policy_loss += torch.mean(
_cont_ent_coef * cont_log_probs - all_mean_q1.unsqueeze(1), dim=1
)
policy_loss = ModelUtils.masked_mean(batch_policy_loss, loss_masks)
self, log_probs: torch.Tensor, loss_masks: torch.Tensor, discrete: bool
self, log_probs: ActionLogProbs, loss_masks: torch.Tensor
if not discrete:
_cont_ent_coef, _disc_ent_coef = (
self._log_ent_coef.continuous,
self._log_ent_coef.discrete,
)
entropy_loss = 0
if self._action_spec.discrete_size > 0:
target_current_diff = torch.sum(log_probs + self.target_entropy, dim=1)
entropy_loss = -1 * ModelUtils.masked_mean(
self._log_ent_coef * target_current_diff, loss_masks
)
else:
with torch.no_grad():
# Break discrete log probs into separate branches
disc_log_probs = log_probs.all_discrete_tensor
log_probs * log_probs.exp(), self.act_size
disc_log_probs * disc_log_probs.exp(),
self._action_spec.discrete_branches,
branched_per_action_ent, self.target_entropy
branched_per_action_ent, self.target_entropy.discrete
)
],
axis=1,

)
entropy_loss = -1 * ModelUtils.masked_mean(
torch.mean(self._log_ent_coef * target_current_diff, axis=1), loss_masks
entropy_loss += -1 * ModelUtils.masked_mean(
torch.mean(_disc_ent_coef * target_current_diff, axis=1), loss_masks
)
if self._action_spec.continuous_size > 0:
with torch.no_grad():
cont_log_probs = log_probs.continuous_tensor
target_current_diff = torch.sum(
cont_log_probs + self.target_entropy.continuous, dim=1
)
# We update all the _cont_ent_coef as one block
entropy_loss += -1 * ModelUtils.masked_mean(
_cont_ent_coef * target_current_diff, loss_masks
)
return entropy_loss

) -> Dict[str, torch.Tensor]:
condensed_q_output = {}
onehot_actions = ModelUtils.actions_to_onehot(discrete_actions, self.act_size)
onehot_actions = ModelUtils.actions_to_onehot(
discrete_actions, self._action_spec.discrete_branches
)
branched_q = ModelUtils.break_into_branches(item, self.act_size)
branched_q = ModelUtils.break_into_branches(
item, self._action_spec.discrete_branches
)
only_action_qs = torch.stack(
[
torch.sum(_act * _q, dim=1, keepdim=True)

AgentBuffer.obs_list_to_obs_batch(batch["next_obs"])
)
act_masks = ModelUtils.list_to_tensor(batch["action_mask"])
if self.policy.use_continuous_act:
actions = ModelUtils.list_to_tensor(batch["actions"]).unsqueeze(-1)
else:
actions = ModelUtils.list_to_tensor(batch["actions"], dtype=torch.long)
actions = AgentAction.from_dict(batch)
memories_list = [
ModelUtils.list_to_tensor(batch["memory"][i])

self.target_network.network_body.copy_normalization(
self.policy.actor_critic.network_body
)
(sampled_actions, _, log_probs, _, _) = self.policy.sample_actions(
(
sampled_actions,
log_probs,
_,
value_estimates,
_,
) = self.policy.actor_critic.get_action_stats_and_value(
seq_len=self.policy.sequence_length,
all_log_probs=not self.policy.use_continuous_act,
sequence_length=self.policy.sequence_length,
)
cont_sampled_actions = sampled_actions.continuous_tensor
cont_actions = actions.continuous_tensor
q1p_out, q2p_out = self.value_network(
obs,
cont_sampled_actions,
memories=q_memories,
sequence_length=self.policy.sequence_length,
q2_grad=False,
value_estimates, _ = self.policy.actor_critic.critic_pass(
obs, memories, sequence_length=self.policy.sequence_length
q1_out, q2_out = self.value_network(
obs,
cont_actions,
memories=q_memories,
sequence_length=self.policy.sequence_length,
if self.policy.use_continuous_act:
squeezed_actions = actions.squeeze(-1)
# Only need grad for q1, as that is used for policy.
q1p_out, q2p_out = self.value_network(
obs,
sampled_actions,
memories=q_memories,
sequence_length=self.policy.sequence_length,
q2_grad=False,
)
q1_out, q2_out = self.value_network(
obs,
squeezed_actions,
memories=q_memories,
sequence_length=self.policy.sequence_length,
)
if self._action_spec.discrete_size > 0:
disc_actions = actions.discrete_tensor
q1_stream = self._condense_q_streams(q1_out, disc_actions)
q2_stream = self._condense_q_streams(q2_out, disc_actions)
else:
else:
# For discrete, you don't need to backprop through the Q for the policy
q1p_out, q2p_out = self.value_network(
obs,
memories=q_memories,
sequence_length=self.policy.sequence_length,
q1_grad=False,
q2_grad=False,
)
q1_out, q2_out = self.value_network(
obs,
memories=q_memories,
sequence_length=self.policy.sequence_length,
)
q1_stream = self._condense_q_streams(q1_out, actions)
q2_stream = self._condense_q_streams(q2_out, actions)
with torch.no_grad():
target_values, _ = self.target_network(

)
masks = ModelUtils.list_to_tensor(batch["masks"], dtype=torch.bool)
use_discrete = not self.policy.use_continuous_act
dones = ModelUtils.list_to_tensor(batch["done"])
q1_loss, q2_loss = self.sac_q_loss(

log_probs, value_estimates, q1p_out, q2p_out, masks, use_discrete
log_probs, value_estimates, q1p_out, q2p_out, masks
policy_loss = self.sac_policy_loss(log_probs, q1p_out, masks, use_discrete)
entropy_loss = self.sac_entropy_loss(log_probs, masks, use_discrete)
policy_loss = self.sac_policy_loss(log_probs, q1p_out, masks)
entropy_loss = self.sac_entropy_loss(log_probs, masks)
total_value_loss = q1_loss + q2_loss + value_loss

"Losses/Value Loss": value_loss.item(),
"Losses/Q1 Loss": q1_loss.item(),
"Losses/Q2 Loss": q2_loss.item(),
"Policy/Entropy Coeff": torch.mean(torch.exp(self._log_ent_coef)).item(),
"Policy/Discrete Entropy Coeff": torch.mean(
torch.exp(self._log_ent_coef.discrete)
).item(),
"Policy/Continuous Entropy Coeff": torch.mean(
torch.exp(self._log_ent_coef.continuous)
).item(),
"Policy/Learning Rate": decay_lr,
}
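
Sketch of the split entropy targets configured above: one scalar target for the continuous block and one per discrete branch. The scale values here are illustrative stand-ins for the optimizer's hyperparameters.

```python
import numpy as np

continuous_target_entropy_scale = 1.0   # illustrative
discrete_target_entropy_scale = 0.2     # illustrative
continuous_size = 2
discrete_branches = (3, 2)

cont_target = -1 * continuous_target_entropy_scale * np.prod(continuous_size).astype(np.float32)
disc_target = [
    discrete_target_entropy_scale * np.log(i).astype(np.float32)
    for i in discrete_branches
]
print(cont_target, disc_target)  # -2.0 [0.2197..., 0.1386...]
```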

2
ml-agents/mlagents/trainers/simple_env_manager.py


self.previous_all_action_info = all_action_info
for brain_name, action_info in all_action_info.items():
self.env.set_actions(brain_name, action_info.action)
self.env.set_actions(brain_name, action_info.env_action)
self.env.step()
all_step_result = self._generate_all_results()

56
ml-agents/mlagents/trainers/subprocess_env_manager.py


from typing import Dict, NamedTuple, List, Any, Optional, Callable, Set
import cloudpickle
import enum
import time
from mlagents_envs.environment import UnityEnvironment
from mlagents_envs.exception import (

logger = logging_util.get_logger(__name__)
WORKER_SHUTDOWN_TIMEOUT_S = 10
class EnvironmentCommand(enum.Enum):

RESET = 4
CLOSE = 5
ENV_EXITED = 6
CLOSED = 7
class EnvironmentRequest(NamedTuple):

self.previous_step: EnvironmentStep = EnvironmentStep.empty(worker_id)
self.previous_all_action_info: Dict[str, ActionInfo] = {}
self.waiting = False
self.closed = False
def send(self, cmd: EnvironmentCommand, payload: Any = None) -> None:
try:

except (BrokenPipeError, EOFError):
raise UnityCommunicationException("UnityEnvironment worker: recv failed.")
def close(self):
def request_close(self):
try:
self.conn.send(EnvironmentRequest(EnvironmentCommand.CLOSE))
except (BrokenPipeError, EOFError):

pass
logger.debug(f"UnityEnvWorker {self.worker_id} joining process.")
self.process.join()
def worker(

if req.cmd == EnvironmentCommand.STEP:
all_action_info = req.payload
for brain_name, action_info in all_action_info.items():
if len(action_info.action) != 0:
env.set_actions(brain_name, action_info.action)
if len(action_info.agent_ids) > 0:
env.set_actions(brain_name, action_info.env_action)
env.step()
all_step_result = _generate_all_results()
# The timers in this process are independent from all the other processes and the "main" process

EnvironmentResponse(EnvironmentCommand.ENV_EXITED, worker_id, ex)
)
_send_response(EnvironmentCommand.ENV_EXITED, ex)
except Exception as ex:
logger.error(
f"UnityEnvironment worker {worker_id}: environment raised an unexpected exception."
)
step_queue.put(
EnvironmentResponse(EnvironmentCommand.ENV_EXITED, worker_id, ex)
)
_send_response(EnvironmentCommand.ENV_EXITED, ex)
# If this worker has put an item in the step queue that hasn't been processed by the EnvManager, the process
# will hang until the item is processed. We avoid this behavior by using Queue.cancel_join_thread()
# See https://docs.python.org/3/library/multiprocessing.html#multiprocessing.Queue.cancel_join_thread for
# more info.
step_queue.cancel_join_thread()
step_queue.close()
parent_conn.close()
step_queue.put(EnvironmentResponse(EnvironmentCommand.CLOSED, worker_id, None))
step_queue.close()
class SubprocessEnvManager(EnvManager):

super().__init__()
self.env_workers: List[UnityEnvWorker] = []
self.step_queue: Queue = Queue()
self.workers_alive = 0
for worker_idx in range(n_env):
self.env_workers.append(
self.create_worker(

self.workers_alive += 1
@staticmethod
def create_worker(

def close(self) -> None:
logger.debug("SubprocessEnvManager closing.")
for env_worker in self.env_workers:
env_worker.request_close()
# Pull messages out of the queue until every worker has CLOSED or we time out.
deadline = time.time() + WORKER_SHUTDOWN_TIMEOUT_S
while self.workers_alive > 0 and time.time() < deadline:
try:
step: EnvironmentResponse = self.step_queue.get_nowait()
env_worker = self.env_workers[step.worker_id]
if step.cmd == EnvironmentCommand.CLOSED and not env_worker.closed:
env_worker.closed = True
self.workers_alive -= 1
# Discard all other messages.
except EmptyQueueException:
pass
# Sanity check to kill zombie workers and report an issue if they occur.
if self.workers_alive > 0:
logger.error("SubprocessEnvManager had workers that didn't signal shutdown")
for env_worker in self.env_workers:
if not env_worker.closed and env_worker.process.is_alive():
env_worker.process.terminate()
logger.error(
"A SubprocessEnvManager worker did not shut down correctly so it was forcefully terminated."
)
for env_worker in self.env_workers:
env_worker.close()
def _postprocess_steps(
self, env_steps: List[EnvironmentResponse]
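
The new close() path above asks every worker to shut down, then drains the step queue until each one reports CLOSED or a deadline passes, and only force-terminates the stragglers. A generic sketch of that drain-with-deadline pattern (plain queue.Queue and a string sentinel here; the manager itself uses a multiprocessing Queue and EnvironmentResponse messages):

```python
import time
from queue import Queue, Empty

def drain_until_closed(step_queue: Queue, workers_alive: int, timeout_s: float = 10.0) -> int:
    deadline = time.time() + timeout_s
    while workers_alive > 0 and time.time() < deadline:
        try:
            msg = step_queue.get_nowait()
            if msg == "CLOSED":
                workers_alive -= 1
            # Anything else on the queue is discarded during shutdown.
        except Empty:
            pass
    return workers_alive  # > 0 means some workers must be force-terminated
```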

24
ml-agents/mlagents/trainers/tests/mock_brain.py


import numpy as np
from mlagents.trainers.buffer import AgentBuffer
from mlagents.trainers.torch.action_log_probs import LogProbsTuple
from mlagents.trainers.trajectory import Trajectory, AgentExperience
from mlagents_envs.base_env import (
DecisionSteps,

ActionTuple,
)

steps_list = []
action_size = action_spec.discrete_size + action_spec.continuous_size
action_probs = np.ones(
int(np.sum(action_spec.discrete_branches) + action_spec.continuous_size),
dtype=np.float32,
)
for _i in range(length - 1):
obs = []
for _shape in observation_shapes:

action = np.zeros(action_size, dtype=np.float32)
action_pre = np.zeros(action_size, dtype=np.float32)
action = ActionTuple(
continuous=np.zeros(action_spec.continuous_size, dtype=np.float32),
discrete=np.zeros(action_spec.discrete_size, dtype=np.int32),
)
action_probs = LogProbsTuple(
continuous=np.ones(action_spec.continuous_size, dtype=np.float32),
discrete=np.ones(action_spec.discrete_size, dtype=np.float32),
)
action_mask = (
[
[False for _ in range(branch)]

else None
)
prev_action = np.ones(action_size, dtype=np.float32)
if action_spec.is_discrete():
prev_action = np.ones(action_size, dtype=np.int32)
else:
prev_action = np.ones(action_size, dtype=np.float32)
max_step = False
memory = np.ones(memory_size, dtype=np.float32)
agent_id = "test_agent"

done=done,
action=action,
action_probs=action_probs,
action_pre=action_pre,
action_mask=action_mask,
prev_action=prev_action,
interrupted=max_step,

done=not max_step_complete,
action=action,
action_probs=action_probs,
action_pre=action_pre,
action_mask=action_mask,
prev_action=prev_action,
interrupted=max_step_complete,

91
ml-agents/mlagents/trainers/tests/simple_test_envs.py


from mlagents_envs.base_env import (
ActionSpec,
ActionTuple,
BaseEnv,
BehaviorSpec,
DecisionSteps,

OBS_SIZE = 1
VIS_OBS_SIZE = (20, 20, 3)
STEP_SIZE = 0.1
STEP_SIZE = 0.2
TIME_PENALTY = 0.01
MIN_STEPS = int(1.0 / STEP_SIZE) + 1

def __init__(
self,
brain_names,
use_discrete,
action_size=1,
action_sizes=(1, 0),
self.discrete = use_discrete
if use_discrete:
action_spec = ActionSpec.create_discrete(
tuple(2 for _ in range(action_size))
)
else:
action_spec = ActionSpec.create_continuous(action_size)
continuous_action_size, discrete_action_size = action_sizes
discrete_tuple = tuple(2 for _ in range(discrete_action_size))
action_spec = ActionSpec(continuous_action_size, discrete_tuple)
self.total_action_size = (
continuous_action_size + discrete_action_size
) # to set the goals/positions
self.action_spec = action_spec
self.action_size = action_size
self.action_spec = action_spec
self.names = brain_names
self.positions: Dict[str, List[float]] = {}
self.step_count: Dict[str, float] = {}

def _take_action(self, name: str) -> bool:
deltas = []
for _act in self.action[name][0]:
if self.discrete:
deltas.append(1 if _act else -1)
else:
deltas.append(_act)
_act = self.action[name]
if self.action_spec.continuous_size > 0:
for _cont in _act.continuous[0]:
deltas.append(_cont)
if self.action_spec.discrete_size > 0:
for _disc in _act.discrete[0]:
deltas.append(1 if _disc else -1)
for i, _delta in enumerate(deltas):
_delta = clamp(_delta, -self.step_size, self.step_size)
self.positions[name][i] += _delta

return done
def _generate_mask(self):
if self.discrete:
action_mask = None
if self.action_spec.discrete_size > 0:
ndmask = np.array(2 * self.action_size * [False], dtype=np.bool)
ndmask = np.array(
2 * self.action_spec.discrete_size * [False], dtype=np.bool
)
else:
action_mask = None
return action_mask
def _compute_reward(self, name: str, done: bool) -> float:

def _reset_agent(self, name):
self.goal[name] = self.random.choice([-1, 1])
self.positions[name] = [0.0 for _ in range(self.action_size)]
self.positions[name] = [0.0 for _ in range(self.total_action_size)]
self.step_count[name] = 0
self.rewards[name] = 0
self.agent_id[name] = self.agent_id[name] + 1

class MemoryEnvironment(SimpleEnvironment):
def __init__(self, brain_names, use_discrete, step_size=0.2):
super().__init__(brain_names, use_discrete, step_size=step_size)
def __init__(self, brain_names, action_sizes=(1, 0), step_size=0.2):
super().__init__(brain_names, action_sizes=action_sizes, step_size=step_size)
# Number of steps to reveal the goal for. Lower is harder. Should be
# less than 1/step_size to force agent to use memory
self.num_show_steps = 2

def __init__(
self,
brain_names,
use_discrete,
action_sizes=(1, 0),
use_discrete,
action_sizes=action_sizes,
)
self.demonstration_protos: Dict[str, List[AgentInfoActionPairProto]] = {}
self.n_demos = n_demos

def step(self) -> None:
super().step()
for name in self.names:
discrete_actions = (
self.action[name].discrete
if self.action_spec.discrete_size > 0
else None
)
continuous_actions = (
self.action[name].continuous
if self.action_spec.continuous_size > 0
else None
)
self.step_result[name][0], self.step_result[name][1], self.action[name]
self.step_result[name][0],
self.step_result[name][1],
continuous_actions,
discrete_actions,
)
self.demonstration_protos[name] = self.demonstration_protos[name][
-self.n_demos :

self.reset()
for _ in range(self.n_demos):
for name in self.names:
if self.discrete:
self.action[name] = [[1]] if self.goal[name] > 0 else [[0]]
if self.action_spec.discrete_size > 0:
self.action[name] = ActionTuple(
np.array([], dtype=np.float32),
np.array(
[[1]] if self.goal[name] > 0 else [[0]], dtype=np.int32
),
)
self.action[name] = [[float(self.goal[name])]]
self.action[name] = ActionTuple(
np.array([[float(self.goal[name])]], dtype=np.float32),
np.array([], dtype=np.int32),
)
class UnexpectedExceptionEnvironment(SimpleEnvironment):
def __init__(self, brain_names, use_discrete, to_raise):
super().__init__(brain_names, use_discrete)
self.to_raise = to_raise
def step(self) -> None:
raise self.to_raise()
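
How the new action_sizes=(continuous, discrete) argument above maps onto an ActionSpec in this test environment, where every discrete branch is binary:

```python
from mlagents_envs.base_env import ActionSpec

def spec_from_action_sizes(action_sizes):
    continuous_size, discrete_size = action_sizes
    discrete_branches = tuple(2 for _ in range(discrete_size))
    return ActionSpec(continuous_size, discrete_branches)

spec = spec_from_action_sizes((1, 2))
assert spec.continuous_size == 1
assert spec.discrete_branches == (2, 2)
```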

66
ml-agents/mlagents/trainers/tests/tensorflow/test_ppo.py


dummy_config, use_rnn=rnn, use_discrete=discrete, use_visual=visual
)
# Test update
update_buffer = mb.simulate_rollout(
BUFFER_INIT_SAMPLES, optimizer.policy.behavior_spec
)
behavior_spec = optimizer.policy.behavior_spec
update_buffer = mb.simulate_rollout(BUFFER_INIT_SAMPLES, behavior_spec)
# NOTE: This is because TF outputs the log probs of all actions whereas PyTorch does not
if discrete:
n_agents = len(update_buffer["discrete_log_probs"])
update_buffer["discrete_log_probs"] = np.ones(
(n_agents, int(sum(behavior_spec.action_spec.discrete_branches))),
dtype=np.float32,
)
else:
n_agents = len(update_buffer["continuous_log_probs"])
update_buffer["continuous_log_probs"] = np.ones(
(n_agents, behavior_spec.action_spec.continuous_size), dtype=np.float32
)
optimizer.update(
update_buffer,
num_sequences=update_buffer.num_experiences // optimizer.policy.sequence_length,

dummy_config, use_rnn=rnn, use_discrete=discrete, use_visual=visual
)
# Test update
update_buffer = mb.simulate_rollout(
BUFFER_INIT_SAMPLES, optimizer.policy.behavior_spec
)
behavior_spec = optimizer.policy.behavior_spec
update_buffer = mb.simulate_rollout(BUFFER_INIT_SAMPLES, behavior_spec)
# Mock out reward signal eval
update_buffer["advantages"] = update_buffer["environment_rewards"]
update_buffer["extrinsic_returns"] = update_buffer["environment_rewards"]

# NOTE: This is because TF outputs the log probs of all actions whereas PyTorch does not
if discrete:
n_agents = len(update_buffer["discrete_log_probs"])
update_buffer["discrete_log_probs"] = np.ones(
(n_agents, int(sum(behavior_spec.action_spec.discrete_branches))),
dtype=np.float32,
)
else:
n_agents = len(update_buffer["continuous_log_probs"])
update_buffer["continuous_log_probs"] = np.ones(
(n_agents, behavior_spec.action_spec.continuous_size), dtype=np.float32
)
optimizer.update(
update_buffer,
num_sequences=update_buffer.num_experiences // optimizer.policy.sequence_length,

use_visual=False,
)
# Test update
update_buffer = mb.simulate_rollout(
BUFFER_INIT_SAMPLES, optimizer.policy.behavior_spec
)
behavior_spec = optimizer.policy.behavior_spec
update_buffer = mb.simulate_rollout(BUFFER_INIT_SAMPLES, behavior_spec)
# Mock out reward signal eval
update_buffer["advantages"] = update_buffer["environment_rewards"]
update_buffer["extrinsic_returns"] = update_buffer["environment_rewards"]

# NOTE: This is because TF outputs the log probs of all actions whereas PyTorch does not
n_agents = len(update_buffer["continuous_log_probs"])
update_buffer["continuous_log_probs"] = np.ones(
(n_agents, behavior_spec.action_spec.continuous_size), dtype=np.float32
)
optimizer.update(
update_buffer,
num_sequences=update_buffer.num_experiences // optimizer.policy.sequence_length,

buffer["curiosity_returns"] = buffer["environment_rewards"]
buffer["curiosity_value_estimates"] = buffer["environment_rewards"]
buffer["advantages"] = buffer["environment_rewards"]
# NOTE: This is because TF outputs the log probs of all actions whereas PyTorch does not
if use_discrete:
n_agents = len(buffer["discrete_log_probs"])
buffer["discrete_log_probs"].reset_field()
for _ in range(n_agents):
buffer["discrete_log_probs"].append(
np.ones(
int(sum(mock_behavior_spec.action_spec.discrete_branches)),
dtype=np.float32,
)
)
else:
n_agents = len(buffer["continuous_log_probs"])
buffer["continuous_log_probs"].reset_field()
for _ in range(n_agents):
buffer["continuous_log_probs"].append(
np.ones(
mock_behavior_spec.action_spec.continuous_size, dtype=np.float32
)
)
trainer.update_buffer = buffer
trainer._update_policy()

128
ml-agents/mlagents/trainers/tests/tensorflow/test_simple_rl.py


assert all(reward > success_threshold for reward in processed_rewards)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_ppo(use_discrete):
env = SimpleEnvironment([BRAIN_NAME], use_discrete=use_discrete)
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_simple_ppo(action_sizes):
env = SimpleEnvironment([BRAIN_NAME], action_sizes=action_sizes)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_2d_ppo(use_discrete):
env = SimpleEnvironment(
[BRAIN_NAME], use_discrete=use_discrete, action_size=2, step_size=0.8
)
@pytest.mark.parametrize("action_sizes", [(0, 2), (2, 0)])
def test_2d_ppo(action_sizes):
env = SimpleEnvironment([BRAIN_NAME], action_sizes=action_sizes, step_size=0.8)
new_hyperparams = attr.evolve(
PPO_TF_CONFIG.hyperparameters, batch_size=64, buffer_size=640
)

_check_environment_trains(env, {BRAIN_NAME: config})
@pytest.mark.parametrize("use_discrete", [True, False])
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_visual_ppo(num_visual, use_discrete):
def test_visual_ppo(num_visual, action_sizes):
use_discrete=use_discrete,
action_sizes=action_sizes,
num_visual=num_visual,
num_vector=0,
step_size=0.2,

def test_visual_advanced_ppo(vis_encode_type, num_visual):
env = SimpleEnvironment(
[BRAIN_NAME],
use_discrete=True,
action_sizes=(0, 1),
num_visual=num_visual,
num_vector=0,
step_size=0.5,

PPO_TF_CONFIG,
hyperparameters=new_hyperparams,
network_settings=new_networksettings,
max_steps=300,
max_steps=400,
summary_freq=100,
framework=FrameworkType.TENSORFLOW,
)

@pytest.mark.parametrize("use_discrete", [True, False])
def test_recurrent_ppo(use_discrete):
env = MemoryEnvironment([BRAIN_NAME], use_discrete=use_discrete)
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_recurrent_ppo(action_sizes):
env = MemoryEnvironment([BRAIN_NAME], action_sizes=action_sizes)
new_network_settings = attr.evolve(
PPO_TF_CONFIG.network_settings,
memory=NetworkSettings.MemorySettings(memory_size=16),

_check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_sac(use_discrete):
env = SimpleEnvironment([BRAIN_NAME], use_discrete=use_discrete)
config = attr.evolve(SAC_TF_CONFIG, framework=FrameworkType.TENSORFLOW)
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_simple_sac(action_sizes):
env = SimpleEnvironment([BRAIN_NAME], action_sizes=action_sizes)
config = attr.evolve(
SAC_TF_CONFIG, framework=FrameworkType.TENSORFLOW, max_steps=900
)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_2d_sac(use_discrete):
env = SimpleEnvironment(
[BRAIN_NAME], use_discrete=use_discrete, action_size=2, step_size=0.8
)
@pytest.mark.parametrize("action_sizes", [(0, 2), (2, 0)])
def test_2d_sac(action_sizes):
env = SimpleEnvironment([BRAIN_NAME], action_sizes=action_sizes, step_size=0.8)
new_hyperparams = attr.evolve(SAC_TF_CONFIG.hyperparameters, buffer_init_steps=2000)
config = attr.evolve(
SAC_TF_CONFIG,

_check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.8)
@pytest.mark.parametrize("use_discrete", [True, False])
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_visual_sac(num_visual, use_discrete):
def test_visual_sac(num_visual, action_sizes):
use_discrete=use_discrete,
action_sizes=action_sizes,
num_visual=num_visual,
num_vector=0,
step_size=0.2,

def test_visual_advanced_sac(vis_encode_type, num_visual):
env = SimpleEnvironment(
[BRAIN_NAME],
use_discrete=True,
action_sizes=(0, 1),
num_visual=num_visual,
num_vector=0,
step_size=0.5,

SAC_TF_CONFIG,
hyperparameters=new_hyperparams,
network_settings=new_networksettings,
max_steps=100,
max_steps=200,
framework=FrameworkType.TENSORFLOW,
)
# The number of steps is pretty small for these encoders

@pytest.mark.parametrize("use_discrete", [True, False])
def test_recurrent_sac(use_discrete):
step_size = 0.2 if use_discrete else 0.5
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_recurrent_sac(action_sizes):
step_size = 0.2 if action_sizes == (0, 1) else 0.5
[BRAIN_NAME], use_discrete=use_discrete, step_size=step_size
[BRAIN_NAME], action_sizes=action_sizes, step_size=step_size
)
new_networksettings = attr.evolve(
SAC_TF_CONFIG.network_settings,

_check_environment_trains(env, {BRAIN_NAME: config})
@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_ghost(use_discrete):
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_simple_ghost(action_sizes):
[BRAIN_NAME + "?team=0", BRAIN_NAME + "?team=1"], use_discrete=use_discrete
[BRAIN_NAME + "?team=0", BRAIN_NAME + "?team=1"], action_sizes=action_sizes
)
self_play_settings = SelfPlaySettings(
play_against_latest_model_ratio=1.0, save_steps=2000, swap_steps=2000

_check_environment_trains(env, {BRAIN_NAME: config})
@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_ghost_fails(use_discrete):
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_simple_ghost_fails(action_sizes):
[BRAIN_NAME + "?team=0", BRAIN_NAME + "?team=1"], use_discrete=use_discrete
[BRAIN_NAME + "?team=0", BRAIN_NAME + "?team=1"], action_sizes=action_sizes
)
# This config should fail because the ghosted policy is never swapped with a competent policy.
# Swap occurs after max step is reached.

)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_asymm_ghost(use_discrete):
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_simple_asymm_ghost(action_sizes):
[BRAIN_NAME + "?team=0", brain_name_opp + "?team=1"], use_discrete=use_discrete
[BRAIN_NAME + "?team=0", brain_name_opp + "?team=1"], action_sizes=action_sizes
)
self_play_settings = SelfPlaySettings(
play_against_latest_model_ratio=1.0,

_check_environment_trains(env, {BRAIN_NAME: config, brain_name_opp: config})
@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_asymm_ghost_fails(use_discrete):
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_simple_asymm_ghost_fails(action_sizes):
[BRAIN_NAME + "?team=0", brain_name_opp + "?team=1"], use_discrete=use_discrete
[BRAIN_NAME + "?team=0", brain_name_opp + "?team=1"], action_sizes=action_sizes
)
# This config should fail because the team that is not learning when both have reached
# max step should be executing the initial, untrained policy.

@pytest.fixture(scope="session")
def simple_record(tmpdir_factory):
def record_demo(use_discrete, num_visual=0, num_vector=1):
def record_demo(action_sizes, num_visual=0, num_vector=1):
use_discrete=use_discrete,
action_sizes=action_sizes,
num_visual=num_visual,
num_vector=num_vector,
n_demos=100,

env.solve()
continuous_size, discrete_size = action_sizes
use_discrete = True if discrete_size > 0 else False
vector_action_size=[2] if use_discrete else [1],
vector_action_descriptions=[""],
vector_action_space_type=discrete if use_discrete else continuous,
vector_action_size_deprecated=[2] if use_discrete else [1],
vector_action_descriptions_deprecated=[""],
vector_action_space_type_deprecated=discrete
if use_discrete
else continuous,
brain_name=BRAIN_NAME,
is_training=True,
)

return record_demo
@pytest.mark.parametrize("use_discrete", [True, False])
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_gail(simple_record, use_discrete, trainer_config):
demo_path = simple_record(use_discrete)
env = SimpleEnvironment([BRAIN_NAME], use_discrete=use_discrete, step_size=0.2)
def test_gail(simple_record, action_sizes, trainer_config):
demo_path = simple_record(action_sizes)
env = SimpleEnvironment([BRAIN_NAME], action_sizes=action_sizes, step_size=0.2)
bc_settings = BehavioralCloningSettings(demo_path=demo_path, steps=1000)
reward_signals = {
RewardSignalType.GAIL: GAILSettings(encoding_size=32, demo_path=demo_path)

_check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_gail_visual_ppo(simple_record, use_discrete):
demo_path = simple_record(use_discrete, num_visual=1, num_vector=0)
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_gail_visual_ppo(simple_record, action_sizes):
demo_path = simple_record(action_sizes, num_visual=1, num_vector=0)
use_discrete=use_discrete,
action_sizes=action_sizes,
step_size=0.2,
)
bc_settings = BehavioralCloningSettings(demo_path=demo_path, steps=1500)

_check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_gail_visual_sac(simple_record, use_discrete):
demo_path = simple_record(use_discrete, num_visual=1, num_vector=0)
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_gail_visual_sac(simple_record, action_sizes):
demo_path = simple_record(action_sizes, num_visual=1, num_vector=0)
use_discrete=use_discrete,
action_sizes=action_sizes,
step_size=0.2,
)
bc_settings = BehavioralCloningSettings(demo_path=demo_path, steps=1000)

12
ml-agents/mlagents/trainers/tests/tensorflow/test_tf_policy.py


[], np.array([], dtype=np.float32), np.array([0]), None
)
result = policy.get_action(step_with_agents, worker_id=0)
assert result == ActionInfo(None, None, {}, [0])
assert result == ActionInfo(None, None, None, {}, [0])
def test_take_action_returns_action_info_when_available():

policy_eval_out = {
"action": np.array([1.0], dtype=np.float32),
"action": np.array([[1.0]], dtype=np.float32),
"pre_action": np.array([[1.0]], dtype=np.float32),
"memory_out": np.array([[2.5]], dtype=np.float32),
"value": np.array([1.1], dtype=np.float32),
}

)
result = policy.get_action(step_with_agents)
print(result)
policy_eval_out["action"], policy_eval_out["value"], policy_eval_out, [0]
policy_eval_out["action"],
policy_eval_out["env_action"],
policy_eval_out["value"],
policy_eval_out,
[0],
)
assert result == expected

41
ml-agents/mlagents/trainers/tests/test_agent_processor.py


AgentManagerQueue,
)
from mlagents.trainers.action_info import ActionInfo
from mlagents.trainers.torch.action_log_probs import LogProbsTuple
from mlagents_envs.base_env import ActionSpec
from mlagents_envs.base_env import ActionSpec, ActionTuple
def create_mock_policy():

mock_policy.retrieve_previous_action.return_value = np.zeros(
(1, 1), dtype=np.float32
)
mock_policy.retrieve_previous_action.return_value = np.zeros((1, 1), dtype=np.int32)
return mock_policy

)
fake_action_outputs = {
"action": [0.1, 0.1],
"action": ActionTuple(continuous=np.array([[0.1], [0.1]])),
"pre_action": [0.1, 0.1],
"log_probs": [0.1, 0.1],
"log_probs": LogProbsTuple(continuous=np.array([[0.1], [0.1]])),
}
mock_decision_steps, mock_terminal_steps = mb.create_mock_steps(
num_agents=2,

fake_action_info = ActionInfo(
action=[0.1, 0.1],
action=ActionTuple(continuous=np.array([[0.1], [0.1]])),
env_action=ActionTuple(continuous=np.array([[0.1], [0.1]])),
value=[0.1, 0.1],
outputs=fake_action_outputs,
agent_ids=mock_decision_steps.agent_id,

action_spec=ActionSpec.create_continuous(2),
)
processor.add_experiences(
mock_decision_steps, mock_terminal_steps, 0, ActionInfo([], [], {}, [])
mock_decision_steps, mock_terminal_steps, 0, ActionInfo.empty()
)
# Assert that the AgentProcessor is still empty
assert len(processor.experience_buffers[0]) == 0
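The hunks above replace flat action lists with ActionTuple objects and add an env_action field to ActionInfo, so a policy's output now carries both the action kept for training and the action actually sent to the environment. A minimal sketch of the new container shapes, assuming the ActionTuple and ActionInfo signatures shown in this diff:

import numpy as np
from mlagents_envs.base_env import ActionTuple
from mlagents.trainers.action_info import ActionInfo

# Two agents, one continuous action each: arrays are (num_agents, action_size).
act = ActionTuple(continuous=np.array([[0.1], [0.1]], dtype=np.float32))
info = ActionInfo(
    action=act,       # action kept for training (e.g. before clipping)
    env_action=act,   # action actually applied in the environment
    value=[0.1, 0.1],
    outputs={},
    agent_ids=[0, 1],
)
assert info.action.continuous.shape == (2, 1)
# ActionInfo.empty() replaces the old ActionInfo([], [], {}, []) placeholder.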

max_trajectory_length=5,
stats_reporter=StatsReporter("testcat"),
)
"action": [0.1],
"action": ActionTuple(continuous=np.array([[0.1]])),
"pre_action": [0.1],
"log_probs": [0.1],
"log_probs": LogProbsTuple(continuous=np.array([[0.1]])),
mock_decision_step, mock_terminal_step = mb.create_mock_steps(
num_agents=1,
observation_shapes=[(8,)],

done=True,
)
fake_action_info = ActionInfo(
action=[0.1],
action=ActionTuple(continuous=np.array([[0.1]])),
env_action=ActionTuple(continuous=np.array([[0.1]])),
value=[0.1],
outputs=fake_action_outputs,
agent_ids=mock_decision_step.agent_id,

processor.add_experiences(
mock_decision_step, mock_terminal_step, _ep, fake_action_info
)
add_calls.append(mock.call([get_global_agent_id(_ep, 0)], [0.1]))
add_calls.append(
mock.call([get_global_agent_id(_ep, 0)], fake_action_outputs["action"])
)
processor.add_experiences(
mock_done_decision_step, mock_done_terminal_step, _ep, fake_action_info
)

max_trajectory_length=5,
stats_reporter=StatsReporter("testcat"),
)
"action": [0.1],
"action": ActionTuple(continuous=np.array([[0.1]])),
"pre_action": [0.1],
"log_probs": [0.1],
"log_probs": LogProbsTuple(continuous=np.array([[0.1]])),
mock_decision_step, mock_terminal_step = mb.create_mock_steps(
num_agents=1,
observation_shapes=[(8,)],

action=[0.1],
action=ActionTuple(continuous=np.array([[0.1]])),
env_action=ActionTuple(continuous=np.array([[0.1]])),
value=[0.1],
outputs=fake_action_outputs,
agent_ids=mock_decision_step.agent_id,

10
ml-agents/mlagents/trainers/tests/test_demo_loader.py


assert len(pair_infos) == total_expected
_, demo_buffer = demo_to_buffer(path_prefix + "/test.demo", 1, BEHAVIOR_SPEC)
assert len(demo_buffer["actions"]) == total_expected - 1
assert (
len(demo_buffer["continuous_action"]) == total_expected - 1
or len(demo_buffer["discrete_action"]) == total_expected - 1
)
def test_load_demo_dir():

assert len(pair_infos) == total_expected
_, demo_buffer = demo_to_buffer(path_prefix + "/test_demo_dir", 1, BEHAVIOR_SPEC)
assert len(demo_buffer["actions"]) == total_expected - 1
assert (
len(demo_buffer["continuous_action"]) == total_expected - 1
or len(demo_buffer["discrete_action"]) == total_expected - 1
)
def test_demo_mismatch():

33
ml-agents/mlagents/trainers/tests/test_subprocess_env_manager.py


from mlagents_envs.side_channel.engine_configuration_channel import EngineConfig
from mlagents_envs.side_channel.stats_side_channel import StatsAggregationMethod
from mlagents_envs.exception import UnityEnvironmentException
from mlagents.trainers.tests.simple_test_envs import SimpleEnvironment
from mlagents.trainers.tests.simple_test_envs import (
SimpleEnvironment,
UnexpectedExceptionEnvironment,
)
from mlagents.trainers.stats import StatsReporter
from mlagents.trainers.agent_processor import AgentManagerQueue
from mlagents.trainers.tests.check_env_trains import (

@pytest.mark.parametrize("num_envs", [1, 4])
def test_subprocess_env_endtoend(num_envs):
def simple_env_factory(worker_id, config):
env = SimpleEnvironment(["1D"], use_discrete=True)
env = SimpleEnvironment(["1D"], action_sizes=(0, 1))
return env
env_manager = SubprocessEnvManager(

assert all(
val > 0.7 for val in StatsReporter.writers[0].get_last_rewards().values()
)
env_manager.close()
class CustomTestOnlyException(Exception):
pass
@pytest.mark.parametrize("num_envs", [1, 4])
def test_subprocess_failing_step(num_envs):
def failing_step_env_factory(_worker_id, _config):
env = UnexpectedExceptionEnvironment(
["1D"], use_discrete=True, to_raise=CustomTestOnlyException
)
return env
env_manager = SubprocessEnvManager(
failing_step_env_factory, EngineConfig.default_config()
)
# Expect the exception raised to be routed back up to the top level.
with pytest.raises(CustomTestOnlyException):
check_environment_trains(
failing_step_env_factory(0, []),
{"1D": ppo_dummy_config()},
env_manager=env_manager,
success_threshold=None,
)
env_manager.close()

7
ml-agents/mlagents/trainers/tests/test_trajectory.py


"memory",
"masks",
"done",
"actions_pre",
"actions",
"action_probs",
"continuous_action",
"discrete_action",
"continuous_log_probs",
"discrete_log_probs",
"action_mask",
"prev_action",
"environment_rewards",

13
ml-agents/mlagents/trainers/tests/torch/saver/test_saver.py


).unsqueeze(0)
with torch.no_grad():
_, _, log_probs1, _, _ = policy1.sample_actions(
vec_obs, vis_obs, masks=masks, memories=memories, all_log_probs=True
_, log_probs1, _, _ = policy1.sample_actions(
vec_obs, vis_obs, masks=masks, memories=memories
_, _, log_probs2, _, _ = policy2.sample_actions(
vec_obs, vis_obs, masks=masks, memories=memories, all_log_probs=True
_, log_probs2, _, _ = policy2.sample_actions(
vec_obs, vis_obs, masks=masks, memories=memories
np.testing.assert_array_equal(log_probs1, log_probs2)
np.testing.assert_array_equal(
log_probs1.all_discrete_tensor, log_probs2.all_discrete_tensor
)
@pytest.mark.parametrize("discrete", [True, False], ids=["discrete", "continuous"])

2
ml-agents/mlagents/trainers/tests/torch/test_distributions.py


optimizer = torch.optim.Adam(gauss_dist.parameters(), lr=3e-3)
for _ in range(50):
dist_inst = gauss_dist(sample_embedding)[0]
dist_inst = gauss_dist(sample_embedding)
if tanh_squash:
assert isinstance(dist_inst, TanhGaussianDistInstance)
else:

90
ml-agents/mlagents/trainers/tests/torch/test_networks.py


from mlagents.trainers.torch.networks import (
NetworkBody,
ValueNetwork,
SimpleActor,
from mlagents.trainers.torch.distributions import (
GaussianDistInstance,
CategoricalDistInstance,
)
from mlagents.trainers.tests.torch.test_encoders import compare_models
def test_networkbody_vector():

assert _out[0] == pytest.approx(1.0, abs=0.1)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_actor(use_discrete):
obs_size = 4
network_settings = NetworkSettings()
obs_shapes = [(obs_size,)]
act_size = [2]
if use_discrete:
masks = torch.ones((1, 1))
action_spec = ActionSpec.create_discrete(tuple(act_size))
else:
masks = None
action_spec = ActionSpec.create_continuous(act_size[0])
actor = SimpleActor(obs_shapes, network_settings, action_spec)
# Test get_dist
sample_obs = torch.ones((1, obs_size))
dists, _ = actor.get_dists([sample_obs], [], masks=masks)
for dist in dists:
if use_discrete:
assert isinstance(dist, CategoricalDistInstance)
else:
assert isinstance(dist, GaussianDistInstance)
# Test sample_actions
actions = actor.sample_action(dists)
for act in actions:
if use_discrete:
assert act.shape == (1, 1)
else:
assert act.shape == (1, act_size[0])
# Test forward
actions, ver_num, mem_size, is_cont, act_size_vec = actor.forward(
[sample_obs], [], masks=masks
)
for act in actions:
# This is different from above for ONNX export
if use_discrete:
assert act.shape == tuple(act_size)
else:
assert act.shape == (act_size[0], 1)
assert mem_size == 0
assert is_cont == int(not use_discrete)
assert act_size_vec == torch.tensor(act_size)
memory=NetworkSettings.MemorySettings() if lstm else None
memory=NetworkSettings.MemorySettings() if lstm else None, normalize=True
act_size = [2]
act_size = 2
mask = torch.ones([1, act_size * 2])
action_spec = ActionSpec.create_continuous(act_size[0])
# action_spec = ActionSpec.create_continuous(act_size[0])
action_spec = ActionSpec(act_size, tuple(act_size for _ in range(act_size)))
actor = ac_type(obs_shapes, network_settings, action_spec, stream_names)
if lstm:
sample_obs = torch.ones((1, network_settings.memory.sequence_length, obs_size))

else:
assert value_out[stream].shape == (1,)
# Test get_dist_and_value
dists, value_out, mem_out = actor.get_dist_and_value(
[sample_obs], [], memories=memories
# Test get action stats and_value
action, log_probs, entropies, value_out, mem_out = actor.get_action_stats_and_value(
[sample_obs], [], memories=memories, masks=mask
if lstm:
assert action.continuous_tensor.shape == (64, 2)
else:
assert action.continuous_tensor.shape == (1, 2)
assert len(action.discrete_list) == 2
for _disc in action.discrete_list:
if lstm:
assert _disc.shape == (64, 1)
else:
assert _disc.shape == (1, 1)
for dist in dists:
assert isinstance(dist, GaussianDistInstance)
# Test normalization
actor.update_normalization(sample_obs)
if isinstance(actor, SeparateActorCritic):
for act_proc, crit_proc in zip(
actor.network_body.vector_processors,
actor.critic.network_body.vector_processors,
):
assert compare_models(act_proc, crit_proc)
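The actor-critic test above now exercises a hybrid action space: ActionSpec takes the continuous size first and the tuple of discrete branch sizes second, so ActionSpec(2, (2, 2)) describes two continuous actions plus two discrete branches of two choices each, which is why the action mask is built with act_size * 2 = 4 entries. A small sketch, assuming ActionSpec's fields are (continuous_size, discrete_branches) as in mlagents_envs at this revision:

from mlagents_envs.base_env import ActionSpec

act_size = 2
# Hybrid spec: 2 continuous actions and 2 discrete branches of size 2 each.
action_spec = ActionSpec(act_size, tuple(act_size for _ in range(act_size)))
assert action_spec.continuous_size == 2
assert action_spec.discrete_size == 2           # number of discrete branches
assert sum(action_spec.discrete_branches) == 4  # total choices -> mask width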

28
ml-agents/mlagents/trainers/tests/torch/test_policy.py


from mlagents.trainers.tests import mock_brain as mb
from mlagents.trainers.settings import TrainerSettings, NetworkSettings
from mlagents.trainers.torch.utils import ModelUtils
from mlagents.trainers.torch.agent_action import AgentAction
VECTOR_ACTION_SPACE = 2
VECTOR_OBS_SPACE = 8

run_out = policy.evaluate(decision_step, list(decision_step.agent_id))
if discrete:
run_out["action"].shape == (NUM_AGENTS, len(DISCRETE_ACTION_SPACE))
run_out["action"].discrete.shape == (NUM_AGENTS, len(DISCRETE_ACTION_SPACE))
assert run_out["action"].shape == (NUM_AGENTS, VECTOR_ACTION_SPACE)
assert run_out["action"].continuous.shape == (NUM_AGENTS, VECTOR_ACTION_SPACE)
@pytest.mark.parametrize("discrete", [True, False], ids=["discrete", "continuous"])

buffer = mb.simulate_rollout(64, policy.behavior_spec, memory_size=policy.m_size)
vec_obs = [ModelUtils.list_to_tensor(buffer["vector_obs"])]
act_masks = ModelUtils.list_to_tensor(buffer["action_mask"])
if policy.use_continuous_act:
actions = ModelUtils.list_to_tensor(buffer["actions"]).unsqueeze(-1)
else:
actions = ModelUtils.list_to_tensor(buffer["actions"], dtype=torch.long)
agent_action = AgentAction.from_dict(buffer)
vis_obs = []
for idx, _ in enumerate(policy.actor_critic.network_body.visual_processors):
vis_ob = ModelUtils.list_to_tensor(buffer["visual_obs%d" % idx])

vec_obs,
vis_obs,
masks=act_masks,
actions=actions,
actions=agent_action,
memories=memories,
seq_len=policy.sequence_length,
)

_size = policy.behavior_spec.action_spec.continuous_size
assert log_probs.shape == (64, _size)
assert log_probs.flatten().shape == (64, _size)
assert entropy.shape == (64,)
for val in values.values():
assert val.shape == (64,)

if len(memories) > 0:
memories = torch.stack(memories).unsqueeze(0)
(
sampled_actions,
clipped_actions,
log_probs,
entropies,
memories,
) = policy.sample_actions(
(sampled_actions, log_probs, entropies, memories) = policy.sample_actions(
all_log_probs=not policy.use_continuous_act,
assert log_probs.shape == (
assert log_probs.all_discrete_tensor.shape == (
assert log_probs.shape == (64, policy.behavior_spec.action_spec.continuous_size)
assert clipped_actions.shape == (
assert log_probs.continuous_tensor.shape == (
64,
policy.behavior_spec.action_spec.continuous_size,
)

15
ml-agents/mlagents/trainers/tests/torch/test_ppo.py


update_buffer["extrinsic_returns"] = update_buffer["environment_rewards"]
update_buffer["extrinsic_value_estimates"] = update_buffer["environment_rewards"]
# NOTE: In TensorFlow, the log_probs are saved as one for every discrete action, whereas
# in PyTorch they are saved as the total probability per branch. So we need to modify the
# log prob in the fake buffer here.
update_buffer["action_probs"] = np.ones_like(update_buffer["actions"])
return_stats = optimizer.update(
update_buffer,
num_sequences=update_buffer.num_experiences // optimizer.policy.sequence_length,
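The NOTE above flags a storage difference between the two trainers: TensorFlow keeps one log probability for every possible discrete action, while PyTorch keeps a single value per branch, which is why the fake buffer's action_probs are overwritten before calling optimizer.update. A rough shape-only sketch for one discrete branch of three choices over 64 experiences (illustration only, not the trainers' actual buffer code):

import numpy as np

num_experiences, branch_size = 64, 3
# TensorFlow-style: one log prob per possible action in the branch.
tf_style_log_probs = np.zeros((num_experiences, branch_size), dtype=np.float32)
# PyTorch-style: one (total) log prob per branch.
torch_style_log_probs = np.zeros((num_experiences, 1), dtype=np.float32)
# The tests sidestep the mismatch by rewriting the fake buffer, e.g.
# update_buffer["action_probs"] = np.ones_like(update_buffer["actions"]).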

update_buffer["extrinsic_value_estimates"] = update_buffer["environment_rewards"]
update_buffer["curiosity_returns"] = update_buffer["environment_rewards"]
update_buffer["curiosity_value_estimates"] = update_buffer["environment_rewards"]
# NOTE: In TensorFlow, the log_probs are saved as one for every discrete action, whereas
# in PyTorch they are saved as the total probability per branch. So we need to modify the
# log prob in the fake buffer here.
update_buffer["action_probs"] = np.ones_like(update_buffer["actions"])
optimizer.update(
update_buffer,
num_sequences=update_buffer.num_experiences // optimizer.policy.sequence_length,

update_buffer["extrinsic_value_estimates"] = update_buffer["environment_rewards"]
update_buffer["gail_returns"] = update_buffer["environment_rewards"]
update_buffer["gail_value_estimates"] = update_buffer["environment_rewards"]
update_buffer["continuous_log_probs"] = np.ones_like(
update_buffer["continuous_action"]
)
optimizer.update(
update_buffer,
num_sequences=update_buffer.num_experiences // optimizer.policy.sequence_length,

update_buffer["extrinsic_value_estimates"] = update_buffer["environment_rewards"]
update_buffer["gail_returns"] = update_buffer["environment_rewards"]
update_buffer["gail_value_estimates"] = update_buffer["environment_rewards"]
# NOTE: In TensorFlow, the log_probs are saved as one for every discrete action, whereas
# in PyTorch they are saved as the total probability per branch. So we need to modify the
# log prob in the fake buffer here.
update_buffer["action_probs"] = np.ones_like(update_buffer["actions"])
optimizer.update(
update_buffer,
num_sequences=update_buffer.num_experiences // optimizer.policy.sequence_length,

2
ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_curiosity.py


for _ in range(200):
curiosity_rp.update(buffer)
prediction = curiosity_rp._network.predict_action(buffer)[0]
target = torch.tensor(buffer["actions"][0])
target = torch.tensor(buffer["continuous_action"][0])
error = torch.mean((prediction - target) ** 2).item()
assert error < 0.001

11
ml-agents/mlagents/trainers/tests/torch/test_reward_providers/utils.py


np.random.normal(size=shape).astype(np.float32)
for shape in behavior_spec.observation_shapes
]
action = behavior_spec.action_spec.random_action(1)[0, :]
action_buffer = behavior_spec.action_spec.random_action(1)
action = {}
if behavior_spec.action_spec.continuous_size > 0:
action["continuous_action"] = action_buffer.continuous
if behavior_spec.action_spec.discrete_size > 0:
action["discrete_action"] = action_buffer.discrete
for _ in range(number):
curr_split_obs = SplitObservations.from_observations(curr_observations)
next_split_obs = SplitObservations.from_observations(next_observations)

)
buffer["vector_obs"].append(curr_split_obs.vector_observations)
buffer["next_vector_in"].append(next_split_obs.vector_observations)
buffer["actions"].append(action)
for _act_type, _act in action.items():
buffer[_act_type].append(_act[0, :])
buffer["reward"].append(np.ones(1, dtype=np.float32) * reward)
buffer["masks"].append(np.ones(1, dtype=np.float32))
buffer["done"] = np.zeros(number, dtype=np.float32)

3
ml-agents/mlagents/trainers/tests/torch/test_sac.py


"Losses/Value Loss",
"Losses/Q1 Loss",
"Losses/Q2 Loss",
"Policy/Entropy Coeff",
"Policy/Continuous Entropy Coeff",
"Policy/Discrete Entropy Coeff",
"Policy/Learning Rate",
]
for stat in required_stats:

132
ml-agents/mlagents/trainers/tests/torch/test_simple_rl.py


from mlagents_envs.communicator_objects.demonstration_meta_pb2 import (
DemonstrationMetaProto,
)
from mlagents_envs.communicator_objects.brain_parameters_pb2 import BrainParametersProto
from mlagents_envs.communicator_objects.space_type_pb2 import discrete, continuous
from mlagents_envs.communicator_objects.brain_parameters_pb2 import (
BrainParametersProto,
ActionSpecProto,
)
from mlagents.trainers.tests.dummy_config import ppo_dummy_config, sac_dummy_config
from mlagents.trainers.tests.check_env_trains import (

SAC_TORCH_CONFIG = attr.evolve(sac_dummy_config(), framework=FrameworkType.PYTORCH)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_ppo(use_discrete):
env = SimpleEnvironment([BRAIN_NAME], use_discrete=use_discrete)
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_simple_ppo(action_sizes):
env = SimpleEnvironment([BRAIN_NAME], action_sizes=action_sizes)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_2d_ppo(use_discrete):
env = SimpleEnvironment(
[BRAIN_NAME], use_discrete=use_discrete, action_size=2, step_size=0.8
)
@pytest.mark.parametrize("action_sizes", [(0, 2), (2, 0)])
def test_2d_ppo(action_sizes):
env = SimpleEnvironment([BRAIN_NAME], action_sizes=action_sizes, step_size=0.8)
new_hyperparams = attr.evolve(
PPO_TORCH_CONFIG.hyperparameters, batch_size=64, buffer_size=640
)

check_environment_trains(env, {BRAIN_NAME: config})
@pytest.mark.parametrize("use_discrete", [True, False])
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_visual_ppo(num_visual, use_discrete):
def test_visual_ppo(num_visual, action_sizes):
use_discrete=use_discrete,
action_sizes=action_sizes,
num_visual=num_visual,
num_vector=0,
step_size=0.2,

def test_visual_advanced_ppo(vis_encode_type, num_visual):
env = SimpleEnvironment(
[BRAIN_NAME],
use_discrete=True,
action_sizes=(0, 1),
num_visual=num_visual,
num_vector=0,
step_size=0.5,

check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.5)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_recurrent_ppo(use_discrete):
env = MemoryEnvironment([BRAIN_NAME], use_discrete=use_discrete)
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_recurrent_ppo(action_sizes):
env = MemoryEnvironment([BRAIN_NAME], action_sizes=action_sizes)
new_network_settings = attr.evolve(
PPO_TORCH_CONFIG.network_settings,
memory=NetworkSettings.MemorySettings(memory_size=16),

check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_sac(use_discrete):
env = SimpleEnvironment([BRAIN_NAME], use_discrete=use_discrete)
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_simple_sac(action_sizes):
env = SimpleEnvironment([BRAIN_NAME], action_sizes=action_sizes)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_2d_sac(use_discrete):
env = SimpleEnvironment(
[BRAIN_NAME], use_discrete=use_discrete, action_size=2, step_size=0.8
)
@pytest.mark.parametrize("action_sizes", [(0, 2), (2, 0)])
def test_2d_sac(action_sizes):
env = SimpleEnvironment([BRAIN_NAME], action_sizes=action_sizes, step_size=0.8)
SAC_TORCH_CONFIG, hyperparameters=new_hyperparams, max_steps=10000
SAC_TORCH_CONFIG, hyperparameters=new_hyperparams, max_steps=6000
@pytest.mark.parametrize("use_discrete", [True, False])
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_visual_sac(num_visual, use_discrete):
def test_visual_sac(num_visual, action_sizes):
use_discrete=use_discrete,
action_sizes=action_sizes,
num_visual=num_visual,
num_vector=0,
step_size=0.2,

def test_visual_advanced_sac(vis_encode_type, num_visual):
env = SimpleEnvironment(
[BRAIN_NAME],
use_discrete=True,
action_sizes=(0, 1),
num_visual=num_visual,
num_vector=0,
step_size=0.5,

check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.5)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_recurrent_sac(use_discrete):
step_size = 0.2 if use_discrete else 0.5
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_recurrent_sac(action_sizes):
step_size = 0.2 if action_sizes == (0, 1) else 0.5
[BRAIN_NAME], use_discrete=use_discrete, step_size=step_size
[BRAIN_NAME], action_sizes=action_sizes, step_size=step_size
)
new_networksettings = attr.evolve(
SAC_TORCH_CONFIG.network_settings,

check_environment_trains(env, {BRAIN_NAME: config})
@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_ghost(use_discrete):
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_simple_ghost(action_sizes):
[BRAIN_NAME + "?team=0", BRAIN_NAME + "?team=1"], use_discrete=use_discrete
[BRAIN_NAME + "?team=0", BRAIN_NAME + "?team=1"], action_sizes=action_sizes
)
self_play_settings = SelfPlaySettings(
play_against_latest_model_ratio=1.0, save_steps=2000, swap_steps=2000

@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_ghost_fails(use_discrete):
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_simple_ghost_fails(action_sizes):
[BRAIN_NAME + "?team=0", BRAIN_NAME + "?team=1"], use_discrete=use_discrete
[BRAIN_NAME + "?team=0", BRAIN_NAME + "?team=1"], action_sizes=action_sizes
)
# This config should fail because the ghosted policy is never swapped with a competent policy.
# Swap occurs after max step is reached.

)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_asymm_ghost(use_discrete):
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_simple_asymm_ghost(action_sizes):
[BRAIN_NAME + "?team=0", brain_name_opp + "?team=1"], use_discrete=use_discrete
[BRAIN_NAME + "?team=0", brain_name_opp + "?team=1"], action_sizes=action_sizes
)
self_play_settings = SelfPlaySettings(
play_against_latest_model_ratio=1.0,

check_environment_trains(env, {BRAIN_NAME: config, brain_name_opp: config})
@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_asymm_ghost_fails(use_discrete):
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_simple_asymm_ghost_fails(action_sizes):
[BRAIN_NAME + "?team=0", brain_name_opp + "?team=1"], use_discrete=use_discrete
[BRAIN_NAME + "?team=0", brain_name_opp + "?team=1"], action_sizes=action_sizes
)
# This config should fail because the team that is not learning when both have reached
# max step should be executing the initial, untrained policy.

@pytest.fixture(scope="session")
def simple_record(tmpdir_factory):
def record_demo(use_discrete, num_visual=0, num_vector=1):
def record_demo(action_sizes, num_visual=0, num_vector=1):
use_discrete=use_discrete,
action_sizes=action_sizes,
num_visual=num_visual,
num_vector=num_vector,
n_demos=100,

env.solve()
agent_info_protos = env.demonstration_protos[BRAIN_NAME]
meta_data_proto = DemonstrationMetaProto()
continuous_action_size, discrete_action_size = action_sizes
action_spec_proto = ActionSpecProto(
num_continuous_actions=continuous_action_size,
num_discrete_actions=discrete_action_size,
discrete_branch_sizes=[2] if discrete_action_size > 0 else None,
)
vector_action_size=[2] if use_discrete else [1],
vector_action_descriptions=[""],
vector_action_space_type=discrete if use_discrete else continuous,
brain_name=BRAIN_NAME,
is_training=True,
brain_name=BRAIN_NAME, is_training=True, action_spec=action_spec_proto
action_type = "Discrete" if use_discrete else "Continuous"
action_type = "Discrete" if action_sizes else "Continuous"
demo_path_name = "1DTest" + action_type + ".demo"
demo_path = str(tmpdir_factory.mktemp("tmp_demo").join(demo_path_name))
write_demo(demo_path, meta_data_proto, brain_param_proto, agent_info_protos)
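The torch fixture above records the demo's action space through the new ActionSpecProto instead of the deprecated vector_action_* fields still visible in the TensorFlow fixture earlier in this diff. A minimal sketch of just the proto construction, assuming the message names imported in this file and using "1D" as a stand-in for BRAIN_NAME:

from mlagents_envs.communicator_objects.brain_parameters_pb2 import (
    ActionSpecProto,
    BrainParametersProto,
)

continuous_action_size, discrete_action_size = (0, 1)  # one discrete branch
action_spec_proto = ActionSpecProto(
    num_continuous_actions=continuous_action_size,
    num_discrete_actions=discrete_action_size,
    discrete_branch_sizes=[2] if discrete_action_size > 0 else None,
)
# Brain parameters now reference the ActionSpec directly.
brain_param_proto = BrainParametersProto(
    brain_name="1D", is_training=True, action_spec=action_spec_proto
)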

@pytest.mark.parametrize("use_discrete", [True, False])
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_gail(simple_record, use_discrete, trainer_config):
demo_path = simple_record(use_discrete)
env = SimpleEnvironment([BRAIN_NAME], use_discrete=use_discrete, step_size=0.2)
def test_gail(simple_record, action_sizes, trainer_config):
demo_path = simple_record(action_sizes)
env = SimpleEnvironment([BRAIN_NAME], action_sizes=action_sizes, step_size=0.2)
bc_settings = BehavioralCloningSettings(demo_path=demo_path, steps=1000)
reward_signals = {
RewardSignalType.GAIL: GAILSettings(encoding_size=32, demo_path=demo_path)

check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_gail_visual_ppo(simple_record, use_discrete):
demo_path = simple_record(use_discrete, num_visual=1, num_vector=0)
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_gail_visual_ppo(simple_record, action_sizes):
demo_path = simple_record(action_sizes, num_visual=1, num_vector=0)
use_discrete=use_discrete,
action_sizes=action_sizes,
step_size=0.2,
)
bc_settings = BehavioralCloningSettings(demo_path=demo_path, steps=1500)

check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_gail_visual_sac(simple_record, use_discrete):
demo_path = simple_record(use_discrete, num_visual=1, num_vector=0)
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_gail_visual_sac(simple_record, action_sizes):
demo_path = simple_record(action_sizes, num_visual=1, num_vector=0)
use_discrete=use_discrete,
action_sizes=action_sizes,
step_size=0.2,
)
bc_settings = BehavioralCloningSettings(demo_path=demo_path, steps=1000)

44
ml-agents/mlagents/trainers/tests/torch/test_utils.py


from mlagents.trainers.torch.utils import ModelUtils
from mlagents.trainers.exception import UnityTrainerException
from mlagents.trainers.torch.encoders import VectorInput
from mlagents.trainers.torch.distributions import (
CategoricalDistInstance,
GaussianDistInstance,
)
def test_min_visual_size():

]
for res, exp in zip(oh_actions, expected_result):
assert torch.equal(res, exp)
def test_get_probs_and_entropy():
# Test continuous
# Add two dists to the list. This isn't done in the code but we'd like to support it.
dist_list = [
GaussianDistInstance(torch.zeros((1, 2)), torch.ones((1, 2))),
GaussianDistInstance(torch.zeros((1, 2)), torch.ones((1, 2))),
]
action_list = [torch.zeros((1, 2)), torch.zeros((1, 2))]
log_probs, entropies, all_probs = ModelUtils.get_probs_and_entropy(
action_list, dist_list
)
assert log_probs.shape == (1, 2, 2)
assert entropies.shape == (1, 1, 2)
assert all_probs is None
for log_prob in log_probs.flatten():
# Log prob of standard normal at 0
assert log_prob == pytest.approx(-0.919, abs=0.01)
for ent in entropies.flatten():
# entropy of standard normal at 0
assert ent == pytest.approx(1.42, abs=0.01)
# Test continuous
# Add two dists to the list.
act_size = 2
test_prob = torch.tensor(
[[1.0 - 0.1 * (act_size - 1)] + [0.1] * (act_size - 1)]
) # High prob for first action
dist_list = [CategoricalDistInstance(test_prob), CategoricalDistInstance(test_prob)]
action_list = [torch.tensor([0]), torch.tensor([1])]
log_probs, entropies, all_probs = ModelUtils.get_probs_and_entropy(
action_list, dist_list
)
assert all_probs.shape == (1, len(dist_list * act_size))
assert entropies.shape == (1, len(dist_list))
# Make sure the first action has a higher probability than the others.
assert log_probs.flatten()[0] > log_probs.flatten()[1]
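The constants asserted above are just the standard-normal values: the log density at zero is -0.5 * ln(2π) ≈ -0.919 and the differential entropy is 0.5 * ln(2πe) ≈ 1.419 per dimension; for the categorical case, test_prob with act_size = 2 is [[0.9, 0.1]], so the first selected action (index 0) must score a higher log probability than the second (index 1). A quick numeric check, independent of the trainers' code:

import math

log_prob_at_zero = -0.5 * math.log(2 * math.pi)            # ≈ -0.9189
entropy_std_normal = 0.5 * math.log(2 * math.pi * math.e)  # ≈ 1.4189

assert abs(log_prob_at_zero - (-0.919)) < 0.01
assert abs(entropy_std_normal - 1.42) < 0.01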
def test_masked_mean():

6
ml-agents/mlagents/trainers/tf/components/bc/module.py


self.policy.batch_size_ph: n_sequences,
self.policy.sequence_length_ph: self.policy.sequence_length,
}
feed_dict[self.model.action_in_expert] = mini_batch_demo["actions"]
feed_dict[self.model.action_in_expert] = mini_batch_demo["discrete_action"]
feed_dict[self.policy.action_masks] = np.ones(
(
self.n_sequences * self.policy.sequence_length,

)
else:
feed_dict[self.model.action_in_expert] = mini_batch_demo[
"continuous_action"
]
if self.policy.vec_obs_size > 0:
feed_dict[self.policy.vector_in] = mini_batch_demo["vector_obs"]
for i, _ in enumerate(self.policy.visual_in):

10
ml-agents/mlagents/trainers/tf/components/reward_signals/curiosity/signal.py


def evaluate_batch(self, mini_batch: AgentBuffer) -> RewardSignalResult:
feed_dict: Dict[tf.Tensor, Any] = {
self.policy.batch_size_ph: len(mini_batch["actions"]),
self.policy.batch_size_ph: len(mini_batch["vector_obs"]),
self.policy.sequence_length_ph: self.policy.sequence_length,
}
if self.policy.use_vec_obs:

feed_dict[self.model.next_visual_in[i]] = _next_obs
if self.policy.use_continuous_act:
feed_dict[self.policy.selected_actions] = mini_batch["actions"]
feed_dict[self.policy.selected_actions] = mini_batch["continuous_action"]
feed_dict[self.policy.output] = mini_batch["actions"]
feed_dict[self.policy.output] = mini_batch["discrete_action"]
unscaled_reward = self.policy.sess.run(
self.model.intrinsic_reward, feed_dict=feed_dict
)

policy.mask_input: mini_batch["masks"],
}
if self.policy.use_continuous_act:
feed_dict[policy.selected_actions] = mini_batch["actions"]
feed_dict[policy.selected_actions] = mini_batch["continuous_action"]
feed_dict[policy.output] = mini_batch["actions"]
feed_dict[policy.output] = mini_batch["discrete_action"]
if self.policy.use_vec_obs:
feed_dict[policy.vector_in] = mini_batch["vector_obs"]
feed_dict[self.model.next_vector_in] = mini_batch["next_vector_in"]

17
ml-agents/mlagents/trainers/tf/components/reward_signals/gail/signal.py


def evaluate_batch(self, mini_batch: AgentBuffer) -> RewardSignalResult:
feed_dict: Dict[tf.Tensor, Any] = {
self.policy.batch_size_ph: len(mini_batch["actions"]),
self.policy.batch_size_ph: len(mini_batch["vector_obs"]),
self.policy.sequence_length_ph: self.policy.sequence_length,
}
if self.model.use_vail:

feed_dict[self.policy.visual_in[i]] = _obs
if self.policy.use_continuous_act:
feed_dict[self.policy.selected_actions] = mini_batch["actions"]
feed_dict[self.policy.selected_actions] = mini_batch["continuous_action"]
feed_dict[self.policy.output] = mini_batch["actions"]
feed_dict[self.policy.output] = mini_batch["discrete_action"]
feed_dict[self.model.done_policy_holder] = np.array(
mini_batch["done"]
).flatten()

if self.model.use_vail:
feed_dict[self.model.use_noise] = [1]
feed_dict[self.model.action_in_expert] = np.array(mini_batch_demo["actions"])
feed_dict[policy.selected_actions] = mini_batch["actions"]
feed_dict[policy.selected_actions] = mini_batch["continuous_action"]
feed_dict[self.model.action_in_expert] = np.array(
mini_batch_demo["continuous_action"]
)
feed_dict[policy.output] = mini_batch["actions"]
feed_dict[policy.output] = mini_batch["discrete_action"]
feed_dict[self.model.action_in_expert] = np.array(
mini_batch_demo["discrete_action"]
)
if self.policy.use_vis_obs > 0:
for i in range(len(policy.visual_in)):

Some files were not shown because too many files have changed in this diff.
