
Develop hybrid action staging (#4702)

Co-authored-by: Ervin T <ervin@unity3d.com>
Co-authored-by: Vincent-Pierre BERGES <vincentpierre@unity3d.com>
Co-authored-by: Ruo-Ping Dong <ruoping.dong@unity3d.com>
Co-authored-by: Chris Elion <chris.elion@unity3d.com>
/MLA-1734-demo-provider
GitHub · 3 years ago
Current commit
990f801a
118 files changed, with 2358 insertions and 1618 deletions
  1. 1
      Project/Assets/ML-Agents/Examples/Reacher/Scripts/ReacherAgent.cs
  2. 7
      com.unity.ml-agents/Runtime/Academy.cs
  3. 8
      com.unity.ml-agents/Runtime/Actuators/ActionSegment.cs
  4. 12
      com.unity.ml-agents/Runtime/Actuators/ActionSpec.cs
  5. 4
      com.unity.ml-agents/Runtime/Actuators/ActuatorManager.cs
  6. 96
      com.unity.ml-agents/Runtime/Actuators/IActionReceiver.cs
  7. 34
      com.unity.ml-agents/Runtime/Agent.cs
  8. 10
      com.unity.ml-agents/Runtime/Agent.deprecated.cs
  9. 77
      com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs
  10. 2
      com.unity.ml-agents/Runtime/Communicator/ICommunicator.cs
  11. 13
      com.unity.ml-agents/Runtime/Communicator/RpcCommunicator.cs
  12. 13
      com.unity.ml-agents/Runtime/Communicator/UnityRLCapabilities.cs
  13. 82
      com.unity.ml-agents/Runtime/Grpc/CommunicatorObjects/AgentAction.cs
  14. 348
      com.unity.ml-agents/Runtime/Grpc/CommunicatorObjects/BrainParameters.cs
  15. 44
      com.unity.ml-agents/Runtime/Grpc/CommunicatorObjects/Capabilities.cs
  16. 44
      com.unity.ml-agents/Runtime/Inference/ApplierImpl.cs
  17. 237
      com.unity.ml-agents/Runtime/Inference/BarracudaModelParamLoader.cs
  18. 4
      com.unity.ml-agents/Runtime/Inference/GeneratorImpl.cs
  19. 12
      com.unity.ml-agents/Runtime/Inference/ModelRunner.cs
  20. 35
      com.unity.ml-agents/Runtime/Inference/TensorApplier.cs
  21. 26
      com.unity.ml-agents/Runtime/Inference/TensorGenerator.cs
  22. 15
      com.unity.ml-agents/Runtime/Inference/TensorNames.cs
  23. 19
      com.unity.ml-agents/Runtime/Policies/BarracudaPolicy.cs
  24. 14
      com.unity.ml-agents/Runtime/Policies/RemotePolicy.cs
  25. 12
      com.unity.ml-agents/Tests/Editor/Actuators/ActuatorManagerTests.cs
  26. 3
      com.unity.ml-agents/Tests/Editor/DemonstrationTests.cs
  27. 74
      com.unity.ml-agents/Tests/Editor/EditModeTestInternalBrainTensorApplier.cs
  28. 7
      com.unity.ml-agents/Tests/Editor/EditModeTestInternalBrainTensorGenerator.cs
  29. 62
      com.unity.ml-agents/Tests/Editor/ModelRunnerTest.cs
  30. 212
      com.unity.ml-agents/Tests/Editor/ParameterLoaderTest.cs
  31. 2
      com.unity.ml-agents/Tests/Editor/TestModels/continuous2vis8vec2action_deprecated.nn.meta
  32. 2
      com.unity.ml-agents/Tests/Editor/TestModels/discrete1vis0vec_2_3action_recurr_deprecated.nn.meta
  33. 8
      docs/Getting-Started.md
  34. 15
      docs/Learning-Environment-Create-New.md
  35. 80
      docs/Learning-Environment-Design-Agents.md
  36. 64
      docs/Python-API.md
  37. 4
      docs/Training-Configuration-File.md
  38. 10
      gym-unity/gym_unity/envs/__init__.py
  39. 148
      ml-agents-envs/mlagents_envs/base_env.py
  40. 22
      ml-agents-envs/mlagents_envs/communicator_objects/agent_action_pb2.py
  41. 12
      ml-agents-envs/mlagents_envs/communicator_objects/agent_action_pb2.pyi
  42. 82
      ml-agents-envs/mlagents_envs/communicator_objects/brain_parameters_pb2.py
  43. 45
      ml-agents-envs/mlagents_envs/communicator_objects/brain_parameters_pb2.pyi
  44. 13
      ml-agents-envs/mlagents_envs/communicator_objects/capabilities_pb2.py
  45. 6
      ml-agents-envs/mlagents_envs/communicator_objects/capabilities_pb2.pyi
  46. 30
      ml-agents-envs/mlagents_envs/environment.py
  47. 18
      ml-agents-envs/mlagents_envs/mock_communicator.py
  48. 23
      ml-agents-envs/mlagents_envs/rpc_utils.py
  49. 6
      ml-agents-envs/mlagents_envs/tests/test_envs.py
  50. 33
      ml-agents-envs/mlagents_envs/tests/test_rpc_utils.py
  51. 27
      ml-agents-envs/mlagents_envs/tests/test_steps.py
  52. 3
      ml-agents/mlagents/trainers/action_info.py
  53. 23
      ml-agents/mlagents/trainers/agent_processor.py
  54. 2
      ml-agents/mlagents/trainers/buffer.py
  55. 18
      ml-agents/mlagents/trainers/demo_loader.py
  56. 1
      ml-agents/mlagents/trainers/env_manager.py
  57. 4
      ml-agents/mlagents/trainers/optimizer/tf_optimizer.py
  58. 40
      ml-agents/mlagents/trainers/policy/policy.py
  59. 33
      ml-agents/mlagents/trainers/policy/tf_policy.py
  60. 77
      ml-agents/mlagents/trainers/policy/torch_policy.py
  61. 9
      ml-agents/mlagents/trainers/ppo/optimizer_tf.py
  62. 13
      ml-agents/mlagents/trainers/ppo/optimizer_torch.py
  63. 6
      ml-agents/mlagents/trainers/sac/optimizer_tf.py
  64. 284
      ml-agents/mlagents/trainers/sac/optimizer_torch.py
  65. 2
      ml-agents/mlagents/trainers/simple_env_manager.py
  66. 4
      ml-agents/mlagents/trainers/subprocess_env_manager.py
  67. 24
      ml-agents/mlagents/trainers/tests/mock_brain.py
  68. 82
      ml-agents/mlagents/trainers/tests/simple_test_envs.py
  69. 66
      ml-agents/mlagents/trainers/tests/tensorflow/test_ppo.py
  70. 128
      ml-agents/mlagents/trainers/tests/tensorflow/test_simple_rl.py
  71. 12
      ml-agents/mlagents/trainers/tests/tensorflow/test_tf_policy.py
  72. 41
      ml-agents/mlagents/trainers/tests/test_agent_processor.py
  73. 10
      ml-agents/mlagents/trainers/tests/test_demo_loader.py
  74. 2
      ml-agents/mlagents/trainers/tests/test_subprocess_env_manager.py
  75. 7
      ml-agents/mlagents/trainers/tests/test_trajectory.py
  76. 13
      ml-agents/mlagents/trainers/tests/torch/saver/test_saver.py
  77. 2
      ml-agents/mlagents/trainers/tests/torch/test_distributions.py
  78. 90
      ml-agents/mlagents/trainers/tests/torch/test_networks.py
  79. 28
      ml-agents/mlagents/trainers/tests/torch/test_policy.py
  80. 15
      ml-agents/mlagents/trainers/tests/torch/test_ppo.py
  81. 2
      ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_curiosity.py
  82. 11
      ml-agents/mlagents/trainers/tests/torch/test_reward_providers/utils.py
  83. 3
      ml-agents/mlagents/trainers/tests/torch/test_sac.py
  84. 132
      ml-agents/mlagents/trainers/tests/torch/test_simple_rl.py
  85. 44
      ml-agents/mlagents/trainers/tests/torch/test_utils.py
  86. 6
      ml-agents/mlagents/trainers/tf/components/bc/module.py
  87. 10
      ml-agents/mlagents/trainers/tf/components/reward_signals/curiosity/signal.py
  88. 17
      ml-agents/mlagents/trainers/tf/components/reward_signals/gail/signal.py
  89. 52
      ml-agents/mlagents/trainers/torch/components/bc/module.py
  90. 78
      ml-agents/mlagents/trainers/torch/components/reward_providers/curiosity_reward_provider.py
  91. 8
      ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py
  92. 21
      ml-agents/mlagents/trainers/torch/distributions.py
  93. 31
      ml-agents/mlagents/trainers/torch/model_serialization.py
  94. 226
      ml-agents/mlagents/trainers/torch/networks.py
  95. 48
      ml-agents/mlagents/trainers/torch/utils.py
  96. 28
      ml-agents/mlagents/trainers/trajectory.py
  97. 22
      ml-agents/tests/yamato/scripts/run_llapi.py
  98. 4
      protobuf-definitions/proto/mlagents_envs/communicator_objects/agent_action.proto
  99. 14
      protobuf-definitions/proto/mlagents_envs/communicator_objects/brain_parameters.proto
  100. 3
      protobuf-definitions/proto/mlagents_envs/communicator_objects/capabilities.proto

1
Project/Assets/ML-Agents/Examples/Reacher/Scripts/ReacherAgent.cs


/// The agent's four actions correspond to torques on each of the two joints.
/// </summary>
public override void OnActionReceived(ActionBuffers actionBuffers)
{
m_GoalDegree += m_GoalSpeed;
UpdateGoalPosition();

7
com.unity.ml-agents/Runtime/Academy.cs


/// <term>1.2.0</term>
/// <description>Support compression mapping for stacked compressed observations.</description>
/// </item>
/// <item>
/// <term>1.3.0</term>
/// <description>Support action spaces with both continuous and discrete actions.</description>
/// </item>
const string k_ApiVersion = "1.2.0";
const string k_ApiVersion = "1.3.0";
/// <summary>
/// Unity package version of com.unity.ml-agents.

Dispose();
}
}
#endif
/// <summary>

8
com.unity.ml-agents/Runtime/Actuators/ActionSegment.cs


System.Array.Clear(Array, Offset, Length);
}
/// <summary>
/// Check if the segment is empty.
/// </summary>
public bool IsEmpty()
{
return Array == null || Array.Length == 0;
}
/// <inheritdoc/>
IEnumerator<T> IEnumerable<T>.GetEnumerator()
{

12
com.unity.ml-agents/Runtime/Actuators/ActionSpec.cs


/// </summary>
public readonly struct ActionSpec
{
/// <summary>
/// An array of branch sizes for our action space.
///

}
/// <summary>
/// Temporary check that the ActionSpec uses either all continuous or all discrete actions.
/// This should be removed once the trainer supports them.
/// Check that the ActionSpec uses either all continuous or all discrete actions.
/// This is only used when connecting to old versions of the trainer that don't support this.
internal void CheckNotHybrid()
internal void CheckAllContinuousOrDiscrete()
throw new UnityAgentsException("ActionSpecs must be all continuous or all discrete.");
throw new UnityAgentsException(
"Action spaces with both continuous and discrete actions are not supported by the trainer. " +
"ActionSpecs must be all continuous or all discrete."
);
}
}
}
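With the check renamed from CheckNotHybrid to CheckAllContinuousOrDiscrete, an ActionSpec may now mix continuous and discrete actions whenever the connected trainer advertises hybrid support; the check only rejects mixed specs when talking to an older trainer. A minimal sketch of declaring such a hybrid spec, assuming the ActionSpec constructor takes a continuous-action count and discrete branch sizes (verify the exact signature in your package version):

using Unity.MLAgents.Actuators;

public static class HybridSpecExample
{
    // Sketch only: 2 continuous actions plus two discrete branches of sizes 3 and 2.
    // The constructor shape is an assumption for illustration.
    public static ActionSpec MakeHybridSpec()
    {
        var spec = new ActionSpec(2, new[] { 3, 2 });
        // Against a trainer without the HybridActions capability,
        // CheckAllContinuousOrDiscrete() would reject this spec because it
        // mixes continuous and discrete actions.
        return spec;
    }
}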

4
com.unity.ml-agents/Runtime/Actuators/ActuatorManager.cs


Debug.Assert(
!m_Actuators[i].Name.Equals(m_Actuators[i + 1].Name),
"Actuator names must be unique.");
var first = m_Actuators[i].ActionSpec;
var second = m_Actuators[i + 1].ActionSpec;
Debug.Assert(first.NumContinuousActions > 0 == second.NumContinuousActions > 0,
"Actuators on the same Agent must have the same action SpaceType.");
}
}

96
com.unity.ml-agents/Runtime/Actuators/IActionReceiver.cs


}
/// <summary>
/// Construct an <see cref="ActionBuffers"/> instance with <see cref="ActionSpec"/>. All values are initialized to zeros.
/// </summary>
/// <param name="actionSpec">The <see cref="ActionSpec"/> to send to an <see cref="IActionReceiver"/>.</param>
public ActionBuffers(ActionSpec actionSpec)
: this(new ActionSegment<float>(new float[actionSpec.NumContinuousActions]),
new ActionSegment<int>(new int[actionSpec.NumDiscreteActions]))
{ }
/// <summary>
/// Create an <see cref="ActionBuffers"/> instance with ActionSpec and all actions stored as a float array.
/// </summary>
/// <param name="actionSpec"><see cref="ActionSpec"/> of the <see cref="ActionBuffers"/></param>
/// <param name="actions">The float array of all actions, including discrete and continuous actions.</param>
/// <returns>An <see cref="ActionBuffers"/> instance initialized with a <see cref="ActionSpec"/> and a float array.
internal static ActionBuffers FromActionSpec(ActionSpec actionSpec, float[] actions)
{
if (actions == null)
{
return ActionBuffers.Empty;
}
Debug.Assert(actions.Length == actionSpec.NumContinuousActions + actionSpec.NumDiscreteActions,
$"The length of '{nameof(actions)}' does not match the total size of ActionSpec.\n" +
$"{nameof(actions)}.Length: {actions.Length}\n" +
$"{nameof(actionSpec)}: {actionSpec.NumContinuousActions + actionSpec.NumDiscreteActions}");
ActionSegment<float> continuousActionSegment = ActionSegment<float>.Empty;
ActionSegment<int> discreteActionSegment = ActionSegment<int>.Empty;
int offset = 0;
if (actionSpec.NumContinuousActions > 0)
{
continuousActionSegment = new ActionSegment<float>(actions, 0, actionSpec.NumContinuousActions);
offset += actionSpec.NumContinuousActions;
}
if (actionSpec.NumDiscreteActions > 0)
{
int[] discreteActions = new int[actionSpec.NumDiscreteActions];
for (var i = 0; i < actionSpec.NumDiscreteActions; i++)
{
discreteActions[i] = (int)actions[i + offset];
}
discreteActionSegment = new ActionSegment<int>(discreteActions);
}
return new ActionBuffers(continuousActionSegment, discreteActionSegment);
}
/// <summary>
/// Clear the <see cref="ContinuousActions"/> and <see cref="DiscreteActions"/> segments to be all zeros.
/// </summary>
public void Clear()

}
/// <summary>
/// Check if the <see cref="ActionBuffers"/> is empty.
/// </summary>
public bool IsEmpty()
{
return ContinuousActions.IsEmpty() && DiscreteActions.IsEmpty();
}
/// <inheritdoc/>
public override bool Equals(object obj)
{

unchecked
{
return (ContinuousActions.GetHashCode() * 397) ^ DiscreteActions.GetHashCode();
}
}
/// <summary>
/// Packs the continuous and discrete actions into one float array. The array passed into this method
/// must have a Length that is greater than or equal to the sum of the Lengths of
/// <see cref="ContinuousActions"/> and <see cref="DiscreteActions"/>.
/// </summary>
/// <param name="destination">A float array to pack actions into whose length is greater than or
/// equal to the addition of the Lengths of this objects <see cref="ContinuousActions"/> and
/// <see cref="DiscreteActions"/> segments.</param>
public void PackActions(in float[] destination)
{
Debug.Assert(destination.Length >= ContinuousActions.Length + DiscreteActions.Length,
$"argument '{nameof(destination)}' is not large enough to pack the actions into.\n" +
$"{nameof(destination)}.Length: {destination.Length}\n" +
$"{nameof(ContinuousActions)}.Length + {nameof(DiscreteActions)}.Length: {ContinuousActions.Length + DiscreteActions.Length}");
var start = 0;
if (ContinuousActions.Length > 0)
{
Array.Copy(ContinuousActions.Array,
ContinuousActions.Offset,
destination,
start,
ContinuousActions.Length);
start = ContinuousActions.Length;
}
if (start >= destination.Length)
{
return;
}
if (DiscreteActions.Length > 0)
{
Array.Copy(DiscreteActions.Array,
DiscreteActions.Offset,
destination,
start,
DiscreteActions.Length);
}
}
}
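FromActionSpec and PackActions pin down a flat layout for the legacy float[] path: continuous values first, then the discrete choices cast to and from float. A small sketch of that layout using only members visible in this diff (the sizes and values are made up for illustration):

using Unity.MLAgents.Actuators;
using UnityEngine;

public static class ActionBuffersLayoutExample
{
    public static void Demo()
    {
        // Hypothetical agent with 2 continuous actions and 2 discrete branches.
        var buffers = new ActionBuffers(
            new ActionSegment<float>(new[] { 0.5f, -1.0f }),
            new ActionSegment<int>(new[] { 2, 0 }));

        // PackActions writes continuous values first, then the discrete values
        // as floats, so the packed array is { 0.5, -1.0, 2, 0 }.
        var flat = new float[4];
        buffers.PackActions(flat);
        Debug.Log(string.Join(", ", flat));
    }
}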

34
com.unity.ml-agents/Runtime/Agent.cs


/// <summary>
/// Keeps track of the last vector action taken by the Brain.
/// </summary>
public float[] storedVectorActions;
public ActionBuffers storedVectorActions;
/// <summary>
/// For discrete control, specifies the actions that the agent cannot take.

public void ClearActions()
{
Array.Clear(storedVectorActions, 0, storedVectorActions.Length);
storedVectorActions.Clear();
actionBuffers.PackActions(storedVectorActions);
var continuousActions = storedVectorActions.ContinuousActions;
for (var i = 0; i < actionBuffers.ContinuousActions.Length; i++)
{
continuousActions[i] = actionBuffers.ContinuousActions[i];
}
var discreteActions = storedVectorActions.DiscreteActions;
for (var i = 0; i < actionBuffers.DiscreteActions.Length; i++)
{
discreteActions[i] = actionBuffers.DiscreteActions[i];
}
}
}

InitializeSensors();
}
m_Info.storedVectorActions = new float[m_ActuatorManager.TotalNumberOfActions];
m_Info.storedVectorActions = new ActionBuffers(
new float[m_ActuatorManager.NumContinuousActions],
new int[m_ActuatorManager.NumDiscreteActions]
);
// The first time the Academy resets, all Agents in the scene will be
// forced to reset through the <see cref="AgentForceReset"/> event.

m_CumulativeReward = 0f;
m_RequestAction = false;
m_RequestDecision = false;
Array.Clear(m_Info.storedVectorActions, 0, m_Info.storedVectorActions.Length);
m_Info.storedVectorActions.Clear();
}
/// <summary>

}
else
{
m_ActuatorManager.StoredActions.PackActions(m_Info.storedVectorActions);
m_Info.CopyActions(m_ActuatorManager.StoredActions);
}
UpdateSensors();

/// </param>
public virtual void OnActionReceived(ActionBuffers actions)
{
actions.PackActions(m_LegacyActionCache);
if (!actions.ContinuousActions.IsEmpty())
{
m_LegacyActionCache = actions.ContinuousActions.Array;
}
else
{
m_LegacyActionCache = Array.ConvertAll(actions.DiscreteActions.Array, x => (float)x);
}
OnActionReceived(m_LegacyActionCache);
}

{
OnEpisodeBegin();
}
}
/// <summary>
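Since AgentInfo.storedVectorActions is now an ActionBuffers and the float[] overload of OnActionReceived is kept only as a legacy fallback, agent subclasses are expected to override the ActionBuffers form and read the two segments directly. A minimal sketch of such an override (the torque and gear semantics are made up for illustration):

using Unity.MLAgents;
using Unity.MLAgents.Actuators;

public class HybridAgent : Agent
{
    public override void OnActionReceived(ActionBuffers actions)
    {
        // Continuous segment: e.g. two torques in [-1, 1].
        float torqueA = actions.ContinuousActions[0];
        float torqueB = actions.ContinuousActions[1];

        // Discrete segment: e.g. branch 0 selects one of three gears.
        int gear = actions.DiscreteActions[0];

        // Illustrative only; apply torqueA / torqueB / gear to the scene here.
    }
}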

10
com.unity.ml-agents/Runtime/Agent.deprecated.cs


// [Obsolete("GetAction has been deprecated, please use GetStoredActionBuffers, Or GetStoredDiscreteActions.")]
public float[] GetAction()
{
return m_Info.storedVectorActions;
var storedAction = m_Info.storedVectorActions;
if (!storedAction.ContinuousActions.IsEmpty())
{
return storedAction.ContinuousActions.Array;
}
else
{
return Array.ConvertAll(storedAction.DiscreteActions.Array, x => (float)x);
}
}
}
}
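The deprecated GetAction() cannot represent a hybrid result in a single float[]: as the code above shows, it returns the continuous segment when one exists and otherwise the discrete segment converted to floats, so a hybrid agent's discrete choices are invisible through this API. A short sketch of reading the stored actions through ActionBuffers instead; the GetStoredActionBuffers name comes from the obsolete message above, so treat its availability as an assumption for your package version:

using Unity.MLAgents;
using Unity.MLAgents.Actuators;

public static class StoredActionExample
{
    // Prefer the ActionBuffers accessor over the deprecated float[] API.
    public static int FirstDiscreteChoice(Agent agent)
    {
        ActionBuffers stored = agent.GetStoredActionBuffers();
        return stored.DiscreteActions.Length > 0 ? stored.DiscreteActions[0] : -1;
    }
}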

77
com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs


var agentInfoProto = ai.ToAgentInfoProto();
var agentActionProto = new AgentActionProto();
if (ai.storedVectorActions != null)
if (!ai.storedVectorActions.IsEmpty())
agentActionProto.VectorActions.AddRange(ai.storedVectorActions);
if (!ai.storedVectorActions.ContinuousActions.IsEmpty())
{
agentActionProto.ContinuousActions.AddRange(ai.storedVectorActions.ContinuousActions.Array);
}
if (!ai.storedVectorActions.DiscreteActions.IsEmpty())
{
agentActionProto.DiscreteActions.AddRange(ai.storedVectorActions.DiscreteActions.Array);
}
}
return new AgentInfoActionPairProto

return summariesOut;
}
#endregion
#region BrainParameters

{
var brainParametersProto = new BrainParametersProto
{
VectorActionSize = { bp.VectorActionSize },
VectorActionSpaceType = (SpaceTypeProto)bp.VectorActionSpaceType,
VectorActionSizeDeprecated = { bp.VectorActionSize },
VectorActionSpaceTypeDeprecated = (SpaceTypeProto)bp.VectorActionSpaceType,
brainParametersProto.VectorActionDescriptions.AddRange(bp.VectorActionDescriptions);
brainParametersProto.VectorActionDescriptionsDeprecated.AddRange(bp.VectorActionDescriptions);
}
return brainParametersProto;
}

/// <param name="isTraining">Whether or not the Brain is training.</param>
public static BrainParametersProto ToBrainParametersProto(this ActionSpec actionSpec, string name, bool isTraining)
{
actionSpec.CheckNotHybrid();
if (actionSpec.NumContinuousActions > 0)
var actionSpecProto = new ActionSpecProto
brainParametersProto.VectorActionSize.Add(actionSpec.NumContinuousActions);
brainParametersProto.VectorActionSpaceType = SpaceTypeProto.Continuous;
NumContinuousActions = actionSpec.NumContinuousActions,
NumDiscreteActions = actionSpec.NumDiscreteActions,
};
if (actionSpec.BranchSizes != null)
{
actionSpecProto.DiscreteBranchSizes.AddRange(actionSpec.BranchSizes);
else if (actionSpec.NumDiscreteActions > 0)
brainParametersProto.ActionSpec = actionSpecProto;
var supportHybrid = Academy.Instance.TrainerCapabilities == null || Academy.Instance.TrainerCapabilities.HybridActions;
if (!supportHybrid)
brainParametersProto.VectorActionSize.AddRange(actionSpec.BranchSizes);
brainParametersProto.VectorActionSpaceType = SpaceTypeProto.Discrete;
actionSpec.CheckAllContinuousOrDiscrete();
if (actionSpec.NumContinuousActions > 0)
{
brainParametersProto.VectorActionSizeDeprecated.Add(actionSpec.NumContinuousActions);
brainParametersProto.VectorActionSpaceTypeDeprecated = SpaceTypeProto.Continuous;
}
else if (actionSpec.NumDiscreteActions > 0)
{
brainParametersProto.VectorActionSizeDeprecated.AddRange(actionSpec.BranchSizes);
brainParametersProto.VectorActionSpaceTypeDeprecated = SpaceTypeProto.Discrete;
}
}
// TODO handle ActionDescriptions?

{
var bp = new BrainParameters
{
VectorActionSize = bpp.VectorActionSize.ToArray(),
VectorActionDescriptions = bpp.VectorActionDescriptions.ToArray(),
VectorActionSpaceType = (SpaceType)bpp.VectorActionSpaceType
VectorActionSize = bpp.VectorActionSizeDeprecated.ToArray(),
VectorActionDescriptions = bpp.VectorActionDescriptionsDeprecated.ToArray(),
VectorActionSpaceType = (SpaceType)bpp.VectorActionSpaceTypeDeprecated
};
return bp;
}

}
return dm;
}
#endregion
public static UnityRLInitParameters ToUnityRLInitParameters(this UnityRLInitializationInputProto inputProto)

}
#region AgentAction
public static List<float[]> ToAgentActionList(this UnityRLInputProto.Types.ListAgentActionProto proto)
public static List<ActionBuffers> ToAgentActionList(this UnityRLInputProto.Types.ListAgentActionProto proto)
var agentActions = new List<float[]>(proto.Value.Count);
var agentActions = new List<ActionBuffers>(proto.Value.Count);
agentActions.Add(ap.VectorActions.ToArray());
agentActions.Add(ap.ToActionBuffers());
public static ActionBuffers ToActionBuffers(this AgentActionProto proto)
{
return new ActionBuffers(proto.ContinuousActions.ToArray(), proto.DiscreteActions.ToArray());
}
#endregion
#region Observations

if (!s_HaveWarnedTrainerCapabilitiesMapping)
{
Debug.LogWarning($"The sensor {sensor.GetName()} is using non-trivial mapping and " +
"the attached trainer doesn't support compression mapping. " +
"Switching to uncompressed observations.");
"the attached trainer doesn't support compression mapping. " +
"Switching to uncompressed observations.");
s_HaveWarnedTrainerCapabilitiesMapping = true;
}
compressionType = SensorCompressionType.None;

$"GetCompressedObservation() returned null data for sensor named {sensor.GetName()}. " +
"You must return a byte[]. If you don't want to use compressed observations, " +
"return SensorCompressionType.None from GetCompressionType()."
);
}
observationProto = new ObservationProto
{

observationProto.Shape.AddRange(shape);
return observationProto;
}
#endregion
public static UnityRLCapabilities ToRLCapabilities(this UnityRLCapabilitiesProto proto)

BaseRLCapabilities = proto.BaseRLCapabilities,
ConcatenatedPngObservations = proto.ConcatenatedPngObservations,
CompressedChannelMapping = proto.CompressedChannelMapping,
HybridActions = proto.HybridActions,
};
}

BaseRLCapabilities = rlCaps.BaseRLCapabilities,
ConcatenatedPngObservations = rlCaps.ConcatenatedPngObservations,
CompressedChannelMapping = rlCaps.CompressedChannelMapping,
HybridActions = rlCaps.HybridActions,
};
}
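In the conversion above, the new ActionSpecProto is always populated, while the deprecated VectorActionSizeDeprecated / VectorActionSpaceTypeDeprecated fields are only filled in when the connected trainer does not advertise the HybridActions capability, and in that case the spec must be purely continuous or purely discrete. A condensed, hypothetical restatement of that decision using only the public ActionSpec members shown in this diff (CanUseDeprecatedFields is not part of the ML-Agents API):

using Unity.MLAgents.Actuators;

public static class BrainParamsFallbackSketch
{
    // Hypothetical helper: may the deprecated single-space fields be populated
    // for this spec? Mirrors the logic in ToBrainParametersProto above.
    public static bool CanUseDeprecatedFields(ActionSpec spec, bool trainerSupportsHybrid)
    {
        if (trainerSupportsHybrid)
        {
            return false; // the new ActionSpecProto already carries everything
        }
        // Older trainers only understand all-continuous or all-discrete specs;
        // a mixed spec makes CheckAllContinuousOrDiscrete throw instead.
        bool isHybrid = spec.NumContinuousActions > 0 && spec.NumDiscreteActions > 0;
        return !isHybrid;
    }
}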

2
com.unity.ml-agents/Runtime/Communicator/ICommunicator.cs


/// <param name="key">A key to identify which behavior actions to get.</param>
/// <param name="agentId">A key to identify which Agent actions to get.</param>
/// <returns></returns>
float[] GetActions(string key, int agentId);
ActionBuffers GetActions(string key, int agentId);
}
}

13
com.unity.ml-agents/Runtime/Communicator/RpcCommunicator.cs


UnityRLOutputProto m_CurrentUnityRlOutput =
new UnityRLOutputProto();
Dictionary<string, Dictionary<int, float[]>> m_LastActionsReceived =
new Dictionary<string, Dictionary<int, float[]>>();
Dictionary<string, Dictionary<int, ActionBuffers>> m_LastActionsReceived =
new Dictionary<string, Dictionary<int, ActionBuffers>>();
// Brains that we have sent over the communicator with agents.
HashSet<string> m_SentBrainKeys = new HashSet<string>();

{
return false;
}
}
else if (unityVersion.Major != pythonVersion.Major)
{

}
if (!m_LastActionsReceived.ContainsKey(behaviorName))
{
m_LastActionsReceived[behaviorName] = new Dictionary<int, float[]>();
m_LastActionsReceived[behaviorName] = new Dictionary<int, ActionBuffers>();
m_LastActionsReceived[behaviorName][info.episodeId] = null;
m_LastActionsReceived[behaviorName][info.episodeId] = ActionBuffers.Empty;
if (info.done)
{
m_LastActionsReceived[behaviorName].Remove(info.episodeId);

}
}
public float[] GetActions(string behaviorName, int agentId)
public ActionBuffers GetActions(string behaviorName, int agentId)
{
if (m_LastActionsReceived.ContainsKey(behaviorName))
{

}
}
return null;
return ActionBuffers.Empty;
}
/// <summary>
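GetActions now hands back ActionBuffers.Empty instead of null when no action has been received for a behavior/agent pair, so callers switch from null checks to IsEmpty(). A trivial sketch of the adjusted check:

using Unity.MLAgents.Actuators;

public static class EmptyActionCheckSketch
{
    // Previously: actions != null. Now the value is never null, so test emptiness.
    public static bool HasActions(ActionBuffers actions)
    {
        return !actions.IsEmpty();
    }
}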

13
com.unity.ml-agents/Runtime/Communicator/UnityRLCapabilities.cs


public bool BaseRLCapabilities;
public bool ConcatenatedPngObservations;
public bool CompressedChannelMapping;
public bool HybridActions;
public UnityRLCapabilities(bool baseRlCapabilities = true, bool concatenatedPngObservations = true, bool compressedChannelMapping = true)
public UnityRLCapabilities(
bool baseRlCapabilities = true,
bool concatenatedPngObservations = true,
bool compressedChannelMapping = true,
bool hybridActions = true)
HybridActions = hybridActions;
}
/// <summary>

return false;
}
Debug.LogWarning("Unity has connected to a Training process that does not support" +
"Base Reinforcement Learning Capabilities. Please make sure you have the" +
" latest training codebase installed for this version of the ML-Agents package.");
"Base Reinforcement Learning Capabilities. Please make sure you have the" +
" latest training codebase installed for this version of the ML-Agents package.");
}
}
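The constructor gains a hybridActions flag that defaults to true; the effective value still comes from the capability exchange shown in GrpcExtensions above. A small sketch of building a capabilities object that models a pre-hybrid trainer, e.g. for a test (argument names follow the constructor in this diff; if the type is internal in your package version, a snippet like this belongs in the package's own test assemblies):

using Unity.MLAgents;

public static class CapabilitiesSketch
{
    // Illustrative only: capabilities of a trainer that predates hybrid actions.
    public static UnityRLCapabilities OldTrainerCaps()
    {
        return new UnityRLCapabilities(
            baseRlCapabilities: true,
            concatenatedPngObservations: true,
            compressedChannelMapping: true,
            hybridActions: false);
    }
}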

82
com.unity.ml-agents/Runtime/Grpc/CommunicatorObjects/AgentAction.cs


byte[] descriptorData = global::System.Convert.FromBase64String(
string.Concat(
"CjVtbGFnZW50c19lbnZzL2NvbW11bmljYXRvcl9vYmplY3RzL2FnZW50X2Fj",
"dGlvbi5wcm90bxIUY29tbXVuaWNhdG9yX29iamVjdHMiSwoQQWdlbnRBY3Rp",
"b25Qcm90bxIWCg52ZWN0b3JfYWN0aW9ucxgBIAMoAhINCgV2YWx1ZRgEIAEo",
"AkoECAIQA0oECAMQBEoECAUQBkIlqgIiVW5pdHkuTUxBZ2VudHMuQ29tbXVu",
"aWNhdG9yT2JqZWN0c2IGcHJvdG8z"));
"dGlvbi5wcm90bxIUY29tbXVuaWNhdG9yX29iamVjdHMijAEKEEFnZW50QWN0",
"aW9uUHJvdG8SIQoZdmVjdG9yX2FjdGlvbnNfZGVwcmVjYXRlZBgBIAMoAhIN",
"CgV2YWx1ZRgEIAEoAhIaChJjb250aW51b3VzX2FjdGlvbnMYBiADKAISGAoQ",
"ZGlzY3JldGVfYWN0aW9ucxgHIAMoBUoECAIQA0oECAMQBEoECAUQBkIlqgIi",
"VW5pdHkuTUxBZ2VudHMuQ29tbXVuaWNhdG9yT2JqZWN0c2IGcHJvdG8z"));
new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.AgentActionProto), global::Unity.MLAgents.CommunicatorObjects.AgentActionProto.Parser, new[]{ "VectorActions", "Value" }, null, null, null)
new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.AgentActionProto), global::Unity.MLAgents.CommunicatorObjects.AgentActionProto.Parser, new[]{ "VectorActionsDeprecated", "Value", "ContinuousActions", "DiscreteActions" }, null, null, null)
}));
}
#endregion

[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public AgentActionProto(AgentActionProto other) : this() {
vectorActions_ = other.vectorActions_.Clone();
vectorActionsDeprecated_ = other.vectorActionsDeprecated_.Clone();
continuousActions_ = other.continuousActions_.Clone();
discreteActions_ = other.discreteActions_.Clone();
_unknownFields = pb::UnknownFieldSet.Clone(other._unknownFields);
}

}
/// <summary>Field number for the "vector_actions" field.</summary>
public const int VectorActionsFieldNumber = 1;
private static readonly pb::FieldCodec<float> _repeated_vectorActions_codec
/// <summary>Field number for the "vector_actions_deprecated" field.</summary>
public const int VectorActionsDeprecatedFieldNumber = 1;
private static readonly pb::FieldCodec<float> _repeated_vectorActionsDeprecated_codec
private readonly pbc::RepeatedField<float> vectorActions_ = new pbc::RepeatedField<float>();
private readonly pbc::RepeatedField<float> vectorActionsDeprecated_ = new pbc::RepeatedField<float>();
/// <summary>
/// mark as deprecated in communicator v1.3.0
/// </summary>
public pbc::RepeatedField<float> VectorActions {
get { return vectorActions_; }
public pbc::RepeatedField<float> VectorActionsDeprecated {
get { return vectorActionsDeprecated_; }
}
/// <summary>Field number for the "value" field.</summary>

}
}
/// <summary>Field number for the "continuous_actions" field.</summary>
public const int ContinuousActionsFieldNumber = 6;
private static readonly pb::FieldCodec<float> _repeated_continuousActions_codec
= pb::FieldCodec.ForFloat(50);
private readonly pbc::RepeatedField<float> continuousActions_ = new pbc::RepeatedField<float>();
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public pbc::RepeatedField<float> ContinuousActions {
get { return continuousActions_; }
}
/// <summary>Field number for the "discrete_actions" field.</summary>
public const int DiscreteActionsFieldNumber = 7;
private static readonly pb::FieldCodec<int> _repeated_discreteActions_codec
= pb::FieldCodec.ForInt32(58);
private readonly pbc::RepeatedField<int> discreteActions_ = new pbc::RepeatedField<int>();
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public pbc::RepeatedField<int> DiscreteActions {
get { return discreteActions_; }
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public override bool Equals(object other) {
return Equals(other as AgentActionProto);

if (ReferenceEquals(other, this)) {
return true;
}
if(!vectorActions_.Equals(other.vectorActions_)) return false;
if(!vectorActionsDeprecated_.Equals(other.vectorActionsDeprecated_)) return false;
if(!continuousActions_.Equals(other.continuousActions_)) return false;
if(!discreteActions_.Equals(other.discreteActions_)) return false;
return Equals(_unknownFields, other._unknownFields);
}

hash ^= vectorActions_.GetHashCode();
hash ^= vectorActionsDeprecated_.GetHashCode();
hash ^= continuousActions_.GetHashCode();
hash ^= discreteActions_.GetHashCode();
if (_unknownFields != null) {
hash ^= _unknownFields.GetHashCode();
}

[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public void WriteTo(pb::CodedOutputStream output) {
vectorActions_.WriteTo(output, _repeated_vectorActions_codec);
vectorActionsDeprecated_.WriteTo(output, _repeated_vectorActionsDeprecated_codec);
continuousActions_.WriteTo(output, _repeated_continuousActions_codec);
discreteActions_.WriteTo(output, _repeated_discreteActions_codec);
if (_unknownFields != null) {
_unknownFields.WriteTo(output);
}

public int CalculateSize() {
int size = 0;
size += vectorActions_.CalculateSize(_repeated_vectorActions_codec);
size += vectorActionsDeprecated_.CalculateSize(_repeated_vectorActionsDeprecated_codec);
size += continuousActions_.CalculateSize(_repeated_continuousActions_codec);
size += discreteActions_.CalculateSize(_repeated_discreteActions_codec);
if (_unknownFields != null) {
size += _unknownFields.CalculateSize();
}

if (other == null) {
return;
}
vectorActions_.Add(other.vectorActions_);
vectorActionsDeprecated_.Add(other.vectorActionsDeprecated_);
continuousActions_.Add(other.continuousActions_);
discreteActions_.Add(other.discreteActions_);
_unknownFields = pb::UnknownFieldSet.MergeFrom(_unknownFields, other._unknownFields);
}

break;
case 10:
case 13: {
vectorActions_.AddEntriesFrom(input, _repeated_vectorActions_codec);
vectorActionsDeprecated_.AddEntriesFrom(input, _repeated_vectorActionsDeprecated_codec);
break;
}
case 50:
case 53: {
continuousActions_.AddEntriesFrom(input, _repeated_continuousActions_codec);
break;
}
case 58:
case 56: {
discreteActions_.AddEntriesFrom(input, _repeated_discreteActions_codec);
break;
}
}

348
com.unity.ml-agents/Runtime/Grpc/CommunicatorObjects/BrainParameters.cs


"CjltbGFnZW50c19lbnZzL2NvbW11bmljYXRvcl9vYmplY3RzL2JyYWluX3Bh",
"cmFtZXRlcnMucHJvdG8SFGNvbW11bmljYXRvcl9vYmplY3RzGjNtbGFnZW50",
"c19lbnZzL2NvbW11bmljYXRvcl9vYmplY3RzL3NwYWNlX3R5cGUucHJvdG8i",
"2QEKFEJyYWluUGFyYW1ldGVyc1Byb3RvEhoKEnZlY3Rvcl9hY3Rpb25fc2l6",
"ZRgDIAMoBRIiChp2ZWN0b3JfYWN0aW9uX2Rlc2NyaXB0aW9ucxgFIAMoCRJG",
"Chh2ZWN0b3JfYWN0aW9uX3NwYWNlX3R5cGUYBiABKA4yJC5jb21tdW5pY2F0",
"b3Jfb2JqZWN0cy5TcGFjZVR5cGVQcm90bxISCgpicmFpbl9uYW1lGAcgASgJ",
"EhMKC2lzX3RyYWluaW5nGAggASgISgQIARACSgQIAhADSgQIBBAFQiWqAiJV",
"bml0eS5NTEFnZW50cy5Db21tdW5pY2F0b3JPYmplY3RzYgZwcm90bzM="));
"iwEKD0FjdGlvblNwZWNQcm90bxIeChZudW1fY29udGludW91c19hY3Rpb25z",
"GAEgASgFEhwKFG51bV9kaXNjcmV0ZV9hY3Rpb25zGAIgASgFEh0KFWRpc2Ny",
"ZXRlX2JyYW5jaF9zaXplcxgDIAMoBRIbChNhY3Rpb25fZGVzY3JpcHRpb25z",
"GAQgAygJIrYCChRCcmFpblBhcmFtZXRlcnNQcm90bxIlCh12ZWN0b3JfYWN0",
"aW9uX3NpemVfZGVwcmVjYXRlZBgDIAMoBRItCiV2ZWN0b3JfYWN0aW9uX2Rl",
"c2NyaXB0aW9uc19kZXByZWNhdGVkGAUgAygJElEKI3ZlY3Rvcl9hY3Rpb25f",
"c3BhY2VfdHlwZV9kZXByZWNhdGVkGAYgASgOMiQuY29tbXVuaWNhdG9yX29i",
"amVjdHMuU3BhY2VUeXBlUHJvdG8SEgoKYnJhaW5fbmFtZRgHIAEoCRITCgtp",
"c190cmFpbmluZxgIIAEoCBI6CgthY3Rpb25fc3BlYxgJIAEoCzIlLmNvbW11",
"bmljYXRvcl9vYmplY3RzLkFjdGlvblNwZWNQcm90b0oECAEQAkoECAIQA0oE",
"CAQQBUIlqgIiVW5pdHkuTUxBZ2VudHMuQ29tbXVuaWNhdG9yT2JqZWN0c2IG",
"cHJvdG8z"));
new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.BrainParametersProto), global::Unity.MLAgents.CommunicatorObjects.BrainParametersProto.Parser, new[]{ "VectorActionSize", "VectorActionDescriptions", "VectorActionSpaceType", "BrainName", "IsTraining" }, null, null, null)
new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.ActionSpecProto), global::Unity.MLAgents.CommunicatorObjects.ActionSpecProto.Parser, new[]{ "NumContinuousActions", "NumDiscreteActions", "DiscreteBranchSizes", "ActionDescriptions" }, null, null, null),
new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.BrainParametersProto), global::Unity.MLAgents.CommunicatorObjects.BrainParametersProto.Parser, new[]{ "VectorActionSizeDeprecated", "VectorActionDescriptionsDeprecated", "VectorActionSpaceTypeDeprecated", "BrainName", "IsTraining", "ActionSpec" }, null, null, null)
}));
}
#endregion

internal sealed partial class ActionSpecProto : pb::IMessage<ActionSpecProto> {
private static readonly pb::MessageParser<ActionSpecProto> _parser = new pb::MessageParser<ActionSpecProto>(() => new ActionSpecProto());
private pb::UnknownFieldSet _unknownFields;
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public static pb::MessageParser<ActionSpecProto> Parser { get { return _parser; } }
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public static pbr::MessageDescriptor Descriptor {
get { return global::Unity.MLAgents.CommunicatorObjects.BrainParametersReflection.Descriptor.MessageTypes[0]; }
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
pbr::MessageDescriptor pb::IMessage.Descriptor {
get { return Descriptor; }
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public ActionSpecProto() {
OnConstruction();
}
partial void OnConstruction();
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public ActionSpecProto(ActionSpecProto other) : this() {
numContinuousActions_ = other.numContinuousActions_;
numDiscreteActions_ = other.numDiscreteActions_;
discreteBranchSizes_ = other.discreteBranchSizes_.Clone();
actionDescriptions_ = other.actionDescriptions_.Clone();
_unknownFields = pb::UnknownFieldSet.Clone(other._unknownFields);
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public ActionSpecProto Clone() {
return new ActionSpecProto(this);
}
/// <summary>Field number for the "num_continuous_actions" field.</summary>
public const int NumContinuousActionsFieldNumber = 1;
private int numContinuousActions_;
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public int NumContinuousActions {
get { return numContinuousActions_; }
set {
numContinuousActions_ = value;
}
}
/// <summary>Field number for the "num_discrete_actions" field.</summary>
public const int NumDiscreteActionsFieldNumber = 2;
private int numDiscreteActions_;
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public int NumDiscreteActions {
get { return numDiscreteActions_; }
set {
numDiscreteActions_ = value;
}
}
/// <summary>Field number for the "discrete_branch_sizes" field.</summary>
public const int DiscreteBranchSizesFieldNumber = 3;
private static readonly pb::FieldCodec<int> _repeated_discreteBranchSizes_codec
= pb::FieldCodec.ForInt32(26);
private readonly pbc::RepeatedField<int> discreteBranchSizes_ = new pbc::RepeatedField<int>();
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public pbc::RepeatedField<int> DiscreteBranchSizes {
get { return discreteBranchSizes_; }
}
/// <summary>Field number for the "action_descriptions" field.</summary>
public const int ActionDescriptionsFieldNumber = 4;
private static readonly pb::FieldCodec<string> _repeated_actionDescriptions_codec
= pb::FieldCodec.ForString(34);
private readonly pbc::RepeatedField<string> actionDescriptions_ = new pbc::RepeatedField<string>();
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public pbc::RepeatedField<string> ActionDescriptions {
get { return actionDescriptions_; }
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public override bool Equals(object other) {
return Equals(other as ActionSpecProto);
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public bool Equals(ActionSpecProto other) {
if (ReferenceEquals(other, null)) {
return false;
}
if (ReferenceEquals(other, this)) {
return true;
}
if (NumContinuousActions != other.NumContinuousActions) return false;
if (NumDiscreteActions != other.NumDiscreteActions) return false;
if(!discreteBranchSizes_.Equals(other.discreteBranchSizes_)) return false;
if(!actionDescriptions_.Equals(other.actionDescriptions_)) return false;
return Equals(_unknownFields, other._unknownFields);
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public override int GetHashCode() {
int hash = 1;
if (NumContinuousActions != 0) hash ^= NumContinuousActions.GetHashCode();
if (NumDiscreteActions != 0) hash ^= NumDiscreteActions.GetHashCode();
hash ^= discreteBranchSizes_.GetHashCode();
hash ^= actionDescriptions_.GetHashCode();
if (_unknownFields != null) {
hash ^= _unknownFields.GetHashCode();
}
return hash;
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public override string ToString() {
return pb::JsonFormatter.ToDiagnosticString(this);
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public void WriteTo(pb::CodedOutputStream output) {
if (NumContinuousActions != 0) {
output.WriteRawTag(8);
output.WriteInt32(NumContinuousActions);
}
if (NumDiscreteActions != 0) {
output.WriteRawTag(16);
output.WriteInt32(NumDiscreteActions);
}
discreteBranchSizes_.WriteTo(output, _repeated_discreteBranchSizes_codec);
actionDescriptions_.WriteTo(output, _repeated_actionDescriptions_codec);
if (_unknownFields != null) {
_unknownFields.WriteTo(output);
}
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public int CalculateSize() {
int size = 0;
if (NumContinuousActions != 0) {
size += 1 + pb::CodedOutputStream.ComputeInt32Size(NumContinuousActions);
}
if (NumDiscreteActions != 0) {
size += 1 + pb::CodedOutputStream.ComputeInt32Size(NumDiscreteActions);
}
size += discreteBranchSizes_.CalculateSize(_repeated_discreteBranchSizes_codec);
size += actionDescriptions_.CalculateSize(_repeated_actionDescriptions_codec);
if (_unknownFields != null) {
size += _unknownFields.CalculateSize();
}
return size;
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public void MergeFrom(ActionSpecProto other) {
if (other == null) {
return;
}
if (other.NumContinuousActions != 0) {
NumContinuousActions = other.NumContinuousActions;
}
if (other.NumDiscreteActions != 0) {
NumDiscreteActions = other.NumDiscreteActions;
}
discreteBranchSizes_.Add(other.discreteBranchSizes_);
actionDescriptions_.Add(other.actionDescriptions_);
_unknownFields = pb::UnknownFieldSet.MergeFrom(_unknownFields, other._unknownFields);
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public void MergeFrom(pb::CodedInputStream input) {
uint tag;
while ((tag = input.ReadTag()) != 0) {
switch(tag) {
default:
_unknownFields = pb::UnknownFieldSet.MergeFieldFrom(_unknownFields, input);
break;
case 8: {
NumContinuousActions = input.ReadInt32();
break;
}
case 16: {
NumDiscreteActions = input.ReadInt32();
break;
}
case 26:
case 24: {
discreteBranchSizes_.AddEntriesFrom(input, _repeated_discreteBranchSizes_codec);
break;
}
case 34: {
actionDescriptions_.AddEntriesFrom(input, _repeated_actionDescriptions_codec);
break;
}
}
}
}
}
internal sealed partial class BrainParametersProto : pb::IMessage<BrainParametersProto> {
private static readonly pb::MessageParser<BrainParametersProto> _parser = new pb::MessageParser<BrainParametersProto>(() => new BrainParametersProto());
private pb::UnknownFieldSet _unknownFields;

[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public static pbr::MessageDescriptor Descriptor {
get { return global::Unity.MLAgents.CommunicatorObjects.BrainParametersReflection.Descriptor.MessageTypes[0]; }
get { return global::Unity.MLAgents.CommunicatorObjects.BrainParametersReflection.Descriptor.MessageTypes[1]; }
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]

[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public BrainParametersProto(BrainParametersProto other) : this() {
vectorActionSize_ = other.vectorActionSize_.Clone();
vectorActionDescriptions_ = other.vectorActionDescriptions_.Clone();
vectorActionSpaceType_ = other.vectorActionSpaceType_;
vectorActionSizeDeprecated_ = other.vectorActionSizeDeprecated_.Clone();
vectorActionDescriptionsDeprecated_ = other.vectorActionDescriptionsDeprecated_.Clone();
vectorActionSpaceTypeDeprecated_ = other.vectorActionSpaceTypeDeprecated_;
ActionSpec = other.actionSpec_ != null ? other.ActionSpec.Clone() : null;
_unknownFields = pb::UnknownFieldSet.Clone(other._unknownFields);
}

}
/// <summary>Field number for the "vector_action_size" field.</summary>
public const int VectorActionSizeFieldNumber = 3;
private static readonly pb::FieldCodec<int> _repeated_vectorActionSize_codec
/// <summary>Field number for the "vector_action_size_deprecated" field.</summary>
public const int VectorActionSizeDeprecatedFieldNumber = 3;
private static readonly pb::FieldCodec<int> _repeated_vectorActionSizeDeprecated_codec
private readonly pbc::RepeatedField<int> vectorActionSize_ = new pbc::RepeatedField<int>();
private readonly pbc::RepeatedField<int> vectorActionSizeDeprecated_ = new pbc::RepeatedField<int>();
/// <summary>
/// mark as deprecated in communicator v1.3.0
/// </summary>
public pbc::RepeatedField<int> VectorActionSize {
get { return vectorActionSize_; }
public pbc::RepeatedField<int> VectorActionSizeDeprecated {
get { return vectorActionSizeDeprecated_; }
/// <summary>Field number for the "vector_action_descriptions" field.</summary>
public const int VectorActionDescriptionsFieldNumber = 5;
private static readonly pb::FieldCodec<string> _repeated_vectorActionDescriptions_codec
/// <summary>Field number for the "vector_action_descriptions_deprecated" field.</summary>
public const int VectorActionDescriptionsDeprecatedFieldNumber = 5;
private static readonly pb::FieldCodec<string> _repeated_vectorActionDescriptionsDeprecated_codec
private readonly pbc::RepeatedField<string> vectorActionDescriptions_ = new pbc::RepeatedField<string>();
private readonly pbc::RepeatedField<string> vectorActionDescriptionsDeprecated_ = new pbc::RepeatedField<string>();
/// <summary>
/// mark as deprecated in communicator v1.3.0
/// </summary>
public pbc::RepeatedField<string> VectorActionDescriptions {
get { return vectorActionDescriptions_; }
public pbc::RepeatedField<string> VectorActionDescriptionsDeprecated {
get { return vectorActionDescriptionsDeprecated_; }
/// <summary>Field number for the "vector_action_space_type" field.</summary>
public const int VectorActionSpaceTypeFieldNumber = 6;
private global::Unity.MLAgents.CommunicatorObjects.SpaceTypeProto vectorActionSpaceType_ = 0;
/// <summary>Field number for the "vector_action_space_type_deprecated" field.</summary>
public const int VectorActionSpaceTypeDeprecatedFieldNumber = 6;
private global::Unity.MLAgents.CommunicatorObjects.SpaceTypeProto vectorActionSpaceTypeDeprecated_ = 0;
/// <summary>
/// mark as deprecated in communicator v1.3.0
/// </summary>
public global::Unity.MLAgents.CommunicatorObjects.SpaceTypeProto VectorActionSpaceType {
get { return vectorActionSpaceType_; }
public global::Unity.MLAgents.CommunicatorObjects.SpaceTypeProto VectorActionSpaceTypeDeprecated {
get { return vectorActionSpaceTypeDeprecated_; }
vectorActionSpaceType_ = value;
vectorActionSpaceTypeDeprecated_ = value;
}
}

}
}
/// <summary>Field number for the "action_spec" field.</summary>
public const int ActionSpecFieldNumber = 9;
private global::Unity.MLAgents.CommunicatorObjects.ActionSpecProto actionSpec_;
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public global::Unity.MLAgents.CommunicatorObjects.ActionSpecProto ActionSpec {
get { return actionSpec_; }
set {
actionSpec_ = value;
}
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public override bool Equals(object other) {
return Equals(other as BrainParametersProto);

if (ReferenceEquals(other, this)) {
return true;
}
if(!vectorActionSize_.Equals(other.vectorActionSize_)) return false;
if(!vectorActionDescriptions_.Equals(other.vectorActionDescriptions_)) return false;
if (VectorActionSpaceType != other.VectorActionSpaceType) return false;
if(!vectorActionSizeDeprecated_.Equals(other.vectorActionSizeDeprecated_)) return false;
if(!vectorActionDescriptionsDeprecated_.Equals(other.vectorActionDescriptionsDeprecated_)) return false;
if (VectorActionSpaceTypeDeprecated != other.VectorActionSpaceTypeDeprecated) return false;
if (!object.Equals(ActionSpec, other.ActionSpec)) return false;
return Equals(_unknownFields, other._unknownFields);
}

hash ^= vectorActionSize_.GetHashCode();
hash ^= vectorActionDescriptions_.GetHashCode();
if (VectorActionSpaceType != 0) hash ^= VectorActionSpaceType.GetHashCode();
hash ^= vectorActionSizeDeprecated_.GetHashCode();
hash ^= vectorActionDescriptionsDeprecated_.GetHashCode();
if (VectorActionSpaceTypeDeprecated != 0) hash ^= VectorActionSpaceTypeDeprecated.GetHashCode();
if (actionSpec_ != null) hash ^= ActionSpec.GetHashCode();
if (_unknownFields != null) {
hash ^= _unknownFields.GetHashCode();
}

[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public void WriteTo(pb::CodedOutputStream output) {
vectorActionSize_.WriteTo(output, _repeated_vectorActionSize_codec);
vectorActionDescriptions_.WriteTo(output, _repeated_vectorActionDescriptions_codec);
if (VectorActionSpaceType != 0) {
vectorActionSizeDeprecated_.WriteTo(output, _repeated_vectorActionSizeDeprecated_codec);
vectorActionDescriptionsDeprecated_.WriteTo(output, _repeated_vectorActionDescriptionsDeprecated_codec);
if (VectorActionSpaceTypeDeprecated != 0) {
output.WriteEnum((int) VectorActionSpaceType);
output.WriteEnum((int) VectorActionSpaceTypeDeprecated);
}
if (BrainName.Length != 0) {
output.WriteRawTag(58);

output.WriteRawTag(64);
output.WriteBool(IsTraining);
}
if (actionSpec_ != null) {
output.WriteRawTag(74);
output.WriteMessage(ActionSpec);
}
if (_unknownFields != null) {
_unknownFields.WriteTo(output);
}

public int CalculateSize() {
int size = 0;
size += vectorActionSize_.CalculateSize(_repeated_vectorActionSize_codec);
size += vectorActionDescriptions_.CalculateSize(_repeated_vectorActionDescriptions_codec);
if (VectorActionSpaceType != 0) {
size += 1 + pb::CodedOutputStream.ComputeEnumSize((int) VectorActionSpaceType);
size += vectorActionSizeDeprecated_.CalculateSize(_repeated_vectorActionSizeDeprecated_codec);
size += vectorActionDescriptionsDeprecated_.CalculateSize(_repeated_vectorActionDescriptionsDeprecated_codec);
if (VectorActionSpaceTypeDeprecated != 0) {
size += 1 + pb::CodedOutputStream.ComputeEnumSize((int) VectorActionSpaceTypeDeprecated);
}
if (BrainName.Length != 0) {
size += 1 + pb::CodedOutputStream.ComputeStringSize(BrainName);

}
if (actionSpec_ != null) {
size += 1 + pb::CodedOutputStream.ComputeMessageSize(ActionSpec);
}
if (_unknownFields != null) {
size += _unknownFields.CalculateSize();

if (other == null) {
return;
}
vectorActionSize_.Add(other.vectorActionSize_);
vectorActionDescriptions_.Add(other.vectorActionDescriptions_);
if (other.VectorActionSpaceType != 0) {
VectorActionSpaceType = other.VectorActionSpaceType;
vectorActionSizeDeprecated_.Add(other.vectorActionSizeDeprecated_);
vectorActionDescriptionsDeprecated_.Add(other.vectorActionDescriptionsDeprecated_);
if (other.VectorActionSpaceTypeDeprecated != 0) {
VectorActionSpaceTypeDeprecated = other.VectorActionSpaceTypeDeprecated;
}
if (other.BrainName.Length != 0) {
BrainName = other.BrainName;

}
if (other.actionSpec_ != null) {
if (actionSpec_ == null) {
actionSpec_ = new global::Unity.MLAgents.CommunicatorObjects.ActionSpecProto();
}
ActionSpec.MergeFrom(other.ActionSpec);
}
_unknownFields = pb::UnknownFieldSet.MergeFrom(_unknownFields, other._unknownFields);
}

break;
case 26:
case 24: {
vectorActionSize_.AddEntriesFrom(input, _repeated_vectorActionSize_codec);
vectorActionSizeDeprecated_.AddEntriesFrom(input, _repeated_vectorActionSizeDeprecated_codec);
vectorActionDescriptions_.AddEntriesFrom(input, _repeated_vectorActionDescriptions_codec);
vectorActionDescriptionsDeprecated_.AddEntriesFrom(input, _repeated_vectorActionDescriptionsDeprecated_codec);
vectorActionSpaceType_ = (global::Unity.MLAgents.CommunicatorObjects.SpaceTypeProto) input.ReadEnum();
vectorActionSpaceTypeDeprecated_ = (global::Unity.MLAgents.CommunicatorObjects.SpaceTypeProto) input.ReadEnum();
break;
}
case 58: {

case 64: {
IsTraining = input.ReadBool();
break;
}
case 74: {
if (actionSpec_ == null) {
actionSpec_ = new global::Unity.MLAgents.CommunicatorObjects.ActionSpecProto();
}
input.ReadMessage(actionSpec_);
break;
}
}

44
com.unity.ml-agents/Runtime/Grpc/CommunicatorObjects/Capabilities.cs


byte[] descriptorData = global::System.Convert.FromBase64String(
string.Concat(
"CjVtbGFnZW50c19lbnZzL2NvbW11bmljYXRvcl9vYmplY3RzL2NhcGFiaWxp",
"dGllcy5wcm90bxIUY29tbXVuaWNhdG9yX29iamVjdHMifQoYVW5pdHlSTENh",
"cGFiaWxpdGllc1Byb3RvEhoKEmJhc2VSTENhcGFiaWxpdGllcxgBIAEoCBIj",
"Chtjb25jYXRlbmF0ZWRQbmdPYnNlcnZhdGlvbnMYAiABKAgSIAoYY29tcHJl",
"c3NlZENoYW5uZWxNYXBwaW5nGAMgASgIQiWqAiJVbml0eS5NTEFnZW50cy5D",
"b21tdW5pY2F0b3JPYmplY3RzYgZwcm90bzM="));
"dGllcy5wcm90bxIUY29tbXVuaWNhdG9yX29iamVjdHMilAEKGFVuaXR5UkxD",
"YXBhYmlsaXRpZXNQcm90bxIaChJiYXNlUkxDYXBhYmlsaXRpZXMYASABKAgS",
"IwobY29uY2F0ZW5hdGVkUG5nT2JzZXJ2YXRpb25zGAIgASgIEiAKGGNvbXBy",
"ZXNzZWRDaGFubmVsTWFwcGluZxgDIAEoCBIVCg1oeWJyaWRBY3Rpb25zGAQg",
"ASgIQiWqAiJVbml0eS5NTEFnZW50cy5Db21tdW5pY2F0b3JPYmplY3RzYgZw",
"cm90bzM="));
new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.UnityRLCapabilitiesProto), global::Unity.MLAgents.CommunicatorObjects.UnityRLCapabilitiesProto.Parser, new[]{ "BaseRLCapabilities", "ConcatenatedPngObservations", "CompressedChannelMapping" }, null, null, null)
new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.UnityRLCapabilitiesProto), global::Unity.MLAgents.CommunicatorObjects.UnityRLCapabilitiesProto.Parser, new[]{ "BaseRLCapabilities", "ConcatenatedPngObservations", "CompressedChannelMapping", "HybridActions" }, null, null, null)
}));
}
#endregion

baseRLCapabilities_ = other.baseRLCapabilities_;
concatenatedPngObservations_ = other.concatenatedPngObservations_;
compressedChannelMapping_ = other.compressedChannelMapping_;
hybridActions_ = other.hybridActions_;
_unknownFields = pb::UnknownFieldSet.Clone(other._unknownFields);
}

}
}
/// <summary>Field number for the "hybridActions" field.</summary>
public const int HybridActionsFieldNumber = 4;
private bool hybridActions_;
/// <summary>
/// support for hybrid action spaces (discrete + continuous)
/// </summary>
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public bool HybridActions {
get { return hybridActions_; }
set {
hybridActions_ = value;
}
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public override bool Equals(object other) {
return Equals(other as UnityRLCapabilitiesProto);

if (BaseRLCapabilities != other.BaseRLCapabilities) return false;
if (ConcatenatedPngObservations != other.ConcatenatedPngObservations) return false;
if (CompressedChannelMapping != other.CompressedChannelMapping) return false;
if (HybridActions != other.HybridActions) return false;
return Equals(_unknownFields, other._unknownFields);
}

if (BaseRLCapabilities != false) hash ^= BaseRLCapabilities.GetHashCode();
if (ConcatenatedPngObservations != false) hash ^= ConcatenatedPngObservations.GetHashCode();
if (CompressedChannelMapping != false) hash ^= CompressedChannelMapping.GetHashCode();
if (HybridActions != false) hash ^= HybridActions.GetHashCode();
if (_unknownFields != null) {
hash ^= _unknownFields.GetHashCode();
}

output.WriteRawTag(24);
output.WriteBool(CompressedChannelMapping);
}
if (HybridActions != false) {
output.WriteRawTag(32);
output.WriteBool(HybridActions);
}
if (_unknownFields != null) {
_unknownFields.WriteTo(output);
}

size += 1 + 1;
}
if (CompressedChannelMapping != false) {
size += 1 + 1;
}
if (HybridActions != false) {
size += 1 + 1;
}
if (_unknownFields != null) {

if (other.CompressedChannelMapping != false) {
CompressedChannelMapping = other.CompressedChannelMapping;
}
if (other.HybridActions != false) {
HybridActions = other.HybridActions;
}
_unknownFields = pb::UnknownFieldSet.MergeFrom(_unknownFields, other._unknownFields);
}

}
case 24: {
CompressedChannelMapping = input.ReadBool();
break;
}
case 32: {
HybridActions = input.ReadBool();
break;
}
}

44
com.unity.ml-agents/Runtime/Inference/ApplierImpl.cs


using System.Collections.Generic;
using System.Linq;
using Unity.MLAgents.Inference.Utils;
using Unity.MLAgents.Actuators;
using Unity.Barracuda;
using UnityEngine;

/// </summary>
internal class ContinuousActionOutputApplier : TensorApplier.IApplier
{
public void Apply(TensorProxy tensorProxy, IEnumerable<int> actionIds, Dictionary<int, float[]> lastActions)
readonly ActionSpec m_ActionSpec;
public ContinuousActionOutputApplier(ActionSpec actionSpec)
{
m_ActionSpec = actionSpec;
}
public void Apply(TensorProxy tensorProxy, IEnumerable<int> actionIds, Dictionary<int, ActionBuffers> lastActions)
{
var actionSize = tensorProxy.shape[tensorProxy.shape.Length - 1];
var agentIndex = 0;

{
var actionValue = lastActions[agentId];
if (actionValue == null)
var actionBuffer = lastActions[agentId];
if (actionBuffer.IsEmpty())
actionValue = new float[actionSize];
lastActions[agentId] = actionValue;
actionBuffer = new ActionBuffers(m_ActionSpec);
lastActions[agentId] = actionBuffer;
var continuousBuffer = actionBuffer.ContinuousActions;
actionValue[j] = tensorProxy.data[agentIndex, j];
continuousBuffer[j] = tensorProxy.data[agentIndex, j];
}
}
agentIndex++;

readonly int[] m_ActionSize;
readonly Multinomial m_Multinomial;
readonly ITensorAllocator m_Allocator;
readonly ActionSpec m_ActionSpec;
public DiscreteActionOutputApplier(int[] actionSize, int seed, ITensorAllocator allocator)
public DiscreteActionOutputApplier(ActionSpec actionSpec, int seed, ITensorAllocator allocator)
m_ActionSize = actionSize;
m_ActionSize = actionSpec.BranchSizes;
m_ActionSpec = actionSpec;
public void Apply(TensorProxy tensorProxy, IEnumerable<int> actionIds, Dictionary<int, float[]> lastActions)
public void Apply(TensorProxy tensorProxy, IEnumerable<int> actionIds, Dictionary<int, ActionBuffers> lastActions)
{
//var tensorDataProbabilities = tensorProxy.Data as float[,];
var idActionPairList = actionIds as List<int> ?? actionIds.ToList();

{
if (lastActions.ContainsKey(agentId))
{
var actionVal = lastActions[agentId];
if (actionVal == null)
var actionBuffer = lastActions[agentId];
if (actionBuffer.IsEmpty())
actionVal = new float[m_ActionSize.Length];
lastActions[agentId] = actionVal;
actionBuffer = new ActionBuffers(m_ActionSpec);
lastActions[agentId] = actionBuffer;
var discreteBuffer = actionBuffer.DiscreteActions;
actionVal[j] = actionValues[agentIndex, j];
discreteBuffer[j] = (int)actionValues[agentIndex, j];
}
}
agentIndex++;

m_Memories = memories;
}
public void Apply(TensorProxy tensorProxy, IEnumerable<int> actionIds, Dictionary<int, float[]> lastActions)
public void Apply(TensorProxy tensorProxy, IEnumerable<int> actionIds, Dictionary<int, ActionBuffers> lastActions)
{
var agentIndex = 0;
var memorySize = (int)tensorProxy.shape[tensorProxy.shape.Length - 1];

m_Memories = memories;
}
public void Apply(TensorProxy tensorProxy, IEnumerable<int> actionIds, Dictionary<int, float[]> lastActions)
public void Apply(TensorProxy tensorProxy, IEnumerable<int> actionIds, Dictionary<int, ActionBuffers> lastActions)
{
var agentIndex = 0;
var memorySize = (int)tensorProxy.shape[tensorProxy.shape.Length - 1];
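The appliers now write model output into per-agent ActionBuffers rather than raw float arrays, with the discrete applier casting the selected branch values to int. A stripped-down sketch of that copy step for a single agent, ignoring the multinomial sampling and memory handling (ApplyToBuffers and modelOutput are illustrative stand-ins, not the real applier API):

using Unity.MLAgents.Actuators;

public static class ApplierSketch
{
    // Illustrative: copy one agent's model output (one row of tensorProxy.data)
    // into its ActionBuffers.
    public static ActionBuffers ApplyToBuffers(ActionSpec spec, float[] modelOutput)
    {
        var buffers = new ActionBuffers(spec);

        var continuous = buffers.ContinuousActions;
        for (var i = 0; i < continuous.Length; i++)
        {
            continuous[i] = modelOutput[i];
        }

        var discrete = buffers.DiscreteActions;
        for (var i = 0; i < discrete.Length; i++)
        {
            // Discrete branch choices come out of the model as floats and are
            // cast to int, mirroring DiscreteActionOutputApplier above.
            discrete[i] = (int)modelOutput[continuous.Length + i];
        }
        return buffers;
    }
}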

237
com.unity.ml-agents/Runtime/Inference/BarracudaModelParamLoader.cs


/// </summary>
internal class BarracudaModelParamLoader
{
enum ModelActionType
{
Unknown,
Discrete,
Continuous
}
/// Generates the Tensor inputs that are expected to be present in the Model.
/// </summary>
/// <param name="model">
/// The Barracuda engine model for loading static parameters.
/// </param>
/// <returns>TensorProxy IEnumerable with the expected Tensor inputs.</returns>
public static IReadOnlyList<TensorProxy> GetInputTensors(Model model)
{
var tensors = new List<TensorProxy>();
if (model == null)
return tensors;
foreach (var input in model.inputs)
{
tensors.Add(new TensorProxy
{
name = input.name,
valueType = TensorProxy.TensorType.FloatingPoint,
data = null,
shape = input.shape.Select(i => (long)i).ToArray()
});
}
foreach (var mem in model.memories)
{
tensors.Add(new TensorProxy
{
name = mem.input,
valueType = TensorProxy.TensorType.FloatingPoint,
data = null,
shape = TensorUtils.TensorShapeFromBarracuda(mem.shape)
});
}
tensors.Sort((el1, el2) => el1.name.CompareTo(el2.name));
return tensors;
}
public static int GetNumVisualInputs(Model model)
{
var count = 0;
if (model == null)
return count;
foreach (var input in model.inputs)
{
if (input.shape.Length == 4)
{
if (input.name.StartsWith(TensorNames.VisualObservationPlaceholderPrefix))
{
count++;
}
}
}
return count;
}
/// <summary>
/// Generates the Tensor outputs that are expected to be present in the Model.
/// </summary>
/// <param name="model">
/// The Barracuda engine model for loading static parameters
/// </param>
/// <returns>TensorProxy IEnumerable with the expected Tensor outputs</returns>
public static string[] GetOutputNames(Model model)
{
var names = new List<string>();
if (model == null)
{
return names.ToArray();
}
names.Add(TensorNames.ActionOutput);
var memory = (int)model.GetTensorByName(TensorNames.MemorySize)[0];
if (memory > 0)
{
foreach (var mem in model.memories)
{
names.Add(mem.output);
}
}
names.Sort();
return names.ToArray();
}
/// <summary>
/// Factory for the ModelParamLoader : Creates a ModelParamLoader and runs the checks
/// on it.
/// </summary>

return failedModelChecks;
}
foreach (var constantName in TensorNames.RequiredConstants)
var hasExpectedTensors = model.CheckExpectedTensors(failedModelChecks);
if (!hasExpectedTensors)
var tensor = model.GetTensorByName(constantName);
if (tensor == null)
{
failedModelChecks.Add($"Required constant \"{constantName}\" was not found in the model file.");
return failedModelChecks;
}
return failedModelChecks;
var memorySize = (int)model.GetTensorByName(TensorNames.MemorySize)[0];
var isContinuousInt = (int)model.GetTensorByName(TensorNames.IsContinuousControl)[0];
var isContinuous = GetActionType(isContinuousInt);
var actionSize = (int)model.GetTensorByName(TensorNames.ActionOutputShape)[0];
if (modelApiVersion == -1)
{
failedModelChecks.Add(

return failedModelChecks;
}
var modelDiscreteActionSize = isContinuous == ModelActionType.Discrete ? actionSize : 0;
var modelContinuousActionSize = isContinuous == ModelActionType.Continuous ? actionSize : 0;
var memorySize = (int)model.GetTensorByName(TensorNames.MemorySize)[0];
if (memorySize == -1)
{
failedModelChecks.Add($"Missing node in the model provided : {TensorNames.MemorySize}");
return failedModelChecks;
}
CheckIntScalarPresenceHelper(new Dictionary<string, int>()
{
{TensorNames.MemorySize, memorySize},
{TensorNames.IsContinuousControl, isContinuousInt},
{TensorNames.ActionOutputShape, actionSize}
})
CheckInputTensorPresence(model, brainParameters, memorySize, sensorComponents)
CheckInputTensorPresence(model, brainParameters, memorySize, isContinuous, sensorComponents)
CheckOutputTensorPresence(model, memorySize)
failedModelChecks.AddRange(
CheckOutputTensorPresence(model, memorySize))
;
CheckOutputTensorShape(model, brainParameters, actuatorComponents, isContinuous, modelContinuousActionSize, modelDiscreteActionSize)
CheckOutputTensorShape(model, brainParameters, actuatorComponents)
/// Converts the integer value in the model corresponding to the type of control to a
/// ModelActionType.
/// </summary>
/// <param name="isContinuousInt">
/// The integer value in the model indicating the type of control
/// </param>
/// <returns>The equivalent ModelActionType</returns>
static ModelActionType GetActionType(int isContinuousInt)
{
ModelActionType isContinuous;
switch (isContinuousInt)
{
case 0:
isContinuous = ModelActionType.Discrete;
break;
case 1:
isContinuous = ModelActionType.Continuous;
break;
default:
isContinuous = ModelActionType.Unknown;
break;
}
return isContinuous;
}
/// <summary>
/// Given a Dictionary of node names to int values, creates failed checks for any value that
/// has the invalid value of -1.
/// </summary>
/// <param name="requiredScalarFields"> Mapping from node names to int values</param>
/// <returns>The list the error messages of the checks that failed</returns>
static IEnumerable<string> CheckIntScalarPresenceHelper(
Dictionary<string, int> requiredScalarFields)
{
var failedModelChecks = new List<string>();
foreach (var field in requiredScalarFields)
{
if (field.Value == -1)
{
failedModelChecks.Add($"Missing node in the model provided : {field.Key}");
}
}
return failedModelChecks;
}
/// <summary>
/// Generates failed checks that correspond to inputs expected by the model that are not
/// present in the BrainParameters.
/// </summary>

Model model,
BrainParameters brainParameters,
int memory,
ModelActionType isContinuous,
var tensorsNames = GetInputTensors(model).Select(x => x.name).ToList();
var tensorsNames = model.GetInputNames();
// If there is no Vector Observation Input but the Brain Parameters expect one.
if ((brainParameters.VectorObservationSize != 0) &&

"The model does not contain a Vector Observation Placeholder Input. " +
"The model does not contain a Vector Observation Placeholder Input. " +
"You must set the Vector Observation Space Size to 0.");
}

visObsIndex++;
}
var expectedVisualObs = GetNumVisualInputs(model);
var expectedVisualObs = model.GetNumVisualInputs();
// Check if there's not enough visual sensors (too many would be handled above)
if (expectedVisualObs > visObsIndex)
{

}
// If the model uses discrete control but does not have an input for action masks
if (isContinuous == ModelActionType.Discrete)
if (model.HasDiscreteOutputs())
{
if (!tensorsNames.Contains(TensorNames.ActionMaskPlaceholder))
{

static IEnumerable<string> CheckOutputTensorPresence(Model model, int memory)
{
var failedModelChecks = new List<string>();
// If there is no Action Output.
if (!model.outputs.Contains(TensorNames.ActionOutput))
{
failedModelChecks.Add("The model does not contain an Action Output Node.");
}
// If there is no Recurrent Output but the model is Recurrent.
if (memory > 0)

}
// If the model expects an input but it is not in this list
foreach (var tensor in GetInputTensors(model))
foreach (var tensor in model.GetInputTensors())
{
if (!tensorTester.ContainsKey(tensor.name))
{

BrainParameters brainParameters, TensorProxy tensorProxy,
SensorComponent[] sensorComponents, int observableAttributeTotalSize)
{
// TODO: Update this check after integrating ActionSpec into BrainParameters
var numberActionsBp = brainParameters.VectorActionSize.Length;
var numberActionsT = tensorProxy.shape[tensorProxy.shape.Length - 1];
if (numberActionsBp != numberActionsT)

static IEnumerable<string> CheckOutputTensorShape(
Model model,
BrainParameters brainParameters,
ActuatorComponent[] actuatorComponents,
ModelActionType isContinuous,
int modelContinuousActionSize, int modelSumDiscreteBranchSizes)
ActuatorComponent[] actuatorComponents)
if (isContinuous == ModelActionType.Unknown)
{
failedModelChecks.Add("Cannot infer type of Control from the provided model.");
return failedModelChecks;
}
if (isContinuous == ModelActionType.Continuous &&
brainParameters.VectorActionSpaceType != SpaceType.Continuous)
{
failedModelChecks.Add(
"Model has been trained using Continuous Control but the Brain Parameters " +
"suggest Discrete Control.");
return failedModelChecks;
}
if (isContinuous == ModelActionType.Discrete &&
brainParameters.VectorActionSpaceType != SpaceType.Discrete)
{
failedModelChecks.Add(
"Model has been trained using Discrete Control but the Brain Parameters " +
"suggest Continuous Control.");
return failedModelChecks;
}
// This will need to change a bit for hybrid action spaces.
if (isContinuous == ModelActionType.Continuous)
if (model.HasContinuousOutputs())
tensorTester[TensorNames.ActionOutput] = CheckContinuousActionOutputShape;
tensorTester[model.ContinuousOutputName()] = CheckContinuousActionOutputShape;
else
if (model.HasDiscreteOutputs())
tensorTester[TensorNames.ActionOutput] = CheckDiscreteActionOutputShape;
tensorTester[model.DiscreteOutputName()] = CheckDiscreteActionOutputShape;
var modelContinuousActionSize = model.ContinuousOutputSize();
var modelSumDiscreteBranchSizes = model.DiscreteOutputSize();
foreach (var name in model.outputs)
{
if (tensorTester.ContainsKey(name))

4
com.unity.ml-agents/Runtime/Inference/GeneratorImpl.cs


foreach (var infoSensorPair in infos)
{
var info = infoSensorPair.agentInfo;
var pastAction = info.storedVectorActions;
if (pastAction != null)
var pastAction = info.storedVectorActions.DiscreteActions;
if (!pastAction.IsEmpty())
{
for (var j = 0; j < actionSize; j++)
{

12
com.unity.ml-agents/Runtime/Inference/ModelRunner.cs


internal class ModelRunner
{
List<AgentInfoSensorsPair> m_Infos = new List<AgentInfoSensorsPair>();
Dictionary<int, float[]> m_LastActionsReceived = new Dictionary<int, float[]>();
Dictionary<int, ActionBuffers> m_LastActionsReceived = new Dictionary<int, ActionBuffers>();
List<int> m_OrderedAgentsRequestingDecisions = new List<int>();
ITensorAllocator m_TensorAllocator;

m_Engine = null;
}
m_InferenceInputs = BarracudaModelParamLoader.GetInputTensors(barracudaModel);
m_OutputNames = BarracudaModelParamLoader.GetOutputNames(barracudaModel);
m_InferenceInputs = barracudaModel.GetInputTensors();
m_OutputNames = barracudaModel.GetOutputNames();
m_TensorGenerator = new TensorGenerator(
seed, m_TensorAllocator, m_Memories, barracudaModel);
m_TensorApplier = new TensorApplier(

if (!m_LastActionsReceived.ContainsKey(info.episodeId))
{
m_LastActionsReceived[info.episodeId] = null;
m_LastActionsReceived[info.episodeId] = ActionBuffers.Empty;
}
if (info.done)
{

return m_Model == other && m_InferenceDevice == otherInferenceDevice;
}
public float[] GetAction(int agentId)
public ActionBuffers GetAction(int agentId)
return null;
return ActionBuffers.Empty;
}
}
}

35
com.unity.ml-agents/Runtime/Inference/TensorApplier.cs


/// </param>
/// <param name="actionIds"> List of Agents Ids that will be updated using the tensor's data</param>
/// <param name="lastActions"> Dictionary of AgentId to Actions to be updated</param>
void Apply(TensorProxy tensorProxy, IEnumerable<int> actionIds, Dictionary<int, float[]> lastActions);
void Apply(TensorProxy tensorProxy, IEnumerable<int> actionIds, Dictionary<int, ActionBuffers> lastActions);
}
readonly Dictionary<string, IApplier> m_Dict = new Dictionary<string, IApplier>();

Dictionary<int, List<float>> memories,
object barracudaModel = null)
{
actionSpec.CheckNotHybrid();
// If model is null, no inference to run and exception is thrown before reaching here.
if (barracudaModel == null)
{
return;
}
var model = (Model)barracudaModel;
if (!model.SupportsContinuousAndDiscrete())
{
actionSpec.CheckAllContinuousOrDiscrete();
}
m_Dict[TensorNames.ActionOutput] = new ContinuousActionOutputApplier();
var tensorName = model.ContinuousOutputName();
m_Dict[tensorName] = new ContinuousActionOutputApplier(actionSpec);
else
if (actionSpec.NumDiscreteActions > 0)
m_Dict[TensorNames.ActionOutput] =
new DiscreteActionOutputApplier(actionSpec.BranchSizes, seed, allocator);
var tensorName = model.DiscreteOutputName();
m_Dict[tensorName] = new DiscreteActionOutputApplier(actionSpec, seed, allocator);
if (barracudaModel != null)
for (var i = 0; i < model?.memories.Count; i++)
var model = (Model)barracudaModel;
for (var i = 0; i < model?.memories.Count; i++)
{
m_Dict[model.memories[i].output] =
new BarracudaMemoryOutputApplier(model.memories.Count, i, memories);
}
m_Dict[model.memories[i].output] =
new BarracudaMemoryOutputApplier(model.memories.Count, i, memories);
}
}

/// <exception cref="UnityAgentsException"> One of the tensors does not have an
/// associated applier.</exception>
public void ApplyTensors(
IEnumerable<TensorProxy> tensors, IEnumerable<int> actionIds, Dictionary<int, float[]> lastActions)
IEnumerable<TensorProxy> tensors, IEnumerable<int> actionIds, Dictionary<int, ActionBuffers> lastActions)
{
foreach (var tensor in tensors)
{

26
com.unity.ml-agents/Runtime/Inference/TensorGenerator.cs


Dictionary<int, List<float>> memories,
object barracudaModel = null)
{
// If model is null, no inference to run and exception is thrown before reaching here.
if (barracudaModel == null)
{
return;
}
var model = (Model)barracudaModel;
// Generator for Inputs
m_Dict[TensorNames.BatchSizePlaceholder] =
new BatchSizeGenerator(allocator);

new RecurrentInputGenerator(allocator, memories);
if (barracudaModel != null)
for (var i = 0; i < model.memories.Count; i++)
var model = (Model)barracudaModel;
for (var i = 0; i < model.memories.Count; i++)
{
m_Dict[model.memories[i].input] =
new BarracudaRecurrentInputGenerator(i, allocator, memories);
}
m_Dict[model.memories[i].input] =
new BarracudaRecurrentInputGenerator(i, allocator, memories);
}
m_Dict[TensorNames.PreviousActionPlaceholder] =

// Generators for Outputs
m_Dict[TensorNames.ActionOutput] = new BiDimensionalOutputGenerator(allocator);
if (model.HasContinuousOutputs())
{
m_Dict[model.ContinuousOutputName()] = new BiDimensionalOutputGenerator(allocator);
}
if (model.HasDiscreteOutputs())
{
m_Dict[model.DiscreteOutputName()] = new BiDimensionalOutputGenerator(allocator);
}
m_Dict[TensorNames.RecurrentOutput] = new BiDimensionalOutputGenerator(allocator);
m_Dict[TensorNames.ValueEstimateOutput] = new BiDimensionalOutputGenerator(allocator);
}

15
com.unity.ml-agents/Runtime/Inference/TensorNames.cs


public const string recurrentOutputC = "recurrent_out_c";
public const string MemorySize = "memory_size";
public const string VersionNumber = "version_number";
public const string IsContinuousControl = "is_continuous_control";
public const string ActionOutputShape = "action_output_shape";
public const string ActionOutput = "action";
public const string ContinuousActionOutputShape = "continuous_action_output_shape";
public const string DiscreteActionOutputShape = "discrete_action_output_shape";
public const string ContinuousActionOutput = "continuous_actions";
public const string DiscreteActionOutput = "discrete_actions";
public static readonly string[] RequiredConstants =
{
VersionNumber, MemorySize, IsContinuousControl, ActionOutputShape
};
// Deprecated TensorNames entries for backward compatibility
public const string IsContinuousControlDeprecated = "is_continuous_control";
public const string ActionOutputDeprecated = "action";
public const string ActionOutputShapeDeprecated = "action_output_shape";
}
}

19
com.unity.ml-agents/Runtime/Policies/BarracudaPolicy.cs


/// Sensor shapes for the associated Agents. All Agents must have the same shapes for their Sensors.
/// </summary>
List<int[]> m_SensorShapes;
SpaceType m_SpaceType;
ActionSpec m_ActionSpec;
/// <inheritdoc />
public BarracudaPolicy(

{
var modelRunner = Academy.Instance.GetOrCreateModelRunner(model, actionSpec, inferenceDevice);
m_ModelRunner = modelRunner;
actionSpec.CheckNotHybrid();
m_SpaceType = actionSpec.NumContinuousActions > 0 ? SpaceType.Continuous : SpaceType.Discrete;
m_ActionSpec = actionSpec;
}
/// <inheritdoc />

/// <inheritdoc />
public ref readonly ActionBuffers DecideAction()
{
m_ModelRunner?.DecideBatch();
var actions = m_ModelRunner?.GetAction(m_AgentId);
if (m_SpaceType == SpaceType.Continuous)
if (m_ModelRunner == null)
{
m_LastActionBuffer = ActionBuffers.Empty;
}
else
m_LastActionBuffer = new ActionBuffers(actions, Array.Empty<int>());
return ref m_LastActionBuffer;
m_ModelRunner?.DecideBatch();
m_LastActionBuffer = m_ModelRunner.GetAction(m_AgentId);
m_LastActionBuffer = ActionBuffers.FromDiscreteActions(actions);
return ref m_LastActionBuffer;
}

14
com.unity.ml-agents/Runtime/Policies/RemotePolicy.cs


{
int m_AgentId;
string m_FullyQualifiedBehaviorName;
SpaceType m_SpaceType;
ActionSpec m_ActionSpec;
ActionBuffers m_LastActionBuffer;
internal ICommunicator m_Communicator;

m_FullyQualifiedBehaviorName = fullyQualifiedBehaviorName;
m_Communicator = Academy.Instance.Communicator;
m_Communicator.SubscribeBrain(m_FullyQualifiedBehaviorName, actionSpec);
actionSpec.CheckNotHybrid();
m_SpaceType = actionSpec.NumContinuousActions > 0 ? SpaceType.Continuous : SpaceType.Discrete;
m_ActionSpec = actionSpec;
}
/// <inheritdoc />

{
m_Communicator?.DecideBatch();
var actions = m_Communicator?.GetActions(m_FullyQualifiedBehaviorName, m_AgentId);
// TODO figure out how to handle this with multiple space types.
if (m_SpaceType == SpaceType.Continuous)
{
m_LastActionBuffer = new ActionBuffers(actions, Array.Empty<int>());
return ref m_LastActionBuffer;
}
m_LastActionBuffer = ActionBuffers.FromDiscreteActions(actions);
m_LastActionBuffer = actions == null ? ActionBuffers.Empty : (ActionBuffers)actions;
return ref m_LastActionBuffer;
}

12
com.unity.ml-agents/Tests/Editor/Actuators/ActuatorManagerTests.cs


}
[Test]
public void TestFailOnMixedActionSpace()
{
var manager = new ActuatorManager();
var actuator1 = new TestActuator(ActionSpec.MakeDiscrete(new[] { 1, 2, 3, 4 }), "actuator1");
var actuator2 = new TestActuator(ActionSpec.MakeContinuous(3), "actuator2");
manager.Add(actuator1);
manager.Add(actuator2);
LogAssert.Expect(LogType.Assert, "Actuators on the same Agent must have the same action SpaceType.");
manager.ReadyActuatorsForExecution(new[] { actuator1, actuator2 }, 3, 10, 4);
}
[Test]
public void TestFailOnSameActuatorName()
{
var manager = new ActuatorManager();

3
com.unity.ml-agents/Tests/Editor/DemonstrationTests.cs


using UnityEngine;
using System.IO.Abstractions.TestingHelpers;
using System.Reflection;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.CommunicatorObjects;
using Unity.MLAgents.Sensors;
using Unity.MLAgents.Demonstrations;

done = true,
episodeId = 5,
maxStepReached = true,
storedVectorActions = new[] { 0f, 1f },
storedVectorActions = new ActionBuffers(null, new int[] { 0, 1 }),
};

74
com.unity.ml-agents/Tests/Editor/EditModeTestInternalBrainTensorApplier.cs


using Unity.Barracuda;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Inference;
using Unity.MLAgents.Policies;
namespace Unity.MLAgents.Tests
{

[Test]
public void ApplyContinuousActionOutput()
{
var actionSpec = ActionSpec.MakeContinuous(3);
var inputTensor = new TensorProxy()
{
shape = new long[] { 2, 3 },

var applier = new ContinuousActionOutputApplier();
var applier = new ContinuousActionOutputApplier(actionSpec);
var actionDict = new Dictionary<int, float[]>() { { 0, null }, { 1, null } };
var actionDict = new Dictionary<int, ActionBuffers>() { { 0, ActionBuffers.Empty }, { 1, ActionBuffers.Empty } };
Assert.AreEqual(actionDict[0][0], 1);
Assert.AreEqual(actionDict[0][1], 2);
Assert.AreEqual(actionDict[0][2], 3);
Assert.AreEqual(actionDict[0].ContinuousActions[0], 1);
Assert.AreEqual(actionDict[0].ContinuousActions[1], 2);
Assert.AreEqual(actionDict[0].ContinuousActions[2], 3);
Assert.AreEqual(actionDict[1][0], 4);
Assert.AreEqual(actionDict[1][1], 5);
Assert.AreEqual(actionDict[1][2], 6);
Assert.AreEqual(actionDict[1].ContinuousActions[0], 4);
Assert.AreEqual(actionDict[1].ContinuousActions[1], 5);
Assert.AreEqual(actionDict[1].ContinuousActions[2], 6);
var actionSpec = ActionSpec.MakeDiscrete(new int[] { 2, 3 });
var inputTensor = new TensorProxy()
{
shape = new long[] { 2, 5 },

new[] { 0.5f, 22.5f, 0.1f, 5f, 1f, 4f, 5f, 6f, 7f, 8f })
};
var alloc = new TensorCachingAllocator();
var applier = new DiscreteActionOutputApplier(new[] { 2, 3 }, 0, alloc);
var applier = new DiscreteActionOutputApplier(actionSpec, 0, alloc);
var actionDict = new Dictionary<int, float[]>() { { 0, null }, { 1, null } };
var actionDict = new Dictionary<int, ActionBuffers>() { { 0, ActionBuffers.Empty }, { 1, ActionBuffers.Empty } };
Assert.AreEqual(actionDict[0][0], 1);
Assert.AreEqual(actionDict[0][1], 1);
Assert.AreEqual(actionDict[0].DiscreteActions[0], 1);
Assert.AreEqual(actionDict[0].DiscreteActions[1], 1);
Assert.AreEqual(actionDict[1][0], 1);
Assert.AreEqual(actionDict[1][1], 2);
Assert.AreEqual(actionDict[1].DiscreteActions[0], 1);
Assert.AreEqual(actionDict[1].DiscreteActions[1], 2);
alloc.Dispose();
}
[Test]
public void ApplyHybridActionOutput()
{
var actionSpec = new ActionSpec(3, 2, new int[] { 2, 3 });
var continuousInputTensor = new TensorProxy()
{
shape = new long[] { 2, 3 },
data = new Tensor(2, 3, new float[] { 1, 2, 3, 4, 5, 6 })
};
var discreteInputTensor = new TensorProxy()
{
shape = new long[] { 2, 8 },
data = new Tensor(
2,
5,
new[] { 0.5f, 22.5f, 0.1f, 5f, 1f, 4f, 5f, 6f, 7f, 8f })
};
var continuousApplier = new ContinuousActionOutputApplier(actionSpec);
var alloc = new TensorCachingAllocator();
var discreteApplier = new DiscreteActionOutputApplier(actionSpec, 0, alloc);
var agentIds = new List<int>() { 0, 1 };
// Dictionary from AgentId to Action
var actionDict = new Dictionary<int, ActionBuffers>() { { 0, ActionBuffers.Empty }, { 1, ActionBuffers.Empty } };
continuousApplier.Apply(continuousInputTensor, agentIds, actionDict);
discreteApplier.Apply(discreteInputTensor, agentIds, actionDict);
Assert.AreEqual(actionDict[0].ContinuousActions[0], 1);
Assert.AreEqual(actionDict[0].ContinuousActions[1], 2);
Assert.AreEqual(actionDict[0].ContinuousActions[2], 3);
Assert.AreEqual(actionDict[0].DiscreteActions[0], 1);
Assert.AreEqual(actionDict[0].DiscreteActions[1], 1);
Assert.AreEqual(actionDict[1].ContinuousActions[0], 4);
Assert.AreEqual(actionDict[1].ContinuousActions[1], 5);
Assert.AreEqual(actionDict[1].ContinuousActions[2], 6);
Assert.AreEqual(actionDict[1].DiscreteActions[0], 1);
Assert.AreEqual(actionDict[1].DiscreteActions[1], 2);
alloc.Dispose();
}
}

7
com.unity.ml-agents/Tests/Editor/EditModeTestInternalBrainTensorGenerator.cs


using Unity.Barracuda;
using NUnit.Framework;
using UnityEngine;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Inference;
using Unity.MLAgents.Policies;
using Unity.MLAgents.Sensors.Reflection;

var infoA = new AgentInfo
{
storedVectorActions = new[] { 1f, 2f },
discreteActionMasks = null
storedVectorActions = new ActionBuffers(null, new[] { 1, 2 }),
discreteActionMasks = null,
storedVectorActions = new[] { 3f, 4f },
storedVectorActions = new ActionBuffers(null, new[] { 3, 4 }),
discreteActionMasks = new[] { true, false, false, false, false },
};

62
com.unity.ml-agents/Tests/Editor/ModelRunnerTest.cs


using Unity.Barracuda;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Inference;
using Unity.MLAgents.Sensors;
using Unity.MLAgents.Policies;
namespace Unity.MLAgents.Tests

{
const string k_continuous2vis8vec2actionPath = "Packages/com.unity.ml-agents/Tests/Editor/TestModels/continuous2vis8vec2action.nn";
const string k_discrete1vis0vec_2_3action_recurrModelPath = "Packages/com.unity.ml-agents/Tests/Editor/TestModels/discrete1vis0vec_2_3action_recurr.nn";
NNModel continuous2vis8vec2actionModel;
NNModel discrete1vis0vec_2_3action_recurrModel;
const string k_continuousONNXPath = "Packages/com.unity.ml-agents/Tests/Editor/TestModels/continuous2vis8vec2action.onnx";
const string k_discreteONNXPath = "Packages/com.unity.ml-agents/Tests/Editor/TestModels/discrete1vis0vec_2_3action_recurr.onnx";
const string k_hybridONNXPath = "Packages/com.unity.ml-agents/Tests/Editor/TestModels/hybrid0vis53vec_3c_2daction.onnx";
const string k_continuousNNPath = "Packages/com.unity.ml-agents/Tests/Editor/TestModels/continuous2vis8vec2action_deprecated.nn";
const string k_discreteNNPath = "Packages/com.unity.ml-agents/Tests/Editor/TestModels/discrete1vis0vec_2_3action_recurr_deprecated.nn";
NNModel continuousONNXModel;
NNModel discreteONNXModel;
NNModel hybridONNXModel;
NNModel continuousNNModel;
NNModel discreteNNModel;
Test3DSensorComponent sensor_21_20_3;
Test3DSensorComponent sensor_20_22_3;

return ActionSpec.MakeDiscrete(2, 3);
}
ActionSpec GetHybrid0vis53vec_3c_2dActionSpec()
{
return new ActionSpec(3, 1, new int[] { 2 });
}
continuous2vis8vec2actionModel = (NNModel)AssetDatabase.LoadAssetAtPath(k_continuous2vis8vec2actionPath, typeof(NNModel));
discrete1vis0vec_2_3action_recurrModel = (NNModel)AssetDatabase.LoadAssetAtPath(k_discrete1vis0vec_2_3action_recurrModelPath, typeof(NNModel));
continuousONNXModel = (NNModel)AssetDatabase.LoadAssetAtPath(k_continuousONNXPath, typeof(NNModel));
discreteONNXModel = (NNModel)AssetDatabase.LoadAssetAtPath(k_discreteONNXPath, typeof(NNModel));
hybridONNXModel = (NNModel)AssetDatabase.LoadAssetAtPath(k_hybridONNXPath, typeof(NNModel));
continuousNNModel = (NNModel)AssetDatabase.LoadAssetAtPath(k_continuousNNPath, typeof(NNModel));
discreteNNModel = (NNModel)AssetDatabase.LoadAssetAtPath(k_discreteNNPath, typeof(NNModel));
var go = new GameObject("SensorA");
sensor_21_20_3 = go.AddComponent<Test3DSensorComponent>();
sensor_21_20_3.Sensor = new Test3DSensor("SensorA", 21, 20, 3);

[Test]
public void TestModelExist()
{
Assert.IsNotNull(continuous2vis8vec2actionModel);
Assert.IsNotNull(discrete1vis0vec_2_3action_recurrModel);
Assert.IsNotNull(continuousONNXModel);
Assert.IsNotNull(discreteONNXModel);
Assert.IsNotNull(hybridONNXModel);
Assert.IsNotNull(continuousNNModel);
Assert.IsNotNull(discreteNNModel);
var modelRunner = new ModelRunner(continuous2vis8vec2actionModel, GetContinuous2vis8vec2actionActionSpec());
var modelRunner = new ModelRunner(continuousONNXModel, GetContinuous2vis8vec2actionActionSpec());
modelRunner = new ModelRunner(discrete1vis0vec_2_3action_recurrModel, GetDiscrete1vis0vec_2_3action_recurrModelActionSpec());
modelRunner = new ModelRunner(discreteONNXModel, GetDiscrete1vis0vec_2_3action_recurrModelActionSpec());
modelRunner.Dispose();
modelRunner = new ModelRunner(hybridONNXModel, GetHybrid0vis53vec_3c_2dActionSpec());
modelRunner.Dispose();
modelRunner = new ModelRunner(continuousNNModel, GetContinuous2vis8vec2actionActionSpec());
modelRunner.Dispose();
modelRunner = new ModelRunner(discreteNNModel, GetDiscrete1vis0vec_2_3action_recurrModelActionSpec());
modelRunner.Dispose();
}

var modelRunner = new ModelRunner(continuous2vis8vec2actionModel, GetContinuous2vis8vec2actionActionSpec(), InferenceDevice.CPU);
Assert.True(modelRunner.HasModel(continuous2vis8vec2actionModel, InferenceDevice.CPU));
Assert.False(modelRunner.HasModel(continuous2vis8vec2actionModel, InferenceDevice.GPU));
Assert.False(modelRunner.HasModel(discrete1vis0vec_2_3action_recurrModel, InferenceDevice.CPU));
var modelRunner = new ModelRunner(continuousONNXModel, GetContinuous2vis8vec2actionActionSpec(), InferenceDevice.CPU);
Assert.True(modelRunner.HasModel(continuousONNXModel, InferenceDevice.CPU));
Assert.False(modelRunner.HasModel(continuousONNXModel, InferenceDevice.GPU));
Assert.False(modelRunner.HasModel(discreteONNXModel, InferenceDevice.CPU));
modelRunner.Dispose();
}

var actionSpec = GetDiscrete1vis0vec_2_3action_recurrModelActionSpec();
var modelRunner = new ModelRunner(discrete1vis0vec_2_3action_recurrModel, actionSpec);
var modelRunner = new ModelRunner(discreteONNXModel, actionSpec);
var info1 = new AgentInfo();
info1.episodeId = 1;
modelRunner.PutObservations(info1, new[] { sensor_21_20_3.CreateSensor() }.ToList());

modelRunner.DecideBatch();
Assert.IsNotNull(modelRunner.GetAction(1));
Assert.IsNotNull(modelRunner.GetAction(2));
Assert.IsNull(modelRunner.GetAction(3));
Assert.AreEqual(actionSpec.NumDiscreteActions, modelRunner.GetAction(1).Count());
Assert.IsFalse(modelRunner.GetAction(1).Equals(ActionBuffers.Empty));
Assert.IsFalse(modelRunner.GetAction(2).Equals(ActionBuffers.Empty));
Assert.IsTrue(modelRunner.GetAction(3).Equals(ActionBuffers.Empty));
Assert.AreEqual(actionSpec.NumDiscreteActions, modelRunner.GetAction(1).DiscreteActions.Length);
modelRunner.Dispose();
}
}

212
com.unity.ml-agents/Tests/Editor/ParameterLoaderTest.cs


[TestFixture]
public class ParameterLoaderTest
{
const string k_continuous2vis8vec2actionPath = "Packages/com.unity.ml-agents/Tests/Editor/TestModels/continuous2vis8vec2action.nn";
const string k_discrete1vis0vec_2_3action_recurrModelPath = "Packages/com.unity.ml-agents/Tests/Editor/TestModels/discrete1vis0vec_2_3action_recurr.nn";
NNModel continuous2vis8vec2actionModel;
NNModel discrete1vis0vec_2_3action_recurrModel;
// ONNX model with continuous/discrete action output (support hybrid action)
const string k_continuousONNXPath = "Packages/com.unity.ml-agents/Tests/Editor/TestModels/continuous2vis8vec2action.onnx";
const string k_discreteONNXPath = "Packages/com.unity.ml-agents/Tests/Editor/TestModels/discrete1vis0vec_2_3action_recurr.onnx";
const string k_hybridONNXPath = "Packages/com.unity.ml-agents/Tests/Editor/TestModels/hybrid0vis53vec_3c_2daction.onnx";
// NN model with single action output (deprecated, does not support hybrid action).
// Same BrainParameters settings as the corresponding ONNX model.
const string k_continuousNNPath = "Packages/com.unity.ml-agents/Tests/Editor/TestModels/continuous2vis8vec2action_deprecated.nn";
const string k_discreteNNPath = "Packages/com.unity.ml-agents/Tests/Editor/TestModels/discrete1vis0vec_2_3action_recurr_deprecated.nn";
NNModel continuousONNXModel;
NNModel discreteONNXModel;
NNModel hybridONNXModel;
NNModel continuousNNModel;
NNModel discreteNNModel;
Test3DSensorComponent sensor_21_20_3;
Test3DSensorComponent sensor_20_22_3;

return validBrainParameters;
}
// TODO: update and enable this after integrating action spec into BrainParameters
// BrainParameters GetHybridBrainParameters()
// {
// var validBrainParameters = new BrainParameters();
// validBrainParameters.VectorObservationSize = 53;
// validBrainParameters.VectorActionSize = new[] { 2 };
// validBrainParameters.NumStackedVectorObservations = 1;
// validBrainParameters.VectorActionSpaceType = SpaceType.Discrete;
// return validBrainParameters;
// }
continuous2vis8vec2actionModel = (NNModel)AssetDatabase.LoadAssetAtPath(k_continuous2vis8vec2actionPath, typeof(NNModel));
discrete1vis0vec_2_3action_recurrModel = (NNModel)AssetDatabase.LoadAssetAtPath(k_discrete1vis0vec_2_3action_recurrModelPath, typeof(NNModel));
continuousONNXModel = (NNModel)AssetDatabase.LoadAssetAtPath(k_continuousONNXPath, typeof(NNModel));
discreteONNXModel = (NNModel)AssetDatabase.LoadAssetAtPath(k_discreteONNXPath, typeof(NNModel));
hybridONNXModel = (NNModel)AssetDatabase.LoadAssetAtPath(k_hybridONNXPath, typeof(NNModel));
continuousNNModel = (NNModel)AssetDatabase.LoadAssetAtPath(k_continuousNNPath, typeof(NNModel));
discreteNNModel = (NNModel)AssetDatabase.LoadAssetAtPath(k_discreteNNPath, typeof(NNModel));
var go = new GameObject("SensorA");
sensor_21_20_3 = go.AddComponent<Test3DSensorComponent>();
sensor_21_20_3.Sensor = new Test3DSensor("SensorA", 21, 20, 3);

[Test]
public void TestModelExist()
{
Assert.IsNotNull(continuous2vis8vec2actionModel);
Assert.IsNotNull(discrete1vis0vec_2_3action_recurrModel);
Assert.IsNotNull(continuousONNXModel);
Assert.IsNotNull(discreteONNXModel);
Assert.IsNotNull(hybridONNXModel);
Assert.IsNotNull(continuousNNModel);
Assert.IsNotNull(discreteNNModel);
[Test]
public void TestGetInputTensors1()
[TestCase(true)]
[TestCase(false)]
public void TestGetInputTensorsContinuous(bool useDeprecatedNNModel)
var model = ModelLoader.Load(continuous2vis8vec2actionModel);
var inputTensors = BarracudaModelParamLoader.GetInputTensors(model);
var inputNames = inputTensors.Select(x => x.name).ToList();
var model = useDeprecatedNNModel ? ModelLoader.Load(continuousNNModel) : ModelLoader.Load(continuousONNXModel);
var inputNames = model.GetInputNames();
Assert.AreEqual(3, inputNames.Count);
Assert.AreEqual(3, inputNames.Count());
Assert.AreEqual(2, BarracudaModelParamLoader.GetNumVisualInputs(model));
Assert.AreEqual(2, model.GetNumVisualInputs());
Assert.AreEqual(0, BarracudaModelParamLoader.GetInputTensors(null).Count);
Assert.AreEqual(0, BarracudaModelParamLoader.GetNumVisualInputs(null));
model = null;
Assert.AreEqual(0, model.GetInputTensors().Count);
Assert.AreEqual(0, model.GetNumVisualInputs());
[Test]
public void TestGetInputTensors2()
[TestCase(true)]
[TestCase(false)]
public void TestGetInputTensorsDiscrete(bool useDeprecatedNNModel)
var model = ModelLoader.Load(discrete1vis0vec_2_3action_recurrModel);
var inputTensors = BarracudaModelParamLoader.GetInputTensors(model);
var inputNames = inputTensors.Select(x => x.name).ToList();
var model = useDeprecatedNNModel ? ModelLoader.Load(discreteNNModel) : ModelLoader.Load(discreteONNXModel);
var inputNames = model.GetInputNames();
// Model should contain 2 inputs : recurrent and visual 1
Assert.Contains(TensorNames.VisualObservationPlaceholderPrefix + "0", inputNames);

[Test]
public void TestGetOutputTensors1()
public void TestGetInputTensorsHybrid()
{
var model = ModelLoader.Load(hybridONNXModel);
var inputNames = model.GetInputNames();
Assert.Contains(TensorNames.VectorObservationPlaceholder, inputNames);
}
[TestCase(true)]
[TestCase(false)]
public void TestGetOutputTensorsContinuous(bool useDeprecatedNNModel)
var model = ModelLoader.Load(continuous2vis8vec2actionModel);
var outputNames = BarracudaModelParamLoader.GetOutputNames(model);
Assert.Contains(TensorNames.ActionOutput, outputNames);
var model = useDeprecatedNNModel ? ModelLoader.Load(continuousNNModel) : ModelLoader.Load(continuousONNXModel);
var outputNames = model.GetOutputNames();
var actionOutputName = useDeprecatedNNModel ? TensorNames.ActionOutputDeprecated : TensorNames.ContinuousActionOutput;
Assert.Contains(actionOutputName, outputNames);
Assert.AreEqual(0, BarracudaModelParamLoader.GetOutputNames(null).Count());
model = null;
Assert.AreEqual(0, model.GetOutputNames().Count());
[Test]
public void TestGetOutputTensors2()
[TestCase(true)]
[TestCase(false)]
public void TestGetOutputTensorsDiscrete(bool useDeprecatedNNModel)
var model = ModelLoader.Load(discrete1vis0vec_2_3action_recurrModel);
var outputNames = BarracudaModelParamLoader.GetOutputNames(model);
Assert.Contains(TensorNames.ActionOutput, outputNames);
var model = useDeprecatedNNModel ? ModelLoader.Load(discreteNNModel) : ModelLoader.Load(discreteONNXModel);
var outputNames = model.GetOutputNames();
var actionOutputName = useDeprecatedNNModel ? TensorNames.ActionOutputDeprecated : TensorNames.DiscreteActionOutput;
Assert.Contains(actionOutputName, outputNames);
public void TestCheckModelValid1()
public void TestGetOutputTensorsHybrid()
var model = ModelLoader.Load(continuous2vis8vec2actionModel);
var model = ModelLoader.Load(hybridONNXModel);
var outputNames = model.GetOutputNames();
Assert.AreEqual(2, outputNames.Count());
Assert.Contains(TensorNames.ContinuousActionOutput, outputNames);
Assert.Contains(TensorNames.DiscreteActionOutput, outputNames);
model = null;
Assert.AreEqual(0, model.GetOutputNames().Count());
}
[TestCase(true)]
[TestCase(false)]
public void TestCheckModelValidContinuous(bool useDeprecatedNNModel)
{
var model = useDeprecatedNNModel ? ModelLoader.Load(continuousNNModel) : ModelLoader.Load(continuousONNXModel);
var validBrainParameters = GetContinuous2vis8vec2actionBrainParameters();
var errors = BarracudaModelParamLoader.CheckModel(

Assert.AreEqual(0, errors.Count()); // There should not be any errors
}
[Test]
public void TestCheckModelValid2()
[TestCase(true)]
[TestCase(false)]
public void TestCheckModelValidDiscrete(bool useDeprecatedNNModel)
var model = ModelLoader.Load(discrete1vis0vec_2_3action_recurrModel);
var model = useDeprecatedNNModel ? ModelLoader.Load(discreteNNModel) : ModelLoader.Load(discreteONNXModel);
var validBrainParameters = GetDiscrete1vis0vec_2_3action_recurrModelBrainParameters();
var errors = BarracudaModelParamLoader.CheckModel(

Assert.AreEqual(0, errors.Count()); // There should not be any errors
}
[Test]
public void TestCheckModelThrowsVectorObservation1()
// TODO: update and enable this test after integrating action spec into BrainParameters
// [Test]
// public void TestCheckModelValidHybrid()
// {
// var model = ModelLoader.Load(hybridModel);
// var validBrainParameters = GetHybridBrainParameters();
// var errors = BarracudaModelParamLoader.CheckModel(
// model, validBrainParameters,
// new SensorComponent[] { }, new ActuatorComponent[0]
// );
// Assert.AreEqual(0, errors.Count()); // There should not be any errors
// }
[TestCase(true)]
[TestCase(false)]
public void TestCheckModelThrowsVectorObservationContinuous(bool useDeprecatedNNModel)
var model = ModelLoader.Load(continuous2vis8vec2actionModel);
var model = useDeprecatedNNModel ? ModelLoader.Load(continuousNNModel) : ModelLoader.Load(continuousONNXModel);
var brainParameters = GetContinuous2vis8vec2actionBrainParameters();
brainParameters.VectorObservationSize = 9; // Invalid observation

Assert.Greater(errors.Count(), 0);
}
[Test]
public void TestCheckModelThrowsVectorObservation2()
[TestCase(true)]
[TestCase(false)]
public void TestCheckModelThrowsVectorObservationDiscrete(bool useDeprecatedNNModel)
var model = ModelLoader.Load(discrete1vis0vec_2_3action_recurrModel);
var model = useDeprecatedNNModel ? ModelLoader.Load(discreteNNModel) : ModelLoader.Load(discreteONNXModel);
var brainParameters = GetDiscrete1vis0vec_2_3action_recurrModelBrainParameters();
brainParameters.VectorObservationSize = 1; // Invalid observation

[Test]
public void TestCheckModelThrowsAction1()
// TODO: update and enable this test after integrating action spec into BrainParameters
// [Test]
// public void TestCheckModelThrowsVectorObservationHybrid()
// {
// var model = ModelLoader.Load(hybridModel);
// var brainParameters = GetHybridBrainParameters();
// brainParameters.VectorObservationSize = 9; // Invalid observation
// var errors = BarracudaModelParamLoader.CheckModel(
// model, brainParameters,
// new SensorComponent[] { }, new ActuatorComponent[0]
// );
// Assert.Greater(errors.Count(), 0);
// brainParameters = GetContinuous2vis8vec2actionBrainParameters();
// brainParameters.NumStackedVectorObservations = 2;// Invalid stacking
// errors = BarracudaModelParamLoader.CheckModel(
// model, brainParameters,
// new SensorComponent[] { }, new ActuatorComponent[0]
// );
// Assert.Greater(errors.Count(), 0);
// }
[TestCase(true)]
[TestCase(false)]
public void TestCheckModelThrowsActionContinuous(bool useDeprecatedNNModel)
var model = ModelLoader.Load(continuous2vis8vec2actionModel);
var model = useDeprecatedNNModel ? ModelLoader.Load(continuousNNModel) : ModelLoader.Load(continuousONNXModel);
var brainParameters = GetContinuous2vis8vec2actionBrainParameters();
brainParameters.VectorActionSize = new[] { 3 }; // Invalid action

Assert.Greater(errors.Count(), 0);
}
[Test]
public void TestCheckModelThrowsAction2()
[TestCase(true)]
[TestCase(false)]
public void TestCheckModelThrowsActionDiscrete(bool useDeprecatedNNModel)
var model = ModelLoader.Load(discrete1vis0vec_2_3action_recurrModel);
var model = useDeprecatedNNModel ? ModelLoader.Load(discreteNNModel) : ModelLoader.Load(discreteONNXModel);
var brainParameters = GetDiscrete1vis0vec_2_3action_recurrModelBrainParameters();
brainParameters.VectorActionSize = new[] { 3, 3 }; // Invalid action

errors = BarracudaModelParamLoader.CheckModel(model, brainParameters, new SensorComponent[] { sensor_21_20_3 }, new ActuatorComponent[0]);
Assert.Greater(errors.Count(), 0);
}
// TODO: update and enable this test after integrating action spec into BrainParameters
// [Test]
// public void TestCheckModelThrowsActionHybrid()
// {
// var model = ModelLoader.Load(hybridModel);
// var brainParameters = GetHybridBrainParameters();
// brainParameters.VectorActionSize = new[] { 3 }; // Invalid action
// var errors = BarracudaModelParamLoader.CheckModel(model, brainParameters, new SensorComponent[] { sensor_21_20_3, sensor_20_22_3 }, new ActuatorComponent[0]);
// Assert.Greater(errors.Count(), 0);
// brainParameters = GetContinuous2vis8vec2actionBrainParameters();
// brainParameters.VectorActionSpaceType = SpaceType.Discrete;// Invalid SpaceType
// errors = BarracudaModelParamLoader.CheckModel(model, brainParameters, new SensorComponent[] { sensor_21_20_3, sensor_20_22_3 }, new ActuatorComponent[0]);
// Assert.Greater(errors.Count(), 0);
// }
[Test]
public void TestCheckModelThrowsNoModel()

2
com.unity.ml-agents/Tests/Editor/TestModels/continuous2vis8vec2action_deprecated.nn.meta


fileFormatVersion: 2
guid: a75582ff670094ff2996c1c4ab9dfd15
guid: bf4543cc3c6944794bbba065bdf90079
ScriptedImporter:
fileIDToRecycleName:
11400000: main obj

2
com.unity.ml-agents/Tests/Editor/TestModels/discrete1vis0vec_2_3action_recurr_deprecated.nn.meta


fileFormatVersion: 2
guid: 8a92fbcd96caa4ef5a93dd55c0c36705
guid: 6d6040ad621454dd5b713beb5483e347
ScriptedImporter:
fileIDToRecycleName:
11400000: main obj

8
docs/Getting-Started.md


#### Behavior Parameters : Vector Action Space
An Agent is given instructions in the form of a float array of _actions_.
An Agent is given instructions in the form of actions.
The 3D Balance Ball example is programmed to use continuous action space which
is a a vector of numbers that can vary continuously. More specifically, it uses
a `Space Size` of 2 to control the amount of `x` and `z` rotations to apply to
The 3D Balance Ball example is programmed to use continuous actions, which
are a vector of floating-point numbers that can vary continuously. More specifically,
it uses a `Space Size` of 2 to control the amount of `x` and `z` rotations to apply to
itself to keep the ball balanced on its head.
## Running a pre-trained model

15
docs/Learning-Environment-Create-New.md


- `OnEpisodeBegin()`
- `CollectObservations(VectorSensor sensor)`
- `OnActionReceived(float[] vectorAction)`
- `OnActionReceived(ActionBuffers actionBuffers)`
We overview each of these in more detail in the dedicated subsections below.

```csharp
public float forceMultiplier = 10;
public override void OnActionReceived(float[] vectorAction)
public override void OnActionReceived(ActionBuffers actionBuffers)
controlSignal.x = vectorAction[0];
controlSignal.z = vectorAction[1];
controlSignal.x = actionBuffers.ContinuousActions[0];
controlSignal.z = actionBuffers.ContinuousActions[1];
rBody.AddForce(controlSignal * forceMultiplier);
// Rewards

(which correspond to the keyboard arrow keys):
```csharp
public override void Heuristic(float[] actionsOut)
public override void Heuristic(in ActionBuffers actionsOut)
actionsOut[0] = Input.GetAxis("Horizontal");
actionsOut[1] = Input.GetAxis("Vertical");
var continuousActionsOut = actionsOut.ContinuousActions;
continuousActionsOut[0] = Input.GetAxis("Horizontal");
continuousActionsOut[1] = Input.GetAxis("Vertical");
}
```

80
docs/Learning-Environment-Design-Agents.md


## Actions
An action is an instruction from the Policy that the agent carries out. The
action is passed to the Agent as a parameter when the Academy invokes the
agent's `OnActionReceived()` function. Actions for an agent can take one of two
forms, either **Continuous** or **Discrete**.
When you specify that the vector action space is **Continuous**, the action
parameter passed to the Agent is an array of floating point numbers with length
equal to the `Vector Action Space Size` property. When you specify a
**Discrete** vector action space type, the action parameter is an array
containing integers. Each integer is an index into a list or table of commands.
In the **Discrete** vector action space type, the action parameter is an array
of indices. The number of indices in the array is determined by the number of
branches defined in the `Branches Size` property. Each branch corresponds to an
action table, you can specify the size of each table by modifying the `Branches`
property.
action is passed to the Agent as the `ActionBuffers` parameter when the Academy invokes the
agent's `OnActionReceived()` function. There are two types of actions supported:
**Continuous** and **Discrete**.
Neither the Policy nor the training algorithm know anything about what the
action values themselves mean. The training algorithm simply tries different

### Continuous Action Space
When an Agent uses a Policy set to the **Continuous** vector action space, the
action parameter passed to the Agent's `OnActionReceived()` function is an array
with length equal to the `Vector Action Space Size` property value. The
When an Agent's Policy has **Continuous** actions, the
`ActionBuffers.ContinuousActions` passed to the Agent's `OnActionReceived()` function
is an array with length equal to the `Vector Action Space Size` property value. The
individual values in the array have whatever meanings that you ascribe to them.
If you assign an element in the array as the speed of an Agent, for example, the
training process learns to control the speed of the Agent through this

These control values are applied as torques to the bodies making up the arm:
```csharp
public override void OnActionReceived(float[] act)
{
float torque_x = Mathf.Clamp(act[0], -1, 1) * 100f;
float torque_z = Mathf.Clamp(act[1], -1, 1) * 100f;
rbA.AddTorque(new Vector3(torque_x, 0f, torque_z));
public override void OnActionReceived(ActionBuffers actionBuffers)
{
var torqueX = Mathf.Clamp(actionBuffers.ContinuousActions[0], -1f, 1f) * 150f;
var torqueZ = Mathf.Clamp(actionBuffers.ContinuousActions[1], -1f, 1f) * 150f;
m_RbA.AddTorque(new Vector3(torqueX, 0f, torqueZ));
torque_x = Mathf.Clamp(act[2], -1, 1) * 100f;
torque_z = Mathf.Clamp(act[3], -1, 1) * 100f;
rbB.AddTorque(new Vector3(torque_x, 0f, torque_z));
}
torqueX = Mathf.Clamp(actionBuffers.ContinuousActions[2], -1f, 1f) * 150f;
torqueZ = Mathf.Clamp(actionBuffers.ContinuousActions[3], -1f, 1f) * 150f;
m_RbB.AddTorque(new Vector3(torqueX, 0f, torqueZ));
}
```
By default the output from our provided PPO algorithm pre-clamps the values of

### Discrete Action Space
When an Agent uses a **Discrete** vector action space, the action parameter
passed to the Agent's `OnActionReceived()` function is an array containing
indices. With the discrete vector action space, `Branches` is an array of
integers, each value corresponds to the number of possibilities for each branch.
When an Agent's Policy uses **discrete** actions, the
`ActionBuffers.DiscreteActions` passed to the Agent's `OnActionReceived()` function
is an array of integers. When defining the discrete vector action space, `Branches`
is an array of integers, each value corresponds to the number of possibilities for each branch.
For example, if we wanted an Agent that can move in a plane and jump, we could
define two branches (one for motion and one for jumping) because we want our

```csharp
// Get the action index for movement
int movement = Mathf.FloorToInt(act[0]);
int movement = actionBuffers.DiscreteActions[0];
int jump = Mathf.FloorToInt(act[1]);
int jump = actionBuffers.DiscreteActions[1];
// Look up the index in the movement action list:
if (movement == 1) { directionX = -1; }

directionX * 40f, directionY * 300f, directionZ * 40f));
```
Note that the above code example is a simplified extract from the AreaAgent
class, which provides alternate implementations for both the discrete and the
continuous action spaces.
#### Masking Discrete Actions
When using Discrete Actions, it is possible to specify that some actions are

decide to perform the masked action. In order to mask an action, override the
`Agent.CollectDiscreteActionMasks()` virtual method, and call
`DiscreteActionMasker.SetMask()` in it:
`Agent.WriteDiscreteActionMask()` virtual method, and call
`WriteMask()` on the provided `IDiscreteActionMask`:
public override void CollectDiscreteActionMasks(DiscreteActionMasker actionMasker){
actionMasker.SetMask(branch, actionIndices)
public override void WriteDiscreteActionMask(IDiscreteActionMask actionMask)
{
actionMask.WriteMask(branch, actionIndices)
}
```

the action
- `actionIndices` is a list of `int` corresponding to the indices of the actions
that the Agent cannot perform.
that the Agent **cannot** perform.
For example, if you have an Agent with 2 branches and on the first branch
(branch 0) there are 4 possible actions : _"do nothing"_, _"jump"_, _"shoot"_

```csharp
SetMask(0, new int[2]{1,2})
WriteMask(0, new int[2]{1,2})
- You can call `SetMask` multiple times if you want to put masks on multiple
- You can call `WriteMask` multiple times if you want to put masks on multiple
branches.
- You cannot mask all the actions of a branch.
- You cannot mask actions in continuous control.

- Actions can either use `Discrete` or `Continuous` spaces.
- When using `Discrete` it is possible to assign multiple action branches, and
to mask certain actions.
- Agents can either use `Discrete` or `Continuous` actions.
- Discrete actions can have multiple action branches, and it's possible to mask
certain actions so that they won't be taken.
- When using continuous control, action values should be clipped to an
- Continuous action values should be clipped to an
appropriate range. The provided PPO model automatically clips these values
between -1 and 1, but third party training systems may not do so.

64
docs/Python-API.md


terminates the communication.
- **Behavior Specs : `env.behavior_specs`** Returns a Mapping of
`BehaviorName` to `BehaviorSpec` objects (read only).
A `BehaviorSpec` contains information such as the observation shapes, the
action type (multi-discrete or continuous) and the action shape. Note that
A `BehaviorSpec` contains the observation shapes and the
`ActionSpec` (which defines the action shape). Note that
the `BehaviorSpec` for a specific group is fixed throughout the simulation.
The number of entries in the Mapping can change over time in the simulation
if new Agent behaviors are created in the simulation.
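To make the mapping concrete, here is a minimal sketch of inspecting `env.behavior_specs` from Python. It assumes `env` connects to a Unity Editor in Play mode (`file_name=None`); everything else uses only the properties described above.

```python
from mlagents_envs.environment import UnityEnvironment

# Attach to a Unity Editor in Play mode (or pass the path of a built environment).
env = UnityEnvironment(file_name=None)
env.reset()

# behavior_specs maps each BehaviorName to its (fixed) BehaviorSpec.
for behavior_name, spec in env.behavior_specs.items():
    print(behavior_name)
    print("  observation shapes :", spec.observation_shapes)
    print("  continuous actions :", spec.action_spec.continuous_size)
    print("  discrete branches  :", spec.action_spec.discrete_branches)

env.close()
```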

number of agents is not guaranteed to remain constant during the simulation
and it is not unusual to have either `DecisionSteps` or `TerminalSteps`
contain no Agents at all.
- **Set Actions :`env.set_actions(behavior_name: str, action: np.array)`** Sets
the actions for a whole agent group. `action` is a 2D `np.array` of
`dtype=np.int32` in the discrete action case and `dtype=np.float32` in the
continuous action case. The first dimension of `action` is the number of
agents that requested a decision since the last call to `env.step()`. The
second dimension is the number of discrete actions in multi-discrete action
type and the number of actions in continuous action type.
- **Set Actions :`env.set_actions(behavior_name: str, action: ActionTuple)`** Sets
the actions for a whole agent group. `action` is an `ActionTuple`, which
is made up of a 2D `np.array` of `dtype=np.int32` for discrete actions, and
`dtype=np.float32` for continuous actions. The first dimension of `np.array`
in the tuple is the number of agents that requested a decision since the
last call to `env.step()`. The second dimension is the number of discrete or
continuous actions for the corresponding array.
`env.set_action_for_agent(agent_group: str, agent_id: int, action: np.array)`**
`env.set_action_for_agent(agent_group: str, agent_id: int, action: ActionTuple)`**
identifier of the Agent. Action is a 1D array of type `dtype=np.int32` and
size equal to the number of discrete actions in multi-discrete action type and
of type `dtype=np.float32` and size equal to the number of actions in
continuous action type.
identifier of the Agent. `action` is an `ActionTuple` as described above.
**Note:** If no action is provided for an agent group between two calls to
`env.step()` then the default action will be all zeros (in either discrete or
continuous action space)
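As a minimal sketch of the two calls described above (assuming `env` is an open `UnityEnvironment` with at least one registered behavior; the all-zero actions are just placeholders):

```python
import numpy as np
from mlagents_envs.base_env import ActionTuple

behavior_name = list(env.behavior_specs)[0]
action_spec = env.behavior_specs[behavior_name].action_spec

decision_steps, terminal_steps = env.get_steps(behavior_name)
n_agents = len(decision_steps)

# Continuous part: float32, shape (n_agents, continuous_size).
continuous = np.zeros((n_agents, action_spec.continuous_size), dtype=np.float32)
# Discrete part: int32, shape (n_agents, discrete_size), one index per branch.
discrete = np.zeros((n_agents, action_spec.discrete_size), dtype=np.int32)

env.set_actions(behavior_name, ActionTuple(continuous=continuous, discrete=discrete))
env.step()
```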

- `agent_id` is an int vector of length batch size containing unique identifier
for the corresponding Agent. This is used to track Agents across simulation
steps.
- `action_mask` is an optional list of two dimensional array of booleans. Only
available in multi-discrete action space type. Each array corresponds to an
- `action_mask` is an optional list of two dimensional arrays of booleans which is only
available when using multi-discrete actions. Each array corresponds to an
action branch. The first dimension of each array is the batch size and the
second contains a mask for each action of the branch. If true, the action is
not available for the agent during this simulation step.
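For instance, a short sketch of reading the mask out of a `DecisionSteps` object. It reuses `env` and `behavior_name` from the snippet above; treating `action_mask` as `None` for purely continuous behaviors is an assumption here.

```python
import numpy as np

decision_steps, _ = env.get_steps(behavior_name)

if decision_steps.action_mask is not None:
    # One boolean array per discrete branch, each of shape (n_agents, branch_size).
    for branch, mask in enumerate(decision_steps.action_mask):
        # True means the action index is unavailable this step.
        print(f"branch {branch}, agent 0 masked indices:", np.nonzero(mask[0])[0])
```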

- `reward` is a float. Corresponds to the rewards collected by the agent since
the last simulation step.
- `agent_id` is an int and an unique identifier for the corresponding Agent.
- `action_mask` is an optional list of one dimensional array of booleans. Only
available in multi-discrete action space type. Each array corresponds to an
- `action_mask` is an optional list of one dimensional arrays of booleans which is only
available when using multi-discrete actions. Each array corresponds to an
action branch. Each array contains a mask for each action of the branch. If
true, the action is not available for the agent during this simulation step.

#### BehaviorSpec
An Agent behavior can either have discrete or continuous actions. To check which
type it is, use `spec.is_action_discrete()` or `spec.is_action_continuous()` to
see which one it is. If discrete, the action tensors are expected to be
`np.int32`. If continuous, the actions are expected to be `np.float32`.
A `BehaviorSpec` has the following fields :
- `observation_shapes` is a List of Tuples of int : Each Tuple corresponds to an

- `action_type` is the type of data of the action. it can be discrete or
continuous. If discrete, the action tensors are expected to be `np.int32`. If
continuous, the actions are expected to be `np.float32`.
- `action_size` is an `int` corresponding to the expected dimension of the
action array.
- In continuous action space it is the number of floats that constitute the
action.
- In discrete action space (same as multi-discrete) it corresponds to the
number of branches (the number of independent actions)
- `discrete_action_branches` is a Tuple of int only for discrete action space.
Each int corresponds to the number of different options for each branch of the
action. For example : In a game direction input (no movement, left, right) and
- `action_spec` is an `ActionSpec` namedtuple that defines the number and types
of actions for the Agent.
An `ActionSpec` has the following fields and properties:
- `continuous_size` is the number of floats that constitute the continuous actions.
- `discrete_size` is the number of branches (the number of independent actions) that
constitute the multi-discrete actions.
- `discrete_branches` is a Tuple of ints. Each int corresponds to the number of
different options for each branch of the action. For example:
In a game direction input (no movement, left, right) and
the first one with 3 options and the second with 2 options. (`action_size = 2`
the first one with 3 options and the second with 2 options. (`discrete_size = 2`
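To illustrate these fields, a small sketch that reads the `ActionSpec` off a registered behavior and uses its helpers; the printed sizes are hypothetical, and `env` and `behavior_name` are assumed as in the earlier snippets.

```python
action_spec = env.behavior_specs[behavior_name].action_spec

print(action_spec.continuous_size)    # e.g. 3 continuous actions
print(action_spec.discrete_size)      # e.g. 2 discrete branches
print(action_spec.discrete_branches)  # e.g. (3, 2)

# Both helpers return ActionTuple objects sized for n_agents:
zeros = action_spec.empty_action(5)    # zeros.continuous: (5, 3), zeros.discrete: (5, 2)
sample = action_spec.random_action(5)  # uniform floats in [-1, 1] and random per-branch indices
```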
### Communicating additional information with the Environment

4
docs/Training-Configuration-File.md


A few considerations when deciding to use memory:
- LSTM does not work well with continuous vector action space. Please use
discrete vector action space for better results.
- LSTM does not work well with continuous vector actions. Please use
discrete actions for better results.
- Since the memories must be sent back and forth between Python and Unity, using
too large `memory_size` will slow down training.
- Adding a recurrent layer increases the complexity of the neural network, it is

10
gym-unity/gym_unity/envs/__init__.py


import gym
from gym import error, spaces
from mlagents_envs.base_env import BaseEnv
from mlagents_envs.base_env import ActionTuple, BaseEnv
from mlagents_envs.base_env import DecisionSteps, TerminalSteps
from mlagents_envs import logging_util

action = self._flattener.lookup_action(action)
action = np.array(action).reshape((1, self.action_size))
self._env.set_actions(self.name, action)
action_tuple = ActionTuple()
if self.group_spec.action_spec.is_continuous():
action_tuple.add_continuous(action)
else:
action_tuple.add_discrete(action)
self._env.set_actions(self.name, action_tuple)
self._env.step()
decision_step, terminal_step = self._env.get_steps(self.name)

148
ml-agents-envs/mlagents_envs/base_env.py


)
class _ActionTupleBase(ABC):
"""
An object whose fields correspond to action data of continuous and discrete
spaces. Dimensions are of (n_agents, continuous_size) and (n_agents, discrete_size),
respectively. Note, this also holds when continuous or discrete size is
zero.
"""
def __init__(
self,
continuous: Optional[np.ndarray] = None,
discrete: Optional[np.ndarray] = None,
):
self._continuous: Optional[np.ndarray] = None
self._discrete: Optional[np.ndarray] = None
if continuous is not None:
self.add_continuous(continuous)
if discrete is not None:
self.add_discrete(discrete)
@property
def continuous(self) -> np.ndarray:
return self._continuous
@property
def discrete(self) -> np.ndarray:
return self._discrete
def add_continuous(self, continuous: np.ndarray) -> None:
if continuous.dtype != np.float32:
continuous = continuous.astype(np.float32, copy=False)
if self._discrete is None:
self._discrete = np.zeros(
(continuous.shape[0], 0), dtype=self.discrete_dtype
)
self._continuous = continuous
def add_discrete(self, discrete: np.ndarray) -> None:
if discrete.dtype != self.discrete_dtype:
discrete = discrete.astype(self.discrete_dtype, copy=False)
if self._continuous is None:
self._continuous = np.zeros((discrete.shape[0], 0), dtype=np.float32)
self._discrete = discrete
@property
@abstractmethod
def discrete_dtype(self) -> np.dtype:
pass
class ActionTuple(_ActionTupleBase):
"""
An object whose fields correspond to actions of different types.
Continuous and discrete actions are numpy arrays of type float32 and
int32, respectively and are type checked on construction.
Dimensions are of (n_agents, continuous_size) and (n_agents, discrete_size),
respectively. Note, this also holds when continuous or discrete size is
zero.
"""
@property
def discrete_dtype(self) -> np.dtype:
"""
The dtype of a discrete action.
"""
return np.int32
class ActionSpec(NamedTuple):
"""
A NamedTuple containing utility functions and information about the action spaces

"""
return len(self.discrete_branches)
def empty_action(self, n_agents: int) -> np.ndarray:
def empty_action(self, n_agents: int) -> ActionTuple:
Generates a numpy array corresponding to an empty action (all zeros)
Generates ActionTuple corresponding to an empty action (all zeros)
if self.is_continuous():
return np.zeros((n_agents, self.continuous_size), dtype=np.float32)
return np.zeros((n_agents, self.discrete_size), dtype=np.int32)
_continuous = np.zeros((n_agents, self.continuous_size), dtype=np.float32)
_discrete = np.zeros((n_agents, self.discrete_size), dtype=np.int32)
return ActionTuple(continuous=_continuous, discrete=_discrete)
def random_action(self, n_agents: int) -> np.ndarray:
def random_action(self, n_agents: int) -> ActionTuple:
Generates a numpy array corresponding to a random action (either discrete
Generates ActionTuple corresponding to a random action (either discrete
if self.is_continuous():
action = np.random.uniform(
low=-1.0, high=1.0, size=(n_agents, self.continuous_size)
).astype(np.float32)
else:
branch_size = self.discrete_branches
action = np.column_stack(
_continuous = np.random.uniform(
low=-1.0, high=1.0, size=(n_agents, self.continuous_size)
)
_discrete = np.zeros((n_agents, self.discrete_size), dtype=np.int32)
if self.discrete_size > 0:
_discrete = np.column_stack(
branch_size[i], # type: ignore
self.discrete_branches[i], # type: ignore
size=(n_agents),
dtype=np.int32,
)

return action
return ActionTuple(continuous=_continuous, discrete=_discrete)
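A quick sketch of the new return type (sizes are made up): both helpers now always return an ActionTuple with continuous and discrete fields, even when one side has size zero:

from mlagents_envs.base_env import ActionSpec

# Hybrid spec: 2 continuous actions plus discrete branches of sizes 3 and 2.
spec = ActionSpec(2, (3, 2))

empty = spec.empty_action(4)
assert empty.continuous.shape == (4, 2)
assert empty.discrete.shape == (4, 2)

rand = spec.random_action(4)
assert -1.0 <= rand.continuous.min() and rand.continuous.max() <= 1.0
assert (rand.discrete >= 0).all() and (rand.discrete < [3, 2]).all()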
self, actions: np.ndarray, n_agents: Optional[int], name: str
) -> np.ndarray:
self, actions: ActionTuple, n_agents: Optional[int], name: str
) -> ActionTuple:
if self.continuous_size > 0:
_size = self.continuous_size
else:
_size = self.discrete_size
_expected_shape = (n_agents, _size) if n_agents is not None else (_size,)
if actions.shape != _expected_shape:
_expected_shape = (
(n_agents, self.continuous_size)
if n_agents is not None
else (self.continuous_size,)
)
if actions.continuous.shape != _expected_shape:
f"The behavior {name} needs an input of dimension "
f"The behavior {name} needs a continuous input of dimension "
f"received input of dimension {actions.shape}"
f"received input of dimension {actions.continuous.shape}"
_expected_type = np.float32 if self.is_continuous() else np.int32
if actions.dtype != _expected_type:
actions = actions.astype(_expected_type)
_expected_shape = (
(n_agents, self.discrete_size)
if n_agents is not None
else (self.discrete_size,)
)
if actions.discrete.shape != _expected_shape:
raise UnityActionException(
f"The behavior {name} needs a discrete input of dimension "
f"{_expected_shape} for (<number of agents>, <action size>) but "
f"received input of dimension {actions.discrete.shape}"
)
return actions
@staticmethod

"""
@abstractmethod
def set_actions(self, behavior_name: BehaviorName, action: np.ndarray) -> None:
def set_actions(self, behavior_name: BehaviorName, action: ActionTuple) -> None:
:param action: A two dimensional np.ndarray corresponding to the action
(either int or float)
:param action: ActionTuple tuple of continuous and/or discrete action.
Actions are np.arrays with dimensions (n_agents, continuous_size) and
(n_agents, discrete_size), respectively.
self, behavior_name: BehaviorName, agent_id: AgentId, action: np.ndarray
self, behavior_name: BehaviorName, agent_id: AgentId, action: ActionTuple
) -> None:
"""
Sets the action for one of the agents in the simulation for the next

:param action: A one dimensional np.ndarray corresponding to the action
(either int or float)
:param action: ActionTuple tuple of continuous and/or discrete action
Actions are np.arrays with dimensions (1, continuous_size) and
(1, discrete_size), respectively. Note, the initial dimension of 1 is because
this action is meant for a single agent.
"""
@abstractmethod
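A hedged sketch of calling the updated API (the function name and zero-valued actions are placeholders): the ActionTuple passed to set_actions must use per-field shapes of (n_agents, continuous_size) and (n_agents, discrete_size):

import numpy as np
from mlagents_envs.base_env import ActionTuple, BaseEnv, BehaviorName

def send_zero_actions(env: BaseEnv, behavior_name: BehaviorName) -> None:
    decision_steps, _ = env.get_steps(behavior_name)
    spec = env.behavior_specs[behavior_name].action_spec
    n_agents = len(decision_steps)
    action = ActionTuple(
        continuous=np.zeros((n_agents, spec.continuous_size), dtype=np.float32),
        discrete=np.zeros((n_agents, spec.discrete_size), dtype=np.int32),
    )
    env.set_actions(behavior_name, action)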

22
ml-agents-envs/mlagents_envs/communicator_objects/agent_action_pb2.py


name='mlagents_envs/communicator_objects/agent_action.proto',
package='communicator_objects',
syntax='proto3',
serialized_pb=_b('\n5mlagents_envs/communicator_objects/agent_action.proto\x12\x14\x63ommunicator_objects\"K\n\x10\x41gentActionProto\x12\x16\n\x0evector_actions\x18\x01 \x03(\x02\x12\r\n\x05value\x18\x04 \x01(\x02J\x04\x08\x02\x10\x03J\x04\x08\x03\x10\x04J\x04\x08\x05\x10\x06\x42%\xaa\x02\"Unity.MLAgents.CommunicatorObjectsb\x06proto3')
serialized_pb=_b('\n5mlagents_envs/communicator_objects/agent_action.proto\x12\x14\x63ommunicator_objects\"\x8c\x01\n\x10\x41gentActionProto\x12!\n\x19vector_actions_deprecated\x18\x01 \x03(\x02\x12\r\n\x05value\x18\x04 \x01(\x02\x12\x1a\n\x12\x63ontinuous_actions\x18\x06 \x03(\x02\x12\x18\n\x10\x64iscrete_actions\x18\x07 \x03(\x05J\x04\x08\x02\x10\x03J\x04\x08\x03\x10\x04J\x04\x08\x05\x10\x06\x42%\xaa\x02\"Unity.MLAgents.CommunicatorObjectsb\x06proto3')
)

containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='vector_actions', full_name='communicator_objects.AgentActionProto.vector_actions', index=0,
name='vector_actions_deprecated', full_name='communicator_objects.AgentActionProto.vector_actions_deprecated', index=0,
number=1, type=2, cpp_type=6, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,

message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='continuous_actions', full_name='communicator_objects.AgentActionProto.continuous_actions', index=2,
number=6, type=2, cpp_type=6, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='discrete_actions', full_name='communicator_objects.AgentActionProto.discrete_actions', index=3,
number=7, type=5, cpp_type=1, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
],
extensions=[
],

extension_ranges=[],
oneofs=[
],
serialized_start=79,
serialized_end=154,
serialized_start=80,
serialized_end=220,
)
DESCRIPTOR.message_types_by_name['AgentActionProto'] = _AGENTACTIONPROTO
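For orientation, a minimal sketch of how the regenerated message can carry both halves of a hybrid action (values are made up; older readers still see the flattened deprecated field if the writer mirrors it):

from mlagents_envs.communicator_objects.agent_action_pb2 import AgentActionProto

action = AgentActionProto()
action.continuous_actions.extend([0.25, -0.75])          # new repeated float field (tag 6)
action.discrete_actions.extend([2, 0])                   # new repeated int32 field (tag 7)
action.vector_actions_deprecated.extend([0.25, -0.75])   # kept for old consumers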

12
ml-agents-envs/mlagents_envs/communicator_objects/agent_action_pb2.pyi


class AgentActionProto(google___protobuf___message___Message):
DESCRIPTOR: google___protobuf___descriptor___Descriptor = ...
vector_actions = ... # type: google___protobuf___internal___containers___RepeatedScalarFieldContainer[builtin___float]
vector_actions_deprecated = ... # type: google___protobuf___internal___containers___RepeatedScalarFieldContainer[builtin___float]
continuous_actions = ... # type: google___protobuf___internal___containers___RepeatedScalarFieldContainer[builtin___float]
discrete_actions = ... # type: google___protobuf___internal___containers___RepeatedScalarFieldContainer[builtin___int]
vector_actions : typing___Optional[typing___Iterable[builtin___float]] = None,
vector_actions_deprecated : typing___Optional[typing___Iterable[builtin___float]] = None,
continuous_actions : typing___Optional[typing___Iterable[builtin___float]] = None,
discrete_actions : typing___Optional[typing___Iterable[builtin___int]] = None,
) -> None: ...
@classmethod
def FromString(cls, s: builtin___bytes) -> AgentActionProto: ...

def ClearField(self, field_name: typing_extensions___Literal[u"value",u"vector_actions"]) -> None: ...
def ClearField(self, field_name: typing_extensions___Literal[u"continuous_actions",u"discrete_actions",u"value",u"vector_actions_deprecated"]) -> None: ...
def ClearField(self, field_name: typing_extensions___Literal[u"value",b"value",u"vector_actions",b"vector_actions"]) -> None: ...
def ClearField(self, field_name: typing_extensions___Literal[u"continuous_actions",b"continuous_actions",u"discrete_actions",b"discrete_actions",u"value",b"value",u"vector_actions_deprecated",b"vector_actions_deprecated"]) -> None: ...

82
ml-agents-envs/mlagents_envs/communicator_objects/brain_parameters_pb2.py


name='mlagents_envs/communicator_objects/brain_parameters.proto',
package='communicator_objects',
syntax='proto3',
serialized_pb=_b('\n9mlagents_envs/communicator_objects/brain_parameters.proto\x12\x14\x63ommunicator_objects\x1a\x33mlagents_envs/communicator_objects/space_type.proto\"\xd9\x01\n\x14\x42rainParametersProto\x12\x1a\n\x12vector_action_size\x18\x03 \x03(\x05\x12\"\n\x1avector_action_descriptions\x18\x05 \x03(\t\x12\x46\n\x18vector_action_space_type\x18\x06 \x01(\x0e\x32$.communicator_objects.SpaceTypeProto\x12\x12\n\nbrain_name\x18\x07 \x01(\t\x12\x13\n\x0bis_training\x18\x08 \x01(\x08J\x04\x08\x01\x10\x02J\x04\x08\x02\x10\x03J\x04\x08\x04\x10\x05\x42%\xaa\x02\"Unity.MLAgents.CommunicatorObjectsb\x06proto3')
serialized_pb=_b('\n9mlagents_envs/communicator_objects/brain_parameters.proto\x12\x14\x63ommunicator_objects\x1a\x33mlagents_envs/communicator_objects/space_type.proto\"\x8b\x01\n\x0f\x41\x63tionSpecProto\x12\x1e\n\x16num_continuous_actions\x18\x01 \x01(\x05\x12\x1c\n\x14num_discrete_actions\x18\x02 \x01(\x05\x12\x1d\n\x15\x64iscrete_branch_sizes\x18\x03 \x03(\x05\x12\x1b\n\x13\x61\x63tion_descriptions\x18\x04 \x03(\t\"\xb6\x02\n\x14\x42rainParametersProto\x12%\n\x1dvector_action_size_deprecated\x18\x03 \x03(\x05\x12-\n%vector_action_descriptions_deprecated\x18\x05 \x03(\t\x12Q\n#vector_action_space_type_deprecated\x18\x06 \x01(\x0e\x32$.communicator_objects.SpaceTypeProto\x12\x12\n\nbrain_name\x18\x07 \x01(\t\x12\x13\n\x0bis_training\x18\x08 \x01(\x08\x12:\n\x0b\x61\x63tion_spec\x18\t \x01(\x0b\x32%.communicator_objects.ActionSpecProtoJ\x04\x08\x01\x10\x02J\x04\x08\x02\x10\x03J\x04\x08\x04\x10\x05\x42%\xaa\x02\"Unity.MLAgents.CommunicatorObjectsb\x06proto3')
,
dependencies=[mlagents__envs_dot_communicator__objects_dot_space__type__pb2.DESCRIPTOR,])

_ACTIONSPECPROTO = _descriptor.Descriptor(
name='ActionSpecProto',
full_name='communicator_objects.ActionSpecProto',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='num_continuous_actions', full_name='communicator_objects.ActionSpecProto.num_continuous_actions', index=0,
number=1, type=5, cpp_type=1, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='num_discrete_actions', full_name='communicator_objects.ActionSpecProto.num_discrete_actions', index=1,
number=2, type=5, cpp_type=1, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='discrete_branch_sizes', full_name='communicator_objects.ActionSpecProto.discrete_branch_sizes', index=2,
number=3, type=5, cpp_type=1, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='action_descriptions', full_name='communicator_objects.ActionSpecProto.action_descriptions', index=3,
number=4, type=9, cpp_type=9, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
],
extensions=[
],
nested_types=[],
enum_types=[
],
options=None,
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[
],
serialized_start=137,
serialized_end=276,
)
_BRAINPARAMETERSPROTO = _descriptor.Descriptor(
name='BrainParametersProto',
full_name='communicator_objects.BrainParametersProto',

fields=[
_descriptor.FieldDescriptor(
name='vector_action_size', full_name='communicator_objects.BrainParametersProto.vector_action_size', index=0,
name='vector_action_size_deprecated', full_name='communicator_objects.BrainParametersProto.vector_action_size_deprecated', index=0,
number=3, type=5, cpp_type=1, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,

name='vector_action_descriptions', full_name='communicator_objects.BrainParametersProto.vector_action_descriptions', index=1,
name='vector_action_descriptions_deprecated', full_name='communicator_objects.BrainParametersProto.vector_action_descriptions_deprecated', index=1,
number=5, type=9, cpp_type=9, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,

name='vector_action_space_type', full_name='communicator_objects.BrainParametersProto.vector_action_space_type', index=2,
name='vector_action_space_type_deprecated', full_name='communicator_objects.BrainParametersProto.vector_action_space_type_deprecated', index=2,
number=6, type=14, cpp_type=8, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,

message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='action_spec', full_name='communicator_objects.BrainParametersProto.action_spec', index=5,
number=9, type=11, cpp_type=10, label=1,
has_default_value=False, default_value=None,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
],
extensions=[
],

extension_ranges=[],
oneofs=[
],
serialized_start=137,
serialized_end=354,
serialized_start=279,
serialized_end=589,
_BRAINPARAMETERSPROTO.fields_by_name['vector_action_space_type'].enum_type = mlagents__envs_dot_communicator__objects_dot_space__type__pb2._SPACETYPEPROTO
_BRAINPARAMETERSPROTO.fields_by_name['vector_action_space_type_deprecated'].enum_type = mlagents__envs_dot_communicator__objects_dot_space__type__pb2._SPACETYPEPROTO
_BRAINPARAMETERSPROTO.fields_by_name['action_spec'].message_type = _ACTIONSPECPROTO
DESCRIPTOR.message_types_by_name['ActionSpecProto'] = _ACTIONSPECPROTO
ActionSpecProto = _reflection.GeneratedProtocolMessageType('ActionSpecProto', (_message.Message,), dict(
DESCRIPTOR = _ACTIONSPECPROTO,
__module__ = 'mlagents_envs.communicator_objects.brain_parameters_pb2'
# @@protoc_insertion_point(class_scope:communicator_objects.ActionSpecProto)
))
_sym_db.RegisterMessage(ActionSpecProto)
BrainParametersProto = _reflection.GeneratedProtocolMessageType('BrainParametersProto', (_message.Message,), dict(
DESCRIPTOR = _BRAINPARAMETERSPROTO,
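A small sketch (made-up values) of the regenerated messages: the brain parameters now embed an ActionSpecProto instead of relying only on the deprecated vector_action_* fields:

from mlagents_envs.communicator_objects.brain_parameters_pb2 import (
    ActionSpecProto,
    BrainParametersProto,
)

spec = ActionSpecProto(
    num_continuous_actions=2,
    num_discrete_actions=2,
    discrete_branch_sizes=[3, 2],
)
bp = BrainParametersProto(
    brain_name="SomeBehavior", is_training=True, action_spec=spec
)
assert list(bp.action_spec.discrete_branch_sizes) == [3, 2]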

45
ml-agents-envs/mlagents_envs/communicator_objects/brain_parameters_pb2.pyi


builtin___int = int
class ActionSpecProto(google___protobuf___message___Message):
DESCRIPTOR: google___protobuf___descriptor___Descriptor = ...
num_continuous_actions = ... # type: builtin___int
num_discrete_actions = ... # type: builtin___int
discrete_branch_sizes = ... # type: google___protobuf___internal___containers___RepeatedScalarFieldContainer[builtin___int]
action_descriptions = ... # type: google___protobuf___internal___containers___RepeatedScalarFieldContainer[typing___Text]
def __init__(self,
*,
num_continuous_actions : typing___Optional[builtin___int] = None,
num_discrete_actions : typing___Optional[builtin___int] = None,
discrete_branch_sizes : typing___Optional[typing___Iterable[builtin___int]] = None,
action_descriptions : typing___Optional[typing___Iterable[typing___Text]] = None,
) -> None: ...
@classmethod
def FromString(cls, s: builtin___bytes) -> ActionSpecProto: ...
def MergeFrom(self, other_msg: google___protobuf___message___Message) -> None: ...
def CopyFrom(self, other_msg: google___protobuf___message___Message) -> None: ...
if sys.version_info >= (3,):
def ClearField(self, field_name: typing_extensions___Literal[u"action_descriptions",u"discrete_branch_sizes",u"num_continuous_actions",u"num_discrete_actions"]) -> None: ...
else:
def ClearField(self, field_name: typing_extensions___Literal[u"action_descriptions",b"action_descriptions",u"discrete_branch_sizes",b"discrete_branch_sizes",u"num_continuous_actions",b"num_continuous_actions",u"num_discrete_actions",b"num_discrete_actions"]) -> None: ...
vector_action_size = ... # type: google___protobuf___internal___containers___RepeatedScalarFieldContainer[builtin___int]
vector_action_descriptions = ... # type: google___protobuf___internal___containers___RepeatedScalarFieldContainer[typing___Text]
vector_action_space_type = ... # type: mlagents_envs___communicator_objects___space_type_pb2___SpaceTypeProto
vector_action_size_deprecated = ... # type: google___protobuf___internal___containers___RepeatedScalarFieldContainer[builtin___int]
vector_action_descriptions_deprecated = ... # type: google___protobuf___internal___containers___RepeatedScalarFieldContainer[typing___Text]
vector_action_space_type_deprecated = ... # type: mlagents_envs___communicator_objects___space_type_pb2___SpaceTypeProto
@property
def action_spec(self) -> ActionSpecProto: ...
vector_action_size : typing___Optional[typing___Iterable[builtin___int]] = None,
vector_action_descriptions : typing___Optional[typing___Iterable[typing___Text]] = None,
vector_action_space_type : typing___Optional[mlagents_envs___communicator_objects___space_type_pb2___SpaceTypeProto] = None,
vector_action_size_deprecated : typing___Optional[typing___Iterable[builtin___int]] = None,
vector_action_descriptions_deprecated : typing___Optional[typing___Iterable[typing___Text]] = None,
vector_action_space_type_deprecated : typing___Optional[mlagents_envs___communicator_objects___space_type_pb2___SpaceTypeProto] = None,
action_spec : typing___Optional[ActionSpecProto] = None,
) -> None: ...
@classmethod
def FromString(cls, s: builtin___bytes) -> BrainParametersProto: ...

def ClearField(self, field_name: typing_extensions___Literal[u"brain_name",u"is_training",u"vector_action_descriptions",u"vector_action_size",u"vector_action_space_type"]) -> None: ...
def HasField(self, field_name: typing_extensions___Literal[u"action_spec"]) -> builtin___bool: ...
def ClearField(self, field_name: typing_extensions___Literal[u"action_spec",u"brain_name",u"is_training",u"vector_action_descriptions_deprecated",u"vector_action_size_deprecated",u"vector_action_space_type_deprecated"]) -> None: ...
def ClearField(self, field_name: typing_extensions___Literal[u"brain_name",b"brain_name",u"is_training",b"is_training",u"vector_action_descriptions",b"vector_action_descriptions",u"vector_action_size",b"vector_action_size",u"vector_action_space_type",b"vector_action_space_type"]) -> None: ...
def HasField(self, field_name: typing_extensions___Literal[u"action_spec",b"action_spec"]) -> builtin___bool: ...
def ClearField(self, field_name: typing_extensions___Literal[u"action_spec",b"action_spec",u"brain_name",b"brain_name",u"is_training",b"is_training",u"vector_action_descriptions_deprecated",b"vector_action_descriptions_deprecated",u"vector_action_size_deprecated",b"vector_action_size_deprecated",u"vector_action_space_type_deprecated",b"vector_action_space_type_deprecated"]) -> None: ...

13
ml-agents-envs/mlagents_envs/communicator_objects/capabilities_pb2.py


name='mlagents_envs/communicator_objects/capabilities.proto',
package='communicator_objects',
syntax='proto3',
serialized_pb=_b('\n5mlagents_envs/communicator_objects/capabilities.proto\x12\x14\x63ommunicator_objects\"}\n\x18UnityRLCapabilitiesProto\x12\x1a\n\x12\x62\x61seRLCapabilities\x18\x01 \x01(\x08\x12#\n\x1b\x63oncatenatedPngObservations\x18\x02 \x01(\x08\x12 \n\x18\x63ompressedChannelMapping\x18\x03 \x01(\x08\x42%\xaa\x02\"Unity.MLAgents.CommunicatorObjectsb\x06proto3')
serialized_pb=_b('\n5mlagents_envs/communicator_objects/capabilities.proto\x12\x14\x63ommunicator_objects\"\x94\x01\n\x18UnityRLCapabilitiesProto\x12\x1a\n\x12\x62\x61seRLCapabilities\x18\x01 \x01(\x08\x12#\n\x1b\x63oncatenatedPngObservations\x18\x02 \x01(\x08\x12 \n\x18\x63ompressedChannelMapping\x18\x03 \x01(\x08\x12\x15\n\rhybridActions\x18\x04 \x01(\x08\x42%\xaa\x02\"Unity.MLAgents.CommunicatorObjectsb\x06proto3')
)

message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='hybridActions', full_name='communicator_objects.UnityRLCapabilitiesProto.hybridActions', index=3,
number=4, type=8, cpp_type=7, label=1,
has_default_value=False, default_value=False,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
],
extensions=[
],

extension_ranges=[],
oneofs=[
],
serialized_start=79,
serialized_end=204,
serialized_start=80,
serialized_end=228,
)
DESCRIPTOR.message_types_by_name['UnityRLCapabilitiesProto'] = _UNITYRLCAPABILITIESPROTO
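A minimal sketch of advertising the new capability (this mirrors what environment.py sets further below):

from mlagents_envs.communicator_objects.capabilities_pb2 import UnityRLCapabilitiesProto

caps = UnityRLCapabilitiesProto(
    baseRLCapabilities=True,
    concatenatedPngObservations=True,
    compressedChannelMapping=True,
    hybridActions=True,   # the capability added in this change
)
assert caps.hybridActions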

6
ml-agents-envs/mlagents_envs/communicator_objects/capabilities_pb2.pyi


baseRLCapabilities = ... # type: builtin___bool
concatenatedPngObservations = ... # type: builtin___bool
compressedChannelMapping = ... # type: builtin___bool
hybridActions = ... # type: builtin___bool
def __init__(self,
*,

hybridActions : typing___Optional[builtin___bool] = None,
) -> None: ...
@classmethod
def FromString(cls, s: builtin___bytes) -> UnityRLCapabilitiesProto: ...

def ClearField(self, field_name: typing_extensions___Literal[u"baseRLCapabilities",u"compressedChannelMapping",u"concatenatedPngObservations"]) -> None: ...
def ClearField(self, field_name: typing_extensions___Literal[u"baseRLCapabilities",u"compressedChannelMapping",u"concatenatedPngObservations",u"hybridActions"]) -> None: ...
def ClearField(self, field_name: typing_extensions___Literal[u"baseRLCapabilities",b"baseRLCapabilities",u"compressedChannelMapping",b"compressedChannelMapping",u"concatenatedPngObservations",b"concatenatedPngObservations"]) -> None: ...
def ClearField(self, field_name: typing_extensions___Literal[u"baseRLCapabilities",b"baseRLCapabilities",u"compressedChannelMapping",b"compressedChannelMapping",u"concatenatedPngObservations",b"concatenatedPngObservations",u"hybridActions",b"hybridActions"]) -> None: ...

30
ml-agents-envs/mlagents_envs/environment.py


DecisionSteps,
TerminalSteps,
BehaviorSpec,
ActionTuple,
BehaviorName,
AgentId,
BehaviorMapping,

# * 1.0.0 - initial version
# * 1.1.0 - support concatenated PNGs for compressed observations.
# * 1.2.0 - support compression mapping for stacked compressed observations.
API_VERSION = "1.2.0"
# * 1.3.0 - support action spaces with both continuous and discrete actions.
API_VERSION = "1.3.0"
# Default port that the editor listens on. If an environment executable
# isn't specified, this port will be used.

capabilities.baseRLCapabilities = True
capabilities.concatenatedPngObservations = True
capabilities.compressedChannelMapping = True
capabilities.hybridActions = True
return capabilities
@staticmethod

self._env_state: Dict[str, Tuple[DecisionSteps, TerminalSteps]] = {}
self._env_specs: Dict[str, BehaviorSpec] = {}
self._env_actions: Dict[str, np.ndarray] = {}
self._env_actions: Dict[str, ActionTuple] = {}
self._is_first_message = True
self._update_behavior_specs(aca_output)

f"agent group in the environment"
)
def set_actions(self, behavior_name: BehaviorName, action: np.ndarray) -> None:
def set_actions(self, behavior_name: BehaviorName, action: ActionTuple) -> None:
self._assert_behavior_exists(behavior_name)
if behavior_name not in self._env_state:
return

self._env_actions[behavior_name] = action
def set_action_for_agent(
self, behavior_name: BehaviorName, agent_id: AgentId, action: np.ndarray
self, behavior_name: BehaviorName, agent_id: AgentId, action: ActionTuple
) -> None:
self._assert_behavior_exists(behavior_name)
if behavior_name not in self._env_state:

agent_id
)
) from ie
self._env_actions[behavior_name][index] = action
if action_spec.continuous_size > 0:
self._env_actions[behavior_name].continuous[index] = action.continuous[0, :]
if action_spec.discrete_size > 0:
self._env_actions[behavior_name].discrete[index] = action.discrete[0, :]
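A hedged sketch of the per-agent call (the helper name and the 2/2 action sizes are hypothetical): the ActionTuple must use a leading dimension of 1 because it targets a single agent:

import numpy as np
from mlagents_envs.base_env import ActionTuple, AgentId, BaseEnv, BehaviorName

def override_single_agent(env: BaseEnv, behavior_name: BehaviorName, agent_id: AgentId) -> None:
    single_action = ActionTuple(
        continuous=np.zeros((1, 2), dtype=np.float32),   # (1, continuous_size)
        discrete=np.array([[1, 0]], dtype=np.int32),     # (1, discrete_size)
    )
    env.set_action_for_agent(behavior_name, agent_id, single_action)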
def get_steps(
self, behavior_name: BehaviorName

@timed
def _generate_step_input(
self, vector_action: Dict[str, np.ndarray]
self, vector_action: Dict[str, ActionTuple]
) -> UnityInputProto:
rl_in = UnityRLInputProto()
for b in vector_action:

for i in range(n_agents):
action = AgentActionProto(vector_actions=vector_action[b][i])
action = AgentActionProto()
if vector_action[b].continuous is not None:
action.vector_actions_deprecated.extend(
vector_action[b].continuous[i]
)
action.continuous_actions.extend(vector_action[b].continuous[i])
if vector_action[b].discrete is not None:
action.vector_actions_deprecated.extend(
vector_action[b].discrete[i]
)
action.discrete_actions.extend(vector_action[b].discrete[i])
rl_in.agent_actions[b].value.extend([action])
rl_in.command = STEP
rl_in.side_channel = bytes(

18
ml-agents-envs/mlagents_envs/mock_communicator.py


from .communicator import Communicator
from .environment import UnityEnvironment
from mlagents_envs.communicator_objects.unity_rl_output_pb2 import UnityRLOutputProto
from mlagents_envs.communicator_objects.brain_parameters_pb2 import BrainParametersProto
from mlagents_envs.communicator_objects.brain_parameters_pb2 import (
BrainParametersProto,
ActionSpecProto,
)
from mlagents_envs.communicator_objects.unity_rl_initialization_output_pb2 import (
UnityRLInitializationOutputProto,
)

NONE as COMPRESSION_TYPE_NONE,
PNG as COMPRESSION_TYPE_PNG,
)
from mlagents_envs.communicator_objects.space_type_pb2 import discrete, continuous
class MockCommunicator(Communicator):

self.vec_obs_size = vec_obs_size
def initialize(self, inputs: UnityInputProto) -> UnityOutputProto:
if self.is_discrete:
action_spec = ActionSpecProto(
num_discrete_actions=2, discrete_branch_sizes=[3, 2]
)
else:
action_spec = ActionSpecProto(num_continuous_actions=2)
vector_action_size=[2],
vector_action_descriptions=["", ""],
vector_action_space_type=discrete if self.is_discrete else continuous,
brain_name=self.brain_name,
is_training=True,
brain_name=self.brain_name, is_training=True, action_spec=action_spec
)
rl_init = UnityRLInitializationOutputProto(
name="RealFakeAcademy",

23
ml-agents-envs/mlagents_envs/rpc_utils.py


from mlagents_envs.base_env import (
BehaviorSpec,
BehaviorSpec,
DecisionSteps,
TerminalSteps,
)

:return: BehaviorSpec object.
"""
observation_shape = [tuple(obs.shape) for obs in agent_info.observations]
if brain_param_proto.vector_action_space_type == 1:
action_spec = ActionSpec(brain_param_proto.vector_action_size[0], ())
# proto from communicator < v1.3 does not set action spec, use deprecated fields instead
if (
brain_param_proto.action_spec.num_continuous_actions == 0
and brain_param_proto.action_spec.num_discrete_actions == 0
):
if brain_param_proto.vector_action_space_type_deprecated == 1:
action_spec = ActionSpec(
brain_param_proto.vector_action_size_deprecated[0], ()
)
else:
action_spec = ActionSpec(
0, tuple(brain_param_proto.vector_action_size_deprecated)
)
action_spec = ActionSpec(0, tuple(brain_param_proto.vector_action_size))
action_spec_proto = brain_param_proto.action_spec
action_spec = ActionSpec(
action_spec_proto.num_continuous_actions,
tuple(branch for branch in action_spec_proto.discrete_branch_sizes),
)
return BehaviorSpec(observation_shape, action_spec)

6
ml-agents-envs/mlagents_envs/tests/test_envs.py


import pytest
from mlagents_envs.environment import UnityEnvironment
from mlagents_envs.base_env import DecisionSteps, TerminalSteps
from mlagents_envs.base_env import DecisionSteps, TerminalSteps, ActionTuple
from mlagents_envs.exception import UnityEnvironmentException, UnityActionException
from mlagents_envs.mock_communicator import MockCommunicator

env.set_actions("RealFakeBrain", spec.action_spec.empty_action(n_agents - 1))
decision_steps, terminal_steps = env.get_steps("RealFakeBrain")
n_agents = len(decision_steps)
env.set_actions("RealFakeBrain", spec.action_spec.empty_action(n_agents) - 1)
_empty_act = spec.action_spec.empty_action(n_agents)
next_action = ActionTuple(_empty_act.continuous - 1, _empty_act.discrete - 1)
env.set_actions("RealFakeBrain", next_action)
env.step()
env.close()

33
ml-agents-envs/mlagents_envs/tests/test_rpc_utils.py


return agent_info_protos
# The arguments here are the DecisionSteps, TerminalSteps and actions for a single agent name
# The arguments here are the DecisionSteps, TerminalSteps and continuous/discrete actions for a single agent name
decision_steps: DecisionSteps, terminal_steps: TerminalSteps, actions: np.ndarray
decision_steps: DecisionSteps,
terminal_steps: TerminalSteps,
continuous_actions: np.ndarray,
discrete_actions: np.ndarray,
agent_action_protos = [
AgentActionProto(vector_actions=action) for action in actions
]
agent_action_protos = []
num_agents = (
len(continuous_actions)
if continuous_actions is not None
else len(discrete_actions)
)
for i in range(num_agents):
proto = AgentActionProto()
if continuous_actions is not None:
proto.continuous_actions.extend(continuous_actions[i])
proto.vector_actions_deprecated.extend(continuous_actions[i])
if discrete_actions is not None:
proto.discrete_actions.extend(discrete_actions[i])
proto.vector_actions_deprecated.extend(discrete_actions[i])
agent_action_protos.append(proto)
agent_info_action_pair_protos = [
AgentInfoActionPairProto(agent_info=agent_info_proto, action_info=action_proto)
for agent_info_proto, action_proto in zip(

def test_agent_behavior_spec_from_proto():
agent_proto = generate_list_agent_proto(1, [(3,), (4,)])[0]
bp = BrainParametersProto()
bp.vector_action_size.extend([5, 4])
bp.vector_action_space_type = 0
bp.vector_action_size_deprecated.extend([5, 4])
bp.vector_action_space_type_deprecated = 0
behavior_spec = behavior_spec_from_proto(bp, agent_proto)
assert behavior_spec.action_spec.is_discrete()
assert not behavior_spec.action_spec.is_continuous()

bp = BrainParametersProto()
bp.vector_action_size.extend([6])
bp.vector_action_space_type = 1
bp.vector_action_size_deprecated.extend([6])
bp.vector_action_space_type_deprecated = 1
behavior_spec = behavior_spec_from_proto(bp, agent_proto)
assert not behavior_spec.action_spec.is_discrete()
assert behavior_spec.action_spec.is_continuous()

27
ml-agents-envs/mlagents_envs/tests/test_steps.py


assert specs.discrete_branches == ()
assert specs.discrete_size == 0
assert specs.continuous_size == 3
assert specs.empty_action(5).shape == (5, 3)
assert specs.empty_action(5).dtype == np.float32
assert specs.empty_action(5).continuous.shape == (5, 3)
assert specs.empty_action(5).continuous.dtype == np.float32
assert specs.empty_action(5).shape == (5, 1)
assert specs.empty_action(5).dtype == np.int32
assert specs.empty_action(5).discrete.shape == (5, 1)
assert specs.empty_action(5).discrete.dtype == np.int32
specs = ActionSpec(3, (3,))
assert specs.continuous_size == 3
assert specs.discrete_branches == (3,)
assert specs.discrete_size == 1
assert specs.empty_action(5).continuous.shape == (5, 3)
assert specs.empty_action(5).continuous.dtype == np.float32
assert specs.empty_action(5).discrete.shape == (5, 1)
assert specs.empty_action(5).discrete.dtype == np.int32
def test_action_generator():

zero_action = specs.empty_action(4)
zero_action = specs.empty_action(4).continuous
random_action = specs.random_action(4)
print(specs.random_action(4))
random_action = specs.random_action(4).continuous
print(random_action)
assert random_action.dtype == np.float32
assert random_action.shape == (4, action_len)
assert np.min(random_action) >= -1

action_shape = (10, 20, 30)
specs = ActionSpec.create_discrete(action_shape)
zero_action = specs.empty_action(4)
zero_action = specs.empty_action(4).discrete
random_action = specs.random_action(4)
random_action = specs.random_action(4).discrete
assert random_action.dtype == np.int32
assert random_action.shape == (4, len(action_shape))
assert np.min(random_action) >= 0

3
ml-agents/mlagents/trainers/action_info.py


class ActionInfo(NamedTuple):
action: Any
env_action: Any
value: Any
outputs: ActionInfoOutputs
agent_ids: List[AgentId]

return ActionInfo([], [], {}, [])
return ActionInfo([], [], [], {}, [])

23
ml-agents/mlagents/trainers/agent_processor.py


import queue
from mlagents_envs.base_env import (
ActionTuple,
DecisionSteps,
DecisionStep,
TerminalSteps,

from mlagents.trainers.trajectory import Trajectory, AgentExperience
from mlagents.trainers.policy import Policy
from mlagents.trainers.action_info import ActionInfo, ActionInfoOutputs
from mlagents.trainers.torch.action_log_probs import LogProbsTuple
from mlagents.trainers.stats import StatsReporter
from mlagents.trainers.behavior_id_utils import get_global_agent_id

done = terminated # Since this is an ongoing step
interrupted = step.interrupted if terminated else False
# Add the outputs of the last eval
action = stored_take_action_outputs["action"][idx]
if self.policy.use_continuous_act:
action_pre = stored_take_action_outputs["pre_action"][idx]
else:
action_pre = None
action_probs = stored_take_action_outputs["log_probs"][idx]
stored_actions = stored_take_action_outputs["action"]
action_tuple = ActionTuple(
continuous=stored_actions.continuous[idx],
discrete=stored_actions.discrete[idx],
)
stored_action_probs = stored_take_action_outputs["log_probs"]
log_probs_tuple = LogProbsTuple(
continuous=stored_action_probs.continuous[idx],
discrete=stored_action_probs.discrete[idx],
)
action_mask = stored_decision_step.action_mask
prev_action = self.policy.retrieve_previous_action([global_id])[0, :]
experience = AgentExperience(

action=action,
action_probs=action_probs,
action_pre=action_pre,
action=action_tuple,
action_probs=log_probs_tuple,
action_mask=action_mask,
prev_action=prev_action,
interrupted=interrupted,

2
ml-agents/mlagents/trainers/buffer.py


class AgentBufferField(list):
"""
AgentBufferField is a list of numpy arrays. When an agent collects a field, you can add it to his
AgentBufferField is a list of numpy arrays. When an agent collects a field, you can add it to its
AgentBufferField with the append method.
"""

18
ml-agents/mlagents/trainers/demo_loader.py


[next_pair_info.agent_info], behavior_spec
)
previous_action = (
np.array(pair_infos[idx].action_info.vector_actions, dtype=np.float32) * 0
np.array(
pair_infos[idx].action_info.vector_actions_deprecated, dtype=np.float32
)
* 0
pair_infos[idx - 1].action_info.vector_actions, dtype=np.float32
pair_infos[idx - 1].action_info.vector_actions_deprecated,
dtype=np.float32,
)
next_done = len(next_terminal_step) == 1

for i, obs in enumerate(split_obs.visual_observations):
demo_raw_buffer["visual_obs%d" % i].append(obs)
demo_raw_buffer["vector_obs"].append(split_obs.vector_observations)
demo_raw_buffer["actions"].append(current_pair_info.action_info.vector_actions)
# TODO: update the demonstration files and read from the new proto format
if behavior_spec.action_spec.continuous_size > 0:
demo_raw_buffer["continuous_action"].append(
current_pair_info.action_info.vector_actions_deprecated
)
if behavior_spec.action_spec.discrete_size > 0:
demo_raw_buffer["discrete_action"].append(
current_pair_info.action_info.vector_actions_deprecated
)
demo_raw_buffer["prev_action"].append(previous_action)
if next_done:
demo_raw_buffer.resequence_and_append(

1
ml-agents/mlagents/trainers/env_manager.py


from abc import ABC, abstractmethod
from typing import List, Dict, NamedTuple, Iterable, Tuple
from mlagents_envs.base_env import (
DecisionSteps,

4
ml-agents/mlagents/trainers/optimizer/tf_optimizer.py


[self.value_heads, self.policy.memory_out, self.memory_out], feed_dict
)
prev_action = (
batch["actions"][-1] if not self.policy.use_continuous_act else None
batch["discrete_action"][-1]
if not self.policy.use_continuous_act
else None
)
else:
value_estimates = self.sess.run(self.value_heads, feed_dict)

40
ml-agents/mlagents/trainers/policy/policy.py


from typing import Dict, List, Optional
import numpy as np
from mlagents_envs.base_env import DecisionSteps
from mlagents_envs.base_env import ActionTuple, BehaviorSpec, DecisionSteps
from mlagents_envs.base_env import BehaviorSpec
from mlagents.trainers.settings import TrainerSettings, NetworkSettings

self.trainer_settings = trainer_settings
self.network_settings: NetworkSettings = trainer_settings.network_settings
self.seed = seed
if (
self.behavior_spec.action_spec.continuous_size > 0
and self.behavior_spec.action_spec.discrete_size > 0
):
raise UnityPolicyException("Trainers do not support mixed action spaces.")
self.act_size = (
list(self.behavior_spec.action_spec.discrete_branches)
if self.behavior_spec.action_spec.is_discrete()

1 for shape in behavior_spec.observation_shapes if len(shape) == 3
)
self.use_continuous_act = self.behavior_spec.action_spec.is_continuous()
# This line will be removed in the ActionBuffer change
self.num_branches = (
self.behavior_spec.action_spec.continuous_size
+ self.behavior_spec.action_spec.discrete_size
)
self.previous_action_dict: Dict[str, np.array] = {}
self.previous_action_dict: Dict[str, np.ndarray] = {}
self.memory_dict: Dict[str, np.ndarray] = {}
self.normalize = trainer_settings.network_settings.normalize
self.use_recurrent = self.network_settings.memory is not None

) -> None:
if memory_matrix is None:
return
for index, agent_id in enumerate(agent_ids):
self.memory_dict[agent_id] = memory_matrix[index, :]

if agent_id in self.memory_dict:
self.memory_dict.pop(agent_id)
def make_empty_previous_action(self, num_agents):
def make_empty_previous_action(self, num_agents: int) -> np.ndarray:
return np.zeros((num_agents, self.num_branches), dtype=np.int)
return np.zeros(
(num_agents, self.behavior_spec.action_spec.discrete_size), dtype=np.int32
)
self, agent_ids: List[str], action_matrix: Optional[np.ndarray]
self, agent_ids: List[str], action_tuple: ActionTuple
if action_matrix is None:
return
self.previous_action_dict[agent_id] = action_matrix[index, :]
self.previous_action_dict[agent_id] = action_tuple.discrete[index, :]
action_matrix = np.zeros((len(agent_ids), self.num_branches), dtype=np.int)
action_matrix = self.make_empty_previous_action(len(agent_ids))
for index, agent_id in enumerate(agent_ids):
if agent_id in self.previous_action_dict:
action_matrix[index, :] = self.previous_action_dict[agent_id]

raise NotImplementedError
@staticmethod
def check_nan_action(action: Optional[np.ndarray]) -> None:
def check_nan_action(action: Optional[ActionTuple]) -> None:
d = np.sum(action)
d = np.sum(action.continuous)
raise RuntimeError("NaN action detected.")
raise RuntimeError("Continuous NaN action detected.")
d = np.sum(action.discrete)
has_nan = np.isnan(d)
if has_nan:
raise RuntimeError("Discrete NaN action detected.")
@abstractmethod
def update_normalization(self, vector_obs: np.ndarray) -> None:

33
ml-agents/mlagents/trainers/policy/tf_policy.py


from mlagents.tf_utils import tf
from mlagents import tf_utils
from mlagents_envs.exception import UnityException
from mlagents_envs.base_env import BehaviorSpec
from mlagents.trainers.torch.action_log_probs import LogProbsTuple
from mlagents_envs.base_env import DecisionSteps
from mlagents_envs.base_env import DecisionSteps, ActionTuple, BehaviorSpec
from mlagents.trainers.tf.models import ModelUtils
from mlagents.trainers.settings import TrainerSettings, EncoderType
from mlagents.trainers import __version__

reparameterize,
condition_sigma_on_obs,
)
if (
self.behavior_spec.action_spec.continuous_size > 0
and self.behavior_spec.action_spec.discrete_size > 0
):
raise UnityPolicyException(
"TensorFlow does not support mixed action spaces. Please run with the Torch framework."
)
# for ghost trainer save/load snapshots
self.assign_phs: List[tf.Tensor] = []
self.assign_ops: List[tf.Operation] = []

feed_dict[self.prev_action] = self.retrieve_previous_action(
global_agent_ids
)
feed_dict[self.memory_in] = self.retrieve_memories(global_agent_ids)
feed_dict = self.fill_eval_dict(feed_dict, decision_requests)
run_out = self._execute_model(feed_dict, self.inference_dict)

)
self.save_memories(global_agent_ids, run_out.get("memory_out"))
# For compatibility with buffer changes for hybrid action support
if "log_probs" in run_out:
log_probs_tuple = LogProbsTuple()
if self.behavior_spec.action_spec.is_continuous():
log_probs_tuple.add_continuous(run_out["log_probs"])
else:
log_probs_tuple.add_discrete(run_out["log_probs"])
run_out["log_probs"] = log_probs_tuple
if "action" in run_out:
action_tuple = ActionTuple()
env_action_tuple = ActionTuple()
if self.behavior_spec.action_spec.is_continuous():
action_tuple.add_continuous(run_out["pre_action"])
env_action_tuple.add_continuous(run_out["action"])
else:
action_tuple.add_discrete(run_out["action"])
env_action_tuple.add_discrete(run_out["action"])
run_out["action"] = action_tuple
run_out["env_action"] = env_action_tuple
env_action=run_out.get("env_action"),
value=run_out.get("value"),
outputs=run_out,
agent_ids=decision_requests.agent_id,

77
ml-agents/mlagents/trainers/policy/torch_policy.py


SeparateActorCritic,
GlobalSteps,
)
from mlagents.trainers.torch.agent_action import AgentAction
from mlagents.trainers.torch.action_log_probs import ActionLogProbs
EPSILON = 1e-7 # Small value to avoid divide by zero

conditional_sigma=self.condition_sigma_on_obs,
tanh_squash=tanh_squash,
)
self._clip_action = not tanh_squash
# Save the m_size needed for export
self._export_m_size = self.m_size
# m_size needed for training is determined by network, not trainer settings

self._clip_action = not tanh_squash
@property
def export_memory_size(self) -> int:

) -> Tuple[SplitObservations, np.ndarray]:
vec_vis_obs = SplitObservations.from_observations(decision_requests.obs)
mask = None
if not self.use_continuous_act:
if self.behavior_spec.action_spec.discrete_size > 0:
mask = torch.ones([len(decision_requests), np.sum(self.act_size)])
if decision_requests.action_mask is not None:
mask = torch.as_tensor(

masks: Optional[torch.Tensor] = None,
memories: Optional[torch.Tensor] = None,
seq_len: int = 1,
all_log_probs: bool = False,
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
) -> Tuple[AgentAction, ActionLogProbs, torch.Tensor, torch.Tensor]:
"""
:param vec_obs: List of vector observations.
:param vis_obs: List of visual observations.

:param all_log_probs: Returns (for discrete actions) a tensor of log probs, one for each action.
:return: Tuple of actions, actions clipped to -1, 1, log probabilities (dependent on all_log_probs),
entropies, and output memories, all as Torch Tensors.
:return: Tuple of AgentAction, ActionLogProbs, entropies, and output memories.
if memories is None:
dists, memories = self.actor_critic.get_dists(
vec_obs, vis_obs, masks, memories, seq_len
)
else:
# If we're using LSTM, we need to execute the values to get the critic memories
dists, _, memories = self.actor_critic.get_dist_and_value(
vec_obs, vis_obs, masks, memories, seq_len
)
action_list = self.actor_critic.sample_action(dists)
log_probs, entropies, all_logs = ModelUtils.get_probs_and_entropy(
action_list, dists
actions, log_probs, entropies, _, memories = self.actor_critic.get_action_stats_and_value(
vec_obs, vis_obs, masks, memories, seq_len
actions = torch.stack(action_list, dim=-1)
if self.use_continuous_act:
actions = actions[:, :, 0]
else:
actions = actions[:, 0, :]
# Use the sum of entropy across actions, not the mean
entropy_sum = torch.sum(entropies, dim=1)
if self._clip_action and self.use_continuous_act:
clipped_action = torch.clamp(actions, -3, 3) / 3
else:
clipped_action = actions
return (
actions,
clipped_action,
all_logs if all_log_probs else log_probs,
entropy_sum,
memories,
)
return (actions, log_probs, entropies, memories)
actions: torch.Tensor,
actions: AgentAction,
) -> Tuple[torch.Tensor, torch.Tensor, Dict[str, torch.Tensor]]:
dists, value_heads, _ = self.actor_critic.get_dist_and_value(
vec_obs, vis_obs, masks, memories, seq_len
) -> Tuple[ActionLogProbs, torch.Tensor, Dict[str, torch.Tensor]]:
log_probs, entropies, value_heads = self.actor_critic.get_stats_and_value(
vec_obs, vis_obs, actions, masks, memories, seq_len
action_list = [actions[..., i] for i in range(actions.shape[-1])]
log_probs, entropies, _ = ModelUtils.get_probs_and_entropy(action_list, dists)
# Use the sum of entropy across actions, not the mean
entropy_sum = torch.sum(entropies, dim=1)
return log_probs, entropy_sum, value_heads
return log_probs, entropies, value_heads
@timed
def evaluate(

run_out = {}
with torch.no_grad():
action, clipped_action, log_probs, entropy, memories = self.sample_actions(
action, log_probs, entropy, memories = self.sample_actions(
run_out["pre_action"] = ModelUtils.to_numpy(action)
run_out["action"] = ModelUtils.to_numpy(clipped_action)
# Todo - make pre_action difference
run_out["log_probs"] = ModelUtils.to_numpy(log_probs)
action_tuple = action.to_action_tuple()
run_out["action"] = action_tuple
# This is the clipped action which is not saved to the buffer
# but is exclusively sent to the environment.
env_action_tuple = action.to_action_tuple(clip=self._clip_action)
run_out["env_action"] = env_action_tuple
run_out["log_probs"] = log_probs.to_log_probs_tuple()
run_out["entropy"] = ModelUtils.to_numpy(entropy)
run_out["learning_rate"] = 0.0
if self.use_recurrent:

self.check_nan_action(run_out.get("action"))
return ActionInfo(
action=run_out.get("action"),
env_action=run_out.get("env_action"),
value=run_out.get("value"),
outputs=run_out,
agent_ids=list(decision_requests.agent_id),

9
ml-agents/mlagents/trainers/ppo/optimizer_tf.py


self.policy.sequence_length_ph: self.policy.sequence_length,
self.policy.mask_input: mini_batch["masks"] * burn_in_mask,
self.advantage: mini_batch["advantages"],
self.all_old_log_probs: mini_batch["action_probs"],
if self.policy.output_pre is not None and "actions_pre" in mini_batch:
feed_dict[self.policy.output_pre] = mini_batch["actions_pre"]
if self.policy.use_continuous_act: # For hybrid action buffer support
feed_dict[self.all_old_log_probs] = mini_batch["continuous_log_probs"]
feed_dict[self.policy.output_pre] = mini_batch["continuous_action"]
feed_dict[self.policy.output] = mini_batch["actions"]
feed_dict[self.all_old_log_probs] = mini_batch["discrete_log_probs"]
feed_dict[self.policy.output] = mini_batch["discrete_action"]
if self.policy.use_recurrent:
feed_dict[self.policy.prev_action] = mini_batch["prev_action"]
feed_dict[self.policy.action_masks] = mini_batch["action_mask"]

13
ml-agents/mlagents/trainers/ppo/optimizer_torch.py


from mlagents.trainers.policy.torch_policy import TorchPolicy
from mlagents.trainers.optimizer.torch_optimizer import TorchOptimizer
from mlagents.trainers.settings import TrainerSettings, PPOSettings
from mlagents.trainers.torch.agent_action import AgentAction
from mlagents.trainers.torch.action_log_probs import ActionLogProbs
from mlagents.trainers.torch.utils import ModelUtils

advantage = advantages.unsqueeze(-1)
decay_epsilon = self.hyperparameters.epsilon
r_theta = torch.exp(log_probs - old_log_probs)
p_opt_a = r_theta * advantage
p_opt_b = (

vec_obs = [ModelUtils.list_to_tensor(batch["vector_obs"])]
act_masks = ModelUtils.list_to_tensor(batch["action_mask"])
if self.policy.use_continuous_act:
actions = ModelUtils.list_to_tensor(batch["actions_pre"]).unsqueeze(-1)
else:
actions = ModelUtils.list_to_tensor(batch["actions"], dtype=torch.long)
actions = AgentAction.from_dict(batch)
memories = [
ModelUtils.list_to_tensor(batch["memory"][i])

vis_obs.append(vis_ob)
else:
vis_obs = []
log_probs, entropy, values = self.policy.evaluate_actions(
vec_obs,
vis_obs,

seq_len=self.policy.sequence_length,
)
old_log_probs = ActionLogProbs.from_dict(batch).flatten()
log_probs = log_probs.flatten()
loss_masks = ModelUtils.list_to_tensor(batch["masks"], dtype=torch.bool)
value_loss = self.ppo_value_loss(
values, old_values, returns, decay_eps, loss_masks

log_probs,
ModelUtils.list_to_tensor(batch["action_probs"]),
old_log_probs,
loss_masks,
)
loss = (

6
ml-agents/mlagents/trainers/sac/optimizer_tf.py


feed_dict[self.rewards_holders[name]] = batch[f"{name}_rewards"]
if self.policy.use_continuous_act:
feed_dict[self.policy_network.external_action_in] = batch["actions"]
feed_dict[self.policy_network.external_action_in] = batch[
"continuous_action"
]
feed_dict[policy.output] = batch["actions"]
feed_dict[policy.output] = batch["discrete_action"]
if self.policy.use_recurrent:
feed_dict[policy.prev_action] = batch["prev_action"]
feed_dict[policy.action_masks] = batch["action_mask"]

284
ml-agents/mlagents/trainers/sac/optimizer_torch.py


import numpy as np
from typing import Dict, List, Mapping, cast, Tuple, Optional
from typing import Dict, List, Mapping, NamedTuple, cast, Tuple, Optional
from mlagents_envs.base_env import ActionSpec
from mlagents.trainers.torch.agent_action import AgentAction
from mlagents.trainers.torch.action_log_probs import ActionLogProbs
from mlagents_envs.base_env import ActionSpec
from mlagents.trainers.exception import UnityTrainerException
from mlagents.trainers.settings import TrainerSettings, SACSettings
from contextlib import ExitStack

action_spec: ActionSpec,
):
super().__init__()
self.action_spec = action_spec
if self.action_spec.is_continuous():
self.act_size = self.action_spec.continuous_size
num_value_outs = 1
num_action_ins = self.act_size
num_value_outs = max(sum(action_spec.discrete_branches), 1)
num_action_ins = int(action_spec.continuous_size)
else:
self.act_size = self.action_spec.discrete_branches
num_value_outs = sum(self.act_size)
num_action_ins = 0
self.q1_network = ValueNetwork(
stream_names,
observation_shapes,

)
return q1_out, q2_out
class TargetEntropy(NamedTuple):
discrete: List[float] = [] # One per branch
continuous: float = 0.0
class LogEntCoef(nn.Module):
def __init__(self, discrete, continuous):
super().__init__()
self.discrete = discrete
self.continuous = continuous
def __init__(self, policy: TorchPolicy, trainer_params: TrainerSettings):
super().__init__(policy, trainer_params)
hyperparameters: SACSettings = cast(SACSettings, trainer_params.hyperparameters)

self.policy = policy
self.act_size = policy.act_size
policy_network_settings = policy.network_settings
self.tau = hyperparameters.tau

name: int(not self.reward_signals[name].ignore_done)
for name in self.stream_names
}
self._action_spec = self.policy.behavior_spec.action_spec
self.policy.behavior_spec.action_spec,
self._action_spec,
)
self.target_network = ValueNetwork(

self.policy.actor_critic.critic, self.target_network, 1.0
)
self._log_ent_coef = torch.nn.Parameter(
torch.log(torch.as_tensor([self.init_entcoef] * len(self.act_size))),
# We create one entropy coefficient per action, whether discrete or continuous.
_disc_log_ent_coef = torch.nn.Parameter(
torch.log(
torch.as_tensor(
[self.init_entcoef] * len(self._action_spec.discrete_branches)
)
),
if self.policy.use_continuous_act:
self.target_entropy = torch.as_tensor(
-1
* self.continuous_target_entropy_scale
* np.prod(self.act_size[0]).astype(np.float32)
)
else:
self.target_entropy = [
self.discrete_target_entropy_scale * np.log(i).astype(np.float32)
for i in self.act_size
]
_cont_log_ent_coef = torch.nn.Parameter(
torch.log(torch.as_tensor([self.init_entcoef])), requires_grad=True
)
self._log_ent_coef = TorchSACOptimizer.LogEntCoef(
discrete=_disc_log_ent_coef, continuous=_cont_log_ent_coef
)
_cont_target = (
-1
* self.continuous_target_entropy_scale
* np.prod(self._action_spec.continuous_size).astype(np.float32)
)
_disc_target = [
self.discrete_target_entropy_scale * np.log(i).astype(np.float32)
for i in self._action_spec.discrete_branches
]
self.target_entropy = TorchSACOptimizer.TargetEntropy(
continuous=_cont_target, discrete=_disc_target
)
self.policy.actor_critic.distribution.parameters()
self.policy.actor_critic.action_model.parameters()
)
value_params = list(self.value_network.parameters()) + list(
self.policy.actor_critic.critic.parameters()

value_params, lr=hyperparameters.learning_rate
)
self.entropy_optimizer = torch.optim.Adam(
[self._log_ent_coef], lr=hyperparameters.learning_rate
self._log_ent_coef.parameters(), lr=hyperparameters.learning_rate
)
self._move_to_device(default_device())
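A worked numeric sketch of the target-entropy split above (sizes and scale values are assumptions, mirroring the expressions in this hunk): the continuous target is -scale * continuous_size, and each discrete branch gets scale * log(branch_size):

import numpy as np

continuous_target_entropy_scale = 1.0   # assumed default
discrete_target_entropy_scale = 0.2     # assumed default
continuous_size = 2
discrete_branches = (3, 2)

cont_target = -1 * continuous_target_entropy_scale * np.prod(continuous_size).astype(np.float32)
disc_target = [
    discrete_target_entropy_scale * np.log(i).astype(np.float32) for i in discrete_branches
]
print(cont_target)                                  # -2.0
print([round(float(x), 3) for x in disc_target])    # [0.22, 0.139]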

def sac_value_loss(
self,
log_probs: torch.Tensor,
log_probs: ActionLogProbs,
discrete: bool,
_ent_coef = torch.exp(self._log_ent_coef)
_cont_ent_coef = self._log_ent_coef.continuous.exp()
_disc_ent_coef = self._log_ent_coef.discrete.exp()
if not discrete:
if self._action_spec.discrete_size <= 0:
action_probs = log_probs.exp()
disc_action_probs = log_probs.all_discrete_tensor.exp()
q1p_out[name] * action_probs, self.act_size
q1p_out[name] * disc_action_probs,
self._action_spec.discrete_branches,
q2p_out[name] * action_probs, self.act_size
q2p_out[name] * disc_action_probs,
self._action_spec.discrete_branches,
)
_q1p_mean = torch.mean(
torch.stack(

min_policy_qs[name] = torch.min(_q1p_mean, _q2p_mean)
value_losses = []
if not discrete:
if self._action_spec.discrete_size <= 0:
_ent_coef * log_probs, dim=1
_cont_ent_coef * log_probs.continuous_tensor, dim=1
)
value_loss = 0.5 * ModelUtils.masked_mean(
torch.nn.functional.mse_loss(values[name], v_backup), loss_masks

disc_log_probs = log_probs.all_discrete_tensor
log_probs * log_probs.exp(), self.act_size
disc_log_probs * disc_log_probs.exp(),
self._action_spec.discrete_branches,
torch.sum(_ent_coef[i] * _lp, dim=1, keepdim=True)
torch.sum(_disc_ent_coef[i] * _lp, dim=1, keepdim=True)
for i, _lp in enumerate(branched_per_action_ent)
]
)

branched_ent_bonus, axis=0
)
# Add continuous entropy bonus to minimum Q
if self._action_spec.continuous_size > 0:
v_backup += torch.sum(
_cont_ent_coef * log_probs.continuous_tensor,
dim=1,
keepdim=True,
)
value_loss = 0.5 * ModelUtils.masked_mean(
torch.nn.functional.mse_loss(values[name], v_backup.squeeze()),
loss_masks,

def sac_policy_loss(
self,
log_probs: torch.Tensor,
log_probs: ActionLogProbs,
discrete: bool,
_ent_coef = torch.exp(self._log_ent_coef)
_cont_ent_coef, _disc_ent_coef = (
self._log_ent_coef.continuous,
self._log_ent_coef.discrete,
)
_cont_ent_coef = _cont_ent_coef.exp()
_disc_ent_coef = _disc_ent_coef.exp()
if not discrete:
mean_q1 = mean_q1.unsqueeze(1)
batch_policy_loss = torch.mean(_ent_coef * log_probs - mean_q1, dim=1)
policy_loss = ModelUtils.masked_mean(batch_policy_loss, loss_masks)
else:
action_probs = log_probs.exp()
batch_policy_loss = 0
if self._action_spec.discrete_size > 0:
disc_log_probs = log_probs.all_discrete_tensor
disc_action_probs = disc_log_probs.exp()
log_probs * action_probs, self.act_size
disc_log_probs * disc_action_probs, self._action_spec.discrete_branches
mean_q1 * action_probs, self.act_size
mean_q1 * disc_action_probs, self._action_spec.discrete_branches
torch.sum(_ent_coef[i] * _lp - _qt, dim=1, keepdim=True)
torch.sum(_disc_ent_coef[i] * _lp - _qt, dim=1, keepdim=False)
for i, (_lp, _qt) in enumerate(
zip(branched_per_action_ent, branched_q_term)
)

batch_policy_loss = torch.squeeze(branched_policy_loss)
policy_loss = ModelUtils.masked_mean(batch_policy_loss, loss_masks)
batch_policy_loss += torch.sum(branched_policy_loss, dim=1)
all_mean_q1 = torch.sum(disc_action_probs * mean_q1, dim=1)
else:
all_mean_q1 = mean_q1
if self._action_spec.continuous_size > 0:
cont_log_probs = log_probs.continuous_tensor
batch_policy_loss += torch.mean(
_cont_ent_coef * cont_log_probs - all_mean_q1.unsqueeze(1), dim=1
)
policy_loss = ModelUtils.masked_mean(batch_policy_loss, loss_masks)
self, log_probs: torch.Tensor, loss_masks: torch.Tensor, discrete: bool
self, log_probs: ActionLogProbs, loss_masks: torch.Tensor
if not discrete:
with torch.no_grad():
target_current_diff = torch.sum(log_probs + self.target_entropy, dim=1)
entropy_loss = -1 * ModelUtils.masked_mean(
self._log_ent_coef * target_current_diff, loss_masks
)
else:
_cont_ent_coef, _disc_ent_coef = (
self._log_ent_coef.continuous,
self._log_ent_coef.discrete,
)
entropy_loss = 0
if self._action_spec.discrete_size > 0:
# Break discrete log probs into separate branches
disc_log_probs = log_probs.all_discrete_tensor
log_probs * log_probs.exp(), self.act_size
disc_log_probs * disc_log_probs.exp(),
self._action_spec.discrete_branches,
branched_per_action_ent, self.target_entropy
branched_per_action_ent, self.target_entropy.discrete
)
],
axis=1,

)
entropy_loss = -1 * ModelUtils.masked_mean(
torch.mean(self._log_ent_coef * target_current_diff, axis=1), loss_masks
entropy_loss += -1 * ModelUtils.masked_mean(
torch.mean(_disc_ent_coef * target_current_diff, axis=1), loss_masks
)
if self._action_spec.continuous_size > 0:
with torch.no_grad():
cont_log_probs = log_probs.continuous_tensor
target_current_diff = torch.sum(
cont_log_probs + self.target_entropy.continuous, dim=1
)
# We update all the _cont_ent_coef as one block
entropy_loss += -1 * ModelUtils.masked_mean(
_cont_ent_coef * target_current_diff, loss_masks
)
return entropy_loss

) -> Dict[str, torch.Tensor]:
condensed_q_output = {}
onehot_actions = ModelUtils.actions_to_onehot(discrete_actions, self.act_size)
onehot_actions = ModelUtils.actions_to_onehot(
discrete_actions, self._action_spec.discrete_branches
)
branched_q = ModelUtils.break_into_branches(item, self.act_size)
branched_q = ModelUtils.break_into_branches(
item, self._action_spec.discrete_branches
)
only_action_qs = torch.stack(
[
torch.sum(_act * _q, dim=1, keepdim=True)

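For clarity, here is a minimal, self-contained sketch (not the optimizer's exact code; shapes, the helper name, and branch sizes are illustrative) of what "condensing" a Q stream means for discrete branches: keep only the Q value of the action actually taken in each branch, selected with a one-hot mask.

```python
import torch

def condense_q(q_out, discrete_actions, branch_sizes):
    # Split the flat per-action Q vector into per-branch chunks, then keep the
    # Q value of the chosen action in each branch via a one-hot mask.
    branch_qs = torch.split(q_out, list(branch_sizes), dim=1)
    picked = []
    for i, branch_q in enumerate(branch_qs):
        onehot = torch.nn.functional.one_hot(
            discrete_actions[:, i].long(), num_classes=branch_sizes[i]
        ).float()
        picked.append(torch.sum(branch_q * onehot, dim=1, keepdim=True))
    return torch.cat(picked, dim=1)  # (batch, num_branches)

q = torch.randn(2, 5)                     # batch of 2, branches of size 3 and 2
acts = torch.tensor([[2, 0], [1, 1]])     # chosen action index per branch
print(condense_q(q, acts, (3, 2)).shape)  # torch.Size([2, 2])
```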
vec_obs = [ModelUtils.list_to_tensor(batch["vector_obs"])]
next_vec_obs = [ModelUtils.list_to_tensor(batch["next_vector_in"])]
act_masks = ModelUtils.list_to_tensor(batch["action_mask"])
if self.policy.use_continuous_act:
actions = ModelUtils.list_to_tensor(batch["actions"]).unsqueeze(-1)
else:
actions = ModelUtils.list_to_tensor(batch["actions"], dtype=torch.long)
actions = AgentAction.from_dict(batch)
memories_list = [
ModelUtils.list_to_tensor(batch["memory"][i])

self.target_network.network_body.copy_normalization(
self.policy.actor_critic.network_body
)
(sampled_actions, _, log_probs, _, _) = self.policy.sample_actions(
(
sampled_actions,
log_probs,
_,
value_estimates,
_,
) = self.policy.actor_critic.get_action_stats_and_value(
seq_len=self.policy.sequence_length,
all_log_probs=not self.policy.use_continuous_act,
sequence_length=self.policy.sequence_length,
value_estimates, _ = self.policy.actor_critic.critic_pass(
vec_obs, vis_obs, memories, sequence_length=self.policy.sequence_length
cont_sampled_actions = sampled_actions.continuous_tensor
cont_actions = actions.continuous_tensor
q1p_out, q2p_out = self.value_network(
vec_obs,
vis_obs,
cont_sampled_actions,
memories=q_memories,
sequence_length=self.policy.sequence_length,
q2_grad=False,
if self.policy.use_continuous_act:
squeezed_actions = actions.squeeze(-1)
# Only need grad for q1, as that is used for policy.
q1p_out, q2p_out = self.value_network(
vec_obs,
vis_obs,
sampled_actions,
memories=q_memories,
sequence_length=self.policy.sequence_length,
q2_grad=False,
)
q1_out, q2_out = self.value_network(
vec_obs,
vis_obs,
squeezed_actions,
memories=q_memories,
sequence_length=self.policy.sequence_length,
)
q1_out, q2_out = self.value_network(
vec_obs,
vis_obs,
cont_actions,
memories=q_memories,
sequence_length=self.policy.sequence_length,
)
if self._action_spec.discrete_size > 0:
disc_actions = actions.discrete_tensor
q1_stream = self._condense_q_streams(q1_out, disc_actions)
q2_stream = self._condense_q_streams(q2_out, disc_actions)
else:
else:
# For discrete, you don't need to backprop through the Q for the policy
q1p_out, q2p_out = self.value_network(
vec_obs,
vis_obs,
memories=q_memories,
sequence_length=self.policy.sequence_length,
q1_grad=False,
q2_grad=False,
)
q1_out, q2_out = self.value_network(
vec_obs,
vis_obs,
memories=q_memories,
sequence_length=self.policy.sequence_length,
)
q1_stream = self._condense_q_streams(q1_out, actions)
q2_stream = self._condense_q_streams(q2_out, actions)
with torch.no_grad():
target_values, _ = self.target_network(

sequence_length=self.policy.sequence_length,
)
masks = ModelUtils.list_to_tensor(batch["masks"], dtype=torch.bool)
use_discrete = not self.policy.use_continuous_act
dones = ModelUtils.list_to_tensor(batch["done"])
q1_loss, q2_loss = self.sac_q_loss(

log_probs, value_estimates, q1p_out, q2p_out, masks, use_discrete
log_probs, value_estimates, q1p_out, q2p_out, masks
policy_loss = self.sac_policy_loss(log_probs, q1p_out, masks, use_discrete)
entropy_loss = self.sac_entropy_loss(log_probs, masks, use_discrete)
policy_loss = self.sac_policy_loss(log_probs, q1p_out, masks)
entropy_loss = self.sac_entropy_loss(log_probs, masks)
total_value_loss = q1_loss + q2_loss + value_loss

"Losses/Value Loss": value_loss.item(),
"Losses/Q1 Loss": q1_loss.item(),
"Losses/Q2 Loss": q2_loss.item(),
"Policy/Entropy Coeff": torch.mean(torch.exp(self._log_ent_coef)).item(),
"Policy/Discrete Entropy Coeff": torch.mean(
torch.exp(self._log_ent_coef.discrete)
).item(),
"Policy/Continuous Entropy Coeff": torch.mean(
torch.exp(self._log_ent_coef.continuous)
).item(),
"Policy/Learning Rate": decay_lr,
}

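Since the entropy coefficient is now split, here is a hedged, self-contained sketch of how a separate continuous and per-branch discrete log-coefficient can be trained toward their own entropy targets, in the spirit of the split `_log_ent_coef` above. All names and shapes are illustrative, not the trainer's exact code.

```python
import torch

def masked_mean(values: torch.Tensor, masks: torch.Tensor) -> torch.Tensor:
    # Mean over the batch, counting only entries where the mask is 1.
    return (values * masks).sum() / masks.sum().clamp(min=1.0)

def hybrid_entropy_coef_loss(
    log_alpha_cont: torch.Tensor,        # scalar, learnable log-coefficient
    log_alpha_disc: torch.Tensor,        # (num_branches,), learnable log-coefficients
    cont_log_probs: torch.Tensor,        # (batch, continuous_size)
    disc_log_probs: torch.Tensor,        # (batch, num_branches), per-branch log-probs
    target_entropy_cont: float,
    target_entropy_disc: torch.Tensor,   # (num_branches,)
    loss_masks: torch.Tensor,            # (batch,), 1.0 for valid steps
) -> torch.Tensor:
    # Continuous block: a single coefficient, summed over action dimensions.
    with torch.no_grad():
        cont_diff = torch.sum(cont_log_probs + target_entropy_cont, dim=1)
    cont_loss = -masked_mean(log_alpha_cont * cont_diff, loss_masks)

    # Discrete block: one coefficient per branch, averaged over branches.
    with torch.no_grad():
        disc_diff = disc_log_probs + target_entropy_disc
    disc_loss = -masked_mean(torch.mean(log_alpha_disc * disc_diff, dim=1), loss_masks)

    return cont_loss + disc_loss
```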
2
ml-agents/mlagents/trainers/simple_env_manager.py


self.previous_all_action_info = all_action_info
for brain_name, action_info in all_action_info.items():
self.env.set_actions(brain_name, action_info.action)
self.env.set_actions(brain_name, action_info.env_action)
self.env.step()
all_step_result = self._generate_all_results()

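As a usage note (assuming a connected environment object; the behavior name and action sizes below are illustrative), the env managers now forward an ActionTuple via set_actions rather than a bare array:

```python
import numpy as np
from mlagents_envs.base_env import ActionTuple, BaseEnv

def send_hybrid_actions(env: BaseEnv, behavior_name: str, num_agents: int) -> None:
    # Zero-filled hybrid action block: 2 continuous dims and 1 discrete branch.
    action = ActionTuple(
        continuous=np.zeros((num_agents, 2), dtype=np.float32),
        discrete=np.zeros((num_agents, 1), dtype=np.int32),
    )
    env.set_actions(behavior_name, action)
    env.step()
```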
4
ml-agents/mlagents/trainers/subprocess_env_manager.py


if req.cmd == EnvironmentCommand.STEP:
all_action_info = req.payload
for brain_name, action_info in all_action_info.items():
if len(action_info.action) != 0:
env.set_actions(brain_name, action_info.action)
if len(action_info.agent_ids) > 0:
env.set_actions(brain_name, action_info.env_action)
env.step()
all_step_result = _generate_all_results()
# The timers in this process are independent of those in the other worker processes and the "main" process

24
ml-agents/mlagents/trainers/tests/mock_brain.py


import numpy as np
from mlagents.trainers.buffer import AgentBuffer
from mlagents.trainers.torch.action_log_probs import LogProbsTuple
from mlagents.trainers.trajectory import Trajectory, AgentExperience
from mlagents_envs.base_env import (
DecisionSteps,

ActionTuple,
)

steps_list = []
action_size = action_spec.discrete_size + action_spec.continuous_size
action_probs = np.ones(
int(np.sum(action_spec.discrete_branches) + action_spec.continuous_size),
dtype=np.float32,
)
for _i in range(length - 1):
obs = []
for _shape in observation_shapes:

action = np.zeros(action_size, dtype=np.float32)
action_pre = np.zeros(action_size, dtype=np.float32)
action = ActionTuple(
continuous=np.zeros(action_spec.continuous_size, dtype=np.float32),
discrete=np.zeros(action_spec.discrete_size, dtype=np.int32),
)
action_probs = LogProbsTuple(
continuous=np.ones(action_spec.continuous_size, dtype=np.float32),
discrete=np.ones(action_spec.discrete_size, dtype=np.float32),
)
action_mask = (
[
[False for _ in range(branch)]

else None
)
prev_action = np.ones(action_size, dtype=np.float32)
if action_spec.is_discrete():
prev_action = np.ones(action_size, dtype=np.int32)
else:
prev_action = np.ones(action_size, dtype=np.float32)
max_step = False
memory = np.ones(memory_size, dtype=np.float32)
agent_id = "test_agent"

done=done,
action=action,
action_probs=action_probs,
action_pre=action_pre,
action_mask=action_mask,
prev_action=prev_action,
interrupted=max_step,

done=not max_step_complete,
action=action,
action_probs=action_probs,
action_pre=action_pre,
action_mask=action_mask,
prev_action=prev_action,
interrupted=max_step_complete,

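Mirroring the test helper above (sizes are illustrative), a fake step for a hybrid spec now stores its action and log-probs in separate continuous/discrete containers:

```python
import numpy as np
from mlagents_envs.base_env import ActionSpec, ActionTuple
from mlagents.trainers.torch.action_log_probs import LogProbsTuple

action_spec = ActionSpec(2, (3, 2))  # 2 continuous dims, two discrete branches
action = ActionTuple(
    continuous=np.zeros(action_spec.continuous_size, dtype=np.float32),
    discrete=np.zeros(action_spec.discrete_size, dtype=np.int32),
)
log_probs = LogProbsTuple(
    continuous=np.ones(action_spec.continuous_size, dtype=np.float32),
    discrete=np.ones(action_spec.discrete_size, dtype=np.float32),
)
```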
82
ml-agents/mlagents/trainers/tests/simple_test_envs.py


from mlagents_envs.base_env import (
ActionSpec,
ActionTuple,
BaseEnv,
BehaviorSpec,
DecisionSteps,

OBS_SIZE = 1
VIS_OBS_SIZE = (20, 20, 3)
STEP_SIZE = 0.1
STEP_SIZE = 0.2
TIME_PENALTY = 0.01
MIN_STEPS = int(1.0 / STEP_SIZE) + 1

def __init__(
self,
brain_names,
use_discrete,
action_size=1,
action_sizes=(1, 0),
self.discrete = use_discrete
if use_discrete:
action_spec = ActionSpec.create_discrete(
tuple(2 for _ in range(action_size))
)
else:
action_spec = ActionSpec.create_continuous(action_size)
continuous_action_size, discrete_action_size = action_sizes
discrete_tuple = tuple(2 for _ in range(discrete_action_size))
action_spec = ActionSpec(continuous_action_size, discrete_tuple)
self.total_action_size = (
continuous_action_size + discrete_action_size
) # to set the goals/positions
self.action_spec = action_spec
self.action_size = action_size
self.action_spec = action_spec
self.names = brain_names
self.positions: Dict[str, List[float]] = {}
self.step_count: Dict[str, float] = {}

def _take_action(self, name: str) -> bool:
deltas = []
for _act in self.action[name][0]:
if self.discrete:
deltas.append(1 if _act else -1)
else:
deltas.append(_act)
_act = self.action[name]
if self.action_spec.continuous_size > 0:
for _cont in _act.continuous[0]:
deltas.append(_cont)
if self.action_spec.discrete_size > 0:
for _disc in _act.discrete[0]:
deltas.append(1 if _disc else -1)
for i, _delta in enumerate(deltas):
_delta = clamp(_delta, -self.step_size, self.step_size)
self.positions[name][i] += _delta

return done
def _generate_mask(self):
if self.discrete:
action_mask = None
if self.action_spec.discrete_size > 0:
ndmask = np.array(2 * self.action_size * [False], dtype=np.bool)
ndmask = np.array(
2 * self.action_spec.discrete_size * [False], dtype=np.bool
)
else:
action_mask = None
return action_mask
def _compute_reward(self, name: str, done: bool) -> float:

def _reset_agent(self, name):
self.goal[name] = self.random.choice([-1, 1])
self.positions[name] = [0.0 for _ in range(self.action_size)]
self.positions[name] = [0.0 for _ in range(self.total_action_size)]
self.step_count[name] = 0
self.rewards[name] = 0
self.agent_id[name] = self.agent_id[name] + 1

class MemoryEnvironment(SimpleEnvironment):
def __init__(self, brain_names, use_discrete, step_size=0.2):
super().__init__(brain_names, use_discrete, step_size=step_size)
def __init__(self, brain_names, action_sizes=(1, 0), step_size=0.2):
super().__init__(brain_names, action_sizes=action_sizes, step_size=step_size)
# Number of steps to reveal the goal for. Lower is harder. Should be
# less than 1/step_size to force the agent to use memory
self.num_show_steps = 2

def __init__(
self,
brain_names,
use_discrete,
action_sizes=(1, 0),
use_discrete,
action_sizes=action_sizes,
)
self.demonstration_protos: Dict[str, List[AgentInfoActionPairProto]] = {}
self.n_demos = n_demos

def step(self) -> None:
super().step()
for name in self.names:
discrete_actions = (
self.action[name].discrete
if self.action_spec.discrete_size > 0
else None
)
continuous_actions = (
self.action[name].continuous
if self.action_spec.continuous_size > 0
else None
)
self.step_result[name][0], self.step_result[name][1], self.action[name]
self.step_result[name][0],
self.step_result[name][1],
continuous_actions,
discrete_actions,
)
self.demonstration_protos[name] = self.demonstration_protos[name][
-self.n_demos :

self.reset()
for _ in range(self.n_demos):
for name in self.names:
if self.discrete:
self.action[name] = [[1]] if self.goal[name] > 0 else [[0]]
if self.action_spec.discrete_size > 0:
self.action[name] = ActionTuple(
np.array([], dtype=np.float32),
np.array(
[[1]] if self.goal[name] > 0 else [[0]], dtype=np.int32
),
)
self.action[name] = [[float(self.goal[name])]]
self.action[name] = ActionTuple(
np.array([[float(self.goal[name])]], dtype=np.float32),
np.array([], dtype=np.int32),
)
self.step()

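The delta mapping in _take_action above can be illustrated with a standalone hybrid ActionSpec and a random ActionTuple (one continuous dimension plus one 2-way branch, chosen for illustration):

```python
from mlagents_envs.base_env import ActionSpec

action_spec = ActionSpec(1, (2,))      # 1 continuous dim plus one 2-way branch
action = action_spec.random_action(1)  # ActionTuple for a single agent

deltas = []
if action_spec.continuous_size > 0:
    deltas.extend(float(c) for c in action.continuous[0])
if action_spec.discrete_size > 0:
    deltas.extend(1 if d else -1 for d in action.discrete[0])
print(deltas)  # e.g. [0.37, -1]
```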
66
ml-agents/mlagents/trainers/tests/tensorflow/test_ppo.py


dummy_config, use_rnn=rnn, use_discrete=discrete, use_visual=visual
)
# Test update
update_buffer = mb.simulate_rollout(
BUFFER_INIT_SAMPLES, optimizer.policy.behavior_spec
)
behavior_spec = optimizer.policy.behavior_spec
update_buffer = mb.simulate_rollout(BUFFER_INIT_SAMPLES, behavior_spec)
# NOTE: This is because TF outputs the log probs of all actions whereas PyTorch does not
if discrete:
n_agents = len(update_buffer["discrete_log_probs"])
update_buffer["discrete_log_probs"] = np.ones(
(n_agents, int(sum(behavior_spec.action_spec.discrete_branches))),
dtype=np.float32,
)
else:
n_agents = len(update_buffer["continuous_log_probs"])
update_buffer["continuous_log_probs"] = np.ones(
(n_agents, behavior_spec.action_spec.continuous_size), dtype=np.float32
)
optimizer.update(
update_buffer,
num_sequences=update_buffer.num_experiences // optimizer.policy.sequence_length,

dummy_config, use_rnn=rnn, use_discrete=discrete, use_visual=visual
)
# Test update
update_buffer = mb.simulate_rollout(
BUFFER_INIT_SAMPLES, optimizer.policy.behavior_spec
)
behavior_spec = optimizer.policy.behavior_spec
update_buffer = mb.simulate_rollout(BUFFER_INIT_SAMPLES, behavior_spec)
# Mock out reward signal eval
update_buffer["advantages"] = update_buffer["environment_rewards"]
update_buffer["extrinsic_returns"] = update_buffer["environment_rewards"]

# NOTE: This is because TF outputs the log probs of all actions whereas PyTorch does not
if discrete:
n_agents = len(update_buffer["discrete_log_probs"])
update_buffer["discrete_log_probs"] = np.ones(
(n_agents, int(sum(behavior_spec.action_spec.discrete_branches))),
dtype=np.float32,
)
else:
n_agents = len(update_buffer["continuous_log_probs"])
update_buffer["continuous_log_probs"] = np.ones(
(n_agents, behavior_spec.action_spec.continuous_size), dtype=np.float32
)
optimizer.update(
update_buffer,
num_sequences=update_buffer.num_experiences // optimizer.policy.sequence_length,

use_visual=False,
)
# Test update
update_buffer = mb.simulate_rollout(
BUFFER_INIT_SAMPLES, optimizer.policy.behavior_spec
)
behavior_spec = optimizer.policy.behavior_spec
update_buffer = mb.simulate_rollout(BUFFER_INIT_SAMPLES, behavior_spec)
# Mock out reward signal eval
update_buffer["advantages"] = update_buffer["environment_rewards"]
update_buffer["extrinsic_returns"] = update_buffer["environment_rewards"]

# NOTE: This is because TF outputs the log probs of all actions whereas PyTorch does not
n_agents = len(update_buffer["continuous_log_probs"])
update_buffer["continuous_log_probs"] = np.ones(
(n_agents, behavior_spec.action_spec.continuous_size), dtype=np.float32
)
optimizer.update(
update_buffer,
num_sequences=update_buffer.num_experiences // optimizer.policy.sequence_length,

buffer["curiosity_returns"] = buffer["environment_rewards"]
buffer["curiosity_value_estimates"] = buffer["environment_rewards"]
buffer["advantages"] = buffer["environment_rewards"]
# NOTE: This is because TF outputs the log probs of all actions whereas PyTorch does not
if use_discrete:
n_agents = len(buffer["discrete_log_probs"])
buffer["discrete_log_probs"].reset_field()
for _ in range(n_agents):
buffer["discrete_log_probs"].append(
np.ones(
int(sum(mock_behavior_spec.action_spec.discrete_branches)),
dtype=np.float32,
)
)
else:
n_agents = len(buffer["continuous_log_probs"])
buffer["continuous_log_probs"].reset_field()
for _ in range(n_agents):
buffer["continuous_log_probs"].append(
np.ones(
mock_behavior_spec.action_spec.continuous_size, dtype=np.float32
)
)
trainer.update_buffer = buffer
trainer._update_policy()

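A quick numpy illustration of the width convention used when patching the fake buffers above (branch sizes and agent count are illustrative): the TF path stores one log-prob per possible discrete action, i.e. sum of the branch sizes, while the continuous field stores one log-prob per action dimension.

```python
import numpy as np

n_agents = 4
discrete_branches = (3, 2)
continuous_size = 2

discrete_log_probs = np.ones((n_agents, int(sum(discrete_branches))), dtype=np.float32)
continuous_log_probs = np.ones((n_agents, continuous_size), dtype=np.float32)
print(discrete_log_probs.shape, continuous_log_probs.shape)  # (4, 5) (4, 2)
```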
128
ml-agents/mlagents/trainers/tests/tensorflow/test_simple_rl.py


assert all(reward > success_threshold for reward in processed_rewards)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_ppo(use_discrete):
env = SimpleEnvironment([BRAIN_NAME], use_discrete=use_discrete)
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_simple_ppo(action_sizes):
env = SimpleEnvironment([BRAIN_NAME], action_sizes=action_sizes)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_2d_ppo(use_discrete):
env = SimpleEnvironment(
[BRAIN_NAME], use_discrete=use_discrete, action_size=2, step_size=0.8
)
@pytest.mark.parametrize("action_sizes", [(0, 2), (2, 0)])
def test_2d_ppo(action_sizes):
env = SimpleEnvironment([BRAIN_NAME], action_sizes=action_sizes, step_size=0.8)
new_hyperparams = attr.evolve(
PPO_TF_CONFIG.hyperparameters, batch_size=64, buffer_size=640
)

_check_environment_trains(env, {BRAIN_NAME: config})
@pytest.mark.parametrize("use_discrete", [True, False])
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_visual_ppo(num_visual, use_discrete):
def test_visual_ppo(num_visual, action_sizes):
use_discrete=use_discrete,
action_sizes=action_sizes,
num_visual=num_visual,
num_vector=0,
step_size=0.2,

def test_visual_advanced_ppo(vis_encode_type, num_visual):
env = SimpleEnvironment(
[BRAIN_NAME],
use_discrete=True,
action_sizes=(0, 1),
num_visual=num_visual,
num_vector=0,
step_size=0.5,

PPO_TF_CONFIG,
hyperparameters=new_hyperparams,
network_settings=new_networksettings,
max_steps=300,
max_steps=400,
summary_freq=100,
framework=FrameworkType.TENSORFLOW,
)

@pytest.mark.parametrize("use_discrete", [True, False])
def test_recurrent_ppo(use_discrete):
env = MemoryEnvironment([BRAIN_NAME], use_discrete=use_discrete)
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_recurrent_ppo(action_sizes):
env = MemoryEnvironment([BRAIN_NAME], action_sizes=action_sizes)
new_network_settings = attr.evolve(
PPO_TF_CONFIG.network_settings,
memory=NetworkSettings.MemorySettings(memory_size=16),

_check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_sac(use_discrete):
env = SimpleEnvironment([BRAIN_NAME], use_discrete=use_discrete)
config = attr.evolve(SAC_TF_CONFIG, framework=FrameworkType.TENSORFLOW)
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_simple_sac(action_sizes):
env = SimpleEnvironment([BRAIN_NAME], action_sizes=action_sizes)
config = attr.evolve(
SAC_TF_CONFIG, framework=FrameworkType.TENSORFLOW, max_steps=900
)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_2d_sac(use_discrete):
env = SimpleEnvironment(
[BRAIN_NAME], use_discrete=use_discrete, action_size=2, step_size=0.8
)
@pytest.mark.parametrize("action_sizes", [(0, 2), (2, 0)])
def test_2d_sac(action_sizes):
env = SimpleEnvironment([BRAIN_NAME], action_sizes=action_sizes, step_size=0.8)
new_hyperparams = attr.evolve(SAC_TF_CONFIG.hyperparameters, buffer_init_steps=2000)
config = attr.evolve(
SAC_TF_CONFIG,

_check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.8)
@pytest.mark.parametrize("use_discrete", [True, False])
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_visual_sac(num_visual, use_discrete):
def test_visual_sac(num_visual, action_sizes):
use_discrete=use_discrete,
action_sizes=action_sizes,
num_visual=num_visual,
num_vector=0,
step_size=0.2,

def test_visual_advanced_sac(vis_encode_type, num_visual):
env = SimpleEnvironment(
[BRAIN_NAME],
use_discrete=True,
action_sizes=(0, 1),
num_visual=num_visual,
num_vector=0,
step_size=0.5,

SAC_TF_CONFIG,
hyperparameters=new_hyperparams,
network_settings=new_networksettings,
max_steps=100,
max_steps=200,
framework=FrameworkType.TENSORFLOW,
)
# The number of steps is pretty small for these encoders

@pytest.mark.parametrize("use_discrete", [True, False])
def test_recurrent_sac(use_discrete):
step_size = 0.2 if use_discrete else 0.5
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_recurrent_sac(action_sizes):
step_size = 0.2 if action_sizes == (0, 1) else 0.5
[BRAIN_NAME], use_discrete=use_discrete, step_size=step_size
[BRAIN_NAME], action_sizes=action_sizes, step_size=step_size
)
new_networksettings = attr.evolve(
SAC_TF_CONFIG.network_settings,

_check_environment_trains(env, {BRAIN_NAME: config})
@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_ghost(use_discrete):
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_simple_ghost(action_sizes):
[BRAIN_NAME + "?team=0", BRAIN_NAME + "?team=1"], use_discrete=use_discrete
[BRAIN_NAME + "?team=0", BRAIN_NAME + "?team=1"], action_sizes=action_sizes
)
self_play_settings = SelfPlaySettings(
play_against_latest_model_ratio=1.0, save_steps=2000, swap_steps=2000

_check_environment_trains(env, {BRAIN_NAME: config})
@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_ghost_fails(use_discrete):
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_simple_ghost_fails(action_sizes):
[BRAIN_NAME + "?team=0", BRAIN_NAME + "?team=1"], use_discrete=use_discrete
[BRAIN_NAME + "?team=0", BRAIN_NAME + "?team=1"], action_sizes=action_sizes
)
# This config should fail because the ghosted policy is never swapped with a competent policy.
# Swap occurs after max step is reached.

)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_asymm_ghost(use_discrete):
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_simple_asymm_ghost(action_sizes):
[BRAIN_NAME + "?team=0", brain_name_opp + "?team=1"], use_discrete=use_discrete
[BRAIN_NAME + "?team=0", brain_name_opp + "?team=1"], action_sizes=action_sizes
)
self_play_settings = SelfPlaySettings(
play_against_latest_model_ratio=1.0,

_check_environment_trains(env, {BRAIN_NAME: config, brain_name_opp: config})
@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_asymm_ghost_fails(use_discrete):
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_simple_asymm_ghost_fails(action_sizes):
[BRAIN_NAME + "?team=0", brain_name_opp + "?team=1"], use_discrete=use_discrete
[BRAIN_NAME + "?team=0", brain_name_opp + "?team=1"], action_sizes=action_sizes
)
# This config should fail because the team that is not learning, once both have reached
# max step, should be executing the initial, untrained policy.

@pytest.fixture(scope="session")
def simple_record(tmpdir_factory):
def record_demo(use_discrete, num_visual=0, num_vector=1):
def record_demo(action_sizes, num_visual=0, num_vector=1):
use_discrete=use_discrete,
action_sizes=action_sizes,
num_visual=num_visual,
num_vector=num_vector,
n_demos=100,

env.solve()
continuous_size, discrete_size = action_sizes
use_discrete = True if discrete_size > 0 else False
vector_action_size=[2] if use_discrete else [1],
vector_action_descriptions=[""],
vector_action_space_type=discrete if use_discrete else continuous,
vector_action_size_deprecated=[2] if use_discrete else [1],
vector_action_descriptions_deprecated=[""],
vector_action_space_type_deprecated=discrete
if use_discrete
else continuous,
brain_name=BRAIN_NAME,
is_training=True,
)

return record_demo
@pytest.mark.parametrize("use_discrete", [True, False])
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_gail(simple_record, use_discrete, trainer_config):
demo_path = simple_record(use_discrete)
env = SimpleEnvironment([BRAIN_NAME], use_discrete=use_discrete, step_size=0.2)
def test_gail(simple_record, action_sizes, trainer_config):
demo_path = simple_record(action_sizes)
env = SimpleEnvironment([BRAIN_NAME], action_sizes=action_sizes, step_size=0.2)
bc_settings = BehavioralCloningSettings(demo_path=demo_path, steps=1000)
reward_signals = {
RewardSignalType.GAIL: GAILSettings(encoding_size=32, demo_path=demo_path)

_check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_gail_visual_ppo(simple_record, use_discrete):
demo_path = simple_record(use_discrete, num_visual=1, num_vector=0)
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_gail_visual_ppo(simple_record, action_sizes):
demo_path = simple_record(action_sizes, num_visual=1, num_vector=0)
use_discrete=use_discrete,
action_sizes=action_sizes,
step_size=0.2,
)
bc_settings = BehavioralCloningSettings(demo_path=demo_path, steps=1500)

_check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_gail_visual_sac(simple_record, use_discrete):
demo_path = simple_record(use_discrete, num_visual=1, num_vector=0)
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_gail_visual_sac(simple_record, action_sizes):
demo_path = simple_record(action_sizes, num_visual=1, num_vector=0)
use_discrete=use_discrete,
action_sizes=action_sizes,
step_size=0.2,
)
bc_settings = BehavioralCloningSettings(demo_path=demo_path, steps=1000)

12
ml-agents/mlagents/trainers/tests/tensorflow/test_tf_policy.py


[], np.array([], dtype=np.float32), np.array([0]), None
)
result = policy.get_action(step_with_agents, worker_id=0)
assert result == ActionInfo(None, None, {}, [0])
assert result == ActionInfo(None, None, None, {}, [0])
def test_take_action_returns_action_info_when_available():

policy_eval_out = {
"action": np.array([1.0], dtype=np.float32),
"action": np.array([[1.0]], dtype=np.float32),
"pre_action": np.array([[1.0]], dtype=np.float32),
"memory_out": np.array([[2.5]], dtype=np.float32),
"value": np.array([1.1], dtype=np.float32),
}

)
result = policy.get_action(step_with_agents)
print(result)
policy_eval_out["action"], policy_eval_out["value"], policy_eval_out, [0]
policy_eval_out["action"],
policy_eval_out["env_action"],
policy_eval_out["value"],
policy_eval_out,
[0],
)
assert result == expected

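Mirroring the assertions above, ActionInfo now carries both the raw action and the env_action actually forwarded to the environment (five fields in total), plus an empty() placeholder. The values below are illustrative:

```python
import numpy as np
from mlagents_envs.base_env import ActionTuple
from mlagents.trainers.action_info import ActionInfo

action = ActionTuple(continuous=np.array([[0.1]], dtype=np.float32))
info = ActionInfo(
    action=action,      # what the trainer stores (e.g. unclipped)
    env_action=action,  # what is forwarded to env.set_actions
    value=[0.1],
    outputs={},
    agent_ids=[0],
)
empty = ActionInfo.empty()  # placeholder used when there is nothing to send
```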
41
ml-agents/mlagents/trainers/tests/test_agent_processor.py


AgentManagerQueue,
)
from mlagents.trainers.action_info import ActionInfo
from mlagents.trainers.torch.action_log_probs import LogProbsTuple
from mlagents_envs.base_env import ActionSpec
from mlagents_envs.base_env import ActionSpec, ActionTuple
def create_mock_policy():

mock_policy.retrieve_previous_action.return_value = np.zeros(
(1, 1), dtype=np.float32
)
mock_policy.retrieve_previous_action.return_value = np.zeros((1, 1), dtype=np.int32)
return mock_policy

)
fake_action_outputs = {
"action": [0.1, 0.1],
"action": ActionTuple(continuous=np.array([[0.1], [0.1]])),
"pre_action": [0.1, 0.1],
"log_probs": [0.1, 0.1],
"log_probs": LogProbsTuple(continuous=np.array([[0.1], [0.1]])),
}
mock_decision_steps, mock_terminal_steps = mb.create_mock_steps(
num_agents=2,

fake_action_info = ActionInfo(
action=[0.1, 0.1],
action=ActionTuple(continuous=np.array([[0.1], [0.1]])),
env_action=ActionTuple(continuous=np.array([[0.1], [0.1]])),
value=[0.1, 0.1],
outputs=fake_action_outputs,
agent_ids=mock_decision_steps.agent_id,

action_spec=ActionSpec.create_continuous(2),
)
processor.add_experiences(
mock_decision_steps, mock_terminal_steps, 0, ActionInfo([], [], {}, [])
mock_decision_steps, mock_terminal_steps, 0, ActionInfo.empty()
)
# Assert that the AgentProcessor is still empty
assert len(processor.experience_buffers[0]) == 0

max_trajectory_length=5,
stats_reporter=StatsReporter("testcat"),
)
"action": [0.1],
"action": ActionTuple(continuous=np.array([[0.1]])),
"pre_action": [0.1],
"log_probs": [0.1],
"log_probs": LogProbsTuple(continuous=np.array([[0.1]])),
mock_decision_step, mock_terminal_step = mb.create_mock_steps(
num_agents=1,
observation_shapes=[(8,)],

done=True,
)
fake_action_info = ActionInfo(
action=[0.1],
action=ActionTuple(continuous=np.array([[0.1]])),
env_action=ActionTuple(continuous=np.array([[0.1]])),
value=[0.1],
outputs=fake_action_outputs,
agent_ids=mock_decision_step.agent_id,

processor.add_experiences(
mock_decision_step, mock_terminal_step, _ep, fake_action_info
)
add_calls.append(mock.call([get_global_agent_id(_ep, 0)], [0.1]))
add_calls.append(
mock.call([get_global_agent_id(_ep, 0)], fake_action_outputs["action"])
)
processor.add_experiences(
mock_done_decision_step, mock_done_terminal_step, _ep, fake_action_info
)

max_trajectory_length=5,
stats_reporter=StatsReporter("testcat"),
)
"action": [0.1],
"action": ActionTuple(continuous=np.array([[0.1]])),
"pre_action": [0.1],
"log_probs": [0.1],
"log_probs": LogProbsTuple(continuous=np.array([[0.1]])),
mock_decision_step, mock_terminal_step = mb.create_mock_steps(
num_agents=1,
observation_shapes=[(8,)],

action=[0.1],
action=ActionTuple(continuous=np.array([[0.1]])),
env_action=ActionTuple(continuous=np.array([[0.1]])),
value=[0.1],
outputs=fake_action_outputs,
agent_ids=mock_decision_step.agent_id,

10
ml-agents/mlagents/trainers/tests/test_demo_loader.py


assert len(pair_infos) == total_expected
_, demo_buffer = demo_to_buffer(path_prefix + "/test.demo", 1, BEHAVIOR_SPEC)
assert len(demo_buffer["actions"]) == total_expected - 1
assert (
len(demo_buffer["continuous_action"]) == total_expected - 1
or len(demo_buffer["discrete_action"]) == total_expected - 1
)
def test_load_demo_dir():

assert len(pair_infos) == total_expected
_, demo_buffer = demo_to_buffer(path_prefix + "/test_demo_dir", 1, BEHAVIOR_SPEC)
assert len(demo_buffer["actions"]) == total_expected - 1
assert (
len(demo_buffer["continuous_action"]) == total_expected - 1
or len(demo_buffer["discrete_action"]) == total_expected - 1
)
def test_demo_mismatch():

2
ml-agents/mlagents/trainers/tests/test_subprocess_env_manager.py


@pytest.mark.parametrize("num_envs", [1, 4])
def test_subprocess_env_endtoend(num_envs):
def simple_env_factory(worker_id, config):
env = SimpleEnvironment(["1D"], use_discrete=True)
env = SimpleEnvironment(["1D"], action_sizes=(0, 1))
return env
env_manager = SubprocessEnvManager(

7
ml-agents/mlagents/trainers/tests/test_trajectory.py


"memory",
"masks",
"done",
"actions_pre",
"actions",
"action_probs",
"continuous_action",
"discrete_action",
"continuous_log_probs",
"discrete_log_probs",
"action_mask",
"prev_action",
"environment_rewards",

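A hedged sketch of the new buffer layout behind the key list above (sizes are illustrative): continuous and discrete actions, and their log-probs, live in separate fields rather than a single "actions" field.

```python
import numpy as np
from mlagents.trainers.buffer import AgentBuffer

buffer = AgentBuffer()
for _ in range(3):  # three fake steps: 2 continuous dims, 2 discrete branches
    buffer["continuous_action"].append(np.zeros(2, dtype=np.float32))
    buffer["discrete_action"].append(np.zeros(2, dtype=np.int32))
    buffer["continuous_log_probs"].append(np.ones(2, dtype=np.float32))
    buffer["discrete_log_probs"].append(np.ones(2, dtype=np.float32))
print(len(buffer["continuous_action"]))  # 3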
13
ml-agents/mlagents/trainers/tests/torch/saver/test_saver.py


).unsqueeze(0)
with torch.no_grad():
_, _, log_probs1, _, _ = policy1.sample_actions(
vec_obs, vis_obs, masks=masks, memories=memories, all_log_probs=True
_, log_probs1, _, _ = policy1.sample_actions(
vec_obs, vis_obs, masks=masks, memories=memories
_, _, log_probs2, _, _ = policy2.sample_actions(
vec_obs, vis_obs, masks=masks, memories=memories, all_log_probs=True
_, log_probs2, _, _ = policy2.sample_actions(
vec_obs, vis_obs, masks=masks, memories=memories
np.testing.assert_array_equal(log_probs1, log_probs2)
np.testing.assert_array_equal(
log_probs1.all_discrete_tensor, log_probs2.all_discrete_tensor
)
@pytest.mark.parametrize("discrete", [True, False], ids=["discrete", "continuous"])

2
ml-agents/mlagents/trainers/tests/torch/test_distributions.py


optimizer = torch.optim.Adam(gauss_dist.parameters(), lr=3e-3)
for _ in range(50):
dist_inst = gauss_dist(sample_embedding)[0]
dist_inst = gauss_dist(sample_embedding)
if tanh_squash:
assert isinstance(dist_inst, TanhGaussianDistInstance)
else:

90
ml-agents/mlagents/trainers/tests/torch/test_networks.py


from mlagents.trainers.torch.networks import (
NetworkBody,
ValueNetwork,
SimpleActor,
from mlagents.trainers.torch.distributions import (
GaussianDistInstance,
CategoricalDistInstance,
)
from mlagents.trainers.tests.torch.test_encoders import compare_models
def test_networkbody_vector():

assert _out[0] == pytest.approx(1.0, abs=0.1)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_actor(use_discrete):
obs_size = 4
network_settings = NetworkSettings()
obs_shapes = [(obs_size,)]
act_size = [2]
if use_discrete:
masks = torch.ones((1, 1))
action_spec = ActionSpec.create_discrete(tuple(act_size))
else:
masks = None
action_spec = ActionSpec.create_continuous(act_size[0])
actor = SimpleActor(obs_shapes, network_settings, action_spec)
# Test get_dist
sample_obs = torch.ones((1, obs_size))
dists, _ = actor.get_dists([sample_obs], [], masks=masks)
for dist in dists:
if use_discrete:
assert isinstance(dist, CategoricalDistInstance)
else:
assert isinstance(dist, GaussianDistInstance)
# Test sample_actions
actions = actor.sample_action(dists)
for act in actions:
if use_discrete:
assert act.shape == (1, 1)
else:
assert act.shape == (1, act_size[0])
# Test forward
actions, ver_num, mem_size, is_cont, act_size_vec = actor.forward(
[sample_obs], [], masks=masks
)
for act in actions:
# This is different from above for ONNX export
if use_discrete:
assert act.shape == tuple(act_size)
else:
assert act.shape == (act_size[0], 1)
assert mem_size == 0
assert is_cont == int(not use_discrete)
assert act_size_vec == torch.tensor(act_size)
memory=NetworkSettings.MemorySettings() if lstm else None
memory=NetworkSettings.MemorySettings() if lstm else None, normalize=True
act_size = [2]
act_size = 2
mask = torch.ones([1, act_size * 2])
action_spec = ActionSpec.create_continuous(act_size[0])
# action_spec = ActionSpec.create_continuous(act_size[0])
action_spec = ActionSpec(act_size, tuple(act_size for _ in range(act_size)))
actor = ac_type(obs_shapes, network_settings, action_spec, stream_names)
if lstm:
sample_obs = torch.ones((1, network_settings.memory.sequence_length, obs_size))

else:
assert value_out[stream].shape == (1,)
# Test get_dist_and_value
dists, value_out, mem_out = actor.get_dist_and_value(
[sample_obs], [], memories=memories
# Test get action stats and_value
action, log_probs, entropies, value_out, mem_out = actor.get_action_stats_and_value(
[sample_obs], [], memories=memories, masks=mask
if lstm:
assert action.continuous_tensor.shape == (64, 2)
else:
assert action.continuous_tensor.shape == (1, 2)
assert len(action.discrete_list) == 2
for _disc in action.discrete_list:
if lstm:
assert _disc.shape == (64, 1)
else:
assert _disc.shape == (1, 1)
for dist in dists:
assert isinstance(dist, GaussianDistInstance)
# Test normalization
actor.update_normalization(sample_obs)
if isinstance(actor, SeparateActorCritic):
for act_proc, crit_proc in zip(
actor.network_body.vector_processors,
actor.critic.network_body.vector_processors,
):
assert compare_models(act_proc, crit_proc)

28
ml-agents/mlagents/trainers/tests/torch/test_policy.py


from mlagents.trainers.tests import mock_brain as mb
from mlagents.trainers.settings import TrainerSettings, NetworkSettings
from mlagents.trainers.torch.utils import ModelUtils
from mlagents.trainers.torch.agent_action import AgentAction
VECTOR_ACTION_SPACE = 2
VECTOR_OBS_SPACE = 8

run_out = policy.evaluate(decision_step, list(decision_step.agent_id))
if discrete:
run_out["action"].shape == (NUM_AGENTS, len(DISCRETE_ACTION_SPACE))
run_out["action"].discrete.shape == (NUM_AGENTS, len(DISCRETE_ACTION_SPACE))
assert run_out["action"].shape == (NUM_AGENTS, VECTOR_ACTION_SPACE)
assert run_out["action"].continuous.shape == (NUM_AGENTS, VECTOR_ACTION_SPACE)
@pytest.mark.parametrize("discrete", [True, False], ids=["discrete", "continuous"])

buffer = mb.simulate_rollout(64, policy.behavior_spec, memory_size=policy.m_size)
vec_obs = [ModelUtils.list_to_tensor(buffer["vector_obs"])]
act_masks = ModelUtils.list_to_tensor(buffer["action_mask"])
if policy.use_continuous_act:
actions = ModelUtils.list_to_tensor(buffer["actions"]).unsqueeze(-1)
else:
actions = ModelUtils.list_to_tensor(buffer["actions"], dtype=torch.long)
agent_action = AgentAction.from_dict(buffer)
vis_obs = []
for idx, _ in enumerate(policy.actor_critic.network_body.visual_processors):
vis_ob = ModelUtils.list_to_tensor(buffer["visual_obs%d" % idx])

vec_obs,
vis_obs,
masks=act_masks,
actions=actions,
actions=agent_action,
memories=memories,
seq_len=policy.sequence_length,
)

_size = policy.behavior_spec.action_spec.continuous_size
assert log_probs.shape == (64, _size)
assert log_probs.flatten().shape == (64, _size)
assert entropy.shape == (64,)
for val in values.values():
assert val.shape == (64,)

if len(memories) > 0:
memories = torch.stack(memories).unsqueeze(0)
(
sampled_actions,
clipped_actions,
log_probs,
entropies,
memories,
) = policy.sample_actions(
(sampled_actions, log_probs, entropies, memories) = policy.sample_actions(
all_log_probs=not policy.use_continuous_act,
assert log_probs.shape == (
assert log_probs.all_discrete_tensor.shape == (
assert log_probs.shape == (64, policy.behavior_spec.action_spec.continuous_size)
assert clipped_actions.shape == (
assert log_probs.continuous_tensor.shape == (
64,
policy.behavior_spec.action_spec.continuous_size,
)

15
ml-agents/mlagents/trainers/tests/torch/test_ppo.py


update_buffer["extrinsic_returns"] = update_buffer["environment_rewards"]
update_buffer["extrinsic_value_estimates"] = update_buffer["environment_rewards"]
# NOTE: In TensorFlow, the log_probs are saved as one for every discrete action, whereas
# in PyTorch it is saved as the total probability per branch. So we need to modify the
# log prob in the fake buffer here.
update_buffer["action_probs"] = np.ones_like(update_buffer["actions"])
return_stats = optimizer.update(
update_buffer,
num_sequences=update_buffer.num_experiences // optimizer.policy.sequence_length,

update_buffer["extrinsic_value_estimates"] = update_buffer["environment_rewards"]
update_buffer["curiosity_returns"] = update_buffer["environment_rewards"]
update_buffer["curiosity_value_estimates"] = update_buffer["environment_rewards"]
# NOTE: In TensorFlow, the log_probs are saved as one for every discrete action, whereas
# in PyTorch it is saved as the total probability per branch. So we need to modify the
# log prob in the fake buffer here.
update_buffer["action_probs"] = np.ones_like(update_buffer["actions"])
optimizer.update(
update_buffer,
num_sequences=update_buffer.num_experiences // optimizer.policy.sequence_length,

update_buffer["extrinsic_value_estimates"] = update_buffer["environment_rewards"]
update_buffer["gail_returns"] = update_buffer["environment_rewards"]
update_buffer["gail_value_estimates"] = update_buffer["environment_rewards"]
update_buffer["continuous_log_probs"] = np.ones_like(
update_buffer["continuous_action"]
)
optimizer.update(
update_buffer,
num_sequences=update_buffer.num_experiences // optimizer.policy.sequence_length,

update_buffer["extrinsic_value_estimates"] = update_buffer["environment_rewards"]
update_buffer["gail_returns"] = update_buffer["environment_rewards"]
update_buffer["gail_value_estimates"] = update_buffer["environment_rewards"]
# NOTE: In TensorFlow, the log_probs are saved as one for every discrete action, whereas
# in PyTorch it is saved as the total probability per branch. So we need to modify the
# log prob in the fake buffer here.
update_buffer["action_probs"] = np.ones_like(update_buffer["actions"])
optimizer.update(
update_buffer,
num_sequences=update_buffer.num_experiences // optimizer.policy.sequence_length,

2
ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_curiosity.py


for _ in range(200):
curiosity_rp.update(buffer)
prediction = curiosity_rp._network.predict_action(buffer)[0]
target = torch.tensor(buffer["actions"][0])
target = torch.tensor(buffer["continuous_action"][0])
error = torch.mean((prediction - target) ** 2).item()
assert error < 0.001

11
ml-agents/mlagents/trainers/tests/torch/test_reward_providers/utils.py


np.random.normal(size=shape).astype(np.float32)
for shape in behavior_spec.observation_shapes
]
action = behavior_spec.action_spec.random_action(1)[0, :]
action_buffer = behavior_spec.action_spec.random_action(1)
action = {}
if behavior_spec.action_spec.continuous_size > 0:
action["continuous_action"] = action_buffer.continuous
if behavior_spec.action_spec.discrete_size > 0:
action["discrete_action"] = action_buffer.discrete
for _ in range(number):
curr_split_obs = SplitObservations.from_observations(curr_observations)
next_split_obs = SplitObservations.from_observations(next_observations)

)
buffer["vector_obs"].append(curr_split_obs.vector_observations)
buffer["next_vector_in"].append(next_split_obs.vector_observations)
buffer["actions"].append(action)
for _act_type, _act in action.items():
buffer[_act_type].append(_act[0, :])
buffer["reward"].append(np.ones(1, dtype=np.float32) * reward)
buffer["masks"].append(np.ones(1, dtype=np.float32))
buffer["done"] = np.zeros(number, dtype=np.float32)

3
ml-agents/mlagents/trainers/tests/torch/test_sac.py


"Losses/Value Loss",
"Losses/Q1 Loss",
"Losses/Q2 Loss",
"Policy/Entropy Coeff",
"Policy/Continuous Entropy Coeff",
"Policy/Discrete Entropy Coeff",
"Policy/Learning Rate",
]
for stat in required_stats:

132
ml-agents/mlagents/trainers/tests/torch/test_simple_rl.py


from mlagents_envs.communicator_objects.demonstration_meta_pb2 import (
DemonstrationMetaProto,
)
from mlagents_envs.communicator_objects.brain_parameters_pb2 import BrainParametersProto
from mlagents_envs.communicator_objects.space_type_pb2 import discrete, continuous
from mlagents_envs.communicator_objects.brain_parameters_pb2 import (
BrainParametersProto,
ActionSpecProto,
)
from mlagents.trainers.tests.dummy_config import ppo_dummy_config, sac_dummy_config
from mlagents.trainers.tests.check_env_trains import (

SAC_TORCH_CONFIG = attr.evolve(sac_dummy_config(), framework=FrameworkType.PYTORCH)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_ppo(use_discrete):
env = SimpleEnvironment([BRAIN_NAME], use_discrete=use_discrete)
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_simple_ppo(action_sizes):
env = SimpleEnvironment([BRAIN_NAME], action_sizes=action_sizes)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_2d_ppo(use_discrete):
env = SimpleEnvironment(
[BRAIN_NAME], use_discrete=use_discrete, action_size=2, step_size=0.8
)
@pytest.mark.parametrize("action_sizes", [(0, 2), (2, 0)])
def test_2d_ppo(action_sizes):
env = SimpleEnvironment([BRAIN_NAME], action_sizes=action_sizes, step_size=0.8)
new_hyperparams = attr.evolve(
PPO_TORCH_CONFIG.hyperparameters, batch_size=64, buffer_size=640
)

check_environment_trains(env, {BRAIN_NAME: config})
@pytest.mark.parametrize("use_discrete", [True, False])
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_visual_ppo(num_visual, use_discrete):
def test_visual_ppo(num_visual, action_sizes):
use_discrete=use_discrete,
action_sizes=action_sizes,
num_visual=num_visual,
num_vector=0,
step_size=0.2,

def test_visual_advanced_ppo(vis_encode_type, num_visual):
env = SimpleEnvironment(
[BRAIN_NAME],
use_discrete=True,
action_sizes=(0, 1),
num_visual=num_visual,
num_vector=0,
step_size=0.5,

check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.5)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_recurrent_ppo(use_discrete):
env = MemoryEnvironment([BRAIN_NAME], use_discrete=use_discrete)
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_recurrent_ppo(action_sizes):
env = MemoryEnvironment([BRAIN_NAME], action_sizes=action_sizes)
new_network_settings = attr.evolve(
PPO_TORCH_CONFIG.network_settings,
memory=NetworkSettings.MemorySettings(memory_size=16),

check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_sac(use_discrete):
env = SimpleEnvironment([BRAIN_NAME], use_discrete=use_discrete)
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_simple_sac(action_sizes):
env = SimpleEnvironment([BRAIN_NAME], action_sizes=action_sizes)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_2d_sac(use_discrete):
env = SimpleEnvironment(
[BRAIN_NAME], use_discrete=use_discrete, action_size=2, step_size=0.8
)
@pytest.mark.parametrize("action_sizes", [(0, 2), (2, 0)])
def test_2d_sac(action_sizes):
env = SimpleEnvironment([BRAIN_NAME], action_sizes=action_sizes, step_size=0.8)
SAC_TORCH_CONFIG, hyperparameters=new_hyperparams, max_steps=10000
SAC_TORCH_CONFIG, hyperparameters=new_hyperparams, max_steps=6000
@pytest.mark.parametrize("use_discrete", [True, False])
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_visual_sac(num_visual, use_discrete):
def test_visual_sac(num_visual, action_sizes):
use_discrete=use_discrete,
action_sizes=action_sizes,
num_visual=num_visual,
num_vector=0,
step_size=0.2,

def test_visual_advanced_sac(vis_encode_type, num_visual):
env = SimpleEnvironment(
[BRAIN_NAME],
use_discrete=True,
action_sizes=(0, 1),
num_visual=num_visual,
num_vector=0,
step_size=0.5,

check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.5)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_recurrent_sac(use_discrete):
step_size = 0.2 if use_discrete else 0.5
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_recurrent_sac(action_sizes):
step_size = 0.2 if action_sizes == (0, 1) else 0.5
[BRAIN_NAME], use_discrete=use_discrete, step_size=step_size
[BRAIN_NAME], action_sizes=action_sizes, step_size=step_size
)
new_networksettings = attr.evolve(
SAC_TORCH_CONFIG.network_settings,

check_environment_trains(env, {BRAIN_NAME: config})
@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_ghost(use_discrete):
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_simple_ghost(action_sizes):
[BRAIN_NAME + "?team=0", BRAIN_NAME + "?team=1"], use_discrete=use_discrete
[BRAIN_NAME + "?team=0", BRAIN_NAME + "?team=1"], action_sizes=action_sizes
)
self_play_settings = SelfPlaySettings(
play_against_latest_model_ratio=1.0, save_steps=2000, swap_steps=2000

@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_ghost_fails(use_discrete):
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_simple_ghost_fails(action_sizes):
[BRAIN_NAME + "?team=0", BRAIN_NAME + "?team=1"], use_discrete=use_discrete
[BRAIN_NAME + "?team=0", BRAIN_NAME + "?team=1"], action_sizes=action_sizes
)
# This config should fail because the ghosted policy is never swapped with a competent policy.
# Swap occurs after max step is reached.

)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_asymm_ghost(use_discrete):
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_simple_asymm_ghost(action_sizes):
[BRAIN_NAME + "?team=0", brain_name_opp + "?team=1"], use_discrete=use_discrete
[BRAIN_NAME + "?team=0", brain_name_opp + "?team=1"], action_sizes=action_sizes
)
self_play_settings = SelfPlaySettings(
play_against_latest_model_ratio=1.0,

check_environment_trains(env, {BRAIN_NAME: config, brain_name_opp: config})
@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_asymm_ghost_fails(use_discrete):
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_simple_asymm_ghost_fails(action_sizes):
[BRAIN_NAME + "?team=0", brain_name_opp + "?team=1"], use_discrete=use_discrete
[BRAIN_NAME + "?team=0", brain_name_opp + "?team=1"], action_sizes=action_sizes
)
# This config should fail because the team that is not learning, once both have reached
# max step, should be executing the initial, untrained policy.

@pytest.fixture(scope="session")
def simple_record(tmpdir_factory):
def record_demo(use_discrete, num_visual=0, num_vector=1):
def record_demo(action_sizes, num_visual=0, num_vector=1):
use_discrete=use_discrete,
action_sizes=action_sizes,
num_visual=num_visual,
num_vector=num_vector,
n_demos=100,

env.solve()
agent_info_protos = env.demonstration_protos[BRAIN_NAME]
meta_data_proto = DemonstrationMetaProto()
continuous_action_size, discrete_action_size = action_sizes
action_spec_proto = ActionSpecProto(
num_continuous_actions=continuous_action_size,
num_discrete_actions=discrete_action_size,
discrete_branch_sizes=[2] if discrete_action_size > 0 else None,
)
vector_action_size=[2] if use_discrete else [1],
vector_action_descriptions=[""],
vector_action_space_type=discrete if use_discrete else continuous,
brain_name=BRAIN_NAME,
is_training=True,
brain_name=BRAIN_NAME, is_training=True, action_spec=action_spec_proto
action_type = "Discrete" if use_discrete else "Continuous"
action_type = "Discrete" if action_sizes else "Continuous"
demo_path_name = "1DTest" + action_type + ".demo"
demo_path = str(tmpdir_factory.mktemp("tmp_demo").join(demo_path_name))
write_demo(demo_path, meta_data_proto, brain_param_proto, agent_info_protos)

@pytest.mark.parametrize("use_discrete", [True, False])
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_gail(simple_record, use_discrete, trainer_config):
demo_path = simple_record(use_discrete)
env = SimpleEnvironment([BRAIN_NAME], use_discrete=use_discrete, step_size=0.2)
def test_gail(simple_record, action_sizes, trainer_config):
demo_path = simple_record(action_sizes)
env = SimpleEnvironment([BRAIN_NAME], action_sizes=action_sizes, step_size=0.2)
bc_settings = BehavioralCloningSettings(demo_path=demo_path, steps=1000)
reward_signals = {
RewardSignalType.GAIL: GAILSettings(encoding_size=32, demo_path=demo_path)

check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_gail_visual_ppo(simple_record, use_discrete):
demo_path = simple_record(use_discrete, num_visual=1, num_vector=0)
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_gail_visual_ppo(simple_record, action_sizes):
demo_path = simple_record(action_sizes, num_visual=1, num_vector=0)
use_discrete=use_discrete,
action_sizes=action_sizes,
step_size=0.2,
)
bc_settings = BehavioralCloningSettings(demo_path=demo_path, steps=1500)

check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_gail_visual_sac(simple_record, use_discrete):
demo_path = simple_record(use_discrete, num_visual=1, num_vector=0)
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_gail_visual_sac(simple_record, action_sizes):
demo_path = simple_record(action_sizes, num_visual=1, num_vector=0)
use_discrete=use_discrete,
action_sizes=action_sizes,
step_size=0.2,
)
bc_settings = BehavioralCloningSettings(demo_path=demo_path, steps=1000)

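Mirroring the demo-recording path above, a hybrid spec is now described by an ActionSpecProto attached to the BrainParametersProto (one continuous action and one 2-way branch here, for illustration):

```python
from mlagents_envs.communicator_objects.brain_parameters_pb2 import (
    BrainParametersProto,
    ActionSpecProto,
)

action_spec_proto = ActionSpecProto(
    num_continuous_actions=1,
    num_discrete_actions=1,
    discrete_branch_sizes=[2],
)
brain_param_proto = BrainParametersProto(
    brain_name="1DTest", is_training=True, action_spec=action_spec_proto
)
```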
44
ml-agents/mlagents/trainers/tests/torch/test_utils.py


from mlagents.trainers.torch.utils import ModelUtils
from mlagents.trainers.exception import UnityTrainerException
from mlagents.trainers.torch.encoders import VectorInput
from mlagents.trainers.torch.distributions import (
CategoricalDistInstance,
GaussianDistInstance,
)
def test_min_visual_size():

]
for res, exp in zip(oh_actions, expected_result):
assert torch.equal(res, exp)
def test_get_probs_and_entropy():
# Test continuous
# Add two dists to the list. This isn't done in the code but we'd like to support it.
dist_list = [
GaussianDistInstance(torch.zeros((1, 2)), torch.ones((1, 2))),
GaussianDistInstance(torch.zeros((1, 2)), torch.ones((1, 2))),
]
action_list = [torch.zeros((1, 2)), torch.zeros((1, 2))]
log_probs, entropies, all_probs = ModelUtils.get_probs_and_entropy(
action_list, dist_list
)
assert log_probs.shape == (1, 2, 2)
assert entropies.shape == (1, 1, 2)
assert all_probs is None
for log_prob in log_probs.flatten():
# Log prob of standard normal at 0
assert log_prob == pytest.approx(-0.919, abs=0.01)
for ent in entropies.flatten():
# entropy of standard normal at 0
assert ent == pytest.approx(1.42, abs=0.01)
# Test discrete
# Add two dists to the list.
act_size = 2
test_prob = torch.tensor(
[[1.0 - 0.1 * (act_size - 1)] + [0.1] * (act_size - 1)]
) # High prob for first action
dist_list = [CategoricalDistInstance(test_prob), CategoricalDistInstance(test_prob)]
action_list = [torch.tensor([0]), torch.tensor([1])]
log_probs, entropies, all_probs = ModelUtils.get_probs_and_entropy(
action_list, dist_list
)
assert all_probs.shape == (1, len(dist_list * act_size))
assert entropies.shape == (1, len(dist_list))
# Make sure the first action has a higher probability than the others.
assert log_probs.flatten()[0] > log_probs.flatten()[1]
def test_masked_mean():

6
ml-agents/mlagents/trainers/tf/components/bc/module.py


self.policy.batch_size_ph: n_sequences,
self.policy.sequence_length_ph: self.policy.sequence_length,
}
feed_dict[self.model.action_in_expert] = mini_batch_demo["actions"]
feed_dict[self.model.action_in_expert] = mini_batch_demo["discrete_action"]
feed_dict[self.policy.action_masks] = np.ones(
(
self.n_sequences * self.policy.sequence_length,

)
else:
feed_dict[self.model.action_in_expert] = mini_batch_demo[
"continuous_action"
]
if self.policy.vec_obs_size > 0:
feed_dict[self.policy.vector_in] = mini_batch_demo["vector_obs"]
for i, _ in enumerate(self.policy.visual_in):

10
ml-agents/mlagents/trainers/tf/components/reward_signals/curiosity/signal.py


def evaluate_batch(self, mini_batch: AgentBuffer) -> RewardSignalResult:
feed_dict: Dict[tf.Tensor, Any] = {
self.policy.batch_size_ph: len(mini_batch["actions"]),
self.policy.batch_size_ph: len(mini_batch["vector_obs"]),
self.policy.sequence_length_ph: self.policy.sequence_length,
}
if self.policy.use_vec_obs:

feed_dict[self.model.next_visual_in[i]] = _next_obs
if self.policy.use_continuous_act:
feed_dict[self.policy.selected_actions] = mini_batch["actions"]
feed_dict[self.policy.selected_actions] = mini_batch["continuous_action"]
feed_dict[self.policy.output] = mini_batch["actions"]
feed_dict[self.policy.output] = mini_batch["discrete_action"]
unscaled_reward = self.policy.sess.run(
self.model.intrinsic_reward, feed_dict=feed_dict
)

policy.mask_input: mini_batch["masks"],
}
if self.policy.use_continuous_act:
feed_dict[policy.selected_actions] = mini_batch["actions"]
feed_dict[policy.selected_actions] = mini_batch["continuous_action"]
feed_dict[policy.output] = mini_batch["actions"]
feed_dict[policy.output] = mini_batch["discrete_action"]
if self.policy.use_vec_obs:
feed_dict[policy.vector_in] = mini_batch["vector_obs"]
feed_dict[self.model.next_vector_in] = mini_batch["next_vector_in"]

17
ml-agents/mlagents/trainers/tf/components/reward_signals/gail/signal.py


def evaluate_batch(self, mini_batch: AgentBuffer) -> RewardSignalResult:
feed_dict: Dict[tf.Tensor, Any] = {
self.policy.batch_size_ph: len(mini_batch["actions"]),
self.policy.batch_size_ph: len(mini_batch["vector_obs"]),
self.policy.sequence_length_ph: self.policy.sequence_length,
}
if self.model.use_vail:

feed_dict[self.policy.visual_in[i]] = _obs
if self.policy.use_continuous_act:
feed_dict[self.policy.selected_actions] = mini_batch["actions"]
feed_dict[self.policy.selected_actions] = mini_batch["continuous_action"]
feed_dict[self.policy.output] = mini_batch["actions"]
feed_dict[self.policy.output] = mini_batch["discrete_action"]
feed_dict[self.model.done_policy_holder] = np.array(
mini_batch["done"]
).flatten()

if self.model.use_vail:
feed_dict[self.model.use_noise] = [1]
feed_dict[self.model.action_in_expert] = np.array(mini_batch_demo["actions"])
feed_dict[policy.selected_actions] = mini_batch["actions"]
feed_dict[policy.selected_actions] = mini_batch["continuous_action"]
feed_dict[self.model.action_in_expert] = np.array(
mini_batch_demo["continuous_action"]
)
feed_dict[policy.output] = mini_batch["actions"]
feed_dict[policy.output] = mini_batch["discrete_action"]
feed_dict[self.model.action_in_expert] = np.array(
mini_batch_demo["discrete_action"]
)
if self.policy.use_vis_obs > 0:
for i in range(len(policy.visual_in)):

52
ml-agents/mlagents/trainers/torch/components/bc/module.py


from mlagents.trainers.policy.torch_policy import TorchPolicy
from mlagents.trainers.demo_loader import demo_to_buffer
from mlagents.trainers.settings import BehavioralCloningSettings, ScheduleType
from mlagents.trainers.torch.agent_action import AgentAction
from mlagents.trainers.torch.action_log_probs import ActionLogProbs
from mlagents.trainers.torch.utils import ModelUtils

update_stats = {"Losses/Pretraining Loss": np.mean(batch_losses)}
return update_stats
def _behavioral_cloning_loss(self, selected_actions, log_probs, expert_actions):
if self.policy.use_continuous_act:
bc_loss = torch.nn.functional.mse_loss(selected_actions, expert_actions)
else:
def _behavioral_cloning_loss(
self,
selected_actions: AgentAction,
log_probs: ActionLogProbs,
expert_actions: torch.Tensor,
) -> torch.Tensor:
bc_loss = 0
if self.policy.behavior_spec.action_spec.continuous_size > 0:
bc_loss += torch.nn.functional.mse_loss(
selected_actions.continuous_tensor, expert_actions.continuous_tensor
)
if self.policy.behavior_spec.action_spec.discrete_size > 0:
one_hot_expert_actions = ModelUtils.actions_to_onehot(
expert_actions.discrete_tensor,
self.policy.behavior_spec.action_spec.discrete_branches,
)
log_probs, self.policy.act_size
log_probs.all_discrete_tensor,
self.policy.behavior_spec.action_spec.discrete_branches,
bc_loss = torch.mean(
bc_loss += torch.mean(
torch.stack(
[
torch.sum(

)
for log_prob_branch, expert_actions_branch in zip(
log_prob_branches, expert_actions
log_prob_branches, one_hot_expert_actions
)
]
)

"""
vec_obs = [ModelUtils.list_to_tensor(mini_batch_demo["vector_obs"])]
act_masks = None
if self.policy.use_continuous_act:
expert_actions = ModelUtils.list_to_tensor(mini_batch_demo["actions"])
else:
raw_expert_actions = ModelUtils.list_to_tensor(
mini_batch_demo["actions"], dtype=torch.long
)
expert_actions = ModelUtils.actions_to_onehot(
raw_expert_actions, self.policy.act_size
)
expert_actions = AgentAction.from_dict(mini_batch_demo)
if self.policy.behavior_spec.action_spec.discrete_size > 0:
act_masks = ModelUtils.list_to_tensor(
np.ones(
(

else:
vis_obs = []
(
selected_actions,
clipped_actions,
all_log_probs,
_,
_,
) = self.policy.sample_actions(
selected_actions, log_probs, _, _ = self.policy.sample_actions(
all_log_probs=True,
clipped_actions, all_log_probs, expert_actions
selected_actions, log_probs, expert_actions
)
self.optimizer.zero_grad()
bc_loss.backward()

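To make the hybrid behavioral-cloning loss concrete, here is a minimal sketch (not the module's exact code; branch sizes and names are illustrative): mean-squared error on the continuous block plus a negative log-likelihood term on the one-hot expert action of each discrete branch.

```python
import torch

def hybrid_bc_loss(
    selected_continuous: torch.Tensor,  # (batch, cont_size), policy actions
    expert_continuous: torch.Tensor,    # (batch, cont_size)
    disc_log_probs: torch.Tensor,       # (batch, sum(branch_sizes)), all-action log-probs
    expert_discrete: torch.Tensor,      # (batch, num_branches), integer actions
    branch_sizes=(3, 2),
) -> torch.Tensor:
    loss = torch.nn.functional.mse_loss(selected_continuous, expert_continuous)
    lp_branches = torch.split(disc_log_probs, list(branch_sizes), dim=1)
    for i, lp in enumerate(lp_branches):
        onehot = torch.nn.functional.one_hot(
            expert_discrete[:, i].long(), num_classes=branch_sizes[i]
        ).float()
        # Maximize the log-prob of the expert action => minimize its negative.
        loss = loss + torch.mean(-torch.sum(lp * onehot, dim=1))
    return loss
```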
78
ml-agents/mlagents/trainers/torch/components/reward_providers/curiosity_reward_provider.py


import numpy as np
from typing import Dict
from typing import Dict, NamedTuple
from mlagents.torch_utils import torch, default_device
from mlagents.trainers.buffer import AgentBuffer

from mlagents.trainers.settings import CuriositySettings
from mlagents_envs.base_env import BehaviorSpec
from mlagents.trainers.torch.agent_action import AgentAction
from mlagents.trainers.torch.action_flattener import ActionFlattener
class ActionPredictionTuple(NamedTuple):
continuous: torch.Tensor
discrete: torch.Tensor
class CuriosityRewardProvider(BaseRewardProvider):

specs.observation_shapes, state_encoder_settings
)
self._action_flattener = ModelUtils.ActionFlattener(self._action_spec)
self._action_flattener = ActionFlattener(self._action_spec)
self.inverse_model_action_prediction = torch.nn.Sequential(
LinearEncoder(2 * settings.encoding_size, 1, 256),
linear_layer(256, self._action_flattener.flattened_size),
self.inverse_model_action_encoding = torch.nn.Sequential(
LinearEncoder(2 * settings.encoding_size, 1, 256)
if self._action_spec.continuous_size > 0:
self.continuous_action_prediction = linear_layer(
256, self._action_spec.continuous_size
)
if self._action_spec.discrete_size > 0:
self.discrete_action_prediction = linear_layer(
256, sum(self._action_spec.discrete_branches)
)
self.forward_model_next_state_prediction = torch.nn.Sequential(
LinearEncoder(
settings.encoding_size + self._action_flattener.flattened_size, 1, 256

)
return hidden
def predict_action(self, mini_batch: AgentBuffer) -> torch.Tensor:
def predict_action(self, mini_batch: AgentBuffer) -> ActionPredictionTuple:
"""
In the continuous case, returns the predicted action.
In the discrete case, returns the logits.

)
hidden = self.inverse_model_action_prediction(inverse_model_input)
if self._action_spec.is_continuous():
return hidden
else:
continuous_pred = None
discrete_pred = None
hidden = self.inverse_model_action_encoding(inverse_model_input)
if self._action_spec.continuous_size > 0:
continuous_pred = self.continuous_action_prediction(hidden)
if self._action_spec.discrete_size > 0:
raw_discrete_pred = self.discrete_action_prediction(hidden)
hidden, self._action_spec.discrete_branches
raw_discrete_pred, self._action_spec.discrete_branches
return torch.cat(branches, dim=1)
discrete_pred = torch.cat(branches, dim=1)
return ActionPredictionTuple(continuous_pred, discrete_pred)
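
After this change the curiosity inverse model no longer returns a single tensor: a shared encoder feeds two heads, one regressing the continuous action and one producing per-branch discrete probabilities, packed into a small NamedTuple. A self-contained sketch of that structure (class and attribute names here are illustrative stand-ins):

import torch
from typing import NamedTuple, Optional, Tuple


class ActionPrediction(NamedTuple):
    continuous: Optional[torch.Tensor]
    discrete: Optional[torch.Tensor]


class InverseModelHeads(torch.nn.Module):
    # Two prediction heads over a shared encoding: one regressing the continuous
    # action, one producing per-branch probabilities for the discrete action.
    def __init__(self, hidden_size: int, continuous_size: int, discrete_branches: Tuple[int, ...]):
        super().__init__()
        self.continuous_head = (
            torch.nn.Linear(hidden_size, continuous_size) if continuous_size > 0 else None
        )
        self.discrete_head = (
            torch.nn.Linear(hidden_size, sum(discrete_branches)) if discrete_branches else None
        )
        self.discrete_branches = discrete_branches

    def forward(self, hidden: torch.Tensor) -> ActionPrediction:
        cont = self.continuous_head(hidden) if self.continuous_head is not None else None
        disc = None
        if self.discrete_head is not None:
            logits = self.discrete_head(hidden)
            # Softmax each branch independently, then re-concatenate.
            branches = torch.split(logits, list(self.discrete_branches), dim=1)
            disc = torch.cat([torch.softmax(b, dim=1) for b in branches], dim=1)
        return ActionPrediction(cont, disc)
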
def predict_next_state(self, mini_batch: AgentBuffer) -> torch.Tensor:
"""

if self._action_spec.is_continuous():
action = ModelUtils.list_to_tensor(mini_batch["actions"], dtype=torch.float)
else:
action = torch.cat(
ModelUtils.actions_to_onehot(
ModelUtils.list_to_tensor(mini_batch["actions"], dtype=torch.long),
self._action_spec.discrete_branches,
),
dim=1,
)
actions = AgentAction.from_dict(mini_batch)
flattened_action = self._action_flattener.forward(actions)
(self.get_current_state(mini_batch), action), dim=1
(self.get_current_state(mini_batch), flattened_action), dim=1
)
return self.forward_model_next_state_prediction(forward_model_input)

action prediction (given the current and next state).
"""
predicted_action = self.predict_action(mini_batch)
if self._action_spec.is_continuous():
actions = AgentAction.from_dict(mini_batch)
_inverse_loss = 0
if self._action_spec.continuous_size > 0:
ModelUtils.list_to_tensor(mini_batch["actions"], dtype=torch.float)
- predicted_action
actions.continuous_tensor - predicted_action.continuous
return torch.mean(
_inverse_loss += torch.mean(
ModelUtils.dynamic_partition(
sq_difference,
ModelUtils.list_to_tensor(mini_batch["masks"], dtype=torch.float),

else:
if self._action_spec.discrete_size > 0:
ModelUtils.list_to_tensor(mini_batch["actions"], dtype=torch.long),
self._action_spec.discrete_branches,
actions.discrete_tensor, self._action_spec.discrete_branches
-torch.log(predicted_action + self.EPSILON) * true_action, dim=1
-torch.log(predicted_action.discrete + self.EPSILON) * true_action,
dim=1,
return torch.mean(
_inverse_loss += torch.mean(
ModelUtils.dynamic_partition(
cross_entropy,
ModelUtils.list_to_tensor(

)[1]
)
return _inverse_loss
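
Both branches of the inverse loss above average a per-sample error over only the active steps, via ModelUtils.dynamic_partition(..., 2)[1] keyed on the "masks" entry of the mini-batch. A simplified equivalent of that masking step, assuming masks holds 0/1 floats:

import torch


def masked_mean_loss(per_sample_loss: torch.Tensor, masks: torch.Tensor) -> torch.Tensor:
    # Keep only the entries whose mask is 1 (active steps), then average them.
    active = per_sample_loss[masks.bool()]
    return active.mean() if active.numel() > 0 else torch.zeros(())
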
def compute_reward(self, mini_batch: AgentBuffer) -> torch.Tensor:
"""

8
ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py


from mlagents.trainers.settings import GAILSettings
from mlagents_envs.base_env import BehaviorSpec
from mlagents.trainers.torch.utils import ModelUtils
from mlagents.trainers.torch.agent_action import AgentAction
from mlagents.trainers.torch.action_flattener import ActionFlattener
from mlagents.trainers.torch.networks import NetworkBody
from mlagents.trainers.torch.layers import linear_layer, Initialization
from mlagents.trainers.settings import NetworkSettings, EncoderType

vis_encode_type=EncoderType.SIMPLE,
memory=None,
)
self._action_flattener = ModelUtils.ActionFlattener(specs.action_spec)
self._action_flattener = ActionFlattener(specs.action_spec)
unencoded_size = (
self._action_flattener.flattened_size + 1 if settings.use_actions else 0
) # +1 is for dones

Creates the action Tensor. In the continuous case, this corresponds to the action. In
the discrete case, it corresponds to the concatenation of one-hot action Tensors.
"""
return self._action_flattener.forward(
torch.as_tensor(mini_batch["actions"], dtype=torch.float)
)
return self._action_flattener.forward(AgentAction.from_dict(mini_batch))
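
Instead of casting the flat "actions" entry itself, the GAIL discriminator now rebuilds an AgentAction from the mini-batch and hands it to the flattener. A sketch of that from-buffer step under the buffer key names used elsewhere in this diff (HybridActionTensors and action_tensors_from_buffer are illustrative stand-ins, not the real AgentAction.from_dict):

import numpy as np
import torch
from typing import NamedTuple, Optional


class HybridActionTensors(NamedTuple):
    # Illustrative stand-in for AgentAction: the two halves of a hybrid action.
    continuous_tensor: Optional[torch.Tensor]
    discrete_tensor: Optional[torch.Tensor]


def action_tensors_from_buffer(mini_batch: dict) -> HybridActionTensors:
    # Read back the "continuous_action" / "discrete_action" keys that the
    # trajectory code (further down in this diff) writes into the buffer.
    cont = disc = None
    if "continuous_action" in mini_batch:
        cont = torch.as_tensor(np.asarray(mini_batch["continuous_action"]), dtype=torch.float32)
    if "discrete_action" in mini_batch:
        disc = torch.as_tensor(np.asarray(mini_batch["discrete_action"]), dtype=torch.long)
    return HybridActionTensors(cont, disc)
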
def get_state_inputs(
self, mini_batch: AgentBuffer

21
ml-agents/mlagents/trainers/torch/distributions.py


"""
pass
@abc.abstractmethod
def exported_model_output(self) -> torch.Tensor:
"""
Returns the tensor to be exported to ONNX for the distribution
"""
pass
class DiscreteDistInstance(DistInstance):
@abc.abstractmethod

dim=1,
keepdim=True,
) # Use equivalent behavior to TF
def exported_model_output(self):
return self.sample()
class TanhGaussianDistInstance(GaussianDistInstance):

return torch.log(self.probs + EPSILON)
def entropy(self):
return -torch.sum(self.probs * torch.log(self.probs + EPSILON), dim=-1)
return -torch.sum(
self.probs * torch.log(self.probs + EPSILON), dim=-1
).unsqueeze(-1)
def exported_model_output(self):
return self.all_log_prob()
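
The distribution instances gain an exported_model_output() hook so ONNX export has a concrete tensor to emit: Gaussians export a sample, categoricals export their full log-probability table, and entropy keeps a trailing dimension so branch entropies stack cleanly. A minimal sketch of the discrete case (a toy class, not the library's actual distribution classes):

import torch

EPSILON = 1e-7


class ToyCategoricalDist:
    # Minimal wrapper showing the exported_model_output idea: ONNX export needs a
    # concrete tensor, so the discrete distribution exports its full log-prob table.
    def __init__(self, logits: torch.Tensor):
        self.probs = torch.softmax(logits, dim=-1)

    def all_log_prob(self) -> torch.Tensor:
        return torch.log(self.probs + EPSILON)

    def entropy(self) -> torch.Tensor:
        # Keep a trailing dimension so entropies from several branches stack cleanly.
        return -torch.sum(self.probs * torch.log(self.probs + EPSILON), dim=-1).unsqueeze(-1)

    def exported_model_output(self) -> torch.Tensor:
        return self.all_log_prob()
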
class GaussianDistribution(nn.Module):

# verified version of Barracuda (1.0.2).
log_sigma = torch.cat([self.log_sigma] * inputs.shape[0], axis=0)
if self.tanh_squash:
return [TanhGaussianDistInstance(mu, torch.exp(log_sigma))]
return TanhGaussianDistInstance(mu, torch.exp(log_sigma))
return [GaussianDistInstance(mu, torch.exp(log_sigma))]
return GaussianDistInstance(mu, torch.exp(log_sigma))
class MultiCategoricalDistribution(nn.Module):

31
ml-agents/mlagents/trainers/torch/model_serialization.py


+ [f"visual_observation_{i}" for i in range(self.policy.vis_obs_size)]
+ ["action_masks", "memories"]
)
self.dynamic_axes = {name: {0: "batch"} for name in self.input_names}
self.output_names = [
"action",
"version_number",
"memory_size",
"is_continuous_control",
"action_output_shape",
]
self.dynamic_axes = {name: {0: "batch"} for name in self.input_names}
self.dynamic_axes.update({"action": {0: "batch"}})
self.output_names = ["version_number", "memory_size"]
if self.policy.behavior_spec.action_spec.continuous_size > 0:
self.output_names += [
"continuous_actions",
"continuous_action_output_shape",
]
self.dynamic_axes.update({"continuous_actions": {0: "batch"}})
if self.policy.behavior_spec.action_spec.discrete_size > 0:
self.output_names += ["discrete_actions", "discrete_action_output_shape"]
self.dynamic_axes.update({"discrete_actions": {0: "batch"}})
if (
self.policy.behavior_spec.action_spec.continuous_size == 0
or self.policy.behavior_spec.action_spec.discrete_size == 0
):
self.output_names += [
"action",
"is_continuous_control",
"action_output_shape",
]
self.dynamic_axes.update({"action": {0: "batch"}})
def export_policy_model(self, output_filepath: str) -> None:
"""

226
ml-agents/mlagents/trainers/torch/networks.py


from typing import Callable, List, Dict, Tuple, Optional
from typing import Callable, List, Dict, Tuple, Optional, Union
from mlagents.trainers.torch.distributions import (
GaussianDistribution,
MultiCategoricalDistribution,
DistInstance,
)
from mlagents.trainers.torch.action_model import ActionModel
from mlagents.trainers.torch.agent_action import AgentAction
from mlagents.trainers.torch.action_log_probs import ActionLogProbs
from mlagents.trainers.settings import NetworkSettings
from mlagents.trainers.torch.utils import ModelUtils
from mlagents.trainers.torch.decoders import ValueHeads

else 0
)
self.visual_processors, self.vector_processors, encoder_input_size = ModelUtils.create_input_processors(
(
self.visual_processors,
self.vector_processors,
encoder_input_size,
) = ModelUtils.create_input_processors(
observation_shapes,
self.h_size,
network_settings.vis_encode_type,

pass
@abc.abstractmethod
def sample_action(self, dists: List[DistInstance]) -> List[torch.Tensor]:
"""
Takes a List of Distribution instances and samples an action from each.

"""
pass
@abc.abstractmethod
def get_dists(
self,
vec_inputs: List[torch.Tensor],
vis_inputs: List[torch.Tensor],
masks: Optional[torch.Tensor] = None,
memories: Optional[torch.Tensor] = None,
sequence_length: int = 1,
) -> Tuple[List[DistInstance], Optional[torch.Tensor]]:
"""
Returns distributions from this Actor, from which actions can be sampled.
If memory is enabled, return the memories as well.
:param vec_inputs: A List of vector inputs as tensors.
:param vis_inputs: A List of visual inputs as tensors.
:param masks: If using discrete actions, a Tensor of action masks.
:param memories: If using memory, a Tensor of initial memories.
:param sequence_length: If using memory, the sequence length.
:return: A Tuple of a List of action distribution instances, and memories.
Memories will be None if not using memory.
"""
pass
@abc.abstractmethod
def forward(
self,
vec_inputs: List[torch.Tensor],

) -> Tuple[torch.Tensor, int, int, int, int]:
) -> Tuple[Union[int, torch.Tensor], ...]:
"""
Forward pass of the Actor for inference. This is required for export to ONNX, and
the inputs and outputs of this method should not be changed without a respective change

pass
@abc.abstractmethod
def get_dist_and_value(
def get_action_stats_and_value(
self,
vec_inputs: List[torch.Tensor],
vis_inputs: List[torch.Tensor],

) -> Tuple[List[DistInstance], Dict[str, torch.Tensor], torch.Tensor]:
) -> Tuple[
AgentAction, ActionLogProbs, torch.Tensor, Dict[str, torch.Tensor], torch.Tensor
]:
"""
Returns distributions, from which actions can be sampled, and value estimates.
If memory is enabled, return the memories as well.

:param memories: If using memory, a Tensor of initial memories.
:param sequence_length: If using memory, the sequence length.
:return: A Tuple of a List of action distribution instances, a Dict of reward signal
:return: A Tuple of AgentAction, ActionLogProbs, entropies, Dict of reward signal
name to value estimate, and memories. Memories will be None if not using memory.
"""
pass

super().__init__()
self.action_spec = action_spec
self.version_number = torch.nn.Parameter(torch.Tensor([2.0]))
self.is_continuous_int = torch.nn.Parameter(
self.is_continuous_int_deprecated = torch.nn.Parameter(
self.act_size_vector = torch.nn.Parameter(
self.continuous_act_size_vector = torch.nn.Parameter(
torch.Tensor([int(self.action_spec.continuous_size)]), requires_grad=False
)
# TODO: export list of branch sizes instead of sum
self.discrete_act_size_vector = torch.nn.Parameter(
torch.Tensor([sum(self.action_spec.discrete_branches)]), requires_grad=False
)
self.act_size_vector_deprecated = torch.nn.Parameter(
torch.Tensor(
[
self.action_spec.continuous_size

else:
self.encoding_size = network_settings.hidden_units
if self.action_spec.is_continuous():
self.distribution = GaussianDistribution(
self.encoding_size,
self.action_spec.continuous_size,
conditional_sigma=conditional_sigma,
tanh_squash=tanh_squash,
)
else:
self.distribution = MultiCategoricalDistribution(
self.encoding_size, self.action_spec.discrete_branches
)
# During training, clipping is done in TorchPolicy, but we need to clip before ONNX
# export as well.
self._clip_action_on_export = not tanh_squash
self.action_model = ActionModel(
self.encoding_size,
action_spec,
conditional_sigma=conditional_sigma,
tanh_squash=tanh_squash,
)
@property
def memory_size(self) -> int:

self.network_body.update_normalization(vector_obs)
def sample_action(self, dists: List[DistInstance]) -> List[torch.Tensor]:
actions = []
for action_dist in dists:
action = action_dist.sample()
actions.append(action)
return actions
def get_dists(
self,
vec_inputs: List[torch.Tensor],
vis_inputs: List[torch.Tensor],
masks: Optional[torch.Tensor] = None,
memories: Optional[torch.Tensor] = None,
sequence_length: int = 1,
) -> Tuple[List[DistInstance], Optional[torch.Tensor]]:
encoding, memories = self.network_body(
vec_inputs, vis_inputs, memories=memories, sequence_length=sequence_length
)
if self.action_spec.is_continuous():
dists = self.distribution(encoding)
else:
dists = self.distribution(encoding, masks)
return dists, memories
def forward(
self,
vec_inputs: List[torch.Tensor],

) -> Tuple[torch.Tensor, int, int, int, int]:
) -> Tuple[Union[int, torch.Tensor], ...]:
At the moment, torch.onnx.export() does not accept None as a tensor to be exported,
so the size of the returned tuple varies with the action spec.
dists, _ = self.get_dists(vec_inputs, vis_inputs, masks, memories, 1)
if self.action_spec.is_continuous():
action_list = self.sample_action(dists)
action_out = torch.stack(action_list, dim=-1)
if self._clip_action_on_export:
action_out = torch.clamp(action_out, -3, 3) / 3
else:
action_out = torch.cat([dist.all_log_prob() for dist in dists], dim=1)
return (
action_out,
encoding, memories_out = self.network_body(
vec_inputs, vis_inputs, memories=memories, sequence_length=1
)
(
cont_action_out,
disc_action_out,
action_out_deprecated,
) = self.action_model.get_action_out(encoding, masks)
export_out = [
self.is_continuous_int,
self.act_size_vector,
)
]
if self.action_spec.continuous_size > 0:
export_out += [cont_action_out, self.continuous_act_size_vector]
if self.action_spec.discrete_size > 0:
export_out += [disc_action_out, self.discrete_act_size_vector]
# Only export deprecated nodes with non-hybrid action spec
if self.action_spec.continuous_size == 0 or self.action_spec.discrete_size == 0:
export_out += [
action_out_deprecated,
self.is_continuous_int_deprecated,
self.act_size_vector_deprecated,
]
return tuple(export_out)
class SharedActorCritic(SimpleActor, ActorCritic):

conditional_sigma: bool = False,
tanh_squash: bool = False,
):
self.use_lstm = network_settings.memory is not None
super().__init__(
observation_shapes,
network_settings,

)
return self.value_heads(encoding), memories_out
def get_dist_and_value(
def get_stats_and_value(
actions: AgentAction,
) -> Tuple[List[DistInstance], Dict[str, torch.Tensor], torch.Tensor]:
) -> Tuple[ActionLogProbs, torch.Tensor, Dict[str, torch.Tensor]]:
if self.action_spec.is_continuous():
dists = self.distribution(encoding)
else:
dists = self.distribution(encoding, masks=masks)
log_probs, entropies = self.action_model.evaluate(encoding, masks, actions)
value_outputs = self.value_heads(encoding)
return log_probs, entropies, value_outputs
def get_action_stats_and_value(
self,
vec_inputs: List[torch.Tensor],
vis_inputs: List[torch.Tensor],
masks: Optional[torch.Tensor] = None,
memories: Optional[torch.Tensor] = None,
sequence_length: int = 1,
) -> Tuple[
AgentAction, ActionLogProbs, torch.Tensor, Dict[str, torch.Tensor], torch.Tensor
]:
encoding, memories = self.network_body(
vec_inputs, vis_inputs, memories=memories, sequence_length=sequence_length
)
action, log_probs, entropies = self.action_model(encoding, masks)
return dists, value_outputs, memories
return action, log_probs, entropies, value_outputs, memories
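
get_action_stats_and_value now returns the sampled AgentAction, its ActionLogProbs, entropies, value estimates and memories in one call, with sampling delegated to ActionModel. A toy version of such an action model, combining one Gaussian head with one categorical head per discrete branch (everything here is illustrative; the real ActionModel lives in mlagents.trainers.torch.action_model and assumes a spec with both action families):

import torch
from typing import List, NamedTuple, Tuple


class HybridAction(NamedTuple):
    continuous: torch.Tensor
    discrete: List[torch.Tensor]


class TinyActionModel(torch.nn.Module):
    # Illustrative stand-in: one Gaussian head plus one categorical head per
    # discrete branch, sampled together in forward().
    def __init__(self, hidden_size: int, continuous_size: int, discrete_branches: Tuple[int, ...]):
        super().__init__()
        self.mu = torch.nn.Linear(hidden_size, continuous_size)
        self.log_sigma = torch.nn.Parameter(torch.zeros(1, continuous_size))
        self.branch_heads = torch.nn.ModuleList(
            [torch.nn.Linear(hidden_size, b) for b in discrete_branches]
        )

    def forward(self, encoding: torch.Tensor):
        cont_dist = torch.distributions.Normal(self.mu(encoding), self.log_sigma.exp())
        continuous = cont_dist.sample()
        log_probs = [cont_dist.log_prob(continuous).sum(dim=1)]
        entropies = [cont_dist.entropy().sum(dim=1)]
        discrete = []
        for head in self.branch_heads:
            dist = torch.distributions.Categorical(logits=head(encoding))
            branch_action = dist.sample()
            discrete.append(branch_action)
            log_probs.append(dist.log_prob(branch_action))
            entropies.append(dist.entropy())
        action = HybridAction(continuous, discrete)
        return action, torch.stack(log_probs, dim=1), torch.stack(entropies, dim=1).sum(dim=1)
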
class SeparateActorCritic(SimpleActor, ActorCritic):

conditional_sigma: bool = False,
tanh_squash: bool = False,
):
# Give the Actor only half of the memories. Note that we previously validated
# that memory_size is a multiple of 4.
self.use_lstm = network_settings.memory is not None
super().__init__(
observation_shapes,

memories_out = None
return value_outputs, memories_out
def get_dist_and_value(
def get_stats_and_value(
actions: AgentAction,
) -> Tuple[List[DistInstance], Dict[str, torch.Tensor], torch.Tensor]:
) -> Tuple[ActionLogProbs, torch.Tensor, Dict[str, torch.Tensor]]:
if self.use_lstm:
# Use only the back half of memories for critic and actor
actor_mem, critic_mem = torch.split(memories, self.memory_size // 2, dim=-1)

dists, actor_mem_outs = self.get_dists(
vec_inputs,
vis_inputs,
memories=actor_mem,
sequence_length=sequence_length,
masks=masks,
encoding, actor_mem_outs = self.network_body(
vec_inputs, vis_inputs, memories=actor_mem, sequence_length=sequence_length
log_probs, entropies = self.action_model.evaluate(encoding, masks, actions)
value_outputs, critic_mem_outs = self.critic(
vec_inputs, vis_inputs, memories=critic_mem, sequence_length=sequence_length
)
return log_probs, entropies, value_outputs
def get_action_stats_and_value(
self,
vec_inputs: List[torch.Tensor],
vis_inputs: List[torch.Tensor],
masks: Optional[torch.Tensor] = None,
memories: Optional[torch.Tensor] = None,
sequence_length: int = 1,
) -> Tuple[
AgentAction, ActionLogProbs, torch.Tensor, Dict[str, torch.Tensor], torch.Tensor
]:
if self.use_lstm:
# Use only the back half of memories for critic and actor
actor_mem, critic_mem = torch.split(memories, self.memory_size // 2, dim=-1)
else:
critic_mem = None
actor_mem = None
encoding, actor_mem_outs = self.network_body(
vec_inputs, vis_inputs, memories=actor_mem, sequence_length=sequence_length
)
action, log_probs, entropies = self.action_model(encoding, masks)
value_outputs, critic_mem_outs = self.critic(
vec_inputs, vis_inputs, memories=critic_mem, sequence_length=sequence_length
)

mem_out = None
return dists, value_outputs, mem_out
return action, log_probs, entropies, value_outputs, mem_out
def update_normalization(self, vector_obs: List[torch.Tensor]) -> None:
super().update_normalization(vector_obs)

48
ml-agents/mlagents/trainers/torch/utils.py


)
from mlagents.trainers.settings import EncoderType, ScheduleType
from mlagents.trainers.exception import UnityTrainerException
from mlagents_envs.base_env import ActionSpec
from mlagents.trainers.torch.distributions import DistInstance, DiscreteDistInstance
class ModelUtils:

EncoderType.NATURE_CNN: 36,
EncoderType.RESNET: 15,
}
class ActionFlattener:
def __init__(self, action_spec: ActionSpec):
self._specs = action_spec
@property
def flattened_size(self) -> int:
if self._specs.is_continuous():
return self._specs.continuous_size
else:
return sum(self._specs.discrete_branches)
def forward(self, action: torch.Tensor) -> torch.Tensor:
if self._specs.is_continuous():
return action
else:
return torch.cat(
ModelUtils.actions_to_onehot(
torch.as_tensor(action, dtype=torch.long),
self._specs.discrete_branches,
),
dim=1,
)
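
The ActionFlattener shown here still assumes an either/or action space; the hybrid-aware version (moved to mlagents.trainers.torch.action_flattener) concatenates the continuous action with a one-hot block per discrete branch. A sketch of that behaviour (class name illustrative):

import torch
from typing import Tuple


class HybridActionFlattener:
    # Sketch: the flattened vector is the continuous action concatenated with a
    # one-hot block per discrete branch.
    def __init__(self, continuous_size: int, discrete_branches: Tuple[int, ...]):
        self.continuous_size = continuous_size
        self.discrete_branches = discrete_branches

    @property
    def flattened_size(self) -> int:
        return self.continuous_size + sum(self.discrete_branches)

    def forward(self, continuous: torch.Tensor, discrete: torch.Tensor) -> torch.Tensor:
        pieces = []
        if self.continuous_size > 0:
            pieces.append(continuous)
        for i, branch_size in enumerate(self.discrete_branches):
            pieces.append(
                torch.nn.functional.one_hot(discrete[:, i].long(), branch_size).float()
            )
        return torch.cat(pieces, dim=1)
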
@staticmethod
def update_learning_rate(optim: torch.optim.Optimizer, lr: float) -> None:

for i in range(num_partitions):
res += [data[(partitions == i).nonzero().squeeze(1)]]
return res
@staticmethod
def get_probs_and_entropy(
action_list: List[torch.Tensor], dists: List[DistInstance]
) -> Tuple[torch.Tensor, torch.Tensor, Optional[torch.Tensor]]:
log_probs_list = []
all_probs_list = []
entropies_list = []
for action, action_dist in zip(action_list, dists):
log_prob = action_dist.log_prob(action)
log_probs_list.append(log_prob)
entropies_list.append(action_dist.entropy())
if isinstance(action_dist, DiscreteDistInstance):
all_probs_list.append(action_dist.all_log_prob())
log_probs = torch.stack(log_probs_list, dim=-1)
entropies = torch.stack(entropies_list, dim=-1)
if not all_probs_list:
log_probs = log_probs.squeeze(-1)
entropies = entropies.squeeze(-1)
all_probs = None
else:
all_probs = torch.cat(all_probs_list, dim=-1)
return log_probs, entropies, all_probs
@staticmethod
def masked_mean(tensor: torch.Tensor, masks: torch.Tensor) -> torch.Tensor:

28
ml-agents/mlagents/trainers/trajectory.py


import numpy as np
from mlagents.trainers.buffer import AgentBuffer
from mlagents_envs.base_env import ActionTuple
from mlagents.trainers.torch.action_log_probs import LogProbsTuple
class AgentExperience(NamedTuple):

action: np.ndarray
action_probs: np.ndarray
action_pre: np.ndarray # TODO: Remove this
action: ActionTuple
action_probs: LogProbsTuple
action_mask: np.ndarray
prev_action: np.ndarray
interrupted: bool

agent_buffer_trajectory["masks"].append(1.0)
agent_buffer_trajectory["done"].append(exp.done)
# Add the outputs of the last eval
if exp.action_pre is not None:
actions_pre = exp.action_pre
agent_buffer_trajectory["actions_pre"].append(actions_pre)
# value is a dictionary from name of reward to value estimate of the value head
agent_buffer_trajectory["actions"].append(exp.action)
agent_buffer_trajectory["action_probs"].append(exp.action_probs)
# Adds the log prob and action of continuous/discrete separately
agent_buffer_trajectory["continuous_action"].append(exp.action.continuous)
agent_buffer_trajectory["discrete_action"].append(exp.action.discrete)
agent_buffer_trajectory["continuous_log_probs"].append(
exp.action_probs.continuous
)
agent_buffer_trajectory["discrete_log_probs"].append(
exp.action_probs.discrete
)
# Store action masks if necessary. Note that 1 means active, while
# in AgentExperience False means active.

else:
# This should never be needed unless the environment somehow doesn't supply the
# action mask in a discrete space.
action_shape = exp.action.discrete.shape
np.ones(exp.action_probs.shape, dtype=np.float32), padding_value=1
np.ones(action_shape, dtype=np.float32), padding_value=1
agent_buffer_trajectory["prev_action"].append(exp.prev_action)
agent_buffer_trajectory["environment_rewards"].append(exp.reward)

22
ml-agents/tests/yamato/scripts/run_llapi.py


import argparse
import numpy as np
from mlagents_envs.environment import UnityEnvironment
from mlagents_envs.side_channel.engine_configuration_channel import (

episode_rewards = 0
tracked_agent = -1
while not done:
if group_spec.action_spec.is_continuous():
action = np.random.randn(
len(decision_steps), group_spec.action_spec.continuous_size
)
elif group_spec.action_spec.is_discrete():
branch_size = group_spec.action_spec.discrete_branches
action = np.column_stack(
[
np.random.randint(
0, branch_size[i], size=(len(decision_steps))
)
for i in range(len(branch_size))
]
)
else:
# Should never happen
action = None
action_tuple = group_spec.action_spec.random_action(len(decision_steps))
env.set_actions(group_name, action)
env.set_actions(group_name, action_tuple)
env.step()
decision_steps, terminal_steps = env.get_steps(group_name)
done = False
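
The yamato script now lets the spec sample for it via action_spec.random_action() and pushes the result with set_actions(). Building a hybrid action by hand looks roughly like the sketch below; the 2-continuous / (3, 2)-discrete shapes are made up for illustration, and it assumes ActionTuple can be constructed directly from the two arrays:

import numpy as np
from mlagents_envs.base_env import ActionTuple

n_agents = 4
# Substitute the sizes from your own BehaviorSpec.
continuous = np.random.randn(n_agents, 2).astype(np.float32)
discrete = np.column_stack(
    [np.random.randint(0, 3, size=n_agents), np.random.randint(0, 2, size=n_agents)]
).astype(np.int32)
# Assumption: ActionTuple accepts the continuous and discrete arrays directly.
action = ActionTuple(continuous=continuous, discrete=discrete)
# env.set_actions(group_name, action)  # same call as in the loop above
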

4
protobuf-definitions/proto/mlagents_envs/communicator_objects/agent_action.proto


package communicator_objects;
message AgentActionProto {
repeated float vector_actions = 1;
repeated float vector_actions_deprecated = 1; // mark as deprecated in communicator v1.3.0
repeated float continuous_actions = 6;
repeated int32 discrete_actions = 7;
}
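
On the wire, a hybrid action fills both repeated fields of AgentActionProto. A short sketch using the generated Python bindings (values are illustrative):

from mlagents_envs.communicator_objects.agent_action_pb2 import AgentActionProto

proto = AgentActionProto()
proto.continuous_actions.extend([0.25, -0.5])  # field 6
proto.discrete_actions.extend([1, 0])          # field 7
payload = proto.SerializeToString()
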

14
protobuf-definitions/proto/mlagents_envs/communicator_objects/brain_parameters.proto


option csharp_namespace = "Unity.MLAgents.CommunicatorObjects";
package communicator_objects;
message ActionSpecProto {
int32 num_continuous_actions = 1;
int32 num_discrete_actions = 2;
repeated int32 discrete_branch_sizes = 3;
repeated string action_descriptions = 4;
}
repeated int32 vector_action_size = 3;
repeated int32 vector_action_size_deprecated = 3; // mark as deprecated in communicator v1.3.0
repeated string vector_action_descriptions = 5;
SpaceTypeProto vector_action_space_type = 6;
repeated string vector_action_descriptions_deprecated = 5; // mark as deprecated in communicator v1.3.0
SpaceTypeProto vector_action_space_type_deprecated = 6; // mark as deprecated in communicator v1.3.0
ActionSpecProto action_spec = 9;
}
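
BrainParametersProto now carries an ActionSpecProto instead of the deprecated vector_action_* fields. A sketch of describing a hybrid space with the generated bindings (sizes are illustrative):

from mlagents_envs.communicator_objects.brain_parameters_pb2 import ActionSpecProto

spec_proto = ActionSpecProto()
spec_proto.num_continuous_actions = 2
spec_proto.num_discrete_actions = 2
spec_proto.discrete_branch_sizes.extend([3, 2])
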

3
protobuf-definitions/proto/mlagents_envs/communicator_objects/capabilities.proto


// compression mapping for stacking compressed observations.
bool compressedChannelMapping = 3;
// support for hybrid action spaces (discrete + continuous)
bool hybridActions = 4;
}

Some files were not shown because too many files changed in this diff
