|
|
|
|
|
|
internal struct AgentInfo
{
    /// <summary>
    /// Keeps track of the last actions taken by the Brain.
    /// </summary>
    public ActionBuffers storedActions;

    /// <summary>
    /// For discrete control, specifies the actions that the agent cannot take.
    /// </summary>
    public void ClearActions()
    {
        storedActions.Clear();
    }

    public void CopyActions(ActionBuffers actionBuffers)
    {
        var continuousActions = storedActions.ContinuousActions;
        for (var i = 0; i < actionBuffers.ContinuousActions.Length; i++)
        {
            continuousActions[i] = actionBuffers.ContinuousActions[i];
        }
        var discreteActions = storedActions.DiscreteActions;
        for (var i = 0; i < actionBuffers.DiscreteActions.Length; i++)
        {
            discreteActions[i] = actionBuffers.DiscreteActions[i];
        }
    }
}
        InitializeSensors();
    }
        m_Info.storedActions = new ActionBuffers(
            new float[m_ActuatorManager.NumContinuousActions],
            new int[m_ActuatorManager.NumDiscreteActions]
        );
        m_CumulativeReward = 0f;
        m_RequestAction = false;
        m_RequestDecision = false;
        m_Info.storedActions.Clear();
    }
    /// <summary>
    /// <seealso cref="IActionReceiver.OnActionReceived"/>
    public virtual void Heuristic(in ActionBuffers actionsOut)
    {
        var brainParams = m_PolicyFactory.BrainParameters;
        var actionSpec = brainParams.ActionSpec;
        // For continuous and discrete actions together, we don't need to fall back to the legacy method
        if (actionSpec.NumContinuousActions > 0 && actionSpec.NumDiscreteActions > 0)
        {
            Debug.LogWarning("Heuristic method called but not implemented. Clearing ActionBuffers.");
            actionsOut.Clear();
            return;
        }
        switch (brainParams.VectorActionSpaceType)
        {
            case SpaceType.Continuous:
                Heuristic(actionsOut.ContinuousActions.Array);

            CollectObservations(collectObservationsSensor);
        }
    }
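// Illustrative sketch (hypothetical subclass, not part of this file): an agent that overrides
// Heuristic(in ActionBuffers) and writes into the buffers directly, so the legacy float[]
// fallback above never runs. The input axis/key names and action layout are assumptions.
public class ManualDriveAgent : Agent
{
    public override void Heuristic(in ActionBuffers actionsOut)
    {
        // Continuous action 0: steering read from the keyboard/gamepad, already in [-1, 1].
        var continuousActions = actionsOut.ContinuousActions;
        continuousActions[0] = Input.GetAxis("Horizontal");

        // Discrete branch 0: 1 = jump, 0 = do nothing.
        var discreteActions = actionsOut.DiscreteActions;
        discreteActions[0] = Input.GetKey(KeyCode.Space) ? 1 : 0;
    }
}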
|
|
|
        using (TimerStack.Instance.Scoped("WriteActionMask"))
        {
            m_ActuatorManager.WriteActionMask();
        }
    }
    /// <summary>
    /// Implement `WriteDiscreteActionMask()` to collect the masks for discrete
    /// actions. When using discrete actions, the agent will not perform the masked
    /// action.
    /// </summary>
    /// <remarks>
    /// When using Discrete Control, you can prevent the Agent from using a certain
    /// action by masking it with <see cref="IDiscreteActionMask.WriteMask(int, IEnumerable{int})"/>.
    ///
    /// See [Agents - Actions] for more information on masking actions.
    ///
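    /// <example>
    /// An illustrative sketch of an override (the hypothetical helper <c>IsBlockedLeft()</c>
    /// stands in for your own game logic), assuming a single discrete branch 0 in which
    /// action 1 means "move left":
    /// <code>
    /// public override void WriteDiscreteActionMask(IDiscreteActionMask actionMask)
    /// {
    ///     if (IsBlockedLeft())
    ///     {
    ///         // The policy can no longer pick action 1 on branch 0 for this step.
    ///         actionMask.WriteMask(0, new[] { 1 });
    ///     }
    /// }
    /// </code>
    /// </example>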
|
|
|
|
|
|
|
    /// on the provided action.
    /// </summary>
    /// <remarks>
    /// An action is passed to this function in the form of an <seealso cref="ActionBuffers"/>.
    /// Your implementation must use the array to direct the agent's behavior for the
    /// current step.
    ///
    /// You decide how many elements you need in the ActionBuffers to control your
    /// agent and what each element means. For example, if you want to apply a
    /// force to move an agent around the environment, you can arbitrarily pick
    /// three values in the ActionBuffers.ContinuousActions array to use as the force components.
    /// During training, the agent's policy learns to set those particular elements of
    /// the array to maximize the training rewards the agent receives.
    ///
    /// An Agent can use continuous and/or discrete actions. Configure this along with the size
    /// of the action array, in the <see cref="BrainParameters"/> of the agent's associated
    /// <see cref="BehaviorParameters"/> component.
    ///
    /// When an agent uses continuous actions, the values in the ActionBuffers.ContinuousActions
    /// array are floating point numbers. You should clamp the values to the range, -1..1, to
    /// increase numerical stability during training.
    ///
    /// When an agent uses discrete actions, the values in the ActionBuffers.DiscreteActions array
    /// are integers that each represent a specific, discrete action. For example,
    /// you could define a set of discrete actions such as:
    ///
    /// <code>
    /// 0 = Do nothing
    /// 1 = Move one space left
    /// 2 = Move one space right
    /// 3 = Move one space up
    /// 4 = Move one space down
    /// </code>
    ///
    /// When making a decision, the agent picks one of the five actions and puts the
    /// corresponding integer value in the ActionBuffers.DiscreteActions array. For example, if the agent
    /// decided to move left, the ActionBuffers.DiscreteActions parameter would be an array with
    /// a single element with the value 1.
    ///
    /// You can define multiple sets, or branches, of discrete actions to allow an
    /// agent to perform simultaneous, independent actions.
    ///
    /// The ActionBuffers.DiscreteActions array of an agent with discrete actions contains one
    /// element for each branch. The value of each element is the integer representing the
    /// chosen action for that branch. The agent always chooses one action for each branch.
    ///
    /// When you use discrete actions, you can prevent the training process
    /// or the trained model from using certain actions by
    /// implementing the <see cref="WriteDiscreteActionMask(IDiscreteActionMask)"/>
    /// method. For example, if your agent is next to a wall, you could mask out any
    /// actions that would result in the agent trying to move into the wall.
    ///
    /// For more information about implementing agent actions see [Agents - Actions].
    /// </remarks>
    /// <param name="actions">
    /// Struct containing the buffers of actions to be executed at this step.
    /// </param>
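    /// <example>
    /// An illustrative sketch of an override (hypothetical <c>m_Rigidbody</c> and <c>m_ForceScale</c>
    /// fields; the 3-value continuous force plus one discrete movement branch are assumptions, not a
    /// requirement of the API):
    /// <code>
    /// public override void OnActionReceived(ActionBuffers actions)
    /// {
    ///     // Continuous actions 0-2: force components, clamped to [-1, 1] for stability.
    ///     var force = new Vector3(
    ///         Mathf.Clamp(actions.ContinuousActions[0], -1f, 1f),
    ///         Mathf.Clamp(actions.ContinuousActions[1], -1f, 1f),
    ///         Mathf.Clamp(actions.ContinuousActions[2], -1f, 1f));
    ///     m_Rigidbody.AddForce(force * m_ForceScale);
    ///
    ///     // Discrete branch 0: 0 = do nothing, 1 = move one space left, 2 = move one space right.
    ///     switch (actions.DiscreteActions[0])
    ///     {
    ///         case 1: transform.Translate(Vector3.left); break;
    ///         case 2: transform.Translate(Vector3.right); break;
    ///     }
    /// }
    /// </code>
    /// </example>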
|
|
|
|
    public virtual void OnActionReceived(ActionBuffers actions)
    {
        var actionSpec = m_PolicyFactory.BrainParameters.ActionSpec;
        // For continuous and discrete actions together, we don't need to fall back to the legacy method
        if (actionSpec.NumContinuousActions > 0 && actionSpec.NumDiscreteActions > 0)
        {
            // Nothing implemented.
            return;
        }

        if (!actions.ContinuousActions.IsEmpty())
        {
            m_LegacyActionCache = actions.ContinuousActions.Array;
|