using System;
using System.Collections.Generic;
using UnityEngine;
using Barracuda;
using UnityEngine.Serialization;

/// <summary>
/// Struct that contains all the information for an Agent, including its
/// observations, actions and current status.
/// </summary>
internal struct AgentInfo
{
    public float[] vectorActions;
}

/// <summary>
/// Agent MonoBehaviour class that is attached to a Unity GameObject, making it
/// an Agent. An agent produces observations and takes actions in the
/// environment. Observations are determined by the
/// user in <see cref="Agent.CollectObservations(VectorSensor)"/> or
/// <see cref="Agent.CollectObservations(VectorSensor, ActionMasker)"/>.
/// On the other hand, actions are determined by decisions produced by a Policy.
/// Currently, this class is expected to be extended to implement the desired agent behavior.
/// </summary>
/// <remarks>
/// Simply speaking, an agent roams through an environment and at each step
/// of the environment extracts its current observation, sends it to its
/// policy and receives an action in return. In practice, however, an agent
/// need not send its observation at every step since very
/// little may have changed between successive steps.
///
/// At any step, an agent may be considered done due to a variety of reasons:
/// - The agent reached an end state within its environment.
/// - The agent reached the maximum # of steps (i.e. timed out).
/// - The academy reached the maximum # of steps (forced agent to be done).
/// </remarks>
public class Agent : MonoBehaviour, ISerializationCallbackReceiver
{
    BehaviorParameters m_PolicyFactory;

    /// <summary>
    /// This code is here to make the upgrade path for users using maxStep
    /// easier. We will hook into the Serialization code and make sure that
    /// agentParameters.maxStep and this.maxStep are in sync.
    /// </summary>
    [Serializable]
    internal struct AgentParameters
    {
        public int maxStep;
    }

    [SerializeField]
    [HideInInspector]
    internal AgentParameters agentParameters;
    [SerializeField]
    [HideInInspector]
    internal bool hasUpgradedFromAgentParameters;

    /// <summary>
    /// The maximum number of steps the agent takes before being done.
    /// </summary>
    public int maxStep;

    /// <summary>
    /// VectorSensor that <see cref="CollectObservations(VectorSensor)"/> writes to.
    /// </summary>
    internal VectorSensor collectObservationsSensor;

    /// <summary>
    /// MonoBehaviour function that is called when the attached GameObject
    /// becomes enabled or active.
    /// </summary>
    void OnEnable()
    {
        LazyInitialize();
    }

    /// <summary>
    /// <inheritdoc cref="OnBeforeSerialize"/>
    /// </summary>
    public void OnBeforeSerialize()
    {
        // Manages a serialization upgrade issue from v0.13 to v0.14 where maxStep moved
        // from AgentParameters (since removed) to Agent
        if (maxStep == 0 && maxStep != agentParameters.maxStep && !hasUpgradedFromAgentParameters)
        {
            maxStep = agentParameters.maxStep;
        }
        hasUpgradedFromAgentParameters = true;
    }

    /// <summary>
    /// <inheritdoc cref="OnAfterDeserialize"/>
    /// </summary>
    public void OnAfterDeserialize()
    {
        // Manages a serialization upgrade issue from v0.13 to v0.14 where maxStep moved
        // from AgentParameters (since removed) to Agent
        if (maxStep == 0 && maxStep != agentParameters.maxStep && !hasUpgradedFromAgentParameters)
        {
            maxStep = agentParameters.maxStep;
        }
        hasUpgradedFromAgentParameters = true;
    }

    /// <summary>
    /// Initializes the agent. Can be safely called multiple times.
    /// </summary>
    /// <remarks>
    /// Helper method for the <see cref="OnEnable"/> event, created to
    /// facilitate testing.
    /// </remarks>
    public void LazyInitialize()
    {
        if (m_Initialized)
        {
            return;
        }
        m_Initialized = true;

        // Grab the BehaviorParameters attached to this GameObject, build the
        // Policy from it, and set up the sensors.
        m_PolicyFactory = GetComponent<BehaviorParameters>();
        m_Brain = m_PolicyFactory.GeneratePolicy(Heuristic);
        InitializeSensors();
    }

    /// <summary>
    /// MonoBehaviour function that is called when the attached GameObject
    /// becomes disabled or inactive.
    /// </summary>
    void OnDisable()
    {
        DemonstrationStores.Clear();
    }

    /// <summary>
    /// Returns the current step counter (within the current episode).
    /// </summary>
    /// <returns>
    /// The current step count.
    /// </returns>
    public int GetStepCount()
    {
        // m_StepCount is maintained by the Agent's stepping logic elsewhere in this class.
        return m_StepCount;
    }

    /// <summary>
    /// When the Agent uses Heuristics, it will call this method every time it
    /// needs an action. This can be used for debugging or for controlling the
    /// agent by hand.
    /// </summary>
    /// <returns>A float array corresponding to the next action of the Agent.</returns>
    public virtual float[] Heuristic()
    {
        throw new UnityAgentsException(
            "The Heuristic method was not implemented for the Agent on the " +
            $"{gameObject.name} GameObject.");
    }

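    // Example: Heuristic above is meant to be overridden in a subclass so the agent can
    // be driven by hand (for debugging, demonstrations, etc.). A minimal sketch; the
    // subclass name and the two-action layout are illustrative assumptions, not part of
    // this file:
    //
    //     public class KeyboardAgent : Agent
    //     {
    //         public override float[] Heuristic()
    //         {
    //             // Map keyboard axes to two continuous actions in [-1, 1].
    //             return new[]
    //             {
    //                 Input.GetAxis("Horizontal"),   // continuous action 0
    //                 Input.GetAxis("Vertical")      // continuous action 1
    //             };
    //         }
    //     }
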
    /// <summary>
    /// Sets up the list of ISensors on the Agent.
    /// </summary>
    internal void InitializeSensors()
    {
        // Use the vector observation settings from the attached BehaviorParameters.
        var param = m_PolicyFactory.brainParameters;
        collectObservationsSensor = new VectorSensor(param.vectorObservationSize);
        if (param.numStackedVectorObservations > 1)
        {
            var stackingSensor = new StackingSensor(
                collectObservationsSensor, param.numStackedVectorObservations);
            sensors.Add(stackingSensor);
        }
        else
        {
            sensors.Add(collectObservationsSensor);
        }

        // Sort the sensors by name so that their order is deterministic and the
        // adjacent-name uniqueness check below is valid.
        sensors.Sort((x, y) => x.GetName().CompareTo(y.GetName()));

#if DEBUG
        // Make sure the names are actually unique
        for (var i = 0; i < sensors.Count - 1; i++)
        {
            Debug.Assert(
                !sensors[i].GetName().Equals(sensors[i + 1].GetName()),
                "Sensor names must be unique.");
        }
#endif
    }

    void UpdateSensors()
    {
        foreach (var sensor in sensors)
        {
            sensor.Update();
        }
    }

    /// <summary>
    /// Collects the vector observations of the agent.
    /// The agent observation describes the current environment from the
    /// perspective of the agent.
    /// </summary>
    /// <param name="sensor">
    /// The vector observations for the agent.
    /// </param>
    /// <remarks>
    /// An agent's observation is any environment information that helps
    /// the Agent achieve its goal. For example, for a fighting Agent, its
    /// observation could include distances to friends or enemies, or the
    /// current level of ammunition at its disposal.
    /// Vector observations are added by calling the provided helper methods
    /// on the VectorSensor input:
    /// - <see cref="VectorSensor.AddObservation(int)"/>
    /// - <see cref="VectorSensor.AddObservation(float)"/>
    /// - <see cref="VectorSensor.AddObservation(Vector3)"/>
    /// - <see cref="VectorSensor.AddObservation(Vector2)"/>
    /// - <see cref="VectorSensor.AddObservation(Quaternion)"/>
    /// - <see cref="VectorSensor.AddObservation(bool)"/>
    /// - <see cref="VectorSensor.AddObservation(IEnumerable{float})"/>
    /// - <see cref="VectorSensor.AddOneHotObservation(int, int)"/>
    /// Depending on your environment, any combination of these helpers can
    /// be used. They just need to be used in the exact same order each time
    /// this method is called and the resulting size of the vector observation
    /// needs to match the vector observation size set in the agent's Behavior Parameters.
    /// </remarks>
    public virtual void CollectObservations(VectorSensor sensor)
    {
    }

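    // Example: a subclass adds its observations by calling the VectorSensor helpers
    // listed above, always in the same order. A minimal sketch; the field names and the
    // total size of 8 are illustrative assumptions, not part of this file:
    //
    //     public class RollerAgent : Agent
    //     {
    //         public Transform target;   // assigned in the Inspector
    //         public bool hasKey;        // some game-specific state
    //
    //         public override void CollectObservations(VectorSensor sensor)
    //         {
    //             sensor.AddObservation(target.localPosition);      // 3 floats
    //             sensor.AddObservation(transform.localPosition);   // 3 floats
    //             sensor.AddObservation(hasKey);                    // bool, 1 float
    //             sensor.AddObservation(Vector3.Distance(
    //                 target.localPosition, transform.localPosition)); // 1 float
    //             // 8 floats in total, so the vector observation size in the agent's
    //             // Behavior Parameters must be set to 8.
    //         }
    //     }
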
    /// <summary>
    /// Collects the vector observations of the agent alongside the masked actions.
    /// </summary>
    /// <param name="sensor">
    /// The vector observations for the agent.
    /// </param>
    /// <param name="actionMasker">
    /// The masked actions for the agent.
    /// </param>
    /// <remarks>
    /// An agent's observation is any environment information that helps
    /// the Agent achieve its goal. For example, for a fighting Agent, its
    /// observation could include distances to friends or enemies, or the
    /// current level of ammunition at its disposal.
    /// Vector observations are added by calling the provided helper methods
    /// on the VectorSensor input:
    /// - <see cref="VectorSensor.AddObservation(int)"/>
    /// - <see cref="VectorSensor.AddObservation(float)"/>
    /// - <see cref="VectorSensor.AddObservation(Vector3)"/>
    /// - <see cref="VectorSensor.AddObservation(Vector2)"/>
    /// - <see cref="VectorSensor.AddObservation(Quaternion)"/>
    /// - <see cref="VectorSensor.AddObservation(bool)"/>
    /// - <see cref="VectorSensor.AddObservation(IEnumerable{float})"/>
    /// - <see cref="VectorSensor.AddOneHotObservation(int, int)"/>
    /// Depending on your environment, any combination of these helpers can
    /// be used. They just need to be used in the exact same order each time
    /// this method is called and the resulting size of the vector observation
    /// needs to match the vector observation size set in the agent's Behavior Parameters.
    ///
    /// When using Discrete Control, you can prevent the Agent from using a certain
    /// action by masking it. You can call the following methods on the ActionMasker
    /// input:
    /// - <see cref="ActionMasker.SetActionMask(int)"/>
    /// - <see cref="ActionMasker.SetActionMask(int, int)"/>
    /// - <see cref="ActionMasker.SetActionMask(int, IEnumerable{int})"/>
    /// - <see cref="ActionMasker.SetActionMask(IEnumerable{int})"/>
    /// The branch input is the index of the action branch; actionIndices are the indices
    /// of the invalid options for that branch.
    /// </remarks>
    public virtual void CollectObservations(VectorSensor sensor, ActionMasker actionMasker)
    {
        // By default, just collect the observations and apply no action mask.
        CollectObservations(sensor);
    }

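    // Example: when the agent uses discrete actions, invalid options can be masked out
    // while observations are collected. A minimal sketch; the branch/option layout
    // (branch 0 = movement with options 0..3, option 2 = "move forward") and the
    // blockedAhead flag are illustrative assumptions, not part of this file:
    //
    //     public override void CollectObservations(VectorSensor sensor, ActionMasker actionMasker)
    //     {
    //         CollectObservations(sensor);   // reuse the plain observation logic
    //
    //         if (blockedAhead)              // game-specific flag, assumed for the example
    //         {
    //             // Forbid option 2 of branch 0 for this decision only.
    //             actionMasker.SetActionMask(0, 2);
    //         }
    //     }
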
    /// <summary>
    /// Returns the last action that was decided on by the Agent.
    /// </summary>
    /// <returns>
    /// The last action that was decided by the Agent (or null if no decision has been made).
    /// </returns>
    public float[] GetAction()
    {
        return m_Action.vectorActions;
    }

    /// <summary>
    /// Scales a continuous action from [-1, 1] to the range [min, max].
    /// </summary>
    /// <param name="rawAction">The raw action value, in the range [-1, 1].</param>
    /// <param name="min">The minimum of the target range.</param>
    /// <param name="max">The maximum of the target range.</param>
    /// <returns>The action value scaled into [min, max].</returns>
    protected static float ScaleAction(float rawAction, float min, float max)
    {
        var middle = (min + max) / 2;
        var range = (max - min) / 2;
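        // Worked example for ScaleAction (illustrative). Assuming the method finishes by
        // returning rawAction * range + middle (the linear map implied by the two values
        // computed above), a raw action in [-1, 1] maps onto [min, max] like this:
        //
        //     ScaleAction(-1f, 0f, 360f);   // -> 0    (the minimum)
        //     ScaleAction( 0f, 0f, 360f);   // -> 180  (the midpoint)
        //     ScaleAction( 1f, 0f, 360f);   // -> 360  (the maximum)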
|