Browse code

Always reset when agent is done (#3222)

* Removing the AgentOnDone call

* removing editor inspector field for ResetOnDone

* Documentation changes

* addressing comments

* addressing comments

* adding comments

* Migrating steps

* inference - fill 0s for done Agents (#3232)

* fill 0s for done agents

* docstrings

* Simplifying the code

* Removing GenerateSensorData

* Update docs/Migrating.md

Co-Authored-By: Chris Elion <chris.elion@unity3d.com>

Co-authored-by: Chris Elion <celion@gmail.com>
/asymm-envs
GitHub 5 years ago
Current commit
0366af0b
14 files changed, 88 insertions and 241 deletions
  1. UnitySDK/Assets/ML-Agents/Editor/AgentEditor.cs (5 changes)
  2. UnitySDK/Assets/ML-Agents/Editor/Tests/MLAgentsEditModeTest.cs (74 changes)
  3. UnitySDK/Assets/ML-Agents/Examples/Basic/Scripts/BasicAgent.cs (4 changes)
  4. UnitySDK/Assets/ML-Agents/Examples/Bouncer/Scripts/BouncerAgent.cs (3 changes)
  5. UnitySDK/Assets/ML-Agents/Examples/FoodCollector/Scripts/FoodCollectorAgent.cs (4 changes)
  6. UnitySDK/Assets/ML-Agents/Examples/Pyramids/Scripts/PyramidAgent.cs (4 changes)
  7. UnitySDK/Assets/ML-Agents/Examples/Template/Scripts/TemplateAgent.cs (4 changes)
  8. UnitySDK/Assets/ML-Agents/Scripts/Agent.cs (117 changes)
  9. UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/GeneratorImpl.cs (49 changes)
  10. UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/TensorProxy.cs (23 changes)
  11. docs/Learning-Environment-Create-New.md (9 changes)
  12. docs/Learning-Environment-Design-Agents.md (19 changes)
  13. docs/Learning-Environment-Design.md (12 changes)
  14. docs/Migrating.md (2 changes)

5
UnitySDK/Assets/ML-Agents/Editor/AgentEditor.cs


new GUIContent(
"Max Step", "The per-agent maximum number of steps."));
EditorGUILayout.PropertyField(
isResetOnDone,
new GUIContent(
"Reset On Done",
"If checked, the agent will reset on done. Else, AgentOnDone() will be called."));
EditorGUILayout.PropertyField(
isOdd,
new GUIContent(
"On Demand Decisions",

74
UnitySDK/Assets/ML-Agents/Editor/Tests/MLAgentsEditModeTest.cs


public int collectObservationsCalls;
public int agentActionCalls;
public int agentResetCalls;
public int agentOnDoneCalls;
public override void InitializeAgent()
{
initializeAgentCalls += 1;

public override void AgentReset()
{
agentResetCalls += 1;
}
public override void AgentOnDone()
{
agentOnDoneCalls += 1;
}
public override float[] Heuristic()

[TestFixture]
public class EditModeTestMiscellaneous
{
[SetUp]
public void SetUp()
{

}
}
[Test]
public void TestResetOnDone()
{
var agentGo1 = new GameObject("TestAgent");
agentGo1.AddComponent<TestAgent>();
var agent1 = agentGo1.GetComponent<TestAgent>();
var agentGo2 = new GameObject("TestAgent");
agentGo2.AddComponent<TestAgent>();
var agent2 = agentGo2.GetComponent<TestAgent>();
var aca = Academy.Instance;
var agentEnableMethod = typeof(Agent).GetMethod(
"OnEnableHelper", BindingFlags.Instance | BindingFlags.NonPublic);
agent1.agentParameters = new AgentParameters();
agent2.agentParameters = new AgentParameters();
// We use event based so the agent will not try to send anything to the brain
agent1.agentParameters.onDemandDecision = false;
// agent1 will take an action at every step and request a decision every step
agent1.agentParameters.numberOfActionsBetweenDecisions = 1;
// agent2 will request decisions only when RequestDecision is called
agent2.agentParameters.onDemandDecision = true;
agent1.agentParameters.maxStep = 20;
//Here we specify that the agent does not reset when done
agent1.agentParameters.resetOnDone = false;
agent2.agentParameters.resetOnDone = false;
agentEnableMethod?.Invoke(agent2, new object[] { });
agentEnableMethod?.Invoke(agent1, new object[] { });
var agent1ResetOnDone = 0;
var agent2ResetOnDone = 0;
var agent1StepSinceReset = 0;
var agent2StepSinceReset = 0;
for (var i = 0; i < 50; i++)
{
Assert.AreEqual(i, aca.GetTotalStepCount());
Assert.AreEqual(agent1StepSinceReset, agent1.GetStepCount());
Assert.AreEqual(agent2StepSinceReset, agent2.GetStepCount());
Assert.AreEqual(agent1ResetOnDone, agent1.agentOnDoneCalls);
Assert.AreEqual(agent2ResetOnDone, agent2.agentOnDoneCalls);
// we request a decision at each step
agent2.RequestDecision();
if (agent1ResetOnDone == 0)
agent1StepSinceReset += 1;
if (agent2ResetOnDone == 0)
agent2StepSinceReset += 1;
if ((i > 2) && (i % 21 == 0))
{
agent1ResetOnDone = 1;
}
if (i == 31)
{
agent2ResetOnDone = 1;
agent2.Done();
}
aca.EnvironmentStep();
}
}

4
UnitySDK/Assets/ML-Agents/Examples/Basic/Scripts/BasicAgent.cs


return new float[] { 0 };
}
public override void AgentOnDone()
{
}
public void FixedUpdate()
{
WaitTimeInference();

3
UnitySDK/Assets/ML-Agents/Examples/Bouncer/Scripts/BouncerAgent.cs


SetResetParameters();
}
public override void AgentOnDone()
{
}
void FixedUpdate()
{

4
UnitySDK/Assets/ML-Agents/Examples/FoodCollector/Scripts/FoodCollectorAgent.cs


}
}
public override void AgentOnDone()
{
}
public void SetLaserLengths()
{
m_LaserLength = Academy.Instance.FloatProperties.GetPropertyWithDefault("laser_length", 1.0f);

4
UnitySDK/Assets/ML-Agents/Examples/Pyramids/Scripts/PyramidAgent.cs


Done();
}
}
public override void AgentOnDone()
{
}
}

4
UnitySDK/Assets/ML-Agents/Examples/Template/Scripts/TemplateAgent.cs


public override void AgentReset()
{
}
public override void AgentOnDone()
{
}
}

117
UnitySDK/Assets/ML-Agents/Scripts/Agent.cs


public int maxStep;
/// <summary>
/// Determines the behaviour of the agent when done.
/// </summary>
/// <remarks>
/// If true, the agent will reset when done and start a new episode.
/// Otherwise, the agent will remain done and its behavior will be
/// dictated by the AgentOnDone method.
/// </remarks>
public bool resetOnDone = true;
/// <summary>
/// Whether to enable On Demand Decisions or make a decision at
/// every step.
/// </summary>

/// done has not been communicated (required for On Demand Decisions).
bool m_HasAlreadyReset;
/// Flag to signify that an agent is done and should not reset until
/// the fact that it is done has been communicated.
bool m_Terminate;
/// Unique identifier each agent receives at initialization. It is used
/// to separate between different agents in the environment.
int m_Id;

Academy.Instance.AgentAct -= AgentStep;
Academy.Instance.AgentForceReset -= _AgentReset;
}
NotifyAgentDone();
}
void NotifyAgentDone()
{
m_Info.done = true;
// Request the last decision with no callbacks
// We request a decision so Python knows the Agent is disabled
m_Brain?.RequestDecision(m_Info, sensors, (a) => { });
}
/// <summary>

}
/// <summary>
/// Generate data for each sensor and store it in the observations input.
/// NOTE: At the moment, this is only called during training or when using a DemonstrationRecorder;
/// during inference the Sensors are used to write directly to the Tensor data. This will likely change in the
/// future to be controlled by the type of brain being used.
/// </summary>
/// <param name="sensors"> List of ISensors that will be used to generate the data.</param>
/// <param name="buffer"> A float array that will be used as buffer when generating the observations. Must
/// be at least the same length as the total number of uncompressed floats in the observations</param>
/// <param name="adapter"> The WriteAdapter that will be used to write the ISensor data to the observations</param>
/// <param name="observations"> A list of observations outputs. This argument will be modified by this method.</param>//
public static void GenerateSensorData(List<ISensor> sensors, float[] buffer, WriteAdapter adapter, List<Observation> observations)
{
int floatsWritten = 0;
// Generate data for all Sensors
for (var i = 0; i < sensors.Count; i++)
{
var sensor = sensors[i];
if (sensor.GetCompressionType() == SensorCompressionType.None)
{
// TODO handle in communicator code instead
adapter.SetTarget(buffer, sensor.GetObservationShape(), floatsWritten);
var numFloats = sensor.Write(adapter);
var floatObs = new Observation
{
FloatData = new ArraySegment<float>(buffer, floatsWritten, numFloats),
Shape = sensor.GetObservationShape(),
CompressionType = sensor.GetCompressionType()
};
observations.Add(floatObs);
floatsWritten += numFloats;
}
else
{
var compressedObs = new Observation
{
CompressedData = sensor.GetCompressedObservation(),
Shape = sensor.GetObservationShape(),
CompressionType = sensor.GetCompressionType()
};
observations.Add(compressedObs);
}
}
}
/// <summary>
/// Collects the (vector, visual) observations of the agent.
/// The agent observation describes the current environment from the
/// perspective of the agent.

}
/// <summary>
/// Specifies the agent behavior when done and
/// <see cref="AgentParameters.resetOnDone"/> is false. This method can be
/// used to remove the agent from the scene.
/// </summary>
public virtual void AgentOnDone()
{
}
/// <summary>
/// Specifies the agent behavior when being reset, which can be due to
/// the agent or Academy being done (i.e. completion of local or global
/// episode).

  // request for a decision and an action
  if (IsDone())
  {
-     if (agentParameters.resetOnDone)
-     {
-         if (agentParameters.onDemandDecision)
-         {
-             if (!m_HasAlreadyReset)
-             {
-                 // If event based, the agent can reset as soon
-                 // as it is done
-                 _AgentReset();
-                 m_HasAlreadyReset = true;
-             }
-         }
-         else if (m_RequestDecision)
-         {
-             // If not event based, the agent must wait to request a
-             // decision before resetting to keep multiple agents in sync.
-             _AgentReset();
-         }
-     }
-     else
-     {
-         m_Terminate = true;
-         RequestDecision();
-     }
+     if (agentParameters.onDemandDecision)
+     {
+         if (!m_HasAlreadyReset)
+         {
+             // If event based, the agent can reset as soon
+             // as it is done
+             _AgentReset();
+             m_HasAlreadyReset = true;
+         }
+     }
+     else if (m_RequestDecision)
+     {
+         // If not event based, the agent must wait to request a
+         // decision before resetting to keep multiple agents in sync.
+         _AgentReset();
+     }
  }
  }

/// Used by the brain to make the agent perform a step.
void AgentStep()
{
if (m_Terminate)
{
m_Terminate = false;
ResetReward();
m_Done = false;
m_MaxStepReached = false;
m_RequestDecision = false;
m_RequestAction = false;
m_HasAlreadyReset = false;
OnDisable();
AgentOnDone();
}
if ((m_RequestAction) && (m_Brain != null))
{
m_RequestAction = false;

49
UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/GeneratorImpl.cs


  var agentIndex = 0;
  foreach (var info in infos)
  {
-     var tensorOffset = 0;
-     // Write each sensor consecutively to the tensor
-     foreach (var sensorIndex in m_SensorIndices)
-     {
-         var sensor = info.sensors[sensorIndex];
-         m_WriteAdapter.SetTarget(tensorProxy, agentIndex, tensorOffset);
-         var numWritten = sensor.Write(m_WriteAdapter);
-         tensorOffset += numWritten;
-     }
-     Debug.AssertFormat(
-         tensorOffset == vecObsSizeT,
-         "mismatch between vector observation size ({0}) and number of observations written ({1})",
-         vecObsSizeT, tensorOffset
-     );
+     if (info.agentInfo.done)
+     {
+         // If the agent is done, we might have a stale reference to the sensors
+         // e.g. a dependent object might have been disposed.
+         // To avoid this, just fill observation with zeroes instead of calling sensor.Write.
+         TensorUtils.FillTensorBatch(tensorProxy, agentIndex, 0.0f);
+     }
+     else
+     {
+         var tensorOffset = 0;
+         // Write each sensor consecutively to the tensor
+         foreach (var sensorIndex in m_SensorIndices)
+         {
+             var sensor = info.sensors[sensorIndex];
+             m_WriteAdapter.SetTarget(tensorProxy, agentIndex, tensorOffset);
+             var numWritten = sensor.Write(m_WriteAdapter);
+             tensorOffset += numWritten;
+         }
+         Debug.AssertFormat(
+             tensorOffset == vecObsSizeT,
+             "mismatch between vector observation size ({0}) and number of observations written ({1})",
+             vecObsSizeT, tensorOffset
+         );
+     }
      agentIndex++;
  }

  foreach (var infoSensorPair in infos)
  {
      var sensor = infoSensorPair.sensors[m_SensorIndex];
-     m_WriteAdapter.SetTarget(tensorProxy, agentIndex, 0);
-     sensor.Write(m_WriteAdapter);
+     if (infoSensorPair.agentInfo.done)
+     {
+         // If the agent is done, we might have a stale reference to the sensors
+         // e.g. a dependent object might have been disposed.
+         // To avoid this, just fill observation with zeroes instead of calling sensor.Write.
+         TensorUtils.FillTensorBatch(tensorProxy, agentIndex, 0.0f);
+     }
+     else
+     {
+         m_WriteAdapter.SetTarget(tensorProxy, agentIndex, 0);
+         sensor.Write(m_WriteAdapter);
+     }
      agentIndex++;
  }
  }

23
UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/TensorProxy.cs


}
/// <summary>
/// Fill a specific batch of a TensorProxy with a given value
/// </summary>
/// <param name="tensorProxy"></param>
/// <param name="batch">The batch index to fill.</param>
/// <param name="fillValue"></param>
public static void FillTensorBatch(TensorProxy tensorProxy, int batch, float fillValue)
{
var height = tensorProxy.data.height;
var width = tensorProxy.data.width;
var channels = tensorProxy.data.channels;
for (var h = 0; h < height; h++)
{
for (var w = 0; w < width; w++)
{
for (var c = 0; c < channels; c++)
{
tensorProxy.data[batch, h, w, c] = fillValue;
}
}
}
}
/// <summary>
/// Fill a pre-allocated Tensor with random numbers
/// </summary>
/// <param name="tensorProxy">The pre-allocated Tensor to fill</param>

9
docs/Learning-Environment-Create-New.md


}
```
**Note:** When you mark an Agent as done, it stops its activity until it is
reset. You can have the Agent reset immediately, by setting the
Agent.ResetOnDone property to true in the inspector or you can wait for the
Academy to reset the environment. This RollerBall environment relies on the
`ResetOnDone` mechanism and doesn't set a `Max Steps` limit for the Academy (so
it never resets the environment).
Finally, if the Agent falls off the platform, set the Agent to done so that it can reset itself:
```csharp
// Fell off platform

19
docs/Learning-Environment-Design-Agents.md


## Destroying an Agent
Before destroying an Agent GameObject, you must mark it as done (and wait for
the next step in the simulation) so that the Policy knows that this Agent is no
longer active. Thus, the best place to destroy an Agent is in the
`Agent.AgentOnDone()` function:
```csharp
public override void AgentOnDone()
{
Destroy(gameObject);
}
```
Note that in order for `AgentOnDone()` to be called, the Agent's `ResetOnDone`
property must be false. You can set `ResetOnDone` on the Agent's Inspector or in
code.
You can destroy an Agent GameObject during the simulation. Make sure that there is
always at least one Agent training at all times by either spawning a new Agent
every time one is destroyed or by re-spawning new Agents when the whole environment
resets.
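
As an illustrative sketch only (none of this code is in the diff), one way to keep at least one Agent training is a small respawn helper; the `AgentRespawner` class, `agentPrefab`, and `spawnPoint` names below are hypothetical.

```csharp
using UnityEngine;

// Hypothetical helper: respawn a replacement agent whenever one is destroyed,
// so the scene never runs out of training Agents.
public class AgentRespawner : MonoBehaviour
{
    public GameObject agentPrefab;   // assumed prefab containing an Agent component
    public Transform spawnPoint;     // assumed spawn location

    // Call this instead of destroying the agent GameObject directly.
    public void ReplaceAgent(GameObject oldAgent)
    {
        Destroy(oldAgent);
        Instantiate(agentPrefab, spawnPoint.position, Quaternion.identity);
    }
}
```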

12
docs/Learning-Environment-Design.md


5. Calls the `AgentAction()` function for each Agent in the scene, passing in
the action chosen by the Agent's Policy. (This function is not called if the
Agent is done.)
6. Calls the Agent's `AgentOnDone()` function if the Agent has reached its `Max
Step` count or has otherwise marked itself as `done`. Optionally, you can set
an Agent to restart if it finishes before the end of an episode. In this
case, the Academy calls the `AgentReset()` function.
6. Calls the Agent's `AgentReset()` function if the Agent has reached its `Max
Step` count or has otherwise marked itself as `done`.
To create a training environment, extend the Agent class to
implement the above methods. The `Agent.CollectObservations()` and

manually set an Agent to done in your `AgentAction()` function when the Agent
has finished (or irrevocably failed) its task by calling the `Done()` function.
You can also set the Agent's `Max Steps` property to a positive value and the
Agent will consider itself done after it has taken that many steps. If you
set an Agent's `ResetOnDone` property to true, then the Agent can attempt its
task several times in one episode. (Use the `Agent.AgentReset()` function to
prepare the Agent to start again.)
Agent will consider itself done after it has taken that many steps. You can
use the `Agent.AgentReset()` function to prepare the Agent to start again.
See [Agents](Learning-Environment-Design-Agents.md) for detailed information
about programming your own Agents.
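
For illustration (not taken from this diff), a minimal agent following the pattern described above might look like the sketch below; the class name, fall condition, and reset logic are hypothetical.

```csharp
using UnityEngine;
using MLAgents;

// Hypothetical minimal agent: calls Done() when the task ends and restores
// its state in AgentReset() so a new attempt can start.
public class ExampleAgent : Agent
{
    Vector3 m_StartPosition;

    public override void InitializeAgent()
    {
        m_StartPosition = transform.position;
    }

    public override void AgentAction(float[] vectorAction)
    {
        // ... apply vectorAction to the agent ...
        if (transform.position.y < 0f)   // e.g. fell off the platform
        {
            Done();                      // ends the episode; AgentReset() runs next
        }
    }

    public override void AgentReset()
    {
        transform.position = m_StartPosition;
    }
}
```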

2
docs/Migrating.md


* Trainer steps are now counted per-Agent, not per-environment as in previous versions. For instance, if you have 10 Agents in the scene, 20 environment steps now correspond to 200 steps as printed in the terminal and in Tensorboard.
* Curriculum config files are now YAML formatted and all curricula for a training run are combined into a single file.
* The `--num-runs` command-line option has been removed.
* The "Reset on Done" setting in AgentParameters was removed; this is now effectively always true. `AgentOnDone` virtual method on the Agent has been removed.
### Steps to Migrate
* If you have a class that inherits from Academy:

* Multiply `max_steps` and `summary_steps` in your `trainer_config.yaml` by the number of Agents in the scene.
* Combine curriculum configs into a single file. See [the WallJump curricula](../config/curricula/wall_jump.yaml) for an example of the new curriculum config format.
A tool like https://www.json2yaml.com may be useful to help with the conversion.
* If your Agent implements `AgentOnDone` and your Agent does not have the checkbox `Reset On Done` checked in the inspector, you must call the code that was in `AgentOnDone` manually.
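
As a hypothetical migration sketch (not part of the diff), one option is to move the old `AgentOnDone` body into an ordinary method and invoke it yourself where the episode ends; `MigratedAgent`, `CleanUpAfterEpisode`, and the fall condition are made-up names for illustration.

```csharp
using UnityEngine;
using MLAgents;

public class MigratedAgent : Agent
{
    // Was the body of AgentOnDone(); now an ordinary method you call explicitly.
    void CleanUpAfterEpisode()
    {
        Destroy(gameObject);
    }

    public override void AgentAction(float[] vectorAction)
    {
        // ... task logic ...
        if (transform.position.y < 0f)   // e.g. the agent has irrecoverably failed
        {
            Done();
            CleanUpAfterEpisode();   // previously triggered via AgentOnDone()
        }
    }
}
```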
## Migrating from ML-Agents toolkit v0.12.0 to v0.13.0
