add DoneReason enum to Agent (#3517)

5 年前 · 8ce9dcfd
--- a/Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ModelOverrider.cs
+++ b/Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ModelOverrider.cs
            {
                if (m_NumSteps > m_MaxEpisodes * m_Agent.maxStep)
                {
-                    // Stop recording so that we don't write partial rewards to the timer info.
-                    TimerStack.Instance.Recording = false;
                    Application.Quit(0);
                }
            }
--- a/com.unity.ml-agents/Runtime/Agent.cs
+++ b/com.unity.ml-agents/Runtime/Agent.cs
            InitializeSensors();
        }

+        /// <summary>
+        /// Reason that the Agent is being considered "done".
+        /// </summary>
+        enum DoneReason
+        {
+            /// <summary>
+            /// The <see cref="Done"/> method was called.
+            /// </summary>
+            DoneCalled,
+
+            /// <summary>
+            /// The max steps for the Agent were reached.
+            /// </summary>
+            MaxStepReached,
+
+            /// <summary>
+            /// The Agent was disabled.
+            /// </summary>
+            Disabled,
+        }
+
        void OnDisable()
        {
            DemonstrationWriters.Clear();
                Academy.Instance.AgentAct -= AgentStep;
                Academy.Instance.AgentForceReset -= _AgentReset;
            }
-            NotifyAgentDone();
+            NotifyAgentDone(DoneReason.Disabled);
-        void NotifyAgentDone(bool maxStepReached = false)
+        void NotifyAgentDone(DoneReason doneReason)
-            m_Info.maxStepReached = maxStepReached;
+            m_Info.maxStepReached = doneReason == DoneReason.MaxStepReached;
            // Request the last decision with no callbacks
            // We request a decision so Python knows the Agent is done immediately
            m_Brain?.RequestDecision(m_Info, sensors);
                demoWriter.Record(m_Info, sensors);
            }

-            UpdateRewardStats();
+            if (doneReason != DoneReason.Disabled)
+            {
+                // We don't want to update the reward stats when the Agent is disabled, because this will make
+                // the rewards look lower than they actually are during shutdown.
+                UpdateRewardStats();
+            }

            // The Agent is done, so we give it a new episode Id
            m_EpisodeId = EpisodeIdCounter.GetEpisodeId();
        /// </summary>
        public void Done()
        {
-            NotifyAgentDone();
+            NotifyAgentDone(DoneReason.DoneCalled);
            _AgentReset();
        }


            if ((m_StepCount >= maxStep) && (maxStep > 0))
            {
-                NotifyAgentDone(true);
+                NotifyAgentDone(DoneReason.MaxStepReached);
                _AgentReset();
            }
        }
--- a/com.unity.ml-agents/Runtime/Timer.cs
+++ b/com.unity.ml-agents/Runtime/Timer.cs
        /// <summary>
        /// Stop timing a block of code, and increment internal counts.
        /// </summary>
-        public void End(bool isRecording)
+        public void End()
-            if (isRecording)
-            {
-                var elapsed = DateTime.Now.Ticks - m_TickStart;
-                m_TotalTicks += elapsed;
-                m_TickStart = 0;
-                m_NumCalls++;
-            }
-            // Note that samplers are always updated regardless of recording state, to ensure matching start and ends.
+            var elapsed = DateTime.Now.Ticks - m_TickStart;
+            m_TotalTicks += elapsed;
+            m_TickStart = 0;
+            m_NumCalls++;
            m_Sampler?.End();
        }

    /// This implements the Singleton pattern (solution 4) as described in
    /// https://csharpindepth.com/articles/singleton
    /// </remarks>
-    public class TimerStack : IDisposable
+    internal class TimerStack : IDisposable
-        // Whether or not new timers and gauges can be added.
-        bool m_Recording = true;

        // Explicit static constructor to tell C# compiler
        // not to mark type as beforefieldinit
        }

        /// <summary>
-        /// Whether or not new timers and gauges can be added.
-        /// </summary>
-        public bool Recording
-        {
-            get { return m_Recording; }
-            set { m_Recording = value; }
-        }
-
-        /// <summary>
        /// Updates the referenced gauge in the root node with the provided value.
        /// </summary>
        /// <param name="name">The name of the Gauge to modify.</param>
-            if (!Recording)
-            {
-                return;
-            }
-
            if (!float.IsNaN(value))
            {
                GaugeNode gauge;
        void Pop()
        {
            var node = m_Stack.Pop();
-            node.End(Recording);
+            node.End();
        }

        /// <summary>