testing: exit playback after specified number of episodes (#3332)

5 年前 · 7ad624da
--- a/Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ModelOverrider.cs
+++ b/Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ModelOverrider.cs
 {
    /// <summary>
    /// Utility class to allow the NNModel file for an agent to be overridden during inference.
-    /// This is useful to validate the file after training is done.
+    /// This is used internally to validate the file after training is done.
+    ///
+    /// Additionally, a number of episodes to run can be specified; after this, the application will quit.
-        const string k_CommandLineFlag = "--mlagents-override-model";
+        const string k_CommandLineModelOverrideFlag = "--mlagents-override-model";
+        const string k_CommandLineQuitAfterEpisodesFlag = "--mlagents-quit-after-episodes";
+
+        // The attached Agent
+        Agent m_Agent;
+
        // Assets paths to use, with the behavior name as the key.
        Dictionary<string, string> m_BehaviorNameOverrides = new Dictionary<string, string>();

+        // Max episodes to run. Only used if > 0
+        // Will default to 1 if override models are specified, otherwise 0.
+        int m_MaxEpisodes;
+
+        int m_NumSteps;
+
        /// <summary>
        /// Get the asset path to use from the commandline arguments.
        /// </summary>
            m_BehaviorNameOverrides.Clear();

+            var maxEpisodes = 0;
+
-            for (var i = 0; i < args.Length-2; i++)
+            for (var i = 0; i < args.Length - 1; i++)
-                if (args[i] == k_CommandLineFlag)
+                if (args[i] == k_CommandLineModelOverrideFlag && i < args.Length-2)
+                else if (args[i] == k_CommandLineQuitAfterEpisodesFlag)
+                {
+                    Int32.TryParse(args[i + 1], out maxEpisodes);
+                }
+            }
+
+            if (m_BehaviorNameOverrides.Count > 0)
+            {
+                // If overriding models, set maxEpisodes to 1 or the command line value
+                m_MaxEpisodes = maxEpisodes > 0 ? maxEpisodes : 1;
+                Debug.Log($"setting m_MaxEpisodes to {maxEpisodes}");
+            m_Agent = GetComponent<Agent>();
+
            GetAssetPathFromCommandLine();
            if (m_BehaviorNameOverrides.Count > 0)
            {

+        /// <summary>
+        /// Counts fixed-update ticks and, when an episode limit is active (m_MaxEpisodes > 0),
+        /// quits the application once the tick count exceeds m_MaxEpisodes * m_Agent.maxStep.
+        /// </summary>
+        void FixedUpdate()
+        {
+            var episodeLimitActive = m_MaxEpisodes > 0;
+
+            // The agent is only consulted when a limit is active (short-circuit evaluation).
+            // NOTE(review): if m_Agent.maxStep is 0 the product is 0, so this condition holds
+            // from the second tick onwards - confirm maxStep is always positive here.
+            if (episodeLimitActive && m_NumSteps > m_MaxEpisodes * m_Agent.maxStep)
+            {
+                // Stop recording so that we don't write partial rewards to the timer info.
+                TimerStack.Instance.Recording = false;
+                Application.Quit(0);
+            }
+
+            // The counter advances on every tick, whether or not a limit is active.
+            m_NumSteps += 1;
+        }
+
        NNModel GetModelForBehaviorName(string behaviorName)
        {
            if (m_CachedModels.ContainsKey(behaviorName))
            }

            var asset = ScriptableObject.CreateInstance<NNModel>();
+            asset.modelData = ScriptableObject.CreateInstance<NNModelData>();
+
            asset.name = "Override - " + Path.GetFileName(assetPath);
            m_CachedModels[behaviorName] = asset;
            return asset;
        /// </summary>
        void OverrideModel()
        {
-            var agent = GetComponent<Agent>();
-            agent.LazyInitialize();
-            var bp = agent.GetComponent<BehaviorParameters>();
+            m_Agent.LazyInitialize();
+            var bp = m_Agent.GetComponent<BehaviorParameters>();
-            agent.GiveModel($"Override_{bp.behaviorName}", nnModel, InferenceDevice.CPU);
+            m_Agent.GiveModel($"Override_{bp.behaviorName}", nnModel, InferenceDevice.CPU);

        }
    }
--- a/com.unity.ml-agents/Runtime/Timer.cs
+++ b/com.unity.ml-agents/Runtime/Timer.cs
        /// <summary>
        /// Stop timing a block of code, and increment internal counts.
        /// </summary>
-        public void End()
+        public void End(bool isRecording)
-            var elapsed = DateTime.Now.Ticks - m_TickStart;
-            m_TotalTicks += elapsed;
-            m_TickStart = 0;
-            m_NumCalls++;
+            if (isRecording)
+            {
+                var elapsed = DateTime.Now.Ticks - m_TickStart;
+                m_TotalTicks += elapsed;
+                m_TickStart = 0;
+                m_NumCalls++;
+            }
+            // Note that samplers are always updated regardless of recording state, to ensure matching start and ends.
            m_Sampler?.End();
        }


        Stack<TimerNode> m_Stack;
        TimerNode m_RootNode;
+        // Whether or not new timers and gauges can be added.
+        bool m_Recording = true;

        // Explicit static constructor to tell C# compiler
        // not to mark type as beforefieldinit
        public TimerNode RootNode
        {
            get { return m_RootNode; }
+        }
+
+        public bool Recording
+        {
+            get { return m_Recording; }
+            set { m_Recording = value; }
+            if (!Recording)
+            {
+                return;
+            }
+
            if (!float.IsNaN(value))
            {
                GaugeNode gauge;
        // Removes the most recently pushed timer node from the stack and ends it.
        void Pop()
        {
            var node = m_Stack.Pop();
-            node.End();
+            node.End(Recording); // forward the Recording flag; End only accumulates ticks/calls when it is true
        }

        /// <summary>