Modifications to reward providers.

5 年前 · 718650c0
--- a/UnitySDK/Assets/ML-Agents/Editor/Tests/MLAgentsEditModeTest.cs
+++ b/UnitySDK/Assets/ML-Agents/Editor/Tests/MLAgentsEditModeTest.cs


            var j = 0;
-            var rewardProvider1 = agent1.rewardProvider as CumulativeRewardProvider;
-            var rewardProvider2 = agent2.rewardProvider as CumulativeRewardProvider;
+            var rewardProvider1 = agent1.rewardProvider as RewardProviderComponent;
+            var rewardProvider2 = agent2.rewardProvider as RewardProviderComponent;
            for (var i = 0; i < 500; i++)
            {
                agent2.RequestAction();
--- a/UnitySDK/Assets/ML-Agents/Scripts/Agent.cs
+++ b/UnitySDK/Assets/ML-Agents/Scripts/Agent.cs
        /// </summary>
        public IRewardProvider rewardProvider;

-        CumulativeRewardProvider CumulativeRewardProvider
+        RewardProviderComponent RewardProviderComponent
-            get { return rewardProvider as CumulativeRewardProvider; }
+            get { return rewardProvider as RewardProviderComponent; }
        }

        /// MonoBehaviour function that is called when the attached GameObject
        {
            return m_StepCount;
        }
-        void CheckCumulativeRewardProviderExistence(string callee) {
-            if (CumulativeRewardProvider == null)
+        void CheckRewardProviderExistence(string callee) {
+            if (RewardProviderComponent == null)
-                Debug.LogWarningFormat("the CumulativeRewardProvider is null and " +
-                    "method '{0}' was called.  If your agent doesn't have the CumulativeRewardProvider," +
+                Debug.LogWarningFormat("the RewardProviderComponent is null and " +
+                    "method '{0}' was called.  If your agent doesn't have the RewardProviderComponent," +
-        /// Resets the step reward and possibly the episode reward for the agent.
-        /// </summary>
-        public void ResetReward()
-        {
-            CheckCumulativeRewardProviderExistence("ResetReward");
-            InternalResetReward();
-        }
-
-        void InternalResetReward()
-        {
-            CumulativeRewardProvider?.ResetReward(m_Done);
-        }
-
-        /// <summary>
        /// Overrides the current step reward of the agent and updates the episode
        /// reward accordingly.
        /// </summary>
-            CheckCumulativeRewardProviderExistence("SetReward");
-            CumulativeRewardProvider?.SetReward(reward);
+            CheckRewardProviderExistence("SetReward");
+            RewardProviderComponent.SetReward(reward);
        }

        /// <summary>
        public void AddReward(float increment)
        {
-            CheckCumulativeRewardProviderExistence("AddReward");
-            CumulativeRewardProvider?.AddReward(increment);
+            CheckRewardProviderExistence("AddReward");
+            // Keep the null-conditional call: CheckRewardProviderExistence only logs a warning
+            // when the provider is not a RewardProviderComponent, so an unguarded call here
+            // would throw a NullReferenceException right after that warning.
+            RewardProviderComponent?.AddReward(increment);
        }

        /// <summary>
        {
        }

-
        /// <summary>
        /// When the Agent uses Heuristics, it will call this method every time it
        /// needs an action. This can be used for debugging or controlling the agent
            var rewardProviderComponent = GetComponent<RewardProviderComponent>();
            if (rewardProviderComponent == null)
            {
-                rewardProviderComponent = gameObject.AddComponent<CumulativeRewardProviderComponent>();
-                rewardProvider = rewardProviderComponent.GetRewardProvider();
+                rewardProvider = gameObject.AddComponent<RewardProviderComponent>();
            }
        }

            if (m_RequestDecision)
            {
                SendInfoToBrain();
-                InternalResetReward();
+                rewardProvider.ResetReward(m_Done);
                m_Done = false;
                m_MaxStepReached = false;
                m_RequestDecision = false;
            if (m_Terminate)
            {
                m_Terminate = false;
-                InternalResetReward();
+                rewardProvider.ResetReward(m_Done);
                m_Done = false;
                m_MaxStepReached = false;
                m_RequestDecision = false;
--- a/UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/IRewardProvider.cs
+++ b/UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/IRewardProvider.cs
        /// <see cref="GetIncrementalReward"/> method.
        /// </summary>
        void RewardStep();
+
+        /// <summary>
+        /// Notifies the RewardProvider that the current reward should be reset.  If done is false,
+        /// only the incremental reward should be reset; otherwise both the incremental and the
+        /// cumulative reward should be reset.
+        /// </summary>
+        /// <param name="done">Flag indicating whether the Agent episode is done or not.</param>
+        void ResetReward(bool done=false);
    }
 }
--- a/UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/RewardProviderComponent.cs
+++ b/UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/RewardProviderComponent.cs
 using UnityEngine;
+#if UNITY_EDITOR
+using UnityEditor;
+#endif
-    /// The abstract base class for all reward provider components.
+    /// The base class for all reward provider components.
-    public abstract class RewardProviderComponent : MonoBehaviour
+    public class RewardProviderComponent : MonoBehaviour, IRewardProvider
+
+#if UNITY_EDITOR
+        [Range(1, 100)]
+        [Tooltip("The sample rate of the reward to display in the UI.  5 means it samples every 5 frames.")]
+        public int RewardSampleRate = 20;
+#endif
-        /// Returns the IRewardProvider held by this component.
+        /// The reward that is accumulated between Agent steps.
+        /// </summary>
+        float m_IncrementalReward;
+
+        /// <summary>
+        /// The Reward that is accumulated between Agent episodes.
+        /// </summary>
+        float m_CumulativeReward;
+        
+        /// <summary>
+        /// Clears the incremental (step) reward and, when the episode has ended, the
+        /// cumulative (episode) reward as well.
+        /// </summary>
+        /// <param name="done">When true, the cumulative reward is zeroed in addition to the
+        /// incremental reward.</param>
+        public void ResetReward(bool done = false)
+        {
+            if (done)
+            {
+                m_CumulativeReward = 0f;
+            }
+            m_IncrementalReward = 0f;
+
+#if UNITY_EDITOR
+            // NOTE(review): the editor sample is taken after the totals are cleared, so on a
+            // done reset the value handed to the curve is 0 rather than the final episode
+            // total - confirm this is intended.
+            InternalResetReward();
+#endif
+        }
+
+        /// <summary>
+        /// Overrides the current step reward of the agent and updates the episode
+        /// reward accordingly.
-        /// <returns>An instance of IRewardProvider</returns>
-        public abstract IRewardProvider GetRewardProvider();
+        /// <param name="reward">The new value of the reward.</param>
+        public void SetReward(float reward)
+        {
+            // Shift the episode total by the difference between the new and the previous step
+            // reward, then record the new step reward.
+            m_CumulativeReward = m_CumulativeReward + (reward - m_IncrementalReward);
+            m_IncrementalReward = reward;
+        }
+
+        /// <summary>
+        /// Adds the given amount to both the step reward and the episode reward.
+        /// </summary>
+        /// <param name="increment">Amount added to each of the two running totals.</param>
+        public void AddReward(float increment)
+        {
+            m_CumulativeReward += increment;
+            m_IncrementalReward += increment;
+        }
+        
+        /// <summary>
+        /// Retrieves the current incremental (step) reward.
+        /// </summary>
+        /// <returns>The value of the incremental reward field.</returns>
+        public float GetIncrementalReward() => m_IncrementalReward;
+
+        /// <summary>
+        /// Retrieves the current cumulative (episode) reward.
+        /// </summary>
+        /// <returns>The value of the cumulative reward field.</returns>
+        public float GetCumulativeReward() => m_CumulativeReward;
+
+        /// <summary>
+        /// Does nothing in this base component; declared virtual so that derived components
+        /// can supply their own behavior.
+        /// </summary>
+        public virtual void RewardStep() { }
+        
+
+#if UNITY_EDITOR
+        public AnimationCurve rewardCurve = new AnimationCurve();
+#endif
+        
+#if UNITY_EDITOR
+        /// <summary>
+        /// Editor-only helper that appends the current cumulative reward to the reward curve
+        /// as a keyframe with linear tangents. Sampling only happens on frames whose index is
+        /// a multiple of the sample rate; values below 1 are treated as 1.
+        /// </summary>
+        void InternalResetReward()
+        {
+            // RangeAttribute only constrains the Inspector; a script can still assign 0 to the
+            // public field, which would make the modulo below throw DivideByZeroException.
+            var sampleRate = Mathf.Max(1, RewardSampleRate);
+            if (Time.frameCount % sampleRate != 0)
+            {
+                return;
+            }
+
+            var keyframe = new Keyframe
+            {
+                time = Time.realtimeSinceStartup,
+                value = m_CumulativeReward,
+                inTangent = 0.0f,
+                outTangent = 0.0f
+            };
+            var index = rewardCurve.AddKey(keyframe);
+            if (index < 0)
+            {
+                // AddKey reports -1 when the key could not be added (e.g. a key already exists
+                // at this time); there is no key whose tangent mode could be adjusted.
+                return;
+            }
+
+            AnimationUtility.SetKeyLeftTangentMode(rewardCurve, index, AnimationUtility.TangentMode.Linear);
+            AnimationUtility.SetKeyRightTangentMode(rewardCurve, index, AnimationUtility.TangentMode.Linear);
+        }
+#endif
    }
 }
--- a/UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/CumulativeRewardProviderComponent.cs
+++ b/UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/CumulativeRewardProviderComponent.cs
-using System;
-#if UNITY_EDITOR
-using UnityEditor;
-#endif
-using UnityEngine;
-
-namespace MLAgents.RewardProvider
-{
-    public class CumulativeRewardProviderComponent : RewardProviderComponent
-    {
-        CumulativeRewardProvider m_RewardProvider = new CumulativeRewardProvider();
-        
-#if UNITY_EDITOR
-        public AnimationCurve rewardCurve = new AnimationCurve();
-#endif
-
-        public override IRewardProvider GetRewardProvider()
-        {
-            return m_RewardProvider;
-        }
-
-#if UNITY_EDITOR
-        public void Start()
-        {
-            m_RewardProvider.OnRewardProviderReset += RewardReset;
-        }
-        
-        void RewardReset(float reward)
-        {
-            var keyframe = new Keyframe
-            {
-                time = Time.realtimeSinceStartup,
-                value = reward,
-                inTangent = 0.0f,
-                outTangent = 0.0f
-            };
-            var index = rewardCurve.AddKey(keyframe);
-            AnimationUtility.SetKeyLeftTangentMode(rewardCurve, index, AnimationUtility.TangentMode.Linear);
-            AnimationUtility.SetKeyRightTangentMode(rewardCurve, index, AnimationUtility.TangentMode.Linear);
-        }
-#endif
-    }
-}
--- a/UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/CumulativeRewardProvider.cs.meta
+++ b/UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/CumulativeRewardProvider.cs.meta
-fileFormatVersion: 2
-guid: 46aa889302734e5ca844235a4f69ff29
-timeCreated: 1576015982
--- a/UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/CumulativeRewardProviderComponent.cs.meta
+++ b/UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/CumulativeRewardProviderComponent.cs.meta
-fileFormatVersion: 2
-guid: 9ff175c2b68f41e5b5aa045010677f61
-timeCreated: 1576019305
--- a/UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/CumulativeRewardProvider.cs
+++ b/UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/CumulativeRewardProvider.cs
-namespace MLAgents.RewardProvider
-{
-    /// <summary>
-    /// A reward provider that can be used to accumulate reward during a simulation step. 
-    /// </summary>
-    public class CumulativeRewardProvider : IRewardProvider
-    {
-        float m_IncrementalReward;
-        float m_CumulativeReward;
-
-        public delegate void RewardReset(float reward);
-
-        public event RewardReset OnRewardProviderReset;
-        
-        /// <summary>
-        /// Resets the step reward and possibly the episode reward for the agent.
-        /// </summary>
-        public void ResetReward(bool done = false)
-        {
-            m_IncrementalReward = 0f;
-            if (done)
-            {
-                var reward = m_CumulativeReward;
-                m_CumulativeReward = 0f;
-                OnRewardProviderReset?.Invoke(reward);
-            }
-        }
-
-        public void RewardStep()
-        {
-            // Do Nothing.  There is a special case for this reward provider in agent which resets
-            // the reward.
-        }
-
-        /// <summary>
-        /// Overrides the current step reward of the agent and updates the episode
-        /// reward accordingly.
-        /// </summary>
-        /// <param name="reward">The new value of the reward.</param>
-        public void SetReward(float reward)
-        {
-            m_CumulativeReward += reward - m_IncrementalReward;
-            m_IncrementalReward = reward;
-        }
-
-        /// <summary>
-        /// Increments the step and episode rewards by the provided value.
-        /// </summary>
-        /// <param name="increment">Incremental reward value.</param>
-        public void AddReward(float increment)
-        {
-            m_IncrementalReward += increment;
-            m_CumulativeReward += increment;
-        }
-
-        /// <summary>
-        /// Retrieves the step reward for the Agent.
-        /// </summary>
-        /// <returns>The step reward.</returns>
-        public float GetIncrementalReward()
-        {
-            return m_IncrementalReward;
-        }
-
-        /// <summary>
-        /// Retrieves the episode reward for the Agent.
-        /// </summary>
-        /// <returns>The episode reward.</returns>
-        public float GetCumulativeReward()
-        {
-            return m_CumulativeReward;
-        }
-    }
-}