比较提交

...
此合并请求的变更与目标分支存在冲突。
/UnitySDK/Assets/ML-Agents/Editor/Tests/MLAgentsEditModeTest.cs
/UnitySDK/Assets/ML-Agents/Scripts/Agent.cs
/UnitySDK/Assets/ML-Agents/Scripts/Policy/BehaviorParameters.cs
/UnitySDK/Assets/ML-Agents/Scripts/Monitor.cs

16 次代码提交

共有 11 个文件被更改,包括 212 次插入和 43 次删除
  1. 1
      UnitySDK/UnitySDK.sln.DotSettings
  2. 7
      UnitySDK/Assets/ML-Agents/Editor/Tests/MLAgentsEditModeTest.cs
  3. 18
      UnitySDK/Assets/ML-Agents/Scripts/Monitor.cs
  4. 1
      UnitySDK/Assets/ML-Agents/Scripts/Policy/BehaviorParameters.cs
  5. 84
      UnitySDK/Assets/ML-Agents/Scripts/Agent.cs
  6. 3
      UnitySDK/Assets/ML-Agents/Scripts/RewardProvider.meta
  7. 3
      UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/IRewardProvider.cs.meta
  8. 3
      UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/RewardProviderComponent.cs.meta
  9. 31
      UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/IRewardProvider.cs
  10. 104
      UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/RewardProviderComponent.cs

1
UnitySDK/UnitySDK.sln.DotSettings


<s:String x:Key="/Default/CodeStyle/Naming/CSharpNaming/Abbreviations/=BLAS/@EntryIndexedValue">BLAS</s:String>
<s:String x:Key="/Default/CodeStyle/Naming/CSharpNaming/Abbreviations/=CPU/@EntryIndexedValue">CPU</s:String>
<s:String x:Key="/Default/CodeStyle/Naming/CSharpNaming/Abbreviations/=GPU/@EntryIndexedValue">GPU</s:String>
<s:String x:Key="/Default/CodeStyle/Naming/CSharpNaming/Abbreviations/=ID/@EntryIndexedValue">ID</s:String>
<s:String x:Key="/Default/CodeStyle/Naming/CSharpNaming/Abbreviations/=NN/@EntryIndexedValue">NN</s:String>
<s:String x:Key="/Default/CodeStyle/Naming/CSharpNaming/Abbreviations/=PNG/@EntryIndexedValue">PNG</s:String>
<s:String x:Key="/Default/CodeStyle/Naming/CSharpNaming/Abbreviations/=RL/@EntryIndexedValue">RL</s:String>

7
UnitySDK/Assets/ML-Agents/Editor/Tests/MLAgentsEditModeTest.cs


using UnityEngine;
using NUnit.Framework;
using System.Reflection;
using MLAgents.RewardProvider;
using MLAgents.Sensor;
namespace MLAgents.Tests

var j = 0;
var rewardProvider1 = agent1.rewardProvider as RewardProviderComponent;
var rewardProvider2 = agent2.rewardProvider as RewardProviderComponent;
Assert.LessOrEqual(Mathf.Abs(j * 0.1f + j * 10f - agent1.GetCumulativeReward()), 0.05f);
Assert.LessOrEqual(Mathf.Abs(i * 0.1f - agent2.GetCumulativeReward()), 0.05f);
Assert.LessOrEqual(Mathf.Abs(j * 0.1f + j * 10f - rewardProvider1.GetCumulativeReward()), 0.05f);
Assert.LessOrEqual(Mathf.Abs(i * 0.1f - rewardProvider2.GetCumulativeReward()), 0.05f);
academyStepMethod?.Invoke(aca, new object[] { });

18
UnitySDK/Assets/ML-Agents/Scripts/Monitor.cs


InstantiateCanvas();
s_IsInstantiated = true;
}
if (s_Canvas == null)
{
Debug.LogWarning("Canvas was null and as assumed to be non-null.");
return;
}
if (target == null)
{

InstantiateCanvas();
s_IsInstantiated = true;
}
if (s_Canvas == null)
{
Debug.LogWarning("Canvas was null and as assumed to be non-null.");
return;
}
if (target == null)
{

{
InstantiateCanvas();
s_IsInstantiated = true;
}
if (s_Canvas == null)
{
Debug.LogWarning("Canvas was null and as assumed to be non-null.");
return;
}
if (target == null)

1
UnitySDK/Assets/ML-Agents/Scripts/Policy/BehaviorParameters.cs


using Barracuda;
using System;
using System.Collections.Generic;
using MLAgents.RewardProvider;
using UnityEngine;
namespace MLAgents

84
UnitySDK/Assets/ML-Agents/Scripts/Agent.cs


using System.Collections.Generic;
using UnityEngine;
using Barracuda;
using MLAgents.RewardProvider;
using MLAgents.Sensor;
using UnityEngine.Serialization;

/// Current Agent action (message sent from Brain).
AgentAction m_Action;
/// Represents the reward the agent accumulated during the current step.
/// It is reset to 0 at the beginning of every step.
/// Should be set to a positive value when the agent performs a "good"
/// action that we wish to reinforce/reward, and set to a negative value
/// when the agent performs a "bad" action that we wish to punish/deter.
/// Additionally, the magnitude of the reward should not exceed 1.0
float m_Reward;
/// Keeps track of the cumulative reward in this episode.
float m_CumulativeReward;
/// Whether or not the agent requests an action.
bool m_RequestAction;

WriteAdapter m_WriteAdapter = new WriteAdapter();
/// <summary>
/// Represents the reward the agent accumulated during the current step.
/// It is reset at the beginning of every step.
/// The reward should be set to a positive value when the agent performs a "good"
/// action that we wish to reinforce/reward, and set to a negative value
/// when the agent performs a "bad" action that we wish to punish/deter.
/// Additionally, the magnitude of the reward should not exceed 1.0
/// </summary>
public IRewardProvider rewardProvider;
RewardProviderComponent RewardProviderComponent
{
get { return rewardProvider as RewardProviderComponent; }
}
/// MonoBehaviour function that is called when the attached GameObject
/// becomes enabled or active.
void OnEnable()

/// facilitate testing.
void OnEnableHelper(Academy academy)
{
InitializeRewardProvider();
m_Info = new AgentInfo();
m_Action = new AgentAction();
sensors = new List<ISensor>();

{
return m_StepCount;
}
/// <summary>
/// Resets the step reward and possibly the episode reward for the agent.
/// </summary>
public void ResetReward()
{
m_Reward = 0f;
if (m_Done)
void CheckRewardProviderExistence(string callee) {
if (RewardProviderComponent == null)
m_CumulativeReward = 0f;
Debug.LogWarningFormat("the RewardProviderComponent is null and " +
"method '{0}' was called. If your agent doesn't have the RewardProviderComponent," +
"remove the call to '{0}'.", callee);
}
}

/// <param name="reward">The new value of the reward.</param>
public void SetReward(float reward)
{
m_CumulativeReward += (reward - m_Reward);
m_Reward = reward;
CheckRewardProviderExistence("SetReward");
RewardProviderComponent.SetReward(reward);
}
/// <summary>

public void AddReward(float increment)
{
m_Reward += increment;
m_CumulativeReward += increment;
CheckRewardProviderExistence("AddReward");
RewardProviderComponent.AddReward(increment);
}
/// <summary>

public float GetReward()
public float GetIncrementalReward()
return m_Reward;
}
/// <summary>
/// Retrieves the episode reward for the Agent.
/// </summary>
/// <returns>The episode reward.</returns>
public float GetCumulativeReward()
{
return m_CumulativeReward;
return rewardProvider.GetIncrementalReward();
}
/// <summary>

{
}
/// <summary>
/// When the Agent uses Heuristics, it will call this method every time it
/// needs an action. This can be used for debugging or controlling the agent

m_VectorSensorBuffer = new float[numFloatObservations];
}
void InitializeRewardProvider()
{
// Look for a legacy reward provider.
var rewardProviderComponent = GetComponent<RewardProviderComponent>();
if (rewardProviderComponent == null)
{
rewardProvider = gameObject.AddComponent<RewardProviderComponent>();
}
}
/// <summary>
/// Sends the Agent info to the linked Brain.
/// </summary>

m_Info.observations.Clear();
m_ActionMasker.ResetMask();
UpdateSensors();
rewardProvider.RewardStep();
using (TimerStack.Instance.Scoped("CollectObservations"))
{
CollectObservations();

// var param = m_PolicyFactory.brainParameters; // look, no brain params!
m_Info.reward = m_Reward;
m_Info.reward = rewardProvider.GetIncrementalReward();
if (m_Recorder != null && m_Recorder.record && Application.isEditor)
{
// This is a bit of a hack - if we're in inference mode, observations won't be generated

if (m_RequestDecision)
{
SendInfoToBrain();
ResetReward();
rewardProvider.ResetReward(m_Done);
m_HasAlreadyReset = false;
}
}

if (m_Terminate)
{
m_Terminate = false;
ResetReward();
rewardProvider.ResetReward(m_Done);
m_Done = false;
m_MaxStepReached = false;
m_RequestDecision = false;

3
UnitySDK/Assets/ML-Agents/Scripts/RewardProvider.meta


fileFormatVersion: 2
guid: 332fe3ab963e4b33bc528e8f5b2c82a7
timeCreated: 1575329166

3
UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/IRewardProvider.cs.meta


fileFormatVersion: 2
guid: bed12564f4c74e3e964fdb763ce73213
timeCreated: 1575329472

3
UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/RewardProviderComponent.cs.meta


fileFormatVersion: 2
guid: f726f84c46eb44bb9630a107f7ce7b96
timeCreated: 1578522299

31
UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/IRewardProvider.cs


namespace MLAgents.RewardProvider
{
/// <summary>
/// Reward providers allow users to provide rewards for Agent behavior during training in order to
/// give hints on what types of actions are "better" than others based on an Agent's previous observation.
/// Implementations are expected to track both a per-step (incremental) reward and a
/// per-episode (cumulative) reward; see <see cref="ResetReward"/> for the reset semantics.
/// </summary>
public interface IRewardProvider
{
/// <summary>
/// Get an incremental reward to pass along to a trainer.
/// </summary>
/// <returns>The reward accumulated since the last reset of the step reward.</returns>
float GetIncrementalReward();
/// <summary>
/// This function is called on every step of the simulation and should be
/// used as a place to store an <see cref="Agent"/>'s incremental reward
/// before the reward is sent off to the brain from the
/// <see cref="GetIncrementalReward"/> method.
/// </summary>
void RewardStep();
/// <summary>
/// Notifies the RewardProvider that the current reward should be reset. If done is false,
/// only the incremental reward should be reset, otherwise both the incremental and cumulative
/// reward should be reset.
/// </summary>
/// <param name="done">Flag indicating whether the Agent episode is done or not.</param>
void ResetReward(bool done=false);
}
}

104
UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/RewardProviderComponent.cs


using UnityEngine;
#if UNITY_EDITOR
using UnityEditor;
#endif

namespace MLAgents.RewardProvider
{
    /// <summary>
    /// The base class for all reward provider components. Tracks a per-step
    /// (incremental) reward and a per-episode (cumulative) reward for an Agent,
    /// and — in the editor only — records samples of the cumulative reward into
    /// an <see cref="AnimationCurve"/> for display in the inspector.
    /// </summary>
    public class RewardProviderComponent : MonoBehaviour, IRewardProvider
    {
#if UNITY_EDITOR
        [Range(1, 100)]
        [Tooltip("The sample rate of the reward to display in the UI. 5 means it samples every 5 frames.")]
        public int RewardSampleRate = 20;

        /// <summary>
        /// Editor-only curve of sampled cumulative rewards, keyed by real time.
        /// </summary>
        public AnimationCurve rewardCurve = new AnimationCurve();
#endif

        /// <summary>
        /// The reward that is accumulated between Agent steps.
        /// </summary>
        float m_IncrementalReward;

        /// <summary>
        /// The reward that is accumulated between Agent episodes.
        /// </summary>
        float m_CumulativeReward;

        /// <summary>
        /// Resets the step reward and possibly the episode reward for the agent.
        /// </summary>
        /// <param name="done">If true, the cumulative (episode) reward is reset as well.</param>
        public void ResetReward(bool done = false)
        {
#if UNITY_EDITOR
            // Sample BEFORE zeroing: the previous ordering recorded
            // m_CumulativeReward after it had potentially been reset, so every
            // episode-end sample landed on the curve as 0 instead of the final
            // episode reward.
            InternalResetReward();
#endif
            m_IncrementalReward = 0f;
            if (done)
            {
                m_CumulativeReward = 0f;
            }
        }

        /// <summary>
        /// Overrides the current step reward of the agent and updates the episode
        /// reward accordingly.
        /// </summary>
        /// <param name="reward">The new value of the reward.</param>
        public void SetReward(float reward)
        {
            // Remove the previously-counted step reward from the episode total,
            // then count the new value.
            m_CumulativeReward += reward - m_IncrementalReward;
            m_IncrementalReward = reward;
        }

        /// <summary>
        /// Increments the step and episode rewards by the provided value.
        /// </summary>
        /// <param name="increment">Incremental reward value.</param>
        public void AddReward(float increment)
        {
            m_IncrementalReward += increment;
            m_CumulativeReward += increment;
        }

        /// <summary>
        /// Get the reward accumulated since the last step reset.
        /// </summary>
        /// <returns>The current step reward.</returns>
        public float GetIncrementalReward()
        {
            return m_IncrementalReward;
        }

        /// <summary>
        /// Get the reward accumulated since the last episode reset.
        /// </summary>
        /// <returns>The current episode reward.</returns>
        public float GetCumulativeReward()
        {
            return m_CumulativeReward;
        }

        /// <summary>
        /// Called on every simulation step; override to accumulate per-step rewards.
        /// </summary>
        public virtual void RewardStep()
        {
        }

#if UNITY_EDITOR
        /// <summary>
        /// Editor-only: every RewardSampleRate frames, append the current
        /// cumulative reward to the inspector curve with linear tangents.
        /// </summary>
        void InternalResetReward()
        {
            if (Time.frameCount % RewardSampleRate != 0)
                return;
            var keyframe = new Keyframe
            {
                time = Time.realtimeSinceStartup,
                value = m_CumulativeReward,
                inTangent = 0.0f,
                outTangent = 0.0f
            };
            var index = rewardCurve.AddKey(keyframe);
            AnimationUtility.SetKeyLeftTangentMode(rewardCurve, index, AnimationUtility.TangentMode.Linear);
            AnimationUtility.SetKeyRightTangentMode(rewardCurve, index, AnimationUtility.TangentMode.Linear);
        }
#endif
    }
}
正在加载...
取消
保存