using UnityEngine;
#if UNITY_EDITOR
using UnityEditor;
#endif

namespace MLAgents.RewardProvider
{
    /// <summary>
    /// The base class for all reward provider components.
    /// Tracks two running totals: the reward accumulated since the last step
    /// reset and the reward accumulated since the last episode reset.
    /// </summary>
    public class RewardProviderComponent : MonoBehaviour, IRewardProvider
    {
#if UNITY_EDITOR
        [Range(1, 100)]
        [Tooltip("The sample rate of the reward to display in the UI. 5 means it samples every 5 frames.")]
        public int RewardSampleRate = 20;
#endif

        /// <summary>
        /// The reward that is accumulated between Agent steps.
        /// </summary>
        float m_IncrementalReward;

        /// <summary>
        /// The reward that is accumulated between Agent episodes.
        /// </summary>
        float m_CumulativeReward;

        /// <summary>
        /// Resets the step reward and, when <paramref name="done"/> is true,
        /// the episode reward as well. In the editor this also samples the
        /// cumulative reward into <c>rewardCurve</c>.
        /// </summary>
        /// <param name="done">
        /// When true, the cumulative (episode) reward is zeroed in addition to
        /// the incremental (step) reward.
        /// </param>
        public void ResetReward(bool done = false)
        {
            m_IncrementalReward = 0f;
            if (done)
            {
                m_CumulativeReward = 0f;
            }

#if UNITY_EDITOR
            // The sample is taken after the reset above, so when done is true
            // the value recorded on the curve is 0.
            InternalResetReward();
#endif
        }

        /// <summary>
        /// Overrides the current step reward of the agent and updates the
        /// episode reward accordingly.
        /// </summary>
        /// <param name="reward">The new value of the reward.</param>
        public void SetReward(float reward)
        {
            // Replace the old step contribution in the episode total with the new one.
            m_CumulativeReward += reward - m_IncrementalReward;
            m_IncrementalReward = reward;
        }

        /// <summary>
        /// Increments the step and episode rewards by the provided value.
        /// </summary>
        /// <param name="increment">Incremental reward value.</param>
        public void AddReward(float increment)
        {
            m_IncrementalReward += increment;
            m_CumulativeReward += increment;
        }

        /// <summary>
        /// Gets the reward accumulated since the last call to <see cref="ResetReward"/>.
        /// </summary>
        /// <returns>The current step reward.</returns>
        public float GetIncrementalReward()
        {
            return m_IncrementalReward;
        }

        /// <summary>
        /// Gets the reward accumulated since the last call to
        /// <see cref="ResetReward"/> with <c>done</c> set to true.
        /// </summary>
        /// <returns>The current episode reward.</returns>
        public float GetCumulativeReward()
        {
            return m_CumulativeReward;
        }

        /// <summary>
        /// Hook for derived classes; the base implementation does nothing.
        /// </summary>
        public virtual void RewardStep()
        {
        }

#if UNITY_EDITOR
        /// <summary>
        /// Editor-only curve of sampled cumulative reward values over real time.
        /// </summary>
        public AnimationCurve rewardCurve = new AnimationCurve();

        /// <summary>
        /// Adds the current cumulative reward to <see cref="rewardCurve"/> on
        /// frames whose index is a multiple of <see cref="RewardSampleRate"/>.
        /// </summary>
        void InternalResetReward()
        {
            // [Range] only constrains the inspector; a script can still assign 0,
            // which would make the modulo below throw DivideByZeroException.
            // Treat a zero rate as "sample every frame".
            var sampleRate = RewardSampleRate == 0 ? 1 : RewardSampleRate;
            if (Time.frameCount % sampleRate != 0)
            {
                return;
            }

            var keyframe = new Keyframe
            {
                time = Time.realtimeSinceStartup,
                value = m_CumulativeReward,
                inTangent = 0.0f,
                outTangent = 0.0f
            };

            var index = rewardCurve.AddKey(keyframe);
            if (index < 0)
            {
                // AddKey returns -1 when a key already exists at this time;
                // there is no new key whose tangent modes could be set.
                return;
            }

            AnimationUtility.SetKeyLeftTangentMode(rewardCurve, index, AnimationUtility.TangentMode.Linear);
            AnimationUtility.SetKeyRightTangentMode(rewardCurve, index, AnimationUtility.TangentMode.Linear);
        }
#endif
    }
}