using UnityEngine;
#if UNITY_EDITOR
using UnityEditor;
#endif
namespace MLAgents.RewardProvider
{
/// <summary>
/// The base class for all reward provider components.
/// Tracks a per-step ("incremental") reward and a per-episode ("cumulative")
/// reward, and, in the Unity Editor only, records samples of the cumulative
/// reward into an <see cref="AnimationCurve"/> for display.
/// </summary>
public class RewardProviderComponent : MonoBehaviour, IRewardProvider
{
#if UNITY_EDITOR
    /// <summary>
    /// Editor-only. A curve sample is recorded only when <see cref="ResetReward"/>
    /// is called on a frame whose frame count is a multiple of this value.
    /// </summary>
    [Range(1, 100)]
    [Tooltip("The sample rate of the reward to display in the UI. 5 means it samples every 5 frames.")]
    public int RewardSampleRate = 20;

    /// <summary>
    /// Editor-only. Sampled history of the cumulative reward, keyed by
    /// <c>Time.realtimeSinceStartup</c>. Keys are only ever appended by this class.
    /// </summary>
    public AnimationCurve rewardCurve = new AnimationCurve();
#endif

    /// <summary>
    /// The reward that is accumulated between Agent steps.
    /// </summary>
    float m_IncrementalReward;

    /// <summary>
    /// The reward that is accumulated between Agent episodes.
    /// </summary>
    float m_CumulativeReward;

    /// <summary>
    /// Resets the step reward and possibly the episode reward for the agent.
    /// </summary>
    /// <param name="done">
    /// When <c>true</c>, the cumulative (episode) reward is reset as well.
    /// </param>
    public void ResetReward(bool done = false)
    {
        m_IncrementalReward = 0f;
        if (done)
        {
            m_CumulativeReward = 0f;
        }
#if UNITY_EDITOR
        // NOTE(review): the sample is taken after the reset above, so a call with
        // done == true records 0 rather than the episode's final reward. Kept as-is;
        // confirm this is the intended visualization.
        InternalResetReward();
#endif
    }

    /// <summary>
    /// Overrides the current step reward of the agent and updates the episode
    /// reward accordingly.
    /// </summary>
    /// <param name="reward">The new value of the reward.</param>
    public void SetReward(float reward)
    {
        // Replace the step reward's previous contribution to the episode total
        // with the new value.
        m_CumulativeReward += reward - m_IncrementalReward;
        m_IncrementalReward = reward;
    }

    /// <summary>
    /// Increments the step and episode rewards by the provided value.
    /// </summary>
    /// <param name="increment">Incremental reward value.</param>
    public void AddReward(float increment)
    {
        m_IncrementalReward += increment;
        m_CumulativeReward += increment;
    }

    /// <summary>
    /// Gets the reward accumulated since the step reward was last reset.
    /// </summary>
    /// <returns>The current incremental (step) reward.</returns>
    public float GetIncrementalReward()
    {
        return m_IncrementalReward;
    }

    /// <summary>
    /// Gets the reward accumulated since the episode reward was last reset.
    /// </summary>
    /// <returns>The current cumulative (episode) reward.</returns>
    public float GetCumulativeReward()
    {
        return m_CumulativeReward;
    }

    /// <summary>
    /// Extension point for derived reward providers. The base implementation
    /// does nothing.
    /// </summary>
    public virtual void RewardStep()
    {
    }

#if UNITY_EDITOR
    /// <summary>
    /// Editor-only. Appends the current cumulative reward to
    /// <see cref="rewardCurve"/> when the current frame matches the sample rate.
    /// </summary>
    void InternalResetReward()
    {
        // [Range] only constrains the inspector; a value below 1 assigned from
        // code is treated as 1 so the modulo below can never divide by zero.
        var sampleRate = Mathf.Max(1, RewardSampleRate);
        if (Time.frameCount % sampleRate != 0)
        {
            return;
        }

        var keyframe = new Keyframe
        {
            time = Time.realtimeSinceStartup,
            value = m_CumulativeReward,
            inTangent = 0.0f,
            outTangent = 0.0f
        };

        // AddKey returns -1 when a key already exists at the same time; in that
        // case nothing was added and there is no valid index to configure.
        var index = rewardCurve.AddKey(keyframe);
        if (index < 0)
        {
            return;
        }

        AnimationUtility.SetKeyLeftTangentMode(rewardCurve, index, AnimationUtility.TangentMode.Linear);
        AnimationUtility.SetKeyRightTangentMode(rewardCurve, index, AnimationUtility.TangentMode.Linear);
    }
#endif
}
}