Christopher Goy
5 年前
当前提交
718650c0
共有 8 个文件被更改,包括 116 次插入 和 159 次删除
-
4UnitySDK/Assets/ML-Agents/Editor/Tests/MLAgentsEditModeTest.cs
-
42UnitySDK/Assets/ML-Agents/Scripts/Agent.cs
-
8UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/IRewardProvider.cs
-
98UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/RewardProviderComponent.cs
-
43UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/CumulativeRewardProviderComponent.cs
-
3UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/CumulativeRewardProvider.cs.meta
-
3UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/CumulativeRewardProviderComponent.cs.meta
-
74UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/CumulativeRewardProvider.cs
|
|||
using UnityEngine; |
|||
#if UNITY_EDITOR
|
|||
using UnityEditor; |
|||
#endif
|
|||
/// The abstract base class for all reward provider components.
|
|||
/// The base class for all reward provider components.
|
|||
public abstract class RewardProviderComponent : MonoBehaviour |
|||
public class RewardProviderComponent : MonoBehaviour, IRewardProvider |
|||
|
|||
#if UNITY_EDITOR
|
|||
[Range(1, 100)] |
|||
[Tooltip("The sample rate of the reward to display in the UI. 5 means it samples every 5 frames.")] |
|||
public int RewardSampleRate = 20; |
|||
#endif
|
|||
/// Returns the IRewardProvider held by this component.
|
|||
/// The reward that is accumulated between Agent steps.
|
|||
/// </summary>
|
|||
float m_IncrementalReward; |
|||
|
|||
/// <summary>
|
|||
/// The Reward that is accumulated between Agent episodes.
|
|||
/// </summary>
|
|||
float m_CumulativeReward; |
|||
|
|||
/// <summary>
/// Zeroes the reward accumulated since the last step and, when
/// <paramref name="done"/> is true, the reward accumulated over the episode too.
/// </summary>
/// <param name="done">Pass true to also clear the episode reward. Defaults to false.</param>
public void ResetReward(bool done = false)
{
    if (done)
    {
        m_CumulativeReward = 0f;
    }

    m_IncrementalReward = 0f;

#if UNITY_EDITOR
    // Editor builds only: let the helper sample the reward into the curve.
    InternalResetReward();
#endif
}
|||
|
|||
/// <summary>
|
|||
/// Overrides the current step reward of the agent and updates the episode
|
|||
/// reward accordingly.
|
|||
/// <returns>An instance of IRewardProvider</returns>
|
|||
public abstract IRewardProvider GetRewardProvider(); |
|||
/// <param name="reward">The new value of the reward.</param>
|
|||
public void SetReward(float reward)
{
    // Swap the step reward's previous contribution to the episode total for the new value.
    m_CumulativeReward = m_CumulativeReward + (reward - m_IncrementalReward);
    m_IncrementalReward = reward;
}
|||
|
|||
/// <summary>
/// Adds the given amount to both the step reward and the episode reward.
/// </summary>
/// <param name="increment">Amount to add; a negative value lowers both totals.</param>
public void AddReward(float increment)
{
    m_CumulativeReward += increment;
    m_IncrementalReward += increment;
}
|||
|
|||
/// <summary>
/// Gets the reward accumulated since the step reward was last reset.
/// </summary>
/// <returns>The current value of the step reward.</returns>
public float GetIncrementalReward() => m_IncrementalReward;
|||
|
|||
/// <summary>
/// Gets the reward accumulated since the episode reward was last reset.
/// </summary>
/// <returns>The current value of the episode reward.</returns>
public float GetCumulativeReward() => m_CumulativeReward;
|||
|
|||
/// <summary>
/// Overridable hook with an intentionally empty default body.
/// </summary>
public virtual void RewardStep()
{
    // Nothing to do in the base implementation.
}
|||
|
|||
|
|||
#if UNITY_EDITOR
|
|||
public AnimationCurve rewardCurve = new AnimationCurve(); |
|||
#endif
|
|||
|
|||
#if UNITY_EDITOR
|
|||
/// <summary>
/// Editor-only helper that records the current episode reward as a keyframe on
/// <c>rewardCurve</c>. A keyframe is only added on frames whose
/// <c>Time.frameCount</c> is a multiple of the sample rate.
/// </summary>
void InternalResetReward()
{
    // RewardSampleRate is a public field and [Range] only constrains values entered in the
    // Inspector, so a script can still assign 0. An integer modulo by zero throws
    // DivideByZeroException, so clamp the divisor to at least 1.
    var sampleRate = Mathf.Max(1, RewardSampleRate);
    if (Time.frameCount % sampleRate != 0)
    {
        return;
    }

    var keyframe = new Keyframe
    {
        time = Time.realtimeSinceStartup,
        value = m_CumulativeReward,
        inTangent = 0.0f,
        outTangent = 0.0f
    };

    var index = rewardCurve.AddKey(keyframe);
    if (index < 0)
    {
        // Assumes AddKey signals failure with a negative index (e.g. a key already at this
        // time) - confirm against the Unity API reference. Nothing to configure then.
        return;
    }

    AnimationUtility.SetKeyLeftTangentMode(rewardCurve, index, AnimationUtility.TangentMode.Linear);
    AnimationUtility.SetKeyRightTangentMode(rewardCurve, index, AnimationUtility.TangentMode.Linear);
}
|||
#endif
|
|||
} |
|||
} |
|
|||
using System; |
|||
#if UNITY_EDITOR
|
|||
using UnityEditor; |
|||
#endif
|
|||
using UnityEngine; |
|||
|
|||
namespace MLAgents.RewardProvider |
|||
{ |
|||
/// <summary>
/// Component that owns a <see cref="CumulativeRewardProvider"/> and exposes it through
/// <see cref="GetRewardProvider"/>. In the editor it also records the reward reported by
/// the provider's reset event on an animation curve.
/// </summary>
public class CumulativeRewardProviderComponent : RewardProviderComponent
{
    CumulativeRewardProvider m_RewardProvider = new CumulativeRewardProvider();

#if UNITY_EDITOR
    public AnimationCurve rewardCurve = new AnimationCurve();
#endif

    /// <summary>
    /// Returns the provider instance created together with this component.
    /// </summary>
    /// <returns>The <see cref="CumulativeRewardProvider"/> held by this component.</returns>
    public override IRewardProvider GetRewardProvider() => m_RewardProvider;

#if UNITY_EDITOR
    /// <summary>
    /// Subscribes <see cref="RewardReset"/> to the provider's reset event.
    /// </summary>
    public void Start()
    {
        m_RewardProvider.OnRewardProviderReset += RewardReset;
    }

    /// <summary>
    /// Appends a keyframe holding the reported reward to <c>rewardCurve</c> and sets
    /// both of its tangent modes to linear.
    /// </summary>
    /// <param name="reward">The value passed by the provider's reset event.</param>
    void RewardReset(float reward)
    {
        var sample = new Keyframe();
        sample.time = Time.realtimeSinceStartup;
        sample.value = reward;
        sample.inTangent = 0.0f;
        sample.outTangent = 0.0f;

        var keyIndex = rewardCurve.AddKey(sample);
        AnimationUtility.SetKeyLeftTangentMode(rewardCurve, keyIndex, AnimationUtility.TangentMode.Linear);
        AnimationUtility.SetKeyRightTangentMode(rewardCurve, keyIndex, AnimationUtility.TangentMode.Linear);
    }
#endif
}
|||
} |
|
|||
fileFormatVersion: 2 |
|||
guid: 46aa889302734e5ca844235a4f69ff29 |
|||
timeCreated: 1576015982 |
|
|||
fileFormatVersion: 2 |
|||
guid: 9ff175c2b68f41e5b5aa045010677f61 |
|||
timeCreated: 1576019305 |
|
|||
namespace MLAgents.RewardProvider |
|||
{ |
|||
/// <summary>
/// Reward provider that keeps two running totals: the reward gathered since the last
/// step reset and the reward gathered since the last episode reset.
/// </summary>
public class CumulativeRewardProvider : IRewardProvider
{
    /// <summary>
    /// Signature of the callback raised when the episode reward is cleared.
    /// </summary>
    /// <param name="reward">The episode reward held just before it was cleared.</param>
    public delegate void RewardReset(float reward);

    /// <summary>
    /// Raised by <see cref="ResetReward"/> when it is called with <c>done</c> set to true.
    /// </summary>
    public event RewardReset OnRewardProviderReset;

    float m_IncrementalReward;
    float m_CumulativeReward;

    /// <summary>
    /// Clears the step reward. When <paramref name="done"/> is true the episode reward is
    /// cleared as well and its previous value is passed to <see cref="OnRewardProviderReset"/>.
    /// </summary>
    /// <param name="done">Pass true to also clear the episode reward. Defaults to false.</param>
    public void ResetReward(bool done = false)
    {
        m_IncrementalReward = 0f;
        if (!done)
        {
            return;
        }

        // Capture the total before zeroing it so subscribers receive the finished value.
        var finishedReward = m_CumulativeReward;
        m_CumulativeReward = 0f;
        OnRewardProviderReset?.Invoke(finishedReward);
    }

    /// <summary>
    /// Intentionally empty for this provider.
    /// </summary>
    public void RewardStep()
    {
        // No work here: per the original note, the agent special-cases this provider
        // and resets the reward itself.
    }

    /// <summary>
    /// Replaces the current step reward with the given value and adjusts the episode
    /// reward by the difference.
    /// </summary>
    /// <param name="reward">The new value of the step reward.</param>
    public void SetReward(float reward)
    {
        m_CumulativeReward = m_CumulativeReward + (reward - m_IncrementalReward);
        m_IncrementalReward = reward;
    }

    /// <summary>
    /// Adds the given amount to both the step reward and the episode reward.
    /// </summary>
    /// <param name="increment">Amount to add to both totals.</param>
    public void AddReward(float increment)
    {
        m_CumulativeReward += increment;
        m_IncrementalReward += increment;
    }

    /// <summary>
    /// Gets the reward accumulated since the step reward was last reset.
    /// </summary>
    /// <returns>The step reward.</returns>
    public float GetIncrementalReward() => m_IncrementalReward;

    /// <summary>
    /// Gets the reward accumulated since the episode reward was last reset.
    /// </summary>
    /// <returns>The episode reward.</returns>
    public float GetCumulativeReward() => m_CumulativeReward;
}
|||
} |
撰写
预览
正在加载...
取消
保存
Reference in new issue