浏览代码

Modifications to reward providers.

/main/reward-providers
Christopher Goy 5 年前
当前提交
718650c0
共有 8 个文件被更改,包括 116 次插入159 次删除
  1. 4
      UnitySDK/Assets/ML-Agents/Editor/Tests/MLAgentsEditModeTest.cs
  2. 42
      UnitySDK/Assets/ML-Agents/Scripts/Agent.cs
  3. 8
      UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/IRewardProvider.cs
  4. 98
      UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/RewardProviderComponent.cs
  5. 43
      UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/CumulativeRewardProviderComponent.cs
  6. 3
      UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/CumulativeRewardProvider.cs.meta
  7. 3
      UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/CumulativeRewardProviderComponent.cs.meta
  8. 74
      UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/CumulativeRewardProvider.cs

4
UnitySDK/Assets/ML-Agents/Editor/Tests/MLAgentsEditModeTest.cs


var j = 0;
var rewardProvider1 = agent1.rewardProvider as CumulativeRewardProvider;
var rewardProvider2 = agent2.rewardProvider as CumulativeRewardProvider;
var rewardProvider1 = agent1.rewardProvider as RewardProviderComponent;
var rewardProvider2 = agent2.rewardProvider as RewardProviderComponent;
for (var i = 0; i < 500; i++)
{
agent2.RequestAction();

42
UnitySDK/Assets/ML-Agents/Scripts/Agent.cs


/// </summary>
public IRewardProvider rewardProvider;
CumulativeRewardProvider CumulativeRewardProvider
RewardProviderComponent RewardProviderComponent
get { return rewardProvider as CumulativeRewardProvider; }
get { return rewardProvider as RewardProviderComponent; }
}
/// MonoBehaviour function that is called when the attached GameObject

{
return m_StepCount;
}
void CheckCumulativeRewardProviderExistence(string callee) {
if (CumulativeRewardProvider == null)
void CheckRewardProviderExistence(string callee) {
if (RewardProviderComponent == null)
Debug.LogWarningFormat("the CumulativeRewardProvider is null and " +
"method '{0}' was called. If your agent doesn't have the CumulativeRewardProvider," +
Debug.LogWarningFormat("the RewardProviderComponent is null and " +
"method '{0}' was called. If your agent doesn't have the RewardProviderComponent," +
/// <summary>
/// Resets the step reward and possibly the episode reward for the agent.
/// Runs the provider-existence check (which logs a warning when the provider
/// is missing) and then delegates the reset to InternalResetReward.
/// </summary>
public void ResetReward()
{
CheckCumulativeRewardProviderExistence("ResetReward");
InternalResetReward();
}
// Forwards the reset to the cumulative reward provider together with the agent's
// current done flag; a missing provider makes this a no-op.
void InternalResetReward() => CumulativeRewardProvider?.ResetReward(m_Done);
/// <summary>
/// Overrides the current step reward of the agent and updates the episode
/// reward accordingly.
/// </summary>

CheckCumulativeRewardProviderExistence("SetReward");
CumulativeRewardProvider?.SetReward(reward);
CheckRewardProviderExistence("SetReward");
RewardProviderComponent.SetReward(reward);
}
/// <summary>

public void AddReward(float increment)
{
    // Both provider properties are `as` casts of the rewardProvider field, so either
    // one can be null. As far as is visible here the existence checks only log a
    // warning; they do not stop execution, hence the null-conditional calls.
    CheckCumulativeRewardProviderExistence("AddReward");
    CumulativeRewardProvider?.AddReward(increment);

    CheckRewardProviderExistence("AddReward");
    // Previously dereferenced with '.', which threw NullReferenceException whenever
    // the agent's provider was not a RewardProviderComponent.
    RewardProviderComponent?.AddReward(increment);
}
/// <summary>

{
}
/// <summary>
/// When the Agent uses Heuristics, it will call this method every time it
/// needs an action. This can be used for debugging or controlling the agent

var rewardProviderComponent = GetComponent<RewardProviderComponent>();
if (rewardProviderComponent == null)
{
rewardProviderComponent = gameObject.AddComponent<CumulativeRewardProviderComponent>();
rewardProvider = rewardProviderComponent.GetRewardProvider();
rewardProvider = gameObject.AddComponent<RewardProviderComponent>();
}
}

if (m_RequestDecision)
{
SendInfoToBrain();
InternalResetReward();
rewardProvider.ResetReward(m_Done);
m_Done = false;
m_MaxStepReached = false;
m_RequestDecision = false;

if (m_Terminate)
{
m_Terminate = false;
InternalResetReward();
rewardProvider.ResetReward(m_Done);
m_Done = false;
m_MaxStepReached = false;
m_RequestDecision = false;

8
UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/IRewardProvider.cs


/// <see cref="GetIncrementalReward"/> method.
/// </summary>
void RewardStep();
/// <summary>
/// Notifies the RewardProvider that the current reward should be reset. If done is false,
/// only the incremental reward should be reset; otherwise both the incremental and the
/// cumulative reward should be reset.
/// </summary>
/// <param name="done">Flag indicating whether the Agent episode is done or not.</param>
void ResetReward(bool done=false);
}
}

98
UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/RewardProviderComponent.cs


using UnityEngine;
#if UNITY_EDITOR
using UnityEditor;
#endif
/// The abstract base class for all reward provider components.
/// The base class for all reward provider components.
public abstract class RewardProviderComponent : MonoBehaviour
public class RewardProviderComponent : MonoBehaviour, IRewardProvider
#if UNITY_EDITOR
[Range(1, 100)]
[Tooltip("The sample rate of the reward to display in the UI. 5 means it samples every 5 frames.")]
public int RewardSampleRate = 20;
#endif
/// Returns the IRewardProvider held by this component.
/// The reward that is accumulated between Agent steps.
/// </summary>
float m_IncrementalReward;
/// <summary>
/// The Reward that is accumulated between Agent episodes.
/// </summary>
float m_CumulativeReward;
/// <summary>
/// Resets the step reward and, when the episode is done, the episode reward as well.
/// In the editor the cumulative reward is offered to the reward curve sampler first.
/// </summary>
/// <param name="done">
/// When true the cumulative reward is cleared in addition to the incremental reward.
/// </param>
public void ResetReward(bool done = false)
{
#if UNITY_EDITOR
    // Sample BEFORE clearing: sampling afterwards recorded 0 at the end of every
    // episode, so the curve never showed the reward the episode actually reached.
    InternalResetReward();
#endif
    m_IncrementalReward = 0f;
    if (done)
    {
        m_CumulativeReward = 0f;
    }
}
/// <summary>
/// Overrides the current step reward of the agent and updates the episode
/// reward accordingly.
/// <returns>An instance of IRewardProvider</returns>
public abstract IRewardProvider GetRewardProvider();
/// <param name="reward">The new value of the reward.</param>
public void SetReward(float reward)
{
    // Shift the episode total by the change in the step reward, then store the
    // new step reward.
    var delta = reward - m_IncrementalReward;
    m_IncrementalReward = reward;
    m_CumulativeReward += delta;
}
/// <summary>
/// Adds the given amount to both the step reward and the episode reward.
/// </summary>
/// <param name="increment">Amount added to each of the two reward totals.</param>
public void AddReward(float increment)
{
    m_CumulativeReward += increment;
    m_IncrementalReward += increment;
}
/// <summary>
/// Retrieves the reward accumulated since the last reset.
/// </summary>
/// <returns>The current incremental (step) reward.</returns>
public float GetIncrementalReward() => m_IncrementalReward;
/// <summary>
/// Retrieves the reward accumulated since the last reset that was flagged as done.
/// </summary>
/// <returns>The current cumulative (episode) reward.</returns>
public float GetCumulativeReward() => m_CumulativeReward;
/// <summary>
/// IRewardProvider step hook. The base component does nothing here; the method is
/// virtual so derived components can supply their own behaviour.
/// </summary>
public virtual void RewardStep() { }
#if UNITY_EDITOR
public AnimationCurve rewardCurve = new AnimationCurve();
#endif
#if UNITY_EDITOR
/// <summary>
/// Editor-only sampler: on frames whose count is a multiple of the sample rate,
/// appends the current cumulative reward to rewardCurve as a keyframe with linear
/// tangents, timestamped with Time.realtimeSinceStartup.
/// </summary>
void InternalResetReward()
{
    // RewardSampleRate is a public field, so code can set it outside the inspector's
    // [Range(1, 100)]. A value of 0 would make the modulo throw DivideByZeroException,
    // so anything below 1 is treated as "sample every frame".
    var sampleRate = RewardSampleRate < 1 ? 1 : RewardSampleRate;
    if (Time.frameCount % sampleRate != 0)
    {
        return;
    }

    var keyframe = new Keyframe
    {
        time = Time.realtimeSinceStartup,
        value = m_CumulativeReward,
        inTangent = 0.0f,
        outTangent = 0.0f
    };
    var index = rewardCurve.AddKey(keyframe);
    if (index < 0)
    {
        // AddKey reports -1 when no key was added (e.g. one already exists at this
        // time); there is then no key whose tangent modes could be set.
        return;
    }

    AnimationUtility.SetKeyLeftTangentMode(rewardCurve, index, AnimationUtility.TangentMode.Linear);
    AnimationUtility.SetKeyRightTangentMode(rewardCurve, index, AnimationUtility.TangentMode.Linear);
}
#endif
}
}

43
UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/CumulativeRewardProviderComponent.cs


using System;
#if UNITY_EDITOR
using UnityEditor;
#endif
using UnityEngine;
namespace MLAgents.RewardProvider
{
    /// <summary>
    /// Reward provider component that owns a <see cref="CumulativeRewardProvider"/>.
    /// In the editor it also plots the reward reported by each provider reset event
    /// on <c>rewardCurve</c>.
    /// </summary>
    public class CumulativeRewardProviderComponent : RewardProviderComponent
    {
        // The provider instance handed out by GetRewardProvider.
        CumulativeRewardProvider m_RewardProvider = new CumulativeRewardProvider();

#if UNITY_EDITOR
        // Editor-only history of the rewards reported by the provider's reset event.
        public AnimationCurve rewardCurve = new AnimationCurve();
#endif

        /// <summary>
        /// Returns the reward provider owned by this component.
        /// </summary>
        /// <returns>The component's CumulativeRewardProvider instance.</returns>
        public override IRewardProvider GetRewardProvider() => m_RewardProvider;

#if UNITY_EDITOR
        /// <summary>
        /// Subscribes the curve recorder to the provider's reset event.
        /// </summary>
        public void Start() => m_RewardProvider.OnRewardProviderReset += RewardReset;

        /// <summary>
        /// Adds the reported reward to rewardCurve as a keyframe with linear tangents,
        /// timestamped with Time.realtimeSinceStartup.
        /// </summary>
        /// <param name="reward">Reward value passed along by the reset event.</param>
        void RewardReset(float reward)
        {
            var sample = new Keyframe
            {
                time = Time.realtimeSinceStartup,
                value = reward,
                inTangent = 0.0f,
                outTangent = 0.0f
            };
            var keyIndex = rewardCurve.AddKey(sample);

            const AnimationUtility.TangentMode linear = AnimationUtility.TangentMode.Linear;
            AnimationUtility.SetKeyLeftTangentMode(rewardCurve, keyIndex, linear);
            AnimationUtility.SetKeyRightTangentMode(rewardCurve, keyIndex, linear);
        }
#endif
    }
}

3
UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/CumulativeRewardProvider.cs.meta


fileFormatVersion: 2
guid: 46aa889302734e5ca844235a4f69ff29
timeCreated: 1576015982

3
UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/CumulativeRewardProviderComponent.cs.meta


fileFormatVersion: 2
guid: 9ff175c2b68f41e5b5aa045010677f61
timeCreated: 1576019305

74
UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/CumulativeRewardProvider.cs


namespace MLAgents.RewardProvider
{
    /// <summary>
    /// Reward provider that keeps two running totals: the reward gathered since the last
    /// reset (incremental) and the reward gathered since the last reset flagged as done
    /// (cumulative).
    /// </summary>
    public class CumulativeRewardProvider : IRewardProvider
    {
        // Reward gathered since the last call to ResetReward.
        float m_IncrementalReward;

        // Reward gathered since the last call to ResetReward with done == true.
        float m_CumulativeReward;

        /// <summary>
        /// Signature of the handlers notified when the cumulative reward is cleared.
        /// </summary>
        /// <param name="reward">Cumulative reward held just before it was cleared.</param>
        public delegate void RewardReset(float reward);

        /// <summary>
        /// Raised by <see cref="ResetReward"/> when it is called with done set to true.
        /// </summary>
        public event RewardReset OnRewardProviderReset;

        /// <summary>
        /// Clears the incremental reward and, when <paramref name="done"/> is true, the
        /// cumulative reward too, notifying subscribers of the value that was cleared.
        /// </summary>
        /// <param name="done">Whether the cumulative reward should be cleared as well.</param>
        public void ResetReward(bool done = false)
        {
            m_IncrementalReward = 0f;
            if (!done)
            {
                return;
            }

            // The total is cleared before subscribers run, but they still receive the
            // value it held beforehand.
            var clearedTotal = m_CumulativeReward;
            m_CumulativeReward = 0f;
            OnRewardProviderReset?.Invoke(clearedTotal);
        }

        /// <summary>
        /// Intentionally empty: the agent has a special case for this reward provider
        /// and resets the reward itself.
        /// </summary>
        public void RewardStep()
        {
        }

        /// <summary>
        /// Replaces the incremental reward with <paramref name="reward"/> and adjusts the
        /// cumulative reward by the difference.
        /// </summary>
        /// <param name="reward">The new value of the incremental reward.</param>
        public void SetReward(float reward)
        {
            var delta = reward - m_IncrementalReward;
            m_IncrementalReward = reward;
            m_CumulativeReward += delta;
        }

        /// <summary>
        /// Adds <paramref name="increment"/> to both the incremental and the cumulative reward.
        /// </summary>
        /// <param name="increment">Amount added to each total.</param>
        public void AddReward(float increment)
        {
            m_CumulativeReward += increment;
            m_IncrementalReward += increment;
        }

        /// <summary>
        /// Retrieves the incremental (step) reward.
        /// </summary>
        /// <returns>The reward gathered since the last reset.</returns>
        public float GetIncrementalReward() => m_IncrementalReward;

        /// <summary>
        /// Retrieves the cumulative (episode) reward.
        /// </summary>
        /// <returns>The reward gathered since the last reset flagged as done.</returns>
        public float GetCumulativeReward() => m_CumulativeReward;
    }
}
正在加载...
取消
保存