浏览代码

Reintroduce a base RewardProviderComponent. Make changes based on PR feedback.

/main/reward-providers
Christopher Goy 5 年前
当前提交
310c94ba
共有 13 个文件被更改,包括 167 次插入177 次删除
  1. 7
      UnitySDK/Assets/ML-Agents/Editor/Tests/MLAgentsEditModeTest.cs
  2. 59
      UnitySDK/Assets/ML-Agents/Scripts/Agent.cs
  3. 4
      UnitySDK/Assets/ML-Agents/Scripts/Policy/BehaviorParameters.cs
  4. 6
      UnitySDK/Assets/ML-Agents/Scripts/Policy/RemotePolicy.cs
  5. 21
      UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/IRewardProvider.cs
  6. 68
      UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/CumulativeRewardProvider.cs
  7. 43
      UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/CumulativeRewardProviderComponent.cs
  8. 16
      UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/RewardProviderComponent.cs
  9. 3
      UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/RewardProviderComponent.cs.meta
  10. 77
      UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/LowLevelRewardProvider.cs
  11. 40
      UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/LowLevelRewardProviderComponent.cs
  12. 0
      /UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/CumulativeRewardProvider.cs.meta
  13. 0
      /UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/CumulativeRewardProviderComponent.cs.meta

7
UnitySDK/Assets/ML-Agents/Editor/Tests/MLAgentsEditModeTest.cs


using UnityEngine;
using NUnit.Framework;
using System.Reflection;
using MLAgents.RewardProvider;
using MLAgents.Sensor;
namespace MLAgents.Tests

var j = 0;
var rewardProvider1 = agent1.rewardProvider as CumulativeRewardProvider;
var rewardProvider2 = agent2.rewardProvider as CumulativeRewardProvider;
Assert.LessOrEqual(Mathf.Abs(j * 0.1f + j * 10f - agent1.GetCumulativeReward()), 0.05f);
Assert.LessOrEqual(Mathf.Abs(i * 0.1f - agent2.GetCumulativeReward()), 0.05f);
Assert.LessOrEqual(Mathf.Abs(j * 0.1f + j * 10f - rewardProvider1.GetCumulativeReward()), 0.05f);
Assert.LessOrEqual(Mathf.Abs(i * 0.1f - rewardProvider2.GetCumulativeReward()), 0.05f);
academyStepMethod?.Invoke(aca, new object[] { });

59
UnitySDK/Assets/ML-Agents/Scripts/Agent.cs


WriteAdapter m_WriteAdapter = new WriteAdapter();
RewardProviderComponent m_RewardProviderComponent;
/// <summary>
/// It is reset to 0 at the beginning of every step.
/// Should be set to a positive value when the agent performs a "good"
/// It is reset at the beginning of every step.
/// The reward should be set to a positive value when the agent performs a "good"
LowLevelRewardProviderComponent m_RewardProviderComponent;
/// </summary>
// The agent's reward provider viewed as a CumulativeRewardProvider; evaluates to null when
// rewardProvider is not of that type.
public CumulativeRewardProvider defaultRewardProvider => rewardProvider as CumulativeRewardProvider;
/// MonoBehaviour function that is called when the attached GameObject
/// becomes enabled or active.
void OnEnable()

academy.AgentAct += AgentStep;
academy.AgentForceReset += _AgentReset;
m_PolicyFactory = GetComponent<BehaviorParameters>();
m_Brain = m_PolicyFactory.GeneratePolicy(Heuristic, m_RewardProviderComponent.GetRewardProvider());
m_Brain = m_PolicyFactory.GeneratePolicy(Heuristic);
ResetData();
InitializeAgent();
InitializeSensors();

{
m_PolicyFactory.GiveModel(behaviorName, model, inferenceDevice);
m_Brain?.Dispose();
m_Brain = m_PolicyFactory.GeneratePolicy(Heuristic, rewardProvider);
m_Brain = m_PolicyFactory.GeneratePolicy(Heuristic);
}
/// <summary>

/// </summary>
public void ResetReward()
{
    // defaultRewardProvider is rewardProvider cast to CumulativeRewardProvider, so the reset
    // is issued exactly once through that accessor. Resetting through both references would
    // hit the same object twice and, when m_Done is set, raise OnRewardProviderReset a
    // second time with a reward of 0.
    Debug.Assert(defaultRewardProvider != null, "the defaultRewardProvider is null and " +
        "method 'ResetReward' was called. If your agent doesn't have the CumulativeRewardProvider, " +
        "remove the call from 'ResetReward'.");
    defaultRewardProvider.ResetReward(m_Done);
}
/// <summary>

/// <param name="reward">The new value of the reward.</param>
public void SetReward(float reward)
{
    // defaultRewardProvider is rewardProvider cast to CumulativeRewardProvider, so the value
    // is written exactly once through that accessor instead of once per reference to the
    // same object.
    Debug.Assert(defaultRewardProvider != null, "the defaultRewardProvider is null and " +
        "method 'SetReward' was called. If your agent doesn't have the CumulativeRewardProvider, " +
        "remove the call from 'SetReward'.");
    defaultRewardProvider.SetReward(reward);
}
/// <summary>

public void AddReward(float increment)
{
    // defaultRewardProvider is rewardProvider cast to CumulativeRewardProvider. Adding through
    // both references would apply the increment twice to the same object, so the increment
    // is applied exactly once through the typed accessor.
    Debug.Assert(defaultRewardProvider != null, "the defaultRewardProvider is null and " +
        "method 'AddReward' was called. If your agent doesn't have the CumulativeRewardProvider, " +
        "remove the call from 'AddReward'.");
    defaultRewardProvider.AddReward(increment);
}
/// <summary>

public float GetReward()
public float GetIncrementalReward()
"legacy method 'GetReward' was called.");
"method 'GetIncrementalReward' was called.");
}
/// <summary>
/// Retrieves the episode reward for the Agent.
/// </summary>
/// <returns>The episode reward.</returns>
public float GetCumulativeReward()
{
    // Message reported by the assertion when no reward provider is attached.
    const string missingProviderMessage = "m_RewardProviderComponent is null and " +
        "legacy method 'GetCumulativeReward' was called.";
    Debug.Assert(rewardProvider != null, missingProviderMessage);

    // Delegate to the provider, which owns the episode total.
    var episodeReward = rewardProvider.GetCumulativeReward();
    return episodeReward;
}
/// <summary>

void InitializeRewardProvider()
{
// Assigns m_RewardProviderComponent from the components on this GameObject.
// NOTE(review): the four assignments below run unconditionally, so every earlier result is
// overwritten and a new CumulativeRewardProviderComponent is always added. This looks like
// the pre- and post-change lines of a diff shown together, presumably with a null check
// between the lookup and the AddComponent call missing from view - confirm against the
// real file before relying on this text.
// Look for a legacy reward provider.
m_RewardProviderComponent = GetComponent<LowLevelRewardProviderComponent>();
m_RewardProviderComponent = GetComponent<RewardProviderComponent>();
m_RewardProviderComponent = gameObject.AddComponent<LowLevelRewardProviderComponent>();
m_RewardProviderComponent = gameObject.AddComponent<CumulativeRewardProviderComponent>();
}
}

4
UnitySDK/Assets/ML-Agents/Scripts/Policy/BehaviorParameters.cs


}
public IPolicy GeneratePolicy(Func<float[]> heuristic, IRewardProvider rewardProvider)
public IPolicy GeneratePolicy(Func<float[]> heuristic)
{
switch (m_BehaviorType)
{

case BehaviorType.Default:
if (FindObjectOfType<Academy>().IsCommunicatorOn)
{
return new RemotePolicy(m_BrainParameters, m_BehaviorName, rewardProvider);
return new RemotePolicy(m_BrainParameters, m_BehaviorName);
}
if (m_Model != null)
{

6
UnitySDK/Assets/ML-Agents/Scripts/Policy/RemotePolicy.cs


/// </summary>
List<int[]> m_SensorShapes;
IRewardProvider m_RewardProvider;
string behaviorName,
IRewardProvider rewardProvider)
string behaviorName)
m_RewardProvider = rewardProvider;
var aca = Object.FindObjectOfType<Academy>();
aca.LazyInitialization();
m_Communicator = aca.Communicator;

21
UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/IRewardProvider.cs


/// </summary>
/// <returns></returns>
float GetIncrementalReward();
/// <summary>
/// Adds a scalar value to the current reward for this step.
/// </summary>
void AddReward(float reward);
void SetReward(float reward);
/// <summary>
/// Retrieves the step reward for the Agent.
/// </summary>
/// <returns>The step reward.</returns>
float GetReward();
/// <summary>
/// Retrieves the episode reward for the Agent.
/// </summary>
/// <returns>The episode reward.</returns>
float GetCumulativeReward();
void ResetReward(bool done);
}
}

68
UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/CumulativeRewardProvider.cs


namespace MLAgents.RewardProvider
{
    /// <summary>
    /// Reward provider that keeps two running totals: the reward gathered since the last
    /// call to <see cref="ResetReward"/> (incremental) and the reward gathered since the last
    /// call to <see cref="ResetReward"/> with done set (cumulative).
    /// </summary>
    public class CumulativeRewardProvider : IRewardProvider
    {
        float m_IncrementalReward;
        float m_CumulativeReward;

        /// <summary>
        /// Signature of the handlers notified when the cumulative reward is cleared.
        /// </summary>
        /// <param name="reward">Cumulative reward held just before it was cleared.</param>
        public delegate void RewardReset(float reward);

        /// <summary>
        /// Raised by <see cref="ResetReward"/> when it is called with done set to true.
        /// </summary>
        public event RewardReset OnRewardProviderReset;

        /// <summary>
        /// Clears the incremental reward and, when <paramref name="done"/> is true, also clears
        /// the cumulative reward and reports its previous value to subscribers.
        /// </summary>
        /// <param name="done">Whether the cumulative reward should be cleared as well.</param>
        public void ResetReward(bool done = false)
        {
            m_IncrementalReward = 0f;
            if (!done)
            {
                return;
            }

            // Capture the total before zeroing it so subscribers see the finished value.
            var finishedTotal = m_CumulativeReward;
            m_CumulativeReward = 0f;
            OnRewardProviderReset?.Invoke(finishedTotal);
        }

        /// <summary>
        /// Replaces the incremental reward with <paramref name="reward"/> and shifts the
        /// cumulative reward by the difference between the new and the previous value.
        /// </summary>
        /// <param name="reward">The new value of the incremental reward.</param>
        public void SetReward(float reward)
        {
            var previous = m_IncrementalReward;
            m_IncrementalReward = reward;
            m_CumulativeReward += reward - previous;
        }

        /// <summary>
        /// Adds <paramref name="increment"/> to both the incremental and the cumulative reward.
        /// </summary>
        /// <param name="increment">Amount to add to both totals.</param>
        public void AddReward(float increment)
        {
            m_CumulativeReward += increment;
            m_IncrementalReward += increment;
        }

        /// <summary>
        /// Reads the incremental reward.
        /// </summary>
        /// <returns>The current incremental reward.</returns>
        public float GetIncrementalReward() => m_IncrementalReward;

        /// <summary>
        /// Reads the cumulative reward.
        /// </summary>
        /// <returns>The current cumulative reward.</returns>
        public float GetCumulativeReward() => m_CumulativeReward;
    }
}

43
UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/CumulativeRewardProviderComponent.cs


using System;
#if UNITY_EDITOR
using UnityEditor;
#endif
using UnityEngine;
namespace MLAgents.RewardProvider
{
    /// <summary>
    /// Component that owns a <see cref="CumulativeRewardProvider"/>. In the Unity Editor it also
    /// appends a keyframe to <see cref="rewardCurve"/> each time the provider raises
    /// OnRewardProviderReset.
    /// </summary>
    public class CumulativeRewardProviderComponent : RewardProviderComponent
    {
        CumulativeRewardProvider m_RewardProvider = new CumulativeRewardProvider();

        /// <summary>
        /// Curve of the rewards reported by OnRewardProviderReset, keyed by
        /// Time.realtimeSinceStartup. It is only written to inside the Unity Editor.
        /// </summary>
        // Declared unconditionally: a public field is part of the component's serialized data,
        // and wrapping it in #if UNITY_EDITOR makes the editor and player layouts differ.
        public AnimationCurve rewardCurve = new AnimationCurve();

        /// <summary>
        /// Returns the reward provider owned by this component.
        /// </summary>
        /// <returns>The CumulativeRewardProvider instance created with this component.</returns>
        public override IRewardProvider GetRewardProvider()
        {
            return m_RewardProvider;
        }

#if UNITY_EDITOR
        /// <summary>
        /// Subscribes the curve recorder to the provider's reset event.
        /// </summary>
        public void Start()
        {
            m_RewardProvider.OnRewardProviderReset += RewardReset;
        }

        /// <summary>
        /// Appends the reported reward to <see cref="rewardCurve"/> as a flat keyframe and
        /// switches both of its tangents to linear mode.
        /// </summary>
        /// <param name="reward">Reward value passed by OnRewardProviderReset.</param>
        void RewardReset(float reward)
        {
            var keyframe = new Keyframe
            {
                time = Time.realtimeSinceStartup,
                value = reward,
                inTangent = 0.0f,
                outTangent = 0.0f
            };
            var index = rewardCurve.AddKey(keyframe);
            AnimationUtility.SetKeyLeftTangentMode(rewardCurve, index, AnimationUtility.TangentMode.Linear);
            AnimationUtility.SetKeyRightTangentMode(rewardCurve, index, AnimationUtility.TangentMode.Linear);
        }
#endif
    }
}

16
UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/RewardProviderComponent.cs


using UnityEngine;
namespace MLAgents.RewardProvider
{
    /// <summary>
    /// Abstract MonoBehaviour from which reward provider components derive. A derived
    /// component supplies its provider through <see cref="GetRewardProvider"/>.
    /// </summary>
    public abstract class RewardProviderComponent : MonoBehaviour
    {
        /// <summary>
        /// Gives access to the IRewardProvider that this component holds.
        /// </summary>
        /// <returns>The IRewardProvider instance of the derived component.</returns>
        public abstract IRewardProvider GetRewardProvider();
    }
}

3
UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/RewardProviderComponent.cs.meta


fileFormatVersion: 2
guid: f726f84c46eb44bb9630a107f7ce7b96
timeCreated: 1578522299

77
UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/LowLevelRewardProvider.cs


namespace MLAgents.RewardProvider
{
    /// <summary>
    /// Legacy reward provider intended as an easy upgrade path from the old reward system.
    /// It tracks an incremental reward and a cumulative reward.
    /// </summary>
    public class LowLevelRewardProvider : IRewardProvider
    {
        float m_IncrementalReward;
        float m_CumulativeReward;

        /// <summary>
        /// Signature of the handlers notified when the cumulative reward is cleared.
        /// </summary>
        /// <param name="reward">Cumulative reward held just before it was cleared.</param>
        public delegate void RewardReset(float reward);

        /// <summary>
        /// Raised by <see cref="ResetReward"/> when it is called with done set to true.
        /// </summary>
        public event RewardReset OnRewardProviderReset;

        /// <summary>
        /// Reads the incremental reward; returns the same field as <see cref="GetReward"/>.
        /// </summary>
        /// <returns>The current incremental reward.</returns>
        public float GetIncrementalReward() => m_IncrementalReward;

        /// <summary>
        /// Clears the incremental reward and, when <paramref name="done"/> is true, also clears
        /// the cumulative reward and reports its previous value to subscribers.
        /// </summary>
        /// <param name="done">Whether the cumulative reward should be cleared as well.</param>
        public void ResetReward(bool done = false)
        {
            m_IncrementalReward = 0f;
            if (!done)
            {
                return;
            }

            // Capture the total before zeroing it so subscribers see the finished value.
            var finishedTotal = m_CumulativeReward;
            m_CumulativeReward = 0f;
            OnRewardProviderReset?.Invoke(finishedTotal);
        }

        /// <summary>
        /// Replaces the incremental reward with <paramref name="reward"/> and shifts the
        /// cumulative reward by the difference between the new and the previous value.
        /// </summary>
        /// <param name="reward">The new value of the incremental reward.</param>
        public void SetReward(float reward)
        {
            var previous = m_IncrementalReward;
            m_IncrementalReward = reward;
            m_CumulativeReward += (reward - previous);
        }

        /// <summary>
        /// Adds <paramref name="increment"/> to both the incremental and the cumulative reward.
        /// </summary>
        /// <param name="increment">Amount to add to both totals.</param>
        public void AddReward(float increment)
        {
            m_CumulativeReward += increment;
            m_IncrementalReward += increment;
        }

        /// <summary>
        /// Reads the incremental reward.
        /// </summary>
        /// <returns>The current incremental reward.</returns>
        public float GetReward() => m_IncrementalReward;

        /// <summary>
        /// Reads the cumulative reward.
        /// </summary>
        /// <returns>The current cumulative reward.</returns>
        public float GetCumulativeReward() => m_CumulativeReward;
    }
}

40
UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/LowLevelRewardProviderComponent.cs


using System;
#if UNITY_EDITOR
using UnityEditor;
#endif
using UnityEngine;
namespace MLAgents.RewardProvider
{
    /// <summary>
    /// Component that owns a <see cref="LowLevelRewardProvider"/>. In the Unity Editor it also
    /// appends a keyframe to <see cref="rewardCurve"/> each time the provider raises
    /// OnRewardProviderReset.
    /// </summary>
    public class LowLevelRewardProviderComponent : MonoBehaviour
    {
        LowLevelRewardProvider m_RewardProvider = new LowLevelRewardProvider();

        /// <summary>
        /// Curve of the rewards reported by OnRewardProviderReset, keyed by
        /// Time.realtimeSinceStartup. It is only written to inside the Unity Editor.
        /// </summary>
        public AnimationCurve rewardCurve = new AnimationCurve();

        /// <summary>
        /// Returns the reward provider owned by this component.
        /// </summary>
        /// <returns>The LowLevelRewardProvider instance created with this component.</returns>
        public LowLevelRewardProvider GetRewardProvider()
        {
            return m_RewardProvider;
        }

        /// <summary>
        /// Subscribes the curve recorder to the provider's reset event.
        /// </summary>
        public virtual void Start()
        {
            m_RewardProvider.OnRewardProviderReset += RewardReset;
        }

        /// <summary>
        /// In the Unity Editor, appends the reported reward to <see cref="rewardCurve"/> as a
        /// flat keyframe with linear tangent modes. Outside the editor the body is empty.
        /// </summary>
        /// <param name="reward">Reward value passed by OnRewardProviderReset.</param>
        void RewardReset(float reward)
        {
#if UNITY_EDITOR
            var key = new Keyframe();
            key.time = Time.realtimeSinceStartup;
            key.value = reward;
            key.inTangent = 0.0f;
            key.outTangent = 0.0f;

            var keyIndex = rewardCurve.AddKey(key);
            const AnimationUtility.TangentMode linear = AnimationUtility.TangentMode.Linear;
            AnimationUtility.SetKeyLeftTangentMode(rewardCurve, keyIndex, linear);
            AnimationUtility.SetKeyRightTangentMode(rewardCurve, keyIndex, linear);
#endif
        }
    }
}

/UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/LowLevelRewardProvider.cs.meta → /UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/CumulativeRewardProvider.cs.meta

/UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/LowLevelRewardProviderComponent.cs.meta → /UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/CumulativeRewardProviderComponent.cs.meta

正在加载...
取消
保存