浏览代码

Rename LegacyRewardProvider to LowLevelRewardProvider.

/main/reward-providers
Christopher Goy 5 年前
当前提交
bd2a492b
共有 11 个文件被更改,包括 104 次插入71 次删除
  1. 64
      UnitySDK/Assets/ML-Agents/Scripts/Agent.cs
  2. 13
      UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/BaseRewardProviderComponent.cs
  3. 21
      UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/IRewardProvider.cs
  4. 15
      UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/LowLevelRewardProvider.cs
  5. 29
      UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/LowLevelRewardProviderComponent.cs
  6. 7
      UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/LegacyRewardProviderComponent.cs
  7. 23
      UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/TypedRewardProviderComponent.cs
  8. 3
      UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/TypedRewardProviderComponent.cs.meta
  9. 0
      /UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/LowLevelRewardProvider.cs.meta
  10. 0
      /UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/LowLevelRewardProviderComponent.cs.meta
  11. 0
      /UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/LowLevelRewardProvider.cs

64
UnitySDK/Assets/ML-Agents/Scripts/Agent.cs


/// action that we wish to reinforce/reward, and set to a negative value
/// when the agent performs a "bad" action that we wish to punish/deter.
/// Additionally, the magnitude of the reward should not exceed 1.0
IRewardProvider m_RewardProvider;
/// <summary>
/// Here for ease of upgrading from the old reward system.
/// </summary>
LegacyRewardProvider m_LegacyRewardProvider;
LowLevelRewardProviderComponent m_RewardProviderComponent;
/// <summary>
/// The reward provider returned by this agent's reward provider component.
/// </summary>
public IRewardProvider rewardProvider => m_RewardProviderComponent.GetRewardProvider();
/// MonoBehaviour function that is called when the attached GameObject
/// becomes enabled or active.

academy.AgentForceReset += _AgentReset;
InitializeRewardProvider();
m_PolicyFactory = GetComponent<BehaviorParameters>();
m_Brain = m_PolicyFactory.GeneratePolicy(Heuristic, m_RewardProvider);
m_Brain = m_PolicyFactory.GeneratePolicy(Heuristic, m_RewardProviderComponent.GetRewardProvider());
ResetData();
InitializeAgent();
InitializeSensors();

{
m_PolicyFactory.GiveModel(behaviorName, model, inferenceDevice);
m_Brain?.Dispose();
m_Brain = m_PolicyFactory.GeneratePolicy(Heuristic, m_RewardProvider);
m_Brain = m_PolicyFactory.GeneratePolicy(Heuristic, rewardProvider);
}
/// <summary>

}
/// <summary>
/// Resets the step reward and possibly the episode reward for the agent.
/// </summary>
/// <remarks>
/// Forwards to the reward provider's <c>ResetReward</c>, passing the agent's
/// current done flag.
/// </remarks>
public void ResetReward()
{
    // Assert on the component rather than on rewardProvider: the rewardProvider
    // getter dereferences m_RewardProviderComponent, so checking the property
    // would throw a NullReferenceException before this message could be reported.
    Debug.Assert(m_RewardProviderComponent != null, "m_RewardProviderComponent is null and " +
        "legacy method 'ResetReward' was called.");
    rewardProvider.ResetReward(m_Done);
}
/// <summary>
/// Overrides the current step reward of the agent and updates the episode
/// reward accordingly.
/// </summary>

Debug.Assert(m_LegacyRewardProvider != null, "LegacyRewardProvider is null and " +
Debug.Assert(rewardProvider != null, "m_RewardProviderComponent is null and " +
m_LegacyRewardProvider.SetReward(reward);
rewardProvider.SetReward(reward);
}
/// <summary>

public void AddReward(float increment)
{
Debug.Assert(m_LegacyRewardProvider != null, "LegacyRewardProvider is null and " +
Debug.Assert(rewardProvider != null, "m_RewardProviderComponent is null and " +
m_LegacyRewardProvider.AddReward(increment);
rewardProvider.AddReward(increment);
}
/// <summary>

public float GetReward()
{
Debug.Assert(m_LegacyRewardProvider != null, "LegacyRewardProvider is null and " +
Debug.Assert(rewardProvider != null, "m_RewardProviderComponent is null and " +
return m_LegacyRewardProvider.GetIncrementalReward();
return rewardProvider.GetIncrementalReward();
}
/// <summary>

public float GetCumulativeReward()
{
Debug.Assert(m_LegacyRewardProvider != null, "LegacyRewardProvider is null and " +
Debug.Assert(rewardProvider != null, "m_RewardProviderComponent is null and " +
return m_LegacyRewardProvider.GetCumulativeReward();
return rewardProvider.GetCumulativeReward();
}
/// <summary>

void InitializeRewardProvider()
{
// Look for a legacy reward provider.
var rewardProviderComponent = GetComponent<BaseRewardProviderComponent>();
if (rewardProviderComponent != null)
{
m_RewardProvider = rewardProviderComponent.GetRewardProvider();
}
if (m_RewardProvider == null)
m_RewardProviderComponent = GetComponent<LowLevelRewardProviderComponent>();
if (m_RewardProviderComponent == null)
var legacyRewardProviderComponent = gameObject.AddComponent<LegacyRewardProviderComponent>();
m_RewardProvider = legacyRewardProviderComponent.GetTypedRewardProvider();
m_RewardProviderComponent = gameObject.AddComponent<LowLevelRewardProviderComponent>();
m_LegacyRewardProvider = m_RewardProvider as LegacyRewardProvider;
}
/// <summary>

// var param = m_PolicyFactory.brainParameters; // look, no brain params!
m_Info.reward = m_RewardProvider.GetIncrementalReward();
m_Info.reward = rewardProvider.GetIncrementalReward();
// TODO(cgoy): Decouple Agent/Policy.
m_Brain.RequestDecision(this);
if (m_Recorder != null && m_Recorder.record && Application.isEditor)

public virtual void CollectObservations()
{
// Intentionally empty: the base implementation does nothing. The method is
// virtual so that subclasses can override it.
}
/// <summary>
/// Sets an action mask for discrete control agents. When used, the agent will not be

if (m_RequestDecision)
{
SendInfoToBrain();
ResetReward();
m_Done = false;
m_MaxStepReached = false;
m_RequestDecision = false;

if (m_Terminate)
{
m_Terminate = false;
ResetReward();
m_Done = false;
m_MaxStepReached = false;
m_RequestDecision = false;

13
UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/BaseRewardProviderComponent.cs


using System;
public class BaseRewardProviderComponent: MonoBehaviour
/// <summary>
/// A typed reward provider that provides easy, typed access to RewardProvider implementations.
/// Subclasses should supply the concrete reward provider type as <typeparamref name="T"/>.
/// </summary>
/// <typeparam name="T"></typeparam>
public abstract class BaseRewardProviderComponent<T> : MonoBehaviour
where T : IRewardProvider, new()
IRewardProvider m_RewardProvider;
T m_RewardProvider = new T();
public virtual IRewardProvider GetRewardProvider()
public T GetRewardProvider()
{
return m_RewardProvider;
}

21
UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/IRewardProvider.cs


/// </summary>
/// <returns></returns>
float GetIncrementalReward();
/// <summary>
/// Adds a scalar value to the current reward for this step.
/// </summary>
void AddReward(float reward);
void SetReward(float reward);
/// <summary>
/// Retrieves the step reward for the Agent.
/// </summary>
/// <returns>The step reward.</returns>
float GetReward();
/// <summary>
/// Retrieves the episode reward for the Agent.
/// </summary>
/// <returns>The episode reward.</returns>
float GetCumulativeReward();
void ResetReward(bool done);
}
}

15
UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/LowLevelRewardProvider.cs


using UnityEngine;
namespace MLAgents.RewardProvider
{
/// <summary>

public class LegacyRewardProvider : IRewardProvider
public class LowLevelRewardProvider : IRewardProvider
public delegate void RewardReset(float reward);
public event RewardReset OnRewardProviderReset;
ResetReward();
return reward;
}

/// </summary>
void ResetReward(bool done = false)
public void ResetReward(bool done = false)
var reward = m_CumulativeReward;
OnRewardProviderReset?.Invoke(reward);
}
}

29
UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/LowLevelRewardProviderComponent.cs


using System;
#if UNITY_EDITOR
// UnityEditor is only available inside the editor; referencing it unguarded
// breaks player builds.
using UnityEditor;
#endif
using UnityEngine;
namespace MLAgents.RewardProvider
{
    /// <summary>
    /// Component wrapping a <see cref="LowLevelRewardProvider"/>. Each time the provider
    /// raises <c>OnRewardProviderReset</c>, the reported reward is appended to
    /// <see cref="rewardCurve"/> as a keyframe.
    /// </summary>
    public class LowLevelRewardProviderComponent : BaseRewardProviderComponent<LowLevelRewardProvider>
    {
        /// <summary>
        /// Curve that receives one key per reward reset: time is
        /// <c>Time.realtimeSinceStartup</c> at the reset, value is the reward passed to the handler.
        /// </summary>
        public AnimationCurve rewardCurve = new AnimationCurve();

        /// <summary>
        /// Subscribes <see cref="RewardReset"/> to the provider's reset event.
        /// </summary>
        public virtual void Start()
        {
            GetRewardProvider().OnRewardProviderReset += RewardReset;
        }

        /// <summary>
        /// Records the reward reported by a provider reset as a new key on <see cref="rewardCurve"/>.
        /// </summary>
        /// <param name="reward">Reward value supplied by the reset event.</param>
        void RewardReset(float reward)
        {
            var keyframe = new Keyframe
            {
                time = Time.realtimeSinceStartup,
                value = reward,
                inTangent = 0.0f,
                outTangent = 0.0f
            };
            var index = rewardCurve.AddKey(keyframe);
#if UNITY_EDITOR
            // AddKey returns -1 when the key could not be added (e.g. a key already
            // exists at that time); using that as an index would be out of range.
            if (index >= 0)
            {
                AnimationUtility.SetKeyLeftTangentMode(rewardCurve, index, AnimationUtility.TangentMode.Linear);
                AnimationUtility.SetKeyRightTangentMode(rewardCurve, index, AnimationUtility.TangentMode.Linear);
            }
#endif
        }
    }
}

7
UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/LegacyRewardProviderComponent.cs


namespace MLAgents.RewardProvider
{
/// <summary>
/// Reward provider component that binds <see cref="TypedRewardProviderComponent{T}"/> to
/// <see cref="LegacyRewardProvider"/>. It declares no members of its own.
/// </summary>
public class LegacyRewardProviderComponent : TypedRewardProviderComponent<LegacyRewardProvider>
{
}
}

23
UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/TypedRewardProviderComponent.cs


namespace MLAgents.RewardProvider
{
    /// <summary>
    /// Reward provider component that creates and holds a provider of type
    /// <typeparamref name="T"/>, exposing it both with its concrete type and through
    /// the <see cref="IRewardProvider"/> interface.
    /// </summary>
    /// <typeparam name="T">
    /// Provider type; must implement <see cref="IRewardProvider"/> and have a
    /// parameterless constructor.
    /// </typeparam>
    public class TypedRewardProviderComponent<T> : BaseRewardProviderComponent
        where T : IRewardProvider, new()
    {
        // Created once with the component via the parameterless constructor.
        T m_TypedRewardProvider = new T();

        /// <summary>Returns the held provider with its concrete type.</summary>
        public T GetTypedRewardProvider() => m_TypedRewardProvider;

        /// <summary>Returns the same held provider as an <see cref="IRewardProvider"/>.</summary>
        public override IRewardProvider GetRewardProvider() => m_TypedRewardProvider;
    }
}

3
UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/TypedRewardProviderComponent.cs.meta


fileFormatVersion: 2
guid: d5d464dcbd314f68a335a1b9b37c6c6e
timeCreated: 1576019945

/UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/LegacyRewardProvider.cs.meta → /UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/LowLevelRewardProvider.cs.meta

/UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/LegacyRewardProviderComponent.cs.meta → /UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/LowLevelRewardProviderComponent.cs.meta

/UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/LegacyRewardProvider.cs → /UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/LowLevelRewardProvider.cs

正在加载...
取消
保存