比较提交

...
此合并请求的变更与目标分支存在冲突。
/UnitySDK/Assets/ML-Agents/Editor/Tests/MLAgentsEditModeTest.cs
/UnitySDK/Assets/ML-Agents/Scripts/Agent.cs
/UnitySDK/Assets/ML-Agents/Scripts/Policy/BehaviorParameters.cs
/UnitySDK/Assets/ML-Agents/Scripts/Monitor.cs

16 次代码提交

共有 11 个文件被更改,包括 212 次插入和 43 次删除
  1. 1
      UnitySDK/UnitySDK.sln.DotSettings
  2. 7
      UnitySDK/Assets/ML-Agents/Editor/Tests/MLAgentsEditModeTest.cs
  3. 18
      UnitySDK/Assets/ML-Agents/Scripts/Monitor.cs
  4. 1
      UnitySDK/Assets/ML-Agents/Scripts/Policy/BehaviorParameters.cs
  5. 84
      UnitySDK/Assets/ML-Agents/Scripts/Agent.cs
  6. 3
      UnitySDK/Assets/ML-Agents/Scripts/RewardProvider.meta
  7. 3
      UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/IRewardProvider.cs.meta
  8. 3
      UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/RewardProviderComponent.cs.meta
  9. 31
      UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/IRewardProvider.cs
  10. 104
      UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/RewardProviderComponent.cs

1
UnitySDK/UnitySDK.sln.DotSettings


<s:String x:Key="/Default/CodeStyle/Naming/CSharpNaming/Abbreviations/=BLAS/@EntryIndexedValue">BLAS</s:String>
<s:String x:Key="/Default/CodeStyle/Naming/CSharpNaming/Abbreviations/=CPU/@EntryIndexedValue">CPU</s:String>
<s:String x:Key="/Default/CodeStyle/Naming/CSharpNaming/Abbreviations/=GPU/@EntryIndexedValue">GPU</s:String>
<s:String x:Key="/Default/CodeStyle/Naming/CSharpNaming/Abbreviations/=ID/@EntryIndexedValue">ID</s:String>
<s:String x:Key="/Default/CodeStyle/Naming/CSharpNaming/Abbreviations/=NN/@EntryIndexedValue">NN</s:String>
<s:String x:Key="/Default/CodeStyle/Naming/CSharpNaming/Abbreviations/=PNG/@EntryIndexedValue">PNG</s:String>
<s:String x:Key="/Default/CodeStyle/Naming/CSharpNaming/Abbreviations/=RL/@EntryIndexedValue">RL</s:String>

7
UnitySDK/Assets/ML-Agents/Editor/Tests/MLAgentsEditModeTest.cs


using UnityEngine;
using NUnit.Framework;
using System.Reflection;
using MLAgents.RewardProvider;
using MLAgents.Sensor;
namespace MLAgents.Tests

var j = 0;
var rewardProvider1 = agent1.rewardProvider as RewardProviderComponent;
var rewardProvider2 = agent2.rewardProvider as RewardProviderComponent;
Assert.LessOrEqual(Mathf.Abs(j * 0.1f + j * 10f - agent1.GetCumulativeReward()), 0.05f);
Assert.LessOrEqual(Mathf.Abs(i * 0.1f - agent2.GetCumulativeReward()), 0.05f);
Assert.LessOrEqual(Mathf.Abs(j * 0.1f + j * 10f - rewardProvider1.GetCumulativeReward()), 0.05f);
Assert.LessOrEqual(Mathf.Abs(i * 0.1f - rewardProvider2.GetCumulativeReward()), 0.05f);
academyStepMethod?.Invoke(aca, new object[] { });

18
UnitySDK/Assets/ML-Agents/Scripts/Monitor.cs


InstantiateCanvas();
s_IsInstantiated = true;
}
if (s_Canvas == null)
{
Debug.LogWarning("Canvas was null and as assumed to be non-null.");
return;
}
if (target == null)
{

InstantiateCanvas();
s_IsInstantiated = true;
}
if (s_Canvas == null)
{
Debug.LogWarning("Canvas was null and as assumed to be non-null.");
return;
}
if (target == null)
{

{
InstantiateCanvas();
s_IsInstantiated = true;
}
if (s_Canvas == null)
{
Debug.LogWarning("Canvas was null and as assumed to be non-null.");
return;
}
if (target == null)

1
UnitySDK/Assets/ML-Agents/Scripts/Policy/BehaviorParameters.cs


using Barracuda;
using System;
using System.Collections.Generic;
using MLAgents.RewardProvider;
using UnityEngine;
namespace MLAgents

84
UnitySDK/Assets/ML-Agents/Scripts/Agent.cs


using System.Collections.Generic;
using UnityEngine;
using Barracuda;
using MLAgents.RewardProvider;
using MLAgents.Sensor;
using UnityEngine.Serialization;

/// Current Agent action (message sent from Brain).
AgentAction m_Action;
/// Represents the reward the agent accumulated during the current step.
/// It is reset to 0 at the beginning of every step.
/// Should be set to a positive value when the agent performs a "good"
/// action that we wish to reinforce/reward, and set to a negative value
/// when the agent performs a "bad" action that we wish to punish/deter.
/// Additionally, the magnitude of the reward should not exceed 1.0
float m_Reward;
/// Keeps track of the cumulative reward in this episode.
float m_CumulativeReward;
/// Whether or not the agent requests an action.
bool m_RequestAction;

WriteAdapter m_WriteAdapter = new WriteAdapter();
/// <summary>
/// Represents the reward the agent accumulated during the current step.
/// It is reset at the beginning of every step.
/// The reward should be set to a positive value when the agent performs a "good"
/// action that we wish to reinforce/reward, and set to a negative value
/// when the agent performs a "bad" action that we wish to punish/deter.
/// Additionally, the magnitude of the reward should not exceed 1.0
/// </summary>
public IRewardProvider rewardProvider;
RewardProviderComponent RewardProviderComponent
{
get { return rewardProvider as RewardProviderComponent; }
}
/// MonoBehaviour function that is called when the attached GameObject
/// becomes enabled or active.
void OnEnable()

/// facilitate testing.
void OnEnableHelper(Academy academy)
{
InitializeRewardProvider();
m_Info = new AgentInfo();
m_Action = new AgentAction();
sensors = new List<ISensor>();

{
return m_StepCount;
}
/// <summary>
/// Resets the step reward and possibly the episode reward for the agent.
/// </summary>
public void ResetReward()
{
m_Reward = 0f;
if (m_Done)
void CheckRewardProviderExistence(string callee) {
if (RewardProviderComponent == null)
m_CumulativeReward = 0f;
Debug.LogWarningFormat("the RewardProviderComponent is null and " +
"method '{0}' was called. If your agent doesn't have the RewardProviderComponent," +
"remove the call to '{0}'.", callee);
}
}

/// <param name="reward">The new value of the reward.</param>
public void SetReward(float reward)
{
m_CumulativeReward += (reward - m_Reward);
m_Reward = reward;
CheckRewardProviderExistence("SetReward");
RewardProviderComponent.SetReward(reward);
}
/// <summary>

public void AddReward(float increment)
{
m_Reward += increment;
m_CumulativeReward += increment;
CheckRewardProviderExistence("AddReward");
RewardProviderComponent.AddReward(increment);
}
/// <summary>

public float GetReward()
public float GetIncrementalReward()
return m_Reward;
}
/// <summary>
/// Retrieves the episode reward for the Agent.
/// </summary>
/// <returns>The episode reward.</returns>
public float GetCumulativeReward()
{
return m_CumulativeReward;
return rewardProvider.GetIncrementalReward();
}
/// <summary>

{
}
/// <summary>
/// When the Agent uses Heuristics, it will call this method every time it
/// needs an action. This can be used for debugging or controlling the agent

m_VectorSensorBuffer = new float[numFloatObservations];
}
void InitializeRewardProvider()
{
// Look for a legacy reward provider.
var rewardProviderComponent = GetComponent<RewardProviderComponent>();
if (rewardProviderComponent == null)
{
rewardProvider = gameObject.AddComponent<RewardProviderComponent>();
}
}
/// <summary>
/// Sends the Agent info to the linked Brain.
/// </summary>

m_Info.observations.Clear();
m_ActionMasker.ResetMask();
UpdateSensors();
rewardProvider.RewardStep();
using (TimerStack.Instance.Scoped("CollectObservations"))
{
CollectObservations();

// var param = m_PolicyFactory.brainParameters; // look, no brain params!
m_Info.reward = m_Reward;
m_Info.reward = rewardProvider.GetIncrementalReward();
if (m_Recorder != null && m_Recorder.record && Application.isEditor)
{
// This is a bit of a hack - if we're in inference mode, observations won't be generated

if (m_RequestDecision)
{
SendInfoToBrain();
ResetReward();
rewardProvider.ResetReward(m_Done);
m_HasAlreadyReset = false;
}
}

if (m_Terminate)
{
m_Terminate = false;
ResetReward();
rewardProvider.ResetReward(m_Done);
m_Done = false;
m_MaxStepReached = false;
m_RequestDecision = false;

3
UnitySDK/Assets/ML-Agents/Scripts/RewardProvider.meta


fileFormatVersion: 2
guid: 332fe3ab963e4b33bc528e8f5b2c82a7
timeCreated: 1575329166

3
UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/IRewardProvider.cs.meta


fileFormatVersion: 2
guid: bed12564f4c74e3e964fdb763ce73213
timeCreated: 1575329472

3
UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/RewardProviderComponent.cs.meta


fileFormatVersion: 2
guid: f726f84c46eb44bb9630a107f7ce7b96
timeCreated: 1578522299

31
UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/IRewardProvider.cs


namespace MLAgents.RewardProvider
{
/// <summary>
/// Reward providers allow users to provide rewards for Agent behavior during training in order to
/// give hints on what types of actions are "better" than others based on an Agent's previous observation.
/// Implementations are expected to track both a per-step (incremental) reward and a
/// per-episode (cumulative) reward; see <see cref="ResetReward"/> for the reset semantics.
/// </summary>
public interface IRewardProvider
{
/// <summary>
/// Get an incremental reward to pass along to a trainer.
/// </summary>
/// <returns>The reward accumulated since the last reset of the step reward.</returns>
float GetIncrementalReward();
/// <summary>
/// This function is called on every step of the simulation and should be
/// used as a place to store an <see cref="Agent"/>'s incremental reward
/// before the reward is sent off to the brain from the
/// <see cref="GetIncrementalReward"/> method.
/// </summary>
void RewardStep();
/// <summary>
/// Notifies the RewardProvider that the current reward should be reset. If done is false,
/// only the incremental reward should be reset, otherwise both the incremental and cumulative
/// reward should be reset.
/// </summary>
/// <param name="done">Flag indicating whether the Agent episode is done or not.</param>
void ResetReward(bool done=false);
}
}

104
UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/RewardProviderComponent.cs


using UnityEngine;
#if UNITY_EDITOR
using UnityEditor;
#endif

namespace MLAgents.RewardProvider
{
    /// <summary>
    /// The base class for all reward provider components. Tracks a per-step
    /// (incremental) reward and a per-episode (cumulative) reward for an Agent,
    /// and — in the editor only — records samples of the cumulative reward into
    /// an <see cref="AnimationCurve"/> for display in the inspector.
    /// </summary>
    public class RewardProviderComponent : MonoBehaviour, IRewardProvider
    {
#if UNITY_EDITOR
        [Range(1, 100)]
        [Tooltip("The sample rate of the reward to display in the UI. 5 means it samples every 5 frames.")]
        public int RewardSampleRate = 20;

        /// <summary>
        /// Editor-only curve of sampled cumulative rewards, keyed by real time.
        /// </summary>
        public AnimationCurve rewardCurve = new AnimationCurve();
#endif

        /// <summary>
        /// The reward that is accumulated between Agent steps.
        /// </summary>
        float m_IncrementalReward;

        /// <summary>
        /// The reward that is accumulated between Agent episodes.
        /// </summary>
        float m_CumulativeReward;

        /// <summary>
        /// Resets the step reward and possibly the episode reward for the agent.
        /// </summary>
        /// <param name="done">If true, the cumulative (episode) reward is reset as well.</param>
        public void ResetReward(bool done = false)
        {
#if UNITY_EDITOR
            // Sample BEFORE zeroing: the previous ordering recorded
            // m_CumulativeReward after it had potentially been reset, so every
            // episode-end sample landed on the curve as 0 instead of the final
            // episode reward.
            InternalResetReward();
#endif
            m_IncrementalReward = 0f;
            if (done)
            {
                m_CumulativeReward = 0f;
            }
        }

        /// <summary>
        /// Overrides the current step reward of the agent and updates the episode
        /// reward accordingly.
        /// </summary>
        /// <param name="reward">The new value of the reward.</param>
        public void SetReward(float reward)
        {
            // Remove the previously-counted step reward from the episode total,
            // then count the new value.
            m_CumulativeReward += reward - m_IncrementalReward;
            m_IncrementalReward = reward;
        }

        /// <summary>
        /// Increments the step and episode rewards by the provided value.
        /// </summary>
        /// <param name="increment">Incremental reward value.</param>
        public void AddReward(float increment)
        {
            m_IncrementalReward += increment;
            m_CumulativeReward += increment;
        }

        /// <summary>
        /// Get the reward accumulated since the last step reset.
        /// </summary>
        /// <returns>The current step reward.</returns>
        public float GetIncrementalReward()
        {
            return m_IncrementalReward;
        }

        /// <summary>
        /// Get the reward accumulated since the last episode reset.
        /// </summary>
        /// <returns>The current episode reward.</returns>
        public float GetCumulativeReward()
        {
            return m_CumulativeReward;
        }

        /// <summary>
        /// Called on every simulation step; override to accumulate per-step rewards.
        /// </summary>
        public virtual void RewardStep()
        {
        }

#if UNITY_EDITOR
        /// <summary>
        /// Editor-only: every RewardSampleRate frames, append the current
        /// cumulative reward to the inspector curve with linear tangents.
        /// </summary>
        void InternalResetReward()
        {
            if (Time.frameCount % RewardSampleRate != 0)
                return;
            var keyframe = new Keyframe
            {
                time = Time.realtimeSinceStartup,
                value = m_CumulativeReward,
                inTangent = 0.0f,
                outTangent = 0.0f
            };
            var index = rewardCurve.AddKey(keyframe);
            AnimationUtility.SetKeyLeftTangentMode(rewardCurve, index, AnimationUtility.TangentMode.Linear);
            AnimationUtility.SetKeyRightTangentMode(rewardCurve, index, AnimationUtility.TangentMode.Linear);
        }
#endif
    }
}
正在加载...
取消
保存