浏览代码

Add an update for the Reward Provider in Agent. Rename some variables. Update docs.

/main/reward-providers
Christopher Goy 5 年前
当前提交
fa2614e6
共有 5 个文件被更改,包括 43 次插入35 次删除
  1. 61
      UnitySDK/Assets/ML-Agents/Scripts/Agent.cs
  2. 2
      UnitySDK/Assets/ML-Agents/Scripts/Policy/BehaviorParameters.cs
  3. 1
      UnitySDK/Assets/ML-Agents/Scripts/Policy/RemotePolicy.cs
  4. 6
      UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/CumulativeRewardProvider.cs
  5. 8
      UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/IRewardProvider.cs

61
UnitySDK/Assets/ML-Agents/Scripts/Agent.cs


WriteAdapter m_WriteAdapter = new WriteAdapter();
RewardProviderComponent m_RewardProviderComponent;
/// <summary>
/// Represents the reward the agent accumulated during the current step.
/// It is reset at the beginning of every step.

/// Additionally, the magnitude of the reward should not exceed 1.0
/// </summary>
public IRewardProvider rewardProvider;

/// <summary>
/// The current <c>rewardProvider</c> viewed as a cumulative reward provider.
/// Evaluates to null when <c>rewardProvider</c> is unset or is some other
/// implementation of <c>IRewardProvider</c>, because the conversion uses
/// the <c>as</c> operator rather than a throwing cast.
/// </summary>
CumulativeRewardProvider CumulativeRewardProvider
{
    get { return rewardProvider as CumulativeRewardProvider; }
}

{
return m_StepCount;
}
/// <summary>
/// Logs a warning when a reward helper is called while the agent's reward
/// provider is not a cumulative reward provider (the accessor returns null).
/// Nothing is logged when a cumulative reward provider is available.
/// </summary>
/// <param name="callee">
/// Name of the reward method that was invoked; it is substituted into the
/// warning text.
/// </param>
void WarnDefaultRewardProvider(string callee)
{
    if (CumulativeRewardProvider != null)
    {
        return;
    }

    // The trailing space after the comma keeps the two concatenated
    // sentences from running together in the logged message.
    Debug.LogWarningFormat("the CumulativeRewardProvider is null and " +
        "method '{0}' was called. If your agent doesn't have the CumulativeRewardProvider, " +
        "remove the call to '{0}'.", callee);
}
/// <summary>
/// Resets the step reward and possibly the episode reward for the agent.

Debug.Assert(DefaultRewardProvider != null, "the DefaultRewardProvider is null and " +
"method 'ResetReward' was called. If your agent doesn't have the CumulativeRewardProvider," +
"remove the call from ResetReward.");
DefaultRewardProvider.ResetReward(m_Done);
WarnDefaultRewardProvider("ResetReward");
InternalResetReward();
}
/// <summary>
/// Forwards a reward reset, together with the agent's current done flag,
/// to the cumulative reward provider. Does nothing when the agent has no
/// cumulative reward provider, and never logs a warning.
/// </summary>
void InternalResetReward() => CumulativeRewardProvider?.ResetReward(m_Done);
/// <summary>

/// <param name="reward">The new value of the reward.</param>
public void SetReward(float reward)
{
    // Warn instead of asserting: the warning text itself tells users whose
    // agent has no CumulativeRewardProvider to drop the call, so a missing
    // provider is a supported configuration and the call becomes a no-op.
    WarnDefaultRewardProvider(nameof(SetReward));

    // Single forwarding call through the renamed accessor; the earlier
    // DefaultRewardProvider path is gone so the reward is set exactly once.
    CumulativeRewardProvider?.SetReward(reward);
}
/// <summary>

public void AddReward(float increment)
{
    // Warn instead of asserting: the warning text itself tells users whose
    // agent has no CumulativeRewardProvider to drop the call, so a missing
    // provider is a supported configuration and the call becomes a no-op.
    WarnDefaultRewardProvider(nameof(AddReward));

    // Forward exactly once. Leaving the old DefaultRewardProvider call in
    // place next to this one would apply the increment twice.
    CumulativeRewardProvider?.AddReward(increment);
}
/// <summary>

public float GetIncrementalReward()
{
    // The assertion checks rewardProvider, so the message names that member
    // (via nameof, to survive future renames) instead of the former
    // m_RewardProviderComponent field.
    Debug.Assert(rewardProvider != null, nameof(rewardProvider) + " is null and " +
        "method 'GetIncrementalReward' was called.");
    return rewardProvider.GetIncrementalReward();
}

/// <summary>
/// Resolves the reward provider for this agent: looks up a
/// RewardProviderComponent on the same GameObject, adds a
/// CumulativeRewardProviderComponent when the lookup returns null, and
/// stores the component's provider in <c>rewardProvider</c>.
/// </summary>
// NOTE(review): this listing interleaves the m_RewardProviderComponent
// statements with their rewardProviderComponent (local variable)
// counterparts, and the braces of the null check are not visible here.
// Confirm which statements are current and that the rewardProvider
// assignment runs whether or not a component already existed.
void InitializeRewardProvider()
{
// Look for a reward provider component already attached to this GameObject.
m_RewardProviderComponent = GetComponent<RewardProviderComponent>();
if (m_RewardProviderComponent == null)
var rewardProviderComponent = GetComponent<RewardProviderComponent>();
if (rewardProviderComponent == null)
m_RewardProviderComponent = gameObject.AddComponent<CumulativeRewardProviderComponent>();
// None found: fall back to a cumulative reward provider component.
rewardProviderComponent = gameObject.AddComponent<CumulativeRewardProviderComponent>();
// Cache the provider itself so later reward calls do not need the component.
rewardProvider = rewardProviderComponent.GetRewardProvider();
}
}

m_Info.observations.Clear();
m_ActionMasker.ResetMask();
UpdateSensors();
rewardProvider.RewardStep();
using (TimerStack.Instance.Scoped("CollectObservations"))
{
CollectObservations();

m_Info.id = m_Id;
m_Brain.RequestDecision(this);
if (m_Recorder != null && m_Recorder.record && Application.isEditor)
{
// This is a bit of a hack - if we're in inference mode, observations won't be generated

if (m_RequestDecision)
{
SendInfoToBrain();
ResetReward();
InternalResetReward();
m_HasAlreadyReset = false;
}
}

if (m_Terminate)
{
m_Terminate = false;
ResetReward();
InternalResetReward();
m_Done = false;
m_MaxStepReached = false;
m_RequestDecision = false;

2
UnitySDK/Assets/ML-Agents/Scripts/Policy/BehaviorParameters.cs


case BehaviorType.Default:
if (FindObjectOfType<Academy>().IsCommunicatorOn)
{
return new RemotePolicy(m_BrainParameters, m_BehaviorName);
return new RemotePolicy(m_BrainParameters, behaviorName);
}
if (m_Model != null)
{

1
UnitySDK/Assets/ML-Agents/Scripts/Policy/RemotePolicy.cs


using UnityEngine;
using System.Collections.Generic;
using MLAgents.RewardProvider;
namespace MLAgents
{

6
UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/CumulativeRewardProvider.cs


}
}
/// <summary>
/// Per-step hook of the reward provider. This implementation is
/// intentionally empty and has no effect when called.
/// </summary>
public void RewardStep()
{
// Intentionally does nothing: Agent special-cases this reward provider and
// resets its reward itself.
}
/// <summary>
/// Overrides the current step reward of the agent and updates the episode
/// reward accordingly.

8
UnitySDK/Assets/ML-Agents/Scripts/RewardProvider/IRewardProvider.cs


/// </summary>
/// <returns></returns>
float GetIncrementalReward();
/// <summary>
/// This function is called on every step of the simulation and should be
/// used as a place to store an <see cref="Agent"/>'s incremental reward
/// before the reward is sent off to the brain from the
/// <see cref="GetIncrementalReward"/> method.
/// </summary>
void RewardStep();
}
}
正在加载...
取消
保存