浏览代码

add DoneReason enum to Agent (#3517)

/asymm-envs
GitHub 5 年前
当前提交
8ce9dcfd
共有 3 个文件被更改,包括 39 次插入35 次删除
  1. 2
      Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ModelOverrider.cs
  2. 38
      com.unity.ml-agents/Runtime/Agent.cs
  3. 34
      com.unity.ml-agents/Runtime/Timer.cs

2
Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ModelOverrider.cs


{
if (m_NumSteps > m_MaxEpisodes * m_Agent.maxStep)
{
// Stop recording so that we don't write partial rewards to the timer info.
TimerStack.Instance.Recording = false;
Application.Quit(0);
}
}

38
com.unity.ml-agents/Runtime/Agent.cs


InitializeSensors();
}
/// <summary>
/// Identifies why an Agent is being treated as "done".
/// </summary>
enum DoneReason
{
    /// <summary>
    /// <see cref="Done"/> was invoked.
    /// </summary>
    DoneCalled = 0,

    /// <summary>
    /// The Agent reached its maximum number of steps.
    /// </summary>
    MaxStepReached = 1,

    /// <summary>
    /// The Agent was disabled.
    /// </summary>
    Disabled = 2,
}
/// <summary>
/// Clears the demonstration writers and detaches this Agent's step and reset
/// handlers from the Academy events.
/// </summary>
// NOTE(review): this view of the method looks abbreviated (lines may be elided around
// the blank line below) - confirm against the full file before relying on it.
void OnDisable()
{
DemonstrationWriters.Clear();

// Unsubscribe so the Academy no longer invokes AgentStep / _AgentReset on this Agent.
Academy.Instance.AgentAct -= AgentStep;
Academy.Instance.AgentForceReset -= _AgentReset;
}
NotifyAgentDone();
NotifyAgentDone(DoneReason.Disabled);
void NotifyAgentDone(bool maxStepReached = false)
void NotifyAgentDone(DoneReason doneReason)
m_Info.maxStepReached = maxStepReached;
m_Info.maxStepReached = doneReason == DoneReason.MaxStepReached;
// Request the last decision with no callbacks
// We request a decision so Python knows the Agent is done immediately
m_Brain?.RequestDecision(m_Info, sensors);

demoWriter.Record(m_Info, sensors);
}
UpdateRewardStats();
if (doneReason != DoneReason.Disabled)
{
// We don't want to update the reward stats when the Agent is disabled, because this will make
// the rewards look lower than they actually are during shutdown.
UpdateRewardStats();
}
// The Agent is done, so we give it a new episode Id
m_EpisodeId = EpisodeIdCounter.GetEpisodeId();

/// </summary>
public void Done()
{
    // Notify exactly once, tagging the reason as an explicit Done() call.
    // The previous argument-less NotifyAgentDone() call was left over from before
    // the DoneReason parameter was introduced; keeping both would report the
    // episode as finished twice before the reset.
    NotifyAgentDone(DoneReason.DoneCalled);
    _AgentReset();
}

if ((m_StepCount >= maxStep) && (maxStep > 0))
{
NotifyAgentDone(true);
NotifyAgentDone(DoneReason.MaxStepReached);
_AgentReset();
}
}

34
com.unity.ml-agents/Runtime/Timer.cs


/// <summary>
/// Stop timing a block of code, and increment internal counts.
/// </summary>
public void End(bool isRecording)
public void End()
if (isRecording)
{
var elapsed = DateTime.Now.Ticks - m_TickStart;
m_TotalTicks += elapsed;
m_TickStart = 0;
m_NumCalls++;
}
// Note that samplers are always updated regardless of recording state, to ensure matching start and ends.
var elapsed = DateTime.Now.Ticks - m_TickStart;
m_TotalTicks += elapsed;
m_TickStart = 0;
m_NumCalls++;
m_Sampler?.End();
}

/// This implements the Singleton pattern (solution 4) as described in
/// https://csharpindepth.com/articles/singleton
/// </remarks>
public class TimerStack : IDisposable
internal class TimerStack : IDisposable
// Whether or not new timers and gauges can be added.
bool m_Recording = true;
// Explicit static constructor to tell C# compiler
// not to mark type as beforefieldinit

}
/// <summary>
/// Gets or sets whether new timers and gauges can be added.
/// </summary>
public bool Recording
{
    get => m_Recording;
    set => m_Recording = value;
}
/// <summary>
/// Updates the referenced gauge in the root node with the provided value.
/// </summary>
/// <param name="name">The name of the Gauge to modify.</param>

if (!Recording)
{
return;
}
if (!float.IsNaN(value))
{
GaugeNode gauge;

void Pop()
{
    // Remove the innermost timer node from the stack and end it exactly once.
    // Ending the same node twice would count the call twice and, because End()
    // resets the start tick to 0, add a bogus elapsed time on the second call.
    var finished = m_Stack.Pop();
    finished.End();
}
/// <summary>

正在加载...
取消
保存