浏览代码

Fix off-by-one error on AgentReset and maxSteps (#3394)

* Fix balance ball 100 reward

* Re-test

* Add test for maxSteps and number of AgentActions

Co-authored-by: Chris Elion <celion@gmail.com>
/release-0.14.0
GitHub 5 年前
当前提交
51f7690d
共有 2 个文件被更改,包括 44 次插入2 次删除
  1. 3
      com.unity.ml-agents/Runtime/Agent.cs
  2. 43
      com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs

3
com.unity.ml-agents/Runtime/Agent.cs


/// Used by the brain to make the agent perform a step.
void AgentStep()
{
if ((m_StepCount >= maxStep - 1) && (maxStep > 0))
if ((m_StepCount >= maxStep) && (maxStep > 0))
{
NotifyAgentDone(true);
_AgentReset();

m_StepCount += 1;
}
if ((m_RequestAction) && (m_Brain != null))
{
m_RequestAction = false;

43
com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs


public int initializeAgentCalls;
public int collectObservationsCalls;
public int collectObservationsCallsSinceLastReset;
public int agentActionCallsSinceLastReset;
public int agentResetCalls;
public override void InitializeAgent()
{

/// <summary>
/// Records that an observation pass happened and emits a single
/// placeholder observation.
/// </summary>
/// <remarks>
/// Bumps both the lifetime counter and the since-last-reset counter,
/// then adds one vector observation with the value 0.
/// </remarks>
public override void CollectObservations()
{
    // Track the call in both the total and the per-episode tallies.
    collectObservationsCalls++;
    collectObservationsCallsSinceLastReset++;

    // Constant dummy observation.
    AddVectorObs(0f);
}

agentActionCallsSinceLastReset += 1;
AddReward(0.1f);
}

collectObservationsCallsSinceLastReset = 0;
agentActionCallsSinceLastReset = 0;
}
public override float[] Heuristic()

var j = 0;
for (var i = 0; i < 500; i++)
{
if (i % 20 == 0)
if (i % 21 == 0)
{
j = 0;
}

agent1.AddReward(10f);
aca.EnvironmentStep();
}
}
/// <summary>
/// Checks when an agent with a maxStep limit gets reset while the
/// Academy is stepped repeatedly.
/// </summary>
/// <remarks>
/// The agent requests a decision every step (DecisionPeriod = 1) and has
/// maxStep = 6. Over 15 environment steps, the reset counter must go up by
/// exactly one on the first step and whenever the number of agent actions
/// since the last reset equals maxStep; on every other step it must stay
/// unchanged.
/// </remarks>
[Test]
public void TestMaxStepsReset()
{
    const int maxStep = 6;
    const int totalEnvironmentSteps = 15;

    // Build the agent under test on a fresh GameObject.
    var agentGo1 = new GameObject("TestAgent");
    var agent1 = agentGo1.AddComponent<TestAgent>();
    var aca = Academy.Instance;

    // Request a decision on every single step.
    var decisionRequester = agent1.gameObject.AddComponent<DecisionRequester>();
    decisionRequester.DecisionPeriod = 1;
    decisionRequester.Awake();

    agent1.maxStep = maxStep;
    agent1.LazyInitialize();

    for (var step = 0; step < totalEnvironmentSteps; step++)
    {
        // Snapshot state before stepping: a reset is due on the very first
        // step, and once maxStep actions have accumulated since the last reset.
        var resetsBefore = agent1.agentResetCalls;
        var resetIsDue = step == 0 || agent1.agentActionCallsSinceLastReset == maxStep;

        aca.EnvironmentStep();

        // Exactly one additional reset when due, none otherwise.
        var expectedResets = resetIsDue ? resetsBefore + 1 : resetsBefore;
        Assert.AreEqual(expectedResets, agent1.agentResetCalls);
    }
}
}
正在加载...
取消
保存