浏览代码

Develop modify stepping logic (#3448)

* Moving the max step logic
 - Created a new Academy Event called AgentIncrementStep to be called before SetStatus
 - Implemented the Agent stepping logic

* Second commit: moving the step counting to the beginning. I had to edit the tests, but I think they are now closer to what we want.

* addressing comments

* Update com.unity.ml-agents/Runtime/Agent.cs

Co-Authored-By: Chris Goy <goyenator@gmail.com>

* Update com.unity.ml-agents/Runtime/Agent.cs

Co-Authored-By: Chris Goy <goyenator@gmail.com>

* Fixed the broken tests

* Update com.unity.ml-agents/Runtime/Agent.cs

Co-Authored-By: Chris Elion <chris.elion@unity3d.com>

* step logic changes: unit test (#3467)

* Added a line in the changelog

Co-authored-by: Chris Goy <christopherg@unity3d.com>
Co-authored-by: Chris Elion <celion@gmail.com>
/asymm-envs
GitHub 5 年前
当前提交
764d8948
共有 4 个文件被更改,包括 59 次插入33 次删除
  1. 1
      com.unity.ml-agents/CHANGELOG.md
  2. 10
      com.unity.ml-agents/Runtime/Academy.cs
  3. 23
      com.unity.ml-agents/Runtime/Agent.cs
  4. 58
      com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs

1
com.unity.ml-agents/CHANGELOG.md


- Several classes were changed from public to internal visibility. (#3390)
- Academy.RegisterSideChannel and UnregisterSideChannel methods were added. (#3391)
- A tutorial on adding custom SideChannels was added (#3391)
- The stepping logic for the Agent and the Academy has been simplified (#3448)
- Update Barracuda to 0.6.0-preview
### Bugfixes

10
com.unity.ml-agents/Runtime/Academy.cs


// Signals to all the listeners that the academy is being destroyed
internal event Action DestroyAction;
// Signals the Agent that a new step is about to start.
// This will mark the Agent as Done if it has reached its maxSteps.
internal event Action AgentIncrementStep;
// Signals to all the agents at each environment step along with the
// Academy's maxStepReached, done and stepCount values. The agents rely
// on this event to update their own values of max step reached and done

AgentSetStatus?.Invoke(m_StepCount);
m_StepCount += 1;
m_TotalStepCount += 1;
AgentIncrementStep?.Invoke();
using (TimerStack.Instance.Scoped("AgentSendState"))
{

{
AgentAct?.Invoke();
}
m_StepCount += 1;
m_TotalStepCount += 1;
}
/// <summary>

23
com.unity.ml-agents/Runtime/Agent.cs


m_Action = new AgentAction();
sensors = new List<ISensor>();
Academy.Instance.AgentIncrementStep += AgentIncrementStep;
Academy.Instance.AgentSendState += SendInfo;
Academy.Instance.DecideAction += DecideAction;
Academy.Instance.AgentAct += AgentStep;

// We don't want to even try, because this will lazily create a new Academy!
if (Academy.IsInitialized)
{
Academy.Instance.AgentIncrementStep -= AgentIncrementStep;
Academy.Instance.AgentSendState -= SendInfo;
Academy.Instance.DecideAction -= DecideAction;
Academy.Instance.AgentAct -= AgentStep;

}
}
/// <summary>
/// Handler for the Academy's AgentIncrementStep event: advances this
/// agent's step counter by one each time the event is raised.
/// </summary>
void AgentIncrementStep()
{
    // One Academy increment notification corresponds to one agent step.
    ++m_StepCount;
}
if ((m_RequestAction) && (m_Brain != null))
{
m_RequestAction = false;
AgentAction(m_Action.vectorActions);
}
}
else
{
m_StepCount += 1;
}
if ((m_RequestAction) && (m_Brain != null))
{
m_RequestAction = false;
AgentAction(m_Action.vectorActions);
}
}

58
com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs


public override void AgentReset()
{
agentResetCalls += 1;
collectObservationsCallsSinceLastReset = 0;
agentActionCallsSinceLastReset = 0;

agent1.LazyInitialize();
agent2.SetPolicy(new TestPolicy());
var j = 0;
for (var i = 0; i < 500; i++)
var expectedAgent1ActionSinceReset = 0;
for (var i = 0; i < 50; i++)
if (i % 21 == 0)
{
j = 0;
}
else
{
j++;
expectedAgent1ActionSinceReset += 1;
if (expectedAgent1ActionSinceReset == agent1.maxStep || i == 0){
expectedAgent1ActionSinceReset = 0;
Assert.LessOrEqual(Mathf.Abs(j * 10.1f - agent1.GetCumulativeReward()), 0.05f);
Assert.LessOrEqual(Mathf.Abs(expectedAgent1ActionSinceReset * 10.1f - agent1.GetCumulativeReward()), 0.05f);
Assert.LessOrEqual(Mathf.Abs(i * 0.1f - agent2.GetCumulativeReward()), 0.05f);
agent1.AddReward(10f);

decisionRequester.DecisionPeriod = 1;
decisionRequester.Awake();
var maxStep = 6;
const int maxStep = 6;
var expectedAgentStepCount = 0;
var expectedResets= 0;
var expectedAgentAction = 0;
var expectedAgentActionSinceReset = 0;
var expectedCollectObsCalls = 0;
var expectedCollectObsCallsSinceReset = 0;
// We expect resets to occur when there are maxSteps actions since the last reset (and on the first step)
var expectReset = agent1.agentActionCallsSinceLastReset == maxStep || (i == 0);
var previousNumResets = agent1.agentResetCalls;
aca.EnvironmentStep();
// Agent should observe and act on each Academy step
expectedAgentAction += 1;
expectedAgentActionSinceReset += 1;
expectedCollectObsCalls += 1;
expectedCollectObsCallsSinceReset += 1;
expectedAgentStepCount += 1;
if (expectReset)
// If the next step will put the agent at maxSteps, we expect it to reset
if (agent1.GetStepCount() == maxStep - 1 || (i == 0))
Assert.AreEqual(previousNumResets + 1, agent1.agentResetCalls);
expectedResets +=1;
else
if (agent1.GetStepCount() == maxStep - 1)
Assert.AreEqual(previousNumResets, agent1.agentResetCalls);
expectedAgentActionSinceReset = 0;
expectedCollectObsCallsSinceReset = 0;
expectedAgentStepCount = 0;
aca.EnvironmentStep();
Assert.AreEqual(expectedAgentStepCount, agent1.GetStepCount());
Assert.AreEqual(expectedResets, agent1.agentResetCalls);
Assert.AreEqual(expectedAgentAction, agent1.agentActionCalls);
Assert.AreEqual(expectedAgentActionSinceReset, agent1.agentActionCallsSinceLastReset);
Assert.AreEqual(expectedCollectObsCalls, agent1.collectObservationsCalls);
Assert.AreEqual(expectedCollectObsCallsSinceReset, agent1.collectObservationsCallsSinceLastReset);
}
}
}
正在加载...
取消
保存