
Make the Agent reset immediately after Done (#3291)

* Made the Agent reset immediately

* Fixing the C# tests

* Fixing the tests still

* Trying with incremental episode ids

* Deleting the buffer rather than using an empty list

* Addressing the comments

* Forgot to edit the comment on AgentInfo

* Updating the migrating doc

* Fixed an obvious bug

* Cleaning up after an agent is done in the agent processor

* Fixing the pytest errors
Branch: asymm-envs
Committed by GitHub 5 years ago
Current commit: 590559e7
12 files changed, 97 insertions(+), 117 deletions(-)
  1. UnitySDK/Assets/ML-Agents/Editor/Tests/DemonstrationTests.cs (2 changes)
  2. UnitySDK/Assets/ML-Agents/Editor/Tests/MLAgentsEditModeTest.cs (47 changes)
  3. UnitySDK/Assets/ML-Agents/Scripts/Academy.cs (10 changes)
  4. UnitySDK/Assets/ML-Agents/Scripts/Agent.cs (99 changes)
  5. UnitySDK/Assets/ML-Agents/Scripts/Grpc/GrpcExtensions.cs (2 changes)
  6. UnitySDK/Assets/ML-Agents/Scripts/Grpc/RpcCommunicator.cs (2 changes)
  7. UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/GeneratorImpl.cs (8 changes)
  8. UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/ModelRunner.cs (2 changes)
  9. docs/Migrating.md (1 change)
  10. ml-agents/mlagents/trainers/agent_processor.py (19 changes)
  11. UnitySDK/Assets/ML-Agents/Scripts/EpisodeIdCounter.cs (11 changes, new file)
  12. UnitySDK/Assets/ML-Agents/Scripts/EpisodeIdCounter.cs.meta (11 changes, new file)
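Read together, the changes replace the old deferred reset (set a done flag, then wait for the next EnvironmentStep to notice it) with a reset performed inside Done() itself, plus a globally unique, monotonically increasing episode id so that state keyed by id can never collide across resets. A minimal sketch of what the new control flow amounts to, with simplified signatures and an assumed no-op callback for the terminal decision (the real code is in the Agent.cs hunks below):

    // Sketch only, not the verbatim implementation.
    public void Done()
    {
        NotifyAgentDone();   // stamp and send the terminal AgentInfo
        _AgentReset();       // reset right away instead of waiting for the next EnvironmentStep
    }

    void NotifyAgentDone(bool maxStepReached = false)
    {
        m_Info.done = true;
        m_Info.maxStepReached = maxStepReached;
        m_Info.reward = m_Reward;
        // We request a decision so Python knows the Agent is done immediately.
        m_Brain?.RequestDecision(m_Info, sensors, agentAction => { });
        // The Agent is done, so we give it a new episode Id; state keyed by the
        // old id (memories, action callbacks) is now permanently orphaned.
        m_EpisodeId = EpisodeIdCounter.GetEpisodeId();
        m_Reward = 0f;
        m_CumulativeReward = 0f;
        m_RequestAction = false;
        m_RequestDecision = false;
    }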

UnitySDK/Assets/ML-Agents/Editor/Tests/DemonstrationTests.cs (2 changes)


     reward = 1f,
     actionMasks = new[] { false, true },
     done = true,
-    id = 5,
+    episodeId = 5,
     maxStepReached = true,
     storedVectorActions = new[] { 0f, 1f },
 };

UnitySDK/Assets/ML-Agents/Editor/Tests/MLAgentsEditModeTest.cs (47 changes)


        }
    }
    public bool IsDone()
    {
        return (bool)typeof(Agent).GetField("m_Done", BindingFlags.Instance | BindingFlags.NonPublic).GetValue(this);
    }
    public int initializeAgentCalls;
    public int collectObservationsCalls;
    public int agentActionCalls;

        agentGo2.AddComponent<TestAgent>();
        var agent2 = agentGo2.GetComponent<TestAgent>();
        Assert.AreEqual(false, agent1.IsDone());
        Assert.AreEqual(false, agent2.IsDone());
        Assert.AreEqual(0, agent1.agentResetCalls);
        Assert.AreEqual(0, agent2.agentResetCalls);
        Assert.AreEqual(0, agent1.initializeAgentCalls);

        agentEnableMethod?.Invoke(agent2, new object[] { });
        agentEnableMethod?.Invoke(agent1, new object[] { });
        Assert.AreEqual(false, agent1.IsDone());
        Assert.AreEqual(false, agent2.IsDone());
        // agent1 was not enabled when the academy started
        // The agents have been initialized
        Assert.AreEqual(0, agent1.agentResetCalls);

            if (i % 11 == 5)
            {
                agent1.Done();
                numberAgent1Reset += 1;
                if (!(agent2.IsDone()))
                {
                    // If the agent was already reset before the request decision
                    // We should not reset again
                    agent2.Done();
                    numberAgent2Reset += 1;
                    agent2StepSinceReset = 0;
                }
                agent2.Done();
                numberAgent2Reset += 1;
                agent2StepSinceReset = 0;
            }
            // Request a decision for agent 2 regularly
            if (i % 3 == 2)

            {
                // Request an action without decision regularly
                agent2.RequestAction();
            }
            if (agent1.IsDone())
            {
                numberAgent1Reset += 1;
                // Agent 1 is only initialized at step 2
                if (i < 2)
                { }
                aca.EnvironmentStep();
            }
        }

        var j = 0;
        for (var i = 0; i < 500; i++)
        {
            if (i % 20 == 0)
            {
                j = 0;
            }
            else
            {
                j++;
            }
            Assert.LessOrEqual(Mathf.Abs(j * 0.1f + j * 10f - agent1.GetCumulativeReward()), 0.05f);
            Assert.LessOrEqual(Mathf.Abs(j * 10.1f - agent1.GetCumulativeReward()), 0.05f);
            agent1.AddReward(10f);
            agent1.AddReward(10f);
            if ((i % 21 == 0) && (i > 0))
            {
                j = 0;
            }
            j++;
        }
    }
}

UnitySDK/Assets/ML-Agents/Scripts/Academy.cs (10 changes)


     // in addition to aligning on the step count of the global episode.
     public event System.Action<int> AgentSetStatus;
-    // Signals to all the agents at each environment step so they can reset
-    // if their flag has been set to done (assuming the agent has requested a
-    // decision).
-    public event System.Action AgentResetIfDone;
     // Signals to all the agents at each environment step so they can send
     // their state to their Policy if they have requested a decision.
     public event System.Action AgentSendState;

         DecideAction = () => { };
         DestroyAction = () => { };
         AgentSetStatus = i => { };
-        AgentResetIfDone = () => { };
         AgentSendState = () => { };
         AgentAct = () => { };
         AgentForceReset = () => { };

         AgentSetStatus?.Invoke(m_StepCount);
-        using (TimerStack.Instance.Scoped("AgentResetIfDone"))
-        {
-            AgentResetIfDone?.Invoke();
-        }
         using (TimerStack.Instance.Scoped("AgentSendState"))
         {

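With the AgentResetIfDone event deleted from the declaration, the no-op initialization, and the step loop, the per-step reset phase disappears entirely; resetting is now the Agent's own job. A hedged sketch of the remaining sequence inside EnvironmentStep, assuming the surviving phases keep the same TimerStack wrapping shown above:

    AgentSetStatus?.Invoke(m_StepCount);
    using (TimerStack.Instance.Scoped("AgentSendState"))
    {
        AgentSendState?.Invoke();
    }
    using (TimerStack.Instance.Scoped("DecideAction"))
    {
        DecideAction?.Invoke();
    }
    using (TimerStack.Instance.Scoped("AgentAct"))
    {
        AgentAct?.Invoke();
    }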
UnitySDK/Assets/ML-Agents/Scripts/Agent.cs (99 changes)


     public bool maxStepReached;
     /// <summary>
-    /// Unique identifier each agent receives at initialization. It is used
+    /// Episode identifier each agent receives at every reset. It is used
-    public int id;
+    public int episodeId;
 }
/// <summary>

     /// Whether or not the agent requests a decision.
     bool m_RequestDecision;
-    /// Whether or not the agent has completed the episode. This may be due
-    /// to either reaching a success or fail state, or reaching the maximum
-    /// number of steps (i.e. timing out).
-    bool m_Done;
-    /// Whether or not the agent reached the maximum number of steps.
-    bool m_MaxStepReached;
     /// Keeps track of the number of steps taken by the agent in this episode.
     /// Note that this value is different for each agent, and may not overlap

-    /// Unique identifier each agent receives at initialization. It is used
+    /// Episode identifier each agent receives. It is used
-    int m_Id;
+    /// This Id will be changed every time the Agent resets.
+    int m_EpisodeId;
     /// Keeps track of the actions that are masked at each step.
     ActionMasker m_ActionMasker;

     /// becomes enabled or active.
     void OnEnable()
     {
-        m_Id = gameObject.GetInstanceID();
+        m_EpisodeId = EpisodeIdCounter.GetEpisodeId();
         OnEnableHelper();
         m_Recorder = GetComponent<DemonstrationRecorder>();

         m_Action = new AgentAction();
         sensors = new List<ISensor>();
-        Academy.Instance.AgentResetIfDone += ResetIfDone;
         Academy.Instance.AgentSendState += SendInfo;
         Academy.Instance.DecideAction += DecideAction;
         Academy.Instance.AgentAct += AgentStep;

         // We don't want to even try, because this will lazily create a new Academy!
         if (Academy.IsInitialized)
         {
-            Academy.Instance.AgentResetIfDone -= ResetIfDone;
             Academy.Instance.AgentSendState -= SendInfo;
             Academy.Instance.DecideAction -= DecideAction;
             Academy.Instance.AgentAct -= AgentStep;

             m_Brain?.Dispose();
         }
-    void NotifyAgentDone()
+    void NotifyAgentDone(bool maxStepReached = false)
         m_Info.reward = m_Reward;
+        m_Info.maxStepReached = maxStepReached;
-        // We request a decision so Python knows the Agent is disabled
+        // We request a decision so Python knows the Agent is done immediately
+        // The Agent is done, so we give it a new episode Id
+        m_EpisodeId = EpisodeIdCounter.GetEpisodeId();
+        m_Reward = 0f;
+        m_CumulativeReward = 0f;
+        m_RequestAction = false;
+        m_RequestDecision = false;
     }
     /// <summary>

     /// </summary>
     public void Done()
     {
-        m_Done = true;
+        NotifyAgentDone();
+        _AgentReset();
     }
     /// <summary>

         m_RequestAction = true;
     }
-    /// <summary>
-    /// Indicates if the agent has reached his maximum number of steps.
-    /// </summary>
-    /// <returns>
-    /// <c>true</c>, if max step reached was reached, <c>false</c> otherwise.
-    /// </returns>
-    public bool IsMaxStepReached()
-    {
-        return m_MaxStepReached;
-    }
-    /// <summary>
-    /// Indicates if the agent is done
-    /// </summary>
-    /// <returns>
-    /// <c>true</c>, if the agent is done, <c>false</c> otherwise.
-    /// </returns>
-    public bool IsDone()
-    {
-        return m_Done;
-    }
     /// Helper function that resets all the data structures associated with
     /// the agent. Typically used when the agent is being initialized or reset
     /// at the end of an episode.

         m_Info.actionMasks = m_ActionMasker.GetMask();
         m_Info.reward = m_Reward;
-        m_Info.done = m_Done;
-        m_Info.maxStepReached = m_MaxStepReached;
-        m_Info.id = m_Id;
+        m_Info.done = false;
+        m_Info.maxStepReached = false;
+        m_Info.episodeId = m_EpisodeId;
         m_Brain.RequestDecision(m_Info, sensors, UpdateAgentAction);

     }
-    /// Signals the agent that it must reset if its done flag is set to true.
-    void ResetIfDone()
-    {
-        if (m_Done)
-        {
-            _AgentReset();
-        }
-    }
     /// <summary>
     /// Signals the agent that it must sent its decision to the brain.
     /// </summary>

-        if (m_RequestDecision || m_Done)
+        if (m_RequestDecision)
-            if (m_Done)
-            {
-                m_CumulativeReward = 0f;
-            }
-            m_Done = false;
-            m_MaxStepReached = false;
             m_RequestDecision = false;
         }
     }

     {
+        if ((m_StepCount >= maxStep - 1) && (maxStep > 0))
+        {
+            NotifyAgentDone(true);
+            _AgentReset();
+        }
+        else
+        {
+            m_StepCount += 1;
+        }
-        if ((m_StepCount >= maxStep) && (maxStep > 0))
-        {
-            m_MaxStepReached = true;
-            Done();
-        }
-        m_StepCount += 1;
     }
     void DecideAction()

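The caller-visible consequence: Done() is now synchronous, so by the time it returns, AgentReset() has already run. A hypothetical subclass (not part of the commit, and assuming the AgentAction(float[]) override signature of this API generation) illustrating the new semantics:

    public class CountingAgent : Agent   // hypothetical example
    {
        int m_HitCount;

        public override void AgentAction(float[] vectorAction)
        {
            m_HitCount += 1;
            if (m_HitCount >= 10)
            {
                Done();
                // Before this commit, m_HitCount kept its value until the next
                // EnvironmentStep; now AgentReset() has already zeroed it here.
            }
        }

        public override void AgentReset()
        {
            m_HitCount = 0;
        }
    }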
UnitySDK/Assets/ML-Agents/Scripts/Grpc/GrpcExtensions.cs (2 changes)


         Reward = ai.reward,
         MaxStepReached = ai.maxStepReached,
         Done = ai.done,
-        Id = ai.id,
+        Id = ai.episodeId,
     };
     if (ai.actionMasks != null)

UnitySDK/Assets/ML-Agents/Scripts/Grpc/RpcCommunicator.cs (2 changes)


         {
             m_ActionCallbacks[brainKey] = new List<IdCallbackPair>();
         }
-        m_ActionCallbacks[brainKey].Add(new IdCallbackPair { AgentId = info.id, Callback = action });
+        m_ActionCallbacks[brainKey].Add(new IdCallbackPair { AgentId = info.episodeId, Callback = action });
     }
     /// <summary>

UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/GeneratorImpl.cs (8 changes)


             if (info.done)
             {
-                m_Memories.Remove(info.id);
+                m_Memories.Remove(info.episodeId);
-            if (!m_Memories.TryGetValue(info.id, out memory))
+            if (!m_Memories.TryGetValue(info.episodeId, out memory))
             {
                 for (var j = 0; j < memorySize; j++)
                 {

             List<float> memory;
             if (info.done)
             {
-                m_Memories.Remove(info.id);
+                m_Memories.Remove(info.episodeId);
-            if (!m_Memories.TryGetValue(info.id, out memory))
+            if (!m_Memories.TryGetValue(info.episodeId, out memory))
             {
                 for (var j = 0; j < memorySize; j++)

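Keying m_Memories by episodeId rather than by the GameObject instance id means recurrent state cannot leak between episodes: a reset agent comes back under a brand-new key, so the TryGetValue miss path re-zeroes its memory even if a Remove were ever skipped. A small self-contained illustration of that pattern (hypothetical values, not the library code):

    using System.Collections.Generic;
    using MLAgents;   // for EpisodeIdCounter, added by this commit

    var memories = new Dictionary<int, List<float>>();

    var episodeId = EpisodeIdCounter.GetEpisodeId();      // e.g. 7
    memories[episodeId] = new List<float> { 0.1f, 0.2f }; // recurrent state for this episode

    // On Done(): the terminal step removes the entry, and the agent gets a fresh id.
    memories.Remove(episodeId);
    episodeId = EpisodeIdCounter.GetEpisodeId();          // e.g. 8; ids are never reused

    List<float> memory;
    if (!memories.TryGetValue(episodeId, out memory))
    {
        // Miss: GeneratorImpl fills the corresponding tensor slice with zeros here.
    }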
UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/ModelRunner.cs (2 changes)


             sensors = sensors
         });
-        m_ActionFuncs.Add(new AgentIdActionPair { action = action, agentId = info.id });
+        m_ActionFuncs.Add(new AgentIdActionPair { action = action, agentId = info.episodeId });
     }
     public void DecideBatch()
     {

docs/Migrating.md (1 change)


 * The `AgentAction` struct no longer contains a `value` field. (Value estimates were not set during inference)
 * The `GetValueEstimate()` method on the Agent has been removed.
 * The `UpdateValueAction()` method on the Agent has been removed.
+* Calling `Done()` on the Agent will now reset it immediately and call the `AgentReset` virtual method. (This is to simplify the previous logic in which the Agent had to wait for the next `EnvironmentStep` to reset)

 ### Steps to Migrate

 * If you were not using `On Demand Decision` for your Agent, you **must** add a `DecisionRequester` component to your Agent GameObject and set its `Decision Period` field to the old `Decision Period` of the Agent.
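For scenes wired up from code rather than in the Inspector, the same migration step can be scripted at setup time. A sketch, assuming DecisionRequester exposes its period as a public DecisionPeriod field and that agentGameObject is your Agent's GameObject:

    // Hypothetical setup snippet: attach the new component next to the Agent.
    var requester = agentGameObject.AddComponent<DecisionRequester>();
    requester.DecisionPeriod = 5;   // whatever Decision Period the Agent used before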

ml-agents/mlagents/trainers/agent_processor.py (19 changes)


"Policy/Learning Rate", take_action_outputs["learning_rate"]
)
terminated_agents: List[str] = []
# Make unique agent_ids that are global across workers
action_global_agent_ids = [
get_global_agent_id(worker_id, ag_id) for ag_id in previous_action.agent_ids

"Environment/Episode Length",
self.episode_steps.get(global_id, 0),
)
del self.episode_steps[global_id]
del self.episode_rewards[global_id]
terminated_agents += [global_id]
elif not curr_agent_step.done:
self.episode_steps[global_id] += 1

             self.policy.save_previous_action(
                 previous_action.agent_ids, take_action_outputs["action"]
             )
+        for terminated_id in terminated_agents:
+            self._clean_agent_data(terminated_id)
+
+    def _clean_agent_data(self, global_id: str) -> None:
+        """
+        Removes the data for an Agent.
+        """
+        del self.experience_buffers[global_id]
+        del self.last_take_action_outputs[global_id]
+        del self.episode_steps[global_id]
+        del self.episode_rewards[global_id]
+        del self.last_step_result[global_id]
+        self.policy.remove_previous_action([global_id])
+        self.policy.remove_memories([global_id])
+
     def publish_trajectory_queue(
         self, trajectory_queue: "AgentManagerQueue[Trajectory]"

UnitySDK/Assets/ML-Agents/Scripts/EpisodeIdCounter.cs (11 changes, new file)


+namespace MLAgents
+{
+    public static class EpisodeIdCounter
+    {
+        private static int Counter;
+        public static int GetEpisodeId()
+        {
+            return Counter++;
+        }
+    }
+}

UnitySDK/Assets/ML-Agents/Scripts/EpisodeIdCounter.cs.meta (11 changes, new file)


+fileFormatVersion: 2
+guid: 847786b7bcf9d4817b3f3879d57517c7
+MonoImporter:
+  externalObjects: {}
+  serializedVersion: 2
+  defaultReferences: []
+  executionOrder: 0
+  icon: {instanceID: 0}
+  userData:
+  assetBundleName:
+  assetBundleVariant: