Browse code

Always reset when agent is done (#3222)

* Removing the AgentOnDone call

* removing editor inspector field for ResetOnDone

* Documentation changes

* addressing comments

* addressing comments

* adding comments

* Migrating steps

* inference - fill 0s for done Agents (#3232)

* fill 0s for done agents

* docstrings

* Simplifying the code

* Removing GenerateSensorData

* Update docs/Migrating.md

Co-Authored-By: Chris Elion <chris.elion@unity3d.com>

Co-authored-by: Chris Elion <celion@gmail.com>
/asymm-envs
GitHub 5 years ago
Current commit
0366af0b
14 files changed, 88 insertions and 241 deletions
  1. UnitySDK/Assets/ML-Agents/Editor/AgentEditor.cs (5 changes)
  2. UnitySDK/Assets/ML-Agents/Editor/Tests/MLAgentsEditModeTest.cs (74 changes)
  3. UnitySDK/Assets/ML-Agents/Examples/Basic/Scripts/BasicAgent.cs (4 changes)
  4. UnitySDK/Assets/ML-Agents/Examples/Bouncer/Scripts/BouncerAgent.cs (3 changes)
  5. UnitySDK/Assets/ML-Agents/Examples/FoodCollector/Scripts/FoodCollectorAgent.cs (4 changes)
  6. UnitySDK/Assets/ML-Agents/Examples/Pyramids/Scripts/PyramidAgent.cs (4 changes)
  7. UnitySDK/Assets/ML-Agents/Examples/Template/Scripts/TemplateAgent.cs (4 changes)
  8. UnitySDK/Assets/ML-Agents/Scripts/Agent.cs (117 changes)
  9. UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/GeneratorImpl.cs (49 changes)
  10. UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/TensorProxy.cs (23 changes)
  11. docs/Learning-Environment-Create-New.md (9 changes)
  12. docs/Learning-Environment-Design-Agents.md (19 changes)
  13. docs/Learning-Environment-Design.md (12 changes)
  14. docs/Migrating.md (2 changes)

5
UnitySDK/Assets/ML-Agents/Editor/AgentEditor.cs


new GUIContent(
"Max Step", "The per-agent maximum number of steps."));
EditorGUILayout.PropertyField(
isResetOnDone,
new GUIContent(
"Reset On Done",
"If checked, the agent will reset on done. Else, AgentOnDone() will be called."));
EditorGUILayout.PropertyField(
isOdd,
new GUIContent(
"On Demand Decisions",

74
UnitySDK/Assets/ML-Agents/Editor/Tests/MLAgentsEditModeTest.cs


public int collectObservationsCalls;
public int agentActionCalls;
public int agentResetCalls;
public int agentOnDoneCalls;
public override void InitializeAgent()
{
initializeAgentCalls += 1;

public override void AgentReset()
{
agentResetCalls += 1;
}
public override void AgentOnDone()
{
agentOnDoneCalls += 1;
}
public override float[] Heuristic()

[TestFixture]
public class EditModeTestMiscellaneous
{
[SetUp]
public void SetUp()
{

}
}
[Test]
public void TestResetOnDone()
{
var agentGo1 = new GameObject("TestAgent");
agentGo1.AddComponent<TestAgent>();
var agent1 = agentGo1.GetComponent<TestAgent>();
var agentGo2 = new GameObject("TestAgent");
agentGo2.AddComponent<TestAgent>();
var agent2 = agentGo2.GetComponent<TestAgent>();
var aca = Academy.Instance;
var agentEnableMethod = typeof(Agent).GetMethod(
"OnEnableHelper", BindingFlags.Instance | BindingFlags.NonPublic);
agent1.agentParameters = new AgentParameters();
agent2.agentParameters = new AgentParameters();
// We use event based so the agent will not try to send anything to the brain
agent1.agentParameters.onDemandDecision = false;
// agent1 will take an action at every step and request a decision every step
agent1.agentParameters.numberOfActionsBetweenDecisions = 1;
// agent2 will request decisions only when RequestDecision is called
agent2.agentParameters.onDemandDecision = true;
agent1.agentParameters.maxStep = 20;
//Here we specify that the agent does not reset when done
agent1.agentParameters.resetOnDone = false;
agent2.agentParameters.resetOnDone = false;
agentEnableMethod?.Invoke(agent2, new object[] { });
agentEnableMethod?.Invoke(agent1, new object[] { });
var agent1ResetOnDone = 0;
var agent2ResetOnDone = 0;
var agent1StepSinceReset = 0;
var agent2StepSinceReset = 0;
for (var i = 0; i < 50; i++)
{
Assert.AreEqual(i, aca.GetTotalStepCount());
Assert.AreEqual(agent1StepSinceReset, agent1.GetStepCount());
Assert.AreEqual(agent2StepSinceReset, agent2.GetStepCount());
Assert.AreEqual(agent1ResetOnDone, agent1.agentOnDoneCalls);
Assert.AreEqual(agent2ResetOnDone, agent2.agentOnDoneCalls);
// we request a decision at each step
agent2.RequestDecision();
if (agent1ResetOnDone == 0)
agent1StepSinceReset += 1;
if (agent2ResetOnDone == 0)
agent2StepSinceReset += 1;
if ((i > 2) && (i % 21 == 0))
{
agent1ResetOnDone = 1;
}
if (i == 31)
{
agent2ResetOnDone = 1;
agent2.Done();
}
aca.EnvironmentStep();
}
}

4
UnitySDK/Assets/ML-Agents/Examples/Basic/Scripts/BasicAgent.cs


return new float[] { 0 };
}
public override void AgentOnDone()
{
}
public void FixedUpdate()
{
WaitTimeInference();

3
UnitySDK/Assets/ML-Agents/Examples/Bouncer/Scripts/BouncerAgent.cs


SetResetParameters();
}
public override void AgentOnDone()
{
}
void FixedUpdate()
{

4
UnitySDK/Assets/ML-Agents/Examples/FoodCollector/Scripts/FoodCollectorAgent.cs


}
}
public override void AgentOnDone()
{
}
public void SetLaserLengths()
{
m_LaserLength = Academy.Instance.FloatProperties.GetPropertyWithDefault("laser_length", 1.0f);

4
UnitySDK/Assets/ML-Agents/Examples/Pyramids/Scripts/PyramidAgent.cs


Done();
}
}
public override void AgentOnDone()
{
}
}

4
UnitySDK/Assets/ML-Agents/Examples/Template/Scripts/TemplateAgent.cs


public override void AgentReset()
{
}
public override void AgentOnDone()
{
}
}

117
UnitySDK/Assets/ML-Agents/Scripts/Agent.cs


public int maxStep;
/// <summary>
/// Determines the behaviour of the agent when done.
/// </summary>
/// <remarks>
/// If true, the agent will reset when done and start a new episode.
/// Otherwise, the agent will remain done and its behavior will be
/// dictated by the AgentOnDone method.
/// </remarks>
public bool resetOnDone = true;
/// <summary>
/// Whether to enable On Demand Decisions or make a decision at
/// every step.
/// </summary>

/// done has not been communicated (required for On Demand Decisions).
bool m_HasAlreadyReset;
/// Flag to signify that an agent is done and should not reset until
/// the fact that it is done has been communicated.
bool m_Terminate;
/// Unique identifier each agent receives at initialization. It is used
/// to separate between different agents in the environment.
int m_Id;

Academy.Instance.AgentAct -= AgentStep;
Academy.Instance.AgentForceReset -= _AgentReset;
}
NotifyAgentDone();
}
void NotifyAgentDone()
{
m_Info.done = true;
// Request the last decision with no callbacks
// We request a decision so Python knows the Agent is disabled
m_Brain?.RequestDecision(m_Info, sensors, (a) => { });
}
/// <summary>

}
/// <summary>
/// Generate data for each sensor and store it in the observations input.
/// NOTE: At the moment, this is only called during training or when using a DemonstrationRecorder;
/// during inference the Sensors are used to write directly to the Tensor data. This will likely change in the
/// future to be controlled by the type of brain being used.
/// </summary>
/// <param name="sensors"> List of ISensors that will be used to generate the data.</param>
/// <param name="buffer"> A float array that will be used as buffer when generating the observations. Must
/// be at least the same length as the total number of uncompressed floats in the observations</param>
/// <param name="adapter"> The WriteAdapter that will be used to write the ISensor data to the observations</param>
/// <param name="observations"> A list of observations outputs. This argument will be modified by this method.</param>//
public static void GenerateSensorData(List<ISensor> sensors, float[] buffer, WriteAdapter adapter, List<Observation> observations)
{
int floatsWritten = 0;
// Generate data for all Sensors
for (var i = 0; i < sensors.Count; i++)
{
var sensor = sensors[i];
if (sensor.GetCompressionType() == SensorCompressionType.None)
{
// TODO handle in communicator code instead
adapter.SetTarget(buffer, sensor.GetObservationShape(), floatsWritten);
var numFloats = sensor.Write(adapter);
var floatObs = new Observation
{
FloatData = new ArraySegment<float>(buffer, floatsWritten, numFloats),
Shape = sensor.GetObservationShape(),
CompressionType = sensor.GetCompressionType()
};
observations.Add(floatObs);
floatsWritten += numFloats;
}
else
{
var compressedObs = new Observation
{
CompressedData = sensor.GetCompressedObservation(),
Shape = sensor.GetObservationShape(),
CompressionType = sensor.GetCompressionType()
};
observations.Add(compressedObs);
}
}
}
/// <summary>
/// Collects the (vector, visual) observations of the agent.
/// The agent observation describes the current environment from the
/// perspective of the agent.

}
/// <summary>
/// Specifies the agent behavior when done and
/// <see cref="AgentParameters.resetOnDone"/> is false. This method can be
/// used to remove the agent from the scene.
/// </summary>
public virtual void AgentOnDone()
{
}
/// <summary>
/// Specifies the agent behavior when being reset, which can be due to
/// the agent or Academy being done (i.e. completion of local or global
/// episode).

  // request for a decision and an action
  if (IsDone())
  {
-     if (agentParameters.resetOnDone)
-     {
-         if (agentParameters.onDemandDecision)
-         {
-             if (!m_HasAlreadyReset)
-             {
-                 // If event based, the agent can reset as soon
-                 // as it is done
-                 _AgentReset();
-                 m_HasAlreadyReset = true;
-             }
-         }
-         else if (m_RequestDecision)
-         {
-             // If not event based, the agent must wait to request a
-             // decision before resetting to keep multiple agents in sync.
-             _AgentReset();
-         }
-     }
-     else
-     {
-         m_Terminate = true;
-         RequestDecision();
-     }
+     if (agentParameters.onDemandDecision)
+     {
+         if (!m_HasAlreadyReset)
+         {
+             // If event based, the agent can reset as soon
+             // as it is done
+             _AgentReset();
+             m_HasAlreadyReset = true;
+         }
+     }
+     else if (m_RequestDecision)
+     {
+         // If not event based, the agent must wait to request a
+         // decision before resetting to keep multiple agents in sync.
+         _AgentReset();
+     }
  }
  }

/// Used by the brain to make the agent perform a step.
void AgentStep()
{
if (m_Terminate)
{
m_Terminate = false;
ResetReward();
m_Done = false;
m_MaxStepReached = false;
m_RequestDecision = false;
m_RequestAction = false;
m_HasAlreadyReset = false;
OnDisable();
AgentOnDone();
}
if ((m_RequestAction) && (m_Brain != null))
{
m_RequestAction = false;

49
UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/GeneratorImpl.cs


  var agentIndex = 0;
  foreach (var info in infos)
  {
-     var tensorOffset = 0;
-     // Write each sensor consecutively to the tensor
-     foreach (var sensorIndex in m_SensorIndices)
-     {
-         var sensor = info.sensors[sensorIndex];
-         m_WriteAdapter.SetTarget(tensorProxy, agentIndex, tensorOffset);
-         var numWritten = sensor.Write(m_WriteAdapter);
-         tensorOffset += numWritten;
-     }
-     Debug.AssertFormat(
-         tensorOffset == vecObsSizeT,
-         "mismatch between vector observation size ({0}) and number of observations written ({1})",
-         vecObsSizeT, tensorOffset
-     );
+     if (info.agentInfo.done)
+     {
+         // If the agent is done, we might have a stale reference to the sensors
+         // e.g. a dependent object might have been disposed.
+         // To avoid this, just fill observation with zeroes instead of calling sensor.Write.
+         TensorUtils.FillTensorBatch(tensorProxy, agentIndex, 0.0f);
+     }
+     else
+     {
+         var tensorOffset = 0;
+         // Write each sensor consecutively to the tensor
+         foreach (var sensorIndex in m_SensorIndices)
+         {
+             var sensor = info.sensors[sensorIndex];
+             m_WriteAdapter.SetTarget(tensorProxy, agentIndex, tensorOffset);
+             var numWritten = sensor.Write(m_WriteAdapter);
+             tensorOffset += numWritten;
+         }
+         Debug.AssertFormat(
+             tensorOffset == vecObsSizeT,
+             "mismatch between vector observation size ({0}) and number of observations written ({1})",
+             vecObsSizeT, tensorOffset
+         );
+     }
      agentIndex++;
  }

  foreach (var infoSensorPair in infos)
  {
      var sensor = infoSensorPair.sensors[m_SensorIndex];
-     m_WriteAdapter.SetTarget(tensorProxy, agentIndex, 0);
-     sensor.Write(m_WriteAdapter);
+     if (infoSensorPair.agentInfo.done)
+     {
+         // If the agent is done, we might have a stale reference to the sensors
+         // e.g. a dependent object might have been disposed.
+         // To avoid this, just fill observation with zeroes instead of calling sensor.Write.
+         TensorUtils.FillTensorBatch(tensorProxy, agentIndex, 0.0f);
+     }
+     else
+     {
+         m_WriteAdapter.SetTarget(tensorProxy, agentIndex, 0);
+         sensor.Write(m_WriteAdapter);
+     }
      agentIndex++;
  }
  }

23
UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/TensorProxy.cs


}
/// <summary>
/// Fill a specific batch of a TensorProxy with a given value
/// </summary>
/// <param name="tensorProxy"></param>
/// <param name="batch">The batch index to fill.</param>
/// <param name="fillValue"></param>
public static void FillTensorBatch(TensorProxy tensorProxy, int batch, float fillValue)
{
var height = tensorProxy.data.height;
var width = tensorProxy.data.width;
var channels = tensorProxy.data.channels;
for (var h = 0; h < height; h++)
{
for (var w = 0; w < width; w++)
{
for (var c = 0; c < channels; c++)
{
tensorProxy.data[batch, h, w, c] = fillValue;
}
}
}
}
/// <summary>
/// Fill a pre-allocated Tensor with random numbers
/// </summary>
/// <param name="tensorProxy">The pre-allocated Tensor to fill</param>

9
docs/Learning-Environment-Create-New.md


}
```
**Note:** When you mark an Agent as done, it stops its activity until it is
reset. You can have the Agent reset immediately, by setting the
Agent.ResetOnDone property to true in the inspector or you can wait for the
Academy to reset the environment. This RollerBall environment relies on the
`ResetOnDone` mechanism and doesn't set a `Max Steps` limit for the Academy (so
it never resets the environment).
Finally, if the Agent falls off the platform, set the Agent to done so that it can reset itself:
```csharp
// Fell off platform

19
docs/Learning-Environment-Design-Agents.md


## Destroying an Agent
Before destroying an Agent GameObject, you must mark it as done (and wait for
the next step in the simulation) so that the Policy knows that this Agent is no
longer active. Thus, the best place to destroy an Agent is in the
`Agent.AgentOnDone()` function:
```csharp
public override void AgentOnDone()
{
Destroy(gameObject);
}
```
Note that in order for `AgentOnDone()` to be called, the Agent's `ResetOnDone`
property must be false. You can set `ResetOnDone` on the Agent's Inspector or in
code.
You can destroy an Agent GameObject during the simulation. Make sure that there is
always at least one Agent training at all times by either spawning a new Agent
every time one is destroyed or by re-spawning new Agents when the whole environment
resets.
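
As an illustrative sketch only (none of this code is in the diff), one way to keep at least one Agent training is a small respawn helper; the `AgentRespawner` class, `agentPrefab`, and `spawnPoint` names below are hypothetical.

```csharp
using UnityEngine;

// Hypothetical helper: respawn a replacement agent whenever one is destroyed,
// so the scene never runs out of training Agents.
public class AgentRespawner : MonoBehaviour
{
    public GameObject agentPrefab;   // assumed prefab containing an Agent component
    public Transform spawnPoint;     // assumed spawn location

    // Call this instead of destroying the agent GameObject directly.
    public void ReplaceAgent(GameObject oldAgent)
    {
        Destroy(oldAgent);
        Instantiate(agentPrefab, spawnPoint.position, Quaternion.identity);
    }
}
```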

12
docs/Learning-Environment-Design.md


5. Calls the `AgentAction()` function for each Agent in the scene, passing in
the action chosen by the Agent's Policy. (This function is not called if the
Agent is done.)
6. Calls the Agent's `AgentOnDone()` function if the Agent has reached its `Max
Step` count or has otherwise marked itself as `done`. Optionally, you can set
an Agent to restart if it finishes before the end of an episode. In this
case, the Academy calls the `AgentReset()` function.
6. Calls the Agent's `AgentReset()` function if the Agent has reached its `Max
Step` count or has otherwise marked itself as `done`.
To create a training environment, extend the Agent class to
implement the above methods. The `Agent.CollectObservations()` and

manually set an Agent to done in your `AgentAction()` function when the Agent
has finished (or irrevocably failed) its task by calling the `Done()` function.
You can also set the Agent's `Max Steps` property to a positive value and the
Agent will consider itself done after it has taken that many steps. If you
set an Agent's `ResetOnDone` property to true, then the Agent can attempt its
task several times in one episode. (Use the `Agent.AgentReset()` function to
prepare the Agent to start again.)
Agent will consider itself done after it has taken that many steps. You can
use the `Agent.AgentReset()` function to prepare the Agent to start again.
See [Agents](Learning-Environment-Design-Agents.md) for detailed information
about programming your own Agents.
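
For illustration (not taken from this diff), a minimal agent following the pattern described above might look like the sketch below; the class name, fall condition, and reset logic are hypothetical.

```csharp
using UnityEngine;
using MLAgents;

// Hypothetical minimal agent: calls Done() when the task ends and restores
// its state in AgentReset() so a new attempt can start.
public class ExampleAgent : Agent
{
    Vector3 m_StartPosition;

    public override void InitializeAgent()
    {
        m_StartPosition = transform.position;
    }

    public override void AgentAction(float[] vectorAction)
    {
        // ... apply vectorAction to the agent ...
        if (transform.position.y < 0f)   // e.g. fell off the platform
        {
            Done();                      // ends the episode; AgentReset() runs next
        }
    }

    public override void AgentReset()
    {
        transform.position = m_StartPosition;
    }
}
```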

2
docs/Migrating.md


* Trainer steps are now counted per-Agent, not per-environment as in previous versions. For instance, if you have 10 Agents in the scene, 20 environment steps now correspond to 200 steps as printed in the terminal and in Tensorboard.
* Curriculum config files are now YAML formatted and all curricula for a training run are combined into a single file.
* The `--num-runs` command-line option has been removed.
* The "Reset on Done" setting in AgentParameters was removed; this is now effectively always true. `AgentOnDone` virtual method on the Agent has been removed.
### Steps to Migrate
* If you have a class that inherits from Academy:

* Multiply `max_steps` and `summary_steps` in your `trainer_config.yaml` by the number of Agents in the scene.
* Combine curriculum configs into a single file. See [the WallJump curricula](../config/curricula/wall_jump.yaml) for an example of the new curriculum config format.
A tool like https://www.json2yaml.com may be useful to help with the conversion.
* If your Agent implements `AgentOnDone` and your Agent does not have the checkbox `Reset On Done` checked in the inspector, you must call the code that was in `AgentOnDone` manually.
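
As a hypothetical migration sketch (not part of the diff), one option is to move the old `AgentOnDone` body into an ordinary method and invoke it yourself where the episode ends; `MigratedAgent`, `CleanUpAfterEpisode`, and the fall condition are made-up names for illustration.

```csharp
using UnityEngine;
using MLAgents;

public class MigratedAgent : Agent
{
    // Was the body of AgentOnDone(); now an ordinary method you call explicitly.
    void CleanUpAfterEpisode()
    {
        Destroy(gameObject);
    }

    public override void AgentAction(float[] vectorAction)
    {
        // ... task logic ...
        if (transform.position.y < 0f)   // e.g. the agent has irrecoverably failed
        {
            Done();
            CleanUpAfterEpisode();   // previously triggered via AgentOnDone()
        }
    }
}
```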
## Migrating from ML-Agents toolkit v0.12.0 to v0.13.0
