
Develop remove academy done (#2519)

* Initial Commit

* Remove the Academy Done flag from the protobuf definitions

* remove global_done in the environment

* Removed irrelevant unitTests

* Remove the max_step from the Academy inspector

* Removed global_done from the python scripts

* Modified and removed some tests

* This actually breaks neither curriculum nor generalization training

* Replace global_done with reserved.
Addressing Chris Elion's comment regarding the deprecation of the global_done field. We use a reserved field to make sure the global_done field number cannot be reused in the future, which would cause errors.

* Removed unused fake brain

* Tested that the first call to step was the same as a reset call

* black formatting

* Added documentation changes

* Editing the migrating doc

* Addressing comments on the Migrating doc

* Addressing comments:
 - Removing dead code
 - Resolving forgotten merge conflicts
 - Editing documentation...
Committed by GitHub · 5 years ago
Current commit: babe9e2f
19 files changed, 91 insertions(+), 475 deletions(-)
  1. UnitySDK/Assets/ML-Agents/Editor/Tests/MLAgentsEditModeTest.cs (244 changes)
  2. UnitySDK/Assets/ML-Agents/Editor/Tests/RayPerceptionTests.cs (1 change)
  3. UnitySDK/Assets/ML-Agents/Scripts/Academy.cs (76 changes)
  4. UnitySDK/Assets/ML-Agents/Scripts/Agent.cs (43 changes)
  5. UnitySDK/Assets/ML-Agents/Scripts/Batcher.cs (19 changes)
  6. docs/Learning-Environment-Design-Academy.md (5 changes)
  7. docs/Learning-Environment-Design.md (24 changes)
  8. docs/Migrating.md (1 change)
  9. docs/Training-Curriculum-Learning.md (4 changes)
  10. ml-agents-envs/mlagents/envs/base_unity_environment.py (5 changes)
  11. ml-agents-envs/mlagents/envs/environment.py (33 changes)
  12. ml-agents-envs/mlagents/envs/mock_communicator.py (8 changes)
  13. ml-agents-envs/mlagents/envs/simple_env_manager.py (23 changes)
  14. ml-agents-envs/mlagents/envs/subprocess_env_manager.py (27 changes)
  15. ml-agents-envs/mlagents/envs/tests/test_envs.py (12 changes)
  16. ml-agents-envs/mlagents/envs/tests/test_subprocess_env_manager.py (34 changes)
  17. ml-agents/mlagents/trainers/tests/test_simple_rl.py (4 changes)
  18. ml-agents/mlagents/trainers/tests/test_trainer_controller.py (1 change)
  19. protobuf-definitions/proto/mlagents/envs/communicator_objects/unity_rl_output.proto (2 changes)

UnitySDK/Assets/ML-Agents/Editor/Tests/MLAgentsEditModeTest.cs (244 changes)


public class TestAcademy : Academy
{
public int initializeAcademyCalls;
public int academyResetCalls;
public int AcademyStepCalls;
public override void InitializeAcademy()

public override void AcademyReset()
{
academyResetCalls += 1;
}
public override void AcademyStep()

public class EditModeTestInitialization
{
- /*
- private Brain GenerateTestBrain()
- {
- return ScriptableObject.CreateInstance<TestBrain>();
- }
- */
[Test]
public void TestAcademy()
{

Assert.AreEqual(0, aca.initializeAcademyCalls);
Assert.AreEqual(0, aca.GetStepCount());
Assert.AreEqual(0, aca.GetEpisodeCount());
- Assert.AreEqual(false, aca.IsDone());
- academyInitializeMethod?.Invoke(aca, new object[] {});
+ academyInitializeMethod?.Invoke(aca, new object[] { });
- Assert.AreEqual(false, aca.IsDone());
Assert.AreEqual(0, aca.academyResetCalls);
Assert.AreEqual(0, aca.AcademyStepCalls);
}

agentEnableMethod?.Invoke(agent2, new object[] { aca });
- academyInitializeMethod?.Invoke(aca, new object[] {});
+ academyInitializeMethod?.Invoke(aca, new object[] { });
agentEnableMethod?.Invoke(agent1, new object[] { aca });
Assert.AreEqual(false, agent1.IsDone());

var aca = acaGo.GetComponent<TestAcademy>();
var academyInitializeMethod = typeof(Academy).GetMethod("InitializeEnvironment",
BindingFlags.Instance | BindingFlags.NonPublic);
- academyInitializeMethod?.Invoke(aca, new object[] {});
+ academyInitializeMethod?.Invoke(aca, new object[] { });
var academyStepMethod = typeof(Academy).GetMethod("EnvironmentStep",
BindingFlags.Instance | BindingFlags.NonPublic);

Assert.AreEqual(1, aca.initializeAcademyCalls);
Assert.AreEqual(numberReset, aca.GetEpisodeCount());
Assert.AreEqual(i, aca.GetStepCount());
- Assert.AreEqual(false, aca.IsDone());
Assert.AreEqual(numberReset, aca.academyResetCalls);
Assert.AreEqual(i, aca.AcademyStepCalls);
// The reset happens at the beginning of the first step

}
- academyStepMethod?.Invoke(aca, new object[] {});
+ academyStepMethod?.Invoke(aca, new object[] { });
}
}

agent2.GiveBrain(brain);
agentEnableMethod?.Invoke(agent1, new object[] { aca });
- academyInitializeMethod?.Invoke(aca, new object[] {});
+ academyInitializeMethod?.Invoke(aca, new object[] { });
var academyStepMethod = typeof(Academy).GetMethod(
"EnvironmentStep", BindingFlags.Instance | BindingFlags.NonPublic);

requestAction += 1;
agent2.RequestAction();
}
- academyStepMethod?.Invoke(aca, new object[] {});
+ academyStepMethod?.Invoke(aca, new object[] { });
}
}
}

var aca = acaGo.GetComponent<TestAcademy>();
var academyInitializeMethod = typeof(Academy).GetMethod(
"InitializeEnvironment", BindingFlags.Instance | BindingFlags.NonPublic);
- academyInitializeMethod?.Invoke(aca, new object[] {});
+ academyInitializeMethod?.Invoke(aca, new object[] { });
var academyStepMethod = typeof(Academy).GetMethod(
"EnvironmentStep", BindingFlags.Instance | BindingFlags.NonPublic);

Assert.AreEqual(stepsSinceReset, aca.GetStepCount());
Assert.AreEqual(1, aca.initializeAcademyCalls);
Assert.AreEqual(numberReset, aca.GetEpisodeCount());
- Assert.AreEqual(false, aca.IsDone());
Assert.AreEqual(numberReset, aca.academyResetCalls);
Assert.AreEqual(i, aca.AcademyStepCalls);
// Academy resets at the first step
if (i == 0)

stepsSinceReset += 1;
- // Regularly set the academy to done to check behavior
- if (i % 5 == 3)
- {
- aca.Done();
- numberReset += 1;
- stepsSinceReset = 1;
- Assert.AreEqual(true, aca.IsDone());
- }
- academyStepMethod?.Invoke(aca, new object[] {});
+ academyStepMethod.Invoke((object)aca, new object[] { });
}
}

agent2.GiveBrain(brain);
agentEnableMethod?.Invoke(agent2, new object[] { aca });
academyInitializeMethod?.Invoke(aca, new object[] {});
academyInitializeMethod?.Invoke(aca, new object[] { });
var numberAgent1Reset = 0;
var numberAgent2Reset = 0;

Assert.AreEqual(1, aca.initializeAcademyCalls);
Assert.AreEqual(numberAcaReset, aca.GetEpisodeCount());
- Assert.AreEqual(false, aca.IsDone());
Assert.AreEqual(numberAcaReset, aca.academyResetCalls);
Assert.AreEqual(i, aca.AcademyStepCalls);
Assert.AreEqual(agent2StepSinceReset, agent2.GetStepCount());

{
agentEnableMethod?.Invoke(agent1, new object[] { aca });
}
- // Reset Academy every 100 steps
- if (i % 100 == 3)
- {
- aca.Done();
- numberAcaReset += 1;
- acaStepsSinceReset = 0;
- }
// Set agent 1 to done every 11 steps to test behavior
if (i % 11 == 5)
{

if (i % 13 == 3)
{
- if (!(agent2.IsDone() || aca.IsDone()))
+ if (!(agent2.IsDone()))
{
// If the agent was already reset before the request decision
// We should not reset again

// Request an action without decision regularly
agent2.RequestAction();
}
- if (agent1.IsDone() && (((acaStepsSinceReset) % agent1.agentParameters.numberOfActionsBetweenDecisions == 0)) || aca.IsDone())
+ if (agent1.IsDone() && (((acaStepsSinceReset) % agent1.agentParameters.numberOfActionsBetweenDecisions == 0)))
- if (aca.IsDone())
- {
- numberAgent2Reset += 1;
- agent2StepSinceReset = 0;
- }
- {}
academyStepMethod?.Invoke(aca, new object[] {});
}
}
}
public class EditModeTestMaxStep
{
[Test]
public void TestAcademy()
{
var acaGo = new GameObject("TestAcademy");
acaGo.AddComponent<TestAcademy>();
var aca = acaGo.GetComponent<TestAcademy>();
var academyInitializeMethod = typeof(Academy).GetMethod(
"InitializeEnvironment", BindingFlags.Instance | BindingFlags.NonPublic);
academyInitializeMethod?.Invoke(aca, new object[] {});
var academyStepMethod = typeof(Academy).GetMethod(
"EnvironmentStep", BindingFlags.Instance | BindingFlags.NonPublic);
- var maxStep = typeof(Academy).GetField(
- "m_MaxSteps", BindingFlags.Instance | BindingFlags.NonPublic);
- maxStep?.SetValue(aca, 20);
var numberReset = 0;
var stepsSinceReset = 0;
for (var i = 0; i < 50; i++)
{
Assert.AreEqual(stepsSinceReset, aca.GetStepCount());
Assert.AreEqual(1, aca.initializeAcademyCalls);
- Assert.AreEqual(false, aca.IsDone());
Assert.AreEqual(i, aca.AcademyStepCalls);
Assert.AreEqual(numberReset, aca.GetEpisodeCount());
Assert.AreEqual(numberReset, aca.academyResetCalls);
stepsSinceReset += 1;
// Make sure max step is reached every 20 steps
if (i % 20 == 0)
{
numberReset += 1;
stepsSinceReset = 1;
}
if (academyStepMethod != null) academyStepMethod.Invoke(aca, new object[] {});
}
}
[Test]
public void TestAgent()
{
var agentGo1 = new GameObject("TestAgent");
agentGo1.AddComponent<TestAgent>();
var agent1 = agentGo1.GetComponent<TestAgent>();
var agentGo2 = new GameObject("TestAgent");
agentGo2.AddComponent<TestAgent>();
var agent2 = agentGo2.GetComponent<TestAgent>();
var acaGo = new GameObject("TestAcademy");
acaGo.AddComponent<TestAcademy>();
var aca = acaGo.GetComponent<TestAcademy>();
var brain = TestBrain.Instantiate();
var agentEnableMethod = typeof(Agent).GetMethod(
"OnEnableHelper", BindingFlags.Instance | BindingFlags.NonPublic);
var academyInitializeMethod = typeof(Academy).GetMethod(
"InitializeEnvironment", BindingFlags.Instance | BindingFlags.NonPublic);
var academyStepMethod = typeof(Academy).GetMethod(
"EnvironmentStep", BindingFlags.Instance | BindingFlags.NonPublic);
- var maxStep = typeof(Academy).GetField(
- "m_MaxSteps", BindingFlags.Instance | BindingFlags.NonPublic);
- maxStep?.SetValue(aca, 100);
agent1.agentParameters = new AgentParameters();
agent2.agentParameters = new AgentParameters();
brain.brainParameters = new BrainParameters();
// We use event-based decisions, so the agent will not try to send anything to the brain
agent1.agentParameters.onDemandDecision = false;
agent1.agentParameters.numberOfActionsBetweenDecisions = 1;
// agent1 will take an action at every step and request a decision every 2 steps
agent2.agentParameters.onDemandDecision = true;
// agent2 will request decisions only when RequestDecision is called
agent1.agentParameters.maxStep = 20;
agent2.agentParameters.maxStep = 30;
brain.brainParameters.vectorObservationSize = 0;
brain.brainParameters.cameraResolutions = new Resolution[0];
agent1.GiveBrain(brain);
agent2.GiveBrain(brain);
agentEnableMethod?.Invoke(agent2, new object[] { aca });
academyInitializeMethod?.Invoke(aca, new object[] {});
var numberAgent1Reset = 0;
var numberAgent2Reset = 0;
var numberAcaReset = 0;
var acaStepsSinceReset = 0;
var agent1StepSinceReset = 0;
var agent2StepSinceReset = 0;
for (var i = 0; i < 500; i++)
{
Assert.AreEqual(acaStepsSinceReset, aca.GetStepCount());
Assert.AreEqual(1, aca.initializeAcademyCalls);
Assert.AreEqual(i, aca.AcademyStepCalls);
Assert.AreEqual(agent1StepSinceReset, agent1.GetStepCount());
Assert.AreEqual(agent2StepSinceReset, agent2.GetStepCount());
Assert.AreEqual(numberAcaReset, aca.GetEpisodeCount());
Assert.AreEqual(numberAcaReset, aca.academyResetCalls);
Assert.AreEqual(numberAgent1Reset, agent1.agentResetCalls);
Assert.AreEqual(numberAgent2Reset, agent2.agentResetCalls);
//At the first step, Academy and agent 2 reset
if (i == 0)
{
numberAcaReset += 1;
numberAgent2Reset += 1;
}
//Agent 1 is only initialized at step 2
if (i == 2)
{
agentEnableMethod?.Invoke(agent1, new object[] { aca });
}
// we request a decision at each step
agent2.RequestDecision();
if (i > 3)
{
// Make sure the academy max steps at 100
if (i % 100 == 0)
{
acaStepsSinceReset = 0;
agent1StepSinceReset = 0;
agent2StepSinceReset = 0;
numberAcaReset += 1;
numberAgent1Reset += 1;
numberAgent2Reset += 1;
}
else
{
//Make sure the agents reset when their max steps is reached
if (agent1StepSinceReset % 21 == 0)
{
agent1StepSinceReset = 0;
numberAgent1Reset += 1;
}
if (agent2StepSinceReset % 31 == 0)
{
agent2StepSinceReset = 0;
numberAgent2Reset += 1;
}
}
}
acaStepsSinceReset += 1;
agent1StepSinceReset += 1;
agent2StepSinceReset += 1;
//Agent 1 is only initialized at step 2
if (i < 2)
{
agent1StepSinceReset = 0;
}
- academyStepMethod?.Invoke(aca, new object[] {});
{ }
+ academyStepMethod?.Invoke(aca, new object[] { });
}
}
}

agent2.GiveBrain(brain);
agentEnableMethod?.Invoke(agent2, new object[] { aca });
- academyInitializeMethod?.Invoke(aca, new object[] {});
+ academyInitializeMethod?.Invoke(aca, new object[] { });
agentEnableMethod?.Invoke(agent1, new object[] { aca });
var agent1ResetOnDone = 0;

}
- academyStepMethod?.Invoke(aca, new object[] {});
+ academyStepMethod?.Invoke(aca, new object[] { });
}
}

agent2.GiveBrain(brain);
agentEnableMethod?.Invoke(agent2, new object[] { aca });
- academyInitializeMethod?.Invoke(aca, new object[] {});
+ academyInitializeMethod?.Invoke(aca, new object[] { });
agentEnableMethod?.Invoke(agent1, new object[] { aca });

Assert.LessOrEqual(Mathf.Abs(i * 0.1f - agent2.GetCumulativeReward()), 0.05f);
- academyStepMethod?.Invoke(aca, new object[] {});
+ academyStepMethod?.Invoke(aca, new object[] { });
agent1.AddReward(10f);
if ((i % 21 == 0) && (i > 0))

UnitySDK/Assets/ML-Agents/Editor/Tests/RayPerceptionTests.cs (1 change)


var rayPer2D = go.AddComponent<RayPerception2D>();
var result = rayPer2D.Perceive(1f, angles,
tags);
- Debug.Log(result.Count);
Assert.IsTrue(result.Count == angles.Length * (tags.Length + 2));
}
}

UnitySDK/Assets/ML-Agents/Scripts/Academy.cs (76 changes)


[Tooltip("Height of the environment window in pixels.")]
public int height;
[Tooltip("Rendering quality of environment. (Higher is better quality.)")][Range(0, 5)]
[Tooltip("Rendering quality of environment. (Higher is better quality.)")]
[Range(0, 5)]
[Tooltip("Speed at which environment is run. (Higher is faster.)")][Range(1f, 100f)]
[Tooltip("Speed at which environment is run. (Higher is faster.)")]
[Range(1f, 100f)]
public float timeScale;
[Tooltip("Frames per second (FPS) engine attempts to maintain.")]

[FormerlySerializedAs("maxSteps")]
[SerializeField]
[Tooltip("Total number of steps per global episode.\nNon-positive " +
"values correspond to episodes without a maximum number of \n" +
"steps. Once the step counter reaches this maximum value, the " +
"environment will reset.")]
int m_MaxSteps;
[FormerlySerializedAs("trainingConfiguration")]
[SerializeField]
[Tooltip("The engine-level settings which correspond to rendering " +
"quality and engine speed during Training.")]
EnvironmentConfiguration m_TrainingConfiguration =

/// external Brain during reset via <see cref="SetIsInference"/>.
bool m_IsInference = true;
- /// The done flag of the academy. When set to true, the academy will
- /// call <see cref="AcademyReset"/> instead of <see cref="AcademyStep"/>
- /// at step time. If true, all agents done flags will be set to true.
- bool m_Done;
- /// Whether the academy has reached the maximum number of steps for the
- /// current episode.
- bool m_MaxStepReached;
/// The number of episodes completed by the environment. Incremented
/// each time the environment is reset.
int m_EpisodeCount;

// Academy's maxStepReached, done and stepCount values. The agents rely
// on this event to update their own values of max step reached and done
// in addition to aligning on the step count of the global episode.
- public event System.Action<bool, bool, int> AgentSetStatus;
+ public event System.Action<int> AgentSetStatus;
// Signals to all the agents at each environment step so they can reset
// if their flag has been set to done (assuming the agent has requested a

// in inference mode.
m_IsInference = !m_IsCommunicatorOn;
- BrainDecideAction += () => {};
- DestroyAction += () => {};
- AgentSetStatus += (m, d, i) => {};
- AgentResetIfDone += () => {};
- AgentSendState += () => {};
- AgentAct += () => {};
- AgentForceReset += () => {};
+ BrainDecideAction += () => { };
+ DestroyAction += () => { };
+ AgentSetStatus += (i) => { };
+ AgentResetIfDone += () => { };
+ AgentSendState += () => { };
+ AgentAct += () => { };
+ AgentForceReset += () => { };
// Configure the environment using the configurations provided by
// the developer in the Editor.

}
- /// <summary>
- /// Sets the done flag to true.
- /// </summary>
- public void Done()
- {
- m_Done = true;
- }
- /// <summary>
- /// Returns whether or not the academy is done.
- /// </summary>
- /// <returns>
- /// <c>true</c>, if academy is done, <c>false</c> otherwise.
- /// </returns>
- public bool IsDone()
- {
- return m_Done;
- }
/// <summary>
/// Returns whether or not the communicator is on.
/// </summary>
/// <returns>

ForcedFullReset();
}
- if ((m_StepCount >= m_MaxSteps) && m_MaxSteps > 0)
- {
- m_MaxStepReached = true;
- Done();
- }
- AgentSetStatus(m_MaxStepReached, m_Done, m_StepCount);
- m_BrainBatcher.RegisterAcademyDoneFlag(m_Done);
- if (m_Done)
- {
- EnvironmentReset();
- }
+ AgentSetStatus(m_StepCount);
AgentResetIfDone();

{
m_StepCount = 0;
m_EpisodeCount++;
- m_Done = false;
- m_MaxStepReached = false;
AcademyReset();
}

UnitySDK/Assets/ML-Agents/Scripts/Agent.cs (43 changes)


{
var agentInfoProto = new AgentInfoProto
{
- StackedVectorObservation = {stackedVectorObservation},
- StoredVectorActions = {storedVectorActions},
+ StackedVectorObservation = { stackedVectorObservation },
+ StoredVectorActions = { storedVectorActions },
StoredTextActions = storedTextActions,
TextObservation = textObservation,
Reward = reward,

academy.AgentResetIfDone -= ResetIfDone;
academy.AgentSendState -= SendInfo;
academy.AgentAct -= AgentStep;
- academy.AgentForceReset -= _AgentReset;
+ academy.AgentForceReset -= ForceReset;
}
}

}
+ /// <summary>
+ /// This method will forcefully reset the agent and will also reset the hasAlreadyReset flag.
+ /// This way, even if the agent was already in the process of resetting, it will be reset again
+ /// and will not send a Done flag at the next step.
+ /// </summary>
+ void ForceReset()
+ {
+ m_HasAlreadyReset = false;
+ _AgentReset();
+ }
/// <summary>
/// An internal reset method that updates internal data structures in
/// addition to calling <see cref="AgentReset"/>.
/// </summary>

}
/// <summary>
- /// Sets the status of the agent.
+ /// Sets the status of the agent. Will request decisions or actions according
+ /// to the Academy's step count.
- /// <param name="academyMaxStep">If set to <c>true</c>
- /// The agent must set maxStepReached.</param>
- /// <param name="academyDone">If set to <c>true</c>
- /// The agent must set done.</param>
- void SetStatus(bool academyMaxStep, bool academyDone, int academyStepCounter)
+ void SetStatus(int academyStepCounter)
- if (academyDone)
- {
- academyStepCounter = 0;
- }
- if (academyMaxStep)
- {
- m_MaxStepReached = true;
- }
- // If the Academy needs to reset, the agent should reset
- // even if it reset recently.
- if (academyDone)
- {
- Done();
- m_HasAlreadyReset = false;
- }
}
/// Signals the agent that it must reset if its done flag is set to true.

UnitySDK/Assets/ML-Agents/Scripts/Batcher.cs (19 changes)


CommunicatorObjects.UnityRLOutput m_CurrentUnityRlOutput =
new CommunicatorObjects.UnityRLOutput();
- /// Keeps track of the done flag of the Academy
- bool m_AcademyDone;
/// Keeps track of last CommandProto sent by External
CommunicatorObjects.CommandProto m_Command;

m_EnvironmentParameters = firstRlInput.EnvironmentParameters;
m_IsTraining = firstRlInput.IsTraining;
return initializationInput.RlInitializationInput;
}
- /// <summary>
- /// Registers the done flag of the academy to the next output to be sent
- /// to the communicator.
- /// </summary>
- /// <param name="done">If set to <c>true</c>
- /// The academy done state will be sent to External at the next Exchange.</param>
- public void RegisterAcademyDoneFlag(bool done)
- {
- m_AcademyDone = done;
- }
/// <summary>

// must be sent
if (m_HasQueried.Values.All(x => x))
{
- if (m_HasData.Values.Any(x => x) || m_AcademyDone)
+ if (m_HasData.Values.Any(x => x))
- m_CurrentUnityRlOutput.GlobalDone = m_AcademyDone;
SendBatchedMessageHelper();
}

}
/// <summary>
- /// Helper method that sends the curent UnityRLOutput, receives the next UnityInput and
+ /// Helper method that sends the current UnityRLOutput, receives the next UnityInput and
/// Applies the appropriate AgentAction to the agents.
/// </summary>
void SendBatchedMessageHelper()

docs/Learning-Environment-Design-Academy.md (5 changes)


Implement an `AcademyReset()` function to alter the environment at the start of
each episode. For example, you might want to reset an Agent to its starting
position or move a goal to a random position. An environment resets when the
- Academy `Max Steps` count is reached.
+ `reset()` method is called on the Python `UnityEnvironment`.
When you reset an environment, consider the factors that should change so that
training is generalizable to different conditions. For example, if you were

process. Any Brain added to the Broadcast Hub will be visible from the external
process. In addition, if the checkbox `Control` is checked, the Brain will be
controllable from the external process and will thus be trainable.
- * `Max Steps` - Total number of steps per-episode. `0` corresponds to episodes
- without a maximum number of steps. Once the step counter reaches maximum, the
- environment will reset.
* `Configuration` - The engine-level settings which correspond to rendering
quality and engine speed.
* `Width` - Width of the environment window in pixels.

docs/Learning-Environment-Design.md (24 changes)


Training and simulation proceed in steps orchestrated by the ML-Agents Academy
class. The Academy works with Agent objects in the scene to step
- through the simulation. When either the Academy has reached its maximum number
- of steps or all Agents in the scene are _done_, one training episode is
- finished.
+ through the simulation. When all Agents in the scene are _done_,
+ one training episode is finished.
During training, the external Python training process communicates with the
Academy to run a series of episodes while it collects data and optimizes its

Step` count or has otherwise marked itself as `done`. Optionally, you can set
an Agent to restart if it finishes before the end of an episode. In this
case, the Academy calls the `AgentReset()` function.
- 8. When the Academy reaches its own `Max Step` count, it starts the next episode
- again by calling your Academy subclass's `AcademyReset()` function.
To create a training environment, extend the Academy and Agent classes to
implement the above methods. The `Agent.CollectObservations()` and

Agents have already collected their observations and chosen an action before
the Academy invokes this method.
- The base Academy classes also defines several important properties that you can
- set in the Unity Editor Inspector. For training, the most important of these
- properties is `Max Steps`, which determines how long each training episode
- lasts. Once the Academy's step counter reaches this value, it calls the
- `AcademyReset()` function to start the next episode.
See [Academy](Learning-Environment-Design-Academy.md) for a complete list of
the Academy properties and their uses.

manually set an Agent to done in your `AgentAction()` function when the Agent
has finished (or irrevocably failed) its task by calling the `Done()` function.
You can also set the Agent's `Max Steps` property to a positive value and the
- Agent will consider itself done after it has taken that many steps. When the
- Academy reaches its own `Max Steps` count, it starts the next episode. If you
+ Agent will consider itself done after it has taken that many steps. If you
set an Agent's `ResetOnDone` property to true, then the Agent can attempt its
task several times in one episode. (Use the `Agent.AgentReset()` function to
prepare the Agent to start again.)

properties that can be set differently for a training scene versus a regular
scene. The Academy's **Configuration** properties control rendering and time
scale. You can set the **Training Configuration** to minimize the time Unity
- spends rendering graphics in order to speed up training. You may need to adjust
- the other functional, Academy settings as well. For example, `Max Steps` should
- be as short as possible for training — just long enough for the agent to
- accomplish its task, with some extra time for "wandering" while it learns. In
- regular scenes, you often do not want the Academy to reset the scene at all; if
- so, `Max Steps` should be set to zero.
+ spends rendering graphics in order to speed up training.
When you create a training environment in Unity, you must set up the scene so
that it can be controlled by the external training process. Considerations
include:

docs/Migrating.md (1 change)


#### Steps to Migrate
* `UnitySDK/Assets/ML-Agents/Scripts/Communicator.cs` and its class `Communicator` have been renamed to `UnitySDK/Assets/ML-Agents/Scripts/ICommunicator.cs` and `ICommunicator` respectively.
* The `SpaceType` Enums `discrete`, and `continuous` have been renamed to `Discrete` and `Continuous`.
+ * We have removed the `Done` call as well as the ability to set `Max Steps` on the Academy. An Academy reset will therefore never be triggered from C# (only from Python). If you want to reset the simulation after a fixed number of steps, or when an event occurs in the simulation, we recommend looking at our multi-agent example environments (such as BananaCollector), where an "Area" object can reset its own group of Agents; a minimal sketch follows below.
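
A minimal Python sketch of the reset flow described above, assuming a built environment (the file name "BananaCollector" here is hypothetical):

from mlagents.envs.environment import UnityEnvironment

env = UnityEnvironment(file_name="BananaCollector")  # hypothetical build name
env.reset()  # after this change, the only way to force an Academy reset
env.close()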
## Migrating from ML-Agents toolkit v0.8 to v0.9

docs/Training-Curriculum-Learning.md (4 changes)


Once our curriculum is defined, we have to use the reset parameters we defined
and modify the environment from the Agent's `AgentReset()` function. See
[WallJumpAgent.cs](https://github.com/Unity-Technologies/ml-agents/blob/master/UnitySDK/Assets/ML-Agents/Examples/WallJump/Scripts/WallJumpAgent.cs)
- for an example. Note that if the Academy's __Max Steps__ is not set to some
- positive number the environment will never be reset. The Academy must reset
- for the environment to reset.
+ for an example.
We will save this file into our metacurriculum folder with the name of its
corresponding Brain. For example, in the Wall Jump environment, there are two

ml-agents-envs/mlagents/envs/base_unity_environment.py (5 changes)


- @property
- @abstractmethod
- def global_done(self):
- pass
@property
@abstractmethod
def external_brains(self) -> Dict[str, BrainParameters]:
pass
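
For context, a hedged sketch of the trimmed abstract base after this hunk, showing only the members visible here (global_done is simply gone; the per-agent done flags in BrainInfo remain the only termination signal):

from abc import ABC, abstractmethod
from typing import Dict

class BaseUnityEnvironment(ABC):
    @property
    @abstractmethod
    def external_brains(self) -> Dict[str, "BrainParameters"]:
        pass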

ml-agents-envs/mlagents/envs/environment.py (33 changes)


"of ML-Agents.".format(self._version_, self._unity_version)
)
self._n_agents: Dict[str, int] = {}
- self._global_done: Optional[bool] = None
+ self._is_first_message = True
self._academy_name = aca_params.name
self._log_path = aca_params.log_path
self._brains: Dict[str, BrainParameters] = {}

@property
def brains(self):
return self._brains
- @property
- def global_done(self):
- return self._global_done
@property
def academy_name(self):

raise UnityCommunicationException("Communicator has stopped.")
rl_output = outputs.rl_output
s = self._get_state(rl_output)
- self._global_done = s[1]
- self._n_agents[_b] = len(s[0][_b].agents)
- return s[0]
+ self._n_agents[_b] = len(s[_b].agents)
+ self._is_first_message = False
+ return s
else:
raise UnityEnvironmentException("No Unity environment is loaded.")

:param custom_action: Optional instance of a CustomAction protobuf message.
:return: AllBrainInfo : A Data structure corresponding to the new state of the environment.
"""
+ if self._is_first_message:
+ return self.reset()
vector_action = {} if vector_action is None else vector_action
memory = {} if memory is None else memory
text_action = {} if text_action is None else text_action

# Check that environment is loaded, and episode is currently running.
if not self._loaded:
raise UnityEnvironmentException("No Unity environment is loaded.")
- elif self._global_done:
- raise UnityActionException(
- "The episode is completed. Reset the environment with 'reset()'"
- )
- elif self.global_done is None:
- raise UnityActionException(
- "You cannot conduct step without first calling reset. "
- "Reset the environment with 'reset()'"
- )
else:
if isinstance(vector_action, self.SINGLE_BRAIN_ACTION_TYPES):
if self._num_external_brains == 1:

raise UnityCommunicationException("Communicator has stopped.")
rl_output = outputs.rl_output
state = self._get_state(rl_output)
- self._global_done = state[1]
- self._n_agents[_b] = len(state[0][_b].agents)
- return state[0]
+ self._n_agents[_b] = len(state[_b].agents)
+ return state
def close(self):
"""

arr = [float(x) for x in arr]
return arr
- def _get_state(self, output: UnityRLOutput) -> Tuple[AllBrainInfo, bool]:
+ def _get_state(self, output: UnityRLOutput) -> AllBrainInfo:
- global_done = output.global_done
- return _data, global_done
+ return _data
@timed
def _generate_step_input(
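
A minimal usage sketch of the new contract in environment.py, assuming an editor-attached environment: the _is_first_message flag routes the first step() to reset(), and _get_state() now returns just the AllBrainInfo dict instead of an (AllBrainInfo, bool) tuple.

from mlagents.envs.environment import UnityEnvironment

env = UnityEnvironment(file_name=None)  # None attaches to the Unity editor
brain_info = env.step()  # the first call behaves like env.reset()
for _ in range(10):
    brain_info = env.step()  # no "episode is completed" exception anymore
env.close()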

ml-agents-envs/mlagents/envs/mock_communicator.py (8 changes)


dict_agent_info["RealFakeBrain"] = UnityRLOutput.ListAgentInfoProto(
value=list_agent_info
)
- global_done = False
- try:
- fake_brain = inputs.rl_input.agent_actions["RealFakeBrain"]
- global_done = fake_brain.value[0].vector_actions[0] == -1
- except Exception:
- pass
- result = UnityRLOutput(global_done=global_done, agentInfos=dict_agent_info)
+ result = UnityRLOutput(agentInfos=dict_agent_info)
return UnityOutput(rl_output=result)
def close(self):

ml-agents-envs/mlagents/envs/simple_env_manager.py (23 changes)


all_action_info = self._take_step(self.previous_step)
self.previous_all_action_info = all_action_info
- if self.env.global_done:
- all_brain_info = self.env.reset()
- else:
- actions = {}
- memories = {}
- texts = {}
- values = {}
- for brain_name, action_info in all_action_info.items():
- actions[brain_name] = action_info.action
- memories[brain_name] = action_info.memory
- texts[brain_name] = action_info.text
- values[brain_name] = action_info.value
- all_brain_info = self.env.step(actions, memories, texts, values)
+ actions = {}
+ memories = {}
+ texts = {}
+ values = {}
+ for brain_name, action_info in all_action_info.items():
+ actions[brain_name] = action_info.action
+ memories[brain_name] = action_info.memory
+ texts[brain_name] = action_info.text
+ values[brain_name] = action_info.value
+ all_brain_info = self.env.step(actions, memories, texts, values)
step_brain_info = all_brain_info
step_info = EnvironmentStep(

ml-agents-envs/mlagents/envs/subprocess_env_manager.py (27 changes)


cmd: EnvironmentCommand = parent_conn.recv()
if cmd.name == "step":
all_action_info = cmd.payload
- # When an environment is "global_done" it means automatic agent reset won't occur, so we need
- # to perform an academy reset.
- if env.global_done:
- all_brain_info = env.reset()
- else:
- actions = {}
- memories = {}
- texts = {}
- values = {}
- for brain_name, action_info in all_action_info.items():
- actions[brain_name] = action_info.action
- memories[brain_name] = action_info.memory
- texts[brain_name] = action_info.text
- values[brain_name] = action_info.value
- all_brain_info = env.step(actions, memories, texts, values)
+ actions = {}
+ memories = {}
+ texts = {}
+ values = {}
+ for brain_name, action_info in all_action_info.items():
+ actions[brain_name] = action_info.action
+ memories[brain_name] = action_info.memory
+ texts[brain_name] = action_info.text
+ values[brain_name] = action_info.value
+ all_brain_info = env.step(actions, memories, texts, values)
# The timers in this process are independent from all the processes and the "main" process
# So after we send back the root timer, we can safely clear them.
# Note that we could randomly return timers a fraction of the time if we wanted to reduce

cmd.payload[0], cmd.payload[1], cmd.payload[2]
)
_send_response("reset", all_brain_info)
elif cmd.name == "global_done":
_send_response("global_done", env.global_done)
elif cmd.name == "close":
break
except (KeyboardInterrupt, UnityCommunicationException):
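
A condensed sketch of the worker loop after this change (command names taken from this diff; the real worker also manages a step queue and timers, which are omitted here): the env.global_done reset branch and the "global_done" command are gone, leaving a single step path.

import cloudpickle

def worker(parent_conn, pickled_env_factory, worker_id):
    env = cloudpickle.loads(pickled_env_factory)(worker_id)
    while True:
        cmd = parent_conn.recv()
        if cmd.name == "step":
            all_action_info = cmd.payload
            actions = {b: info.action for b, info in all_action_info.items()}
            # No env.global_done check: agents reset themselves inside Unity.
            all_brain_info = env.step(actions)
            parent_conn.send(("step", all_brain_info))
        elif cmd.name == "reset":
            all_brain_info = env.reset(*cmd.payload)
            parent_conn.send(("reset", all_brain_info))
        elif cmd.name == "close":
            break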

ml-agents-envs/mlagents/envs/tests/test_envs.py (12 changes)


discrete_action=False, visual_inputs=0
)
env = UnityEnvironment(" ")
- with pytest.raises(UnityActionException):
- env.step([0])
assert env.brain_names[0] == "RealFakeBrain"
env.close()

brain = env.brains["RealFakeBrain"]
brain_info = env.reset()
env.close()
- assert not env.global_done
assert isinstance(brain_info, dict)
assert isinstance(brain_info["RealFakeBrain"], BrainInfo)
assert isinstance(brain_info["RealFakeBrain"].visual_observations, list)

)
env = UnityEnvironment(" ")
brain = env.brains["RealFakeBrain"]
- brain_info = env.reset()
+ brain_info = env.step()
brain_info = env.step(
[0]
* brain.vector_action_space_size[0]

* brain.vector_action_space_size[0]
* len(brain_info["RealFakeBrain"].agents)
)
- with pytest.raises(UnityActionException):
- env.step(
- [0]
- * brain.vector_action_space_size[0]
- * len(brain_info["RealFakeBrain"].agents)
- )
- assert env.global_done
assert isinstance(brain_info, dict)
assert isinstance(brain_info["RealFakeBrain"], BrainInfo)
assert isinstance(brain_info["RealFakeBrain"].visual_observations, list)

ml-agents-envs/mlagents/envs/tests/test_subprocess_env_manager.py (34 changes)


)
self.assertEqual(len(env.env_workers), 2)
- def test_worker_step_resets_on_global_done(self):
- env_mock = Mock()
- env_mock.reset = Mock(return_value="reset_data")
- env_mock.global_done = True
- def mock_global_done_env_factory(worker_id: int):
- return env_mock
- mock_parent_connection = Mock()
- mock_step_queue = Mock()
- step_command = EnvironmentCommand("step", (None, None, None, None))
- close_command = EnvironmentCommand("close")
- mock_parent_connection.recv.side_effect = [step_command, close_command]
- mock_parent_connection.send = Mock()
- worker(
- mock_parent_connection,
- mock_step_queue,
- cloudpickle.dumps(mock_global_done_env_factory),
- 0,
- )
- # recv called twice to get step and close command
- self.assertEqual(mock_parent_connection.recv.call_count, 2)
- expected_step_response = StepResponse(
- all_brain_info="reset_data", timer_root=mock.ANY
- )
- # worker returns the data from the reset
- mock_step_queue.put.assert_called_with(
- EnvironmentResponse("step", 0, expected_step_response)
- )
def test_reset_passes_reset_params(self):
SubprocessEnvManager.create_worker = lambda em, worker_id, step_queue, env_factory: MockEnvWorker(
worker_id, EnvironmentResponse("reset", worker_id, worker_id)

ml-agents/mlagents/trainers/tests/test_simple_rl.py (4 changes)


}
- @property
- def global_done(self):
- return False
@property
def external_brains(self) -> Dict[str, BrainParameters]:
return self._brains

ml-agents/mlagents/trainers/tests/test_trainer_controller.py (1 change)


env_mock = MagicMock()
env_mock.step.return_value = [new_step_info]
env_mock.reset.return_value = [old_step_info]
- env_mock.global_done = False
tc.advance(env_mock)
env_mock.reset.assert_not_called()

protobuf-definitions/proto/mlagents/envs/communicator_objects/unity_rl_output.proto (2 changes)


message ListAgentInfoProto {
repeated AgentInfoProto value = 1;
}
- bool global_done = 1;
+ reserved 1; // deprecated bool global_done field
map<string, ListAgentInfoProto> agentInfos = 2;
}