
Merge remote-tracking branch 'origin/develop' into try-tf2-support

/develop-gpu-test
Chris Elion, 5 years ago
Current commit
254c7d86
44 files changed, with 462 additions and 1,763 deletions
  1. README.md (1)
  2. UnitySDK/Assets/ML-Agents/Editor/Tests/MLAgentsEditModeTest.cs (8)
  3. UnitySDK/Assets/ML-Agents/Scripts/Academy.cs (58)
  4. UnitySDK/Assets/ML-Agents/Scripts/Brain.cs (2)
  5. UnitySDK/Assets/ML-Agents/Scripts/Grpc/RpcCommunicator.cs (65)
  6. UnitySDK/Assets/ML-Agents/Scripts/ICommunicator.cs (17)
  7. UnitySDK/Assets/ML-Agents/Scripts/LearningBrain.cs (16)
  8. docs/Basic-Guide.md (12)
  9. docs/FAQ.md (7)
  10. docs/Getting-Started-with-Balance-Ball.md (6)
  11. docs/Learning-Environment-Create-New.md (22)
  12. docs/Learning-Environment-Design-Academy.md (3)
  13. docs/Learning-Environment-Design-Brains.md (3)
  14. docs/Learning-Environment-Design-Learning-Brains.md (2)
  15. docs/Learning-Environment-Design.md (4)
  16. docs/Learning-Environment-Executable.md (6)
  17. docs/ML-Agents-Overview.md (27)
  18. docs/Migrating.md (3)
  19. docs/Python-API.md (7)
  20. docs/Training-Behavioral-Cloning.md (2)
  21. docs/Training-ML-Agents.md (3)
  22. docs/Training-PPO.md (4)
  23. docs/Training-SAC.md (6)
  24. docs/images/academy.png (630)
  25. docs/images/mlagents-NewTutAcademy.png (157)
  26. markdown-link-check.config.json (4)
  27. ml-agents-envs/mlagents/envs/environment.py (26)
  28. ml-agents-envs/mlagents/envs/rpc_communicator.py (1)
  29. ml-agents/mlagents/trainers/learn.py (28)
  30. ml-agents/mlagents/trainers/sac/models.py (7)
  31. ml-agents/mlagents/trainers/tensorflow_to_barracuda.py (1)
  32. ml-agents/mlagents/trainers/tests/test_learn.py (9)
  33. ml-agents/mlagents/trainers/tests/test_simple_rl.py (7)
  34. ml-agents/mlagents/trainers/tests/test_trainer_controller.py (10)
  35. ml-agents/mlagents/trainers/tests/test_trainer_util.py (21)
  36. ml-agents/mlagents/trainers/trainer_controller.py (61)
  37. ml-agents/mlagents/trainers/trainer_util.py (160)
  38. UnitySDK/Assets/ML-Agents/Editor/BroadcastHubDrawer.cs.meta (3)
  39. UnitySDK/Assets/ML-Agents/Editor/BroadcastHubDrawer.cs (186)
  40. UnitySDK/Assets/ML-Agents/Scripts/BroadcastHub.cs.meta (3)
  41. UnitySDK/Assets/ML-Agents/Scripts/BroadcastHub.cs (46)
  42. docs/images/broadcast.png (236)
  43. docs/images/mlagents-SetBrainToTrain.png (243)
  44. ml-agents-envs/mlagents/envs/socket_communicator.py (102)

1
README.md


* Support for multiple environment configurations and training scenarios
* Train memory-enhanced agents using deep reinforcement learning
* Easily definable Curriculum Learning and Generalization scenarios
* Broadcasting of agent behavior for supervised learning
* Built-in support for Imitation Learning
* Flexible agent control with On Demand Decision Making
* Visualizing network outputs within the environment

8
UnitySDK/Assets/ML-Agents/Editor/Tests/MLAgentsEditModeTest.cs


var acaGo = new GameObject("TestAcademy");
acaGo.AddComponent<TestAcademy>();
var aca = acaGo.GetComponent<TestAcademy>();
aca.resetParameters = new ResetParameters();
Assert.AreEqual(0, aca.initializeAcademyCalls);
Assert.AreEqual(0, aca.GetStepCount());
Assert.AreEqual(0, aca.GetEpisodeCount());

var acaGo = new GameObject("TestAcademy");
acaGo.AddComponent<TestAcademy>();
var aca = acaGo.GetComponent<TestAcademy>();
aca.resetParameters = new ResetParameters();
var brain = TestBrain.Instantiate();
brain.brainParameters = new BrainParameters();
brain.brainParameters.vectorObservationSize = 0;

var acaGo = new GameObject("TestAcademy");
acaGo.AddComponent<TestAcademy>();
var aca = acaGo.GetComponent<TestAcademy>();
aca.resetParameters = new ResetParameters();
var academyInitializeMethod = typeof(Academy).GetMethod("InitializeEnvironment",
BindingFlags.Instance | BindingFlags.NonPublic);
academyInitializeMethod?.Invoke(aca, new object[] {});

var acaGo = new GameObject("TestAcademy");
acaGo.AddComponent<TestAcademy>();
var aca = acaGo.GetComponent<TestAcademy>();
aca.resetParameters = new ResetParameters();
var brain = TestBrain.Instantiate();

var acaGo = new GameObject("TestAcademy");
acaGo.AddComponent<TestAcademy>();
var aca = acaGo.GetComponent<TestAcademy>();
aca.resetParameters = new ResetParameters();
var academyInitializeMethod = typeof(Academy).GetMethod(
"InitializeEnvironment", BindingFlags.Instance | BindingFlags.NonPublic);
academyInitializeMethod?.Invoke(aca, new object[] {});

var acaGo = new GameObject("TestAcademy");
acaGo.AddComponent<TestAcademy>();
var aca = acaGo.GetComponent<TestAcademy>();
aca.resetParameters = new ResetParameters();
var brain = TestBrain.Instantiate();

var acaGo = new GameObject("TestAcademy");
acaGo.AddComponent<TestAcademy>();
var aca = acaGo.GetComponent<TestAcademy>();
aca.resetParameters = new ResetParameters();
var brain = TestBrain.Instantiate();

var acaGo = new GameObject("TestAcademy");
acaGo.AddComponent<TestAcademy>();
var aca = acaGo.GetComponent<TestAcademy>();
aca.resetParameters = new ResetParameters();
var brain = TestBrain.Instantiate();

58
UnitySDK/Assets/ML-Agents/Scripts/Academy.cs


"docs/Learning-Environment-Design-Academy.md")]
public abstract class Academy : MonoBehaviour
{
[SerializeField]
public BroadcastHub broadcastHub = new BroadcastHub();
private const string k_ApiVersion = "API-10";
/// Temporary storage for global gravity value

/// </returns>
bool IsCommunicatorOn
{
get { return m_Communicator != null; }
get { return Communicator != null; }
}
/// If true, the Academy will use inference settings. This field is

bool m_ModeSwitched;
/// Pointer to the communicator currently in use by the Academy.
ICommunicator m_Communicator;
public ICommunicator Communicator;
// Flag used to keep track of the first time the Academy is reset.
bool m_FirstAcademyReset;

InitializeAcademy();
var controlledBrains = broadcastHub.brainsToControl.Where(x => x != null).ToList();
m_Communicator = new RpcCommunicator(
Communicator = new RpcCommunicator(
// If it fails, we check if there are any external brains in the scene
// and if Unity is in Editor mode
// If there are : Launch the communicator on the default port
m_Communicator = null;
if (controlledBrains.Any())
{
m_Communicator = new RpcCommunicator(
new CommunicatorInitParameters
{
port = 5005
});
}
Communicator = new RpcCommunicator(
new CommunicatorInitParameters
{
port = 5004
});
foreach (var trainingBrain in controlledBrains)
{
trainingBrain.SetCommunicator(m_Communicator);
}
if (m_Communicator != null)
if (Communicator != null)
m_Communicator.QuitCommandReceived += OnQuitCommandReceived;
m_Communicator.ResetCommandReceived += OnResetCommand;
m_Communicator.RLInputReceived += OnRLInputReceived;
var unityRLInitParameters = m_Communicator.Initialize(
var unityRLInitParameters = Communicator.Initialize(
brains = controlledBrains,
}, broadcastHub);
});
m_Communicator = null;
foreach (var brain in controlledBrains)
{
brain.SetCommunicator(null);
}
Communicator = null;
}
if (Communicator != null){
Communicator.QuitCommandReceived += OnQuitCommandReceived;
Communicator.ResetCommandReceived += OnResetCommand;
Communicator.RLInputReceived += OnRLInputReceived;
}
}

SetIsInference(!IsCommunicatorOn);
BrainDecideAction += () => {};

ConfigureEnvironment();
m_ModeSwitched = false;
}
if (!m_FirstAcademyReset)
{
ForcedFullReset();

2
UnitySDK/Assets/ML-Agents/Scripts/Brain.cs


var academy = FindObjectOfType<Academy>();
if (academy)
{
m_IsInitialized = true;
m_IsInitialized = true;
}
}
}

65
UnitySDK/Assets/ML-Agents/Scripts/Grpc/RpcCommunicator.cs


Dictionary<string, bool> m_HasData =
new Dictionary<string, bool>();
/// Keeps track of which brains queried the batcher on the current step
/// Keeps track of which brains queried the communicator on the current step
Dictionary<string, bool> m_HasQueried =
new Dictionary<string, bool>();

/// The current UnityRLOutput to be sent when all the brains queried the batcher
/// The current UnityRLOutput to be sent when all the brains queried the communicator
private UnityRLInitializationOutputProto m_CurrentUnityRlInitializationOutput;
# if UNITY_EDITOR || UNITY_STANDALONE_WIN || UNITY_STANDALONE_OSX || UNITY_STANDALONE_LINUX

/// </summary>
/// <returns>The External Initialization Parameters received.</returns>
/// <param name="initParameters">The Unity Initialization Parameters to be sent.</param>
/// <param name="broadcastHub">The BroadcastHub to get the controlled brains.</param>
public UnityRLInitParameters Initialize(CommunicatorInitParameters initParameters,
BroadcastHub broadcastHub)
public UnityRLInitParameters Initialize(CommunicatorInitParameters initParameters)
{
var academyParameters = new UnityRLInitializationOutputProto
{

foreach (var brain in initParameters.brains)
{
academyParameters.BrainParameters.Add(brain.brainParameters.ToProto(
brain.name, true));
SubscribeBrain(brain.name);
}
academyParameters.EnvironmentParameters = new EnvironmentParametersProto();

return initializationInput.RlInitializationInput.ToUnityRLInitParameters();
}
/// <summary>
/// Adds the brain to the list of brains which will be sending information to External.
/// </summary>
/// <param name="brainKey">Brain key.</param>
public void SubscribeBrain(string brainKey, BrainParameters brainParameters)
{
m_HasQueried[brainKey] = false;
m_HasData[brainKey] = false;
m_CurrentAgents[brainKey] = new List<Agent>(k_NumAgents);
m_CurrentUnityRlOutput.AgentInfos.Add(
brainKey,
new CommunicatorObjects.UnityRLOutputProto.Types.ListAgentInfoProto());
if (m_CurrentUnityRlInitializationOutput == null){
m_CurrentUnityRlInitializationOutput = new CommunicatorObjects.UnityRLInitializationOutputProto();
}
m_CurrentUnityRlInitializationOutput.BrainParameters.Add(brainParameters.ToProto(brainKey, true));
}
void UpdateEnvironmentWithInput(UnityRLInputProto rlInput)
{
SendRLInputReceivedEvent(rlInput.IsTraining);

#region Sending and retrieving data
/// <summary>
/// Adds the brain to the list of brains which will be sending information to External.
/// </summary>
/// <param name="brainKey">Brain key.</param>
private void SubscribeBrain(string brainKey)
{
m_HasQueried[brainKey] = false;
m_HasData[brainKey] = false;
m_CurrentAgents[brainKey] = new List<Agent>(k_NumAgents);
m_CurrentUnityRlOutput.AgentInfos.Add(
brainKey,
new UnityRLOutputProto.Types.ListAgentInfoProto());
}
public void PutObservations(
string brainKey, IEnumerable<Agent> agents)
{

/// </summary>
void SendBatchedMessageHelper()
{
var input = Exchange(
new UnityOutputProto
{
RlOutput = m_CurrentUnityRlOutput
});
var message = new CommunicatorObjects.UnityOutputProto
{
RlOutput = m_CurrentUnityRlOutput,
};
if (m_CurrentUnityRlInitializationOutput != null)
{
message.RlInitializationOutput = m_CurrentUnityRlInitializationOutput;
}
var input = Exchange(message);
m_CurrentUnityRlInitializationOutput = null;
foreach (var k in m_CurrentUnityRlOutput.AgentInfos.Keys)
{

17
UnitySDK/Assets/ML-Agents/Scripts/ICommunicator.cs


/// </summary>
public string version;
/// <summary>
/// The list of brains parameters used for training.
/// </summary>
public IEnumerable<Brain> brains;
/// <summary>
/// The set of environment parameters defined by the user that will be sent to the communicator.
/// </summary>
public EnvironmentResetParameters environmentResetParameters;

/// </summary>
/// <returns>The External Initialization Parameters received.</returns>
/// <param name="initParameters">The Unity Initialization Parameters to be sent.</param>
/// <param name="broadcastHub">The BroadcastHub to get the controlled brains.</param>
UnityRLInitParameters Initialize(CommunicatorInitParameters initParameters,
BroadcastHub broadcastHub);
UnityRLInitParameters Initialize(CommunicatorInitParameters initParameters);
/// <summary>
/// Registers a new Brain to the Communicator.
/// </summary>
/// <param name="name">The name or key uniquely identifying the Brain</param>
/// <param name="brainParameters">The Parameters for the Brain being registered</param>
void SubscribeBrain(string name, BrainParameters brainParameters);
/// sent once all the brains that subscribed to the batcher have tried
/// sent once all the brains that were part of initialization have tried
/// to send information.
/// </summary>
/// <param name="key">Batch Key.</param>

16
UnitySDK/Assets/ML-Agents/Scripts/LearningBrain.cs


/// <summary>
/// The Learning Brain works differently if you are training it or not.
/// When training your Agents, drag the Learning Brain to the Academy's BroadcastHub.
/// When using a pretrained model, just drag the Model file into the
/// Model property of the Learning Brain.
/// When training your Agents, the LearningBrain will be controlled by Python.
/// When using a pretrained model, just drag the Model file into the
/// Model property of the Learning Brain and do not launch the Python training process.
/// least one LearningBrain in the BroadcastHub.
/// least one LearningBrain in the scene.
/// The property model corresponds to the Model currently attached to the Brain. Before
/// being used, a call to ReloadModel is required.
/// When the Learning Brain is not training, it uses a TensorFlow model to make decisions.

protected ICommunicator m_Communicator;
/// <summary>
/// Sets the Batcher of the Brain. The brain will call the communicator at every step and give
/// Sets the ICommunicator of the Brain. The brain will call the communicator at every step and give
public void SetCommunicator(ICommunicator communicator)
private void SetCommunicator(ICommunicator communicator)
m_Communicator?.SubscribeBrain(name, brainParameters);
}
/// <inheritdoc />

var comm = FindObjectOfType<Academy>()?.Communicator;
SetCommunicator(comm);
}
/// <summary>

12
docs/Basic-Guide.md


if you want to [use an executable](Learning-Environment-Executable.md) or to
`None` if you want to interact with the current scene in the Unity Editor.
Before building the environment or interacting with it in the editor, select `Ball3DAcademy` in the **Hierarchy** window of the Unity editor and make sure the `3DBallLearningBrain` is in the Broadcast Hub of the `Ball3DAcademy` component.
More information and documentation are provided on the
[Python API](Python-API.md) page.

to the training and which Brain is being trained. You can only perform training with
a `Learning Brain`.
1. Each platform agent needs an assigned `Learning Brain`. In this example, each platform agent was created using a prefab. To update all of the brains in each platform agent at once, you only need to update the platform agent prefab. In the **Project** window, go to the `Assets/ML-Agents/Examples/3DBall/Prefabs` folder. Expand `Game` and click on the `Platform` prefab. You should see the `Platform` prefab in the **Inspector** window. In the **Project** window, drag the **3DBallLearning** Brain located in `Assets/ML-Agents/Examples/3DBall/Brains` into the `Brain` property under `Ball 3D Agent (Script)` component in the **Inspector** window.
Each platform agent needs an assigned `Learning Brain`. In this example, each platform agent was created using a prefab. To update all of the brains in each platform agent at once, you only need to update the platform agent prefab. In the **Project** window, go to the `Assets/ML-Agents/Examples/3DBall/Prefabs` folder. Expand `Game` and click on the `Platform` prefab. You should see the `Platform` prefab in the **Inspector** window. In the **Project** window, drag the **3DBallLearning** Brain located in `Assets/ML-Agents/Examples/3DBall/Brains` into the `Brain` property under `Ball 3D Agent (Script)` component in the **Inspector** window.
2. In the **Hierarchy** window, select `Ball3DAcademy`.
3. In the **Project** window, go to `Assets/ML-Agents/Examples/3DBall/Brains` folder and drag the **3DBallLearning** Brain to the `Brains` property under `Broadcast Hub` in the `Ball3DAcademy` object in the **Inspector** window.
the agent) means that the Brain will be making decision for that agent. Whereas dragging
a Brain into the Broadcast Hub means that the Brain will be exposed to the Python process.
![Set Brain to External](images/mlagents-SetBrainToTrain.png)
the agent) means that the Brain will be making decisions for that agent. If the Agent uses a
LearningBrain, either Python controls the Brain or the model on the Brain does.
### Training the environment

7
docs/FAQ.md


There may be a number of possible causes:
* _Cause_: There may be no LearningBrain in the
`Broadcast Hub` of the Academy. In this case, the environment will not attempt
to communicate with Python. _Solution_: Click `Add New` in your Academy's
`Broadcast Hub`, and drag your LearningBrain asset into the `Brains` field.
Also you need to assign this LearningBrain
asset to all of the Agents you wish to do training on.
* _Cause_: There may be no agent in the scene with a LearningBrain
* _Cause_: On OSX, the firewall may be preventing communication with the
environment. _Solution_: Add the built environment binary to the list of
exceptions on the firewall by following

6
docs/Getting-Started-with-Balance-Ball.md


The Academy object for the scene is placed on the Ball3DAcademy GameObject. When
you look at an Academy component in the inspector, you can see several
properties that control how the environment works.
The **Broadcast Hub** keeps track of which Brains will send data during training.
If a Brain is added to the hub, the data from this Brain will be sent to the external training
process.
The **Training Configuration** and **Inference Configuration** properties
set the graphics and timescale properties for the Unity application.
The Academy uses the **Training Configuration** during training and the

You can create new Brain assets by selecting `Assets ->
Create -> ML-Agents -> Brain`. There are 3 types of Brains.
The **Learning Brain** is a Brain that uses a trained neural network to make decisions.
When the **Learning Brain** is dragged into the **Broadcast Hub** in the Academy, the external process that is training the neural network will take over decision making for the agents
When Unity is connected to Python, the external process will be controlling the Brain.
The external process that is training the neural network will take over decision making for the agents
and ultimately generate a trained neural network. You can also use the
**Learning Brain** with a pre-trained model.
The **Heuristic** Brain allows you to hand-code the Agent logic by extending

22
docs/Learning-Environment-Create-New.md


5. Add your Agent subclasses to appropriate GameObjects, typically, the object
in the scene that represents the Agent in the simulation. Each Agent object
must be assigned a Brain object.
6. If training, drag the Brain in the BroadcastHub of the Academy.
[run the training process](Training-ML-Agents.md).
**Note:** If you are unfamiliar with Unity, refer to
[Learning the interface](https://docs.unity3d.com/Manual/LearningtheInterface.html)

The default settings for the Academy properties are also fine for this
environment, so we don't need to change anything for the RollerAcademy component
in the Inspector window. You may not have the RollerBrain in the Broadcast Hub yet,
more on that later.
in the Inspector window.
![The Academy properties](images/mlagents-NewTutAcademy.png)

Brain asset to the Agent, changing some of the Agent Component's properties, and
setting the Brain properties so that they are compatible with our Agent code.
1. In the Academy Inspector, add the `RollerBallBrain` and `RollerBallPlayer`
Brains to the **Broadcast Hub**.
2. Select the **RollerAgent** GameObject to show its properties in the Inspector
1. Select the **RollerAgent** GameObject to show its properties in the Inspector
3. Drag the Brain **RollerBallPlayer** from the Project window to the
2. Drag the Brain **RollerBallPlayer** from the Project window to the
4. Change **Decision Interval** from `1` to `10`.
5. Drag the Target GameObject from the Hierarchy window to the RollerAgent
3. Change **Decision Interval** from `1` to `10`.
4. Drag the Target GameObject from the Hierarchy window to the RollerAgent
Target field.
![Assign the Brain to the RollerAgent](images/mlagents-NewTutAssignBrain.png)

## Training the Environment
Now you can train the Agent. To get ready for training, you must first drag the
`RollerBallBrain` asset to the **RollerAgent** GameObject `Brain` field to change to the learning brain.
Then, select the Academy GameObject and drag
the RollerBallBrain item in the **Broadcast Hub** list. From there, the process is
`RollerBallBrain` asset to the **RollerAgent** GameObject `Brain` field to change
to the LearningBrain. From there, the process is
the same as described in [Training ML-Agents](Training-ML-Agents.md). Note that the
models will be created in the original ml-agents project folder, `ml-agents/models`.

Keep in mind:
* There can only be one Academy game object in a scene.
* You can only train Learning Brains that have been added to the Academy's Broadcast Hub list.

3
docs/Learning-Environment-Design-Academy.md


## Academy Properties
![Academy Inspector](images/academy.png)
* `Broadcast Hub` - Gathers the Brains that will communicate with the external
process. Any Brain added to the Broadcast Hub will be visible from the external
process and controllable from the external process and will thus be trainable.
* `Configuration` - The engine-level settings which correspond to rendering
quality and engine speed.
* `Width` - Width of the environment window in pixels.

3
docs/Learning-Environment-Design-Brains.md


**PlayerBrain** to map keyboard keys to Agent actions, which can be
useful to test your Agent code.
During training, use a **Learning Brain**
and drag it into the Academy's `Broadcast Hub`.
During training, use a **Learning Brain**.
When you want to use the trained model, import the model file into the Unity
project, add it to the **Model** property of the **Learning Brain**.

2
docs/Learning-Environment-Design-Learning-Brains.md


## Training Mode / External Control
When [running an ML-Agents training algorithm](Training-ML-Agents.md), at least
one Brain asset must be in the Academy's `Broadcast Hub`. This allows the training
one Agent must have a LearningBrain. This allows the training
process to collect the observations of Agents using that Brain and give the Agents
their actions.

4
docs/Learning-Environment-Design.md


search bar on top of the Scene Hierarchy window.
To Create a Brain, go to `Assets -> Create -> Ml-Agents` and select the
type of Brain you want to use. During training, use a **Learning Brain**
and drag it into the Academy's `Broadcast Hub`.
type of Brain you want to use. During training, use a **Learning Brain**.
When you want to use the trained model, import the model file into the Unity
project, add it to the **Model** property of the **Learning Brain**.
If the Python process is not active, the **Learning Brain** will not train but

* The training scene must start automatically when your Unity application is
launched by the training process.
* The scene must include an Academy with at least one Brain in the `Broadcast Hub`.
* The Academy must reset the scene to a valid starting point for each episode of
training.
* A training episode must have a definite end — either using `Max Steps` or by

6
docs/Learning-Environment-Executable.md


Make sure the Brains in the scene have the right type. For example, if you want
to be able to control your agents from Python, you will need to put the Brain
controlling the Agents to be a **Learning Brain** and drag it into the
Academy's `Broadcast Hub`. In the 3DBall
controlling the Agents to be a **Learning Brain**. In the 3DBall
scene, this can be done in the Platform GameObject within the Game prefab in
`Assets/ML-Agents/Examples/3DBall/Prefabs/`, or in each instance of the
Platform in the Scene.

5. Drag the `<brain_name>.nn` file from the Project window of
the Editor to the **Model** placeholder in the **Ball3DLearning**
inspector window.
6. Remove the **Ball3DLearning** from the Academy's `Broadcast Hub`
7. Press the Play button at the top of the editor.
6. Press the Play button at the top of the editor.

27
docs/ML-Agents-Overview.md


[TensorFlow](Background-TensorFlow.md) model. The embedded TensorFlow model
represents a learned policy and the Brain directly uses this model to
determine the action for each Agent. You can train a **Learning Brain**
by dragging it into the Academy's `Broadcast Hub` and launching the game with
the Python training process.
by launching the game with the Python training process.
- **Player** - where decisions are made using real input from a keyboard or
controller. Here, a human player is controlling the Agent and the observations
and rewards collected by the Brain are not used to control the Agent.

trained a Brain for the medics we could assign a medic on one team to the
trained Brain and assign the medic on the other team a Heuristic Brain with
hard-coded behaviors. We can then evaluate which medic is more effective.
As currently described, it may seem that the External Communicator and Python
API are only leveraged by the Learning Brain. This is not true. It is possible
to configure the Learning, Player and Heuristic Brains to also send the
observations, rewards and actions to the Python API through the External
Communicator (a feature called _broadcasting_). As we will see shortly, this
enables additional training modes.
<p align="center">
<img src="images/learning_environment.png"

In the previous mode, the Learning Brain was used for training to generate
a TensorFlow model that the Learning Brain can later use. However,
any user of the ML-Agents toolkit can leverage their own algorithms for
training. In this case, the Brain type would be set to Learning and be linked
to the BroadcastHub
training. In this case, the Brain type would be set to Learning
and the behaviors of all the Agents in the scene will be controlled within Python.
You can even turn your environment into a [gym.](../gym-unity/README.md)

a way to randomly sample Reset Parameters of the environment during training. See
[Training Generalized Reinforcement Learning Agents](Training-Generalized-Reinforcement-Learning-Agents.md)
to learn more about this feature.
- **Broadcasting** - As discussed earlier, a Learning Brain sends the
observations for all its Agents to the Python API when dragged into the
Academy's `Broadcast Hub` with the `Control` checkbox checked. This is helpful
for training and later inference. Broadcasting is a feature which can be
enabled all types of Brains (Player, Learning, Heuristic) where the Agent
observations and actions are also sent to the Python API (despite the fact
that the Agent is **not** controlled by the Python API). This feature is
leveraged by Imitation Learning, where the observations and actions for a
Player Brain are used to learn the policies of an agent through demonstration.
However, this could also be helpful for the Heuristic and Learning Brains,
particularly when debugging agent behaviors. You can learn more about using
the broadcasting feature
[here](Learning-Environment-Design-Brains.md#using-the-broadcast-feature).
- **Cloud Training on AWS** - To facilitate using the ML-Agents toolkit on
Amazon Web Services (AWS) machines, we provide a

3
docs/Migrating.md


### Important Changes
* The definition of the gRPC service has changed.
* The online BC training feature has been removed.
* The BroadcastHub of the Academy no longer has a `Control` checkbox. All Learning Brains in the BroadcastHub will be considered as trainable (although the training will only be launched if the Python Process is ready and will use inference otherwise)
* The broadcast feature has been deprecated. Only LearningBrains can communicate with Python.
* The BroadcastHub has been deprecated. If there is a training Python process, all LearningBrains in the scene will automatically be trained. If there is no Python process, inference will be used.
#### Steps to Migrate
* In order to be able to train, make sure both your ML-Agents Python package and UnitySDK code come from the v0.11 release. Training will not work, for example, if you update the ML-Agents Python package, and only update the API Version in UnitySDK.

7
docs/Python-API.md


the ML-Agents SDK.
To communicate with an Agent in a Unity environment from a Python program, the
Agent must use a LearningBrain present in the Academy's `Broadcast Hub`.
Agent must use a LearningBrain.
actions for Agents with Brains in the
Academy's `Broadcast Hub`..
actions for Agents with LearningBrains.
_Notice: Currently communication between Unity and Python takes place over an
open socket without authentication. As such, please make sure that the network

observations = brainInfo.vector_observations
```
Note that if you have more than one Brain in the Academy's `Broadcast Hub`, you
Note that if you have more than one LearningBrain in the scene, you
must provide dictionaries from Brain names to arrays for `action`, `memory`
and `value`. For example: If you have two Learning Brains named `brain1` and
`brain2` each with one Agent taking two continuous actions, then you can

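The passage above explains that with more than one LearningBrain in the scene, `action`, `memory` and `value` must be dictionaries keyed by Brain name. A minimal sketch of that call pattern, assuming a scene with two LearningBrains and one Agent each (the names `brain1`/`brain2` and the action values are illustrative, and the exact `step` signature may vary between releases):

```python
import numpy as np
from mlagents.envs.environment import UnityEnvironment

# Sketch only: file_name=None attaches to the scene currently open in the Unity Editor.
env = UnityEnvironment(file_name=None)
env.reset(train_mode=True)

# One Agent per Brain, two continuous actions each; keys are the Brain names.
actions = {
    "brain1": np.array([0.1, -0.3]),
    "brain2": np.array([0.0, 0.5]),
}
# The first argument is the vector action; a dict of BrainInfo keyed by Brain name comes back.
all_brain_info = env.step(actions)

env.close()
```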
2
docs/Training-Behavioral-Cloning.md


1. Choose an agent you would like to learn to imitate some set of demonstrations.
2. Record a set of demonstration using the `Demonstration Recorder` (see [here](Training-Imitation-Learning.md)).
For illustrative purposes we will refer to this file as `AgentRecording.demo`.
3. Build the scene, assigning the agent a Learning Brain, and dragging it in the Broadcast Hub. For more information on Brains, see
3. Build the scene, assigning the agent a Learning Brain. For more information on Brains, see
[here](Learning-Environment-Design-Brains.md).
4. Open the `config/offline_bc_config.yaml` file.
5. Modify the `demo_path` parameter in the file to reference the path to the

3
docs/Training-ML-Agents.md


* `--base-port`: Specifies the starting port. Each concurrent Unity environment instance
will get assigned a port sequentially, starting from the `base-port`. Each instance
will use the port `(base_port + worker_id)`, where the `worker_id` is sequential IDs
given to each instance from 0 to `num_envs - 1`. Default is 5005.
given to each instance from 0 to `num_envs - 1`. Default is 5005. __Note:__ When
training using the Editor rather than an executable, the base port will be ignored.
* `--slow`: Specify this option to run the Unity environment at normal, game
speed. The `--slow` mode uses the **Time Scale** and **Target Frame Rate**
specified in the Academy's **Inference Configuration**. By default, training

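As a quick illustration of the `--base-port` rule above, the sketch below reproduces the port layout for several parallel environment instances (values are illustrative; the fixed in-Editor port of 5004 is taken from the `learn.py` hunk later in this diff):

```python
# Sketch: each concurrent Unity instance listens on base_port + worker_id.
base_port = 5005  # --base-port default
num_envs = 4      # --num-envs

ports = [base_port + worker_id for worker_id in range(num_envs)]
print(ports)  # [5005, 5006, 5007, 5008]

# When training in the Editor (no executable), the base port is ignored
# and a fixed port is used instead.
editor_port = 5004
```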
4
docs/Training-PPO.md


`vis_encode_type` corresponds to the encoder type for encoding visual observations.
Valid options include:
* `simple` (default): a simple encoder which consists of two convolutional layers
* `nature_cnn`: CNN implementation proposed by Mnih et al.(https://www.nature.com/articles/nature14236),
* `nature_cnn`: [CNN implementation proposed by Mnih et al.](https://www.nature.com/articles/nature14236),
* `resnet`: IMPALA Resnet implementation (https://arxiv.org/abs/1802.01561),
* `resnet`: [IMPALA Resnet implementation](https://arxiv.org/abs/1802.01561),
consisting of three stacked layers, each with two residual blocks, making a
much larger network than the other two.

6
docs/Training-SAC.md


`vis_encode_type` corresponds to the encoder type for encoding visual observations.
Valid options include:
* `simple` (default): a simple encoder which consists of two convolutional layers
* `nature_cnn`: CNN implementation proposed by Mnih et al.(https://www.nature.com/articles/nature14236),
* `nature_cnn`: [CNN implementation proposed by Mnih et al.](https://www.nature.com/articles/nature14236),
* `resnet`: IMPALA Resnet implementation (https://arxiv.org/abs/1802.01561),
consisting of three stacked layers, each with two risidual blocks, making a
* `resnet`: [IMPALA Resnet implementation](https://arxiv.org/abs/1802.01561),
consisting of three stacked layers, each with two residual blocks, making a
much larger network than the other two.
Options: `simple`, `nature_cnn`, `resnet`

630
docs/images/academy.png

Before | After
Width: 303 | Height: 326 | Size: 29 KiB

157
docs/images/mlagents-NewTutAcademy.png

Before | After
Width: 351 | Height: 112 | Size: 14 KiB

4
markdown-link-check.config.json


{
"pattern": "^https://developer.nvidia.com/compute/machine-learning/cudnn/secure",
"comment": "Requires login"
},
{
"pattern": "^https?://bair.berkeley.edu",
"comment": "Temporary berkeley outage"
}
]
}

26
ml-agents-envs/mlagents/envs/environment.py


self._log_path = aca_params.log_path
self._brains: Dict[str, BrainParameters] = {}
self._external_brain_names: List[str] = []
for brain_param in aca_params.brain_parameters:
self._brains[brain_param.brain_name] = BrainParameters.from_proto(
brain_param
)
self._external_brain_names += [brain_param.brain_name]
self._num_external_brains = len(self._external_brain_names)
self._num_external_brains = 0
self._update_brain_parameters(aca_params)
if self._num_external_brains == 0:
logger.warning(
" No Learning Brains set to train found in the Unity Environment. "
"You will not be able to pass actions to your agent(s)."
)
@property
def logfile_path(self):

)
if outputs is None:
raise UnityCommunicationException("Communicator has stopped.")
self._update_brain_parameters(outputs.rl_initialization_output)
rl_output = outputs.rl_output
s = self._get_state(rl_output)
for _b in self._external_brain_names:

outputs = self.communicator.exchange(step_input)
if outputs is None:
raise UnityCommunicationException("Communicator has stopped.")
self._update_brain_parameters(outputs.rl_initialization_output)
rl_output = outputs.rl_output
state = self._get_state(rl_output)
for _b in self._external_brain_names:

self.worker_id, agent_info_list, self.brains[brain_name]
)
return _data
def _update_brain_parameters(
self, init_output: Optional[UnityRLInitializationOutputProto]
) -> None:
if init_output is not None:
for brain_param in init_output.brain_parameters:
self._brains[brain_param.brain_name] = BrainParameters.from_proto(
brain_param
)
self._external_brain_names = list(self._brains.keys())
self._num_external_brains = len(self._external_brain_names)
@timed
def _generate_step_input(

1
ml-agents-envs/mlagents/envs/rpc_communicator.py


raise UnityTimeOutException(
"The Unity environment took too long to respond. Make sure that :\n"
"\t The environment does not need user interaction to launch\n"
"\t The Academy's Broadcast Hub is configured correctly\n"
"\t The Agents are linked to the appropriate Brains\n"
"\t The environment and the Python interface have compatible versions."
)

28
ml-agents/mlagents/trainers/learn.py


# # Unity ML-Agents Toolkit
import logging
import argparse

from mlagents.trainers.trainer_controller import TrainerController
from mlagents.trainers.exception import TrainerError
from mlagents.trainers.meta_curriculum import MetaCurriculumError, MetaCurriculum
from mlagents.trainers.trainer_util import initialize_trainers, load_config
from mlagents.trainers.trainer_util import load_config, TrainerFactory
from mlagents.envs.environment import UnityEnvironment
from mlagents.envs.sampler_class import SamplerManager
from mlagents.envs.exception import SamplerException

nargs=argparse.REMAINDER,
help="Arguments passed to the Unity executable.",
)
args = parser.parse_args(argv)
return CommandLineOptions.from_argparse(args)

:param run_options: Command line arguments for training.
"""
# Docker Parameters
# Recognize and use docker volume if one is passed as an argument
if not options.docker_target_name:
model_path = "./models/{run_id}-{sub_id}".format(

summaries_dir = "/{docker_target_name}/summaries".format(
docker_target_name=options.docker_target_name
)
port = options.base_port + (sub_id * options.num_envs)
if options.env_path is None:
port = 5004 # This is the in Editor Training Port
options.base_port + (sub_id * options.num_envs),
port,
options.env_args,
)
env = SubprocessEnvManager(env_factory, options.num_envs)

sampler_manager, resampling_interval = create_sampler_manager(
options.sampler_file_path, env.reset_parameters, run_seed
)
trainers = initialize_trainers(
trainer_factory = TrainerFactory(
env.external_brains,
summaries_dir,
options.run_id,
model_path,

maybe_meta_curriculum,
options.multi_gpu,
)
trainers,
trainer_factory,
model_path,
summaries_dir,
options.run_id + "-" + str(sub_id),

sampler_manager,
resampling_interval,
)
# Begin training
tc.start_learning(env)

"Specified resampling-interval is not valid. Please provide"
" a positive integer value for resampling-interval"
)
sampler_manager = SamplerManager(sampler_config, run_seed)
return sampler_manager, resample_interval

) -> Optional[MetaCurriculum]:
if curriculum_folder is None:
return None
else:
meta_curriculum = MetaCurriculum(curriculum_folder, env.reset_parameters)
# TODO: Should be able to start learning at different lesson numbers

"name as the Brain "
"whose curriculum it defines."
)
return meta_curriculum

)
except Exception:
print("\n\n\tUnity Technologies\n")
options = parse_command_line()
trainer_logger = logging.getLogger("mlagents.trainers")
env_logger = logging.getLogger("mlagents.envs")

env_logger.setLevel("DEBUG")
if options.env_path is None and options.num_runs > 1:
raise TrainerError(
"It is not possible to launch more than one concurrent training session "

jobs = []
run_seed = options.seed
if options.num_runs == 1:
if options.seed == -1:
run_seed = np.random.randint(0, 10000)

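Taken together, the `learn.py` hunks above replace the eagerly built `trainers` dictionary with a `TrainerFactory` that is handed to the `TrainerController`. A hedged sketch of that wiring, using the constructor arguments shown in the `trainer_util.py` hunk below; paths and values are illustrative, and the real code reads them from `CommandLineOptions`:

```python
# Sketch of the new run_training wiring (illustrative values).
from mlagents.trainers.trainer_util import TrainerFactory, load_config

trainer_config = load_config("config/trainer_config.yaml")

trainer_factory = TrainerFactory(
    trainer_config=trainer_config,
    summaries_dir="./summaries",
    run_id="ppo-0",
    model_path="./models/ppo-0",
    keep_checkpoints=5,
    train_model=True,
    load_model=False,
    seed=0,
)

# The factory, not a pre-built trainer dict, is passed to TrainerController;
# trainers are generated later, one per external brain reported by the
# environment manager (see the trainer_controller.py hunk below).
```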
7
ml-agents/mlagents/trainers/sac/models.py


)
# We assume m_size is divisible by 4
# Create the non-Policy inputs
# Use a default placeholder here so nothing has to be provided during
# Barracuda inference. Note that the default value is just the tiled input
# for the policy, which is thrown away.
self.other_memory_in = tf.placeholder(
self.other_memory_in = tf.placeholder_with_default(
input=tf.tile(self.inference_memory_in, [1, 3]),
dtype=tf.float32,
name="other_recurrent_in",
)

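The `sac/models.py` hunk above swaps a plain placeholder for `tf.placeholder_with_default`, so that Barracuda export does not require feeding the extra memory tensor. A standalone sketch of that pattern in TF 1.x graph mode (tensor names and sizes are illustrative):

```python
import numpy as np
import tensorflow.compat.v1 as tf  # graph-mode API, as used by this ml-agents release

tf.disable_eager_execution()

memory_size = 4
inference_memory_in = tf.placeholder(
    shape=[None, memory_size], dtype=tf.float32, name="recurrent_in"
)

# The default value is just the tiled policy memory, so inference can run
# without feeding this tensor at all; training code may still override it.
other_memory_in = tf.placeholder_with_default(
    input=tf.tile(inference_memory_in, [1, 3]),
    shape=[None, memory_size * 3],
    name="other_recurrent_in",
)

with tf.Session() as sess:
    out = sess.run(
        other_memory_in,
        feed_dict={inference_memory_in: np.zeros((1, memory_size), dtype=np.float32)},
    )
    print(out.shape)  # (1, 12)
```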
1
ml-agents/mlagents/trainers/tensorflow_to_barracuda.py


"OneHot": Struct(id=67, rank=lambda inputs: inputs[0] + 1),
# Broadcast ops
"Add": Struct(id=100, rank=lambda inputs: np.max(inputs)),
"AddV2": Struct(id=100, rank=lambda inputs: np.max(inputs)),
"Sub": Struct(id=101, rank=lambda inputs: np.max(inputs)),
"Mul": Struct(id=102, rank=lambda inputs: np.max(inputs)),
"RealDiv": Struct(id=103, rank=lambda inputs: np.max(inputs)),

9
ml-agents/mlagents/trainers/tests/test_learn.py


return parse_command_line(args)
@patch("mlagents.trainers.learn.TrainerFactory")
load_config, create_environment_factory, subproc_env_mock, sampler_manager_mock
load_config,
create_environment_factory,
subproc_env_mock,
sampler_manager_mock,
trainer_factory_mock,
):
mock_env = MagicMock()
mock_env.external_brain_names = []

with patch.object(TrainerController, "start_learning", MagicMock()):
learn.run_training(0, 0, basic_options(), MagicMock())
mock_init.assert_called_once_with(
{},
trainer_factory_mock.return_value,
"./models/ppo-0",
"./summaries",
"ppo-0",

7
ml-agents/mlagents/trainers/tests/test_simple_rl.py


from mlagents.trainers.trainer_controller import TrainerController
from mlagents.trainers.trainer_util import initialize_trainers
from mlagents.trainers.trainer_util import TrainerFactory
from mlagents.envs.base_unity_environment import BaseUnityEnvironment
from mlagents.envs.brain import BrainInfo, AllBrainInfo, BrainParameters
from mlagents.envs.communicator_objects.agent_info_pb2 import AgentInfoProto

trainer_config = yaml.safe_load(config)
env_manager = SimpleEnvManager(env)
trainers = initialize_trainers(
trainer_factory = TrainerFactory(
external_brains=env_manager.external_brains,
summaries_dir=dir,
run_id=run_id,
model_path=dir,

)
tc = TrainerController(
trainers=trainers,
trainer_factory=trainer_factory,
summaries_dir=dir,
model_path=dir,
run_id=run_id,

10
ml-agents/mlagents/trainers/tests/test_trainer_controller.py


@pytest.fixture
def basic_trainer_controller():
return TrainerController(
trainer_factory=None,
model_path="test_model_path",
summaries_dir="test_summaries_dir",
run_id="test_run_id",

fast_simulation=True,
sampler_manager=SamplerManager({}),
resampling_interval=None,
trainers={},
)

seed = 27
TrainerController(
trainer_factory=None,
model_path="",
summaries_dir="",
run_id="1",

fast_simulation=True,
sampler_manager=SamplerManager({}),
resampling_interval=None,
trainers={},
)
numpy_random_seed.assert_called_with(seed)
tensorflow_set_seed.assert_called_with(seed)

def test_take_step_adds_experiences_to_trainer_and_trains():
tc, trainer_mock = trainer_controller_with_take_step_mocks()
old_step_info = EnvironmentStep(Mock(), Mock(), MagicMock())
new_step_info = EnvironmentStep(Mock(), Mock(), MagicMock())
action_info_dict = {"testbrain": MagicMock()}
old_step_info = EnvironmentStep(Mock(), Mock(), action_info_dict)
new_step_info = EnvironmentStep(Mock(), Mock(), action_info_dict)
trainer_mock.is_ready_update = MagicMock(return_value=True)
env_mock = MagicMock()

21
ml-agents/mlagents/trainers/tests/test_trainer_util.py


expected_config["normalize"] = False
brain_params_mock = BrainParametersMock()
BrainParametersMock.return_value.brain_name = "testbrain"
external_brains = {"testbrain": brain_params_mock}
def mock_constructor(self, brain, trainer_parameters, training, load, seed, run_id):

assert run_id == run_id
with patch.object(OfflineBCTrainer, "__init__", mock_constructor):
trainers = trainer_util.initialize_trainers(
trainer_factory = trainer_util.TrainerFactory(
external_brains=external_brains,
summaries_dir=summaries_dir,
run_id=run_id,
model_path=model_path,

seed=seed,
)
trainers = {}
for _, brain_parameters in external_brains.items():
trainers["testbrain"] = trainer_factory.generate(brain_parameters)
assert "testbrain" in trainers
assert isinstance(trainers["testbrain"], OfflineBCTrainer)

brain_params_mock = BrainParametersMock()
BrainParametersMock.return_value.brain_name = "testbrain"
external_brains = {"testbrain": BrainParametersMock()}
summaries_dir = "test_dir"
run_id = "testrun"

assert multi_gpu == multi_gpu
with patch.object(PPOTrainer, "__init__", mock_constructor):
trainers = trainer_util.initialize_trainers(
trainer_factory = trainer_util.TrainerFactory(
external_brains=external_brains,
summaries_dir=summaries_dir,
run_id=run_id,
model_path=model_path,

seed=seed,
)
trainers = {}
for brain_name, brain_parameters in external_brains.items():
trainers[brain_name] = trainer_factory.generate(brain_parameters)
assert "testbrain" in trainers
assert isinstance(trainers["testbrain"], PPOTrainer)

load_model = False
seed = 11
bad_config = dummy_bad_config()
BrainParametersMock.return_value.brain_name = "testbrain"
trainer_util.initialize_trainers(
trainer_factory = trainer_util.TrainerFactory(
external_brains=external_brains,
summaries_dir=summaries_dir,
run_id=run_id,
model_path=model_path,

seed=seed,
)
trainers = {}
for brain_name, brain_parameters in external_brains.items():
trainers[brain_name] = trainer_factory.generate(brain_parameters)
def test_load_config_missing_file():

61
ml-agents/mlagents/trainers/trainer_controller.py


import os
import json
import logging
from typing import Dict, List, Optional
from typing import Dict, List, Optional, Set
import numpy as np
from mlagents.trainers import tf

from mlagents.envs.timers import hierarchical_timer, get_timer_tree, timed
from mlagents.trainers.trainer import Trainer, TrainerMetrics
from mlagents.trainers.meta_curriculum import MetaCurriculum
from mlagents.trainers.trainer_util import TrainerFactory
trainers: Dict[str, Trainer],
trainer_factory: TrainerFactory,
model_path: str,
summaries_dir: str,
run_id: str,

resampling_interval: Optional[int],
):
"""
:param trainers: Trainers for each brain to train.
:param model_path: Path to save the model.
:param summaries_dir: Folder to save training summaries.
:param run_id: The sub-directory name for model and summary statistics

:param sampler_manager: SamplerManager object handles samplers for resampling the reset parameters.
:param resampling_interval: Specifies number of simulation steps after which reset parameters are resampled.
"""
self.trainers = trainers
self.trainers: Dict[str, Trainer] = {}
self.trainer_factory = trainer_factory
self.model_path = model_path
self.summaries_dir = summaries_dir
self.logger = logging.getLogger("mlagents.envs")

return (
any([t.get_step <= t.get_max_steps for k, t in self.trainers.items()])
or not self.train_model
)
) or len(self.trainers) == 0
def write_to_tensorboard(self, global_step: int) -> None:
for brain_name, trainer in self.trainers.items():

else:
trainer.write_summary(global_step, delta_train_start)
def start_trainer(self, trainer: Trainer, env_manager: EnvManager) -> None:
self.trainers[trainer.brain_name] = trainer
self.logger.info(trainer)
if self.train_model:
trainer.write_tensorboard_text("Hyperparameters", trainer.parameters)
env_manager.set_policy(trainer.brain_name, trainer.policy)
for _, t in self.trainers.items():
self.logger.info(t)
if self.train_model:
for brain_name, trainer in self.trainers.items():
trainer.write_tensorboard_text("Hyperparameters", trainer.parameters)
last_brain_names: Set[str] = set()
for brain_name, trainer in self.trainers.items():
env_manager.set_policy(brain_name, trainer.policy)
external_brains = set(env_manager.external_brains.keys())
new_brains = external_brains - last_brain_names
if last_brain_names != env_manager.external_brains.keys():
for name in new_brains:
trainer = self.trainer_factory.generate(
env_manager.external_brains[name]
)
self.start_trainer(trainer, env_manager)
last_brain_names = external_brains
n_steps = self.advance(env_manager)
for i in range(n_steps):
global_step += 1

)
else:
lessons_incremented = {}
generalization_reset = (
not self.sampler_manager.is_empty()
and (steps != 0)

time_start_step = time()
new_step_infos = env.step()
delta_time_step = time() - time_start_step
trainer.add_experiences(
step_info.previous_all_brain_info,
step_info.current_all_brain_info,
step_info.brain_name_to_action_info[brain_name].outputs,
)
trainer.process_experiences(
step_info.previous_all_brain_info, step_info.current_all_brain_info
)
if brain_name in step_info.brain_name_to_action_info:
trainer.add_experiences(
step_info.previous_all_brain_info,
step_info.current_all_brain_info,
step_info.brain_name_to_action_info[brain_name].outputs,
)
trainer.process_experiences(
step_info.previous_all_brain_info,
step_info.current_all_brain_info,
)
for brain_name, trainer in self.trainers.items():
if brain_name in self.trainer_metrics:
self.trainer_metrics[brain_name].add_delta_step(delta_time_step)

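The `trainer_controller.py` changes above stop assuming a fixed set of trainers: on every pass through the training loop, any brain that has newly appeared in `env_manager.external_brains` gets a trainer generated from the factory and its policy registered. A compact sketch of that bookkeeping (the helper name and type hints are illustrative):

```python
from typing import Dict, Set


def sync_trainers(trainers: Dict[str, object],
                  trainer_factory,
                  env_manager,
                  last_brain_names: Set[str]) -> Set[str]:
    """Create and register a trainer for every brain not seen on a previous step."""
    external_brains = set(env_manager.external_brains.keys())
    for name in external_brains - last_brain_names:
        trainer = trainer_factory.generate(env_manager.external_brains[name])
        trainers[name] = trainer
        env_manager.set_policy(name, trainer.policy)
    return external_brains
```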
160
ml-agents/mlagents/trainers/trainer_util.py


from mlagents.trainers.bc.offline_trainer import OfflineBCTrainer
def initialize_trainers(
trainer_config: Dict[str, Any],
external_brains: Dict[str, BrainParameters],
class TrainerFactory:
def __init__(
self,
trainer_config: Any,
summaries_dir: str,
run_id: str,
model_path: str,
keep_checkpoints: int,
train_model: bool,
load_model: bool,
seed: int,
meta_curriculum: MetaCurriculum = None,
multi_gpu: bool = False,
):
self.trainer_config = trainer_config
self.summaries_dir = summaries_dir
self.run_id = run_id
self.model_path = model_path
self.keep_checkpoints = keep_checkpoints
self.train_model = train_model
self.load_model = load_model
self.seed = seed
self.meta_curriculum = meta_curriculum
self.multi_gpu = multi_gpu
def generate(self, brain_parameters: BrainParameters) -> Trainer:
return initialize_trainer(
self.trainer_config,
brain_parameters,
self.summaries_dir,
self.run_id,
self.model_path,
self.keep_checkpoints,
self.train_model,
self.load_model,
self.seed,
self.meta_curriculum,
self.multi_gpu,
)
def initialize_trainer(
trainer_config: Any,
brain_parameters: BrainParameters,
summaries_dir: str,
run_id: str,
model_path: str,

seed: int,
meta_curriculum: MetaCurriculum = None,
multi_gpu: bool = False,
) -> Dict[str, Trainer]:
) -> Trainer:
Initializes trainers given a provided trainer configuration and set of brains from the environment, as well as
Initializes a trainer given a provided trainer configuration and brain parameters, as well as
:param external_brains: BrainParameters provided by the Unity environment
:param brain_parameters: BrainParameters provided by the Unity environment
:param summaries_dir: Directory to store trainer summary statistics
:param run_id: Run ID to associate with this training run
:param model_path: Path to save the model

:param multi_gpu: Whether to use multi-GPU training
:return:
"""
trainers: Dict[str, Trainer] = {}
trainer_parameters_dict = {}
for brain_name in external_brains: