
Develop deprecate broadcasting (#2669)

* Feature Deprecation : Online Behavioral Cloning

In this PR :
 - Delete the online_bc_trainer
 - Delete the tests for online bc
 - Delete the configuration file for online bc training

* Deleting the BCTeacherHelper.cs Script
   TODO :
 - Remove usages in the scene
 - Documentation Edits

*DO NOT MERGE*

* IMPORTANT : REMOVED ALL IL SCENES
 - Removed all the IL scenes from the Examples folder

* Removed all mentions of online BC training in the Documentation

* Made a note in the Migrating.md doc about the removal of the Online BC feature.

* Modified the Academy UI to remove the control checkbox and replaced it with a train in the editor checkbox

* Removed the Broadcast functionality from the non-Learning brains

* Bug fix

* Note that the scenes are broken since the BroadcastHub has changed

* Modified the LL-API for Python to remove the broadcasting functionality.

* All unit tests are running

* Modifie...
/develop-gpu-test
GitHub committed 5 years ago
Current commit
24ba9d58
49 files changed, including 158 insertions and 324 deletions
  1. 45  UnitySDK/Assets/ML-Agents/Editor/BroadcastHubDrawer.cs
  2. 6   UnitySDK/Assets/ML-Agents/Examples/3DBall/Scenes/3DBall.unity
  3. 6   UnitySDK/Assets/ML-Agents/Examples/3DBall/Scenes/3DBallHard.unity
  4. 4   UnitySDK/Assets/ML-Agents/Examples/Basic/Scenes/Basic.unity
  5. 4   UnitySDK/Assets/ML-Agents/Examples/Bouncer/Scenes/Bouncer.unity
  6. 4   UnitySDK/Assets/ML-Agents/Examples/Crawler/Scenes/CrawlerDynamicTarget.unity
  7. 4   UnitySDK/Assets/ML-Agents/Examples/Crawler/Scenes/CrawlerStaticTarget.unity
  8. 6   UnitySDK/Assets/ML-Agents/Examples/FoodCollector/Scenes/FoodCollector.unity
  9. 4   UnitySDK/Assets/ML-Agents/Examples/FoodCollector/Scenes/VisualFoodCollector.unity
  10. 6  UnitySDK/Assets/ML-Agents/Examples/GridWorld/Scenes/GridWorld.unity
  11. 4  UnitySDK/Assets/ML-Agents/Examples/Hallway/Scenes/Hallway.unity
  12. 3  UnitySDK/Assets/ML-Agents/Examples/Hallway/Scenes/VisualHallway.unity
  13. 4  UnitySDK/Assets/ML-Agents/Examples/PushBlock/Scenes/PushBlock.unity
  14. 3  UnitySDK/Assets/ML-Agents/Examples/PushBlock/Scenes/VisualPushBlock.unity
  15. 4  UnitySDK/Assets/ML-Agents/Examples/Pyramids/Scenes/Pyramids.unity
  16. 3  UnitySDK/Assets/ML-Agents/Examples/Pyramids/Scenes/VisualPyramids.unity
  17. 4  UnitySDK/Assets/ML-Agents/Examples/Reacher/Scenes/Reacher.unity
  18. 6  UnitySDK/Assets/ML-Agents/Examples/Soccer/Scenes/SoccerTwos.unity
  19. 4  UnitySDK/Assets/ML-Agents/Examples/Tennis/Scenes/Tennis.unity
  20. 4  UnitySDK/Assets/ML-Agents/Examples/Walker/Scenes/Walker.unity
  21. 8  UnitySDK/Assets/ML-Agents/Examples/WallJump/Scenes/WallJump.unity
  22. 55 UnitySDK/Assets/ML-Agents/Scripts/Academy.cs
  23. 25 UnitySDK/Assets/ML-Agents/Scripts/Brain.cs
  24. 36 UnitySDK/Assets/ML-Agents/Scripts/BroadcastHub.cs
  25. 4  UnitySDK/Assets/ML-Agents/Scripts/Grpc/GrpcExtensions.cs
  26. 18 UnitySDK/Assets/ML-Agents/Scripts/LearningBrain.cs
  27. 9  docs/Basic-Guide.md
  28. 6  docs/FAQ.md
  29. 5  docs/Getting-Started-with-Balance-Ball.md
  30. 4  docs/Learning-Environment-Create-New.md
  31. 3  docs/Learning-Environment-Design-Academy.md
  32. 43 docs/Learning-Environment-Design-Brains.md
  33. 12 docs/Learning-Environment-Design-Learning-Brains.md
  34. 10 docs/Learning-Environment-Design.md
  35. 2  docs/Learning-Environment-Executable.md
  36. 20 docs/ML-Agents-Overview.md
  37. 2  docs/Migrating.md
  38. 10 docs/Python-API.md
  39. 3  docs/Training-Behavioral-Cloning.md
  40. 2  docs/Unity-Inference-Engine.md
  41. 32 ml-agents-envs/mlagents/envs/environment.py
  42. 2  ml-agents-envs/mlagents/envs/tests/test_envs.py
  43. 3  ml-agents/mlagents/trainers/tests/mock_brain.py
  44. 8  ml-agents/mlagents/trainers/tests/test_bc.py
  45. 2  ml-agents/mlagents/trainers/tests/test_bcmodule.py
  46. 12 ml-agents/mlagents/trainers/tests/test_ppo.py
  47. 6  ml-agents/mlagents/trainers/tests/test_reward_signals.py
  48. 10 ml-agents/mlagents/trainers/tests/test_sac.py
  49. 2  notebooks/getting-started.ipynb

45
UnitySDK/Assets/ML-Agents/Editor/BroadcastHubDrawer.cs


private const float k_LineHeight = 17f;
// The vertical space left below the BroadcastHub UI.
private const float k_ExtraSpaceBelow = 10f;
// The horizontal size of the Control checkbox
private const int k_ControlSize = 80;
/// <summary>
/// Computes the height of the Drawer depending on the property it is showing

position.y += k_LineHeight;
// This is the labels for each columns
var brainWidth = position.width - k_ControlSize;
var brainWidth = position.width;
var controlRect = new Rect(
position.x + brainWidth, position.y, k_ControlSize, position.height);
EditorGUI.LabelField(controlRect, "Control");
controlRect.y += k_LineHeight;
controlRect.x += 15;
DrawBrains(brainRect, controlRect);
DrawBrains(brainRect);
EditorGUI.indentLevel--;
EditorGUI.EndProperty();
}

}
/// <summary>
/// Draws the Brain and Control checkbox for the brains contained in the BroadCastHub.
/// Draws the Brain contained in the BroadcastHub.
/// <param name="controlRect">The Rect to draw the control checkbox.</param>
private void DrawBrains(Rect brainRect, Rect controlRect)
private void DrawBrains(Rect brainRect)
var exposedBrains = m_Hub.broadcastingBrains;
var brain = exposedBrains[index];
var controlledBrains = m_Hub.brainsToControl;
var brain = controlledBrains[index];
brainRect, brain, typeof(Brain), true) as Brain;
brainRect, brain, typeof(LearningBrain), true) as LearningBrain;
m_Hub.broadcastingBrains.RemoveAt(index);
var brainToInsert = exposedBrains.Contains(newBrain) ? null : newBrain;
exposedBrains.Insert(index, brainToInsert);
m_Hub.brainsToControl.RemoveAt(index);
var brainToInsert = controlledBrains.Contains(newBrain) ? null : newBrain;
controlledBrains.Insert(index, brainToInsert);
}
// This is the Rectangle for the control checkbox
EditorGUI.BeginChangeCheck();
if (brain is LearningBrain)
{
var isTraining = m_Hub.IsControlled(brain);
isTraining = EditorGUI.Toggle(controlRect, isTraining);
m_Hub.SetControlled(brain, isTraining);
}
controlRect.y += k_LineHeight;
if (EditorGUI.EndChangeCheck())
{
MarkSceneAsDirty();
}
}
}

{
if (m_Hub.Count > 0)
{
m_Hub.broadcastingBrains.RemoveAt(m_Hub.broadcastingBrains.Count - 1);
m_Hub.brainsToControl.RemoveAt(m_Hub.brainsToControl.Count - 1);
}
}

private void AddBrain()
{
m_Hub.broadcastingBrains.Add(null);
m_Hub.brainsToControl.Add(null);
}
}
}

6
UnitySDK/Assets/ML-Agents/Examples/3DBall/Scenes/3DBall.unity


m_ReflectionIntensity: 1
m_CustomReflection: {fileID: 0}
m_Sun: {fileID: 0}
m_IndirectSpecularColor: {r: 0.4497121, g: 0.4997778, b: 0.5756369, a: 1}
m_IndirectSpecularColor: {r: 0.44971162, g: 0.49977726, b: 0.5756362, a: 1}
--- !u!157 &3
LightmapSettings:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
broadcastHub:
broadcastingBrains:
brainsToControl:
m_BrainsToControl: []
m_MaxSteps: 0
m_TrainingConfiguration:
width: 300
height: 200

6
UnitySDK/Assets/ML-Agents/Examples/3DBall/Scenes/3DBallHard.unity


m_ReflectionIntensity: 1
m_CustomReflection: {fileID: 0}
m_Sun: {fileID: 0}
m_IndirectSpecularColor: {r: 0.4497121, g: 0.4997778, b: 0.5756369, a: 1}
m_IndirectSpecularColor: {r: 0.44971162, g: 0.49977726, b: 0.5756362, a: 1}
--- !u!157 &3
LightmapSettings:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
broadcastHub:
broadcastingBrains:
brainsToControl:
m_BrainsToControl: []
m_MaxSteps: 0
m_TrainingConfiguration:
width: 300
height: 200

4
UnitySDK/Assets/ML-Agents/Examples/Basic/Scenes/Basic.unity


m_Name:
m_EditorClassIdentifier:
broadcastHub:
broadcastingBrains:
brainsToControl:
m_BrainsToControl: []
m_MaxSteps: 0
m_TrainingConfiguration:
width: 80
height: 80

4
UnitySDK/Assets/ML-Agents/Examples/Bouncer/Scenes/Bouncer.unity


m_Name:
m_EditorClassIdentifier:
broadcastHub:
broadcastingBrains:
brainsToControl:
m_BrainsToControl: []
m_MaxSteps: 0
m_TrainingConfiguration:
width: 80
height: 80

4
UnitySDK/Assets/ML-Agents/Examples/Crawler/Scenes/CrawlerDynamicTarget.unity


m_Name:
m_EditorClassIdentifier:
broadcastHub:
broadcastingBrains:
brainsToControl:
m_BrainsToControl: []
m_MaxSteps: 0
m_TrainingConfiguration:
width: 80
height: 80

4
UnitySDK/Assets/ML-Agents/Examples/Crawler/Scenes/CrawlerStaticTarget.unity


m_Name:
m_EditorClassIdentifier:
broadcastHub:
broadcastingBrains:
brainsToControl:
m_BrainsToControl: []
m_MaxSteps: 0
m_TrainingConfiguration:
width: 1280
height: 720

6
UnitySDK/Assets/ML-Agents/Examples/FoodCollector/Scenes/FoodCollector.unity


m_ReflectionIntensity: 1
m_CustomReflection: {fileID: 0}
m_Sun: {fileID: 0}
m_IndirectSpecularColor: {r: 0.4497121, g: 0.4997778, b: 0.5756369, a: 1}
m_IndirectSpecularColor: {r: 0.44971162, g: 0.49977726, b: 0.5756362, a: 1}
--- !u!157 &3
LightmapSettings:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
broadcastHub:
broadcastingBrains:
brainsToControl:
m_BrainsToControl: []
m_MaxSteps: 1500
m_TrainingConfiguration:
width: 500
height: 500

4
UnitySDK/Assets/ML-Agents/Examples/FoodCollector/Scenes/VisualFoodCollector.unity


m_Name:
m_EditorClassIdentifier:
broadcastHub:
broadcastingBrains:
brainsToControl:
m_BrainsToControl: []
m_MaxSteps: 0
m_TrainingConfiguration:
width: 500
height: 500

6
UnitySDK/Assets/ML-Agents/Examples/GridWorld/Scenes/GridWorld.unity


m_ReflectionIntensity: 1
m_CustomReflection: {fileID: 0}
m_Sun: {fileID: 0}
m_IndirectSpecularColor: {r: 0.4497121, g: 0.4997778, b: 0.5756369, a: 1}
m_IndirectSpecularColor: {r: 0.44971162, g: 0.49977726, b: 0.5756362, a: 1}
--- !u!157 &3
LightmapSettings:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
broadcastHub:
broadcastingBrains:
brainsToControl:
m_BrainsToControl: []
m_MaxSteps: 0
m_TrainingConfiguration:
width: 84
height: 84

4
UnitySDK/Assets/ML-Agents/Examples/Hallway/Scenes/Hallway.unity


m_Name:
m_EditorClassIdentifier:
broadcastHub:
broadcastingBrains:
brainsToControl:
m_BrainsToControl: []
m_MaxSteps: 0
m_TrainingConfiguration:
width: 128
height: 128

3
UnitySDK/Assets/ML-Agents/Examples/Hallway/Scenes/VisualHallway.unity


m_Name:
m_EditorClassIdentifier:
broadcastHub:
broadcastingBrains:
brainsToControl:
m_BrainsToControl: []
m_TrainingConfiguration:
width: 128
height: 128

4
UnitySDK/Assets/ML-Agents/Examples/PushBlock/Scenes/PushBlock.unity


m_Name:
m_EditorClassIdentifier:
broadcastHub:
broadcastingBrains:
brainsToControl:
m_BrainsToControl: []
m_MaxSteps: 0
m_TrainingConfiguration:
width: 400
height: 300

3
UnitySDK/Assets/ML-Agents/Examples/PushBlock/Scenes/VisualPushBlock.unity


m_Name:
m_EditorClassIdentifier:
broadcastHub:
broadcastingBrains:
brainsToControl:
m_BrainsToControl: []
m_TrainingConfiguration:
width: 1280
height: 720

4
UnitySDK/Assets/ML-Agents/Examples/Pyramids/Scenes/Pyramids.unity


m_Name:
m_EditorClassIdentifier:
broadcastHub:
broadcastingBrains:
brainsToControl:
m_BrainsToControl: []
m_MaxSteps: 0
m_TrainingConfiguration:
width: 80
height: 80

3
UnitySDK/Assets/ML-Agents/Examples/Pyramids/Scenes/VisualPyramids.unity


m_Name:
m_EditorClassIdentifier:
broadcastHub:
broadcastingBrains:
brainsToControl:
m_BrainsToControl: []
m_TrainingConfiguration:
width: 80
height: 80

4
UnitySDK/Assets/ML-Agents/Examples/Reacher/Scenes/Reacher.unity


m_Name:
m_EditorClassIdentifier:
broadcastHub:
broadcastingBrains:
brainsToControl:
m_BrainsToControl: []
m_MaxSteps: 0
m_TrainingConfiguration:
width: 80
height: 80

6
UnitySDK/Assets/ML-Agents/Examples/Soccer/Scenes/SoccerTwos.unity


m_Name:
m_EditorClassIdentifier:
broadcastHub:
broadcastingBrains:
- {fileID: 11400000, guid: 29ed78b3e8fef4340b3a1f6954b88f18, type: 2}
brainsToControl:
m_BrainsToControl: []
m_MaxSteps: 0
- {fileID: 11400000, guid: 29ed78b3e8fef4340b3a1f6954b88f18, type: 2}
m_TrainingConfiguration:
width: 800
height: 500

4
UnitySDK/Assets/ML-Agents/Examples/Tennis/Scenes/Tennis.unity


m_Name:
m_EditorClassIdentifier:
broadcastHub:
broadcastingBrains:
brainsToControl:
m_BrainsToControl: []
m_MaxSteps: 25000
m_TrainingConfiguration:
width: 300
height: 200

4
UnitySDK/Assets/ML-Agents/Examples/Walker/Scenes/Walker.unity


m_Name:
m_EditorClassIdentifier:
broadcastHub:
broadcastingBrains:
brainsToControl:
m_BrainsToControl: []
m_MaxSteps: 0
m_TrainingConfiguration:
width: 80
height: 80

8
UnitySDK/Assets/ML-Agents/Examples/WallJump/Scenes/WallJump.unity


m_ReflectionIntensity: 1
m_CustomReflection: {fileID: 0}
m_Sun: {fileID: 0}
m_IndirectSpecularColor: {r: 0.44971484, g: 0.49977952, b: 0.57563835, a: 1}
m_IndirectSpecularColor: {r: 0.44971442, g: 0.499779, b: 0.5756377, a: 1}
--- !u!157 &3
LightmapSettings:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
broadcastHub:
broadcastingBrains:
- {fileID: 11400000, guid: b5f530c5bf8d64bf8a18df92e283bb9c, type: 2}
brainsToControl:
m_BrainsToControl: []
m_MaxSteps: 0
- {fileID: 11400000, guid: b5f530c5bf8d64bf8a18df92e283bb9c, type: 2}
m_TrainingConfiguration:
width: 80
height: 80

55
UnitySDK/Assets/ML-Agents/Scripts/Academy.cs


InitializeAcademy();
ICommunicator communicator;
var exposedBrains = broadcastHub.broadcastingBrains.Where(x => x != null).ToList();
var controlledBrains = broadcastHub.broadcastingBrains.Where(
x => x != null && x is LearningBrain && broadcastHub.IsControlled(x));
foreach (var brain1 in controlledBrains)
{
var brain = (LearningBrain)brain1;
brain.SetToControlledExternally();
}
var controlledBrains = broadcastHub.brainsToControl.Where(x => x != null).ToList();
// Try to launch the communicator by usig the arguments passed at launch
// Try to launch the communicator by using the arguments passed at launch
try
{
communicator = new RpcCommunicator(

});
}
// If it fails, we check if there are any external brains in the scene
// and if Unity is in Editor mode
// If there arn't, there is no need for a communicator and it is set
// If there are not, there is no need for a communicator and it is set
#if UNITY_EDITOR
if (controlledBrains.ToList().Count > 0)
{
communicator = new RpcCommunicator(

});
}
#endif
foreach (var trainingBrain in exposedBrains)
{
trainingBrain.SetBatcher(m_BrainBatcher);
}
foreach (var trainingBrain in controlledBrains)
{
trainingBrain.SetBatcher(m_BrainBatcher);
}
m_IsCommunicatorOn = true;
var academyParameters =

foreach (var brain in exposedBrains)
foreach (var brain in controlledBrains)
bp.ToProto(brain.name, broadcastHub.IsControlled(brain)));
bp.ToProto(brain.name, true));
}
academyParameters.EnvironmentParameters =
new CommunicatorObjects.EnvironmentParametersProto();

key, resetParameters[key]
);
}
var pythonParameters = m_BrainBatcher.SendAcademyParameters(academyParameters);
Random.InitState(pythonParameters.Seed);
// We try to exchange the first message with Python. If this fails, it means
// no Python Process is ready to train the environment. In this case, the
//environment must use Inference.
try
{
var pythonParameters = m_BrainBatcher.SendAcademyParameters(academyParameters);
Random.InitState(pythonParameters.Seed);
}
catch
{
communicator = null;
m_BrainBatcher = new Batcher(null);
m_IsCommunicatorOn = false;
foreach (var trainingBrain in controlledBrains)
{
trainingBrain.SetBatcher(null);
}
}
}
// If a communicator is enabled/provided, then we assume we are in

private void UpdateResetParameters()
{
var newResetParameters = m_BrainBatcher.GetEnvironmentParameters();
var newResetParameters = m_BrainBatcher?.GetEnvironmentParameters();
if (newResetParameters != null)
{
foreach (var kv in newResetParameters.FloatParameters)

25
UnitySDK/Assets/ML-Agents/Scripts/Brain.cs


protected Dictionary<Agent, AgentInfo> m_AgentInfos =
new Dictionary<Agent, AgentInfo>(1024);
protected Batcher m_BrainBatcher;
/// <summary>
/// Sets the Batcher of the Brain. The brain will call the batcher at every step and give
/// it the agent's data using SendBrainInfo at each DecideAction call.
/// </summary>
/// <param name="batcher"> The Batcher the brain will use for the current session</param>
public void SetBatcher(Batcher batcher)
{
if (batcher == null)
{
m_BrainBatcher = null;
}
else
{
m_BrainBatcher = batcher;
m_BrainBatcher.SubscribeBrain(name);
}
LazyInitialize();
}
/// <summary>
/// Adds the data of an agent to the current batch so it will be processed in DecideAction.

if (m_IsInitialized)
{
m_AgentInfos.Clear();
m_IsInitialized = false;
}
}

/// </summary>
private void BrainDecideAction()
{
m_BrainBatcher?.SendBrainInfo(name, m_AgentInfos);
/// Is called only once at the begening of the training or inference session.
/// Is called only once at the beginning of the training or inference session.
/// </summary>
protected abstract void Initialize();

36
UnitySDK/Assets/ML-Agents/Scripts/BroadcastHub.cs


[System.Serializable]
public class BroadcastHub
{
[SerializeField]
public List<Brain> broadcastingBrains = new List<Brain>();
private List<Brain> m_BrainsToControl = new List<Brain>();
public List<LearningBrain> brainsToControl = new List<LearningBrain>();
/// <summary>
/// The number of Brains inside the BroadcastingHub.

get { return broadcastingBrains.Count; }
}
/// <summary>
/// Checks that a given Brain is set to be remote controlled.
/// </summary>
/// <param name="brain"> The Brain that is beeing checked</param>
/// <returns>true if the Brain is set to Controlled and false otherwise. Will return
/// false if the Brain is not present in the Hub.</returns>
public bool IsControlled(Brain brain)
{
return m_BrainsToControl.Contains(brain);
get { return brainsToControl.Count; }
}
/// <summary>

/// <param name="controlled"> if true, the Brain will be set to remote controlled. Otherwise
/// the brain will be set to broadcast only.</param>
public void SetControlled(Brain brain, bool controlled)
public void SetControlled(LearningBrain brain)
if (broadcastingBrains.Contains(brain))
if (!brainsToControl.Contains(brain))
if (controlled && !m_BrainsToControl.Contains(brain))
{
m_BrainsToControl.Add(brain);
}
if (!controlled && m_BrainsToControl.Contains(brain))
{
m_BrainsToControl.Remove(brain);
}
brainsToControl.Add(brain);
}
}

public void Clear()
{
broadcastingBrains.Clear();
m_BrainsToControl.Clear();
brainsToControl.Clear();
}
}
}

4
UnitySDK/Assets/ML-Agents/Scripts/Grpc/GrpcExtensions.cs


}
return agentInfoProto;
}
/// <summary>
/// Converts a Brain into to a Protobuff BrainInfoProto so it can be sent
/// </summary>

};
return demoProto;
}
/// <summary>
/// Initialize metadata values based on proto object.
/// </summary>

18
UnitySDK/Assets/ML-Agents/Scripts/LearningBrain.cs


/// <summary>
/// The Learning Brain works differently if you are training it or not.
/// When training your Agents, drag the Learning Brain to the Academy's BroadcastHub and check
/// the checkbox Control. When using a pretrained model, just drag the Model file into the
/// When training your Agents, drag the Learning Brain to the Academy's BroadcastHub.
/// When using a pretrained model, just drag the Model file into the
/// The training will start automatically if Python is ready to train and there is at
/// least one LearningBrain in the BroadcastHub.
/// The property model corresponds to the Model currently attached to the Brain. Before
/// being used, a call to ReloadModel is required.
/// When the Learning Brain is not training, it uses a TensorFlow model to make decisions.

[CreateAssetMenu(fileName = "NewLearningBrain", menuName = "ML-Agents/Learning Brain")]
public class LearningBrain : Brain
{
private Batcher m_Batcher;
private ITensorAllocator m_TensorAllocator;
private TensorGenerator m_TensorGenerator;
private TensorApplier m_TensorApplier;

private IReadOnlyList<TensorProxy> m_InferenceInputs;
private IReadOnlyList<TensorProxy> m_InferenceOutputs;
[NonSerialized]
private bool m_IsControlled;
public void SetToControlledExternally()
public void SetBatcher(Batcher batcher)
m_IsControlled = true;
m_Batcher = batcher;
m_Batcher?.SubscribeBrain(name);
}
/// <inheritdoc />

/// <inheritdoc />
protected override void DecideAction()
{
if (m_IsControlled)
m_Batcher?.SendBrainInfo(name, m_AgentInfos);
if (m_Batcher != null)
{
m_AgentInfos.Clear();
return;

9
docs/Basic-Guide.md


if you want to [use an executable](Learning-Environment-Executable.md) or to
`None` if you want to interact with the current scene in the Unity Editor.
Before building the environment or interacting with it in the editor, select `Ball3DAcademy` in the **Hierarchy** window of the Unity editor and make sure `Control` checkbox is checked under `Ball 3D Academy` component.
Before building the environment or interacting with it in the editor, select `Ball3DAcademy` in the **Hierarchy** window of the Unity editor and make sure the `3DBallLearningBrain` is in the Broadcast Hub of the `Ball3DAcademy` component.
More information and documentation is provided in the
[Python API](Python-API.md) page.
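The in-Editor workflow above can be exercised from Python before building an executable; a minimal sketch, assuming the `mlagents.envs` package from this release and a scene whose Academy lists a Learning Brain in its Broadcast Hub:

```python
from mlagents.envs import UnityEnvironment

# file_name=None attaches to the scene currently open in the Unity Editor;
# press Play in the Editor when the connection message appears.
env = UnityEnvironment(file_name=None)

# Only the Learning Brains placed in the Academy's Broadcast Hub show up here.
print(env.external_brain_names)

env.reset(train_mode=False)
env.close()
```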

**Note**: The Unity prefab system will modify all instances of the agent properties in your scene. If the agent does not synchronize automatically with the prefab, you can hit the Revert button in the top of the **Inspector** window.
2. In the **Hierarchy** window, select `Ball3DAcademy`.
3. In the **Project** window, go to `Assets/ML-Agents/Examples/3DBall/Brains` folder and drag the **3DBallLearning** Brain to the `Brains` property under `Braodcast Hub` in the `Ball3DAcademy` object in the **Inspector** window. In order to train, make sure the `Control` checkbox is selected.
3. In the **Project** window, go to `Assets/ML-Agents/Examples/3DBall/Brains` folder and drag the **3DBallLearning** Brain to the `Brains` property under `Broadcast Hub` in the `Ball3DAcademy` object in the **Inspector** window.
The `Control` checkbox means that in addition to being exposed to Python, the Brain will
be controlled by the Python process (required for training).
![Set Brain to External](images/mlagents-SetBrainToTrain.png)

4. Drag the `<brain_name>.nn` file from the Project window of
the Editor to the **Model** placeholder in the **3DBallLearning**
inspector window.
5. Select Ball3DAcademy in the scene and toggle off Control, each platform's brain now regains control.
6. Press the :arrow_forward: button at the top of the Editor.
5. Press the :arrow_forward: button at the top of the Editor.
## Next Steps

6
docs/FAQ.md


There may be a number of possible causes:
* _Cause_: There may be no LearningBrain with `Control` option checked in the
* _Cause_: There may be no LearningBrain in the
`Broadcast Hub`, and drag your LearningBrain asset into the `Brains` field,
and check the `Control` toggle. Also you need to assign this LearningBrain
`Broadcast Hub`, and drag your LearningBrain asset into the `Brains` field.
Also you need to assign this LearningBrain
asset to all of the Agents you wish to do training on.
* _Cause_: On OSX, the firewall may be preventing communication with the
environment. _Solution_: Add the built environment binary to the list of

5
docs/Getting-Started-with-Balance-Ball.md


properties that control how the environment works.
The **Broadcast Hub** keeps track of which Brains will send data during training.
If a Brain is added to the hub, the data from this Brain will be sent to the external training
process. If the `Control` checkbox is checked, the training process will be able to
control and train the agents linked to the Brain.
process.
The **Training Configuration** and **Inference Configuration** properties
set the graphics and timescale properties for the Unity application.
The Academy uses the **Training Configuration** during training and the

You can create new Brain assets by selecting `Assets ->
Create -> ML-Agents -> Brain`. There are 3 types of Brains.
The **Learning Brain** is a Brain that uses a trained neural network to make decisions.
When the `Control` box is checked in the Brains property under the **Broadcast Hub** in the Academy, the external process that is training the neural network will take over decision making for the agents
When the **Learning Brain** is dragged into the **Broadcast Hub** in the Academy, the external process that is training the neural network will take over decision making for the agents
and ultimately generate a trained neural network. You can also use the
**Learning Brain** with a pre-trained model.
The **Heuristic** Brain allows you to hand-code the Agent logic by extending

4
docs/Learning-Environment-Create-New.md


5. Add your Agent subclasses to appropriate GameObjects, typically, the object
in the scene that represents the Agent in the simulation. Each Agent object
must be assigned a Brain object.
6. If training, check the `Control` checkbox in the BroadcastHub of the Academy.
6. If training, drag the Brain in the BroadcastHub of the Academy.
[run the training process](Training-ML-Agents.md).
**Note:** If you are unfamiliar with Unity, refer to

Now you can train the Agent. To get ready for training, you must first drag the
`RollerBallBrain` asset to the **RollerAgent** GameObject `Brain` field to change to the learning brain.
Then, select the Academy GameObject and check the `Control` checkbox for
Then, select the Academy GameObject and drag
the RollerBallBrain item in the **Broadcast Hub** list. From there, the process is
the same as described in [Training ML-Agents](Training-ML-Agents.md). Note that the
models will be created in the original ml-agents project folder, `ml-agents/models`.

3
docs/Learning-Environment-Design-Academy.md


![Academy Inspector](images/academy.png)
* `Broadcast Hub` - Gathers the Brains that will communicate with the external
process. Any Brain added to the Broadcast Hub will be visible from the external
process. In addition, if the checkbox `Control` is checked, the Brain will be
controllable from the external process and will thus be trainable.
process and controllable from the external process and will thus be trainable.
* `Configuration` - The engine-level settings which correspond to rendering
quality and engine speed.
* `Width` - Width of the environment window in pixels.

43
docs/Learning-Environment-Design-Brains.md


useful to test your Agent code.
During training, use a **Learning Brain**
and drag it into the Academy's `Broadcast Hub` with the `Control` checkbox checked.
and drag it into the Academy's `Broadcast Hub`.
project, add it to the **Model** property of the **Learning Brain** and uncheck
the `Control` checkbox of the `Broadcast Hub`.
project, add it to the **Model** property of the **Learning Brain**.
Brain assets has several important properties that you can set using the
Inspector window. These properties must be appropriate for the Agents using the

actions for the Brain.
The other properties of the Brain depend on the type of Brain you are using.
## Using the Broadcast Feature
The Player, Heuristic and Learning Brains can support
broadcast to an external process. The broadcast feature allows you to collect data
from your Agents using a Python program without controlling them.
### How to use: Unity
To turn it on in Unity, drag the Brain into the Academy's Broadcast Hub but leave
the `Control` checkbox unchecked when present. This will expose the Brain's data
without letting the external process control it.
![Broadcast](images/broadcast.png)
### How to use: Python
When you launch your Unity Environment from a Python program, you can see what
the Agents connected to Brains present in the `Broadcast Hub` are doing.
When calling `step` or
`reset` on your environment, you retrieve a dictionary mapping Brain names to
`BrainInfo` objects. The dictionary contains a `BrainInfo` object for each
Brain in the `Broadcast Hub`.
Just like with a Learning Brain, the `BrainInfo` object contains the fields for
`visual_observations`, `vector_observations`, `text_observations`,
`memories`,`rewards`, `local_done`, `max_reached`, `agents` and
`previous_actions`. Note that `previous_actions` corresponds to the actions that
were taken by the Agents at the previous step, not the current one.
Note that when you do a `step` on the environment, you can only provide actions
for the Brains in the `Broadcast Hub` with the `Control` checkbox checked. If there
are Brains in the `Broadcast Hub` with the
`Control` checkbox checked, simply call `step()` with no arguments.
You can use the broadcast feature to collect data generated by Player,
Heuristics or Learning Brains game sessions. You can then use this data to train
an agent in a supervised context.
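Independent of the broadcast-only mode removed here, the dictionary-of-`BrainInfo` access pattern described above still holds for Learning Brains in the `Broadcast Hub`; a minimal sketch (the build and Brain names are hypothetical):

```python
from mlagents.envs import UnityEnvironment

env = UnityEnvironment(file_name="MyBuild")  # hypothetical build name

# reset() and step() both return a dict mapping Brain names to BrainInfo.
brain_infos = env.reset(train_mode=True)
info = brain_infos["MyLearningBrain"]        # hypothetical Brain name

print(info.vector_observations.shape)        # (num_agents, obs_size)
print(info.rewards, info.local_done)

env.close()
```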

12
docs/Learning-Environment-Design-Learning-Brains.md


# Learning Brains
The **Learning Brain** works differently if you are training it or not.
When training your Agents, drag the **Learning Brain** to the
Academy's `Broadcast Hub` and check the checkbox `Control`. When using a pre-trained
model, just drag the Model file into the `Model` property of the **Learning Brain**.
When used in an environment connected to Python, the Python process will train
the Brain. If no Python Process exists, the **Learning Brain** will use its
pre-trained model.
one Brain asset must be in the Academy's `Broadcast Hub` with the checkbox `Control`
checked. This allows the training process to collect the observations of Agents
using that Brain and give the Agents their actions.
one Brain asset must be in the Academy's `Broadcast Hub`. This allows the training
process to collect the observations of Agents using that Brain and give the Agents
their actions.
In addition to using a **Learning Brain** for training using the ML-Agents learning
algorithms, you can use a **Learning Brain** to control Agents in a Unity

10
docs/Learning-Environment-Design.md


To Create a Brain, go to `Assets -> Create -> Ml-Agents` and select the
type of Brain you want to use. During training, use a **Learning Brain**
and drag it into the Academy's `Broadcast Hub` with the `Control` checkbox checked.
and drag it into the Academy's `Broadcast Hub`.
project, add it to the **Model** property of the **Learning Brain** and uncheck
the `Control` checkbox of the `Broadcast Hub`. See
project, add it to the **Model** property of the **Learning Brain**.
If the Python process is not active, the **Learning Brain** will not train but
use its model. See
[Brains](Learning-Environment-Design-Brains.md) for details on using the
different types of Brains. You can create new kinds of Brains if the three
built-in don't do what you need.

* The training scene must start automatically when your Unity application is
launched by the training process.
* The scene must include an Academy with at least one Brain in the `Broadcast Hub`
with the `Control` checkbox checked.
* The scene must include an Academy with at least one Brain in the `Broadcast Hub`.
* The Academy must reset the scene to a valid starting point for each episode of
training.
* A training episode must have a definite end — either using `Max Steps` or by

2
docs/Learning-Environment-Executable.md


Make sure the Brains in the scene have the right type. For example, if you want
to be able to control your agents from Python, you will need to put the Brain
controlling the Agents to be a **Learning Brain** and drag it into the
Academy's `Broadcast Hub` with the `Control` checkbox checked. In the 3DBall
Academy's `Broadcast Hub`. In the 3DBall
scene, this can be done in the Platform GameObject within the Game prefab in
`Assets/ML-Agents/Examples/3DBall/Prefabs/`, or in each instance of the
Platform in the Scene.

20
docs/ML-Agents-Overview.md


[TensorFlow](Background-TensorFlow.md) model. The embedded TensorFlow model
represents a learned policy and the Brain directly uses this model to
determine the action for each Agent. You can train a **Learning Brain**
by dragging it into the Academy's `Broadcast Hub` with the `Control`
checkbox checked.
by dragging it into the Academy's `Broadcast Hub` and launching the game with
the Python training process.
- **Player** - where decisions are made using real input from a keyboard or
controller. Here, a human player is controlling the Agent and the observations
and rewards collected by the Brain are not used to control the Agent.

a TensorFlow model that the Learning Brain can later use. However,
any user of the ML-Agents toolkit can leverage their own algorithms for
training. In this case, the Brain type would be set to Learning and be linked
to the BroadcastHub (with checked `Control` checkbox)
to the BroadcastHub
and the behaviors of all the Agents in the scene will be controlled within Python.
You can even turn your environment into a [gym.](../gym-unity/README.md)

a way to randomly sample Reset Parameters of the environment during training. See
[Training Generalized Reinforcement Learning Agents](Training-Generalized-Reinforcement-Learning-Agents.md)
to learn more about this feature.
- **Broadcasting** - As discussed earlier, a Learning Brain sends the
observations for all its Agents to the Python API when dragged into the
Academy's `Broadcast Hub` with the `Control` checkbox checked. This is helpful
for training and later inference. Broadcasting is a feature which can be
enabled all types of Brains (Player, Learning, Heuristic) where the Agent
observations and actions are also sent to the Python API (despite the fact
that the Agent is **not** controlled by the Python API). This feature is
leveraged by Imitation Learning, where the observations and actions for a
Player Brain are used to learn the policies of an agent through demonstration.
However, this could also be helpful for the Heuristic and Learning Brains,
particularly when debugging agent behaviors. You can learn more about using
the broadcasting feature
[here](Learning-Environment-Design-Brains.md#using-the-broadcast-feature).
- **Docker Set-up (Experimental)** - To facilitate setting up ML-Agents without
installing Python or TensorFlow directly, we provide a

2
docs/Migrating.md


### Important Changes
* The definition of the gRPC service has changed.
* The online BC training feature has been removed.
* The BroadcastHub of the Academy no longer has a `Control` checkbox. All Learning Brains in the BroadcastHub will be considered as trainable (although the training will only be launched if the Python Process is ready and will use inference otherwise)
* The broadcast feature has been deprecated. Only LearningBrains can communicate with Python.
#### Steps to Migrate
* In order to be able to train, make sure both your ML-Agents Python package and UnitySDK code come from the v0.11 release. Training will not work, for example, if you update the ML-Agents Python package, and only update the API Version in UnitySDK.
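For Python scripts, the most visible consequence of dropping broadcasting is that broadcast-only brains are no longer listed by the environment; a minimal migration sketch, mirroring the `notebooks/getting-started.ipynb` change in this commit (the build name is hypothetical):

```python
from mlagents.envs import UnityEnvironment

env = UnityEnvironment(file_name="3DBall")  # hypothetical build name

# Before this change: broadcasting brains were reachable through env.brain_names.
# default_brain = env.brain_names[0]

# After this change: only the Learning Brains in the Broadcast Hub are exposed,
# through env.external_brain_names.
default_brain = env.external_brain_names[0]
brain = env.brains[default_brain]

env.close()
```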

10
docs/Python-API.md


the ML-Agents SDK.
To communicate with an Agent in a Unity environment from a Python program, the
Agent must either use a Brain present in the Academy's `Broadcast Hub`.
Agent must use a LearningBrain present in the Academy's `Broadcast Hub`.
actions for Agents with Brains with the `Control` checkbox of the
Academy's `Broadcast Hub` checked, but can only observe broadcasting
Brains (the information you receive for an Agent is the same in both cases).
actions for Agents with Brains in the
Academy's `Broadcast Hub`.
_Notice: Currently communication between Unity and Python takes place over an
open socket without authentication. As such, please make sure that the network

observations = brainInfo.vector_observations
```
Note that if you have more than one Brain in the Academy's `Broadcast Hub` with
the `Control` checkbox checked, you
Note that if you have more than one Brain in the Academy's `Broadcast Hub`, you
must provide dictionaries from Brain names to arrays for `action`, `memory`
and `value`. For example: If you have two Learning Brains named `brain1` and
`brain2` each with one Agent taking two continuous actions, then you can
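step the environment with a dictionary mapping each Brain name to its action array; a minimal illustrative sketch (the action values are placeholders):

```python
# One Agent per Brain, two continuous actions each; the values are placeholders.
action = {
    "brain1": [0.1, -0.3],
    "brain2": [0.0, 0.5],
}
brain_infos = env.step(action)
```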

3
docs/Training-Behavioral-Cloning.md


1. Choose an agent you would like to learn to imitate some set of demonstrations.
2. Record a set of demonstration using the `Demonstration Recorder` (see [here](Training-Imitation-Learning.md)).
For illustrative purposes we will refer to this file as `AgentRecording.demo`.
3. Build the scene, assigning the agent a Learning Brain, and set the Brain to
Control in the Broadcast Hub. For more information on Brains, see
3. Build the scene, assigning the agent a Learning Brain, and dragging it in the Broadcast Hub. For more information on Brains, see
[here](Learning-Environment-Design-Brains.md).
4. Open the `config/offline_bc_config.yaml` file.
5. Modify the `demo_path` parameter in the file to reference the path to the

2
docs/Unity-Inference-Engine.md


When using a **Learning Brain**, drag the `.nn` file into the **Model** field
in the Inspector.
Uncheck the `Control` checkbox for the corresponding **Brain** in the
**BroadcastHub** of the Academy.
Select the **Inference Device** : CPU or GPU you want to use for Inference.
**Note:** For most of the models generated with the ML-Agents toolkit, CPU will be faster than GPU.

32
ml-agents-envs/mlagents/envs/environment.py


self._academy_name = aca_params.name
self._log_path = aca_params.log_path
self._brains: Dict[str, BrainParameters] = {}
self._brain_names: List[str] = []
self._brain_names += [brain_param.brain_name]
if brain_param.is_training:
self._external_brain_names += [brain_param.brain_name]
self._num_brains = len(self._brain_names)
self._external_brain_names += [brain_param.brain_name]
self._num_external_brains = len(self._external_brain_names)
self._resetParameters = dict(aca_params.environment_parameters.float_parameters)
logger.info(

return self._academy_name
@property
def number_brains(self):
return self._num_brains
@property
@property
def brain_names(self):
return self._brain_names
@property
def external_brain_names(self):

def __str__(self):
return (
"""Unity Academy name: {0}
Number of Brains: {1}
Number of Training Brains : {2}
Reset Parameters :\n\t\t{3}""".format(
Number of Training Brains : {1}
Reset Parameters :\n\t\t{2}""".format(
str(self._num_brains),
str(self._num_external_brains),
"\n\t\t".join(
[

elif self._num_external_brains > 1:
raise UnityActionException(
"You have {0} brains, you need to feed a dictionary of brain names a keys, "
"and vector_actions as values".format(self._num_brains)
"and vector_actions as values".format(self._num_external_brains)
)
else:
raise UnityActionException(

elif self._num_external_brains > 1:
raise UnityActionException(
"You have {0} brains, you need to feed a dictionary of brain names as keys "
"and memories as values".format(self._num_brains)
"and memories as values".format(self._num_external_brains)
)
else:
raise UnityActionException(

elif self._num_external_brains > 1:
raise UnityActionException(
"You have {0} brains, you need to feed a dictionary of brain names as keys "
"and text_actions as values".format(self._num_brains)
"and text_actions as values".format(self._num_external_brains)
)
else:
raise UnityActionException(

raise UnityActionException(
"You have {0} brains, you need to feed a dictionary of brain names as keys "
"and state/action value estimates as values".format(
self._num_brains
self._num_external_brains
)
)
else:

elif self._num_external_brains > 1:
raise UnityActionException(
"You have {0} brains, you need to feed a dictionary of brain names as keys "
"and CustomAction instances as values".format(self._num_brains)
"and CustomAction instances as values".format(
self._num_external_brains
)
)
else:
raise UnityActionException(

2
ml-agents-envs/mlagents/envs/tests/test_envs.py


discrete_action=False, visual_inputs=0
)
env = UnityEnvironment(" ")
assert env.brain_names[0] == "RealFakeBrain"
assert env.external_brain_names[0] == "RealFakeBrain"
env.close()

3
ml-agents/mlagents/trainers/tests/mock_brain.py


mock_env.return_value.academy_name = "MockAcademy"
mock_env.return_value.brains = {brain_name: mock_brain}
mock_env.return_value.external_brain_names = [brain_name]
mock_env.return_value.brain_names = [brain_name]
mock_env.return_value.reset.return_value = {brain_name: mock_braininfo}
mock_env.return_value.step.return_value = {brain_name: mock_braininfo}

for i in range(buffer_init_samples):
brain_info_list.append(env.step()[env.brain_names[0]])
brain_info_list.append(env.step()[env.external_brain_names[0]])
buffer = create_buffer(brain_info_list, policy.brain, policy.sequence_length)
return buffer

8
ml-agents/mlagents/trainers/tests/test_bc.py


)
env = UnityEnvironment(" ")
brain_infos = env.reset()
brain_info = brain_infos[env.brain_names[0]]
brain_info = brain_infos[env.external_brain_names[0]]
model_path = env.brain_names[0]
model_path = env.external_brain_names[0]
policy = BCPolicy(0, env.brains[env.brain_names[0]], trainer_parameters, False)
policy = BCPolicy(
0, env.brains[env.external_brain_names[0]], trainer_parameters, False
)
run_out = policy.evaluate(brain_info)
assert run_out["action"].shape == (3, 2)

2
ml-agents/mlagents/trainers/tests/test_bcmodule.py


mb.setup_mock_unityenvironment(mock_env, mock_brain, mock_braininfo)
env = mock_env()
model_path = env.brain_names[0]
model_path = env.external_brain_names[0]
trainer_config["model_path"] = model_path
trainer_config["keep_checkpoints"] = 3
trainer_config["use_recurrent"] = use_rnn

12
ml-agents/mlagents/trainers/tests/test_ppo.py


)
env = UnityEnvironment(" ")
brain_infos = env.reset()
brain_info = brain_infos[env.brain_names[0]]
brain_info = brain_infos[env.external_brain_names[0]]
model_path = env.brain_names[0]
model_path = env.external_brain_names[0]
0, env.brains[env.brain_names[0]], trainer_parameters, False, False
0, env.brains[env.external_brain_names[0]], trainer_parameters, False, False
)
run_out = policy.evaluate(brain_info)
assert run_out["action"].shape == (3, 2)

)
env = UnityEnvironment(" ")
brain_infos = env.reset()
brain_info = brain_infos[env.brain_names[0]]
brain_info = brain_infos[env.external_brain_names[0]]
model_path = env.brain_names[0]
model_path = env.external_brain_names[0]
0, env.brains[env.brain_names[0]], trainer_parameters, False, False
0, env.brains[env.external_brain_names[0]], trainer_parameters, False, False
)
run_out = policy.get_value_estimates(brain_info, 0, done=False)
for key, val in run_out.items():

6
ml-agents/mlagents/trainers/tests/test_reward_signals.py


)
trainer_parameters = trainer_config
model_path = env.brain_names[0]
model_path = env.external_brain_names[0]
trainer_parameters["model_path"] = model_path
trainer_parameters["keep_checkpoints"] = 3
trainer_parameters["reward_signals"].update(reward_signal_config)

def reward_signal_eval(env, policy, reward_signal_name):
brain_infos = env.reset()
brain_info = brain_infos[env.brain_names[0]]
next_brain_info = env.step()[env.brain_names[0]]
brain_info = brain_infos[env.external_brain_names[0]]
next_brain_info = env.step()[env.external_brain_names[0]]
# Test evaluate
rsig_result = policy.reward_signals[reward_signal_name].evaluate(
brain_info, next_brain_info

10
ml-agents/mlagents/trainers/tests/test_sac.py


)
trainer_parameters = dummy_config
model_path = env.brain_names[0]
model_path = env.external_brain_names[0]
trainer_parameters["model_path"] = model_path
trainer_parameters["keep_checkpoints"] = 3
trainer_parameters["use_recurrent"] = use_rnn

mock_env, dummy_config, use_rnn=False, use_discrete=False, use_visual=False
)
brain_infos = env.reset()
brain_info = brain_infos[env.brain_names[0]]
brain_info = brain_infos[env.external_brain_names[0]]
run_out = policy.evaluate(brain_info)
assert run_out["action"].shape == (NUM_AGENTS, VECTOR_ACTION_SPACE[0])

mock_env, dummy_config, use_rnn=False, use_discrete=True, use_visual=False
)
brain_infos = env.reset()
brain_info = brain_infos[env.brain_names[0]]
brain_info = brain_infos[env.external_brain_names[0]]
run_out = policy.evaluate(brain_info)
assert run_out["action"].shape == (NUM_AGENTS, len(DISCRETE_ACTION_SPACE))

mock_env, dummy_config, use_rnn=False, use_discrete=True, use_visual=True
)
brain_infos = env.reset()
brain_info = brain_infos[env.brain_names[0]]
brain_info = brain_infos[env.external_brain_names[0]]
run_out = policy.evaluate(brain_info)
assert run_out["action"].shape == (NUM_AGENTS, len(DISCRETE_ACTION_SPACE))

mock_env, dummy_config, use_rnn=True, use_discrete=True, use_visual=False
)
brain_infos = env.reset()
brain_info = brain_infos[env.brain_names[0]]
brain_info = brain_infos[env.external_brain_names[0]]
run_out = policy.evaluate(brain_info)
assert run_out["action"].shape == (NUM_AGENTS, len(DISCRETE_ACTION_SPACE))

2
notebooks/getting-started.ipynb


"env = UnityEnvironment(file_name=env_name)\n",
"\n",
"# Set the default brain to work with\n",
"default_brain = env.brain_names[0]\n",
"default_brain = env.external_brain_names[0]\n",
"brain = env.brains[default_brain]"
]
},
