
Merge branch 'master' into soccer-fives

Andrew Cohen, 5 years ago
Current commit
53bea15c
181 files changed, 3535 insertions, 3419 deletions
  1. .circleci/config.yml (7)
  2. .yamato/com.unity.ml-agents-test.yml (6)
  3. DevProject/Packages/manifest.json (8)
  4. DevProject/ProjectSettings/ProjectVersion.txt (4)
  5. Project/Assets/ML-Agents/Examples/3DBall/Scenes/3DBall.unity (12)
  6. Project/Assets/ML-Agents/Examples/3DBall/Scenes/3DBallHard.unity (12)
  7. Project/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DAgent.cs (10)
  8. Project/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DHardAgent.cs (10)
  9. Project/Assets/ML-Agents/Examples/Basic/Scenes/Basic.unity (9)
  10. Project/Assets/ML-Agents/Examples/Basic/Scripts/BasicController.cs (4)
  11. Project/Assets/ML-Agents/Examples/Bouncer/Scripts/BouncerAgent.cs (14)
  12. Project/Assets/ML-Agents/Examples/Crawler/Scenes/CrawlerDynamicTarget.unity (10)
  13. Project/Assets/ML-Agents/Examples/Crawler/Scripts/CrawlerAgent.cs (6)
  14. Project/Assets/ML-Agents/Examples/FoodCollector/Scripts/FoodCollectorAgent.cs (7)
  15. Project/Assets/ML-Agents/Examples/GridWorld/Demos/ExpertGrid.demo (1001)
  16. Project/Assets/ML-Agents/Examples/GridWorld/Scenes/GridWorld.unity (28)
  17. Project/Assets/ML-Agents/Examples/GridWorld/Scripts/GridAgent.cs (12)
  18. Project/Assets/ML-Agents/Examples/GridWorld/Scripts/GridArea.cs (2)
  19. Project/Assets/ML-Agents/Examples/Hallway/Scripts/HallwayAgent.cs (9)
  20. Project/Assets/ML-Agents/Examples/PushBlock/Scripts/PushAgentBasic.cs (9)
  21. Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo (1001)
  22. Project/Assets/ML-Agents/Examples/Pyramids/Scripts/PyramidAgent.cs (9)
  23. Project/Assets/ML-Agents/Examples/Reacher/Scripts/ReacherAgent.cs (6)
  24. Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/GroundContact.cs (2)
  25. Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ModelOverrider.cs (5)
  26. Project/Assets/ML-Agents/Examples/Soccer/Prefabs/SoccerFieldFives.prefab (440)
  27. Project/Assets/ML-Agents/Examples/Soccer/Prefabs/SoccerFieldTwos.prefab (176)
  28. Project/Assets/ML-Agents/Examples/Soccer/Scripts/AgentSoccer.cs (12)
  29. Project/Assets/ML-Agents/Examples/Soccer/Scripts/SoccerFieldArea.cs (2)
  30. Project/Assets/ML-Agents/Examples/Startup/Scripts/Startup.cs (34)
  31. Project/Assets/ML-Agents/Examples/Template/AgentPrefabsAndColors.unity (722)
  32. Project/Assets/ML-Agents/Examples/Template/Scene.unity (133)
  33. Project/Assets/ML-Agents/Examples/Template/Scripts/TemplateAgent.cs (4)
  34. Project/Assets/ML-Agents/Examples/Tennis/Scripts/HitWall.cs (4)
  35. Project/Assets/ML-Agents/Examples/Tennis/Scripts/TennisAgent.cs (8)
  36. Project/Assets/ML-Agents/Examples/Walker/Scripts/WalkerAgent.cs (8)
  37. Project/Assets/ML-Agents/Examples/WallJump/Scripts/WallJumpAgent.cs (16)
  38. Project/Packages/manifest.json (4)
  39. Project/ProjectSettings/ProjectSettings.asset (5)
  40. Project/ProjectSettings/ProjectVersion.txt (2)
  41. README.md (110)
  42. com.unity.ml-agents/CHANGELOG.md (25)
  43. com.unity.ml-agents/Documentation~/com.unity.ml-agents.md (93)
  44. com.unity.ml-agents/Editor/BehaviorParametersEditor.cs (64)
  45. com.unity.ml-agents/Editor/RayPerceptionSensorComponentBaseEditor.cs (27)
  46. com.unity.ml-agents/Runtime/Academy.cs (10)
  47. com.unity.ml-agents/Runtime/Agent.cs (154)
  48. com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs (12)
  49. com.unity.ml-agents/Runtime/Communicator/RpcCommunicator.cs (61)
  50. com.unity.ml-agents/Runtime/DecisionRequester.cs (9)
  51. com.unity.ml-agents/Runtime/Inference/BarracudaModelParamLoader.cs (16)
  52. com.unity.ml-agents/Runtime/Inference/ModelRunner.cs (2)
  53. com.unity.ml-agents/Runtime/Policies/BehaviorParameters.cs (164)
  54. com.unity.ml-agents/Runtime/Policies/BrainParameters.cs (20)
  55. com.unity.ml-agents/Runtime/Sensors/CameraSensor.cs (19)
  56. com.unity.ml-agents/Runtime/Sensors/CameraSensorComponent.cs (82)
  57. com.unity.ml-agents/Runtime/Sensors/RayPerceptionSensor.cs (29)
  58. com.unity.ml-agents/Runtime/Sensors/RayPerceptionSensorComponent3D.cs (8)
  59. com.unity.ml-agents/Runtime/Sensors/RayPerceptionSensorComponentBase.cs (75)
  60. com.unity.ml-agents/Runtime/Sensors/RenderTextureSensor.cs (28)
  61. com.unity.ml-agents/Runtime/Sensors/RenderTextureSensorComponent.cs (57)
  62. com.unity.ml-agents/Runtime/SideChannels/EngineConfigurationChannel.cs (31)
  63. com.unity.ml-agents/Runtime/SideChannels/FloatPropertiesChannel.cs (109)
  64. com.unity.ml-agents/Runtime/SideChannels/RawBytesChannel.cs (10)
  65. com.unity.ml-agents/Runtime/SideChannels/SideChannel.cs (13)
  66. com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs (178)
  67. com.unity.ml-agents/Tests/Editor/SideChannelTests.cs (69)
  68. com.unity.ml-agents/package.json (4)
  69. config/sac_trainer_config.yaml (2)
  70. config/trainer_config.yaml (4)
  71. docs/Getting-Started-with-Balance-Ball.md (21)
  72. docs/Installation.md (160)
  73. docs/Learning-Environment-Best-Practices.md (5)
  74. docs/Learning-Environment-Create-New.md (38)
  75. docs/Learning-Environment-Design-Agents.md (47)
  76. docs/Learning-Environment-Design.md (33)
  77. docs/Learning-Environment-Examples.md (4)
  78. docs/Limitations.md (35)
  79. docs/Migrating.md (29)
  80. docs/Python-API.md (236)
  81. docs/Readme.md (3)
  82. docs/Training-Curriculum-Learning.md (2)
  83. docs/Training-Imitation-Learning.md (37)
  84. docs/Training-ML-Agents.md (1)
  85. docs/Unity-Inference-Engine.md (2)
  86. docs/Using-Docker.md (9)
  87. docs/Using-Virtual-Environment.md (12)
  88. docs/images/unity_package_manager_window.png (951)
  89. docs/localized/KR/docs/Installation.md (2)
  90. docs/localized/zh-CN/docs/Installation.md (2)
  91. gym-unity/gym_unity/__init__.py (2)
  92. ml-agents-envs/mlagents_envs/__init__.py (2)
  93. ml-agents-envs/mlagents_envs/base_env.py (10)
  94. ml-agents-envs/mlagents_envs/communicator.py (4)
  95. ml-agents-envs/mlagents_envs/environment.py (7)
  96. ml-agents-envs/mlagents_envs/exception.py (5)
  97. ml-agents-envs/mlagents_envs/rpc_communicator.py (3)
  98. ml-agents-envs/mlagents_envs/rpc_utils.py (7)
  99. ml-agents-envs/mlagents_envs/side_channel/__init__.py (4)
  100. ml-agents-envs/mlagents_envs/side_channel/engine_configuration_channel.py (23)

7
.circleci/config.yml


pip_constraints:
type: string
description: Constraints file that is passed to "pip install". We constraint older versions of libraries for older python runtime, in order to help ensure compatibility.
enforce_onnx_conversion:
type: integer
default: 0
description: Whether to raise an exception if ONNX models couldn't be saved.
executor: << parameters.executor >>
working_directory: ~/repo

TEST_ENFORCE_ONNX_CONVERSION: << parameters.enforce_onnx_conversion >>
steps:
- checkout

pyversion: 3.7.3
# Test python 3.7 with the newest supported versions
pip_constraints: test_constraints_max_tf1_version.txt
# Make sure ONNX conversion passes here (recent version of tensorflow 1.x)
enforce_onnx_conversion: 1
- build_python:
name: python_3.7.3+tf2
executor: python373

6
.yamato/com.unity.ml-agents-test.yml


test_editors:
- version: 2018.4
# 2018.4 doesn't support code-coverage
coverageOptions:
coverageOptions: --enable-code-coverage --code-coverage-options 'generateHtmlReport;assemblyFilters:+Unity.ML-Agents'
coverageOptions: --enable-code-coverage --code-coverage-options 'generateHtmlReport;assemblyFilters:+Unity.ML-Agents'
test_platforms:
- name: win
type: Unity::VM

flavor: {{ platform.flavor}}
commands:
- npm install upm-ci-utils@stable -g --registry https://api.bintray.com/npm/unity/unity-npm
- upm-ci package test -u {{ editor.version }} --package-path com.unity.ml-agents
- upm-ci package test -u {{ editor.version }} --package-path com.unity.ml-agents {{ editor.coverageOptions }}
artifacts:
logs:
paths:

8
DevProject/Packages/manifest.json


"com.unity.2d.tilemap": "1.0.0",
"com.unity.ads": "2.0.8",
"com.unity.analytics": "3.3.5",
"com.unity.coding": "0.1.0-preview.13",
"com.unity.collab-proxy": "1.2.16",
"com.unity.ide.rider": "1.1.4",
"com.unity.ide.vscode": "1.1.4",

"com.unity.package-validation-suite": "0.7.15-preview",
"com.unity.purchasing": "2.0.6",
"com.unity.test-framework": "1.1.9",
"com.unity.test-framework": "1.1.11",
"com.unity.timeline": "1.2.10",
"com.unity.timeline": "1.2.12",
"com.unity.ugui": "1.0.0",
"com.unity.xr.legacyinputhelpers": "1.3.8",
"com.unity.modules.ai": "1.0.0",

"com.unity.modules.video": "1.0.0",
"com.unity.modules.vr": "1.0.0",
"com.unity.modules.wind": "1.0.0",
"com.unity.modules.xr": "1.0.0",
"com.unity.coding" : "0.1.0-preview.13"
"com.unity.modules.xr": "1.0.0"
},
"registry": "https://artifactory.prd.cds.internal.unity3d.com/artifactory/api/npm/upm-candidates",
"testables": [

4
DevProject/ProjectSettings/ProjectVersion.txt


m_EditorVersion: 2019.3.0f6
m_EditorVersionWithRevision: 2019.3.0f6 (27ab2135bccf)
m_EditorVersion: 2019.3.3f1
m_EditorVersionWithRevision: 2019.3.3f1 (7ceaae5f7503)

12
Project/Assets/ML-Agents/Examples/3DBall/Scenes/3DBall.unity


m_ReflectionIntensity: 1
m_CustomReflection: {fileID: 0}
m_Sun: {fileID: 0}
m_IndirectSpecularColor: {r: 0.4497121, g: 0.49977785, b: 0.57563704, a: 1}
m_IndirectSpecularColor: {r: 0.44971162, g: 0.49977726, b: 0.5756362, a: 1}
m_UseRadianceAmbientProbe: 0
--- !u!157 &3
LightmapSettings:

m_Component:
- component: {fileID: 807556627}
- component: {fileID: 807556626}
- component: {fileID: 807556625}
- component: {fileID: 807556624}
- component: {fileID: 807556623}
m_Layer: 0

m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 807556622}
m_Enabled: 1
--- !u!92 &807556625
Behaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 807556622}
m_Enabled: 1
--- !u!20 &807556626
Camera:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
gravityMultiplier: 1
monitorVerticalOffset: 0
fixedDeltaTime: 0.02
maximumDeltaTime: 0.33333334
solverIterations: 6

12
Project/Assets/ML-Agents/Examples/3DBall/Scenes/3DBallHard.unity


m_ReflectionIntensity: 1
m_CustomReflection: {fileID: 0}
m_Sun: {fileID: 0}
m_IndirectSpecularColor: {r: 0.4497121, g: 0.49977785, b: 0.57563704, a: 1}
m_IndirectSpecularColor: {r: 0.44971162, g: 0.49977726, b: 0.5756362, a: 1}
m_UseRadianceAmbientProbe: 0
--- !u!157 &3
LightmapSettings:

m_Component:
- component: {fileID: 807556627}
- component: {fileID: 807556626}
- component: {fileID: 807556625}
- component: {fileID: 807556624}
- component: {fileID: 807556623}
m_Layer: 0

m_GameObject: {fileID: 807556622}
m_Enabled: 1
--- !u!124 &807556624
Behaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 807556622}
m_Enabled: 1
--- !u!92 &807556625
Behaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}

m_Name:
m_EditorClassIdentifier:
gravityMultiplier: 1
monitorVerticalOffset: 0
fixedDeltaTime: 0.02
maximumDeltaTime: 0.33333334
solverIterations: 6

10
Project/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DAgent.cs


[Header("Specific to Ball3D")]
public GameObject ball;
Rigidbody m_BallRb;
IFloatProperties m_ResetParams;
FloatPropertiesChannel m_ResetParams;
public override void InitializeAgent()
public override void Initialize()
{
m_BallRb = ball.GetComponent<Rigidbody>();
m_ResetParams = Academy.Instance.FloatProperties;

sensor.AddObservation(m_BallRb.velocity);
}
public override void AgentAction(float[] vectorAction)
public override void OnActionReceived(float[] vectorAction)
{
var actionZ = 2f * Mathf.Clamp(vectorAction[0], -1f, 1f);
var actionX = 2f * Mathf.Clamp(vectorAction[1], -1f, 1f);

Mathf.Abs(ball.transform.position.z - gameObject.transform.position.z) > 3f)
{
SetReward(-1f);
Done();
EndEpisode();
}
else
{

public override void AgentReset()
public override void OnEpisodeBegin()
{
gameObject.transform.rotation = new Quaternion(0f, 0f, 0f, 0f);
gameObject.transform.Rotate(new Vector3(1, 0, 0), Random.Range(-10f, 10f));
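
Taken together, the Ball3D hunks show the per-agent API rename in this release: InitializeAgent, AgentAction and AgentReset become Initialize, OnActionReceived and OnEpisodeBegin, Done() becomes EndEpisode(), and IFloatProperties fields become FloatPropertiesChannel. A minimal migrated agent would look roughly like the sketch below; the class name and the using directives are assumptions for this package version, not part of the diff.

using MLAgents;
using MLAgents.Sensors;   // namespaces assumed for this package version
using UnityEngine;

public class MigratedBall3DAgent : Agent   // hypothetical example class
{
    public GameObject ball;
    Rigidbody m_BallRb;
    FloatPropertiesChannel m_ResetParams;   // was IFloatProperties

    public override void Initialize()        // was InitializeAgent()
    {
        m_BallRb = ball.GetComponent<Rigidbody>();
        m_ResetParams = Academy.Instance.FloatProperties;
    }

    public override void CollectObservations(VectorSensor sensor)
    {
        sensor.AddObservation(m_BallRb.velocity);
    }

    public override void OnActionReceived(float[] vectorAction)   // was AgentAction()
    {
        var actionZ = 2f * Mathf.Clamp(vectorAction[0], -1f, 1f);
        var actionX = 2f * Mathf.Clamp(vectorAction[1], -1f, 1f);
        gameObject.transform.Rotate(new Vector3(0f, 0f, 1f), actionZ);  // simplified; the real
        gameObject.transform.Rotate(new Vector3(1f, 0f, 0f), actionX);  // agent limits the tilt angle
        if (Mathf.Abs(ball.transform.position.z - gameObject.transform.position.z) > 3f)
        {
            SetReward(-1f);
            EndEpisode();                     // was Done()
        }
    }

    public override void OnEpisodeBegin()     // was AgentReset()
    {
        gameObject.transform.rotation = Quaternion.identity;
        gameObject.transform.Rotate(new Vector3(1, 0, 0), Random.Range(-10f, 10f));
    }
}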

10
Project/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DHardAgent.cs


[Header("Specific to Ball3DHard")]
public GameObject ball;
Rigidbody m_BallRb;
IFloatProperties m_ResetParams;
FloatPropertiesChannel m_ResetParams;
public override void InitializeAgent()
public override void Initialize()
{
m_BallRb = ball.GetComponent<Rigidbody>();
m_ResetParams = Academy.Instance.FloatProperties;

sensor.AddObservation((ball.transform.position - gameObject.transform.position));
}
public override void AgentAction(float[] vectorAction)
public override void OnActionReceived(float[] vectorAction)
{
var actionZ = 2f * Mathf.Clamp(vectorAction[0], -1f, 1f);
var actionX = 2f * Mathf.Clamp(vectorAction[1], -1f, 1f);

Mathf.Abs(ball.transform.position.z - gameObject.transform.position.z) > 3f)
{
SetReward(-1f);
Done();
EndEpisode();
}
else
{

public override void AgentReset()
public override void OnEpisodeBegin()
{
gameObject.transform.rotation = new Quaternion(0f, 0f, 0f, 0f);
gameObject.transform.Rotate(new Vector3(1, 0, 0), Random.Range(-10f, 10f));

9
Project/Assets/ML-Agents/Examples/Basic/Scenes/Basic.unity


m_Component:
- component: {fileID: 1715640925}
- component: {fileID: 1715640924}
- component: {fileID: 1715640923}
- component: {fileID: 1715640922}
- component: {fileID: 1715640921}
m_Layer: 0

m_GameObject: {fileID: 1715640920}
m_Enabled: 1
--- !u!124 &1715640922
Behaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 1715640920}
m_Enabled: 1
--- !u!92 &1715640923
Behaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}

4
Project/Assets/ML-Agents/Examples/Basic/Scripts/BasicController.cs


if (m_Position == k_SmallGoalPosition)
{
m_Agent.AddReward(0.1f);
m_Agent.Done();
m_Agent.EndEpisode();
ResetAgent();
}

m_Agent.Done();
m_Agent.EndEpisode();
ResetAgent();
}
}

14
Project/Assets/ML-Agents/Examples/Bouncer/Scripts/BouncerAgent.cs


int m_NumberJumps = 20;
int m_JumpLeft = 20;
IFloatProperties m_ResetParams;
FloatPropertiesChannel m_ResetParams;
public override void InitializeAgent()
public override void Initialize()
{
m_Rb = gameObject.GetComponent<Rigidbody>();
m_LookDir = Vector3.zero;

sensor.AddObservation(target.transform.localPosition);
}
public override void AgentAction(float[] vectorAction)
public override void OnActionReceived(float[] vectorAction)
{
for (var i = 0; i < vectorAction.Length; i++)
{

m_LookDir = new Vector3(x, y, z);
}
public override void AgentReset()
public override void OnEpisodeBegin()
{
gameObject.transform.localPosition = new Vector3(
(1 - 2 * Random.value) * 5, 2, (1 - 2 * Random.value) * 5);

if (gameObject.transform.position.y < -1)
{
AddReward(-1);
Done();
EndEpisode();
return;
}

AddReward(-1);
Done();
EndEpisode();
Done();
EndEpisode();
}
}

10
Project/Assets/ML-Agents/Examples/Crawler/Scenes/CrawlerDynamicTarget.unity


m_Component:
- component: {fileID: 1392866532}
- component: {fileID: 1392866531}
- component: {fileID: 1392866530}
- component: {fileID: 1392866529}
- component: {fileID: 1392866528}
- component: {fileID: 1392866533}

m_GameObject: {fileID: 1392866527}
m_Enabled: 1
--- !u!124 &1392866529
Behaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 1392866527}
m_Enabled: 1
--- !u!92 &1392866530
Behaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}

m_Name:
m_EditorClassIdentifier:
gravityMultiplier: 1
monitorVerticalOffset: 1
fixedDeltaTime: 0.01333
maximumDeltaTime: 0.15
solverIterations: 12

6
Project/Assets/ML-Agents/Examples/Crawler/Scripts/CrawlerAgent.cs


Quaternion m_LookRotation;
Matrix4x4 m_TargetDirMatrix;
public override void InitializeAgent()
public override void Initialize()
{
m_JdController = GetComponent<JointDriveController>();
m_DirToTarget = target.position - body.position;

target.position = newTargetPos + ground.position;
}
public override void AgentAction(float[] vectorAction)
public override void OnActionReceived(float[] vectorAction)
{
// The dictionary with all the body parts in it are in the jdController
var bpDict = m_JdController.bodyPartsDict;

/// <summary>
/// Loop over body parts and reset them to initial conditions.
/// </summary>
public override void AgentReset()
public override void OnEpisodeBegin()
{
if (m_DirToTarget != Vector3.zero)
{

7
Project/Assets/ML-Agents/Examples/FoodCollector/Scripts/FoodCollectorAgent.cs


public bool useVectorObs;
public override void InitializeAgent()
public override void Initialize()
base.InitializeAgent();
m_AgentRb = GetComponent<Rigidbody>();
m_MyArea = area.GetComponent<FoodCollectorArea>();
m_FoodCollecterSettings = FindObjectOfType<FoodCollectorSettings>();

gameObject.GetComponentInChildren<Renderer>().material = normalMaterial;
}
public override void AgentAction(float[] vectorAction)
public override void OnActionReceived(float[] vectorAction)
{
MoveAgent(vectorAction);
}

return action;
}
public override void AgentReset()
public override void OnEpisodeBegin()
{
Unfreeze();
Unpoison();

1001
Project/Assets/ML-Agents/Examples/GridWorld/Demos/ExpertGrid.demo
Diff content suppressed because it is too large to display.

28
Project/Assets/ML-Agents/Examples/GridWorld/Scenes/GridWorld.unity


m_ReflectionIntensity: 1
m_CustomReflection: {fileID: 0}
m_Sun: {fileID: 0}
m_IndirectSpecularColor: {r: 0.4497121, g: 0.49977785, b: 0.57563704, a: 1}
m_IndirectSpecularColor: {r: 0.44971162, g: 0.49977726, b: 0.5756362, a: 1}
m_UseRadianceAmbientProbe: 0
--- !u!157 &3
LightmapSettings:

m_Component:
- component: {fileID: 99095116}
- component: {fileID: 99095115}
- component: {fileID: 99095114}
- component: {fileID: 99095113}
m_Layer: 0
m_Name: Main Camera

m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 99095112}
m_Enabled: 1
--- !u!92 &99095114
Behaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 99095112}
m_Enabled: 1
--- !u!20 &99095115
Camera:
m_ObjectHideFlags: 0

m_EditorClassIdentifier:
agentParameters:
maxStep: 100
resetOnDone: 1
onDemandDecision: 1
numberOfActionsBetweenDecisions: 1
hasUpgradedFromAgentParameters: 1
maxStep: 100
area: {fileID: 1795599557}
timeBetweenDecisionsAtInference: 0.15
renderCamera: {fileID: 797520692}

m_InferenceDevice: 0
m_BehaviorType: 0
m_BehaviorName: GridWorld
m_TeamID: 0
m_useChildSensors: 1
TeamId: 0
m_UseChildSensors: 1
--- !u!114 &125487791
MonoBehaviour:
m_ObjectHideFlags: 0

m_Script: {fileID: 11500000, guid: 132e1194facb64429b007ea1edf562d0, type: 3}
m_Name:
m_EditorClassIdentifier:
renderTexture: {fileID: 8400000, guid: 114608d5384404f89bff4b6f88432958, type: 2}
sensorName: RenderTextureSensor
grayscale: 0
compression: 1
m_RenderTexture: {fileID: 8400000, guid: 114608d5384404f89bff4b6f88432958, type: 2}
m_SensorName: RenderTextureSensor
m_Grayscale: 0
m_Compression: 1
--- !u!1 &260425459
GameObject:
m_ObjectHideFlags: 0

12
Project/Assets/ML-Agents/Examples/GridWorld/Scripts/GridAgent.cs


const int k_Left = 3;
const int k_Right = 4;
public override void InitializeAgent()
{
}
public override void CollectDiscreteActionMasks(DiscreteActionMasker actionMasker)
{
// Mask the necessary actions if selected by the user.

}
// to be implemented by the developer
public override void AgentAction(float[] vectorAction)
public override void OnActionReceived(float[] vectorAction)
{
AddReward(-0.01f);
var action = Mathf.FloorToInt(vectorAction[0]);

if (hit.Where(col => col.gameObject.CompareTag("goal")).ToArray().Length == 1)
{
SetReward(1f);
Done();
EndEpisode();
Done();
EndEpisode();
}
}
}

}
// to be implemented by the developer
public override void AgentReset()
public override void OnEpisodeBegin()
{
area.AreaReset();
}
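
GridAgent keeps its CollectDiscreteActionMasks override while the action and reset callbacks are renamed. As a rough, hedged illustration of what the elided masking body does (the maskActions flag and the edge test below are assumptions, only k_Left and the masker parameter appear in the diff):

public override void CollectDiscreteActionMasks(DiscreteActionMasker actionMasker)
{
    // Illustrative only: forbid stepping off the left edge of the grid by masking
    // the k_Left action on branch 0 of the discrete action space.
    if (maskActions)
    {
        var positionX = (int)transform.position.x;
        if (positionX == 0)
        {
            actionMasker.SetMask(0, new[] { k_Left });
        }
    }
}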

2
Project/Assets/ML-Agents/Examples/GridWorld/Scripts/GridArea.cs


public GameObject trueAgent;
IFloatProperties m_ResetParameters;
FloatPropertiesChannel m_ResetParameters;
Camera m_AgentCam;

9
Project/Assets/ML-Agents/Examples/Hallway/Scripts/HallwayAgent.cs


HallwaySettings m_HallwaySettings;
int m_Selection;
public override void InitializeAgent()
public override void Initialize()
base.InitializeAgent();
m_HallwaySettings = FindObjectOfType<HallwaySettings>();
m_AgentRb = GetComponent<Rigidbody>();
m_GroundRenderer = ground.GetComponent<Renderer>();

m_AgentRb.AddForce(dirToGo * m_HallwaySettings.agentRunSpeed, ForceMode.VelocityChange);
}
public override void AgentAction(float[] vectorAction)
public override void OnActionReceived(float[] vectorAction)
{
AddReward(-1f / maxStep);
MoveAgent(vectorAction);

SetReward(-0.1f);
StartCoroutine(GoalScoredSwapGroundMaterial(m_HallwaySettings.failMaterial, 0.5f));
}
Done();
EndEpisode();
}
}

return new float[] { 0 };
}
public override void AgentReset()
public override void OnEpisodeBegin()
{
var agentOffset = -15f;
var blockOffset = 0f;

9
Project/Assets/ML-Agents/Examples/PushBlock/Scripts/PushAgentBasic.cs


m_PushBlockSettings = FindObjectOfType<PushBlockSettings>();
}
public override void InitializeAgent()
public override void Initialize()
base.InitializeAgent();
goalDetect = block.GetComponent<GoalDetect>();
goalDetect.agent = this;

AddReward(5f);
// By marking an agent as done AgentReset() will be called automatically.
Done();
EndEpisode();
// Swap ground material for a bit to indicate we scored.
StartCoroutine(GoalScoredSwapGroundMaterial(m_PushBlockSettings.goalScoredMaterial, 0.5f));

/// <summary>
/// Called every step of the engine. Here the agent takes an action.
/// </summary>
public override void AgentAction(float[] vectorAction)
public override void OnActionReceived(float[] vectorAction)
{
// Move the agent using the action.
MoveAgent(vectorAction);

/// In the editor, if "Reset On Done" is checked then AgentReset() will be
/// called automatically anytime we mark done = true in an agent script.
/// </summary>
public override void AgentReset()
public override void OnEpisodeBegin()
{
var rotation = Random.Range(0, 4);
var rotationAngle = rotation * 90f;

1001
Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
Diff content suppressed because it is too large to display.

9
Project/Assets/ML-Agents/Examples/Pyramids/Scripts/PyramidAgent.cs


public GameObject areaSwitch;
public bool useVectorObs;
public override void InitializeAgent()
public override void Initialize()
base.InitializeAgent();
m_AgentRb = GetComponent<Rigidbody>();
m_MyArea = area.GetComponent<PyramidArea>();
m_SwitchLogic = areaSwitch.GetComponent<PyramidSwitch>();

m_AgentRb.AddForce(dirToGo * 2f, ForceMode.VelocityChange);
}
public override void AgentAction(float[] vectorAction)
public override void OnActionReceived(float[] vectorAction)
{
AddReward(-1f / maxStep);
MoveAgent(vectorAction);

return new float[] { 0 };
}
public override void AgentReset()
public override void OnEpisodeBegin()
{
var enumerable = Enumerable.Range(0, 9).OrderBy(x => Guid.NewGuid()).Take(9);
var items = enumerable.ToArray();

if (collision.gameObject.CompareTag("goal"))
{
SetReward(2f);
Done();
EndEpisode();
}
}
}

6
Project/Assets/ML-Agents/Examples/Reacher/Scripts/ReacherAgent.cs


/// Collect the rigidbodies of the reacher in order to resue them for
/// observations and actions.
/// </summary>
public override void InitializeAgent()
public override void Initialize()
{
m_RbA = pendulumA.GetComponent<Rigidbody>();
m_RbB = pendulumB.GetComponent<Rigidbody>();

/// <summary>
/// The agent's four actions correspond to torques on each of the two joints.
/// </summary>
public override void AgentAction(float[] vectorAction)
public override void OnActionReceived(float[] vectorAction)
{
m_GoalDegree += m_GoalSpeed;
UpdateGoalPosition();

/// <summary>
/// Resets the position and velocity of the agent and the goal.
/// </summary>
public override void AgentReset()
public override void OnEpisodeBegin()
{
pendulumA.transform.position = new Vector3(0f, -4f, 0f) + transform.position;
pendulumA.transform.rotation = Quaternion.Euler(180f, 0f, 0f);

2
Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/GroundContact.cs


if (agentDoneOnGroundContact)
{
agent.Done();
agent.EndEpisode();
}
}
}

5
Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ModelOverrider.cs


void OverrideModel()
{
m_Agent.LazyInitialize();
var name = m_Agent.BehaviorName;
var bp = m_Agent.GetComponent<BehaviorParameters>();
var name = bp.behaviorName;
m_Agent.GiveModel($"Override_{name}", nnModel);
m_Agent.SetModel($"Override_{name}", nnModel);
}
}
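
ModelOverrider now pulls the behavior name from the BehaviorParameters component and calls SetModel, which replaces GiveModel. The same pattern can be used to swap an agent's inference model at runtime; the component below is illustrative and not part of the diff, and the using directives are assumptions for this package version.

using Barracuda;            // NNModel; namespace assumed for the Barracuda version in use
using MLAgents;
using UnityEngine;

public class RuntimeModelSwap : MonoBehaviour   // hypothetical helper component
{
    public NNModel replacementModel;            // assigned in the Inspector

    void Start()
    {
        var agent = GetComponent<Agent>();
        var bp = GetComponent<BehaviorParameters>();
        agent.LazyInitialize();
        // Keep the existing behavior name but switch the model used for inference.
        agent.SetModel($"Override_{bp.behaviorName}", replacementModel);
    }
}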

440
Project/Assets/ML-Agents/Examples/Soccer/Prefabs/SoccerFieldFives.prefab


m_InferenceDevice: 0
m_BehaviorType: 0
m_BehaviorName: SoccerFives
m_TeamID: 1
TeamId: 1
m_UseChildSensors: 1
--- !u!114 &114492261207303438
MonoBehaviour:

m_Script: {fileID: 11500000, guid: 6bb6b867a41448888c1cd4f99643ad71, type: 3}
m_Name:
m_EditorClassIdentifier:
sensorName: PurpleRayPerceptionSensor
detectableTags:
m_SensorName: PurpleRayPerceptionSensor
m_DetectableTags:
- ball
- purpleGoal
- blueGoal

raysPerDirection: 5
maxRayDegrees: 60
sphereCastRadius: 0.5
rayLength: 60
rayLayerMask:
m_RaysPerDirection: 5
m_MaxRayDegrees: 60
m_SphereCastRadius: 0.5
m_RayLength: 60
m_RayLayerMask:
observationStacks: 3
m_ObservationStacks: 3
startVerticalOffset: 0.5
endVerticalOffset: 0.5
m_StartVerticalOffset: 0.5
m_EndVerticalOffset: 0.5
--- !u!114 &9152743230243588598
MonoBehaviour:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
DecisionPeriod: 5
RepeatAction: 1
TakeActionsBetweenDecisions: 1
offsetStep: 0
--- !u!1 &1100217258374548
GameObject:

m_InferenceDevice: 0
m_BehaviorType: 0
m_BehaviorName: SoccerFives
m_TeamID: 0
TeamId: 0
m_UseChildSensors: 1
--- !u!114 &114850431417842684
MonoBehaviour:

m_Script: {fileID: 11500000, guid: 6bb6b867a41448888c1cd4f99643ad71, type: 3}
m_Name:
m_EditorClassIdentifier:
sensorName: BlueRayPerceptionSensor
detectableTags:
m_SensorName: BlueRayPerceptionSensor
m_DetectableTags:
- ball
- blueGoal
- purpleGoal

raysPerDirection: 5
maxRayDegrees: 60
sphereCastRadius: 0.5
rayLength: 60
rayLayerMask:
m_RaysPerDirection: 5
m_MaxRayDegrees: 60
m_SphereCastRadius: 0.5
m_RayLength: 60
m_RayLayerMask:
observationStacks: 3
m_ObservationStacks: 3
startVerticalOffset: 0.5
endVerticalOffset: 0.5
m_StartVerticalOffset: 0.5
m_EndVerticalOffset: 0.5
--- !u!114 &404683423509059512
MonoBehaviour:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
DecisionPeriod: 5
RepeatAction: 1
TakeActionsBetweenDecisions: 1
offsetStep: 0
--- !u!1 &1141134673700168
GameObject:

m_Script: {fileID: 11500000, guid: 6bb6b867a41448888c1cd4f99643ad71, type: 3}
m_Name:
m_EditorClassIdentifier:
sensorName: BlueRayPerceptionSensorReverse
detectableTags:
m_SensorName: BlueRayPerceptionSensorReverse
m_DetectableTags:
- ball
- blueGoal
- purpleGoal

raysPerDirection: 2
maxRayDegrees: 90
sphereCastRadius: 0.5
rayLength: 60
rayLayerMask:
m_RaysPerDirection: 1
m_MaxRayDegrees: 45
m_SphereCastRadius: 0.5
m_RayLength: 60
m_RayLayerMask:
observationStacks: 3
m_ObservationStacks: 3
startVerticalOffset: 0.5
endVerticalOffset: 0.5
m_StartVerticalOffset: 0.5
m_EndVerticalOffset: 0.5
--- !u!1 &1380156559769609756
GameObject:
m_ObjectHideFlags: 0

m_InferenceDevice: 0
m_BehaviorType: 0
m_BehaviorName: SoccerFives
m_TeamID: 1
TeamId: 1
m_UseChildSensors: 1
--- !u!114 &6954070104302829726
MonoBehaviour:

m_Script: {fileID: 11500000, guid: 6bb6b867a41448888c1cd4f99643ad71, type: 3}
m_Name:
m_EditorClassIdentifier:
sensorName: PurpleRayPerceptionSensor
detectableTags:
m_SensorName: PurpleRayPerceptionSensor
m_DetectableTags:
- ball
- purpleGoal
- blueGoal

raysPerDirection: 5
maxRayDegrees: 60
sphereCastRadius: 0.5
rayLength: 60
rayLayerMask:
m_RaysPerDirection: 5
m_MaxRayDegrees: 60
m_SphereCastRadius: 0.5
m_RayLength: 60
m_RayLayerMask:
observationStacks: 3
m_ObservationStacks: 3
startVerticalOffset: 0.5
endVerticalOffset: 0.5
m_StartVerticalOffset: 0.5
m_EndVerticalOffset: 0.5
--- !u!114 &1627345754148825771
MonoBehaviour:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
DecisionPeriod: 5
RepeatAction: 1
TakeActionsBetweenDecisions: 1
offsetStep: 0
--- !u!1 &1897170901855235213
GameObject:

m_Script: {fileID: 11500000, guid: 6bb6b867a41448888c1cd4f99643ad71, type: 3}
m_Name:
m_EditorClassIdentifier:
sensorName: BlueRayPerceptionSensorReverse
detectableTags:
m_SensorName: BlueRayPerceptionSensorReverse
m_DetectableTags:
- ball
- blueGoal
- purpleGoal

raysPerDirection: 2
maxRayDegrees: 90
sphereCastRadius: 0.5
rayLength: 60
rayLayerMask:
m_RaysPerDirection: 1
m_MaxRayDegrees: 45
m_SphereCastRadius: 0.5
m_RayLength: 60
m_RayLayerMask:
observationStacks: 3
m_ObservationStacks: 3
startVerticalOffset: 0.5
endVerticalOffset: 0.5
m_StartVerticalOffset: 0.5
m_EndVerticalOffset: 0.5
--- !u!1 &1986779485041503505
GameObject:
m_ObjectHideFlags: 0

m_InferenceDevice: 0
m_BehaviorType: 0
m_BehaviorName: SoccerFives
m_TeamID: 0
TeamId: 0
m_UseChildSensors: 1
--- !u!114 &2022429702144620855
MonoBehaviour:

m_Script: {fileID: 11500000, guid: 6bb6b867a41448888c1cd4f99643ad71, type: 3}
m_Name:
m_EditorClassIdentifier:
sensorName: BlueRayPerceptionSensor
detectableTags:
m_SensorName: BlueRayPerceptionSensor
m_DetectableTags:
- ball
- blueGoal
- purpleGoal

raysPerDirection: 5
maxRayDegrees: 60
sphereCastRadius: 0.5
rayLength: 60
rayLayerMask:
m_RaysPerDirection: 5
m_MaxRayDegrees: 60
m_SphereCastRadius: 0.5
m_RayLength: 60
m_RayLayerMask:
observationStacks: 3
m_ObservationStacks: 3
startVerticalOffset: 0.5
endVerticalOffset: 0.5
m_StartVerticalOffset: 0.5
m_EndVerticalOffset: 0.5
--- !u!114 &383509589018865836
MonoBehaviour:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
DecisionPeriod: 5
RepeatAction: 1
TakeActionsBetweenDecisions: 1
offsetStep: 0
--- !u!1 &2124761965305375139
GameObject:

m_Script: {fileID: 11500000, guid: 6bb6b867a41448888c1cd4f99643ad71, type: 3}
m_Name:
m_EditorClassIdentifier:
sensorName: PurpleRayPerceptionSensorReverse
detectableTags:
m_SensorName: PurpleRayPerceptionSensorReverse
m_DetectableTags:
- ball
- purpleGoal
- blueGoal

raysPerDirection: 2
maxRayDegrees: 90
sphereCastRadius: 0.5
rayLength: 60
rayLayerMask:
m_RaysPerDirection: 1
m_MaxRayDegrees: 45
m_SphereCastRadius: 0.5
m_RayLength: 60
m_RayLayerMask:
observationStacks: 3
m_ObservationStacks: 3
startVerticalOffset: 0.5
endVerticalOffset: 0.5
m_StartVerticalOffset: 0.5
m_EndVerticalOffset: 0.5
--- !u!1 &2226531867376102072
GameObject:
m_ObjectHideFlags: 0

m_Script: {fileID: 11500000, guid: 6bb6b867a41448888c1cd4f99643ad71, type: 3}
m_Name:
m_EditorClassIdentifier:
sensorName: PurpleRayPerceptionSensorReverse
detectableTags:
m_SensorName: PurpleRayPerceptionSensorReverse
m_DetectableTags:
- ball
- purpleGoal
- blueGoal

raysPerDirection: 2
maxRayDegrees: 90
sphereCastRadius: 0.5
rayLength: 60
rayLayerMask:
m_RaysPerDirection: 1
m_MaxRayDegrees: 45
m_SphereCastRadius: 0.5
m_RayLength: 60
m_RayLayerMask:
observationStacks: 3
m_ObservationStacks: 3
startVerticalOffset: 0.5
endVerticalOffset: 0.5
m_StartVerticalOffset: 0.5
m_EndVerticalOffset: 0.5
--- !u!1 &2525142235152062310
GameObject:
m_ObjectHideFlags: 0

m_Script: {fileID: 11500000, guid: 6bb6b867a41448888c1cd4f99643ad71, type: 3}
m_Name:
m_EditorClassIdentifier:
sensorName: PurpleRayPerceptionSensorReverse
detectableTags:
m_SensorName: PurpleRayPerceptionSensorReverse
m_DetectableTags:
- ball
- purpleGoal
- blueGoal

raysPerDirection: 2
maxRayDegrees: 90
sphereCastRadius: 0.5
rayLength: 60
rayLayerMask:
m_RaysPerDirection: 1
m_MaxRayDegrees: 45
m_SphereCastRadius: 0.5
m_RayLength: 60
m_RayLayerMask:
observationStacks: 3
m_ObservationStacks: 3
startVerticalOffset: 0.5
endVerticalOffset: 0.5
m_StartVerticalOffset: 0.5
m_EndVerticalOffset: 0.5
--- !u!1 &3088685873901699079
GameObject:
m_ObjectHideFlags: 0

m_Script: {fileID: 11500000, guid: 6bb6b867a41448888c1cd4f99643ad71, type: 3}
m_Name:
m_EditorClassIdentifier:
sensorName: BlueRayPerceptionSensorReverse
detectableTags:
m_SensorName: BlueRayPerceptionSensorReverse
m_DetectableTags:
- ball
- blueGoal
- purpleGoal

raysPerDirection: 2
maxRayDegrees: 90
sphereCastRadius: 0.5
rayLength: 60
rayLayerMask:
m_RaysPerDirection: 1
m_MaxRayDegrees: 45
m_SphereCastRadius: 0.5
m_RayLength: 60
m_RayLayerMask:
observationStacks: 3
m_ObservationStacks: 3
startVerticalOffset: 0.5
endVerticalOffset: 0.5
m_StartVerticalOffset: 0.5
m_EndVerticalOffset: 0.5
--- !u!1 &3137532328811276395
GameObject:
m_ObjectHideFlags: 0

m_InferenceDevice: 0
m_BehaviorType: 0
m_BehaviorName: SoccerFives
m_TeamID: 1
TeamId: 1
m_UseChildSensors: 1
--- !u!114 &6855830296884182341
MonoBehaviour:

m_Script: {fileID: 11500000, guid: 6bb6b867a41448888c1cd4f99643ad71, type: 3}
m_Name:
m_EditorClassIdentifier:
sensorName: PurpleRayPerceptionSensor
detectableTags:
m_SensorName: PurpleRayPerceptionSensor
m_DetectableTags:
- ball
- purpleGoal
- blueGoal

raysPerDirection: 5
maxRayDegrees: 60
sphereCastRadius: 0.5
rayLength: 60
rayLayerMask:
m_RaysPerDirection: 5
m_MaxRayDegrees: 60
m_SphereCastRadius: 0.5
m_RayLength: 60
m_RayLayerMask:
observationStacks: 3
m_ObservationStacks: 3
startVerticalOffset: 0.5
endVerticalOffset: 0.5
m_StartVerticalOffset: 0.5
m_EndVerticalOffset: 0.5
--- !u!114 &6384345363145965643
MonoBehaviour:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
DecisionPeriod: 5
RepeatAction: 1
TakeActionsBetweenDecisions: 1
offsetStep: 0
--- !u!1 &5189658827866653388
GameObject:

m_InferenceDevice: 0
m_BehaviorType: 0
m_BehaviorName: SoccerFives
m_TeamID: 0
TeamId: 0
m_UseChildSensors: 1
--- !u!114 &582843529477248009
MonoBehaviour:

m_Script: {fileID: 11500000, guid: 6bb6b867a41448888c1cd4f99643ad71, type: 3}
m_Name:
m_EditorClassIdentifier:
sensorName: BlueRayPerceptionSensor
detectableTags:
m_SensorName: BlueRayPerceptionSensor
m_DetectableTags:
- ball
- blueGoal
- purpleGoal

raysPerDirection: 5
maxRayDegrees: 60
sphereCastRadius: 0.5
rayLength: 60
rayLayerMask:
m_RaysPerDirection: 5
m_MaxRayDegrees: 60
m_SphereCastRadius: 0.5
m_RayLength: 60
m_RayLayerMask:
observationStacks: 3
m_ObservationStacks: 3
startVerticalOffset: 0.5
endVerticalOffset: 0.5
m_StartVerticalOffset: 0.5
m_EndVerticalOffset: 0.5
--- !u!114 &207635274551600999
MonoBehaviour:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
DecisionPeriod: 5
RepeatAction: 1
TakeActionsBetweenDecisions: 1
offsetStep: 0
--- !u!1 &5298260412645894501
GameObject:

m_InferenceDevice: 0
m_BehaviorType: 0
m_BehaviorName: SoccerFives
m_TeamID: 1
TeamId: 1
m_UseChildSensors: 1
--- !u!114 &2375211931252174569
MonoBehaviour:

m_Script: {fileID: 11500000, guid: 6bb6b867a41448888c1cd4f99643ad71, type: 3}
m_Name:
m_EditorClassIdentifier:
sensorName: PurpleRayPerceptionSensor
detectableTags:
m_SensorName: PurpleRayPerceptionSensor
m_DetectableTags:
- ball
- purpleGoal
- blueGoal

raysPerDirection: 5
maxRayDegrees: 60
sphereCastRadius: 0.5
rayLength: 60
rayLayerMask:
m_RaysPerDirection: 5
m_MaxRayDegrees: 60
m_SphereCastRadius: 0.5
m_RayLength: 60
m_RayLayerMask:
observationStacks: 3
m_ObservationStacks: 3
startVerticalOffset: 0.5
endVerticalOffset: 0.5
m_StartVerticalOffset: 0.5
m_EndVerticalOffset: 0.5
--- !u!114 &8829516373144387202
MonoBehaviour:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
DecisionPeriod: 5
RepeatAction: 1
TakeActionsBetweenDecisions: 1
offsetStep: 0
--- !u!1 &5646796767244272979
GameObject:

m_InferenceDevice: 0
m_BehaviorType: 0
m_BehaviorName: SoccerFives
m_TeamID: 0
TeamId: 0
m_UseChildSensors: 1
--- !u!114 &5596404811566570806
MonoBehaviour:

m_Script: {fileID: 11500000, guid: 6bb6b867a41448888c1cd4f99643ad71, type: 3}
m_Name:
m_EditorClassIdentifier:
sensorName: BlueRayPerceptionSensor
detectableTags:
m_SensorName: BlueRayPerceptionSensor
m_DetectableTags:
- ball
- blueGoal
- purpleGoal

raysPerDirection: 5
maxRayDegrees: 60
sphereCastRadius: 0.5
rayLength: 60
rayLayerMask:
m_RaysPerDirection: 5
m_MaxRayDegrees: 60
m_SphereCastRadius: 0.5
m_RayLength: 60
m_RayLayerMask:
observationStacks: 3
m_ObservationStacks: 3
startVerticalOffset: 0.5
endVerticalOffset: 0.5
m_StartVerticalOffset: 0.5
m_EndVerticalOffset: 0.5
--- !u!114 &3995363798833944065
MonoBehaviour:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
DecisionPeriod: 5
RepeatAction: 1
TakeActionsBetweenDecisions: 1
offsetStep: 0
--- !u!1 &5722527935048982596
GameObject:

m_Script: {fileID: 11500000, guid: 6bb6b867a41448888c1cd4f99643ad71, type: 3}
m_Name:
m_EditorClassIdentifier:
sensorName: PurpleRayPerceptionSensorReverse
detectableTags:
m_SensorName: PurpleRayPerceptionSensorReverse
m_DetectableTags:
- ball
- purpleGoal
- blueGoal

raysPerDirection: 2
maxRayDegrees: 90
sphereCastRadius: 0.5
rayLength: 60
rayLayerMask:
m_RaysPerDirection: 1
m_MaxRayDegrees: 45
m_SphereCastRadius: 0.5
m_RayLength: 60
m_RayLayerMask:
observationStacks: 3
m_ObservationStacks: 3
startVerticalOffset: 0.5
endVerticalOffset: 0.5
m_StartVerticalOffset: 0.5
m_EndVerticalOffset: 0.5
--- !u!1 &7995690102123273994
GameObject:
m_ObjectHideFlags: 0

m_Script: {fileID: 11500000, guid: 6bb6b867a41448888c1cd4f99643ad71, type: 3}
m_Name:
m_EditorClassIdentifier:
sensorName: BlueRayPerceptionSensorReverse
detectableTags:
m_SensorName: BlueRayPerceptionSensorReverse
m_DetectableTags:
- ball
- blueGoal
- purpleGoal

raysPerDirection: 2
maxRayDegrees: 90
sphereCastRadius: 0.5
rayLength: 60
rayLayerMask:
m_RaysPerDirection: 1
m_MaxRayDegrees: 45
m_SphereCastRadius: 0.5
m_RayLength: 60
m_RayLayerMask:
observationStacks: 3
m_ObservationStacks: 3
startVerticalOffset: 0.5
endVerticalOffset: 0.5
m_StartVerticalOffset: 0.5
m_EndVerticalOffset: 0.5
--- !u!1 &8107722383771667773
GameObject:
m_ObjectHideFlags: 0

m_InferenceDevice: 0
m_BehaviorType: 0
m_BehaviorName: SoccerFives
m_TeamID: 1
TeamId: 1
m_UseChildSensors: 1
--- !u!114 &7920144334342864513
MonoBehaviour:

m_Script: {fileID: 11500000, guid: 6bb6b867a41448888c1cd4f99643ad71, type: 3}
m_Name:
m_EditorClassIdentifier:
sensorName: PurpleRayPerceptionSensor
detectableTags:
m_SensorName: PurpleRayPerceptionSensor
m_DetectableTags:
- ball
- purpleGoal
- blueGoal

raysPerDirection: 5
maxRayDegrees: 60
sphereCastRadius: 0.5
rayLength: 60
rayLayerMask:
m_RaysPerDirection: 5
m_MaxRayDegrees: 60
m_SphereCastRadius: 0.5
m_RayLength: 60
m_RayLayerMask:
observationStacks: 3
m_ObservationStacks: 3
startVerticalOffset: 0.5
endVerticalOffset: 0.5
m_StartVerticalOffset: 0.5
m_EndVerticalOffset: 0.5
--- !u!114 &3783133589427975123
MonoBehaviour:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
DecisionPeriod: 5
RepeatAction: 1
TakeActionsBetweenDecisions: 1
offsetStep: 0
--- !u!1 &9136065168043485173
GameObject:

m_Script: {fileID: 11500000, guid: 6bb6b867a41448888c1cd4f99643ad71, type: 3}
m_Name:
m_EditorClassIdentifier:
sensorName: PurpleRayPerceptionSensorReverse
detectableTags:
m_SensorName: PurpleRayPerceptionSensorReverse
m_DetectableTags:
- ball
- purpleGoal
- blueGoal

raysPerDirection: 2
maxRayDegrees: 90
sphereCastRadius: 0.5
rayLength: 60
rayLayerMask:
m_RaysPerDirection: 1
m_MaxRayDegrees: 45
m_SphereCastRadius: 0.5
m_RayLength: 60
m_RayLayerMask:
observationStacks: 3
m_ObservationStacks: 3
startVerticalOffset: 0.5
endVerticalOffset: 0.5
m_StartVerticalOffset: 0.5
m_EndVerticalOffset: 0.5
--- !u!1 &9137412374636088465
GameObject:
m_ObjectHideFlags: 0

m_InferenceDevice: 0
m_BehaviorType: 0
m_BehaviorName: SoccerFives
m_TeamID: 0
TeamId: 0
m_UseChildSensors: 1
--- !u!114 &1559066246895354931
MonoBehaviour:

m_Script: {fileID: 11500000, guid: 6bb6b867a41448888c1cd4f99643ad71, type: 3}
m_Name:
m_EditorClassIdentifier:
sensorName: BlueRayPerceptionSensor
detectableTags:
m_SensorName: BlueRayPerceptionSensor
m_DetectableTags:
- ball
- blueGoal
- purpleGoal

raysPerDirection: 5
maxRayDegrees: 60
sphereCastRadius: 0.5
rayLength: 60
rayLayerMask:
m_RaysPerDirection: 5
m_MaxRayDegrees: 60
m_SphereCastRadius: 0.5
m_RayLength: 60
m_RayLayerMask:
observationStacks: 3
m_ObservationStacks: 3
startVerticalOffset: 0.5
endVerticalOffset: 0.5
m_StartVerticalOffset: 0.5
m_EndVerticalOffset: 0.5
--- !u!114 &6424037218293500571
MonoBehaviour:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
DecisionPeriod: 5
RepeatAction: 1
TakeActionsBetweenDecisions: 1
offsetStep: 0
--- !u!1 &9174017734929201195
GameObject:

m_Script: {fileID: 11500000, guid: 6bb6b867a41448888c1cd4f99643ad71, type: 3}
m_Name:
m_EditorClassIdentifier:
sensorName: BlueRayPerceptionSensorReverse
detectableTags:
m_SensorName: BlueRayPerceptionSensorReverse
m_DetectableTags:
- ball
- blueGoal
- purpleGoal

raysPerDirection: 2
maxRayDegrees: 90
sphereCastRadius: 0.5
rayLength: 60
rayLayerMask:
m_RaysPerDirection: 1
m_MaxRayDegrees: 45
m_SphereCastRadius: 0.5
m_RayLength: 60
m_RayLayerMask:
observationStacks: 3
m_ObservationStacks: 3
startVerticalOffset: 0.5
endVerticalOffset: 0.5
m_StartVerticalOffset: 0.5
m_EndVerticalOffset: 0.5
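
The prefab changes above mostly track the ray sensor fields going private with an m_ prefix (sensorName to m_SensorName, rayLength to m_RayLength, and so on), plus the reverse-facing sensors dropping from 2 rays at 90 degrees to 1 ray at 45 degrees. The usual Unity pattern for renaming a serialized field without losing existing prefab data is sketched below; whether the package applies FormerlySerializedAs exactly like this is an assumption.

using UnityEngine;
using UnityEngine.Serialization;

public class RenamedFieldExample : MonoBehaviour   // illustrative component, not from the diff
{
    // The old serialized name is preserved so existing prefabs and scenes keep their values,
    // while code now goes through the property instead of a public field.
    [SerializeField, FormerlySerializedAs("sensorName")]
    string m_SensorName = "RayPerceptionSensor";

    public string SensorName
    {
        get { return m_SensorName; }
        set { m_SensorName = value; }
    }
}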

176
Project/Assets/ML-Agents/Examples/Soccer/Prefabs/SoccerFieldTwos.prefab


m_InferenceDevice: 0
m_BehaviorType: 0
m_BehaviorName: SoccerTwos
m_TeamID: 1
TeamId: 1
m_UseChildSensors: 1
--- !u!114 &114492261207303438
MonoBehaviour:

m_Script: {fileID: 11500000, guid: 6bb6b867a41448888c1cd4f99643ad71, type: 3}
m_Name:
m_EditorClassIdentifier:
sensorName: PurpleRayPerceptionSensor
detectableTags:
m_SensorName: PurpleRayPerceptionSensor
m_DetectableTags:
- ball
- purpleGoal
- blueGoal

raysPerDirection: 5
maxRayDegrees: 60
sphereCastRadius: 0.5
rayLength: 20
rayLayerMask:
m_RaysPerDirection: 5
m_MaxRayDegrees: 60
m_SphereCastRadius: 0.5
m_RayLength: 20
m_RayLayerMask:
observationStacks: 3
m_ObservationStacks: 3
startVerticalOffset: 0.5
endVerticalOffset: 0.5
m_StartVerticalOffset: 0.5
m_EndVerticalOffset: 0.5
--- !u!114 &9152743230243588598
MonoBehaviour:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
DecisionPeriod: 5
RepeatAction: 1
TakeActionsBetweenDecisions: 1
offsetStep: 0
--- !u!1 &1100217258374548
GameObject:

m_InferenceDevice: 0
m_BehaviorType: 0
m_BehaviorName: SoccerTwos
m_TeamID: 0
TeamId: 0
m_UseChildSensors: 1
--- !u!114 &114850431417842684
MonoBehaviour:

m_Script: {fileID: 11500000, guid: 6bb6b867a41448888c1cd4f99643ad71, type: 3}
m_Name:
m_EditorClassIdentifier:
sensorName: BlueRayPerceptionSensor
detectableTags:
m_SensorName: BlueRayPerceptionSensor
m_DetectableTags:
- ball
- blueGoal
- purpleGoal

raysPerDirection: 5
maxRayDegrees: 60
sphereCastRadius: 0.5
rayLength: 20
rayLayerMask:
m_RaysPerDirection: 5
m_MaxRayDegrees: 60
m_SphereCastRadius: 0.5
m_RayLength: 20
m_RayLayerMask:
observationStacks: 3
m_ObservationStacks: 3
startVerticalOffset: 0.5
endVerticalOffset: 0.5
m_StartVerticalOffset: 0.5
m_EndVerticalOffset: 0.5
--- !u!114 &404683423509059512
MonoBehaviour:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
DecisionPeriod: 5
RepeatAction: 1
TakeActionsBetweenDecisions: 1
offsetStep: 0
--- !u!1 &1141134673700168
GameObject:

m_Script: {fileID: 11500000, guid: 6bb6b867a41448888c1cd4f99643ad71, type: 3}
m_Name:
m_EditorClassIdentifier:
sensorName: PurpleRayPerceptionSensorReverse
detectableTags:
m_SensorName: PurpleRayPerceptionSensorReverse
m_DetectableTags:
- ball
- purpleGoal
- blueGoal

raysPerDirection: 2
maxRayDegrees: 90
sphereCastRadius: 0.5
rayLength: 20
rayLayerMask:
m_RaysPerDirection: 1
m_MaxRayDegrees: 45
m_SphereCastRadius: 0.5
m_RayLength: 20
m_RayLayerMask:
observationStacks: 3
m_ObservationStacks: 3
startVerticalOffset: 0.5
endVerticalOffset: 0.5
m_StartVerticalOffset: 0.5
m_EndVerticalOffset: 0.5
--- !u!1 &742736642297762088
GameObject:
m_ObjectHideFlags: 0

m_Script: {fileID: 11500000, guid: 6bb6b867a41448888c1cd4f99643ad71, type: 3}
m_Name:
m_EditorClassIdentifier:
sensorName: BlueRayPerceptionSensorReverse
detectableTags:
m_SensorName: BlueRayPerceptionSensorReverse
m_DetectableTags:
- ball
- blueGoal
- purpleGoal

raysPerDirection: 2
maxRayDegrees: 90
sphereCastRadius: 0.5
rayLength: 20
rayLayerMask:
m_RaysPerDirection: 1
m_MaxRayDegrees: 45
m_SphereCastRadius: 0.5
m_RayLength: 20
m_RayLayerMask:
observationStacks: 3
m_ObservationStacks: 3
startVerticalOffset: 0.5
endVerticalOffset: 0.5
m_StartVerticalOffset: 0.5
m_EndVerticalOffset: 0.5
--- !u!1 &2016057044266316337
GameObject:
m_ObjectHideFlags: 0

m_Script: {fileID: 11500000, guid: 6bb6b867a41448888c1cd4f99643ad71, type: 3}
m_Name:
m_EditorClassIdentifier:
sensorName: BlueRayPerceptionSensorReverse
detectableTags:
m_SensorName: BlueRayPerceptionSensorReverse
m_DetectableTags:
- ball
- blueGoal
- purpleGoal

raysPerDirection: 2
maxRayDegrees: 90
sphereCastRadius: 0.5
rayLength: 20
rayLayerMask:
m_RaysPerDirection: 1
m_MaxRayDegrees: 45
m_SphereCastRadius: 0.5
m_RayLength: 20
m_RayLayerMask:
observationStacks: 3
m_ObservationStacks: 3
startVerticalOffset: 0.5
endVerticalOffset: 0.5
m_StartVerticalOffset: 0.5
m_EndVerticalOffset: 0.5
--- !u!1 &4599713170205044794
GameObject:
m_ObjectHideFlags: 0

m_InferenceDevice: 0
m_BehaviorType: 0
m_BehaviorName: SoccerTwos
m_TeamID: 1
TeamId: 1
m_UseChildSensors: 1
--- !u!114 &5320024511406682322
MonoBehaviour:

m_Script: {fileID: 11500000, guid: 6bb6b867a41448888c1cd4f99643ad71, type: 3}
m_Name:
m_EditorClassIdentifier:
sensorName: PurpleRayPerceptionSensor
detectableTags:
m_SensorName: PurpleRayPerceptionSensor
m_DetectableTags:
- ball
- purpleGoal
- blueGoal

raysPerDirection: 5
maxRayDegrees: 60
sphereCastRadius: 0.5
rayLength: 20
rayLayerMask:
m_RaysPerDirection: 5
m_MaxRayDegrees: 60
m_SphereCastRadius: 0.5
m_RayLength: 20
m_RayLayerMask:
observationStacks: 3
m_ObservationStacks: 3
startVerticalOffset: 0.5
endVerticalOffset: 0.5
m_StartVerticalOffset: 0.5
m_EndVerticalOffset: 0.5
--- !u!114 &8734522883866558980
MonoBehaviour:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
DecisionPeriod: 5
RepeatAction: 1
TakeActionsBetweenDecisions: 1
offsetStep: 0
--- !u!1 &6442519122303792292
GameObject:

m_InferenceDevice: 0
m_BehaviorType: 0
m_BehaviorName: SoccerTwos
m_TeamID: 0
TeamId: 0
m_UseChildSensors: 1
--- !u!114 &5379409612883756837
MonoBehaviour:

m_Script: {fileID: 11500000, guid: 6bb6b867a41448888c1cd4f99643ad71, type: 3}
m_Name:
m_EditorClassIdentifier:
sensorName: BlueRayPerceptionSensor
detectableTags:
m_SensorName: BlueRayPerceptionSensor
m_DetectableTags:
- ball
- blueGoal
- purpleGoal

raysPerDirection: 5
maxRayDegrees: 60
sphereCastRadius: 0.5
rayLength: 20
rayLayerMask:
m_RaysPerDirection: 5
m_MaxRayDegrees: 60
m_SphereCastRadius: 0.5
m_RayLength: 20
m_RayLayerMask:
observationStacks: 3
m_ObservationStacks: 3
startVerticalOffset: 0.5
endVerticalOffset: 0.5
m_StartVerticalOffset: 0.5
m_EndVerticalOffset: 0.5
--- !u!114 &1018414316889932458
MonoBehaviour:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
DecisionPeriod: 5
RepeatAction: 1
TakeActionsBetweenDecisions: 1
offsetStep: 0
--- !u!1 &8673569163220857793
GameObject:

m_Script: {fileID: 11500000, guid: 6bb6b867a41448888c1cd4f99643ad71, type: 3}
m_Name:
m_EditorClassIdentifier:
sensorName: PurpleRayPerceptionSensorReverse
detectableTags:
m_SensorName: PurpleRayPerceptionSensorReverse
m_DetectableTags:
- ball
- purpleGoal
- blueGoal

raysPerDirection: 2
maxRayDegrees: 90
sphereCastRadius: 0.5
rayLength: 20
rayLayerMask:
m_RaysPerDirection: 1
m_MaxRayDegrees: 45
m_SphereCastRadius: 0.5
m_RayLength: 20
m_RayLayerMask:
observationStacks: 3
m_ObservationStacks: 3
startVerticalOffset: 0.5
endVerticalOffset: 0.5
m_StartVerticalOffset: 0.5
m_EndVerticalOffset: 0.5

12
Project/Assets/ML-Agents/Examples/Soccer/Scripts/AgentSoccer.cs


[HideInInspector]
public Rigidbody agentRb;
SoccerSettings m_SoccerSettings;
BehaviorParameters m_BehaviorParameters;
public override void InitializeAgent()
public override void Initialize()
base.InitializeAgent();
m_BallTouch = Academy.Instance.FloatProperties.GetPropertyWithDefault("ball_touch", 0);
if (TeamId == (int)Team.Blue)
m_BehaviorParameters = gameObject.GetComponent<BehaviorParameters>();
if (m_BehaviorParameters.TeamId == (int)Team.Blue)
{
team = Team.Blue;
m_Transform = new Vector3(transform.position.x - 4f, .5f, transform.position.z);

ForceMode.VelocityChange);
}
public override void AgentAction(float[] vectorAction)
public override void OnActionReceived(float[] vectorAction)
{
// Existential penalty for strikers.
AddReward(-1f / 3000f);

}
}
public override void AgentReset()
public override void OnEpisodeBegin()
{
m_BallTouch = Academy.Instance.FloatProperties.GetPropertyWithDefault("ball_touch", 0);
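
AgentSoccer now resolves its team from the TeamId on BehaviorParameters rather than from a field on the agent itself, and re-reads the ball_touch reset parameter at the start of each episode. Condensed, the new initialization looks roughly like this (the Purple branch is an assumption from context):

BehaviorParameters m_BehaviorParameters;

public override void Initialize()
{
    m_BehaviorParameters = gameObject.GetComponent<BehaviorParameters>();
    m_BallTouch = Academy.Instance.FloatProperties.GetPropertyWithDefault("ball_touch", 0);
    if (m_BehaviorParameters.TeamId == (int)Team.Blue)
    {
        team = Team.Blue;
    }
    else
    {
        team = Team.Purple;   // assumed counterpart to Team.Blue
    }
}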

2
Project/Assets/ML-Agents/Examples/Soccer/Scripts/SoccerFieldArea.cs


{
ps.agentScript.AddReward(-1);
}
ps.agentScript.Done(); //all agents need to be reset
ps.agentScript.EndEpisode(); //all agents need to be reset
if (goalTextUI)
{

34
Project/Assets/ML-Agents/Examples/Startup/Scripts/Startup.cs


internal class Startup : MonoBehaviour
{
const string k_SceneVariableName = "SCENE_NAME";
private const string k_SceneCommandLineFlag = "--mlagents-scene-name";
var sceneName = Environment.GetEnvironmentVariable(k_SceneVariableName);
var sceneName = "";
// Check for the CLI '--scene-name' flag. This will be used if
// no scene environment variable is found.
var args = Environment.GetCommandLineArgs();
Console.WriteLine("Command line arguments passed: " + String.Join(" ", args));
for (int i = 0; i < args.Length; i++) {
if (args [i] == k_SceneCommandLineFlag && i < args.Length - 1) {
sceneName = args[i + 1];
}
}
var sceneEnvironmentVariable = Environment.GetEnvironmentVariable(k_SceneVariableName);
if (!string.IsNullOrEmpty(sceneEnvironmentVariable))
{
sceneName = sceneEnvironmentVariable;
}
SwitchScene(sceneName);
}

{
throw new ArgumentException(
$"You didn't specified the {k_SceneVariableName} environment variable");
Console.WriteLine(
$"You didn't specify the {k_SceneVariableName} environment variable or the {k_SceneCommandLineFlag} command line argument."
);
Application.Quit(22);
return;
throw new ArgumentException(
$"The scene {sceneName} doesn't exist within your build. ");
Console.WriteLine(
$"The scene {sceneName} doesn't exist within your build."
);
Application.Quit(22);
return;
}
SceneManager.LoadSceneAsync(sceneName);
}
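
Startup now accepts the scene via the --mlagents-scene-name command-line flag in addition to the SCENE_NAME environment variable, with the environment variable taking precedence when both are set, and it logs and quits with exit code 22 instead of throwing when no valid scene is named. A condensed sketch of the resolution logic, reusing the constants shown above:

static string ResolveSceneName()
{
    var sceneName = "";
    var args = Environment.GetCommandLineArgs();
    for (var i = 0; i < args.Length - 1; i++)
    {
        if (args[i] == k_SceneCommandLineFlag)
        {
            sceneName = args[i + 1];   // the scene name follows the flag
        }
    }
    var fromEnvironment = Environment.GetEnvironmentVariable(k_SceneVariableName);
    if (!string.IsNullOrEmpty(fromEnvironment))
    {
        sceneName = fromEnvironment;   // environment variable overrides the flag
    }
    return sceneName;
}

A standalone build would then be launched with something like "Build.x86_64 --mlagents-scene-name GridWorld" (binary and scene names illustrative).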

722
Project/Assets/ML-Agents/Examples/Template/AgentPrefabsAndColors.unity
Diff content suppressed because it is too large to display.

133
Project/Assets/ML-Agents/Examples/Template/Scene.unity


--- !u!104 &2
RenderSettings:
m_ObjectHideFlags: 0
serializedVersion: 8
serializedVersion: 9
m_Fog: 0
m_FogColor: {r: 0.5, g: 0.5, b: 0.5, a: 1}
m_FogMode: 3

m_CustomReflection: {fileID: 0}
m_Sun: {fileID: 0}
m_IndirectSpecularColor: {r: 0, g: 0, b: 0, a: 1}
m_UseRadianceAmbientProbe: 0
--- !u!157 &3
LightmapSettings:
m_ObjectHideFlags: 0

m_BounceScale: 1
m_IndirectOutputScale: 1
m_AlbedoBoost: 1
m_TemporalCoherenceThreshold: 1
serializedVersion: 9
serializedVersion: 10
m_TextureWidth: 1024
m_TextureHeight: 1024
m_AtlasSize: 1024
m_AO: 0
m_AOMaxDistance: 1
m_CompAOExponent: 1

--- !u!1 &762086410
GameObject:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
serializedVersion: 5
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
serializedVersion: 6
m_Component:
- component: {fileID: 762086412}
- component: {fileID: 762086411}

--- !u!108 &762086411
Light:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 762086410}
m_Enabled: 1
serializedVersion: 8

serializedVersion: 2
m_Bits: 4294967295
m_Lightmapping: 4
m_LightShadowCasterMode: 0
m_AreaSize: {x: 1, y: 1}
m_BounceIntensity: 1
m_ColorTemperature: 6570

--- !u!4 &762086412
Transform:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 762086410}
m_LocalRotation: {x: 0.40821788, y: -0.23456968, z: 0.10938163, w: 0.8754261}
m_LocalPosition: {x: 0, y: 3, z: 0}

--- !u!1 &1223085755
GameObject:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
serializedVersion: 5
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
serializedVersion: 6
m_Component:
- component: {fileID: 1223085757}
- component: {fileID: 1223085756}

--- !u!114 &1223085756
MonoBehaviour:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 1223085755}
m_Enabled: 1
m_EditorHideFlags: 0

brain: {fileID: 0}
agentCameras: []
resetOnDone: 1
onDemandDecision: 0
numberOfActionsBetweenDecisions: 1
hasUpgradedFromAgentParameters: 1
maxStep: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 1223085755}
m_LocalRotation: {x: 0, y: 0, z: 0, w: 1}
m_LocalPosition: {x: 0.71938086, y: 0.27357092, z: 4.1970553}

--- !u!1 &1574236047
GameObject:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
serializedVersion: 5
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
serializedVersion: 6
- component: {fileID: 1574236048}
m_Layer: 0
m_Name: Academy
m_TagString: Untagged

m_IsActive: 1
--- !u!114 &1574236048
MonoBehaviour:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 1574236047}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 9af83cd96d4bc4088a966af174446d1b, type: 3}
m_Name:
m_EditorClassIdentifier:
broadcastHub:
broadcastingBrains: []
_brainsToControl: []
maxSteps: 0
trainingConfiguration:
width: 80
height: 80
qualityLevel: 0
timeScale: 100
targetFrameRate: 60
inferenceConfiguration:
width: 1024
height: 720
qualityLevel: 1
timeScale: 1
targetFrameRate: 60
resetParameters:
resetParameters: []
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 1574236047}
m_LocalRotation: {x: 0, y: 0, z: 0, w: 1}
m_LocalPosition: {x: 0.71938086, y: 0.27357092, z: 4.1970553}

--- !u!1 &1715640920
GameObject:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
serializedVersion: 5
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
serializedVersion: 6
- component: {fileID: 1715640923}
- component: {fileID: 1715640922}
- component: {fileID: 1715640921}
m_Layer: 0

--- !u!81 &1715640921
AudioListener:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_GameObject: {fileID: 1715640920}
m_Enabled: 1
--- !u!92 &1715640923
Behaviour:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_projectionMatrixMode: 1
m_SensorSize: {x: 36, y: 24}
m_LensShift: {x: 0, y: 0}
m_GateFitMode: 2
m_FocalLength: 50
m_NormalizedViewPortRect:
serializedVersion: 2
x: 0

--- !u!4 &1715640925
Transform:
m_ObjectHideFlags: 0
m_PrefabParentObject: {fileID: 0}
m_PrefabInternal: {fileID: 0}
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 1715640920}
m_LocalRotation: {x: 0, y: 0, z: 0, w: 1}
m_LocalPosition: {x: 0, y: 1, z: -10}

4
Project/Assets/ML-Agents/Examples/Template/Scripts/TemplateAgent.cs


{
}
public override void AgentAction(float[] vectorAction)
public override void OnActionReceived(float[] vectorAction)
public override void AgentReset()
public override void OnEpisodeBegin()
{
}
}

4
Project/Assets/ML-Agents/Examples/Tennis/Scripts/HitWall.cs


void Reset()
{
m_AgentA.Done();
m_AgentB.Done();
m_AgentA.EndEpisode();
m_AgentB.EndEpisode();
m_Area.MatchReset();
lastFloorHit = FloorHit.Service;
net = false;

8
Project/Assets/ML-Agents/Examples/Tennis/Scripts/TennisAgent.cs


Rigidbody m_AgentRb;
Rigidbody m_BallRb;
float m_InvertMult;
IFloatProperties m_ResetParams;
FloatPropertiesChannel m_ResetParams;
// Looks for the scoreboard based on the name of the gameObjects.
// Do not modify the names of the Score GameObjects

public override void InitializeAgent()
public override void Initialize()
{
m_AgentRb = GetComponent<Rigidbody>();
m_BallRb = ball.GetComponent<Rigidbody>();

sensor.AddObservation(m_InvertMult * gameObject.transform.rotation.z);
}
public override void AgentAction(float[] vectorAction)
public override void OnActionReceived(float[] vectorAction)
{
var moveX = Mathf.Clamp(vectorAction[0], -1f, 1f) * m_InvertMult;
var moveY = Mathf.Clamp(vectorAction[1], -1f, 1f);

return action;
}
public override void AgentReset()
public override void OnEpisodeBegin()
{
m_InvertMult = invertX ? -1f : 1f;

8
Project/Assets/ML-Agents/Examples/Walker/Scripts/WalkerAgent.cs


Rigidbody m_ChestRb;
Rigidbody m_SpineRb;
IFloatProperties m_ResetParams;
FloatPropertiesChannel m_ResetParams;
public override void InitializeAgent()
public override void Initialize()
{
m_JdController = GetComponent<JointDriveController>();
m_JdController.SetupBodyPart(hips);

}
}
public override void AgentAction(float[] vectorAction)
public override void OnActionReceived(float[] vectorAction)
{
var bpDict = m_JdController.bodyPartsDict;
var i = -1;

/// <summary>
/// Loop over body parts and reset them to initial conditions.
/// </summary>
public override void AgentReset()
public override void OnEpisodeBegin()
{
if (m_DirToTarget != Vector3.zero)
{

16
Project/Assets/ML-Agents/Examples/WallJump/Scripts/WallJumpAgent.cs


Vector3 m_JumpTargetPos;
Vector3 m_JumpStartingPos;
public override void InitializeAgent()
public override void Initialize()
{
m_WallJumpSettings = FindObjectOfType<WallJumpSettings>();
m_Configuration = Random.Range(0, 5);

jumpingTime -= Time.fixedDeltaTime;
}
public override void AgentAction(float[] vectorAction)
public override void OnActionReceived(float[] vectorAction)
{
MoveAgent(vectorAction);
if ((!Physics.Raycast(m_AgentRb.position, Vector3.down, 20))

Done();
EndEpisode();
ResetBlock(m_ShortBlockRb);
StartCoroutine(
GoalScoredSwapGroundMaterial(m_WallJumpSettings.failMaterial, .5f));

if (col.gameObject.CompareTag("goal") && DoGroundCheck(true))
{
SetReward(1f);
Done();
EndEpisode();
StartCoroutine(
GoalScoredSwapGroundMaterial(m_WallJumpSettings.goalScoredMaterial, 2));
}

blockRb.angularVelocity = Vector3.zero;
}
public override void AgentReset()
public override void OnEpisodeBegin()
{
ResetBlock(m_ShortBlockRb);
transform.localPosition = new Vector3(

Academy.Instance.FloatProperties.GetPropertyWithDefault("no_wall_height", 0),
localScale.z);
wall.transform.localScale = localScale;
GiveModel("SmallWallJump", noWallBrain);
SetModel("SmallWallJump", noWallBrain);
}
else if (config == 1)
{

localScale.z);
wall.transform.localScale = localScale;
GiveModel("SmallWallJump", smallWallBrain);
SetModel("SmallWallJump", smallWallBrain);
}
else
{

height,
localScale.z);
wall.transform.localScale = localScale;
GiveModel("BigWallJump", bigWallBrain);
SetModel("BigWallJump", bigWallBrain);
}
}
}

4
Project/Packages/manifest.json


{
"dependencies": {
"com.unity.ads": "2.0.8",
"com.unity.analytics": "3.2.2",
"com.unity.analytics": "3.2.3",
"com.unity.collab-proxy": "1.2.15",
"com.unity.ml-agents": "file:../../com.unity.ml-agents",
"com.unity.package-manager-ui": "2.0.8",

"com.unity.modules.wind": "1.0.0",
"com.unity.modules.xr": "1.0.0"
},
"testables" : [
"testables": [
"com.unity.ml-agents"
]
}

5
Project/ProjectSettings/ProjectSettings.asset


useOnDemandResources: 0
accelerometerFrequency: 60
companyName: Unity Technologies
productName: Unity Environment
productName: UnityEnvironment
m_ShowUnitySplashScreen: 0
m_ShowUnitySplashScreen: 1
m_ShowUnitySplashLogo: 1
m_SplashScreenOverlayOpacity: 1
m_SplashScreenAnimation: 1

xboxOneMonoLoggingLevel: 0
xboxOneLoggingLevel: 1
xboxOneDisableEsram: 0
xboxOneEnableTypeOptimization: 0
xboxOnePresentImmediateThreshold: 0
switchQueueCommandMemory: 1048576
switchQueueControlMemory: 16384

2
Project/ProjectSettings/ProjectVersion.txt


m_EditorVersion: 2018.4.14f1
m_EditorVersion: 2018.4.18f1

110
README.md


used for multiple purposes, including controlling NPC behavior (in a variety of
settings such as multi-agent and adversarial), automated testing of game builds
and evaluating different game design decisions pre-release. The ML-Agents
toolkit is mutually beneficial for both game developers and AI researchers as it
Toolkit is mutually beneficial for both game developers and AI researchers as it
provides a central platform where advances in AI can be evaluated on Unity’s
rich environments and then made accessible to the wider research and game
developer communities.

* Unity environment control from Python
* 10+ sample Unity environments
* 15+ sample Unity environments
* Two deep reinforcement learning algorithms,
[Proximal Policy Optimization](https://github.com/Unity-Technologies/ml-agents/tree/latest_release/docs/Training-PPO.md)
(PPO) and [Soft Actor-Critic](https://github.com/Unity-Technologies/ml-agents/tree/latest_release/docs/Training-SAC.md)

* Train memory-enhanced agents using deep reinforcement learning
* Easily definable Curriculum Learning and Generalization scenarios
* Built-in support for Imitation Learning
* Built-in support for [Imitation Learning](https://github.com/Unity-Technologies/ml-agents/tree/latest_release/docs/Training-Imitation-Learning.md) through Behavioral Cloning or Generative Adversarial Imitation Learning
* Simplified set-up with Docker
## Documentation
## Releases & Documentation
**Our latest, stable release is 0.14.1. Click
[here](https://github.com/Unity-Technologies/ml-agents/tree/latest_release/docs/Readme.md) to
get started with the latest release of ML-Agents.**
The table below lists all our releases, including our `master` branch which is under active
development and may be unstable. A few helpful guidelines:
* The docs links in the table below include installation and usage instructions specific to each
release. Remember to always use the documentation that corresponds to the release version you're
using.
* See the [GitHub releases](https://github.com/Unity-Technologies/ml-agents/releases) for more
details of the changes between versions.
* If you have used an earlier version of the ML-Agents Toolkit, we strongly recommend our
[guide on migrating from earlier versions](docs/Migrating.md).
| **Version** | **Release Date** | **Source** | **Documentation** | **Download** |
|:-------:|:------:|:-------------:|:-------:|:------------:|
| **master** (unstable) | -- | [source](https://github.com/Unity-Technologies/ml-agents/tree/master) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/master/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/master.zip) |
| **0.14.1** (latest stable release) | February 26, 2020 | **[source](https://github.com/Unity-Technologies/ml-agents/tree/latest_release)** | **[docs](https://github.com/Unity-Technologies/ml-agents/tree/latest_release/docs/Readme.md)** | **[download](https://github.com/Unity-Technologies/ml-agents/archive/latest_release.zip)** |
| **0.14.0** | February 13, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/0.14.0) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/0.14.0/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/0.14.0.zip) |
| **0.13.1** | January 21, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/0.13.1) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/0.13.1/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/0.13.1.zip) |
| **0.13.0** | January 8, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/0.13.0) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/0.13.0/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/0.13.0.zip) |
| **0.12.1** | December 11, 2019 | [source](https://github.com/Unity-Technologies/ml-agents/tree/0.12.1) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/0.12.1/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/0.12.1.zip) |
| **0.12.0** | December 2, 2019 | [source](https://github.com/Unity-Technologies/ml-agents/tree/0.12.0) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/0.12.0/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/0.12.0.zip) |
| **0.11.0** | November 4, 2019 | [source](https://github.com/Unity-Technologies/ml-agents/tree/0.11.0) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/0.11.0/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/0.11.0.zip) |
| **0.10.1** | October 9, 2019 | [source](https://github.com/Unity-Technologies/ml-agents/tree/0.10.1) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/0.10.1/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/0.10.1.zip) |
| **0.10.0** | September 30, 2019 | [source](https://github.com/Unity-Technologies/ml-agents/tree/0.10.0) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/0.10.0/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/0.10.0.zip) |
## Citation
If you are a researcher interested in a discussion of Unity as an AI platform, see a pre-print
of our [reference paper on Unity and the ML-Agents Toolkit](https://arxiv.org/abs/1809.02627).
If you use Unity or the ML-Agents Toolkit to conduct research, we ask that you cite the following
paper as a reference:
Juliani, A., Berges, V., Vckay, E., Gao, Y., Henry, H., Mattar, M., Lange, D. (2018). Unity: A General Platform for Intelligent Agents. *arXiv preprint arXiv:1809.02627.* https://github.com/Unity-Technologies/ml-agents.
* For more information, in addition to installation and usage instructions, see
the [documentation for the latest release](https://github.com/Unity-Technologies/ml-agents/tree/latest_release/docs/Readme.md).
* If you are a researcher interested in a discussion of Unity as an AI platform, see a pre-print of our [reference paper on Unity and the ML-Agents Toolkit](https://arxiv.org/abs/1809.02627). Also, see below for instructions on citing this paper.
* If you have used an earlier version of the ML-Agents toolkit, we strongly
recommend our [guide on migrating from earlier versions](docs/Migrating.md).
* (February 28, 2020) [Training intelligent adversaries using self-play with ML-Agents](https://blogs.unity3d.com/2020/02/28/training-intelligent-adversaries-using-self-play-with-ml-agents/)
* (November 11, 2019) [Training your agents 7 times faster with ML-Agents](https://blogs.unity3d.com/2019/11/11/training-your-agents-7-times-faster-with-ml-agents/)
* (October 21, 2019) [The AI@Unity interns help shape the world](https://blogs.unity3d.com/2019/10/21/the-aiunity-interns-help-shape-the-world/)
* (April 15, 2019) [Unity ML-Agents Toolkit v0.8: Faster training on real games](https://blogs.unity3d.com/2019/04/15/unity-ml-agents-toolkit-v0-8-faster-training-on-real-games/)
* (March 1, 2019) [Unity ML-Agents Toolkit v0.7: A leap towards cross-platform inference](https://blogs.unity3d.com/2019/03/01/unity-ml-agents-toolkit-v0-7-a-leap-towards-cross-platform-inference/)
* (December 17, 2018) [ML-Agents Toolkit v0.6: Improved usability of Brains and Imitation Learning](https://blogs.unity3d.com/2018/12/17/ml-agents-toolkit-v0-6-improved-usability-of-brains-and-imitation-learning/)
* (October 2, 2018) [Puppo, The Corgi: Cuteness Overload with the Unity ML-Agents Toolkit](https://blogs.unity3d.com/2018/10/02/puppo-the-corgi-cuteness-overload-with-the-unity-ml-agents-toolkit/)
* (September 11, 2018) [ML-Agents Toolkit v0.5, new resources for AI researchers available now](https://blogs.unity3d.com/2018/09/11/ml-agents-toolkit-v0-5-new-resources-for-ai-researchers-available-now/)
* (June 26, 2018) [Solving sparse-reward tasks with Curiosity](https://blogs.unity3d.com/2018/06/26/solving-sparse-reward-tasks-with-curiosity/)
* (June 19, 2018) [Unity ML-Agents Toolkit v0.4 and Udacity Deep Reinforcement Learning Nanodegree](https://blogs.unity3d.com/2018/06/19/unity-ml-agents-toolkit-v0-4-and-udacity-deep-reinforcement-learning-nanodegree/)
* (May 24, 2018) [Imitation Learning in Unity: The Workflow](https://blogs.unity3d.com/2018/05/24/imitation-learning-in-unity-the-workflow/)
* (March 15, 2018) [ML-Agents Toolkit v0.3 Beta released: Imitation Learning, feedback-driven features, and more](https://blogs.unity3d.com/2018/03/15/ml-agents-v0-3-beta-released-imitation-learning-feedback-driven-features-and-more/)
* (December 11, 2017) [Using Machine Learning Agents in a real game: a beginner’s guide](https://blogs.unity3d.com/2017/12/11/using-machine-learning-agents-in-a-real-game-a-beginners-guide/)
* (December 8, 2017) [Introducing ML-Agents Toolkit v0.2: Curriculum Learning, new environments, and more](https://blogs.unity3d.com/2017/12/08/introducing-ml-agents-v0-2-curriculum-learning-new-environments-and-more/)
* (September 19, 2017) [Introducing: Unity Machine Learning Agents Toolkit](https://blogs.unity3d.com/2017/09/19/introducing-unity-machine-learning-agents/)
* [Using Machine Learning Agents in a real game: a beginner’s guide](https://blogs.unity3d.com/2017/12/11/using-machine-learning-agents-in-a-real-game-a-beginners-guide/)
* [Post](https://blogs.unity3d.com/2018/02/28/introducing-the-winners-of-the-first-ml-agents-challenge/)
announcing the winners of our
[first ML-Agents Challenge](https://connect.unity.com/challenges/ml-agents-1)
* [Post](https://blogs.unity3d.com/2018/01/23/designing-safer-cities-through-simulations/)
overviewing how Unity can be leveraged as a simulator to design safer cities.
In addition to our own documentation, here are some additional, relevant articles:

## Community and Feedback
The ML-Agents toolkit is an open-source project and we encourage and welcome
The ML-Agents Toolkit is an open-source project and we encourage and welcome
For problems with the installation and setup of the ML-Agents toolkit, or
For problems with the installation and setup of the ML-Agents Toolkit, or
If you run into any other problems using the ML-Agents toolkit, or have a specific
If you run into any other problems using the ML-Agents Toolkit, or have a specific
Your opinion matters a great deal to us. Only by hearing your thoughts on the Unity ML-Agents Toolkit can we continue
to improve and grow. Please take a few minutes to [let us know about it](https://github.com/Unity-Technologies/ml-agents/issues/1454).
Your opinion matters a great deal to us. Only by hearing your thoughts on the Unity ML-Agents
Toolkit can we continue to improve and grow. Please take a few minutes to
[let us know about it](https://github.com/Unity-Technologies/ml-agents/issues/1454).
## Releases
The latest release is 0.14.1. Previous releases can be found below:
| **Version** | **Source** | **Documentation** | **Download** |
|:-------:|:------:|:-------------:|:-------:|
| **0.14.0** | [source](https://github.com/Unity-Technologies/ml-agents/tree/0.14.0) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/0.14.0/docs) | [download](https://github.com/Unity-Technologies/ml-agents/archive/0.14.0.zip) |
| **0.13.1** | [source](https://github.com/Unity-Technologies/ml-agents/tree/0.13.1) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/0.13.1/docs) | [download](https://github.com/Unity-Technologies/ml-agents/archive/0.13.1.zip) |
| **0.13.0** | [source](https://github.com/Unity-Technologies/ml-agents/tree/0.13.0) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/0.13.0/docs) | [download](https://github.com/Unity-Technologies/ml-agents/archive/0.13.0.zip) |
| **0.12.1** | [source](https://github.com/Unity-Technologies/ml-agents/tree/0.12.1) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/0.12.1/docs) | [download](https://github.com/Unity-Technologies/ml-agents/archive/0.12.1.zip) |
| **0.12.0** | [source](https://github.com/Unity-Technologies/ml-agents/tree/0.12.0) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/0.12.0/docs) | [download](https://github.com/Unity-Technologies/ml-agents/archive/0.12.0.zip) |
| **0.11.0** | [source](https://github.com/Unity-Technologies/ml-agents/tree/0.11.0) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/0.11.0/docs) | [download](https://github.com/Unity-Technologies/ml-agents/archive/0.11.0.zip) |
| **0.10.1** | [source](https://github.com/Unity-Technologies/ml-agents/tree/0.10.1) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/0.10.1/docs) | [download](https://github.com/Unity-Technologies/ml-agents/archive/0.10.1.zip) |
| **0.10.0** | [source](https://github.com/Unity-Technologies/ml-agents/tree/0.10.0) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/0.10.0/docs) | [download](https://github.com/Unity-Technologies/ml-agents/archive/0.10.0.zip) |
See the [GitHub releases](https://github.com/Unity-Technologies/ml-agents/releases) for more details of the changes
between versions.
Please note that the `master` branch is under active development, so the documentation there may differ from the code
of a previous release. Always use the documentation that corresponds to the release version you're using.
## Citation
If you use Unity or the ML-Agents Toolkit to conduct research, we ask that you cite the following paper as a reference:
Juliani, A., Berges, V., Vckay, E., Gao, Y., Henry, H., Mattar, M., Lange, D. (2018). Unity: A General Platform for Intelligent Agents. *arXiv preprint arXiv:1809.02627.* https://github.com/Unity-Technologies/ml-agents.

25
com.unity.ml-agents/CHANGELOG.md


## [Unreleased]
### Major Changes
### Minor Changes
- Format of console output has changed slightly and now matches the name of the model/summary directory. (#3630, #3616)
## [0.15.0-preview] - 2020-03-18
### Major Changes
- `Agent.CollectObservations` now takes a VectorSensor argument. (#3352, #3389)
- Added `Agent.CollectDiscreteActionMasks` virtual method with a `DiscreteActionMasker` argument to specify which discrete actions are unavailable to the Agent. (#3525)
- Beta support for ONNX export was added. If the `tf2onnx` python package is installed, models will be saved to `.onnx` as well as `.nn` format.

- All SideChannel related code has been moved to the namespace `MLAgents.SideChannels`.
- `BrainParameters` and `SpaceType` have been removed from the public API.
- `BehaviorParameters` has been removed from the public API.
- The following methods in the `Agent` class have been deprecated and will be removed in a later release (a short migration sketch follows this list):
- `InitializeAgent()` was renamed to `Initialize()`
- `AgentAction()` was renamed to `OnActionReceived()`
- `AgentReset()` was renamed to `OnEpisodeBegin()`
- `Done()` was renamed to `EndEpisode()`
- `GiveModel()` was renamed to `SetModel()`
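To make these renames concrete, here is a minimal sketch of an Agent subclass written against the 0.15.0 names. The class name RollerAgent and its body are hypothetical; only the overridden method names and the VectorSensor argument come from the entries above.

using MLAgents;
using MLAgents.Sensors;

// Hypothetical agent illustrating the renamed virtual methods.
public class RollerAgent : Agent
{
    // Was: public override void InitializeAgent()
    public override void Initialize()
    {
        // One-time setup, e.g. caching component references.
    }

    // Was: public override void AgentReset()
    public override void OnEpisodeBegin()
    {
        // Reset positions, velocities, etc. at the start of each episode.
    }

    // CollectObservations now receives a VectorSensor to write observations into.
    public override void CollectObservations(VectorSensor sensor)
    {
        sensor.AddObservation(transform.localPosition);
    }

    // Was: public override void AgentAction(float[] vectorAction)
    public override void OnActionReceived(float[] vectorAction)
    {
        // Apply the action and assign rewards here; call EndEpisode() (was Done())
        // when the episode should terminate.
        SetReward(0f);
    }
}

The deprecated names still compile in 0.15.0: as the Agent.cs diff later in this change shows, they are marked [Obsolete] and forward to the new methods, so existing projects can migrate incrementally.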
### Minor Changes
- Monitor.cs was moved to Examples. (#3372)

- Academy.RegisterSideChannel and UnregisterSideChannel methods were added. (#3391)
- A tutorial on adding custom SideChannels was added (#3391)
- The stepping logic for the Agent and the Academy has been simplified (#3448)
- Update Barracuda to 0.6.0-preview
- Update Barracuda to 0.6.1-preview
- The interface for `RayPerceptionSensor.PerceiveStatic()` was changed to take an input class and write to an output class, and the method was renamed to `Perceive()`.
- The checkpoint file suffix was changed from `.cptk` to `.ckpt` (#3470)
- The command-line argument used to determine the port that an environment will listen on was changed from `--port` to `--mlagents-port`.

- The method `GetStepCount()` on the Agent class has been replaced with the property getter `StepCount`
- `RayPerceptionSensorComponent` and related classes now display the debug gizmos whenever the Agent is selected (not just Play mode).
- Most fields on `RayPerceptionSensorComponent` can now be changed while the editor is in Play mode. The exceptions to this are fields that affect the number of observations.
- Most fields on `CameraSensorComponent` and `RenderTextureSensorComponent` were changed to private and replaced by properties with the same name.
- `DecisionRequester` has been made internal (you can still use the DecisionRequesterComponent from the inspector). `RepeatAction` was renamed `TakeActionsBetweenDecisions` for clarity. (#3555)
- The `IFloatProperties` interface has been removed.
- Fix #3579.
- Improved inference performance for models with multiple action branches. (#3598)
- Fixed an issue when using GAIL with less than `batch_size` number of demonstrations. (#3591)
- The interfaces to the `SideChannel` classes (on C# and Python) have changed to use new `IncomingMessage` and `OutgoingMessage` classes. These should make reading and writing data to the channel easier (see the sketch after this list). (#3596)
- Updated the ExpertPyramid.demo example demonstration file (#3613)
- Updated project version for example environments to 2018.4.18f1. (#3618)
- Changed the Product Name in the example environments to remove spaces, so that the default build executable file doesn't contain spaces. (#3612)
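The IncomingMessage/OutgoingMessage change is easiest to see in a small custom channel. The sketch below is illustrative only: the channel class, its GUID, and the echoed string are made up, and the SideChannel base members used here (ChannelId, OnMessageReceived, QueueMessageToSend) are assumed from the 0.15.0 package rather than spelled out in this changelog.

using System;
using MLAgents.SideChannels;

// Hypothetical side channel that echoes string messages back to Python,
// sketching the message-based API (IncomingMessage / OutgoingMessage).
public class StringEchoSideChannel : SideChannel
{
    public StringEchoSideChannel()
    {
        // Arbitrary GUID for this example; the Python side must use the same ID.
        ChannelId = new Guid("621f0a70-4f87-11ea-a6bf-784f4387d1f7");
    }

    public override void OnMessageReceived(IncomingMessage msg)
    {
        var received = msg.ReadString();
        using (var outMsg = new OutgoingMessage())
        {
            outMsg.WriteString("echo: " + received);
            QueueMessageToSend(outMsg);
        }
    }
}

Such a channel would then be registered with Academy.Instance.RegisterSideChannel(new StringEchoSideChannel()), per the RegisterSideChannel entry above, and removed with UnregisterSideChannel when no longer needed.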
## [0.14.1-preview] - 2020-02-25

93
com.unity.ml-agents/Documentation~/com.unity.ml-agents.md


Please see the [ML-Agents README](https://github.com/Unity-Technologies/ml-agents/blob/master/README.md)
# About ML-Agents package (`com.unity.ml-agents`)
The Unity ML-Agents package contains the C# SDK for the
[Unity ML-Agents Toolkit](https://github.com/Unity-Technologies/ml-agents).
The package provides the ability for any Unity scene to be converted into a learning
environment where character behaviors can be trained using a variety of machine learning
algorithms. Additionally, it enables any trained behavior to be embedded back into the Unity
scene. More specifically, the package provides the following core functionalities:
* Define Agents: entities whose behavior will be learned. Agents are entities
that generate observations (through sensors), take actions and receive rewards from
the environment.
* Define Behaviors: entities that specify how an agent should act. Multiple agents can
share the same Behavior and a scene may have multiple Behaviors.
* Record demonstrations of an agent within the Editor. These demonstrations can be
valuable to train a behavior for that agent.
* Embed a trained behavior into the scene via the
[Unity Inference Engine](https://docs.unity3d.com/Packages/com.unity.barracuda@latest/index.html).
Thus an Agent can switch from a learning behavior to an inference behavior (a short sketch of this step follows the note below).
Note that this package does not contain the machine learning algorithms for training
behaviors. It relies on a Python package to orchestrate the training. This package
only enables instrumenting a Unity scene and setting it up for training, and then
embedding the trained model back into your Unity scene.
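To illustrate the embedding step listed above, assigning a trained .nn asset to an Agent at runtime can be as small as the component below. It is purely illustrative: the field names and the "MyBehavior" behavior name are made up; Agent.SetModel and the Barracuda NNModel asset type are the pieces provided by this package and its Barracuda dependency.

using UnityEngine;
using Barracuda;
using MLAgents;

// Illustrative helper that swaps a trained model onto an Agent at runtime,
// switching it from a learning behavior to an inference behavior.
public class TrainedModelInstaller : MonoBehaviour
{
    public Agent agent;          // the Agent to update, assigned in the Inspector
    public NNModel trainedModel; // the .nn asset produced by training

    void Start()
    {
        // "MyBehavior" must match the Behavior Name set on the Agent's Behavior Parameters.
        agent.SetModel("MyBehavior", trainedModel);
    }
}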
## Preview package
This package is available as a preview, so it is not ready for production use.
The features and documentation in this package might change before it is verified for release.
## Package contents
The following table describes the package folder structure:
|**Location**|**Description**|
|---|---|
|*Documentation~*|Contains the documentation for the Unity package.|
|*Editor*|Contains utilities for Editor windows and drawers.|
|*Plugins*|Contains third-party DLLs.|
|*Runtime*|Contains core C# APIs for integrating ML-Agents into your Unity scene. |
|*Tests*|Contains the unit tests for the package.|
<a name="Installation"></a>
## Installation
To install this package, follow the instructions in the
[Package Manager documentation](https://docs.unity3d.com/Manual/upm-ui-install.html).
To install the Python package to enable training behaviors, follow the instructions on our
[GitHub repository](https://github.com/Unity-Technologies/ml-agents/blob/latest_release/docs/Installation.md).
## Requirements
This version of the Unity ML-Agents package is compatible with the following versions of the Unity Editor:
* 2018.4 and later (recommended)
## Known limitations
### Headless Mode
If you enable Headless mode, you will not be able to collect visual observations
from your agents.
### Rendering Speed and Synchronization
Currently the speed of the game physics can only be increased to 100x real-time.
The Academy also moves in time with FixedUpdate() rather than Update(), so game
behavior implemented in Update() may be out of sync with the agent decision
making. See
[Execution Order of Event Functions](https://docs.unity3d.com/Manual/ExecutionOrder.html)
for more information.
You can control the frequency of Academy stepping by calling
`Academy.Instance.DisableAutomaticStepping()`, and then calling
`Academy.Instance.EnvironmentStep()`.
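A minimal sketch of that manual-stepping pattern follows; the MonoBehaviour wrapper is illustrative, and only DisableAutomaticStepping and EnvironmentStep come from the text above.

using UnityEngine;
using MLAgents;

// Illustrative driver that takes over Academy stepping so the environment is
// advanced explicitly rather than automatically on every FixedUpdate.
public class ManualAcademyStepper : MonoBehaviour
{
    void Awake()
    {
        // Stop the Academy from stepping itself.
        Academy.Instance.DisableAutomaticStepping();
    }

    void FixedUpdate()
    {
        // Advance the Academy whenever this script decides to; here, once per physics tick.
        Academy.Instance.EnvironmentStep();
    }
}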
### Unity Inference Engine Models
Currently, only models created with our trainers are supported for running
ML-Agents with a neural network behavior.
## Helpful links
If you are new to the Unity ML-Agents package, or have a question after reading
the documentation, you can check out our
[GitHub Repository](https://github.com/Unity-Technologies/ml-agents), which
also includes a number of ways to
[connect with us](https://github.com/Unity-Technologies/ml-agents#community-and-feedback)
including our [ML-Agents Forum](https://forum.unity.com/forums/ml-agents.453/).

64
com.unity.ml-agents/Editor/BehaviorParametersEditor.cs


using UnityEditor;
using Barracuda;
using MLAgents.Policies;
using UnityEngine;
namespace MLAgents.Editor
{

{
var so = serializedObject;
so.Update();
bool needPolicyUpdate; // Whether the name, model, inference device, or BehaviorType changed.
EditorGUI.BeginChangeCheck();
EditorGUILayout.PropertyField(so.FindProperty("m_BehaviorName"));
EditorGUILayout.PropertyField(so.FindProperty("m_BrainParameters"), true);
EditorGUILayout.PropertyField(so.FindProperty("m_Model"), true);
EditorGUI.indentLevel++;
EditorGUILayout.PropertyField(so.FindProperty("m_InferenceDevice"), true);
EditorGUI.indentLevel--;
EditorGUILayout.PropertyField(so.FindProperty("m_BehaviorType"));
EditorGUI.BeginChangeCheck(); // global
EditorGUI.BeginChangeCheck();
{
EditorGUILayout.PropertyField(so.FindProperty("m_BehaviorName"));
}
needPolicyUpdate = EditorGUI.EndChangeCheck();
EditorGUI.BeginDisabledGroup(!EditorUtilities.CanUpdateModelProperties());
{
EditorGUILayout.PropertyField(so.FindProperty("m_BrainParameters"), true);
}
EditorGUI.EndDisabledGroup();
EditorGUI.BeginChangeCheck();
{
EditorGUILayout.PropertyField(so.FindProperty("m_Model"), true);
EditorGUI.indentLevel++;
EditorGUILayout.PropertyField(so.FindProperty("m_InferenceDevice"), true);
EditorGUI.indentLevel--;
}
needPolicyUpdate = needPolicyUpdate || EditorGUI.EndChangeCheck();
EditorGUI.BeginChangeCheck();
{
EditorGUILayout.PropertyField(so.FindProperty("m_BehaviorType"));
}
needPolicyUpdate = needPolicyUpdate || EditorGUI.EndChangeCheck();
EditorGUILayout.PropertyField(so.FindProperty("m_UseChildSensors"), true);
// EditorGUILayout.PropertyField(serializedObject.FindProperty("m_Heuristic"), true);
EditorGUI.indentLevel--;
if (EditorGUI.EndChangeCheck())
EditorGUI.BeginDisabledGroup(!EditorUtilities.CanUpdateModelProperties());
m_RequireReload = true;
EditorGUILayout.PropertyField(so.FindProperty("m_UseChildSensors"), true);
EditorGUI.EndDisabledGroup();
EditorGUI.indentLevel--;
m_RequireReload = EditorGUI.EndChangeCheck();
if (needPolicyUpdate)
{
UpdateAgentPolicy();
}
}
/// <summary>

if (brainParameters != null)
{
var failedChecks = Inference.BarracudaModelParamLoader.CheckModel(
barracudaModel, brainParameters, sensorComponents);
barracudaModel, brainParameters, sensorComponents, behaviorParameters.behaviorType
);
foreach (var check in failedChecks)
{
if (check != null)

}
}
}
void UpdateAgentPolicy()
{
var behaviorParameters = (BehaviorParameters)target;
behaviorParameters.UpdateAgentPolicy();
}
}
}

27
com.unity.ml-agents/Editor/RayPerceptionSensorComponentBaseEditor.cs


EditorGUI.BeginChangeCheck();
EditorGUI.indentLevel++;
EditorGUILayout.PropertyField(so.FindProperty("m_SensorName"), true);
// Because the number of rays and the tags affect the observation shape,
// they are not editable during play mode.
EditorGUI.BeginDisabledGroup(Application.isPlaying);
// Don't allow certain fields to be modified during play mode.
// * SensorName affects the ordering of the Agent's observations
// * The number of tags and rays affects the size of the observations.
EditorGUI.BeginDisabledGroup(!EditorUtilities.CanUpdateModelProperties());
EditorGUILayout.PropertyField(so.FindProperty("m_SensorName"), true);
EditorGUILayout.PropertyField(so.FindProperty("m_DetectableTags"), true);
EditorGUILayout.PropertyField(so.FindProperty("m_RaysPerDirection"), true);
}

// Because the number of observation stacks affects the observation shape,
// it is not editable during play mode.
EditorGUI.BeginDisabledGroup(Application.isPlaying);
EditorGUI.BeginDisabledGroup(!EditorUtilities.CanUpdateModelProperties());
{
EditorGUILayout.PropertyField(so.FindProperty("m_ObservationStacks"), true);
}

m_RequireSensorUpdate = true;
}
UpdateSensorIfDirty();
UpdateSensorIfDirty();
if (m_RequireSensorUpdate)
{
var sensorComponent = serializedObject.targetObject as RayPerceptionSensorComponentBase;
sensorComponent?.UpdateSensor();
m_RequireSensorUpdate = false;
}
if (m_RequireSensorUpdate)
{
var sensorComponent = serializedObject.targetObject as RayPerceptionSensorComponentBase;
sensorComponent?.UpdateSensor();
m_RequireSensorUpdate = false;
}
}
}

10
com.unity.ml-agents/Runtime/Academy.cs


/// Unity package version of com.unity.ml-agents.
/// This must match the version string in package.json and is checked in a unit test.
/// </summary>
internal const string k_PackageVersion = "0.14.1-preview";
internal const string k_PackageVersion = "0.15.0-preview";
const int k_EditorTrainingPort = 5004;

/// <summary>
/// Collection of float properties (indexed by a string).
/// </summary>
public IFloatProperties FloatProperties;
public FloatPropertiesChannel FloatProperties;
// Fields not provided in the Inspector.

List<ModelRunner> m_ModelRunners = new List<ModelRunner>();
// Flag used to keep track of the first time the Academy is reset.
bool m_FirstAcademyReset;
bool m_HadFirstReset;
// The Academy uses a series of events to communicate with agents
// to facilitate synchronization. More specifically, it ensures

{
EnvironmentReset();
AgentForceReset?.Invoke();
m_FirstAcademyReset = true;
m_HadFirstReset = true;
}
/// <summary>

public void EnvironmentStep()
{
if (!m_FirstAcademyReset)
if (!m_HadFirstReset)
{
ForcedFullReset();
}

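Since the hunk above replaces the removed IFloatProperties interface with the concrete FloatPropertiesChannel, a short usage sketch may help. The agent class and the "scale" key are hypothetical; GetPropertyWithDefault is the same call used in the WallJumpAgent diff earlier in this change.

using UnityEngine;
using MLAgents;

// Hypothetical agent that reads a reset/curriculum parameter from the
// FloatPropertiesChannel exposed on the Academy at the start of each episode.
public class ScaledAgent : Agent
{
    public override void OnEpisodeBegin()
    {
        // "scale" is an arbitrary key for this example; 1.0f is the fallback value.
        var scale = Academy.Instance.FloatProperties.GetPropertyWithDefault("scale", 1.0f);
        transform.localScale = new Vector3(scale, scale, scale);
    }
}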
154
com.unity.ml-agents/Runtime/Agent.cs


/// value takes precedence (since the agent max step will never be reached).
///
/// Lastly, note that at any step the agent's policy is allowed to
/// change model with <see cref="GiveModel"/>.
/// change model with <see cref="SetModel"/>.
///
/// Implementation-wise, it is required that this class is extended and the
/// virtual methods overridden. For sample implementations of agent behavior,

internal struct AgentParameters
{
public int maxStep;
}
public int TeamId {
get {
LazyInitialize();
return m_PolicyFactory.TeamId;
}
}
public string BehaviorName {
get {
LazyInitialize();
return m_PolicyFactory.behaviorName;
}
}
[SerializeField][HideInInspector]

/// </summary>
internal VectorSensor collectObservationsSensor;
void OnEnable()
/// <summary>
/// Called when the attached <see cref="GameObject"/> becomes enabled and active.
/// </summary>
protected virtual void OnEnable()
{
LazyInitialize();
}

Academy.Instance.AgentForceReset += _AgentReset;
m_Brain = m_PolicyFactory.GeneratePolicy(Heuristic);
ResetData();
InitializeAgent();
Initialize();
// The first time the Academy resets, all Agents in the scene will be
// forced to reset through the <see cref="AgentForceReset"/> event.
// To avoid the Agent resetting twice, the Agents will not begin their
// episode when initializing until after the Academy has had its first reset.
if (Academy.Instance.TotalStepCount != 0)
{
OnEpisodeBegin();
}
}
/// <summary>

Disabled,
}
void OnDisable()
/// <summary>
/// Called when the attached <see cref="GameObject"/> becomes disabled and inactive.
/// </summary>
protected virtual void OnDisable()
{
DemonstrationWriters.Clear();

m_Brain?.RequestDecision(m_Info, sensors);
// We also have to write to any DemonstrationWriters so that they get the "done" flag.
foreach(var demoWriter in DemonstrationWriters)
foreach (var demoWriter in DemonstrationWriters)
{
demoWriter.Record(m_Info, sensors);
}

m_RequestDecision = false;
}
[Obsolete("GiveModel() has been deprecated, use SetModel() instead.")]
public void GiveModel(
string behaviorName,
NNModel model,
InferenceDevice inferenceDevice = InferenceDevice.CPU)
{
SetModel(behaviorName, model, inferenceDevice);
}
/// <summary>
/// Updates the Model for the agent. Any model currently assigned to the
/// agent will be replaced with the provided one. If the arguments are

/// <param name="model"> The model to use for inference.</param>
/// <param name = "inferenceDevice"> Define on what device the model
/// will be run.</param>
public void GiveModel(
public void SetModel(
m_PolicyFactory.GiveModel(behaviorName, model, inferenceDevice);
if (behaviorName == m_PolicyFactory.behaviorName &&
model == m_PolicyFactory.model &&
inferenceDevice == m_PolicyFactory.inferenceDevice)
{
// If everything is the same, don't make any changes.
return;
}
m_PolicyFactory.model = model;
m_PolicyFactory.inferenceDevice = inferenceDevice;
m_PolicyFactory.behaviorName = behaviorName;
ReloadPolicy();
}
internal void ReloadPolicy()
{
if (!m_Initialized)
{
// If we haven't initialized yet, no need to make any changes now; they'll
// happen in LazyInitialize later.
return;
}
m_Brain?.Dispose();
m_Brain = m_PolicyFactory.GeneratePolicy(Heuristic);
}

TimerStack.Instance.SetGauge(gaugeName, GetCumulativeReward());
}
[Obsolete("Done() has been deprecated, use EndEpisode() instead.")]
public void Done()
{
EndEpisode();
}
public void Done()
public void EndEpisode()
{
NotifyAgentDone(DoneReason.DoneCalled);
_AgentReset();

m_RequestAction = true;
}
/// Helper function that resets all the data structures associated with
/// the agent. Typically used when the agent is being initialized or reset
/// at the end of an episode.

// should stay the previous action before the Done(), so that it is properly recorded.
if (m_Action.vectorActions == null)
{
if (param.vectorActionSpaceType == SpaceType.Continuous)
{
m_Action.vectorActions = new float[param.vectorActionSize[0]];
m_Info.storedVectorActions = new float[param.vectorActionSize[0]];
}
else
{
m_Action.vectorActions = new float[param.vectorActionSize.Length];
m_Info.storedVectorActions = new float[param.vectorActionSize.Length];
}
m_Action.vectorActions = new float[param.numActions];
m_Info.storedVectorActions = new float[param.numActions];
[Obsolete("InitializeAgent() has been deprecated, use Initialize() instead.")]
public virtual void InitializeAgent()
{
}
/// <summary>
/// Initializes the agent, called once when the agent is enabled. Can be
/// left empty if there is no special, unique set-up behavior for the

/// One sample use is to store local references to other objects in the
/// scene which would facilitate computing this agent's observations.
/// </remarks>
public virtual void InitializeAgent()
public virtual void Initialize()
#pragma warning disable 0618
InitializeAgent();
#pragma warning restore 0618
}
/// <summary>

{
Debug.LogWarning("Heuristic method called but not implemented. Returning placeholder actions.");
var param = m_PolicyFactory.brainParameters;
var actionSize = param.vectorActionSpaceType == SpaceType.Continuous ?
param.vectorActionSize[0] :
param.vectorActionSize.Length;
return new float[actionSize];
return new float[param.numActions];
}
/// <summary>

/// </summary>
void SendInfoToBrain()
{
if (!m_Initialized)
{
throw new UnityAgentsException("Call to SendInfoToBrain when Agent hasn't been initialized. " +
"Please ensure that you are calling 'base.OnEnable()' if you have overridden OnEnable.");
}
if (m_Brain == null)
{
return;

m_Brain.RequestDecision(m_Info, sensors);
// If we have any DemonstrationWriters, write the AgentInfo and sensors to them.
foreach(var demoWriter in DemonstrationWriters)
foreach (var demoWriter in DemonstrationWriters)
{
demoWriter.Record(m_Info, sensors);
}

}
}
/// <summary>
/// Collects the vector observations of the agent.
/// The agent observation describes the current environment from the

{
}
[Obsolete("AgentAction() has been deprecated, use OnActionReceived() instead.")]
public virtual void AgentAction(float[] vectorAction)
{
}
/// <summary>
/// Specifies the agent behavior at every step based on the provided
/// action.

/// will be of length 1.
/// </param>
public virtual void AgentAction(float[] vectorAction)
public virtual void OnActionReceived(float[] vectorAction)
{
#pragma warning disable 0618
AgentAction(m_Action.vectorActions);
#pragma warning restore 0618
}
[Obsolete("AgentReset() has been deprecated, use OnEpisodeBegin() instead.")]
public virtual void AgentReset()
{
}

/// episode).
/// </summary>
public virtual void AgentReset()
public virtual void OnEpisodeBegin()
#pragma warning disable 0618
AgentReset();
#pragma warning restore 0618
}
/// <summary>

}
/// <summary>
/// This method will forcefully reset the agent and will also reset the hasAlreadyReset flag.
/// This way, even if the agent was already in the process of resetting, it will be reset again
/// and will not send a Done flag at the next step.
/// </summary>
void ForceReset()
{
_AgentReset();
}
/// <summary>
/// An internal reset method that updates internal data structures in
/// addition to calling <see cref="AgentReset"/>.
/// </summary>

m_StepCount = 0;
AgentReset();
OnEpisodeBegin();
}
/// <summary>

if ((m_RequestAction) && (m_Brain != null))
{
m_RequestAction = false;
AgentAction(m_Action.vectorActions);
OnActionReceived(m_Action.vectorActions);
}
if ((m_StepCount >= maxStep) && (maxStep > 0))

void DecideAction()
{
m_Action.vectorActions = m_Brain?.DecideAction();
if (m_Action.vectorActions == null){
if (m_Action.vectorActions == null)
{
ResetData();
}
}

12
com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs


}
else
{
var compressedObs = sensor.GetCompressedObservation();
if (compressedObs == null)
{
throw new UnityAgentsException(
$"GetCompressedObservation() returned null data for sensor named {sensor.GetName()}. " +
"You must return a byte[]. If you don't want to use compressed observations, " +
"return SensorCompressionType.None from GetCompressionType()."
);
}
CompressedData = ByteString.CopyFrom(sensor.GetCompressedObservation()),
CompressedData = ByteString.CopyFrom(compressedObs),
CompressionType = (CompressionTypeProto)sensor.GetCompressionType(),
};
}

61
com.unity.ml-agents/Runtime/Communicator/RpcCommunicator.cs


"Python library version: {2}.",
pythonCommunicationVersion, initParameters.unityCommunicationVersion,
pythonPackageVersion
);
);
}
else
{

"A side channel with type index {0} is already registered. You cannot register multiple " +
"side channels of the same id.", channelId));
}
// Process any messages that we've already received for this channel ID.
var numMessages = m_CachedMessages.Count;
for (int i = 0; i < numMessages; i++)
{
var cachedMessage = m_CachedMessages.Dequeue();
if (channelId == cachedMessage.ChannelId)
{
using (var incomingMsg = new IncomingMessage(cachedMessage.Message))
{
sideChannel.OnMessageReceived(incomingMsg);
}
}
else
{
m_CachedMessages.Enqueue(cachedMessage);
}
}
m_SideChannels.Add(channelId, sideChannel);
}

}
}
private struct CachedSideChannelMessage
{
public Guid ChannelId;
public byte[] Message;
}
private static Queue<CachedSideChannelMessage> m_CachedMessages = new Queue<CachedSideChannelMessage>();
/// <summary>
/// Separates the data received from Python into individual messages for each registered side channel.
/// </summary>

{
while (m_CachedMessages.Count != 0)
{
var cachedMessage = m_CachedMessages.Dequeue();
if (sideChannels.ContainsKey(cachedMessage.ChannelId))
{
using (var incomingMsg = new IncomingMessage(cachedMessage.Message))
{
sideChannels[cachedMessage.ChannelId].OnMessageReceived(incomingMsg);
}
}
else
{
Debug.Log(string.Format(
"Unknown side channel data received. Channel Id is "
+ ": {0}", cachedMessage.ChannelId));
}
}
if (dataReceived.Length == 0)
{
return;

}
if (sideChannels.ContainsKey(channelId))
{
sideChannels[channelId].OnMessageReceived(message);
using (var incomingMsg = new IncomingMessage(message))
{
sideChannels[channelId].OnMessageReceived(incomingMsg);
}
Debug.Log(string.Format(
"Unknown side channel data received. Channel Id is "
+ ": {0}", channelId));
// Don't recognize this ID, but cache it in case the SideChannel that can handle
// it is registered before the next call to ProcessSideChannelData.
m_CachedMessages.Enqueue(new CachedSideChannelMessage
{
ChannelId = channelId,
Message = message
});
}
}
}

9
com.unity.ml-agents/Runtime/DecisionRequester.cs


using System.Runtime.CompilerServices;
using UnityEngine.Serialization;
namespace MLAgents
{

/// </summary>
[AddComponentMenu("ML Agents/Decision Requester", (int)MenuGroup.Default)]
public class DecisionRequester : MonoBehaviour
internal class DecisionRequester : MonoBehaviour
{
/// <summary>
/// The frequency with which the agent requests a decision. A DecisionPeriod of 5 means

[Tooltip("Indicates whether or not the agent will take an action during the Academy " +
"steps where it does not request a decision. Has no effect when DecisionPeriod " +
"is set to 1.")]
public bool RepeatAction = true;
[FormerlySerializedAs("RepeatAction")]
public bool TakeActionsBetweenDecisions = true;
/// <summary>
/// Whether or not the Agent decisions should start at an offset (different for each agent).

{
m_Agent?.RequestDecision();
}
if (RepeatAction)
if (TakeActionsBetweenDecisions)
{
m_Agent?.RequestAction();
}

16
com.unity.ml-agents/Runtime/Inference/BarracudaModelParamLoader.cs


/// </param>
/// <param name="sensorComponents">Attached sensor components</param>
/// <returns>The list the error messages of the checks that failed</returns>
public static IEnumerable<string> CheckModel(Model model, BrainParameters brainParameters, SensorComponent[] sensorComponents)
public static IEnumerable<string> CheckModel(Model model, BrainParameters brainParameters,
SensorComponent[] sensorComponents, BehaviorType behaviorType = BehaviorType.Default)
failedModelChecks.Add(
"There is no model for this Brain, cannot run inference. " +
"(But can still train)");
var errorMsg = "There is no model for this Brain; cannot run inference. ";
if (behaviorType == BehaviorType.InferenceOnly)
{
errorMsg += "Either assign a model, or change to a different Behavior Type.";
}
else
{
errorMsg += "(But can still train)";
}
failedModelChecks.Add(errorMsg);
return failedModelChecks;
}

2
com.unity.ml-agents/Runtime/Inference/ModelRunner.cs


m_VisualObservationsInitialized = true;
}
Profiler.BeginSample("LearningBrain.DecideAction");
Profiler.BeginSample("ModelRunner.DecideAction");
Profiler.BeginSample($"MLAgents.{m_Model.name}.GenerateTensors");
// Prepare the input tensors to be fed into the engine

164
com.unity.ml-agents/Runtime/Policies/BehaviorParameters.cs


namespace MLAgents.Policies
{
/// <summary>
/// Defines what type of behavior the Agent will be using
/// </summary>
[Serializable]
public enum BehaviorType
{
/// <summary>
/// The Agent will use the remote process for decision making.
/// If unavailable, it will use inference, and if no model is provided, it will use
/// the heuristic.
/// </summary>
Default,
/// <summary>
/// The Agent will always use its heuristic
/// </summary>
HeuristicOnly,
/// <summary>
/// The Agent will always use inference with the provided
/// neural network model.
/// </summary>
InferenceOnly
}
/// <summary>
internal class BehaviorParameters : MonoBehaviour
public class BehaviorParameters : MonoBehaviour
[Serializable]
enum BehaviorType
[HideInInspector, SerializeField]
BrainParameters m_BrainParameters = new BrainParameters();
/// <summary>
/// The associated <see cref="BrainParameters"/> for this behavior.
/// </summary>
public BrainParameters brainParameters
Default,
HeuristicOnly,
InferenceOnly
get { return m_BrainParameters; }
internal set { m_BrainParameters = value; }
[HideInInspector]
[SerializeField]
BrainParameters m_BrainParameters = new BrainParameters();
[HideInInspector]
[SerializeField]
[HideInInspector, SerializeField]
[HideInInspector]
[SerializeField]
InferenceDevice m_InferenceDevice;
[HideInInspector]
[SerializeField]
// Disable warning /com.unity.ml-agents/Runtime/Policy/BehaviorParameters.cs(...):
// warning CS0649: Field 'BehaviorParameters.m_BehaviorType' is never assigned to,
// and will always have its default value
// This field is set in the custom editor.
#pragma warning disable 0649
BehaviorType m_BehaviorType;
#pragma warning restore 0649
[HideInInspector]
[SerializeField]
string m_BehaviorName = "My Behavior";
/// The team ID for this behavior.
/// The neural network model used when in inference mode.
/// This should not be set at runtime; use <see cref="Agent.SetModel(string,NNModel,InferenceDevice)"/>
/// to set it instead.
[HideInInspector]
[SerializeField]
[FormerlySerializedAs("m_TeamID")]
public int TeamId;
public NNModel model
{
get { return m_Model; }
set { m_Model = value; UpdateAgentPolicy(); }
}
[FormerlySerializedAs("m_useChildSensors")]
[HideInInspector]
[SerializeField]
[Tooltip("Use all Sensor components attached to child GameObjects of this Agent.")]
bool m_UseChildSensors = true;
[HideInInspector, SerializeField]
InferenceDevice m_InferenceDevice;
/// The associated <see cref="BrainParameters"/> for this behavior.
/// How inference is performed for this Agent's model.
/// This should not be set at runtime; use <see cref="Agent.SetModel(string,NNModel,InferenceDevice)"/>
/// to set it instead.
internal BrainParameters brainParameters
public InferenceDevice inferenceDevice
get { return m_BrainParameters; }
get { return m_InferenceDevice; }
set { m_InferenceDevice = value; UpdateAgentPolicy();}
[HideInInspector, SerializeField]
BehaviorType m_BehaviorType;
/// Whether or not to use all the sensor components attached to child GameObjects of the agent.
/// The BehaviorType for the Agent.
public bool useChildSensors
public BehaviorType behaviorType
get { return m_UseChildSensors; }
get { return m_BehaviorType; }
set { m_BehaviorType = value; UpdateAgentPolicy(); }
[HideInInspector, SerializeField]
string m_BehaviorName = "My Behavior";
/// This should not be set at runtime; use <see cref="Agent.SetModel(string,NNModel,InferenceDevice)"/>
/// to set it instead.
set { m_BehaviorName = value; UpdateAgentPolicy(); }
}
/// <summary>
/// The team ID for this behavior.
/// </summary>
[HideInInspector, SerializeField, FormerlySerializedAs("m_TeamID")]
public int TeamId;
// TODO properties here instead of Agent
[FormerlySerializedAs("m_useChildSensors")]
[HideInInspector]
[SerializeField]
[Tooltip("Use all Sensor components attached to child GameObjects of this Agent.")]
bool m_UseChildSensors = true;
/// <summary>
/// Whether or not to use all the sensor components attached to child GameObjects of the agent.
/// Note that changing this after the Agent has been initialized will not have any effect.
/// </summary>
public bool useChildSensors
{
get { return m_UseChildSensors; }
set { m_UseChildSensors = value; }
}
/// <summary>

get { return m_BehaviorName + "?team=" + TeamId; }
}
public IPolicy GeneratePolicy(Func<float[]> heuristic)
internal IPolicy GeneratePolicy(Func<float[]> heuristic)
{
switch (m_BehaviorType)
{

{
if (m_Model == null)
{
var behaviorType = BehaviorType.InferenceOnly.ToString();
throw new UnityAgentsException(
$"Can't use Behavior Type {behaviorType} without a model. " +
"Either assign a model, or change to a different Behavior Type."
);
}
}
case BehaviorType.Default:
if (Academy.Instance.IsCommunicatorOn)
{

}
}
/// <summary>
/// Updates the model and related details for this behavior.
/// </summary>
/// <param name="newBehaviorName">New name for the behavior.</param>
/// <param name="model">New neural network model for this behavior.</param>
/// <param name="inferenceDevice">New inference device for this behavior.</param>
public void GiveModel(
string newBehaviorName,
NNModel model,
InferenceDevice inferenceDevice = InferenceDevice.CPU)
internal void UpdateAgentPolicy()
m_Model = model;
m_InferenceDevice = inferenceDevice;
m_BehaviorName = newBehaviorName;
var agent = GetComponent<Agent>();
if (agent == null)
{
return;
}
agent.ReloadPolicy();
}
}
}

20
com.unity.ml-agents/Runtime/Policies/BrainParameters.cs


/// <summary>
/// Whether the action space is discrete or continuous.
/// </summary>
internal enum SpaceType
public enum SpaceType
{
/// <summary>
/// Discrete action space: a fixed number of options are available.

/// decision process.
/// </summary>
[Serializable]
internal class BrainParameters
public class BrainParameters
{
/// <summary>
/// If continuous : The length of the float vector that represents the state.

/// Defines if the action is discrete or continuous.
/// </summary>
public SpaceType vectorActionSpaceType = SpaceType.Discrete;
public int numActions
{
get
{
switch (vectorActionSpaceType)
{
case SpaceType.Discrete:
return vectorActionSize.Length;
case SpaceType.Continuous:
return vectorActionSize[0];
default:
return 0;
}
}
}
/// <summary>
/// Deep clones the BrainParameter object.

19
com.unity.ml-agents/Runtime/Sensors/CameraSensor.cs


SensorCompressionType m_CompressionType;
/// <summary>
/// The Camera used for rendering the sensor observations.
/// </summary>
public Camera camera
{
get { return m_Camera; }
set { m_Camera = value; }
}
/// <summary>
/// The compression type used by the sensor.
/// </summary>
public SensorCompressionType compressionType
{
get { return m_CompressionType; }
set { m_CompressionType = value; }
}
/// <summary>
/// Creates and returns the camera sensor.
/// </summary>
/// <param name="camera">Camera object to capture images from.</param>

82
com.unity.ml-agents/Runtime/Sensors/CameraSensorComponent.cs


using UnityEngine;
using UnityEngine.Serialization;
namespace MLAgents.Sensors
{

[AddComponentMenu("ML Agents/Camera Sensor", (int)MenuGroup.Sensors)]
public class CameraSensorComponent : SensorComponent
{
[HideInInspector, SerializeField, FormerlySerializedAs("camera")]
Camera m_Camera;
CameraSensor m_Sensor;
public new Camera camera;
public new Camera camera
{
get { return m_Camera; }
set { m_Camera = value; UpdateSensor(); }
}
[HideInInspector, SerializeField, FormerlySerializedAs("sensorName")]
string m_SensorName = "CameraSensor";
/// Note that changing this at runtime does not affect how the Agent sorts the sensors.
public string sensorName = "CameraSensor";
public string sensorName
{
get { return m_SensorName; }
set { m_SensorName = value; }
}
[HideInInspector, SerializeField, FormerlySerializedAs("width")]
int m_Width = 84;
/// Width of the generated image.
/// Width of the generated observation.
/// Note that changing this after the sensor is created has no effect.
public int width = 84;
public int width
{
get { return m_Width; }
set { m_Width = value; }
}
[HideInInspector, SerializeField, FormerlySerializedAs("height")]
int m_Height = 84;
/// Height of the generated image.
/// Height of the generated observation.
/// Note that changing this after the sensor is created has no effect.
public int height = 84;
public int height
{
get { return m_Height; }
set { m_Height = value; }
}
[HideInInspector, SerializeField, FormerlySerializedAs("grayscale")]
public bool m_Grayscale;
/// Note that changing this after the sensor is created has no effect.
public bool grayscale;
public bool grayscale
{
get { return m_Grayscale; }
set { m_Grayscale = value; }
}
[HideInInspector, SerializeField, FormerlySerializedAs("compression")]
SensorCompressionType m_Compression = SensorCompressionType.PNG;
public SensorCompressionType compression = SensorCompressionType.PNG;
public SensorCompressionType compression
{
get { return m_Compression; }
set { m_Compression = value; UpdateSensor(); }
}
/// <summary>
/// Creates the <see cref="CameraSensor"/>

{
return new CameraSensor(camera, width, height, grayscale, sensorName, compression);
m_Sensor = new CameraSensor(m_Camera, m_Width, m_Height, grayscale, m_SensorName, compression);
return m_Sensor;
}
/// <summary>

public override int[] GetObservationShape()
{
return CameraSensor.GenerateShape(width, height, grayscale);
return CameraSensor.GenerateShape(m_Width, m_Height, grayscale);
}
/// <summary>
/// Update fields that are safe to change on the Sensor at runtime.
/// </summary>
internal void UpdateSensor()
{
if (m_Sensor != null)
{
m_Sensor.camera = m_Camera;
m_Sensor.compressionType = m_Compression;
}
}
}
}

29
com.unity.ml-agents/Runtime/Sensors/RayPerceptionSensor.cs


var startPositionWorld = transform.TransformPoint(startPositionLocal);
var endPositionWorld = transform.TransformPoint(endPositionLocal);
return (StartPositionWorld: startPositionWorld, EndPositionWorld: endPositionWorld);
return (StartPositionWorld : startPositionWorld, EndPositionWorld : endPositionWorld);
}
/// <summary>

/// <param name="rayInput">The inputs for the sensor.</param>
public RayPerceptionSensor(string name, RayPerceptionInput rayInput)
{
var numObservations = rayInput.OutputSize();
m_Shape = new[] { numObservations };
m_Observations = new float[numObservations];
SetNumObservations(rayInput.OutputSize());
if (Application.isEditor)
{

internal void SetRayPerceptionInput(RayPerceptionInput input)
void SetNumObservations(int numObservations)
// TODO make sure that number of rays and tags don't change
m_RayPerceptionInput = input;
m_Shape = new[] { numObservations };
m_Observations = new float[numObservations];
}
internal void SetRayPerceptionInput(RayPerceptionInput rayInput)
{
// Note that changing the number of rays or tags doesn't directly call this,
// but changing them and then changing another field will.
if (m_RayPerceptionInput.OutputSize() != rayInput.OutputSize())
{
Debug.Log(
"Changing the number of tags or rays at runtime is not " +
"supported and may cause errors in training or inference."
);
// Changing the shape will probably break things downstream, but we can at least
// keep this consistent.
SetNumObservations(rayInput.OutputSize());
}
m_RayPerceptionInput = rayInput;
}
/// <summary>

8
com.unity.ml-agents/Runtime/Sensors/RayPerceptionSensorComponent3D.cs


[AddComponentMenu("ML Agents/Ray Perception Sensor 3D", (int)MenuGroup.Sensors)]
public class RayPerceptionSensorComponent3D : RayPerceptionSensorComponentBase
{
[HideInInspector]
[SerializeField]
[FormerlySerializedAs("startVerticalOffset")]
[HideInInspector, SerializeField, FormerlySerializedAs("startVerticalOffset")]
[Range(-10f, 10f)]
[Tooltip("Ray start is offset up or down by this amount.")]
float m_StartVerticalOffset;

set { m_StartVerticalOffset = value; UpdateSensor(); }
}
[HideInInspector]
[SerializeField]
[FormerlySerializedAs("endVerticalOffset")]
[HideInInspector, SerializeField, FormerlySerializedAs("endVerticalOffset")]
[Range(-10f, 10f)]
[Tooltip("Ray end is offset up or down by this amount.")]
float m_EndVerticalOffset;

75
com.unity.ml-agents/Runtime/Sensors/RayPerceptionSensorComponentBase.cs


/// </summary>
public abstract class RayPerceptionSensorComponentBase : SensorComponent
{
[HideInInspector]
[SerializeField]
[FormerlySerializedAs("sensorName")]
[HideInInspector, SerializeField, FormerlySerializedAs("sensorName")]
/// Note that changing this at runtime does not affect how the Agent sorts the sensors.
get => m_SensorName;
// Restrict access to the name, since changing it at runtime doesn't re-sort the Agent's sensors.
internal set => m_SensorName = value;
get { return m_SensorName; }
set { m_SensorName = value; }
[SerializeField]
[FormerlySerializedAs("detectableTags")]
[SerializeField, FormerlySerializedAs("detectableTags")]
/// Note that this should not be changed at runtime.
get => m_DetectableTags;
// Note: can't change at runtime
internal set => m_DetectableTags = value;
get { return m_DetectableTags; }
set { m_DetectableTags = value; }
[HideInInspector]
[SerializeField]
[FormerlySerializedAs("raysPerDirection")]
[HideInInspector, SerializeField, FormerlySerializedAs("raysPerDirection")]
[Range(0, 50)]
[Tooltip("Number of rays to the left and right of center.")]
int m_RaysPerDirection = 3;

/// Note that this should not be changed at runtime.
get => m_RaysPerDirection;
get { return m_RaysPerDirection; }
internal set => m_RaysPerDirection = value;
set { m_RaysPerDirection = value;}
[HideInInspector]
[SerializeField]
[FormerlySerializedAs("maxRayDegrees")]
[HideInInspector, SerializeField, FormerlySerializedAs("maxRayDegrees")]
"Greater than 90 degrees will go backwards.")]
"Greater than 90 degrees will go backwards.")]
float m_MaxRayDegrees = 70;
/// <summary>

set { m_MaxRayDegrees = value; UpdateSensor(); }
}
[HideInInspector]
[SerializeField]
[FormerlySerializedAs("sphereCastRadius")]
[HideInInspector, SerializeField, FormerlySerializedAs("sphereCastRadius")]
[Range(0f, 10f)]
[Tooltip("Radius of sphere to cast. Set to zero for raycasts.")]
float m_SphereCastRadius = 0.5f;

set { m_SphereCastRadius = value; UpdateSensor(); }
}
[HideInInspector]
[SerializeField]
[FormerlySerializedAs("rayLength")]
[HideInInspector, SerializeField, FormerlySerializedAs("rayLength")]
[Range(1, 1000)]
[Tooltip("Length of the rays to cast.")]
float m_RayLength = 20f;

set { m_RayLength = value; UpdateSensor(); }
}
[HideInInspector]
[SerializeField]
[FormerlySerializedAs("rayLayerMask")]
[HideInInspector, SerializeField, FormerlySerializedAs("rayLayerMask")]
[Tooltip("Controls which layers the rays can hit.")]
LayerMask m_RayLayerMask = Physics.DefaultRaycastLayers;

public LayerMask rayLayerMask
{
get => m_RayLayerMask;
set { m_RayLayerMask = value; UpdateSensor();}
set { m_RayLayerMask = value; UpdateSensor(); }
[HideInInspector]
[SerializeField]
[FormerlySerializedAs("observationStacks")]
[HideInInspector, SerializeField, FormerlySerializedAs("observationStacks")]
[Range(1, 50)]
[Tooltip("Whether to stack previous observations. Using 1 means no previous observations.")]
int m_ObservationStacks = 1;

/// Note that changing this after the sensor is created has no effect.
internal int observationStacks
public int observationStacks
get => m_ObservationStacks;
set => m_ObservationStacks = value; // Note: can't change at runtime
get { return m_ObservationStacks; }
set { m_ObservationStacks = value; }
}
/// <summary>

RayPerceptionSensor m_RaySensor;
/// <summary>
/// Get the RayPerceptionSensor that was created.
/// </summary>
public RayPerceptionSensor raySensor
{
get => m_RaySensor;
}
/// <summary>
/// Returns the <see cref="RayPerceptionCastType"/> for the associated raycast sensor.
/// </summary>
/// <returns></returns>

return new[] { obsSize * stacks };
}
RayPerceptionInput GetRayPerceptionInput()
/// <summary>
/// Get the RayPerceptionInput that is used by the <see cref="RayPerceptionSensor"/>.
/// </summary>
/// <returns></returns>
public RayPerceptionInput GetRayPerceptionInput()
{
var rayAngles = GetRayAngles(raysPerDirection, maxRayDegrees);

/// <summary>
/// Draw the debug information from the sensor (if available).
/// </summary>
void DrawRaycastGizmos(DebugDisplayInfo.RayInfo rayInfo, float alpha=1.0f)
void DrawRaycastGizmos(DebugDisplayInfo.RayInfo rayInfo, float alpha = 1.0f)
{
var startPositionWorld = rayInfo.worldStart;
var endPositionWorld = rayInfo.worldEnd;
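Taken together, the setters in this file suggest a simple rule of thumb: properties whose setters call `UpdateSensor()` (`rayLength`, `sphereCastRadius`, `maxRayDegrees`, `rayLayerMask`) can be adjusted after the sensor has been created, while `raysPerDirection`, `detectableTags`, `sensorName`, and `observationStacks` are now internal or fixed once the sensor exists. A minimal sketch, assuming the property names follow the serialized field names shown above:

```csharp
using UnityEngine;
using MLAgents.Sensors;

// Sketch only: adjust the ray properties whose setters call UpdateSensor().
// Changing the number of rays or the detectable tags at runtime is not supported.
public class RayTuner : MonoBehaviour
{
    void Start()
    {
        var rayComponent = GetComponent<RayPerceptionSensorComponent3D>();
        if (rayComponent == null)
        {
            return;
        }

        rayComponent.rayLength = 30f;        // pushed to the sensor via UpdateSensor()
        rayComponent.sphereCastRadius = 0f;  // zero falls back to plain raycasts
        rayComponent.maxRayDegrees = 90f;    // values above 90 start angling backwards
        rayComponent.rayLayerMask = Physics.DefaultRaycastLayers;
    }
}
```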

28
com.unity.ml-agents/Runtime/Sensors/RenderTextureSensor.cs


SensorCompressionType m_CompressionType;
/// <summary>
/// The compression type used by the sensor.
/// </summary>
public SensorCompressionType compressionType
{
get { return m_CompressionType; }
set { m_CompressionType = value; }
}
/// <summary>
/// Initializes the sensor.
/// </summary>
/// <param name="renderTexture">The <see cref="RenderTexture"/> instance to wrap.</param>

var texture = ObservationToTexture(m_RenderTexture);
// TODO support more types here, e.g. JPG
var compressed = texture.EncodeToPNG();
UnityEngine.Object.Destroy(texture);
DestroyTexture(texture);
return compressed;
}
}

{
var texture = ObservationToTexture(m_RenderTexture);
var numWritten = Utilities.TextureToTensorProxy(texture, adapter, m_Grayscale);
UnityEngine.Object.Destroy(texture);
DestroyTexture(texture);
return numWritten;
}
}

texture2D.Apply();
RenderTexture.active = prevActiveRt;
return texture2D;
}
static void DestroyTexture(Texture2D texture)
{
if (Application.isEditor)
{
// Edit Mode tests complain if we use Destroy()
// TODO move to extension methods for UnityEngine.Object?
UnityEngine.Object.DestroyImmediate(texture);
}
else
{
UnityEngine.Object.Destroy(texture);
}
}
}
}

57
com.unity.ml-agents/Runtime/Sensors/RenderTextureSensorComponent.cs


using UnityEngine;
using UnityEngine.Serialization;
namespace MLAgents.Sensors
{

[AddComponentMenu("ML Agents/Render Texture Sensor", (int)MenuGroup.Sensors)]
public class RenderTextureSensorComponent : SensorComponent
{
RenderTextureSensor m_Sensor;
public RenderTexture renderTexture;
[HideInInspector, SerializeField, FormerlySerializedAs("renderTexture")]
RenderTexture m_RenderTexture;
public RenderTexture renderTexture
{
get { return m_RenderTexture; }
set { m_RenderTexture = value; }
}
[HideInInspector, SerializeField, FormerlySerializedAs("sensorName")]
string m_SensorName = "RenderTextureSensor";
/// Name of the sensor.
/// Name of the generated <see cref="RenderTextureSensor"/>.
/// Note that changing this at runtime does not affect how the Agent sorts the sensors.
public string sensorName = "RenderTextureSensor";
public string sensorName
{
get { return m_SensorName; }
set { m_SensorName = value; }
}
[HideInInspector, SerializeField, FormerlySerializedAs("grayscale")]
public bool m_Grayscale;
/// Note that changing this after the sensor is created has no effect.
public bool grayscale;
public bool grayscale
{
get { return m_Grayscale; }
set { m_Grayscale = value; }
}
[HideInInspector, SerializeField, FormerlySerializedAs("compression")]
SensorCompressionType m_Compression = SensorCompressionType.PNG;
public SensorCompressionType compression = SensorCompressionType.PNG;
public SensorCompressionType compression
{
get { return m_Compression; }
set { m_Compression = value; UpdateSensor(); }
}
return new RenderTextureSensor(renderTexture, grayscale, sensorName, compression);
m_Sensor = new RenderTextureSensor(renderTexture, grayscale, sensorName, compression);
return m_Sensor;
}
/// <inheritdoc/>

var height = renderTexture != null ? renderTexture.height : 0;
return new[] { height, width, grayscale ? 1 : 3 };
}
/// <summary>
/// Update fields that are safe to change on the Sensor at runtime.
/// </summary>
internal void UpdateSensor()
{
if (m_Sensor != null)
{
m_Sensor.compressionType = m_Compression;
}
}
}
}
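The component now exposes its configuration through properties instead of public fields. A minimal usage sketch, assuming the `RenderTexture` is created and rendered into elsewhere:

```csharp
using UnityEngine;
using MLAgents.Sensors;

// Sketch only: configure a RenderTextureSensorComponent from code.
public class RenderTextureObservationSetup : MonoBehaviour
{
    public RenderTexture observationTexture; // assign in the Inspector

    void Awake()
    {
        var sensorComponent = gameObject.AddComponent<RenderTextureSensorComponent>();
        sensorComponent.renderTexture = observationTexture;
        sensorComponent.sensorName = "RenderTextureSensor";
        sensorComponent.grayscale = false;
        // compression can also be changed later; UpdateSensor() pushes the new
        // value to the underlying RenderTextureSensor.
        sensorComponent.compression = SensorCompressionType.PNG;
    }
}
```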

31
com.unity.ml-agents/Runtime/SideChannels/EngineConfigurationChannel.cs


using System.IO;
using System;
using UnityEngine;

}
/// <inheritdoc/>
public override void OnMessageReceived(byte[] data)
public override void OnMessageReceived(IncomingMessage msg)
using (var memStream = new MemoryStream(data))
{
using (var binaryReader = new BinaryReader(memStream))
{
var width = binaryReader.ReadInt32();
var height = binaryReader.ReadInt32();
var qualityLevel = binaryReader.ReadInt32();
var timeScale = binaryReader.ReadSingle();
var targetFrameRate = binaryReader.ReadInt32();
var width = msg.ReadInt32();
var height = msg.ReadInt32();
var qualityLevel = msg.ReadInt32();
var timeScale = msg.ReadFloat32();
var targetFrameRate = msg.ReadInt32();
timeScale = Mathf.Clamp(timeScale, 1, 100);
timeScale = Mathf.Clamp(timeScale, 1, 100);
Screen.SetResolution(width, height, false);
QualitySettings.SetQualityLevel(qualityLevel, true);
Time.timeScale = timeScale;
Time.captureFramerate = 60;
Application.targetFrameRate = targetFrameRate;
}
}
Screen.SetResolution(width, height, false);
QualitySettings.SetQualityLevel(qualityLevel, true);
Time.timeScale = timeScale;
Time.captureFramerate = 60;
Application.targetFrameRate = targetFrameRate;
}
}
}

109
com.unity.ml-agents/Runtime/SideChannels/FloatPropertiesChannel.cs


using System.Collections.Generic;
using System.IO;
using System.Text;
/// Interface for managing a collection of float properties keyed by a string variable.
/// </summary>
public interface IFloatProperties
{
/// <summary>
/// Sets one of the float properties of the environment. This data will be sent to Python.
/// </summary>
/// <param name="key"> The string identifier of the property.</param>
/// <param name="value"> The float value of the property.</param>
void SetProperty(string key, float value);
/// <summary>
/// Get an Environment property with a default value. If there is a value for this property,
/// it will be returned, otherwise, the default value will be returned.
/// </summary>
/// <param name="key"> The string identifier of the property.</param>
/// <param name="defaultValue"> The default value of the property.</param>
/// <returns></returns>
float GetPropertyWithDefault(string key, float defaultValue);
/// <summary>
/// Registers an action to be performed every time the property is changed.
/// </summary>
/// <param name="key"> The string identifier of the property.</param>
/// <param name="action"> The action that ill be performed. Takes a float as input.</param>
void RegisterCallback(string key, Action<float> action);
/// <summary>
/// Returns a list of all the string identifiers of the properties currently present.
/// </summary>
/// <returns> The list of string identifiers </returns>
IList<string> ListProperties();
}
/// <summary>
public class FloatPropertiesChannel : SideChannel, IFloatProperties
public class FloatPropertiesChannel : SideChannel
{
Dictionary<string, float> m_FloatProperties = new Dictionary<string, float>();
Dictionary<string, Action<float>> m_RegisteredActions = new Dictionary<string, Action<float>>();

{
ChannelId = new Guid(k_FloatPropertiesDefaultId);
}
else{
else
{
public override void OnMessageReceived(byte[] data)
public override void OnMessageReceived(IncomingMessage msg)
var kv = DeserializeMessage(data);
m_FloatProperties[kv.Key] = kv.Value;
if (m_RegisteredActions.ContainsKey(kv.Key))
{
m_RegisteredActions[kv.Key].Invoke(kv.Value);
}
var key = msg.ReadString();
var value = msg.ReadFloat32();
m_FloatProperties[key] = value;
Action<float> action;
m_RegisteredActions.TryGetValue(key, out action);
action?.Invoke(value);
}
/// <inheritdoc/>

QueueMessageToSend(SerializeMessage(key, value));
if (m_RegisteredActions.ContainsKey(key))
using (var msgOut = new OutgoingMessage())
m_RegisteredActions[key].Invoke(value);
msgOut.WriteString(key);
msgOut.WriteFloat32(value);
QueueMessageToSend(msgOut);
Action<float> action;
m_RegisteredActions.TryGetValue(key, out action);
action?.Invoke(value);
if (m_FloatProperties.ContainsKey(key))
{
return m_FloatProperties[key];
}
else
{
return defaultValue;
}
float valueOut;
bool hasKey = m_FloatProperties.TryGetValue(key, out valueOut);
return hasKey ? valueOut : defaultValue;
}
/// <inheritdoc/>

public IList<string> ListProperties()
{
return new List<string>(m_FloatProperties.Keys);
}
static KeyValuePair<string, float> DeserializeMessage(byte[] data)
{
using (var memStream = new MemoryStream(data))
{
using (var binaryReader = new BinaryReader(memStream))
{
var keyLength = binaryReader.ReadInt32();
var key = Encoding.ASCII.GetString(binaryReader.ReadBytes(keyLength));
var value = binaryReader.ReadSingle();
return new KeyValuePair<string, float>(key, value);
}
}
}
static byte[] SerializeMessage(string key, float value)
{
using (var memStream = new MemoryStream())
{
using (var binaryWriter = new BinaryWriter(memStream))
{
var stringEncoded = Encoding.ASCII.GetBytes(key);
binaryWriter.Write(stringEncoded.Length);
binaryWriter.Write(stringEncoded);
binaryWriter.Write(value);
return memStream.ToArray();
}
}
}
}
}
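For reference, the channel's public surface (`SetProperty`, `GetPropertyWithDefault`, `RegisterCallback`, `ListProperties`) is unchanged by this rewrite; only the wire format moved to `IncomingMessage`/`OutgoingMessage`. A short usage sketch, assuming you already hold a reference to the `FloatPropertiesChannel` instance registered with the Academy:

```csharp
using UnityEngine;
using MLAgents.SideChannels;

// Sketch only: how the channel instance is obtained depends on your setup.
public static class FloatPropertiesExample
{
    public static void Use(FloatPropertiesChannel properties)
    {
        // Queue a value to be sent to Python.
        properties.SetProperty("difficulty", 2.0f);

        // Read a value previously sent from Python, with a fallback.
        var gravityScale = properties.GetPropertyWithDefault("gravity_scale", 1.0f);
        Debug.Log("gravity_scale = " + gravityScale);

        // React whenever Python updates a property.
        properties.RegisterCallback("difficulty", value => Debug.Log("difficulty -> " + value));
    }
}
```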

10
com.unity.ml-agents/Runtime/SideChannels/RawBytesChannel.cs


}
/// <inheritdoc/>
public override void OnMessageReceived(byte[] data)
public override void OnMessageReceived(IncomingMessage msg)
m_MessagesReceived.Add(data);
m_MessagesReceived.Add(msg.GetRawBytes());
}
/// <summary>

/// <param name="data"> The byte array of data to send to Python.</param>
public void SendRawBytes(byte[] data)
{
QueueMessageToSend(data);
using (var msg = new OutgoingMessage())
{
msg.SetRawBytes(data);
QueueMessageToSend(msg);
}
}
/// <summary>

13
com.unity.ml-agents/Runtime/SideChannels/SideChannel.cs


using System.Collections.Generic;
using System;
using System.IO;
using System.Text;
namespace MLAgents.SideChannels
{

/// of each type. Ensure the Unity side channels will be linked to their Python equivalent.
/// </summary>
/// <returns> The integer identifier of the SideChannel.</returns>
public Guid ChannelId{
public Guid ChannelId
{
get;
protected set;
}

/// Can be called multiple times per simulation step if multiple messages were sent.
/// </summary>
/// <param name="data"> the payload of the message.</param>
public abstract void OnMessageReceived(byte[] data);
/// <param name="msg">The incoming message.</param>
public abstract void OnMessageReceived(IncomingMessage msg);
protected void QueueMessageToSend(byte[] data)
protected void QueueMessageToSend(OutgoingMessage msg)
MessageQueue.Add(data);
MessageQueue.Add(msg.ToByteArray());
}
}
}
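With this change, a custom side channel overrides `OnMessageReceived(IncomingMessage)` and sends data through an `OutgoingMessage` rather than raw byte arrays. A minimal sketch (the class is illustrative and the Guid is a placeholder that must match the id used on the Python side):

```csharp
using System;
using UnityEngine;
using MLAgents.SideChannels;

// Sketch only: a string-exchanging side channel using the new message types.
public class StringMessageSideChannel : SideChannel
{
    public StringMessageSideChannel()
    {
        // Placeholder id; use your own Guid and mirror it in Python.
        ChannelId = new Guid("00000000-0000-0000-0000-000000000000");
    }

    public override void OnMessageReceived(IncomingMessage msg)
    {
        // Read the payload with the typed readers instead of raw bytes.
        var receivedString = msg.ReadString();
        Debug.Log("From Python: " + receivedString);
    }

    public void SendStringToPython(string text)
    {
        using (var msgOut = new OutgoingMessage())
        {
            msgOut.WriteString(text);
            QueueMessageToSend(msgOut);
        }
    }
}
```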

178
com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs


using System.CodeDom;
using UnityEngine;
using NUnit.Framework;
using System.Reflection;

internal IPolicy GetPolicy()
{
return (IPolicy) typeof(Agent).GetField("m_Brain", BindingFlags.Instance | BindingFlags.NonPublic).GetValue(this);
return (IPolicy)typeof(Agent).GetField("m_Brain", BindingFlags.Instance | BindingFlags.NonPublic).GetValue(this);
public int collectObservationsCallsSinceLastReset;
public int collectObservationsCallsForEpisode;
public int agentActionCallsSinceLastReset;
public int agentResetCalls;
public int agentActionCallsForEpisode;
public int agentOnEpisodeBeginCalls;
public override void InitializeAgent()
public override void Initialize()
{
initializeAgentCalls += 1;

public override void CollectObservations(VectorSensor sensor)
{
collectObservationsCalls += 1;
collectObservationsCallsSinceLastReset += 1;
collectObservationsCallsForEpisode += 1;
public override void AgentAction(float[] vectorAction)
public override void OnActionReceived(float[] vectorAction)
agentActionCallsSinceLastReset += 1;
agentActionCallsForEpisode += 1;
public override void AgentReset()
public override void OnEpisodeBegin()
agentResetCalls += 1;
collectObservationsCallsSinceLastReset = 0;
agentActionCallsSinceLastReset = 0;
agentOnEpisodeBeginCalls += 1;
collectObservationsCallsForEpisode = 0;
agentActionCallsForEpisode = 0;
}
public override float[] Heuristic()

public byte[] GetCompressedObservation()
{
numCompressedCalls++;
return null;
return new byte[] { 0 };
}
public SensorCompressionType GetCompressionType()

agentGo2.AddComponent<TestAgent>();
var agent2 = agentGo2.GetComponent<TestAgent>();
Assert.AreEqual(0, agent1.agentResetCalls);
Assert.AreEqual(0, agent2.agentResetCalls);
Assert.AreEqual(0, agent1.agentOnEpisodeBeginCalls);
Assert.AreEqual(0, agent2.agentOnEpisodeBeginCalls);
Assert.AreEqual(0, agent1.initializeAgentCalls);
Assert.AreEqual(0, agent2.initializeAgentCalls);
Assert.AreEqual(0, agent1.agentActionCalls);

// agent1 was not enabled when the academy started
// The agents have been initialized
Assert.AreEqual(0, agent1.agentResetCalls);
Assert.AreEqual(0, agent2.agentResetCalls);
Assert.AreEqual(0, agent1.agentOnEpisodeBeginCalls);
Assert.AreEqual(0, agent2.agentOnEpisodeBeginCalls);
Assert.AreEqual(1, agent1.initializeAgentCalls);
Assert.AreEqual(1, agent2.initializeAgentCalls);
Assert.AreEqual(0, agent1.agentActionCalls);

agent1.LazyInitialize();
var numberAgent1Reset = 0;
var numberAgent1Episodes = 0;
var numberAgent2Episodes = 0;
Assert.AreEqual(numberAgent1Reset, agent1.agentResetCalls);
// Agent2 is never reset since initialized after academy
Assert.AreEqual(0, agent2.agentResetCalls);
Assert.AreEqual(numberAgent1Episodes, agent1.agentOnEpisodeBeginCalls);
Assert.AreEqual(numberAgent2Episodes, agent2.agentOnEpisodeBeginCalls);
Assert.AreEqual(1, agent1.initializeAgentCalls);
Assert.AreEqual(numberAgent2Initialization, agent2.initializeAgentCalls);
Assert.AreEqual(i, agent1.agentActionCalls);

// Agent 1 resets at the first step
// Agent 1 starts a new episode at the first step
numberAgent1Reset += 1;
numberAgent1Episodes += 1;
// Since Agent2 is initialized after the Academy has stepped, its OnEpisodeBegin should be called now.
Assert.AreEqual(0, agent2.agentOnEpisodeBeginCalls);
Assert.AreEqual(1, agent2.agentOnEpisodeBeginCalls);
numberAgent2Episodes += 1;
}
// We are testing request decision and request actions when called

agent2.LazyInitialize();
var numberAgent1Reset = 0;
var numberAgent2Reset = 0;
var numberAgent1Episodes = 0;
var numberAgent2Episodes = 0;
var agent2StepSinceReset = 0;
var agent2StepForEpisode = 0;
for (var i = 0; i < 5000; i++)
{
Assert.AreEqual(acaStepsSinceReset, aca.StepCount);

Assert.AreEqual(numberAgent2Episodes, agent2.agentOnEpisodeBeginCalls);
Assert.AreEqual(agent2StepForEpisode, agent2.StepCount);
Assert.AreEqual(agent2StepSinceReset, agent2.StepCount);
Assert.AreEqual(numberAgent1Reset, agent1.agentResetCalls);
Assert.AreEqual(numberAgent2Reset, agent2.agentResetCalls);
// Agent 2 and academy reset at the first step
// Agent 2 and academy reset at the first step
Assert.AreEqual(numberAgent2Episodes, agent2.agentOnEpisodeBeginCalls);
numberAgent2Reset += 1;
numberAgent2Episodes += 1;
Assert.AreEqual(numberAgent1Episodes, agent1.agentOnEpisodeBeginCalls);
numberAgent1Episodes += 1;
Assert.AreEqual(numberAgent1Episodes, agent1.agentOnEpisodeBeginCalls);
agent1.Done();
numberAgent1Reset += 1;
Assert.AreEqual(numberAgent1Episodes, agent1.agentOnEpisodeBeginCalls);
agent1.EndEpisode();
numberAgent1Episodes += 1;
Assert.AreEqual(numberAgent1Episodes, agent1.agentOnEpisodeBeginCalls);
// Resetting agent 2 regularly
// Ending the episode for agent 2 regularly
agent2.Done();
numberAgent2Reset += 1;
agent2StepSinceReset = 0;
Assert.AreEqual(numberAgent2Episodes, agent2.agentOnEpisodeBeginCalls);
agent2.EndEpisode();
numberAgent2Episodes += 1;
agent2StepForEpisode = 0;
Assert.AreEqual(numberAgent2Episodes, agent2.agentOnEpisodeBeginCalls);
}
// Request a decision for agent 2 regularly
if (i % 3 == 2)

}
acaStepsSinceReset += 1;
agent2StepSinceReset += 1;
agent2StepForEpisode += 1;
aca.EnvironmentStep();
}
}

agent1.LazyInitialize();
agent2.SetPolicy(new TestPolicy());
var expectedAgent1ActionSinceReset = 0;
var expectedAgent1ActionForEpisode = 0;
expectedAgent1ActionSinceReset += 1;
if (expectedAgent1ActionSinceReset == agent1.maxStep || i == 0)
expectedAgent1ActionForEpisode += 1;
if (expectedAgent1ActionForEpisode == agent1.maxStep || i == 0)
expectedAgent1ActionSinceReset = 0;
expectedAgent1ActionForEpisode = 0;
Assert.LessOrEqual(Mathf.Abs(expectedAgent1ActionSinceReset * 10.1f - agent1.GetCumulativeReward()), 0.05f);
Assert.LessOrEqual(Mathf.Abs(expectedAgent1ActionForEpisode * 10.1f - agent1.GetCumulativeReward()), 0.05f);
Assert.LessOrEqual(Mathf.Abs(i * 0.1f - agent2.GetCumulativeReward()), 0.05f);
agent1.AddReward(10f);

agent1.LazyInitialize();
var expectedAgentStepCount = 0;
var expectedResets = 0;
var expectedEpisodes = 0;
var expectedAgentActionSinceReset = 0;
var expectedAgentActionForEpisode = 0;
var expectedCollectObsCallsSinceReset = 0;
var expectedCollectObsCallsForEpisode = 0;
expectedAgentActionSinceReset += 1;
expectedAgentActionForEpisode += 1;
expectedCollectObsCallsSinceReset += 1;
expectedCollectObsCallsForEpisode += 1;
expectedResets += 1;
expectedEpisodes += 1;
expectedAgentActionSinceReset = 0;
expectedCollectObsCallsSinceReset = 0;
expectedAgentActionForEpisode = 0;
expectedCollectObsCallsForEpisode = 0;
Assert.AreEqual(expectedResets, agent1.agentResetCalls);
Assert.AreEqual(expectedEpisodes, agent1.agentOnEpisodeBeginCalls);
Assert.AreEqual(expectedAgentActionSinceReset, agent1.agentActionCallsSinceLastReset);
Assert.AreEqual(expectedAgentActionForEpisode, agent1.agentActionCallsForEpisode);
Assert.AreEqual(expectedCollectObsCallsSinceReset, agent1.collectObservationsCallsSinceLastReset);
Assert.AreEqual(expectedCollectObsCallsForEpisode, agent1.collectObservationsCallsForEpisode);
}
}

Assert.AreEqual(numSteps, agent1.heuristicCalls);
Assert.AreEqual(numSteps, agent1.sensor1.numWriteCalls);
Assert.AreEqual(numSteps, agent1.sensor2.numCompressedCalls);
}
}
[TestFixture]
public class TestOnEnableOverride
{
public class OnEnableAgent : Agent
{
public bool callBase;
protected override void OnEnable()
{
if (callBase)
base.OnEnable();
}
}
static void _InnerAgentTestOnEnableOverride(bool callBase = false)
{
var go = new GameObject();
var agent = go.AddComponent<OnEnableAgent>();
agent.callBase = callBase;
var onEnable = typeof(OnEnableAgent).GetMethod("OnEnable", BindingFlags.NonPublic | BindingFlags.Instance);
var sendInfo = typeof(Agent).GetMethod("SendInfoToBrain", BindingFlags.NonPublic | BindingFlags.Instance);
Assert.NotNull(onEnable);
onEnable.Invoke(agent, null);
Assert.NotNull(sendInfo);
if (agent.callBase)
{
Assert.DoesNotThrow(() => sendInfo.Invoke(agent, null));
}
else
{
Assert.Throws<UnityAgentsException>(() =>
{
try
{
sendInfo.Invoke(agent, null);
}
catch (TargetInvocationException e)
{
throw e.GetBaseException();
}
});
}
}
[Test]
public void TestAgentCallBaseOnEnable()
{
_InnerAgentTestOnEnableOverride(true);
}
[Test]
public void TestAgentDontCallBaseOnEnable()
{
_InnerAgentTestOnEnableOverride();
}
}
}

69
com.unity.ml-agents/Tests/Editor/SideChannelTests.cs


{
public List<int> messagesReceived = new List<int>();
public TestSideChannel() {
public TestSideChannel()
{
public override void OnMessageReceived(byte[] data)
public override void OnMessageReceived(IncomingMessage msg)
messagesReceived.Add(BitConverter.ToInt32(data, 0));
messagesReceived.Add(msg.ReadInt32());
public void SendInt(int data)
public void SendInt(int value)
QueueMessageToSend(BitConverter.GetBytes(data));
using (var msg = new OutgoingMessage())
{
msg.WriteInt32(value);
QueueMessageToSend(msg);
}
}
}

fakeData = RpcCommunicator.GetSideChannelMessage(dictSender);
RpcCommunicator.ProcessSideChannelData(dictReceiver, fakeData);
Assert.AreEqual(wasCalled, 1);
var keysA = propA.ListProperties();
Assert.AreEqual(2, keysA.Count);
Assert.IsTrue(keysA.Contains(k1));
Assert.IsTrue(keysA.Contains(k2));
var keysB = propA.ListProperties();
Assert.AreEqual(2, keysB.Count);
Assert.IsTrue(keysB.Contains(k1));
Assert.IsTrue(keysB.Contains(k2));
}
[Test]
public void TestOutgoingMessageRawBytes()
{
// Make sure that SetRawBytes resets the buffer correctly.
// Write 8 bytes (an int and float) then call SetRawBytes with 4 bytes
var msg = new OutgoingMessage();
msg.WriteInt32(42);
msg.WriteFloat32(1.0f);
var data = new byte[] { 1, 2, 3, 4 };
msg.SetRawBytes(data);
var result = msg.ToByteArray();
Assert.AreEqual(data, result);
}
[Test]
public void TestMessageReadWrites()
{
var boolVal = true;
var intVal = 1337;
var floatVal = 4.2f;
var floatListVal = new float[] { 1001, 1002 };
var stringVal = "mlagents!";
IncomingMessage incomingMsg;
using (var outgoingMsg = new OutgoingMessage())
{
outgoingMsg.WriteBoolean(boolVal);
outgoingMsg.WriteInt32(intVal);
outgoingMsg.WriteFloat32(floatVal);
outgoingMsg.WriteString(stringVal);
outgoingMsg.WriteFloatList(floatListVal);
incomingMsg = new IncomingMessage(outgoingMsg.ToByteArray());
}
Assert.AreEqual(boolVal, incomingMsg.ReadBoolean());
Assert.AreEqual(intVal, incomingMsg.ReadInt32());
Assert.AreEqual(floatVal, incomingMsg.ReadFloat32());
Assert.AreEqual(stringVal, incomingMsg.ReadString());
Assert.AreEqual(floatListVal, incomingMsg.ReadFloatList());
}
}
}

4
com.unity.ml-agents/package.json


{
"name": "com.unity.ml-agents",
"displayName":"ML Agents",
"version": "0.14.1-preview",
"version": "0.15.0-preview",
"com.unity.barracuda": "0.6.0-preview"
"com.unity.barracuda": "0.6.1-preview"
}
}

2
config/sac_trainer_config.yaml


summary_freq: 20000
PushBlock:
max_steps: 1.5e7
max_steps: 2e6
init_entcoef: 0.05
hidden_units: 256
summary_freq: 60000

4
config/trainer_config.yaml


Bouncer:
normalize: true
max_steps: 7.0e6
max_steps: 4.0e6
max_steps: 1.5e7
max_steps: 2.0e6
batch_size: 128
buffer_size: 2048
beta: 1.0e-2

21
docs/Getting-Started-with-Balance-Ball.md


* **Behavior Parameters** — Every Agent must have a Behavior. The Behavior
determines how an Agent makes decisions. More on Behavior Parameters in
the next section.
* **Max Step** — Defines how many simulation steps can occur before the Agent
decides it is done. In 3D Balance Ball, an Agent restarts after 5000 steps.
* **Max Step** — Defines how many simulation steps can occur before the Agent's
episode ends. In 3D Balance Ball, an Agent restarts after 5000 steps.
Perhaps the more interesting aspect of an agent is the Agent subclass
implementation. When you create an Agent, you must extend the base Agent class.
When you create an Agent, you must extend the base Agent class.
* agent.AgentReset() — Called when the Agent resets, including at the beginning
of a session. The Ball3DAgent class uses the reset function to reset the
* `Agent.OnEpisodeBegin()` — Called when the Agent resets, including at the beginning
of the simulation. The Ball3DAgent class uses the reset function to reset the
* agent.CollectObservations(VectorSensor sensor) — Called every simulation step. Responsible for
* `Agent.CollectObservations(VectorSensor sensor)` — Called every simulation step. Responsible for
* agent.AgentAction() — Called every simulation step. Receives the action chosen
* `Agent.OnActionReceived()` — Called every time the Agent receives an action to take. Receives the action chosen
small change in the agent cube's rotation at each step. The `AgentAction()` function
small change in the agent cube's rotation at each step. The `OnActionReceived()` method
negative reward for dropping the ball. An Agent is also marked as done when it
negative reward for dropping the ball. An Agent's episode is also ended when it
* agent.Heuristic() - When the `Use Heuristic` checkbox is checked in the Behavior
* `Agent.Heuristic()` - When the `Behavior Type` is set to `Heuristic Only` in the Behavior
Parameters of the Agent, the Agent will use the `Heuristic()` method to generate
the actions of the Agent. As such, the `Heuristic()` method returns an array of
floats. In the case of the Ball 3D Agent, the `Heuristic()` method converts the
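Putting the renamed callbacks above together, a bare-bones Agent now looks roughly like the sketch below. The observations, actions, and rewards are placeholders rather than the actual Ball3DAgent logic.

```csharp
using MLAgents;
using MLAgents.Sensors;
using UnityEngine;

// Sketch only: a minimal Agent using the current method names.
public class MinimalAgent : Agent
{
    Rigidbody m_Body;

    public override void Initialize()
    {
        // One-time setup (formerly InitializeAgent()).
        m_Body = GetComponent<Rigidbody>();
    }

    public override void OnEpisodeBegin()
    {
        // Reset state at the start of each episode (formerly AgentReset()).
        m_Body.velocity = Vector3.zero;
    }

    public override void CollectObservations(VectorSensor sensor)
    {
        // Collect the Agent's observations.
        sensor.AddObservation(transform.position);
        sensor.AddObservation(m_Body.velocity);
    }

    public override void OnActionReceived(float[] vectorAction)
    {
        // Apply the chosen action (formerly AgentAction()) and assign rewards.
        AddReward(0.01f);
        if (transform.position.y < 0f)
        {
            SetReward(-1f);
            EndEpisode();
        }
    }

    public override float[] Heuristic()
    {
        // Used when Behavior Type is set to Heuristic Only.
        return new[] { Input.GetAxis("Horizontal"), Input.GetAxis("Vertical") };
    }
}
```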

160
docs/Installation.md


# Installation
To install and use ML-Agents, you need to install Unity, clone this repository and
install Python with additional dependencies. Each of the subsections below
overviews each step, in addition to a Docker set-up.
The ML-Agents Toolkit contains several components:
* Unity package ([`com.unity.ml-agents`](../com.unity.ml-agents/)) contains the Unity C#
SDK that will be integrated into your Unity scene.
* Three Python packages:
* [`mlagents`](../ml-agents/) contains the machine learning algorithms that enable you
to train behaviors in your Unity scene. Most users of ML-Agents will only need to
directly install `mlagents`.
* [`mlagents_envs`](../ml-agents-envs/) contains a Python API to interact with a Unity
scene. It is a foundational layer that facilitates data messaging between a Unity scene
and the Python machine learning algorithms. Consequently, `mlagents` depends on `mlagents_envs`.
* [`gym_unity`](../gym-unity/) provides a Python wrapper for your Unity scene that
supports the OpenAI Gym interface.
* Unity [Project](../Project/) that contains several
[example environments](Learning-Environment-Examples.md) that highlight the various features
of the toolkit to help you get started.
## Install **Unity 2018.4** or Later
Consequently, to install and use ML-Agents you will need to:
* Install Unity (2018.4 or later)
* Install Python (3.6.1 or higher)
* Clone this repository
* Install the `com.unity.ml-agents` Unity package
* Install the `mlagents` Python package
[Download](https://store.unity.com/download) and install Unity. If you would
like to use our Docker set-up (introduced later), make sure to select the _Linux
Build Support_ component when installing Unity.
### Install **Unity 2018.4** or Later
<p align="center">
<img src="images/unity_linux_build_support.png"
alt="Linux Build Support"
width="500" border="10" />
</p>
[Download](https://unity3d.com/get-unity/download) and install Unity. We strongly recommend
that you install Unity through the Unity Hub as it will enable you to manage multiple Unity
versions.
## Environment Setup
We now support a single mechanism for installing ML-Agents on Mac/Windows/Linux using Virtual
Environments. For more information on Virtual Environments and installation instructions,
follow this [guide](Using-Virtual-Environment.md).
### Install **Python 3.6.1** or Higher
Although we don't support Anaconda installation path of ML-Agents for Windows, the previous guide
is still in the docs folder. Please refer to [Windows Installation (Deprecated)](Installation-Windows.md).
We recommend [installing](https://www.python.org/downloads/) Python 3.6 or 3.7. If your Python
environment doesn't include `pip3`, see these
[instructions](https://packaging.python.org/guides/installing-using-linux-tools/#installing-pip-setuptools-wheel-with-linux-package-managers)
on installing it.
Although we do not provide support for Anaconda installation on Windows, the previous
[Windows Anaconda Installation (Deprecated) guide](Installation-Anaconda-Windows.md)
is still available.
Once installed, you will want to clone the ML-Agents Toolkit GitHub repository.
Now that you have installed Unity and Python, you will need to clone the
ML-Agents Toolkit GitHub repository to install the Unity package (the Python
packages can be installed directly from PyPi - a Python package registry).
```sh
git clone --branch latest_release https://github.com/Unity-Technologies/ml-agents.git

The `com.unity.ml-agents` subdirectory contains the core code to add to your projects.
The `Project` subdirectory contains many [example environments](Learning-Environment-Examples.md)
to help you get started.
### Package Installation
ML-Agents C# SDK is transitioning to a Unity Package. While we are working on getting into the
official packages list, you can add the `com.unity.ml-agents` package to your project by
navigating to the menu `Window` -> `Package Manager`. In the package manager window click
on the `+` button.
<p align="center">
<img src="images/unity_package_manager_window.png"
alt="Linux Build Support"
width="500" border="10" />
</p>
### Install the `com.unity.ml-agents` Unity package
**NOTE:** In Unity 2018.4 it's on the bottom right of the packages list, and in Unity 2019.3 it's on the top left of the packages list.
Select `Add package from disk...` and navigate into the
The Unity ML-Agents C# SDK is a Unity Package. We are working on getting it added to the
official Unity package registry which will enable you to install the `com.unity.ml-agents` package
[directly from the registry](https://docs.unity3d.com/Manual/upm-ui-install.html) without cloning
this repository. Until then, you will need to
[install it from the local package](https://docs.unity3d.com/Manual/upm-ui-local.html) that you
just cloned. You can add the `com.unity.ml-agents` package to
your project by navigating to the menu `Window` -> `Package Manager`. In the package manager
window click on the `+` button. Select `Add package from disk...` and navigate into the
**NOTE:** In Unity 2018.4 it's on the bottom right of the packages list, and in Unity 2019.3 it's
on the top left of the packages list.
<img src="images/unity_package_manager_window.png"
alt="Unity Package Manager Window"
height="340" border="10" />
alt="Linux Build Support"
width="500" border="10" />
alt="package.json"
height="340" border="10" />
The `ml-agents` subdirectory contains a Python package which provides deep reinforcement
learning trainers to use with Unity environments.
The `ml-agents-envs` subdirectory contains a Python API to interface with Unity, which
the `ml-agents` package depends on.
The `gym-unity` subdirectory contains a package to interface with OpenAI Gym.
### Install Python and mlagents Package
In order to use ML-Agents toolkit, you need Python 3.6.1 or higher.
[Download](https://www.python.org/downloads/) and install the latest version of Python if you do not already have it.
### Install the `mlagents` Python package
If your Python environment doesn't include `pip3`, see these
[instructions](https://packaging.python.org/guides/installing-using-linux-tools/#installing-pip-setuptools-wheel-with-linux-package-managers)
on installing it.
Installing the `mlagents` Python package involves installing other Python packages
that `mlagents` depends on. So you may run into installation issues if your machine
has older versions of any of those dependencies already installed. Consequently, our
supported path for installing `mlagents` is to leverage Python Virtual Environments.
Virtual Environments provide a mechanism for isolating the dependencies for each project
and are supported on Mac / Windows / Linux. We offer a dedicated
[guide on Virtual Environments](Using-Virtual-Environment.md).
To install the `mlagents` Python package, run from the command line:
To install the `mlagents` Python package, activate your virtual environment and run from the
command line:
Note that this will install `ml-agents` from PyPi, _not_ from the cloned repo.
Note that this will install `mlagents` from PyPi, _not_ from the cloned repo.
By installing the `mlagents` package, the dependencies listed in the [setup.py file](../ml-agents/setup.py) are also installed.
Some of the primary dependencies include:
- [TensorFlow](Background-TensorFlow.md) (Requires a CPU w/ AVX support)
- [Jupyter](Background-Jupyter.md)
**Notes:**
- We do not currently support Python 3.5 or lower.
- If you are using Anaconda and are having trouble with TensorFlow, please see
the following
[link](https://www.tensorflow.org/install/pip)
on how to install TensorFlow in an Anaconda environment.
By installing the `mlagents` package, the dependencies listed in the
[setup.py file](../ml-agents/setup.py) are also installed. These include
[TensorFlow](Background-TensorFlow.md) (Requires a CPU w/ AVX support) and
[Jupyter](Background-Jupyter.md).
### Installing for Development
#### Advanced: Installing for Development
If you intend to make modifications to `ml-agents` or `ml-agents-envs`, you should install
If you intend to make modifications to `mlagents` or `mlagents_envs`, you should install
`ml-agents` and `ml-agents-envs` separately. From the repo's root directory, run:
`mlagents` and `mlagents_envs` separately. From the repo's root directory, run:
cd ml-agents-envs
pip3 install -e ./
cd ..
cd ml-agents
pip3 install -e ./
pip3 install -e ./ml-agents-envs
pip3 install -e ./ml-agents
Running pip with the `-e` flag will let you make changes to the Python files directly and have those
reflected when you run `mlagents-learn`. It is important to install these packages in this order as the
`mlagents` package depends on `mlagents_envs`, and installing it in the other
Running pip with the `-e` flag will let you make changes to the Python files directly and have
those reflected when you run `mlagents-learn`. It is important to install these packages in this
order as the `mlagents` package depends on `mlagents_envs`, and installing it in the other
setting up the ML-Agents toolkit within Unity, running a pre-trained model, in
setting up the ML-Agents Toolkit within Unity, running a pre-trained model, in
addition to building and training environments.
## Help

5
docs/Learning-Environment-Best-Practices.md


lessons which progressively increase in difficulty are presented to the agent
([learn more here](Training-Curriculum-Learning.md)).
* When possible, it is often helpful to ensure that you can complete the task by
using a heuristic to control the agent. To do so, check the `Use Heuristic`
checkbox on the Agent and implement the `Heuristic()` method on the Agent.
using a heuristic to control the agent. To do so, set the `Behavior Type`
to `Heuristic Only` on the Agent's Behavior Parameters, and implement the
`Heuristic()` method on the Agent.
* It is often helpful to make many copies of the agent, and give them the same
`Behavior Name`. In this way the learning process can get more feedback
information from all of these agents, which helps it train faster.

38
docs/Learning-Environment-Create-New.md


### Initialization and Resetting the Agent
When the Agent reaches its target, it marks itself done and its Agent reset
function moves the target to a random location. In addition, if the Agent rolls
off the platform, the reset function puts it back onto the floor.
When the Agent reaches its target, its episode ends and the `OnEpisodeBegin()`
method moves the target to a random location. In addition, if the Agent rolls
off the platform, the `OnEpisodeBegin()` method puts it back onto the floor.
To move the target GameObject, we need a reference to its Transform (which
stores a GameObject's position, orientation and scale in the 3D world). To get

}
public Transform Target;
public override void AgentReset()
public override void OnEpisodeBegin()
{
if (this.transform.position.y < 0)
{

}
```
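The hunk above cuts the method off. A hedged completion, consistent with the surrounding description (reset the Agent if it fell off the platform, then move the Target to a new random spot); `rBody` is assumed to be a `Rigidbody` reference cached during initialization, and the exact numbers are illustrative:

```csharp
public Transform Target;
Rigidbody rBody; // assumed to be assigned during initialization

public override void OnEpisodeBegin()
{
    if (this.transform.position.y < 0)
    {
        // The Agent fell off the platform: zero its momentum and put it back.
        this.rBody.angularVelocity = Vector3.zero;
        this.rBody.velocity = Vector3.zero;
        this.transform.position = new Vector3(0, 0.5f, 0);
    }

    // Move the target to a new random location for the next episode.
    Target.position = new Vector3(Random.value * 8 - 4, 0.5f, Random.value * 8 - 4);
}
```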
The final part of the Agent code is the `Agent.AgentAction()` method, which
receives the decision from the Brain and assigns the reward.
The final part of the Agent code is the `Agent.OnActionReceived()` method, which
receives the actions from the Brain and assigns the reward.
`AgentAction()` function. The number of elements in this array is determined by
`OnActionReceived()` function. The number of elements in this array is determined by
the `Vector Action` `Space Type` and `Space Size` settings of the
agent's Brain. The RollerAgent uses the continuous vector action space and needs
two continuous control signals from the Brain. Thus, we will set the Brain

### Rewards
Reinforcement learning requires rewards. Assign rewards in the `AgentAction()`
Reinforcement learning requires rewards. Assign rewards in the `OnActionReceived()`
function. The learning algorithm uses the rewards assigned to the Agent during
the simulation and learning process to determine whether it is giving
the Agent the optimal actions. You want to reward an Agent for completing the

The RollerAgent calculates the distance to detect when it reaches the target.
When it does, the code calls the `Agent.SetReward()` method to assign a
reward of 1.0 and marks the agent as finished by calling the `Done()` method
reward of 1.0 and marks the agent as finished by calling the `EndEpisode()` method
on the Agent.
```csharp

if (distanceToTarget < 1.42f)
{
SetReward(1.0f);
Done();
EndEpisode();
Finally, if the Agent falls off the platform, set the Agent to done so that it can reset itself:
Finally, if the Agent falls off the platform, end the episode so that it can reset itself:
Done();
EndEpisode();
### AgentAction()
### OnActionReceived()
`AgentAction()` function looks like:
`OnActionReceived()` function looks like:
public override void AgentAction(float[] vectorAction)
public override void OnActionReceived(float[] vectorAction)
{
// Actions, size = 2
Vector3 controlSignal = Vector3.zero;

if (distanceToTarget < 1.42f)
{
SetReward(1.0f);
Done();
EndEpisode();
Done();
EndEpisode();
}
}

to the values of the "Horizontal" and "Vertical" input axis (which correspond to
the keyboard arrow keys).
In order for the Agent to use the Heuristic, you will need to check the `Use Heuristic`
checkbox in the `Behavior Parameters` of the RollerAgent.
In order for the Agent to use the Heuristic, you will need to set the `Behavior Type`
to `Heuristic Only` in the `Behavior Parameters` of the RollerAgent.
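As described above, the heuristic simply forwards the two input axes as the two continuous actions; a short sketch:

```csharp
public override float[] Heuristic()
{
    // Map the keyboard arrow keys to the two continuous actions.
    var action = new float[2];
    action[0] = Input.GetAxis("Horizontal");
    action[1] = Input.GetAxis("Vertical");
    return action;
}
```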
Press **Play** to run the scene and use the arrow keys to move the Agent around

47
docs/Learning-Environment-Design-Agents.md


The Policy class abstracts out the decision making logic from the Agent itself so
that you can use the same Policy in multiple Agents. How a Policy makes its
decisions depends on the kind of Policy it is. You can change the Policy of an
Agent by changing its `Behavior Parameters`. If you check `Use Heuristic`, the
Agent will use its `Heuristic()` method to make decisions which can allow you to
control the Agent manually or write your own Policy. If the Agent has a `Model`
file, its Policy will use the neural network `Model` to make decisions.
Agent by changing its `Behavior Parameters`. If you set `Behavior Type` to
`Heuristic Only`, the Agent will use its `Heuristic()` method to make decisions
which can allow you to control the Agent manually or write your own Policy. If
the Agent has a `Model` file, its Policy will use the neural network `Model` to
make decisions.
## Decisions

An action is an instruction from the Policy that the agent carries out. The
action is passed to the Agent as a parameter when the Academy invokes the
agent's `AgentAction()` function. When you specify that the vector action space
agent's `OnActionReceived()` function. When you specify that the vector action space
is **Continuous**, the action parameter passed to the Agent is an array of
control signals with length equal to the `Vector Action Space Size` property.
When you specify a **Discrete** vector action space type, the action parameter

values themselves mean. The training algorithm simply tries different values for
the action list and observes the effect on the accumulated rewards over time and
many training episodes. Thus, the only place actions are defined for an Agent is
in the `AgentAction()` function. You simply specify the type of vector action
space, and, for the continuous vector action space, the number of values, and
then apply the received values appropriately (and consistently) in
`AgentAction()`.
in the `OnActionReceived()` function.
For example, if you designed an agent to move in two dimensions, you could use
either continuous or the discrete vector actions. In the continuous case, you

### Continuous Action Space
When an Agent uses a Policy set to the **Continuous** vector action space, the
action parameter passed to the Agent's `AgentAction()` function is an array with
action parameter passed to the Agent's `OnActionReceived()` function is an array with
length equal to the `Vector Action Space Size` property value.
The individual values in the array have whatever meanings that you ascribe to
them. If you assign an element in the array as the speed of an Agent, for

These control values are applied as torques to the bodies making up the arm:
```csharp
public override void AgentAction(float[] act)
public override void OnActionReceived(float[] act)
{
float torque_x = Mathf.Clamp(act[0], -1, 1) * 100f;
float torque_z = Mathf.Clamp(act[1], -1, 1) * 100f;

### Discrete Action Space
When an Agent uses a **Discrete** vector action space, the
action parameter passed to the Agent's `AgentAction()` function is an array
action parameter passed to the Agent's `OnActionReceived()` function is an array
containing indices. With the discrete vector action space, `Branches` is an
array of integers, each value corresponds to the number of possibilities for
each branch.

agent be able to move __and__ jump concurrently. We define the first branch to
have 5 possible actions (don't move, go left, go right, go backward, go forward)
and the second one to have 2 possible actions (don't jump, jump). The
AgentAction method would look something like:
`OnActionReceived()` method would look something like:
```csharp
// Get the action index for movement

Agent's Heuristic to control the Agent while watching how it accumulates rewards.
Allocate rewards to an Agent by calling the `AddReward()` method in the
`AgentAction()` function. The reward assigned between each decision
`OnActionReceived()` function. The reward assigned between each decision
should be in the range [-1,1]. Values outside this range can lead to
unstable training. The `reward` value is reset to zero when the agent receives a
new decision. If there are multiple calls to `AddReward()` for a single agent

### Examples
You can examine the `AgentAction()` functions defined in the [example
You can examine the `OnActionReceived()` functions defined in the [example
environments](Learning-Environment-Examples.md) to see how those projects
allocate rewards.

if (hitObjects.Where(col => col.gameObject.tag == "goal").ToArray().Length == 1)
{
AddReward(1.0f);
Done();
EndEpisode();
Done();
EndEpisode();
}
```

Mathf.Abs(gameObject.transform.position.x - area.transform.position.x) > 8f ||
Mathf.Abs(gameObject.transform.position.z + 5 - area.transform.position.z) > 8)
{
Done();
EndEpisode();
AddReward(-1f);
}
```

platform:
```csharp
if (IsDone() == false)
{
SetReward(0.1f);
}
SetReward(0.1f);
// When ball falls mark Agent as done and give a negative penalty
// When ball falls mark Agent as finished and give a negative penalty
Done();
EndEpisode();
}
```

Note that all of these environments make use of the `Done()` method, which manually
Note that all of these environments make use of the `EndEpisode()` method, which manually
terminates an episode when a termination condition is reached. This can be
called independently of the `Max Step` property.

33
docs/Learning-Environment-Design.md


Training and simulation proceed in steps orchestrated by the ML-Agents Academy
class. The Academy works with Agent objects in the scene to step
through the simulation. When all Agents in the scene are _done_,
one training episode is finished.
through the simulation.
During training, the external Python training process communicates with the
Academy to run a series of episodes while it collects data and optimizes its

The ML-Agents Academy class orchestrates the agent simulation loop as follows:
1. Calls your Academy's `OnEnvironmentReset` delegate.
2. Calls the `AgentReset()` function for each Agent in the scene.
2. Calls the `OnEpisodeBegin()` function for each Agent in the scene.
5. Calls the `AgentAction()` function for each Agent in the scene, passing in
the action chosen by the Agent's Policy. (This function is not called if the
Agent is done.)
6. Calls the Agent's `AgentReset()` function if the Agent has reached its `Max
Step` count or has otherwise marked itself as `done`.
5. Calls the `OnActionReceived()` function for each Agent in the scene, passing in
the action chosen by the Agent's Policy.
6. Calls the Agent's `OnEpisodeBegin()` function if the Agent has reached its `Max
Step` count or has otherwise ended its episode by calling `EndEpisode()`.
implement the above methods. The `Agent.CollectObservations(VectorSensor sensor)` and
`Agent.AgentAction()` functions are required; the other methods are optional —
whether you need to implement them or not depends on your specific scenario.
implement the above methods; whether you need to implement them or not depends on
your specific scenario.
**Note:** The API used by the Python training process to communicate with
and control the Academy during training can be used for other purposes as well.

have appropriate `Behavior Parameters`.
To create an Agent, extend the Agent class and implement the essential
`CollectObservations(VectorSensor sensor)` and `AgentAction()` methods:
`CollectObservations(VectorSensor sensor)` and `OnActionReceived()` methods:
* `AgentAction()` — Carries out the action chosen by the Agent's Policy and
* `OnActionReceived()` — Carries out the action chosen by the Agent's Policy and
assigns a reward to the current state.
Your implementations of these functions determine how the Behavior Parameters

manually set an Agent to done in your `AgentAction()` function when the Agent
has finished (or irrevocably failed) its task by calling the `Done()` function.
manually terminate an Agent episode in your `OnActionReceived()` function when the Agent
has finished (or irrevocably failed) its task by calling the `EndEpisode()` function.
Agent will consider itself done after it has taken that many steps. You can
use the `Agent.AgentReset()` function to prepare the Agent to start again.
Agent will consider the episode over after it has taken that many steps. You can
use the `Agent.OnEpisodeBegin()` function to prepare the Agent to start again.
See [Agents](Learning-Environment-Design-Agents.md) for detailed information
about programming your own Agents.

* The Academy must reset the scene to a valid starting point for each episode of
training.
* A training episode must have a definite end — either using `Max Steps` or by
each Agent setting itself to `done`.
each Agent ending its episode manually with `EndEpisode()`.

4
docs/Learning-Environment-Examples.md


* Goal: The agents must hit the ball so that the opponent cannot hit a valid
return.
* Agents: The environment contains two agents with the same Behavior Parameters.
After training you can check the `Use Heuristic` checkbox on one of the Agents
to play against your trained model.
After training you can set the `Behavior Type` to `Heuristic Only` on one of the Agent's
Behavior Parameters to play against your trained model.
* Agent Reward Function (independent):
* +1.0 To the agent that wins the point. An agent wins a point by preventing
the opponent from hitting a valid return.

35
docs/Limitations.md


# Limitations
## Unity SDK
### Headless Mode
If you enable Headless mode, you will not be able to collect visual observations
from your agents.
### Rendering Speed and Synchronization
Currently the speed of the game physics can only be increased to 100x real-time.
The Academy also moves in time with FixedUpdate() rather than Update(), so game
behavior implemented in Update() may be out of sync with the agent decision
making. See
[Execution Order of Event Functions](https://docs.unity3d.com/Manual/ExecutionOrder.html)
for more information.
You can control the frequency of Academy stepping by calling
`Academy.Instance.DisableAutomaticStepping()`, and then calling
`Academy.Instance.EnvironmentStep()`
### Unity Inference Engine Models
Currently, only models created with our trainers are supported for running
ML-Agents with a neural network behavior.
## Python API
### Python version
As of version 0.3, we no longer support Python 2.
See the package-specific Limitations pages:
* [Unity `com.unity.ml-agents` package](../com.unity.ml-agents/Documentation~/com.unity.ml-agents.md)
* [`mlagents` Python package](../ml-agents/README.md)
* [`mlagents_envs` Python package](../ml-agents-envs/README.md)
* [`gym_unity` Python package](../gym-unity/README.md)

29
docs/Migrating.md


# Migrating
## Migrating from 0.14 to latest
## Migrating from 0.15 to latest
### Important changes
### Steps to Migrate
## Migrating from 0.14 to 0.15
### Important changes
* The `Agent.CollectObservations()` virtual method now takes as input a `VectorSensor` sensor as argument. The `Agent.AddVectorObs()` methods were removed.

* `AgentInfo.actionMasks` has been renamed to `AgentInfo.discreteActionMasks`.
* `BrainParameters` and `SpaceType` have been removed from the public API
* `BehaviorParameters` have been removed from the public API.
* `DecisionRequester` has been made internal (you can still use the DecisionRequesterComponent from the inspector). `RepeatAction` was renamed `TakeActionsBetweenDecisions` for clarity.
* The following methods in the `Agent` class have been renamed. The original method names will be removed in a later release:
* `InitializeAgent()` was renamed to `Initialize()`
* `AgentAction()` was renamed to `OnActionReceived()`
* `AgentReset()` was renamed to `OnEpisodeBegin()`
* `Done()` was renamed to `EndEpisode()`
* `GiveModel()` was renamed to `SetModel()`
* The `IFloatProperties` interface has been removed.
* The interface for SideChannels was changed:
* In C#, `OnMessageReceived` now takes a `IncomingMessage` argument, and `QueueMessageToSend` takes an `OutgoingMessage` argument.
* In python, `on_message_received` now takes a `IncomingMessage` argument, and `queue_message_to_send` takes an `OutgoingMessage` argument.
### Steps to Migrate
* Add the `using MLAgents.Sensors;` in addition to `using MLAgents;` on top of your Agent's script.

iterate through `RayPerceptionOutput.rayOutputs` and call `RayPerceptionOutput.RayOutput.ToFloatArray()`.
* Re-import all of your `*.NN` files to work with the updated Barracuda package.
* We strongly recommend replacing the following methods with their new equivalent as they will be removed in a later release:
* `InitializeAgent()` to `Initialize()`
* `AgentAction()` to `OnActionReceived()`
* `AgentReset()` to `OnEpisodeBegin()`
* `Done()` to `EndEpisode()`
* `GiveModel()` to `SetModel()`
* Replace `IFloatProperties` variables with `FloatPropertiesChannel` variables.
* If you implemented custom `SideChannels`, update the signatures of your methods, and add your data to the `OutgoingMessage` or read it from the `IncomingMessage`.
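The renames above can be summarized in a single hypothetical Agent subclass; the class and members below are illustrative only, not code from the toolkit:

```csharp
using MLAgents;

// Sketch only: the 0.15 method names, with the pre-0.15 names noted in comments.
public class MyMigratedAgent : Agent
{
    public override void Initialize() { }                     // was InitializeAgent()
    public override void OnEpisodeBegin() { }                 // was AgentReset()
    public override void OnActionReceived(float[] action) { } // was AgentAction()

    void OnTaskFinished()
    {
        EndEpisode(); // was Done(); SetModel() likewise replaces GiveModel()
    }
}
```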
## Migrating from 0.13 to 0.14

236
docs/Python-API.md


allows you to interact directly with a Unity Environment (`mlagents_envs`) and
an entry point to train (`mlagents-learn`) which allows you to train agents in
Unity Environments using our implementations of reinforcement learning or
imitation learning.
imitation learning. This document describes how to use the `mlagents_envs` API.
For information on using `mlagents-learn`, see [here](Training-ML-Agents.md).
You can use the Python Low Level API to interact directly with your learning
environment, and use it to develop new learning algorithms.
The Python Low Level API can be used to interact directly with your Unity learning environment.
As such, it can serve as the basis for developing and evaluating new learning algorithms.
## mlagents_envs

Python-side communication happens through `UnityEnvironment` which is located in
[`environment.py`](../ml-agents-envs/mlagents_envs/environment.py). To load
a Unity environment from a built binary file, put the file in the same directory
as `envs`. For example, if the filename of your Unity environment is 3DBall.app, in python, run:
as `envs`. For example, if the filename of your Unity environment is `3DBall`, in python, run:
```python
from mlagents_envs.environment import UnityEnvironment

`discrete_action_branches = (3,2,)`)
### Modifying the environment from Python
The Environment can be modified by using side channels to send data to the
environment. When creating the environment, pass a list of side channels as
`side_channels` argument to the constructor.
### Communicating additional information with the Environment
In addition to the means of communicating between Unity and python described above,
we also provide methods for sharing agent-agnostic information. These
additional methods are referred to as side channels. ML-Agents includes two ready-made
side channels, described below. It is also possible to create custom side channels to
communicate any additional data between a Unity environment and Python. Instructions for
creating custom side channels can be found [here](Custom-SideChannels.md).
Side channels exist as separate classes which are instantiated, and then passed as a list to the `side_channels` argument of the constructor of the `UnityEnvironment` class.
```python
channel = MyChannel()
env = UnityEnvironment(side_channels = [channel])
```
__Note__ : A side channel will only send/receive messages when `env.step` is
__Note__ : A side channel will only send/receive messages when `env.step` or `env.reset()` is
An `EngineConfiguration` will allow you to modify the time scale and graphics quality of the Unity engine.
The `EngineConfiguration` side channel allows you to modify the time-scale, resolution, and graphics quality of the environment. This can be useful for adjusting the environment to perform better during training, or be more interpretable during inference.
* `set_configuration_parameters` with arguments
* width: Defines the width of the display. Default 80.
* height: Defines the height of the display. Default 80.
* quality_level: Defines the quality level of the simulation. Default 1.
* time_scale: Defines the multiplier for the deltatime in the simulation. If set to a higher value, time will pass faster in the simulation but the physics might break. Default 20.
* target_frame_rate: Instructs simulation to try to render at a specified frame rate. Default -1.
* `set_configuration_parameters` which takes the following arguments:
* `width`: Defines the width of the display. Default 80.
* `height`: Defines the height of the display. Default 80.
* `quality_level`: Defines the quality level of the simulation. Default 1.
* `time_scale`: Defines the multiplier for the deltatime in the simulation. If set to a higher value, time will pass faster in the simulation but the physics may perform unpredictably. Default 20.
* `target_frame_rate`: Instructs simulation to try to render at a specified frame rate. Default -1.
For example :
For example, the following code would adjust the time-scale of the simulation to be 2x realtime.
```python
from mlagents_envs.environment import UnityEnvironment
from mlagents_envs.side_channel.engine_configuration_channel import EngineConfigurationChannel

```
#### FloatPropertiesChannel
The `FloatPropertiesChannel` will allow you to get and set pre-defined numerical values in the environment. This can be useful for adjusting environment-specific settings, or for reading non-agent related information from the environment. You can call `get_property` and `set_property` on the side channel to read and write properties.
`FloatPropertiesChannel` has three methods:
* `set_property` Sets a property in the Unity Environment.
* `get_property` Gets a property in the Unity Environment based on its string key. Returns `None` if the property was not found.
* `list_properties` Lists the string identifiers of all the properties currently present in the Unity Environment.

For example (the property names here are illustrative):
```python
from mlagents_envs.environment import UnityEnvironment
from mlagents_envs.side_channel.float_properties_channel import FloatPropertiesChannel

channel = FloatPropertiesChannel()
env = UnityEnvironment(side_channels=[channel])
channel.set_property("parameter_1", 2.0)
i = env.reset()
readout_value = channel.get_property("parameter_2")
...
```

Once a property has been set in Python, it can be read on the C# side through `Academy.Instance.FloatProperties`:
```csharp
var sharedProperties = Academy.Instance.FloatProperties;
float property1 = sharedProperties.GetPropertyWithDefault("parameter_1", 0.0f);
```
#### [Advanced] Create your own SideChannel
You can create your own `SideChannel` in C# and Python and use it to communicate data between the two.
##### Unity side
The side channel will have to implement the `SideChannel` abstract class and the following method:
* `OnMessageReceived(byte[] data)` : you must implement this method to specify what the side channel does
with the data received from Python. The data is passed as a `byte[]`.
The side channel must also assign a `ChannelId` property in the constructor. The `ChannelId` is a Guid
(or UUID in Python) used to uniquely identify a side channel. This Guid must be the same on the C# and Python sides.
There can only be one side channel with a given id during communication.
To send a byte array from C# to Python, call the `base.QueueMessageToSend(data)` method inside the side channel.
The `data` argument must be a `byte[]`.
To register a side channel on the Unity side, call `Academy.Instance.RegisterSideChannel` with the side channel
as its only argument.
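As a quick reference, here is a minimal sketch that puts these requirements together. The class name, Guid, and message handling below are placeholders rather than part of the ML-Agents API; a complete working example appears under the *Example implementation* section below.

```csharp
using System;
using System.Text;
using UnityEngine;
using MLAgents;

public class MyCustomSideChannel : SideChannel
{
    public MyCustomSideChannel()
    {
        // Must match the channel_id used by the Python counterpart.
        ChannelId = new Guid("00000000-0000-0000-0000-000000000000");
    }

    public override void OnMessageReceived(byte[] data)
    {
        // Decide what the channel does with the raw bytes sent from Python.
        Debug.Log(Encoding.ASCII.GetString(data));
    }

    public void SendToPython(string text)
    {
        // Queue a byte array to be sent to Python with the next exchange.
        QueueMessageToSend(Encoding.ASCII.GetBytes(text));
    }
}
```

The channel would then be registered with `Academy.Instance.RegisterSideChannel(new MyCustomSideChannel());` once the Academy is available.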
##### Python side
The side channel will have to implement the `SideChannel` abstract class. You must implement:
* `on_message_received(self, data: bytes) -> None` : you must implement this method to specify what the
side channel does with the data received from Unity. The data is passed as a `bytes` object.
The side channel must also assign a `channel_id` property in the constructor. The `channel_id` is a UUID
(referred to in C# as a Guid) used to uniquely identify a side channel. This UUID must be the same on the C# and
Python sides. There can only be one side channel with a given id during communication.
To assign the `channel_id` call the abstract class constructor with the appropriate `channel_id` as follows:
```python
super().__init__(my_channel_id)
```
To send a byte array from Python to C#, call the `super().queue_message_to_send(bytes_data)` method inside the
side channel. The `bytes_data` argument must be a `bytes` object.
To register a side channel on the Python side, pass the side channel as an argument when creating the
`UnityEnvironment` object. One of the arguments of the constructor (`side_channels`) is a list of side channels.
##### Example implementation
Here is a simple implementation of a side channel that exchanges strings between C# and Python (encoded as ASCII).
On the C# side, here is an implementation of a `StringLogSideChannel` that listens for `UnityEngine.Debug.LogError` calls in
the game:
```csharp
using UnityEngine;
using MLAgents;
using System.Text;
using System;

public class StringLogSideChannel : SideChannel
{
    public StringLogSideChannel()
    {
        ChannelId = new Guid("621f0a70-4f87-11ea-a6bf-784f4387d1f7");
    }

    public override void OnMessageReceived(byte[] data)
    {
        var receivedString = Encoding.ASCII.GetString(data);
        Debug.Log("From Python : " + receivedString);
    }

    public void SendDebugStatementToPython(string logString, string stackTrace, LogType type)
    {
        if (type == LogType.Error)
        {
            var stringToSend = type.ToString() + ": " + logString + "\n" + stackTrace;
            var encodedString = Encoding.ASCII.GetBytes(stringToSend);
            base.QueueMessageToSend(encodedString);
        }
    }
}
```
We also need to register this side channel with the Academy and subscribe to the `Application.logMessageReceived` event,
so we write a simple MonoBehaviour for this (do not forget to attach it to a GameObject in the scene).
```csharp
using UnityEngine;
using MLAgents;

public class RegisterStringLogSideChannel : MonoBehaviour
{
    StringLogSideChannel stringChannel;

    public void Awake()
    {
        // We create the Side Channel
        stringChannel = new StringLogSideChannel();

        // When a Debug.Log message is created, we send it to the stringChannel
        Application.logMessageReceived += stringChannel.SendDebugStatementToPython;

        // Just in case the Academy has not yet initialized
        Academy.Instance.RegisterSideChannel(stringChannel);
    }

    public void OnDestroy()
    {
        // De-register the Debug.Log callback
        Application.logMessageReceived -= stringChannel.SendDebugStatementToPython;
        if (Academy.IsInitialized)
        {
            Academy.Instance.UnregisterSideChannel(stringChannel);
        }
    }

    public void Update()
    {
        // Optional : If the space bar is pressed, raise an error !
        if (Input.GetKeyDown(KeyCode.Space))
        {
            Debug.LogError("This is a fake error. Space bar was pressed in Unity.");
        }
    }
}
```
And here is the script on the Python side. This script creates a new side channel type (`StringLogChannel`) and
launches a `UnityEnvironment` with that side channel.
```python
from mlagents_envs.environment import UnityEnvironment
from mlagents_envs.side_channel.side_channel import SideChannel
import numpy as np
import uuid


# Create the StringLogChannel class
class StringLogChannel(SideChannel):

    def __init__(self) -> None:
        super().__init__(uuid.UUID("621f0a70-4f87-11ea-a6bf-784f4387d1f7"))

    def on_message_received(self, data: bytes) -> None:
        """
        Note: We must implement this method of the SideChannel interface to
        receive messages from Unity.
        """
        # We simply print the data received, interpreted as ascii
        print(data.decode("ascii"))

    def send_string(self, data: str) -> None:
        # Convert the string to ascii
        bytes_data = data.encode("ascii")
        # We call this method to queue the data we want to send
        super().queue_message_to_send(bytes_data)


# Create the channel
string_log = StringLogChannel()

# We start the communication with the Unity Editor and pass the string_log side channel as input
env = UnityEnvironment(base_port=UnityEnvironment.DEFAULT_EDITOR_PORT, side_channels=[string_log])
env.reset()
string_log.send_string("The environment was reset")

group_name = env.get_agent_groups()[0]  # Get the first group_name
for i in range(1000):
    step_data = env.get_step_result(group_name)
    n_agents = step_data.n_agents()  # Get the number of agents
    # We send data to Unity: a string with the number of agents at each step
    string_log.send_string(
        "Step " + str(i) + " occurred with " + str(n_agents) + " agents."
    )
    env.step()  # Move the simulation forward

env.close()
```
Now, if you run this script and press `Play` in the Unity Editor when prompted, the console in the Unity Editor will
display a message at every Python step. Additionally, if you press the space bar in the Unity Editor, a message will
appear in the terminal.
For information on how to make custom side channels for sending additional data types, see the documentation [here](Custom-SideChannels.md).

3
docs/Readme.md


* [Using the Monitor](Feature-Monitor.md)
* [Using the Video Recorder](https://github.com/Unity-Technologies/video-recorder)
* [Using an Executable Environment](Learning-Environment-Executable.md)
* [Creating Custom Side Channels](Custom-SideChannels.md)
## Training

* [Training on the Cloud with Amazon Web Services](Training-on-Amazon-Web-Service.md)
* [Training on the Cloud with Microsoft Azure](Training-on-Microsoft-Azure.md)
* [Using Docker](Using-Docker.md)
* [Windows Anaconda Installation](Installation-Anaconda-Windows.md)

2
docs/Training-Curriculum-Learning.md


greater than the number of thresholds.
Once our curriculum is defined, we have to use the reset parameters we defined
and modify the environment from the Agent's `OnEpisodeBegin()` function. See
[WallJumpAgent.cs](https://github.com/Unity-Technologies/ml-agents/blob/master/Project/Assets/ML-Agents/Examples/WallJump/Scripts/WallJumpAgent.cs)
for an example.
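For instance, a minimal sketch of reading a curriculum reset parameter inside `OnEpisodeBegin()` might look like the following (the `wall_height` parameter name and the scaling logic are illustrative, not the actual WallJumpAgent code):

```csharp
using MLAgents;
using UnityEngine;

public class CurriculumWallAgent : Agent
{
    public Transform wall;  // the object whose difficulty the curriculum controls

    public override void OnEpisodeBegin()
    {
        // Read the reset parameter for the current lesson, falling back to a
        // default value when training without a curriculum.
        var resetParams = Academy.Instance.FloatProperties;
        var wallHeight = resetParams.GetPropertyWithDefault("wall_height", 1.0f);
        wall.localScale = new Vector3(wall.localScale.x, wallHeight, wall.localScale.z);
    }
}
```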

37
docs/Training-Imitation-Learning.md


of a reward function, we can give the medic real world examples of observations
from the game and actions from a game controller to guide the medic's behavior.
Imitation Learning uses pairs of observations and actions from
a demonstration to learn a policy.
Imitation learning can also be used to help reinforcement learning. Especially in
environments with sparse (i.e., infrequent or rare) rewards, the agent may never see

</p>
The ML-Agents toolkit provides two features that enable your agent to learn from demonstrations.
In most scenarios, you can combine these two features.
* GAIL (Generative Adversarial Imitation Learning) uses an adversarial approach to
reward your Agent for behaving similarly to a set of demonstrations. To use GAIL, you can add the

* Behavioral Cloning (BC) trains the Agent's neural network to exactly mimic the actions
shown in a set of demonstrations.
The BC feature can be enabled on the [PPO](Training-PPO.md#optional-behavioral-cloning-using-demonstrations)
or [SAC](Training-SAC.md#optional-behavioral-cloning-using-demonstrations) trainer. Because BC cannot generalize
past the examples shown in the demonstrations, it tends to work best when demonstrations exist
for nearly all of the states that the agent can experience, or in conjunction with GAIL and/or an extrinsic reward.
### What to Use
If you want to help your agents learn (especially with environments that have sparse rewards)
using pre-recorded demonstrations, you can generally enable both GAIL and Behavioral Cloning

## Recording Demonstrations
Demonstrations of agent behavior can be recorded from the Unity Editor,
and saved as assets. These demonstrations contain information on the
observations, actions, and rewards for a given agent during the recording session.
They can be managed in the Editor, as well as used for training with BC and GAIL.
In order to record demonstrations from an agent, add the `Demonstration Recorder`
component to a GameObject in the scene which contains an `Agent` component.

is played from the Editor. Depending on the complexity of the task, anywhere
from a few minutes to a few hours of demonstration data may be necessary to
be useful for imitation learning. When you have recorded enough data, end
the Editor play session. A `.demo` file will be created in the
`Assets/Demonstrations` folder (by default). This file contains the demonstrations.
Clicking on the file will provide metadata about the demonstration in the
inspector.

alt="BC Teacher Helper"
width="375" border="10" />
</p>
You can then specify the path to this file as the `demo_path` in your `trainer_config.yaml` file
when using BC or GAIL. For instance, for BC:
```
behavioral_cloning:
    demo_path: <path_to_your_demo_file>
    ...
```
And for GAIL:
```
reward_signals:
    gail:
        demo_path: <path_to_your_demo_file>
        ...
```

1
docs/Training-ML-Agents.md


| init_entcoef | How much the agent should explore in the beginning of training. | SAC |
| lambd | The regularization parameter. | PPO |
| learning_rate | The initial learning rate for gradient descent. | PPO, SAC |
| learning_rate_schedule | Determines how learning rate changes over time. | PPO, SAC |
| max_steps | The maximum number of simulation steps to run during a training session. | PPO, SAC |
| memory_size | The size of the memory an agent must keep. Used for training with a recurrent neural network. See [Using Recurrent Neural Networks](Feature-Memory.md). | PPO, SAC |
| normalize | Whether to automatically normalize observations. | PPO, SAC |

2
docs/Unity-Inference-Engine.md


* ONNX (`.onnx`) files use an [industry-standard open format](https://onnx.ai/about.html) produced by the [tf2onnx package](https://github.com/onnx/tensorflow-onnx).
Export to ONNX is currently considered beta. To enable it, make sure `tf2onnx>=1.5.5` is installed via pip.
tf2onnx does not currently support tensorflow 2.0.0 or later, or earlier than 1.12.0.
## Using the Unity Inference Engine

9
docs/Using-Docker.md


## Requirements
- Unity _Linux Build Support_ Component. Make sure to select the _Linux
  Build Support_ component when installing Unity.
<p align="center">
<img src="images/unity_linux_build_support.png"
alt="Linux Build Support"
width="500" border="10" />
</p>
## Setup

12
docs/Using-Virtual-Environment.md


spinning up a new environment and verifying the compatibility of the code with the
different version.
## Python Version Requirement (Required)
This guide has been tested with Python 3.6 and 3.7. Python 3.8 is not supported at this time.

1. To activate the environment execute `$ source ~/python-envs/sample-env/bin/activate`
1. Verify pip version is the same as in the __Installing Pip__ section. In case it is not the latest, upgrade to
the latest pip version using `$ pip3 install --upgrade pip`
1. Install ML-Agents package using `$ pip3 install mlagents`
1. To deactivate the environment execute `$ deactivate` (you can reactivate the environment
   using the same `activate` command listed above)
## Ubuntu Setup

1. To activate the environment execute `python-envs\sample-env\Scripts\activate`
1. Verify pip version is the same as in the __Installing Pip__ section. In case it is not the
latest, upgrade to the latest pip version using `pip install --upgrade pip`
1. Install ML-Agents package using `pip install mlagents`
1. To deactivate the environment execute `deactivate` (you can reactivate the environment
   using the same `activate` command listed above)
Note:
- Verify that you are using Python 3.6 or Python 3.7. Launch a command prompt using `cmd` and

951
docs/images/unity_package_manager_window.png

Width: 1002  |  Height: 1150  |  Size: 266 KiB

2
docs/localized/KR/docs/Installation.md


</p>
## Windows Users
To set up your environment on Windows, we have written a [detailed guide](Installation-Anaconda-Windows.md) describing the setup process.
For Mac and Linux, please check the guide below.
## Mac or Unix Users

2
docs/localized/zh-CN/docs/Installation.md


### Windows Users
If you are a Windows user new to Python and TensorFlow, please follow [this guide](/docs/Installation-Anaconda-Windows.md) to set up your Python environment.
### Mac and Unix Users

2
gym-unity/gym_unity/__init__.py


__version__ = "0.15.0.dev0"
__version__ = "0.16.0.dev0"

2
ml-agents-envs/mlagents_envs/__init__.py


__version__ = "0.15.0.dev0"
__version__ = "0.16.0.dev0"

10
ml-agents-envs/mlagents_envs/base_env.py


@property
def agent_id_to_index(self) -> Dict[AgentId, int]:
"""
Returns the index of the agent_id in this BatchedStepResult, and
-1 if agent_id is not in this BatchedStepResult.
:param agent_id: The id of the agent
:returns: The index of the agent_id, and -1 if not found.
:returns: A Dict that maps agent_id to the index of those agents in
this BatchedStepResult.
"""
if self._agent_id_to_index is None:
self._agent_id_to_index = {}

"""
if not self.contains_agent(agent_id):
raise IndexError(
"agent_id {} is not present in the BatchedStepResult".format(agent_id)
"get_agent_step_result failed. agent_id {} is not present in the BatchedStepResult".format(
agent_id
)
)
agent_index = self._agent_id_to_index[agent_id] # type: ignore
agent_obs = []

4
ml-agents-envs/mlagents_envs/communicator.py


import logging
logger = logging.getLogger("mlagents_envs")
class Communicator(object):

7
ml-agents-envs/mlagents_envs/environment.py


from typing import Dict, List, Optional, Any
import mlagents_envs
from mlagents_envs.side_channel.side_channel import SideChannel
from mlagents_envs.side_channel.side_channel import SideChannel, IncomingMessage
from mlagents_envs.base_env import (
BaseEnv,

import signal
import struct
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("mlagents_envs")

"sending side channel data properly.".format(channel_id)
)
if channel_id in side_channels:
side_channels[channel_id].on_message_received(message_data)
incoming_message = IncomingMessage(message_data)
side_channels[channel_id].on_message_received(incoming_message)
else:
logger.warning(
"Unknown side channel data received. Channel type "

5
ml-agents-envs/mlagents_envs/exception.py


import logging
logger = logging.getLogger("mlagents_envs")
class UnityException(Exception):
"""
Any error related to ml-agents environment.

3
ml-agents-envs/mlagents_envs/rpc_communicator.py


import logging
import grpc
from typing import Optional

from mlagents_envs.communicator_objects.unity_input_pb2 import UnityInputProto
from mlagents_envs.communicator_objects.unity_output_pb2 import UnityOutputProto
from .exception import UnityTimeOutException, UnityWorkerInUseException
logger = logging.getLogger("mlagents_envs")
class UnityToExternalServicerImplementation(UnityToExternalProtoServicer):

7
ml-agents-envs/mlagents_envs/rpc_utils.py


from mlagents_envs.communicator_objects.agent_info_pb2 import AgentInfoProto
from mlagents_envs.communicator_objects.observation_pb2 import (
ObservationProto,
NONE as COMPRESSION_NONE,
NONE as COMPRESSION_TYPE_NONE,
import logging
logger = logging.getLogger("mlagents_envs")
def agent_group_spec_from_proto(

f"Observation did not have the expected shape - got {obs.shape} but expected {expected_shape}"
)
gray_scale = obs.shape[2] == 1
if obs.compression_type == COMPRESSION_NONE:
if obs.compression_type == COMPRESSION_TYPE_NONE:
img = np.array(obs.float_data.data, dtype=np.float32)
img = np.reshape(img, obs.shape)
return img

4
ml-agents-envs/mlagents_envs/side_channel/__init__.py


from mlagents_envs.side_channel.incoming_message import IncomingMessage # noqa
from mlagents_envs.side_channel.outgoing_message import OutgoingMessage # noqa
from mlagents_envs.side_channel.side_channel import SideChannel # noqa

23
ml-agents-envs/mlagents_envs/side_channel/engine_configuration_channel.py


from mlagents_envs.side_channel.side_channel import SideChannel
from mlagents_envs.side_channel import SideChannel, OutgoingMessage, IncomingMessage
import struct
import uuid
from typing import NamedTuple

def __init__(self) -> None:
super().__init__(uuid.UUID("e951342c-4f7e-11ea-b238-784f4387d1f7"))
def on_message_received(self, data: bytes) -> None:
def on_message_received(self, msg: IncomingMessage) -> None:
"""
Is called by the environment to the side channel. Can be called
multiple times per step if multiple messages are meant for that

:param target_frame_rate: Instructs simulation to try to render at a
specified frame rate. Default -1.
"""
data = bytearray()
data += struct.pack("<i", width)
data += struct.pack("<i", height)
data += struct.pack("<i", quality_level)
data += struct.pack("<f", time_scale)
data += struct.pack("<i", target_frame_rate)
super().queue_message_to_send(data)
msg = OutgoingMessage()
msg.write_int32(width)
msg.write_int32(height)
msg.write_int32(quality_level)
msg.write_float32(time_scale)
msg.write_int32(target_frame_rate)
super().queue_message_to_send(msg)
data = bytearray()
data += struct.pack("<iiifi", *config)
super().queue_message_to_send(data)
self.set_configuration_parameters(**config._asdict())

Some files were not shown because too many files changed in this diff
