浏览代码

Merge branch 'master' into asymm-envs

/asymm-envs
Andrew Cohen 4 年前
当前提交
59a60c1e
共有 185 个文件被更改,包括 1744 次插入918 次删除
  1. 12
      .circleci/config.yml
  2. 7
      .pre-commit-config.yaml
  3. 6
      .yamato/com.unity.ml-agents-pack.yml
  4. 55
      .yamato/com.unity.ml-agents-test.yml
  5. 8
      Project/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DAgent.cs
  6. 8
      Project/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DHardAgent.cs
  7. 6
      Project/Assets/ML-Agents/Examples/Bouncer/Scripts/BouncerAgent.cs
  8. 7
      Project/Assets/ML-Agents/Examples/FoodCollector/Scripts/FoodCollectorAgent.cs
  9. 10
      Project/Assets/ML-Agents/Examples/FoodCollector/Scripts/FoodCollectorSettings.cs
  10. 9
      Project/Assets/ML-Agents/Examples/GridWorld/Scripts/GridAgent.cs
  11. 20
      Project/Assets/ML-Agents/Examples/GridWorld/Scripts/GridArea.cs
  12. 2
      Project/Assets/ML-Agents/Examples/GridWorld/Scripts/GridSettings.cs
  13. 4
      Project/Assets/ML-Agents/Examples/Hallway/Scripts/HallwayAgent.cs
  14. 20
      Project/Assets/ML-Agents/Examples/PushBlock/Scripts/PushAgentBasic.cs
  15. 2
      Project/Assets/ML-Agents/Examples/Pyramids/Scripts/PyramidAgent.cs
  16. 13
      Project/Assets/ML-Agents/Examples/Reacher/Scripts/ReacherAgent.cs
  17. 8
      Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ModelOverrider.cs
  18. 9
      Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ProjectSettingsOverrides.cs
  19. 8
      Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/SensorBase.cs
  20. 6
      Project/Assets/ML-Agents/Examples/Tennis/Scripts/TennisAgent.cs
  21. 10
      Project/Assets/ML-Agents/Examples/Walker/Scripts/WalkerAgent.cs
  22. 12
      Project/Assets/ML-Agents/Examples/WallJump/Scripts/WallJumpAgent.cs
  23. 6
      Project/ProjectSettings/DynamicsManager.asset
  24. 47
      com.unity.ml-agents/CHANGELOG.md
  25. 6
      com.unity.ml-agents/CONTRIBUTING.md
  26. 78
      com.unity.ml-agents/Documentation~/com.unity.ml-agents.md
  27. 7
      com.unity.ml-agents/Editor/AgentEditor.cs
  28. 6
      com.unity.ml-agents/Editor/BehaviorParametersEditor.cs
  29. 10
      com.unity.ml-agents/Editor/BrainParametersDrawer.cs
  30. 4
      com.unity.ml-agents/Editor/DemonstrationDrawer.cs
  31. 2
      com.unity.ml-agents/Editor/RayPerceptionSensorComponentBaseEditor.cs
  32. 136
      com.unity.ml-agents/Runtime/Academy.cs
  33. 622
      com.unity.ml-agents/Runtime/Agent.cs
  34. 39
      com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs
  35. 14
      com.unity.ml-agents/Runtime/Communicator/ICommunicator.cs
  36. 11
      com.unity.ml-agents/Runtime/Communicator/RpcCommunicator.cs
  37. 13
      com.unity.ml-agents/Runtime/DecisionRequester.cs
  38. 45
      com.unity.ml-agents/Runtime/Demonstrations/DemonstrationRecorder.cs
  39. 7
      com.unity.ml-agents/Runtime/Demonstrations/DemonstrationWriter.cs
  40. 51
      com.unity.ml-agents/Runtime/DiscreteActionMasker.cs
  41. 52
      com.unity.ml-agents/Runtime/Grpc/CommunicatorObjects/UnityRlInitializationInput.cs
  42. 58
      com.unity.ml-agents/Runtime/Grpc/CommunicatorObjects/UnityRlInitializationOutput.cs
  43. 18
      com.unity.ml-agents/Runtime/Inference/BarracudaModelParamLoader.cs
  44. 12
      com.unity.ml-agents/Runtime/Inference/GeneratorImpl.cs
  45. 4
      com.unity.ml-agents/Runtime/Inference/TensorApplier.cs
  46. 38
      com.unity.ml-agents/Runtime/Policies/BehaviorParameters.cs
  47. 61
      com.unity.ml-agents/Runtime/Policies/BrainParameters.cs
  48. 6
      com.unity.ml-agents/Runtime/Policies/HeuristicPolicy.cs
  49. 12
      com.unity.ml-agents/Runtime/Sensors/CameraSensor.cs
  50. 20
      com.unity.ml-agents/Runtime/Sensors/CameraSensorComponent.cs
  51. 6
      com.unity.ml-agents/Runtime/Sensors/ISensor.cs
  52. 112
      com.unity.ml-agents/Runtime/Sensors/RayPerceptionSensor.cs
  53. 2
      com.unity.ml-agents/Runtime/Sensors/RayPerceptionSensorComponent2D.cs
  54. 8
      com.unity.ml-agents/Runtime/Sensors/RayPerceptionSensorComponent3D.cs
  55. 56
      com.unity.ml-agents/Runtime/Sensors/RayPerceptionSensorComponentBase.cs
  56. 6
      com.unity.ml-agents/Runtime/Sensors/RenderTextureSensor.cs
  57. 22
      com.unity.ml-agents/Runtime/Sensors/RenderTextureSensorComponent.cs
  58. 12
      com.unity.ml-agents/Runtime/Sensors/StackingSensor.cs
  59. 14
      com.unity.ml-agents/Runtime/Sensors/VectorSensor.cs
  60. 8
      com.unity.ml-agents/Runtime/Sensors/ObservationWriter.cs
  61. 58
      com.unity.ml-agents/Runtime/SideChannels/EngineConfigurationChannel.cs
  62. 30
      com.unity.ml-agents/Runtime/SideChannels/FloatPropertiesChannel.cs
  63. 2
      com.unity.ml-agents/Runtime/SideChannels/RawBytesChannel.cs
  64. 17
      com.unity.ml-agents/Runtime/SideChannels/SideChannel.cs
  65. 45
      com.unity.ml-agents/Runtime/SideChannels/StatsSideChannel.cs
  66. 2
      com.unity.ml-agents/Runtime/SideChannels/EnvironmentParametersChannel.cs.meta
  67. 16
      com.unity.ml-agents/Runtime/Utilities.cs
  68. 2
      com.unity.ml-agents/Tests/Editor/BehaviorParameterTests.cs
  69. 34
      com.unity.ml-agents/Tests/Editor/DemonstrationTests.cs
  70. 26
      com.unity.ml-agents/Tests/Editor/EditModeTestActionMasker.cs
  71. 8
      com.unity.ml-agents/Tests/Editor/EditModeTestInternalBrainTensorGenerator.cs
  72. 35
      com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs
  73. 18
      com.unity.ml-agents/Tests/Editor/ModelRunnerTest.cs
  74. 34
      com.unity.ml-agents/Tests/Editor/ParameterLoaderTest.cs
  75. 42
      com.unity.ml-agents/Tests/Editor/PublicAPI/PublicApiValidation.cs
  76. 10
      com.unity.ml-agents/Tests/Editor/Sensor/CameraSensorComponentTest.cs
  77. 4
      com.unity.ml-agents/Tests/Editor/Sensor/CameraSensorTest.cs
  78. 6
      com.unity.ml-agents/Tests/Editor/Sensor/FloatVisualSensorTests.cs
  79. 88
      com.unity.ml-agents/Tests/Editor/Sensor/RayPerceptionSensorTests.cs
  80. 6
      com.unity.ml-agents/Tests/Editor/Sensor/RenderTextureSensorComponentTests.cs
  81. 4
      com.unity.ml-agents/Tests/Editor/Sensor/RenderTextureSensorTests.cs
  82. 2
      com.unity.ml-agents/Tests/Editor/Sensor/SensorShapeValidatorTests.cs
  83. 4
      com.unity.ml-agents/Tests/Editor/Sensor/VectorSensorTests.cs
  84. 8
      com.unity.ml-agents/Tests/Editor/Sensor/ObservationWriterTests.cs
  85. 32
      com.unity.ml-agents/Tests/Editor/SideChannelTests.cs
  86. 33
      com.unity.ml-agents/Tests/Runtime/RuntimeAPITest.cs
  87. 4
      com.unity.ml-agents/package.json
  88. 30
      config/trainer_config.yaml
  89. 2
      docs/API-Reference.md
  90. 10
      docs/Custom-SideChannels.md
  91. 4
      docs/FAQ.md
  92. 6
      docs/Learning-Environment-Design.md
  93. 25
      docs/Learning-Environment-Examples.md
  94. 26
      docs/ML-Agents-Overview.md
  95. 42
      docs/Migrating.md
  96. 36
      docs/Python-API.md
  97. 2
      docs/Readme.md
  98. 32
      docs/Training-Curriculum-Learning.md
  99. 2
      docs/Training-Imitation-Learning.md
  100. 2
      docs/Training-PPO.md

12
.circleci/config.yml


python373:
docker:
- image: circleci/python:3.7.3
python382:
docker:
- image: circleci/python:3.8.2
jobs:
build_python:

- run:
name: Install Dependencies
command: |
# Need ruby for search-and-replace
sudo apt-get update
sudo apt-get install ruby-full
python3 -m venv venv
. venv/bin/activate
pip install --upgrade pip

executor: python373
pyversion: 3.7.3
# Test python 3.7 with the newest supported versions
pip_constraints: test_constraints_max_tf2_version.txt
- build_python:
name: python_3.8.2+tf2.2
executor: python382
pyversion: 3.8.2
# Test python 3.8 with the newest edge versions
pip_constraints: test_constraints_max_tf2_version.txt
- markdown_link_check
- pre-commit

7
.pre-commit-config.yaml


)$
args: [--score=n]
- repo: https://github.com/mattlqx/pre-commit-search-and-replace
rev: v1.0.3
hooks:
- id: search-and-replace
types: [markdown]
exclude: ".*localized.*"
# "Local" hooks, see https://pre-commit.com/#repository-local-hooks
- repo: local
hooks:

6
.yamato/com.unity.ml-agents-pack.yml


pack:
name: Pack
agent:
type: Unity::VM
image: package-ci/ubuntu:stable
flavor: b1.large
type: Unity::VM::osx
image: package-ci/mac:stable
flavor: b1.small
commands:
- npm install upm-ci-utils@stable -g --registry https://artifactory.prd.cds.internal.unity3d.com/artifactory/api/npm/upm-npm
- upm-ci package pack --package-path com.unity.ml-agents

55
.yamato/com.unity.ml-agents-test.yml


- version: 2020.1
coverageOptions: --enable-code-coverage --code-coverage-options 'generateHtmlReport;assemblyFilters:+Unity.ML-Agents'
minCoveragePct: 72
- version: 2020.2
coverageOptions: --enable-code-coverage --code-coverage-options 'generateHtmlReport;assemblyFilters:+Unity.ML-Agents'
minCoveragePct: 72
trunk_editor:
- version: trunk
coverageOptions: --enable-code-coverage --code-coverage-options 'generateHtmlReport;assemblyFilters:+Unity.ML-Agents'
minCoveragePct: 72
test_platforms:
- name: win
type: Unity::VM

flavor: b1.medium
---
all_package_tests:
name: Run All Combinations of Editors/Platforms Tests
dependencies:
{% for editor in test_editors %}
{% for platform in test_platforms %}
- .yamato/com.unity.ml-agents-test.yml#test_{{ platform.name }}_{{ editor.version }}
{% endfor %}
{% endfor %}
{% for editor in trunk_editor %}
{% for platform in test_platforms %}
- .yamato/com.unity.ml-agents-test.yml#test_{{ platform.name }}_{{ editor.version }}
{% endfor %}
{% endfor %}
triggers:
cancel_old_ci: true
recurring:
- branch: master
frequency: daily
{% for editor in test_editors %}
{% for platform in test_platforms %}
test_{{ platform.name }}_{{ editor.version }}:

- .yamato/com.unity.ml-agents-pack.yml#pack
triggers:
cancel_old_ci: true
{% if platform.name == "mac" %}
{% endif %}
{% endfor %}
{% endfor %}
{% endfor %}
{% for editor in trunk_editor %}
{% for platform in test_platforms %}
test_{{ platform.name }}_trunk:
name : com.unity.ml-agents test {{ editor.version }} on {{ platform.name }}
agent:
type: {{ platform.type }}
image: {{ platform.image }}
flavor: {{ platform.flavor}}
commands:
- python -m pip install unity-downloader-cli --extra-index-url https://artifactory.eu-cph-1.unityops.net/api/pypi/common-python/simple
- unity-downloader-cli -u trunk -c editor --wait --fast
- npm install upm-ci-utils@stable -g --registry https://artifactory.prd.cds.internal.unity3d.com/artifactory/api/npm/upm-npm
- upm-ci package test -u {{ editor.version }} --package-path com.unity.ml-agents {{ editor.coverageOptions }}
- python ml-agents/tests/yamato/check_coverage_percent.py upm-ci~/test-results/ {{ editor.minCoveragePct }}
artifacts:
logs:
paths:
- "upm-ci~/test-results/**/*"
dependencies:
- .yamato/com.unity.ml-agents-pack.yml#pack
triggers:
cancel_old_ci: true
{% endfor %}
{% endfor %}

8
Project/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DAgent.cs


[Header("Specific to Ball3D")]
public GameObject ball;
Rigidbody m_BallRb;
FloatPropertiesChannel m_ResetParams;
EnvironmentParameters m_ResetParams;
m_ResetParams = SideChannelUtils.GetSideChannel<FloatPropertiesChannel>();
m_ResetParams = Academy.Instance.EnvironmentParameters;
SetResetParameters();
}

public void SetBall()
{
//Set the attributes of the ball by fetching the information from the academy
m_BallRb.mass = m_ResetParams.GetPropertyWithDefault("mass", 1.0f);
var scale = m_ResetParams.GetPropertyWithDefault("scale", 1.0f);
m_BallRb.mass = m_ResetParams.GetWithDefault("mass", 1.0f);
var scale = m_ResetParams.GetWithDefault("scale", 1.0f);
ball.transform.localScale = new Vector3(scale, scale, scale);
}

8
Project/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DHardAgent.cs


[Header("Specific to Ball3DHard")]
public GameObject ball;
Rigidbody m_BallRb;
FloatPropertiesChannel m_ResetParams;
EnvironmentParameters m_ResetParams;
m_ResetParams = SideChannelUtils.GetSideChannel<FloatPropertiesChannel>();
m_ResetParams = Academy.Instance.EnvironmentParameters;
SetResetParameters();
}

public void SetBall()
{
//Set the attributes of the ball by fetching the information from the academy
m_BallRb.mass = m_ResetParams.GetPropertyWithDefault("mass", 1.0f);
var scale = m_ResetParams.GetPropertyWithDefault("scale", 1.0f);
m_BallRb.mass = m_ResetParams.GetWithDefault("mass", 1.0f);
var scale = m_ResetParams.GetWithDefault("scale", 1.0f);
ball.transform.localScale = new Vector3(scale, scale, scale);
}

6
Project/Assets/ML-Agents/Examples/Bouncer/Scripts/BouncerAgent.cs


int m_NumberJumps = 20;
int m_JumpLeft = 20;
FloatPropertiesChannel m_ResetParams;
EnvironmentParameters m_ResetParams;
public override void Initialize()
{

m_ResetParams = SideChannelUtils.GetSideChannel<FloatPropertiesChannel>();
m_ResetParams = Academy.Instance.EnvironmentParameters;
SetResetParameters();
}

public void SetTargetScale()
{
var targetScale = m_ResetParams.GetPropertyWithDefault("target_scale", 1.0f);
var targetScale = m_ResetParams.GetWithDefault("target_scale", 1.0f);
target.transform.localScale = new Vector3(targetScale, targetScale, targetScale);
}

7
Project/Assets/ML-Agents/Examples/FoodCollector/Scripts/FoodCollectorAgent.cs


public bool contribute;
public bool useVectorObs;
EnvironmentParameters m_ResetParams;
public override void Initialize()
{

m_ResetParams = Academy.Instance.EnvironmentParameters;
SetResetParameters();
}

public void SetLaserLengths()
{
m_LaserLength = SideChannelUtils.GetSideChannel<FloatPropertiesChannel>().GetPropertyWithDefault("laser_length", 1.0f);
m_LaserLength = m_ResetParams.GetWithDefault("laser_length", 1.0f);
float agentScale = SideChannelUtils.GetSideChannel<FloatPropertiesChannel>().GetPropertyWithDefault("agent_scale", 1.0f);
float agentScale = m_ResetParams.GetWithDefault("agent_scale", 1.0f);
gameObject.transform.localScale = new Vector3(agentScale, agentScale, agentScale);
}

10
Project/Assets/ML-Agents/Examples/FoodCollector/Scripts/FoodCollectorSettings.cs


using System;
using MLAgents.SideChannels;
public class FoodCollectorSettings : MonoBehaviour
{

public int totalScore;
public Text scoreText;
StatsSideChannel m_statsSideChannel;
StatsRecorder m_Recorder;
m_statsSideChannel = SideChannelUtils.GetSideChannel<StatsSideChannel>();
m_Recorder = Academy.Instance.StatsRecorder;
public void EnvironmentReset()
private void EnvironmentReset()
{
ClearObjects(GameObject.FindGameObjectsWithTag("food"));
ClearObjects(GameObject.FindGameObjectsWithTag("badFood"));

// need to send every Update() call.
if ((Time.frameCount % 100)== 0)
{
m_statsSideChannel?.AddStat("TotalScore", totalScore);
m_Recorder.Add("TotalScore", totalScore);
}
}
}

9
Project/Assets/ML-Agents/Examples/GridWorld/Scripts/GridAgent.cs


const int k_Left = 3;
const int k_Right = 4;
EnvironmentParameters m_ResetParams;
public override void Initialize()
{
m_ResetParams = Academy.Instance.EnvironmentParameters;
}
public override void CollectDiscreteActionMasks(DiscreteActionMasker actionMasker)
{
// Mask the necessary actions if selected by the user.

var positionX = (int)transform.position.x;
var positionZ = (int)transform.position.z;
var maxPosition = (int)SideChannelUtils.GetSideChannel<FloatPropertiesChannel>().GetPropertyWithDefault("gridSize", 5f) - 1;
var maxPosition = (int)m_ResetParams.GetWithDefault("gridSize", 5f) - 1;
if (positionX == 0)
{

20
Project/Assets/ML-Agents/Examples/GridWorld/Scripts/GridArea.cs


public GameObject trueAgent;
FloatPropertiesChannel m_ResetParameters;
Camera m_AgentCam;
public GameObject goalPref;

Vector3 m_InitialPosition;
EnvironmentParameters m_ResetParams;
m_ResetParameters = SideChannelUtils.GetSideChannel<FloatPropertiesChannel>();
m_ResetParams = Academy.Instance.EnvironmentParameters;
m_Objects = new[] { goalPref, pitPref };

m_InitialPosition = transform.position;
}
public void SetEnvironment()
private void SetEnvironment()
transform.position = m_InitialPosition * (m_ResetParameters.GetPropertyWithDefault("gridSize", 5f) + 1);
transform.position = m_InitialPosition * (m_ResetParams.GetWithDefault("gridSize", 5f) + 1);
for (var i = 0; i < (int)m_ResetParameters.GetPropertyWithDefault("numObstacles", 1); i++)
for (var i = 0; i < (int)m_ResetParams.GetWithDefault("numObstacles", 1); i++)
for (var i = 0; i < (int)m_ResetParameters.GetPropertyWithDefault("numGoals", 1f); i++)
for (var i = 0; i < (int)m_ResetParams.GetWithDefault("numGoals", 1f); i++)
var gridSize = (int)m_ResetParameters.GetPropertyWithDefault("gridSize", 5f);
var gridSize = (int)m_ResetParams.GetWithDefault("gridSize", 5f);
m_Plane.transform.localScale = new Vector3(gridSize / 10.0f, 1f, gridSize / 10.0f);
m_Plane.transform.localPosition = new Vector3((gridSize - 1) / 2f, -0.5f, (gridSize - 1) / 2f);
m_Sn.transform.localScale = new Vector3(1, 1, gridSize + 2);

public void AreaReset()
{
var gridSize = (int)m_ResetParameters.GetPropertyWithDefault("gridSize", 5f);
var gridSize = (int)m_ResetParams.GetWithDefault("gridSize", 5f);
foreach (var actor in actorObjs)
{
DestroyImmediate(actor);

{
numbers.Add(Random.Range(0, gridSize * gridSize));
}
var numbersA = Enumerable.ToArray(numbers);
var numbersA = numbers.ToArray();
for (var i = 0; i < players.Length; i++)
{

2
Project/Assets/ML-Agents/Examples/GridWorld/Scripts/GridSettings.cs


public void Awake()
{
SideChannelUtils.GetSideChannel<FloatPropertiesChannel>().RegisterCallback("gridSize", f =>
Academy.Instance.EnvironmentParameters.RegisterCallback("gridSize", f =>
{
MainCamera.transform.position = new Vector3(-(f - 1) / 2f, f * 1.25f, -(f - 1) / 2f);
MainCamera.orthographicSize = (f + 5f) / 2f;

4
Project/Assets/ML-Agents/Examples/Hallway/Scripts/HallwayAgent.cs


{
if (useVectorObs)
{
sensor.AddObservation(StepCount / (float)maxStep);
sensor.AddObservation(StepCount / (float)MaxStep);
}
}

public override void OnActionReceived(float[] vectorAction)
{
AddReward(-1f / maxStep);
AddReward(-1f / MaxStep);
MoveAgent(vectorAction);
}

20
Project/Assets/ML-Agents/Examples/PushBlock/Scripts/PushAgentBasic.cs


/// </summary>
Renderer m_GroundRenderer;
private EnvironmentParameters m_ResetParams;
void Awake()
{
m_PushBlockSettings = FindObjectOfType<PushBlockSettings>();

m_GroundRenderer = ground.GetComponent<Renderer>();
// Starting material
m_GroundMaterial = m_GroundRenderer.material;
m_ResetParams = Academy.Instance.EnvironmentParameters;
SetResetParameters();
}

MoveAgent(vectorAction);
// Penalty given each step to encourage agent to finish task quickly.
AddReward(-1f / maxStep);
AddReward(-1f / MaxStep);
}
public override void Heuristic(float[] actionsOut)

public void SetGroundMaterialFriction()
{
var resetParams = SideChannelUtils.GetSideChannel<FloatPropertiesChannel>();
groundCollider.material.dynamicFriction = resetParams.GetPropertyWithDefault("dynamic_friction", 0);
groundCollider.material.staticFriction = resetParams.GetPropertyWithDefault("static_friction", 0);
groundCollider.material.dynamicFriction = m_ResetParams.GetWithDefault("dynamic_friction", 0);
groundCollider.material.staticFriction = m_ResetParams.GetWithDefault("static_friction", 0);
var resetParams = SideChannelUtils.GetSideChannel<FloatPropertiesChannel>();
var scale = resetParams.GetPropertyWithDefault("block_scale", 2);
var scale = m_ResetParams.GetWithDefault("block_scale", 2);
m_BlockRb.drag = resetParams.GetPropertyWithDefault("block_drag", 0.5f);
m_BlockRb.drag = m_ResetParams.GetWithDefault("block_drag", 0.5f);
public void SetResetParameters()
private void SetResetParameters()
{
SetGroundMaterialFriction();
SetBlockProperties();

2
Project/Assets/ML-Agents/Examples/Pyramids/Scripts/PyramidAgent.cs


public override void OnActionReceived(float[] vectorAction)
{
AddReward(-1f / maxStep);
AddReward(-1f / MaxStep);
MoveAgent(vectorAction);
}

13
Project/Assets/ML-Agents/Examples/Reacher/Scripts/ReacherAgent.cs


// Frequency of the cosine deviation of the goal along the vertical dimension
float m_DeviationFreq;
private EnvironmentParameters m_ResetParams;
/// <summary>
/// Collect the rigidbodies of the reacher in order to reuse them for
/// observations and actions.

m_RbA = pendulumA.GetComponent<Rigidbody>();
m_RbB = pendulumB.GetComponent<Rigidbody>();
m_ResetParams = Academy.Instance.EnvironmentParameters;
SetResetParameters();
}

public void SetResetParameters()
{
var fp = SideChannelUtils.GetSideChannel<FloatPropertiesChannel>();
m_GoalSize = fp.GetPropertyWithDefault("goal_size", 5);
m_GoalSpeed = Random.Range(-1f, 1f) * fp.GetPropertyWithDefault("goal_speed", 1);
m_Deviation = fp.GetPropertyWithDefault("deviation", 0);
m_DeviationFreq = fp.GetPropertyWithDefault("deviation_freq", 0);
m_GoalSize = m_ResetParams.GetWithDefault("goal_size", 5);
m_GoalSpeed = Random.Range(-1f, 1f) * m_ResetParams.GetWithDefault("goal_speed", 1);
m_Deviation = m_ResetParams.GetWithDefault("deviation", 0);
m_DeviationFreq = m_ResetParams.GetWithDefault("deviation_freq", 0);
}
}

8
Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ModelOverrider.cs


if (m_MaxEpisodes > 0)
{
// For Agents without maxSteps, exit as soon as we've hit the target number of episodes.
// For Agents that specify maxStep, also make sure we've gone at least that many steps.
// For Agents that specify MaxStep, also make sure we've gone at least that many steps.
if (m_Agent.CompletedEpisodes >= m_MaxEpisodes && m_NumSteps > m_MaxEpisodes * m_Agent.maxStep)
if (m_Agent.CompletedEpisodes >= m_MaxEpisodes && m_NumSteps > m_MaxEpisodes * m_Agent.MaxStep)
{
Application.Quit(0);
}

if (!m_BehaviorNameOverrides.ContainsKey(behaviorName))
{
Debug.Log($"No override for behaviorName {behaviorName}");
Debug.Log($"No override for BehaviorName {behaviorName}");
return null;
}

{
m_Agent.LazyInitialize();
var bp = m_Agent.GetComponent<BehaviorParameters>();
var name = bp.behaviorName;
var name = bp.BehaviorName;
var nnModel = GetModelForBehaviorName(name);
Debug.Log($"Overriding behavior {name} for agent with model {nnModel?.name}");

9
Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ProjectSettingsOverrides.cs


namespace MLAgentsExamples
{
/// <summary>
/// A helper class for the ML-Agents example scenes to override various
/// global settings, and restore them afterwards.
/// This can modify some Physics and time-stepping properties, so you
/// shouldn't copy it into your project unless you know what you're doing.
/// </summary>
public class ProjectSettingsOverrides : MonoBehaviour
{
// Original values

Physics.defaultSolverVelocityIterations = solverVelocityIterations;
// Make sure the Academy singleton is initialized first, since it will create the SideChannels.
var academy = Academy.Instance;
SideChannelUtils.GetSideChannel<FloatPropertiesChannel>().RegisterCallback("gravity", f => { Physics.gravity = new Vector3(0, -f, 0); });
Academy.Instance.EnvironmentParameters.RegisterCallback("gravity", f => { Physics.gravity = new Vector3(0, -f, 0); });
}
public void OnDestroy()

8
Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/SensorBase.cs


/// <summary>
/// Default implementation of Write interface. This creates a temporary array,
/// calls WriteObservation, and then writes the results to the WriteAdapter.
/// calls WriteObservation, and then writes the results to the ObservationWriter.
/// <param name="adapter"></param>
/// <param name="writer"></param>
public virtual int Write(WriteAdapter adapter)
public virtual int Write(ObservationWriter writer)
{
// TODO reuse buffer for similar agents, don't call GetObservationShape()
var numFloats = this.ObservationSize();

adapter.AddRange(buffer);
writer.AddRange(buffer);
return numFloats;
}

6
Project/Assets/ML-Agents/Examples/Tennis/Scripts/TennisAgent.cs


HitWall m_BallScript;
TennisArea m_Area;
float m_InvertMult;
FloatPropertiesChannel m_ResetParams;
EnvironmentParameters m_ResetParams;
Vector3 m_Down = new Vector3(0f, -100f, 0f);
Vector3 zAxis = new Vector3(0f, 0f, 1f);
const float k_Angle = 90f;

m_Area = myArea.GetComponent<TennisArea>();
var canvas = GameObject.Find(k_CanvasName);
GameObject scoreBoard;
m_ResetParams = SideChannelUtils.GetSideChannel<FloatPropertiesChannel>();
m_ResetParams = Academy.Instance.EnvironmentParameters;
if (invertX)
{
scoreBoard = canvas.transform.Find(k_ScoreBoardBName).gameObject;

public void SetBall()
{
scale = m_ResetParams.GetPropertyWithDefault("scale", .5f);
scale = m_ResetParams.GetWithDefault("scale", .5f);
ball.transform.localScale = new Vector3(scale, scale, scale);
}

10
Project/Assets/ML-Agents/Examples/Walker/Scripts/WalkerAgent.cs


Rigidbody m_ChestRb;
Rigidbody m_SpineRb;
FloatPropertiesChannel m_ResetParams;
EnvironmentParameters m_ResetParams;
public override void Initialize()
{

m_ChestRb = chest.GetComponent<Rigidbody>();
m_SpineRb = spine.GetComponent<Rigidbody>();
m_ResetParams = SideChannelUtils.GetSideChannel<FloatPropertiesChannel>();
m_ResetParams = Academy.Instance.EnvironmentParameters;
SetResetParameters();
}

public void SetTorsoMass()
{
m_ChestRb.mass = m_ResetParams.GetPropertyWithDefault("chest_mass", 8);
m_SpineRb.mass = m_ResetParams.GetPropertyWithDefault("spine_mass", 10);
m_HipsRb.mass = m_ResetParams.GetPropertyWithDefault("hip_mass", 15);
m_ChestRb.mass = m_ResetParams.GetWithDefault("chest_mass", 8);
m_SpineRb.mass = m_ResetParams.GetWithDefault("spine_mass", 10);
m_HipsRb.mass = m_ResetParams.GetWithDefault("hip_mass", 15);
}
public void SetResetParameters()

12
Project/Assets/ML-Agents/Examples/WallJump/Scripts/WallJumpAgent.cs


Vector3 m_JumpTargetPos;
Vector3 m_JumpStartingPos;
FloatPropertiesChannel m_FloatProperties;
EnvironmentParameters m_ResetParams;
public override void Initialize()
{

spawnArea.SetActive(false);
m_FloatProperties = SideChannelUtils.GetSideChannel<FloatPropertiesChannel>();
m_ResetParams = Academy.Instance.EnvironmentParameters;
}
// Begin the jump sequence

{
localScale = new Vector3(
localScale.x,
m_FloatProperties.GetPropertyWithDefault("no_wall_height", 0),
m_ResetParams.GetWithDefault("no_wall_height", 0),
localScale.z);
wall.transform.localScale = localScale;
SetModel("SmallWallJump", noWallBrain);

localScale = new Vector3(
localScale.x,
m_FloatProperties.GetPropertyWithDefault("small_wall_height", 4),
m_ResetParams.GetWithDefault("small_wall_height", 4),
localScale.z);
wall.transform.localScale = localScale;
SetModel("SmallWallJump", smallWallBrain);

var min = m_FloatProperties.GetPropertyWithDefault("big_wall_min_height", 8);
var max = m_FloatProperties.GetPropertyWithDefault("big_wall_max_height", 8);
var min = m_ResetParams.GetWithDefault("big_wall_min_height", 8);
var max = m_ResetParams.GetWithDefault("big_wall_max_height", 8);
var height = min + Random.value * (max - min);
localScale = new Vector3(
localScale.x,

6
Project/ProjectSettings/DynamicsManager.asset


--- !u!55 &1
PhysicsManager:
m_ObjectHideFlags: 0
serializedVersion: 7
serializedVersion: 10
m_Gravity: {x: 0, y: -9.81, z: 0}
m_DefaultMaterial: {fileID: 0}
m_BounceThreshold: 2

m_LayerCollisionMatrix: ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffebffffffddffffffeffffffff5fffffffbffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
m_AutoSimulation: 1
m_AutoSyncTransforms: 1
m_ReuseCollisionCallbacks: 0
m_ClothInterCollisionSettingsToggle: 0
m_ContactPairsMode: 0
m_BroadphaseType: 0

m_WorldSubdivisions: 8
m_FrictionType: 0
m_EnableEnhancedDeterminism: 0
m_EnableUnifiedHeightmaps: 1

47
com.unity.ml-agents/CHANGELOG.md


[Semantic Versioning](http://semver.org/spec/v2.0.0.html).
## [Unreleased]
### Major Changes
### Minor Changes
### Bug Fixes
## [1.0.0-preview] - 2020-05-06
- Added new 3-joint Worm ragdoll environment. (#3798)
- Introduced the `SideChannelUtils` to register, unregister and access side
channels.
- `Academy.FloatProperties` was removed, please use
`SideChannelUtils.GetSideChannel<FloatPropertiesChannel>()` instead.
- Removed the multi-agent gym option from the gym wrapper. For multi-agent
scenarios, use the [Low Level Python API](../docs/Python-API.md).
- The low level Python API has changed. You can look at the document

`AgentAction` and `AgentReset` have been removed.
- The GhostTrainer has been extended to support asymmetric games and the
asymmetric example environment Strikers Vs. Goalie has been added.
- The SideChannel API has changed (#3833, #3660) :
- Introduced the `SideChannelManager` to register, unregister and access side
channels.
- `EnvironmentParameters` replaces the default `FloatProperties`.
You can access the `EnvironmentParameters` with
`Academy.Instance.EnvironmentParameters` on C# and create an
`EnvironmentParametersChannel` on Python
- `SideChannel.OnMessageReceived` is now a protected method (was public)
- SideChannel IncomingMessages methods now take an optional default argument,
which is used when trying to read more data than the message contains.
- Added a feature to allow sending stats from C# environments to TensorBoard
(and other python StatsWriters). To do this from your code, use
`Academy.Instance.StatsRecorder.Add(key, value)`(#3660)
- CameraSensorComponent.m_Grayscale and RenderTextureSensorComponent.m_Grayscale
were changed from `public` to `private` (#3808).
- The `UnityEnv` class from the `gym-unity` package was renamed

- Public fields and properties on several classes were renamed to follow Unity's
C# style conventions. All public fields and properties now use "PascalCase"
instead of "camelCase"; for example, `Agent.maxStep` was renamed to
`Agent.MaxStep`. For a full list of changes, see the pull request. (#3828)
- Added a feature to allow sending stats from C# environments to TensorBoard
(and other python StatsWriters). To do this from your code, use
`SideChannelUtils.GetSideChannel<StatsSideChannel>().AddStat(key, value)`
(#3660)
- SideChannel IncomingMessages methods now take an optional default argument,
which is used when trying to read more data than the message contains.
- The way that UnityEnvironment decides the port was changed. If no port is
specified, the behavior will depend on the `file_name` parameter. If it is
`None`, 5004 (the editor port) will be used; otherwise 5005 (the base

- Running `mlagents-learn` with the same `--run-id` twice will no longer
overwrite the existing files. (#3705)
- `StackingSensor` was changed from `internal` visibility to `public`
- Academy.InferenceSeed property was added. This is used to initialize the
random number generator in ModelRunner, and is incremented for each ModelRunner. (#3823)
- Model updates can now happen asynchronously with environment steps for better performance. (#3690)
- `num_updates` and `train_interval` for SAC were replaced with `steps_per_update`. (#3690)
- Added `Agent.GetObservations()`, which returns a read-only view of the observations
added in `CollectObservations()`. (#3825)
- Model updates can now happen asynchronously with environment steps for better performance. (#3690)
- `num_updates` and `train_interval` for SAC were replaced with `steps_per_update`. (#3690)
- `WriteAdapter` was renamed to `ObservationWriter`. If you have a custom `ISensor` implementation,
you will need to change the signature of its `Write()` method. (#3834)
- The maximum compatible version of tensorflow was changed to allow tensorflow 2.1 and 2.2. This
will allow use with python 3.8 using tensorflow 2.2.0rc3.
- `UnityRLCapabilities` was added to help inform users when RL features are mismatched between C# and Python packages. (#3831)
### Bug Fixes

6
com.unity.ml-agents/CONTRIBUTING.md


# Contribution Guidelines
Thank you for your interest in contributing to the ML-Agents toolkit! We are
Thank you for your interest in contributing to the ML-Agents Toolkit! We are
ML-Agents toolkit. To facilitate your contributions, we've outlined a brief set
ML-Agents Toolkit. To facilitate your contributions, we've outlined a brief set
of guidelines to ensure that your extensions can be easily integrated.
## Communication

Second, before starting on a project that you intend to contribute to the
ML-Agents toolkit (whether environments or modifications to the codebase), we
ML-Agents Toolkit (whether environments or modifications to the codebase), we
**strongly** recommend posting on our
[Issues page](https://github.com/Unity-Technologies/ml-agents/issues)
and briefly outlining the changes you plan to make. This will enable us to

78
com.unity.ml-agents/Documentation~/com.unity.ml-agents.md


# About ML-Agents package (`com.unity.ml-agents`)
The Unity ML-Agents package contains the C# SDK for the
[Unity ML-Agents Toolkit](https://github.com/Unity-Technologies/ml-agents).
The Unity ML-Agents package contains the C# SDK for the [Unity ML-Agents Toolkit].
The package provides the ability for any Unity scene to be converted into a learning
environment where character behaviors can be trained using a variety of machine learning
algorithms. Additionally, it enables any trained behavior to be embedded back into the Unity
scene. More specifically, the package provides the following core functionalities:
* Define Agents: entities whose behavior will be learned. Agents are entities
that generate observations (through sensors), take actions and receive rewards from
the environment.
The package allows you to convert any Unity scene into a learning
environment and train character behaviors using a variety of machine learning
algorithms. Additionally, it allows you to embed these trained behaviors back into
Unity scenes to control your characters. More specifically, the package provides
the following core functionalities:
* Define Agents: entities, or characters, whose behavior will be learned. Agents are entities
that generate observations (through sensors), take actions, and receive rewards from
the environment.
share the same Behavior and a scene may have multiple Behaviors.
* Record demonstrations of an agent within the Editor. These demonstrations can be
valuable to train a behavior for that agent.
* Embedding a trained behavior into the scene via the
[Unity Inference Engine](https://docs.unity3d.com/Packages/com.unity.barracuda@latest/index.html).
Thus an Agent can switch from a learning behavior to an inference behavior.
share the same Behavior and a scene may have multiple Behaviors.
* Record demonstrations of an agent within the Editor. You can use demonstrations
to help train a behavior for that agent.
* Embedding a trained behavior into the scene via the [Unity Inference Engine].
Embedded behaviors allow you to switch an Agent between learning and inference.
Note that this package does not contain the machine learning algorithms for training
behaviors. It relies on a Python package to orchestrate the training. This package
only enables instrumenting a Unity scene and setting it up for training, and then
embedding the trained model back into your Unity scene.
## Preview package
This package is available as a preview, so it is not ready for production use.
The features and documentation in this package might change before it is verified for release.
Note that the *ML-Agents* package does not contain the machine learning algorithms for training
behaviors. The *ML-Agents* package only supports instrumenting a Unity scene, setting it up for
training, and then embedding the trained model back into your Unity scene. The machine learning
algorithms that orchestrate training are part of the companion [Python package].
## Package contents

|*Runtime*|Contains core C# APIs for integrating ML-Agents into your Unity scene. |
|*Tests*|Contains the unit tests for the package.|
<a name="Installation"></a>
<a name="Installation"></a>
To install this package, follow the instructions in the
[Package Manager documentation](https://docs.unity3d.com/Manual/upm-ui-install.html).
To install this *ML-Agents* package, follow the instructions in the [Package Manager documentation].
To install the Python package to enable training behaviors, follow the instructions on our
[GitHub repository](https://github.com/Unity-Technologies/ml-agents/blob/latest_release/docs/Installation.md).
To install the companion Python package to enable training behaviors, follow the
[installation instructions] on our [GitHub repository].
This version of the Unity ML-Agents package is compatible with the following versions of the Unity Editor:
This version of the Unity ML-Agents package is compatible with the following versions of the
Unity Editor:
* 2018.4 and later (recommended)
* 2018.4 and later
## Known limitations

Currently the speed of the game physics can only be increased to 100x real-time.
The Academy also moves in time with FixedUpdate() rather than Update(), so game
behavior implemented in Update() may be out of sync with the agent decision
making. See
[Execution Order of Event Functions](https://docs.unity3d.com/Manual/ExecutionOrder.html)
for more information.
making. See [Execution Order of Event Functions] for more information.
You can control the frequency of Academy stepping by calling
`Academy.Instance.DisableAutomaticStepping()`, and then calling

If you are new to the Unity ML-Agents package, or have a question after reading
the documentation, you can check out our
[GitHub Repository](https://github.com/Unity-Technologies/ml-agents), which
also includes a number of ways to
[connect with us](https://github.com/Unity-Technologies/ml-agents#community-and-feedback)
including our [ML-Agents Forum](https://forum.unity.com/forums/ml-agents.453/).
[GitHub Repository], which also includes a number of ways to [connect with us]
including our [ML-Agents Forum].
[Unity ML-Agents Toolkit]: https://github.com/Unity-Technologies/ml-agents
[Unity Inference Engine]: https://docs.unity3d.com/Packages/com.unity.barracuda@latest/index.html
[Package Manager documentation]: https://docs.unity3d.com/Manual/upm-ui-install.html
[installation instructions]: https://github.com/Unity-Technologies/ml-agents/blob/latest_release/docs/Installation.md
[GitHub Repository]: https://github.com/Unity-Technologies/ml-agents
[Python package]: https://github.com/Unity-Technologies/ml-agents
[Execution Order of Event Functions]: https://docs.unity3d.com/Manual/ExecutionOrder.html
[connect with us]: https://github.com/Unity-Technologies/ml-agents#community-and-feedback
[ML-Agents Forum]: https://forum.unity.com/forums/ml-agents.453/

7
com.unity.ml-agents/Editor/AgentEditor.cs


var serializedAgent = serializedObject;
serializedAgent.Update();
var maxSteps = serializedAgent.FindProperty(
"maxStep");
var maxSteps = serializedAgent.FindProperty("MaxStep");
new GUIContent(
"Max Step", "The per-agent maximum number of steps."));
new GUIContent("Max Step", "The per-agent maximum number of steps.")
);
serializedAgent.ApplyModifiedProperties();

6
com.unity.ml-agents/Editor/BehaviorParametersEditor.cs


var model = (NNModel)serializedObject.FindProperty("m_Model").objectReferenceValue;
var behaviorParameters = (BehaviorParameters)target;
SensorComponent[] sensorComponents;
if (behaviorParameters.useChildSensors)
if (behaviorParameters.UseChildSensors)
{
sensorComponents = behaviorParameters.GetComponentsInChildren<SensorComponent>();
}

}
var brainParameters = behaviorParameters.brainParameters;
var brainParameters = behaviorParameters.BrainParameters;
if (model != null)
{
barracudaModel = ModelLoader.Load(model);

var failedChecks = Inference.BarracudaModelParamLoader.CheckModel(
barracudaModel, brainParameters, sensorComponents, behaviorParameters.behaviorType
barracudaModel, brainParameters, sensorComponents, behaviorParameters.BehaviorType
);
foreach (var check in failedChecks)
{

10
com.unity.ml-agents/Editor/BrainParametersDrawer.cs


// The height of a line in the Unity Inspectors
const float k_LineHeight = 17f;
const int k_VecObsNumLine = 3;
const string k_ActionSizePropName = "vectorActionSize";
const string k_ActionTypePropName = "vectorActionSpaceType";
const string k_ActionDescriptionPropName = "vectorActionDescriptions";
const string k_VecObsPropName = "vectorObservationSize";
const string k_NumVecObsPropName = "numStackedVectorObservations";
const string k_ActionSizePropName = "VectorActionSize";
const string k_ActionTypePropName = "VectorActionSpaceType";
const string k_ActionDescriptionPropName = "VectorActionDescriptions";
const string k_VecObsPropName = "VectorObservationSize";
const string k_NumVecObsPropName = "NumStackedVectorObservations";
/// <inheritdoc />
public override float GetPropertyHeight(SerializedProperty property, GUIContent label)

4
com.unity.ml-agents/Editor/DemonstrationDrawer.cs


/// </summary>
void MakeActionsProperty(SerializedProperty property)
{
var actSizeProperty = property.FindPropertyRelative("vectorActionSize");
var actSpaceTypeProp = property.FindPropertyRelative("vectorActionSpaceType");
var actSizeProperty = property.FindPropertyRelative("VectorActionSize");
var actSpaceTypeProp = property.FindPropertyRelative("VectorActionSpaceType");
var vecActSizeLabel =
actSizeProperty.displayName + ": " + BuildIntArrayLabel(actSizeProperty);

2
com.unity.ml-agents/Editor/RayPerceptionSensorComponentBaseEditor.cs


// it is not editable during play mode.
EditorGUI.BeginDisabledGroup(!EditorUtilities.CanUpdateModelProperties());
{
EditorGUILayout.PropertyField(so.FindProperty("m_ObservationStacks"), true);
EditorGUILayout.PropertyField(so.FindProperty("m_ObservationStacks"), new GUIContent("Stacked Raycasts"), true);
}
EditorGUI.EndDisabledGroup();

136
com.unity.ml-agents/Runtime/Academy.cs


* API. For more information on each of these entities, in addition to how to
* set-up a learning environment and train the behavior of characters in a
* Unity scene, please browse our documentation pages on GitHub:
* https://github.com/Unity-Technologies/ml-agents/blob/master/docs/
* https://github.com/Unity-Technologies/ml-agents/blob/0.15.1/docs/
*/
namespace MLAgents

}
/// <summary>
/// An Academy is where Agent objects go to train their behaviors.
/// The Academy singleton manages agent training and decision making.
/// When an academy is run, it can either be in inference or training mode.
/// The mode is determined by the presence or absence of a Communicator. In
/// the presence of a communicator, the academy is run in training mode where
/// the states and observations of each agent are sent through the
/// communicator. In the absence of a communicator, the academy is run in
/// inference mode where the agent behavior is determined by the Policy
/// attached to it.
/// Access the Academy singleton through the <see cref="Instance"/>
/// property. The Academy instance is initialized the first time it is accessed (which will
/// typically be by the first <see cref="Agent"/> initialized in a scene).
///
/// At initialization, the Academy attempts to connect to the Python training process through
/// the external communicator. If successful, the training process can train <see cref="Agent"/>
/// instances. When you set an agent's <see cref="BehaviorParameters.BehaviorType"/> setting
/// to <see cref="BehaviorType.Default"/>, the agent exchanges data with the training process
/// to make decisions. If no training process is available, agents with the default behavior
/// fall back to inference or heuristic decisions. (You can also set agents to always use
/// inference or heuristics.)
/// </remarks>
[HelpURL("https://github.com/Unity-Technologies/ml-agents/blob/master/" +
"docs/Learning-Environment-Design.md")]

/// on each side, although we may allow some flexibility in the future.
/// This should be incremented whenever a change is made to the communication protocol.
/// </summary>
const string k_ApiVersion = "0.16.0";
const string k_ApiVersion = "0.17.0";
internal const string k_PackageVersion = "0.15.1-preview";
internal const string k_PackageVersion = "1.0.0-preview";
const int k_EditorTrainingPort = 5004;

static Lazy<Academy> s_Lazy = new Lazy<Academy>(() => new Academy());
/// <summary>
/// True if the Academy is initialized, false otherwise.
/// Reports whether the Academy has been initialized yet.
/// <value><c>True</c> if the Academy is initialized, <c>false</c> otherwise.</value>
public static bool IsInitialized
{
get { return s_Lazy.IsValueCreated; }

/// The singleton Academy object.
/// </summary>
/// <value>Getting the instance initializes the Academy, if necessary.</value>
/// Returns whether or not the communicator is on.
/// Reports whether or not the communicator is on.
/// <returns>
/// <c>true</c>, if communicator is on, <c>false</c> otherwise.
/// </returns>
/// <seealso cref="ICommunicator"/>
/// <value>
/// <c>True</c>, if communicator is on, <c>false</c> otherwise.
/// </value>
public bool IsCommunicatorOn
{
get { return Communicator != null; }

// Flag used to keep track of the first time the Academy is reset.
bool m_HadFirstReset;
// Random seed used for inference.
int m_InferenceSeed;
/// <summary>
/// Set the random seed used for inference. This should be set before any Agents are added
/// to the scene. The seed is passed to the ModelRunner constructor, and incremented each
/// time a new ModelRunner is created.
/// </summary>
public int InferenceSeed
{
set { m_InferenceSeed = value; }
}
/// <summary>
/// Returns the RLCapabilities of the python client that the unity process is connected to.
/// </summary>
internal UnityRLCapabilities TrainerCapabilities { get; set; }
// to facilitate synchronization. More specifically, it ensure
// that all the agents performs their steps in a consistent order (i.e. no
// to facilitate synchronization. More specifically, it ensures
// that all the agents perform their steps in a consistent order (i.e. no
// agent can act based on a decision before another agent has had a chance
// to request a decision).

// Signals to all the listeners that the academy is being destroyed
internal event Action DestroyAction;
// Signals the Agent that a new step is about to start.
// Signals to the Agent that a new step is about to start.
// This will mark the Agent as Done if it has reached its maxSteps.
internal event Action AgentIncrementStep;

/// <summary>
/// Determines whether or not the Academy is automatically stepped during the FixedUpdate phase.
/// </summary>
/// <value>Set <c>true</c> to enable automatic stepping; <c>false</c> to disable.</value>
public bool AutomaticSteppingEnabled
{
get { return m_FixedUpdateStepper != null; }

#endif
}
}
private EnvironmentParameters m_EnvironmentParameters;
private StatsRecorder m_StatsRecorder;
/// Initializes the environment, configures it and initialized the Academy.
/// Returns the <see cref="EnvironmentParameters"/> instance. If training
/// features such as Curriculum Learning or Environment Parameter Randomization are used,
/// then the values of the parameters generated from the training process can be
/// retrieved here.
/// </summary>
/// <returns></returns>
public EnvironmentParameters EnvironmentParameters
{
get { return m_EnvironmentParameters; }
}
/// <summary>
/// Returns the <see cref="StatsRecorder"/> instance. This instance can be used
/// to record any statistics from the Unity environment.
/// </summary>
/// <returns></returns>
public StatsRecorder StatsRecorder
{
get { return m_StatsRecorder; }
}
/// <summary>
/// Initializes the environment, configures it and initializes the Academy.
/// </summary>
void InitializeEnvironment()
{

EnableAutomaticStepping();
SideChannelUtils.RegisterSideChannel(new EngineConfigurationChannel());
SideChannelUtils.RegisterSideChannel(new FloatPropertiesChannel());
SideChannelUtils.RegisterSideChannel(new StatsSideChannel());
SideChannelsManager.RegisterSideChannel(new EngineConfigurationChannel());
m_EnvironmentParameters = new EnvironmentParameters();
m_StatsRecorder = new StatsRecorder();
// Try to launch the communicator by using the arguments passed at launch
var port = ReadPortFromArgs();

unityCommunicationVersion = k_ApiVersion,
unityPackageVersion = k_PackageVersion,
name = "AcademySingleton",
CSharpCapabilities = new UnityRLCapabilities()
// We might have inference-only Agents, so set the seed for them too.
m_InferenceSeed = unityRlInitParameters.seed;
TrainerCapabilities = unityRlInitParameters.TrainerCapabilities;
TrainerCapabilities.WarnOnPythonMissingBaseRLCapabilities();
}
catch
{

}
/// <summary>
/// Returns the current episode counter.
/// The current episode count.
/// <returns>
/// <value>
/// </returns>
/// </value>
public int EpisodeCount
{
get { return m_EpisodeCount; }

/// Returns the current step counter (within the current episode).
/// The current step count (within the current episode).
/// <returns>
/// <value>
/// </returns>
/// </value>
public int StepCount
{
get { return m_StepCount; }

/// Returns the total step counter.
/// Returns the total step count.
/// <returns>
/// <value>
/// </returns>
/// </value>
public int TotalStepCount
{
get { return m_TotalStepCount; }

}
/// <summary>
/// Performs a single environment update to the Academy, and Agent
/// Performs a single environment update of the Academy and Agent
/// objects within the environment.
/// </summary>
public void EnvironmentStep()

// If the communicator is not on, we need to clear the SideChannel sending queue
if (!IsCommunicatorOn)
{
SideChannelUtils.GetSideChannelMessage();
SideChannelsManager.GetSideChannelMessage();
}
using (TimerStack.Instance.Scoped("AgentAct"))

/// NNModel and the InferenceDevice as provided.
/// </summary>
/// <param name="model">The NNModel the ModelRunner must use.</param>
/// <param name="brainParameters">The brainParameters used to create the ModelRunner.</param>
/// <param name="brainParameters">The BrainParameters used to create the ModelRunner.</param>
/// <param name="inferenceDevice">
/// The inference device (CPU or GPU) the ModelRunner will use.
/// </param>

var modelRunner = m_ModelRunners.Find(x => x.HasModel(model, inferenceDevice));
if (modelRunner == null)
{
modelRunner = new ModelRunner(
model, brainParameters, inferenceDevice);
modelRunner = new ModelRunner(model, brainParameters, inferenceDevice, m_InferenceSeed);
m_InferenceSeed++;
}
return modelRunner;
}

Communicator?.Dispose();
Communicator = null;
SideChannelUtils.UnregisterAllSideChannels();
m_EnvironmentParameters.Dispose();
m_StatsRecorder.Dispose();
SideChannelsManager.UnregisterAllSideChannels(); // unregister custom side channels
if (m_ModelRunners != null)
{

622
com.unity.ml-agents/Runtime/Agent.cs


using System;
using System.Collections.Generic;
using System.Collections.ObjectModel;
using UnityEngine.Serialization;
namespace MLAgents
{

public float[] storedVectorActions;
/// <summary>
/// For discrete control, specifies the actions that the agent cannot take. Is true if
/// the action is masked.
/// For discrete control, specifies the actions that the agent cannot take.
/// An element of the mask array is <c>true</c> if the action is prohibited.
/// Current agent reward.
/// The current agent reward.
/// </summary>
public float reward;

}
/// <summary>
/// Agent MonoBehaviour class that is attached to a Unity GameObject, making it
/// an Agent. An agent produces observations and takes actions in the
/// environment. Observations are determined by the cameras attached
/// to the agent in addition to the vector observations implemented by the
/// user in <see cref="Agent.CollectObservations(VectorSensor)"/>.
/// On the other hand, actions are determined by decisions produced by a Policy.
/// Currently, this class is expected to be extended to implement the desired agent behavior.
/// An agent is an actor that can observe its environment, decide on the
/// best course of action using those observations, and execute those actions
/// within the environment.
/// Simply speaking, an agent roams through an environment and at each step
/// of the environment extracts its current observation, sends them to its
/// policy and in return receives an action. In practice,
/// however, an agent need not send its observation at every step since very
/// little may have changed between successive steps.
/// Use the Agent class as the subclass for implementing your own agents. Add
/// your Agent implementation to a [GameObject] in the [Unity scene] that serves
/// as the agent's environment.
/// At any step, an agent may be considered done due to a variety of reasons:
/// - The agent reached an end state within its environment.
/// - The agent reached the maximum # of steps (i.e. timed out).
/// - The academy reached the maximum # of steps (forced agent to be done).
/// Agents in an environment operate in *steps*. At each step, an agent collects observations,
/// passes them to its decision-making policy, and receives an action vector in response.
/// Here, an agent reaches an end state if it completes its task successfully
/// or somehow fails along the way. In the case where an agent is done before
/// the academy, it either resets and restarts, or just lingers until the
/// academy is done.
/// Agents make observations using <see cref="ISensor"/> implementations. The ML-Agents
/// API provides implementations for visual observations (<see cref="CameraSensor"/>),
/// raycast observations (<see cref="RayPerceptionSensor"/>), and arbitrary
/// data observations (<see cref="VectorSensor"/>). You can add the
/// <see cref="CameraSensorComponent"/> and <see cref="RayPerceptionSensorComponent2D"/> or
/// <see cref="RayPerceptionSensorComponent3D"/> components to an agent's [GameObject] to use
/// those sensor types. You can implement the <see cref="CollectObservations(VectorSensor)"/>
/// function in your Agent subclass to use a vector observation. The Agent class calls this
/// function before it uses the observation vector to make a decision. (If you only use
/// visual or raycast observations, you do not need to implement
/// <see cref="CollectObservations"/>.)
/// An important note regarding steps and episodes is due. Here, an agent step
/// corresponds to an academy step, which also corresponds to Unity
/// environment step (i.e. each FixedUpdate call). This is not the case for
/// episodes. The academy controls the global episode count and each agent
/// controls its own local episode count and can reset and start a new local
/// episode independently (based on its own experience). Thus an academy
/// (global) episode can be viewed as the upper-bound on an agents episode
/// length and that within a single global episode, an agent may have completed
/// multiple local episodes. Consequently, if an agent max step is
/// set to a value larger than the academy max steps value, then the academy
/// value takes precedence (since the agent max step will never be reached).
/// Assign a decision making policy to an agent using a <see cref="BehaviorParameters"/>
/// component attached to the agent's [GameObject]. The <see cref="BehaviorType"/> setting
/// determines how decisions are made:
/// Lastly, note that at any step the policy to the agent is allowed to
/// change model with <see cref="SetModel"/>.
/// * <see cref="BehaviorType.Default"/>: decisions are made by the external process,
/// when connected. Otherwise, decisions are made using inference. If no inference model
/// is specified in the BehaviorParameters component, then heuristic decision
/// making is used.
/// * <see cref="BehaviorType.InferenceOnly"/>: decisions are always made using the trained
/// model specified in the <see cref="BehaviorParameters"/> component.
/// * <see cref="BehaviorType.HeuristicOnly"/>: when a decision is needed, the agent's
/// <see cref="Heuristic"/> function is called. Your implementation is responsible for
/// providing the appropriate action.
/// Implementation-wise, it is required that this class is extended and the
/// virtual methods overridden. For sample implementations of agent behavior,
/// see the Examples/ directory within this Unity project.
/// To trigger an agent decision automatically, you can attach a <see cref="DecisionRequester"/>
/// component to the Agent game object. You can also call the agent's <see cref="RequestDecision"/>
/// function manually. You only need to call <see cref="RequestDecision"/> when the agent is
/// in a position to act upon the decision. In many cases, this will be every [FixedUpdate]
/// callback, but could be less frequent. For example, an agent that hops around its environment
/// can only take an action when it touches the ground, so several frames might elapse between
/// one decision and the need for the next.
///
/// Use the <see cref="OnActionReceived"/> function to implement the actions your agent can take,
/// such as moving to reach a goal or interacting with its environment.
///
/// When you call <see cref="EndEpisode"/> on an agent or the agent reaches its <see cref="maxStep"/> count,