
Merge remote-tracking branch 'origin/master' into MLA-1734-demo-provider

Branch: MLA-1734-demo-provider
Chris Elion, 4 years ago
Current commit: e4f51ca7

163 files changed, with 1,456 insertions and 897 deletions
  1. .github/workflows/pytest.yml (6 changes)
  2. .yamato/com.unity.ml-agents-performance.yml (1 change)
  3. .yamato/com.unity.ml-agents-test.yml (4 changes)
  4. .yamato/compressed-sensor-test.yml (4 changes)
  5. .yamato/gym-interface-test.yml (4 changes)
  6. .yamato/python-ll-api-test.yml (4 changes)
  7. .yamato/test_versions.metafile (15 changes)
  8. DevProject/Packages/manifest.json (2 changes)
  9. Project/Assets/ML-Agents/Editor/Tests/StandaloneBuildTest.cs (2 changes)
  10. Project/Assets/ML-Agents/Examples/3DBall/Prefabs/3DBall.prefab (6 changes)
  11. Project/Assets/ML-Agents/Examples/3DBall/Prefabs/3DBallHardNew.prefab (24 changes)
  12. Project/Assets/ML-Agents/Examples/3DBall/Prefabs/Visual3DBall.prefab (6 changes)
  13. Project/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DHardAgent.cs (20 changes)
  14. Project/Assets/ML-Agents/Examples/Basic/Prefabs/Basic.prefab (6 changes)
  15. Project/Assets/ML-Agents/Examples/Basic/Scripts/BasicActuatorComponent.cs (2 changes)
  16. Project/Assets/ML-Agents/Examples/Bouncer/Prefabs/Environment.prefab (21 changes)
  17. Project/Assets/ML-Agents/Examples/Crawler/Prefabs/CrawlerBase.prefab (6 changes)
  18. Project/Assets/ML-Agents/Examples/FoodCollector/Prefabs/FoodCollectorArea.prefab (10 changes)
  19. Project/Assets/ML-Agents/Examples/FoodCollector/Prefabs/GridFoodCollectorArea.prefab (10 changes)
  20. Project/Assets/ML-Agents/Examples/FoodCollector/Prefabs/VisualFoodCollectorArea.prefab (8 changes)
  21. Project/Assets/ML-Agents/Examples/GridWorld/Prefabs/Area.prefab (23 changes)
  22. Project/Assets/ML-Agents/Examples/GridWorld/Scenes/GridWorld.unity (8 changes)
  23. Project/Assets/ML-Agents/Examples/Hallway/Prefabs/SymbolFinderArea.prefab (6 changes)
  24. Project/Assets/ML-Agents/Examples/Hallway/Prefabs/VisualSymbolFinderArea.prefab (43 changes)
  25. Project/Assets/ML-Agents/Examples/Match3/Prefabs/Match3Heuristic.prefab (6 changes)
  26. Project/Assets/ML-Agents/Examples/Match3/Prefabs/Match3VectorObs.prefab (6 changes)
  27. Project/Assets/ML-Agents/Examples/Match3/Prefabs/Match3VisualObs.prefab (6 changes)
  28. Project/Assets/ML-Agents/Examples/Match3/Scripts/Match3ExampleActuatorComponent.cs (2 changes)
  29. Project/Assets/ML-Agents/Examples/PushBlock/Prefabs/PushBlockArea.prefab (22 changes)
  30. Project/Assets/ML-Agents/Examples/PushBlock/Prefabs/PushBlockVisualArea.prefab (8 changes)
  31. Project/Assets/ML-Agents/Examples/Pyramids/Prefabs/AreaPB.prefab (22 changes)
  32. Project/Assets/ML-Agents/Examples/Pyramids/Prefabs/VisualAreaPyramids.prefab (43 changes)
  33. Project/Assets/ML-Agents/Examples/Reacher/Prefabs/Agent.prefab (22 changes)
  34. Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ModelOverrider.cs (29 changes)
  35. Project/Assets/ML-Agents/Examples/Soccer/Prefabs/SoccerFieldTwos.prefab (28 changes)
  36. Project/Assets/ML-Agents/Examples/Soccer/Prefabs/StrikersVsGoalieField.prefab (21 changes)
  37. Project/Assets/ML-Agents/Examples/Tennis/Prefabs/TennisArea.prefab (18 changes)
  38. Project/Assets/ML-Agents/Examples/Walker/Prefabs/Ragdoll/WalkerRagdollBase.prefab (12 changes)
  39. Project/Assets/ML-Agents/Examples/Walker/Prefabs/Ragdoll/WalkerRagdollDySingleSpeedVariant.prefab (5 changes)
  40. Project/Assets/ML-Agents/Examples/WallJump/Prefabs/WallJumpArea.prefab (7 changes)
  41. Project/Assets/ML-Agents/Examples/Worm/Prefabs/WormBasePrefab.prefab (6 changes)
  42. Project/ProjectSettings/TagManager.asset (1 change)
  43. com.unity.ml-agents.extensions/Runtime/Match3/Match3ActuatorComponent.cs (2 changes)
  44. com.unity.ml-agents/CHANGELOG.md (33 changes)
  45. com.unity.ml-agents/Editor/BehaviorParametersEditor.cs (16 changes)
  46. com.unity.ml-agents/Runtime/Academy.cs (55 changes)
  47. com.unity.ml-agents/Runtime/Actuators/ActuatorComponent.cs (14 changes)
  48. com.unity.ml-agents/Runtime/Actuators/ActuatorManager.cs (14 changes)
  49. com.unity.ml-agents/Runtime/Agent.cs (6 changes)
  50. com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs (15 changes)
  51. com.unity.ml-agents/Runtime/Communicator/ICommunicator.cs (5 changes)
  52. com.unity.ml-agents/Runtime/Communicator/RpcCommunicator.cs (147 changes)
  53. com.unity.ml-agents/Runtime/Communicator/UnityRLCapabilities.cs (5 changes)
  54. com.unity.ml-agents/Runtime/Grpc/CommunicatorObjects/Capabilities.cs (40 changes)
  55. com.unity.ml-agents/Runtime/Inference/ApplierImpl.cs (129 changes)
  56. com.unity.ml-agents/Runtime/Inference/BarracudaModelParamLoader.cs (70 changes)
  57. com.unity.ml-agents/Runtime/Inference/ModelRunner.cs (28 changes)
  58. com.unity.ml-agents/Runtime/Inference/Utils/Multinomial.cs (17 changes)
  59. com.unity.ml-agents/Runtime/Policies/BarracudaPolicy.cs (12 changes)
  60. com.unity.ml-agents/Runtime/Policies/BehaviorParameters.cs (2 changes)
  61. com.unity.ml-agents/Runtime/Sensors/BufferSensor.cs (21 changes)
  62. com.unity.ml-agents/Runtime/Sensors/BufferSensorComponent.cs (14 changes)
  63. com.unity.ml-agents/Runtime/Sensors/CameraSensor.cs (17 changes)
  64. com.unity.ml-agents/Runtime/Sensors/IDimensionPropertiesSensor.cs (2 changes)
  65. com.unity.ml-agents/Runtime/SideChannels/SideChannel.cs (14 changes)
  66. com.unity.ml-agents/Runtime/StatsRecorder.cs (7 changes)
  67. com.unity.ml-agents/Runtime/Timer.cs (4 changes)
  68. com.unity.ml-agents/Tests/Editor/Communicator/RpcCommunicatorTests.cs (19 changes)
  69. com.unity.ml-agents/Tests/Editor/DiscreteActionOutputApplierTest.cs (198 changes)
  70. com.unity.ml-agents/Tests/Editor/ModelRunnerTest.cs (13 changes)
  71. com.unity.ml-agents/Tests/Editor/ParameterLoaderTest.cs (30 changes)
  72. com.unity.ml-agents/package.json (2 changes)
  73. docs/Installation.md (2 changes)
  74. docs/Learning-Environment-Design-Agents.md (64 changes)
  75. docs/Learning-Environment-Examples.md (27 changes)
  76. docs/Migrating.md (9 changes)
  77. docs/Training-Configuration-File.md (2 changes)
  78. gym-unity/README.md (2 changes)
  79. ml-agents-envs/mlagents_envs/communicator_objects/capabilities_pb2.py (11 changes)
  80. ml-agents-envs/mlagents_envs/communicator_objects/capabilities_pb2.pyi (6 changes)
  81. ml-agents-envs/mlagents_envs/environment.py (4 changes)
  82. ml-agents-envs/mlagents_envs/rpc_utils.py (16 changes)
  83. ml-agents-envs/mlagents_envs/side_channel/stats_side_channel.py (3 changes)
  84. ml-agents-envs/mlagents_envs/tests/test_rpc_utils.py (14 changes)
  85. ml-agents/mlagents/trainers/buffer.py (392 changes)
  86. ml-agents/mlagents/trainers/cli_utils.py (3 changes)
  87. ml-agents/mlagents/trainers/demo_loader.py (16 changes)
  88. ml-agents/mlagents/trainers/ghost/trainer.py (40 changes)
  89. ml-agents/mlagents/trainers/learn.py (41 changes)
  90. ml-agents/mlagents/trainers/ppo/optimizer_torch.py (22 changes)
  91. ml-agents/mlagents/trainers/ppo/trainer.py (33 changes)
  92. ml-agents/mlagents/trainers/sac/optimizer_torch.py (24 changes)
  93. ml-agents/mlagents/trainers/sac/trainer.py (7 changes)
  94. ml-agents/mlagents/trainers/settings.py (18 changes)
  95. ml-agents/mlagents/trainers/stats.py (39 changes)
  96. ml-agents/mlagents/trainers/tests/__init__.py (6 changes)
  97. ml-agents/mlagents/trainers/tests/mock_brain.py (9 changes)
  98. ml-agents/mlagents/trainers/tests/test_agent_processor.py (18 changes)
  99. ml-agents/mlagents/trainers/tests/test_buffer.py (60 changes)
  100. ml-agents/mlagents/trainers/tests/test_demo_loader.py (9 changes)

(Only the first 100 of the 163 changed files are listed on this page.)

.github/workflows/pytest.yml (6 changes)


jobs:
pytest:
runs-on: ubuntu-latest
env:
TEST_ENFORCE_BUFFER_KEY_TYPES: 1
strategy:
matrix:
python-version: [3.6.x, 3.7.x, 3.8.x]

run: python -c "import sys; print(sys.version)"
- name: Install dependencies
run: |
# pin pip to workaround https://github.com/pypa/pip/issues/9180
python -m pip install pip==20.2
python -m pip install --upgrade pip
python -m pip install --progress-bar=off -e ./ml-agents-plugin-examples
- name: Save python dependencies
run: |
pip freeze > pip_versions-${{ matrix.python-version }}.txt

.yamato/com.unity.ml-agents-performance.yml (1 change)


test_editors:
- version: 2019.4
- version: 2020.1
- version: 2020.2
---
{% for editor in test_editors %}

.yamato/com.unity.ml-agents-test.yml (4 changes)


enableCodeCoverage: !!bool true
testProject: DevProject
enableNoDefaultPackages: !!bool true
- version: 2020.1
enableCodeCoverage: !!bool true
testProject: DevProject
enableNoDefaultPackages: !!bool true
- version: 2020.2
enableCodeCoverage: !!bool true
testProject: DevProject

.yamato/compressed-sensor-test.yml (4 changes)


- .yamato/standalone-build-test.yml#test_linux_standalone_{{ editor.version }}
triggers:
cancel_old_ci: true
{% if editor.extra_test == "sensor" %}
expression: |
(pull_request.target eq "master" OR
pull_request.target match "release.+") AND

pull_request.changes.any match "Project/**" OR
pull_request.changes.any match "ml-agents/**" OR
pull_request.changes.any match "ml-agents/tests/yamato/**" OR
{% endif %}
{% endfor %}

.yamato/gym-interface-test.yml (4 changes)


- .yamato/standalone-build-test.yml#test_linux_standalone_{{ editor.version }}
triggers:
cancel_old_ci: true
{% if editor.extra_test == "gym" %}
expression: |
(pull_request.target eq "master" OR
pull_request.target match "release.+") AND

pull_request.changes.any match "ml-agents/**" OR
pull_request.changes.any match "ml-agents/tests/yamato/**" OR
{% endif %}
{% endfor %}

.yamato/python-ll-api-test.yml (4 changes)


- .yamato/standalone-build-test.yml#test_linux_standalone_{{ editor.version }}
triggers:
cancel_old_ci: true
{% if editor.extra_test == "llapi" %}
expression: |
(pull_request.target eq "master" OR
pull_request.target match "release.+") AND

pull_request.changes.any match "ml-agents/**" OR
pull_request.changes.any match "ml-agents/tests/yamato/**" OR
{% endif %}
{% endfor %}

.yamato/test_versions.metafile (15 changes)


# List of editor versions for standalone-build-test and its dependencies.
# csharp_backcompat_version is used in training-int-tests to determine the
# older package version to run the backwards compat tests against.
# We always run training-int-tests for all versions of the editor
# For each "other" test, we only run it against a single version of the
# editor to reduce the number of yamato jobs
csharp_backcompat_version: 1.0.0
extra_test: llapi
csharp_backcompat_version: 1.0.0
- version: 2020.1
csharp_backcompat_version: 1.0.0
extra_test: gym
# 2020.2 moved the AssetImporters namespace
# but we didn't handle this until 1.2.0
csharp_backcompat_version: 1.2.0
extra_test: sensor

DevProject/Packages/manifest.json (2 changes)


"com.unity.purchasing": "2.1.0",
"com.unity.test-framework": "1.1.16",
"com.unity.test-framework.performance": "2.2.0-preview",
"com.unity.testtools.codecoverage": "0.2.2-preview",
"com.unity.testtools.codecoverage": "1.0.0-pre.3",
"com.unity.textmeshpro": "2.0.1",
"com.unity.timeline": "1.2.12",
"com.unity.ugui": "1.0.0",

Project/Assets/ML-Agents/Editor/Tests/StandaloneBuildTest.cs (2 changes)


scenes,
outputPath,
buildTarget,
BuildOptions.None
BuildOptions.Development
);
var isOk = buildResult.summary.result == BuildResult.Succeeded;
var error = "";
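
For context, the hunk above swaps `BuildOptions.None` for `BuildOptions.Development` in the standalone test build. A minimal editor-side sketch of the full call, assuming the standard `BuildPipeline.BuildPlayer` API; the scene list and output path here are illustrative placeholders, not values from this commit:

```csharp
using UnityEditor;
using UnityEditor.Build.Reporting;

public static class StandaloneBuildSketch
{
    public static bool BuildDevelopmentPlayer()
    {
        // Placeholder scene and output path, for illustration only.
        var scenes = new[] { "Assets/ML-Agents/Examples/3DBall/Scenes/3DBall.unity" };
        var outputPath = "testPlayer";
        var buildResult = BuildPipeline.BuildPlayer(
            scenes,
            outputPath,
            BuildTarget.StandaloneOSX,
            BuildOptions.Development   // was BuildOptions.None before this change
        );
        return buildResult.summary.result == BuildResult.Succeeded;
    }
}
```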

Project/Assets/ML-Agents/Examples/3DBall/Prefabs/3DBall.prefab (6 changes)


m_BrainParameters:
VectorObservationSize: 8
NumStackedVectorObservations: 1
m_ActionSpec:
m_NumContinuousActions: 2
BranchSizes:
hasUpgradedBrainParametersWithActionSpec: 1
m_InferenceDevice: 0
m_InferenceDevice: 2
m_BehaviorType: 0
m_BehaviorName: 3DBall
TeamId: 0

Project/Assets/ML-Agents/Examples/3DBall/Prefabs/3DBallHardNew.prefab (24 changes)


m_Name:
m_EditorClassIdentifier:
m_BrainParameters:
vectorObservationSize: 5
numStackedVectorObservations: 9
vectorActionSize: 02000000
vectorActionDescriptions: []
vectorActionSpaceType: 1
m_Model: {fileID: 11400000, guid: 27d49984757ed46b181090a532ef48e5, type: 3}
m_InferenceDevice: 0
VectorObservationSize: 0
NumStackedVectorObservations: 1
m_ActionSpec:
m_NumContinuousActions: 2
BranchSizes:
VectorActionSize: 02000000
VectorActionDescriptions: []
VectorActionSpaceType: 1
hasUpgradedBrainParametersWithActionSpec: 1
m_Model: {fileID: 11400000, guid: d179c44c147aa4ffbbb725f009eca3b8, type: 3}
m_InferenceDevice: 2
m_UseChildActuators: 1
m_ObservableAttributeHandling: 1
--- !u!114 &114466000339026140
MonoBehaviour:
m_ObjectHideFlags: 0

agentParameters:
maxStep: 0
hasUpgradedFromAgentParameters: 1
maxStep: 5000
MaxStep: 5000
ball: {fileID: 1142513601053358}
--- !u!114 &8193279139064749781
MonoBehaviour:

m_EditorClassIdentifier:
DecisionPeriod: 5
TakeActionsBetweenDecisions: 1
offsetStep: 0
--- !u!114 &7923264721978289873
MonoBehaviour:
m_ObjectHideFlags: 0

m_Script: {fileID: 11500000, guid: 3a6da8f78a394c6ab027688eab81e04d, type: 3}
m_Name:
m_EditorClassIdentifier:
debugCommandLineOverride:
--- !u!1 &1978072206102878
GameObject:
m_ObjectHideFlags: 0

Project/Assets/ML-Agents/Examples/3DBall/Prefabs/Visual3DBall.prefab (6 changes)


m_BrainParameters:
VectorObservationSize: 0
NumStackedVectorObservations: 1
m_ActionSpec:
m_NumContinuousActions: 2
BranchSizes:
hasUpgradedBrainParametersWithActionSpec: 1
m_InferenceDevice: 0
m_InferenceDevice: 2
m_BehaviorType: 0
m_BehaviorName: Visual3DBall
TeamId: 0

Project/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DHardAgent.cs (20 changes)


using Unity.MLAgents;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Sensors;
using Unity.MLAgents.Sensors.Reflection;
public class Ball3DHardAgent : Agent
{

SetResetParameters();
}
public override void CollectObservations(VectorSensor sensor)
[Observable(numStackedObservations: 9)]
Vector2 Rotation
sensor.AddObservation(gameObject.transform.rotation.z);
sensor.AddObservation(gameObject.transform.rotation.x);
sensor.AddObservation((ball.transform.position - gameObject.transform.position));
get
{
return new Vector2(gameObject.transform.rotation.z, gameObject.transform.rotation.x);
}
}
[Observable(numStackedObservations: 9)]
Vector3 PositionDelta
{
get
{
return ball.transform.position - gameObject.transform.position;
}
}
public override void OnActionReceived(ActionBuffers actionBuffers)
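
The hunk above replaces manual `CollectObservations` calls with `[Observable]`-attributed properties, stacking each observation 9 deep instead of stacking the whole vector. A minimal sketch of the resulting pattern, assuming an agent with a `ball` reference like the example's (the class name is illustrative); note that `BehaviorParameters.ObservableAttributeHandling` must not be `Ignore` for the attributes to be picked up:

```csharp
using UnityEngine;
using Unity.MLAgents;
using Unity.MLAgents.Sensors.Reflection;

public class ObservableSketchAgent : Agent
{
    public GameObject ball;   // same role as Ball3DHardAgent's ball field

    // Before: values were pushed by hand in CollectObservations(VectorSensor).
    // After: reflection-based sensors read these properties automatically,
    // with per-observation stacking (9 past values each).
    [Observable(numStackedObservations: 9)]
    Vector2 Rotation
    {
        get { return new Vector2(transform.rotation.z, transform.rotation.x); }
    }

    [Observable(numStackedObservations: 9)]
    Vector3 PositionDelta
    {
        get { return ball.transform.position - transform.position; }
    }
}
```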

Project/Assets/ML-Agents/Examples/Basic/Prefabs/Basic.prefab (6 changes)


m_BrainParameters:
VectorObservationSize: 0
NumStackedVectorObservations: 1
m_ActionSpec:
m_NumContinuousActions: 0
BranchSizes:
hasUpgradedBrainParametersWithActionSpec: 1
m_InferenceDevice: 0
m_InferenceDevice: 2
m_BehaviorType: 0
m_BehaviorName: Basic
TeamId: 0

Project/Assets/ML-Agents/Examples/Basic/Scripts/BasicActuatorComponent.cs (2 changes)


/// Creates a BasicActuator.
/// </summary>
/// <returns></returns>
#pragma warning disable 672
#pragma warning restore 672
{
return new BasicActuator(basicController);
}

Project/Assets/ML-Agents/Examples/Bouncer/Prefabs/Environment.prefab (21 changes)


m_Name:
m_EditorClassIdentifier:
m_BrainParameters:
vectorObservationSize: 6
numStackedVectorObservations: 3
vectorActionSize: 03000000
vectorActionDescriptions: []
vectorActionSpaceType: 1
VectorObservationSize: 6
NumStackedVectorObservations: 3
m_ActionSpec:
m_NumContinuousActions: 3
BranchSizes:
VectorActionSize: 03000000
VectorActionDescriptions: []
VectorActionSpaceType: 1
hasUpgradedBrainParametersWithActionSpec: 1
m_InferenceDevice: 0
m_InferenceDevice: 2
m_UseChildActuators: 1
m_ObservableAttributeHandling: 0
--- !u!114 &114878620968301562
MonoBehaviour:
m_ObjectHideFlags: 0

agentParameters:
maxStep: 0
hasUpgradedFromAgentParameters: 1
maxStep: 0
MaxStep: 0
target: {fileID: 1160631129428284}
bodyObject: {fileID: 1680588139522898}
strength: 500

m_Script: {fileID: 11500000, guid: 3a6da8f78a394c6ab027688eab81e04d, type: 3}
m_Name:
m_EditorClassIdentifier:
debugCommandLineOverride:
--- !u!1 &1680588139522898
GameObject:
m_ObjectHideFlags: 0

Project/Assets/ML-Agents/Examples/Crawler/Prefabs/CrawlerBase.prefab (6 changes)


m_BrainParameters:
VectorObservationSize: 32
NumStackedVectorObservations: 1
m_ActionSpec:
m_NumContinuousActions: 20
BranchSizes:
hasUpgradedBrainParametersWithActionSpec: 1
m_InferenceDevice: 0
m_InferenceDevice: 2
m_BehaviorType: 0
m_BehaviorName:
TeamId: 0

Project/Assets/ML-Agents/Examples/FoodCollector/Prefabs/FoodCollectorArea.prefab (10 changes)


VectorActionSpaceType: 1
hasUpgradedBrainParametersWithActionSpec: 1
m_Model: {fileID: 11400000, guid: 3210b528a2bc44a86bd6bd1d571070f8, type: 3}
m_InferenceDevice: 0
m_InferenceDevice: 2
m_BehaviorType: 0
m_BehaviorName: FoodCollector
TeamId: 0

VectorActionSpaceType: 1
hasUpgradedBrainParametersWithActionSpec: 1
m_Model: {fileID: 11400000, guid: 3210b528a2bc44a86bd6bd1d571070f8, type: 3}
m_InferenceDevice: 0
m_InferenceDevice: 2
m_BehaviorType: 0
m_BehaviorName: FoodCollector
TeamId: 0

VectorActionSpaceType: 1
hasUpgradedBrainParametersWithActionSpec: 1
m_Model: {fileID: 11400000, guid: 3210b528a2bc44a86bd6bd1d571070f8, type: 3}
m_InferenceDevice: 0
m_InferenceDevice: 2
m_BehaviorType: 0
m_BehaviorName: FoodCollector
TeamId: 0

VectorActionSpaceType: 1
hasUpgradedBrainParametersWithActionSpec: 1
m_Model: {fileID: 11400000, guid: 3210b528a2bc44a86bd6bd1d571070f8, type: 3}
m_InferenceDevice: 0
m_InferenceDevice: 2
m_BehaviorType: 0
m_BehaviorName: FoodCollector
TeamId: 0

VectorActionSpaceType: 1
hasUpgradedBrainParametersWithActionSpec: 1
m_Model: {fileID: 11400000, guid: 3210b528a2bc44a86bd6bd1d571070f8, type: 3}
m_InferenceDevice: 0
m_InferenceDevice: 2
m_BehaviorType: 0
m_BehaviorName: FoodCollector
TeamId: 0

Project/Assets/ML-Agents/Examples/FoodCollector/Prefabs/GridFoodCollectorArea.prefab (10 changes)


VectorActionSpaceType: 0
hasUpgradedBrainParametersWithActionSpec: 1
m_Model: {fileID: 11400000, guid: 75910f45f20be49b18e2b95879a217b2, type: 3}
m_InferenceDevice: 0
m_InferenceDevice: 2
m_BehaviorType: 0
m_BehaviorName: GridFoodCollector
TeamId: 0

VectorActionSpaceType: 0
hasUpgradedBrainParametersWithActionSpec: 1
m_Model: {fileID: 11400000, guid: 75910f45f20be49b18e2b95879a217b2, type: 3}
m_InferenceDevice: 0
m_InferenceDevice: 2
m_BehaviorType: 0
m_BehaviorName: GridFoodCollector
TeamId: 0

VectorActionSpaceType: 0
hasUpgradedBrainParametersWithActionSpec: 1
m_Model: {fileID: 11400000, guid: 75910f45f20be49b18e2b95879a217b2, type: 3}
m_InferenceDevice: 0
m_InferenceDevice: 2
m_BehaviorType: 0
m_BehaviorName: GridFoodCollector
TeamId: 0

VectorActionSpaceType: 0
hasUpgradedBrainParametersWithActionSpec: 1
m_Model: {fileID: 11400000, guid: 75910f45f20be49b18e2b95879a217b2, type: 3}
m_InferenceDevice: 0
m_InferenceDevice: 2
m_BehaviorType: 0
m_BehaviorName: GridFoodCollector
TeamId: 0

VectorActionSpaceType: 0
hasUpgradedBrainParametersWithActionSpec: 1
m_Model: {fileID: 11400000, guid: 75910f45f20be49b18e2b95879a217b2, type: 3}
m_InferenceDevice: 0
m_InferenceDevice: 2
m_BehaviorType: 0
m_BehaviorName: GridFoodCollector
TeamId: 0

Project/Assets/ML-Agents/Examples/FoodCollector/Prefabs/VisualFoodCollectorArea.prefab (8 changes)


VectorActionSpaceType: 0
hasUpgradedBrainParametersWithActionSpec: 1
m_Model: {fileID: 11400000, guid: ec4b31b5d66ca4e51ae3ac41945facb2, type: 3}
m_InferenceDevice: 0
m_InferenceDevice: 2
m_BehaviorType: 0
m_BehaviorName: VisualFoodCollector
TeamId: 0

VectorActionSpaceType: 0
hasUpgradedBrainParametersWithActionSpec: 1
m_Model: {fileID: 11400000, guid: ec4b31b5d66ca4e51ae3ac41945facb2, type: 3}
m_InferenceDevice: 0
m_InferenceDevice: 2
m_BehaviorType: 0
m_BehaviorName: VisualFoodCollector
TeamId: 0

VectorActionSpaceType: 0
hasUpgradedBrainParametersWithActionSpec: 1
m_Model: {fileID: 11400000, guid: ec4b31b5d66ca4e51ae3ac41945facb2, type: 3}
m_InferenceDevice: 0
m_InferenceDevice: 2
m_BehaviorType: 0
m_BehaviorName: VisualFoodCollector
TeamId: 0

VectorActionSpaceType: 0
hasUpgradedBrainParametersWithActionSpec: 1
m_Model: {fileID: 11400000, guid: ec4b31b5d66ca4e51ae3ac41945facb2, type: 3}
m_InferenceDevice: 0
m_InferenceDevice: 2
m_BehaviorType: 0
m_BehaviorName: VisualFoodCollector
TeamId: 0

Project/Assets/ML-Agents/Examples/GridWorld/Prefabs/Area.prefab (23 changes)


m_Name:
m_EditorClassIdentifier:
m_BrainParameters:
vectorObservationSize: 0
numStackedVectorObservations: 1
vectorActionSize: 05000000
vectorActionDescriptions: []
vectorActionSpaceType: 0
VectorObservationSize: 0
NumStackedVectorObservations: 1
m_ActionSpec:
m_NumContinuousActions: 0
BranchSizes: 05000000
VectorActionSize: 05000000
VectorActionDescriptions: []
VectorActionSpaceType: 0
hasUpgradedBrainParametersWithActionSpec: 1
m_InferenceDevice: 0
m_InferenceDevice: 2
m_UseChildActuators: 1
m_ObservableAttributeHandling: 0
--- !u!114 &114650561397225712
MonoBehaviour:
m_ObjectHideFlags: 0

agentParameters:
maxStep: 0
hasUpgradedFromAgentParameters: 1
maxStep: 100
MaxStep: 100
area: {fileID: 114704252266302846}
timeBetweenDecisionsAtInference: 0.15
renderCamera: {fileID: 0}

m_Width: 84
m_Height: 64
m_Grayscale: 0
m_ObservationStacks: 1
m_Compression: 1
--- !u!114 &7980686505185502968
MonoBehaviour:

m_Script: {fileID: 11500000, guid: 3a6da8f78a394c6ab027688eab81e04d, type: 3}
m_Name:
m_EditorClassIdentifier:
debugCommandLineOverride:
--- !u!1 &1625008366184734
GameObject:
m_ObjectHideFlags: 0

trueAgent: {fileID: 1488387672112076}
goalPref: {fileID: 1508142483324970, guid: 1ec4e4e96e7514d45b7ebc3ba5a9a481, type: 3}
pitPref: {fileID: 1811317785436014, guid: d13ee2db77b3a4dcc8664d2fe2a0f219, type: 3}
numberOfObstacles: 1
--- !u!1 &1656910849934022
GameObject:
m_ObjectHideFlags: 0

Project/Assets/ML-Agents/Examples/GridWorld/Scenes/GridWorld.unity (8 changes)


m_ReflectionIntensity: 1
m_CustomReflection: {fileID: 0}
m_Sun: {fileID: 0}
m_IndirectSpecularColor: {r: 0.44971228, g: 0.49977815, b: 0.57563734, a: 1}
m_IndirectSpecularColor: {r: 0.4497121, g: 0.49977785, b: 0.57563704, a: 1}
m_UseRadianceAmbientProbe: 0
--- !u!157 &3
LightmapSettings:

m_BrainParameters:
VectorObservationSize: 0
NumStackedVectorObservations: 1
m_ActionSpec:
m_NumContinuousActions: 0
BranchSizes: 05000000
hasUpgradedBrainParametersWithActionSpec: 1
m_InferenceDevice: 0
m_InferenceDevice: 2
m_BehaviorType: 0
m_BehaviorName: GridWorld
TeamId: 0

Project/Assets/ML-Agents/Examples/Hallway/Prefabs/SymbolFinderArea.prefab (6 changes)


m_BrainParameters:
VectorObservationSize: 1
NumStackedVectorObservations: 3
m_ActionSpec:
m_NumContinuousActions: 0
BranchSizes: 05000000
hasUpgradedBrainParametersWithActionSpec: 1
m_InferenceDevice: 0
m_InferenceDevice: 2
m_BehaviorType: 0
m_BehaviorName: Hallway
TeamId: 0

Project/Assets/ML-Agents/Examples/Hallway/Prefabs/VisualSymbolFinderArea.prefab (43 changes)


m_Name:
m_EditorClassIdentifier:
m_BrainParameters:
vectorObservationSize: 0
numStackedVectorObservations: 1
vectorActionSize: 05000000
vectorActionDescriptions: []
vectorActionSpaceType: 0
VectorObservationSize: 0
NumStackedVectorObservations: 1
m_ActionSpec:
m_NumContinuousActions: 0
BranchSizes: 05000000
VectorActionSize: 05000000
VectorActionDescriptions: []
VectorActionSpaceType: 0
hasUpgradedBrainParametersWithActionSpec: 1
m_InferenceDevice: 0
m_InferenceDevice: 2
m_TeamID: 0
m_useChildSensors: 1
TeamId: 0
m_UseChildSensors: 1
m_UseChildActuators: 1
m_ObservableAttributeHandling: 0
--- !u!114 &114451776683649118
MonoBehaviour:
m_ObjectHideFlags: 0

m_Script: {fileID: 11500000, guid: b446afae240924105b36d07e8d17a608, type: 3}
m_Name:
m_EditorClassIdentifier:
maxStep: 3000
agentParameters:
maxStep: 0
hasUpgradedFromAgentParameters: 1
MaxStep: 3000
ground: {fileID: 1625056884785366}
area: {fileID: 1689874756253538}
symbolOGoal: {fileID: 1800868804754718}

m_Script: {fileID: 11500000, guid: 282f342c2ab144bf38be65d4d0c4e07d, type: 3}
m_Name:
m_EditorClassIdentifier:
camera: {fileID: 20961984019151212}
sensorName: CameraSensor
width: 84
height: 84
grayscale: 0
compression: 1
m_Camera: {fileID: 20961984019151212}
m_SensorName: CameraSensor
m_Width: 84
m_Height: 84
m_Grayscale: 0
m_ObservationStacks: 1
m_Compression: 1
--- !u!114 &640264344416331590
MonoBehaviour:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
DecisionPeriod: 6
RepeatAction: 1
offsetStep: 0
TakeActionsBetweenDecisions: 1
--- !u!1 &1377584197416466
GameObject:
m_ObjectHideFlags: 0

Project/Assets/ML-Agents/Examples/Match3/Prefabs/Match3Heuristic.prefab (6 changes)


VectorActionSpaceType: 0
hasUpgradedBrainParametersWithActionSpec: 1
m_Model: {fileID: 11400000, guid: c34da50737a3c4a50918002b20b2b927, type: 3}
m_InferenceDevice: 0
m_InferenceDevice: 2
m_BehaviorType: 0
m_BehaviorName: Match3SmartHeuristic
TeamId: 0

Columns: 8
NumCellTypes: 6
NumSpecialTypes: 2
RandomSeed: -1
RandomSeed: -1
--- !u!114 &3508723250470608014
MonoBehaviour:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
ActuatorName: Match3 Actuator
RandomSeed: -1
HeuristicQuality: 0
--- !u!1 &3508723250774301855
GameObject:
m_ObjectHideFlags: 0

Project/Assets/ML-Agents/Examples/Match3/Prefabs/Match3VectorObs.prefab (6 changes)


VectorActionSpaceType: 0
hasUpgradedBrainParametersWithActionSpec: 1
m_Model: {fileID: 11400000, guid: 9e89b8e81974148d3b7213530d00589d, type: 3}
m_InferenceDevice: 0
m_InferenceDevice: 2
m_BehaviorType: 0
m_BehaviorName: Match3VectorObs
TeamId: 0

Columns: 8
NumCellTypes: 6
NumSpecialTypes: 2
RandomSeed: -1
RandomSeed: -1
--- !u!114 &2118285884327540680
MonoBehaviour:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
ActuatorName: Match3 Actuator
RandomSeed: -1
HeuristicQuality: 0

Project/Assets/ML-Agents/Examples/Match3/Prefabs/Match3VisualObs.prefab (6 changes)


VectorActionSpaceType: 0
hasUpgradedBrainParametersWithActionSpec: 1
m_Model: {fileID: 11400000, guid: 48d14da88fea74d0693c691c6e3f2e34, type: 3}
m_InferenceDevice: 0
m_InferenceDevice: 2
m_BehaviorType: 0
m_BehaviorName: Match3VisualObs
TeamId: 0

Columns: 8
NumCellTypes: 6
NumSpecialTypes: 2
RandomSeed: -1
RandomSeed: -1
--- !u!114 &3019509692332007783
MonoBehaviour:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
ActuatorName: Match3 Actuator
RandomSeed: -1
HeuristicQuality: 0

Project/Assets/ML-Agents/Examples/Match3/Scripts/Match3ExampleActuatorComponent.cs (2 changes)


public class Match3ExampleActuatorComponent : Match3ActuatorComponent
{
/// <inheritdoc/>
#pragma warning disable 672
#pragma warning restore 672
{
var board = GetComponent<Match3Board>();
var agent = GetComponentInParent<Agent>();

Project/Assets/ML-Agents/Examples/PushBlock/Prefabs/PushBlockArea.prefab (22 changes)


m_Name:
m_EditorClassIdentifier:
m_BrainParameters:
vectorObservationSize: 0
numStackedVectorObservations: 2
vectorActionSize: 07000000
vectorActionDescriptions: []
vectorActionSpaceType: 0
VectorObservationSize: 0
NumStackedVectorObservations: 2
m_ActionSpec:
m_NumContinuousActions: 0
BranchSizes: 07000000
VectorActionSize: 07000000
VectorActionDescriptions: []
VectorActionSpaceType: 0
hasUpgradedBrainParametersWithActionSpec: 1
m_InferenceDevice: 0
m_InferenceDevice: 2
m_UseChildActuators: 1
m_ObservableAttributeHandling: 0
--- !u!114 &114505490781873732
MonoBehaviour:
m_ObjectHideFlags: 0

agentParameters:
maxStep: 0
hasUpgradedFromAgentParameters: 1
maxStep: 5000
MaxStep: 5000
ground: {fileID: 1500989011945850}
area: {fileID: 1125452240183160}
areaBounds:

m_EditorClassIdentifier:
DecisionPeriod: 5
TakeActionsBetweenDecisions: 1
offsetStep: 0
--- !u!114 &4081319787948195948
MonoBehaviour:
m_ObjectHideFlags: 0

m_Script: {fileID: 11500000, guid: 3a6da8f78a394c6ab027688eab81e04d, type: 3}
m_Name:
m_EditorClassIdentifier:
debugCommandLineOverride:
--- !u!1 &1500989011945850
GameObject:
m_ObjectHideFlags: 0

Project/Assets/ML-Agents/Examples/PushBlock/Prefabs/PushBlockVisualArea.prefab (8 changes)


m_BrainParameters:
VectorObservationSize: 0
NumStackedVectorObservations: 1
m_ActionSpec:
m_NumContinuousActions: 0
BranchSizes: 07000000
hasUpgradedBrainParametersWithActionSpec: 1
m_InferenceDevice: 0
m_InferenceDevice: 2
m_UseChildActuators: 1
m_ObservableAttributeHandling: 0
--- !u!114 &114812843792483960
MonoBehaviour:

m_Width: 84
m_Height: 84
m_Grayscale: 0
m_ObservationStacks: 1
m_Compression: 1
--- !u!114 &9049837659352187721
MonoBehaviour:

Project/Assets/ML-Agents/Examples/Pyramids/Prefabs/AreaPB.prefab (22 changes)


m_Name:
m_EditorClassIdentifier:
m_BrainParameters:
vectorObservationSize: 4
numStackedVectorObservations: 1
vectorActionSize: 05000000
vectorActionDescriptions: []
vectorActionSpaceType: 0
VectorObservationSize: 4
NumStackedVectorObservations: 1
m_ActionSpec:
m_NumContinuousActions: 0
BranchSizes: 05000000
VectorActionSize: 05000000
VectorActionDescriptions: []
VectorActionSpaceType: 0
hasUpgradedBrainParametersWithActionSpec: 1
m_InferenceDevice: 0
m_InferenceDevice: 2
m_UseChildActuators: 1
m_ObservableAttributeHandling: 0
--- !u!114 &114937736047215868
MonoBehaviour:
m_ObjectHideFlags: 0

agentParameters:
maxStep: 0
hasUpgradedFromAgentParameters: 1
maxStep: 5000
MaxStep: 5000
area: {fileID: 1464170487903594}
areaSwitch: {fileID: 1432086782037750}
useVectorObs: 1

m_EditorClassIdentifier:
DecisionPeriod: 5
TakeActionsBetweenDecisions: 1
offsetStep: 0
--- !u!114 &5712624269609438939
MonoBehaviour:
m_ObjectHideFlags: 0

m_Script: {fileID: 11500000, guid: 3a6da8f78a394c6ab027688eab81e04d, type: 3}
m_Name:
m_EditorClassIdentifier:
debugCommandLineOverride:
--- !u!1 &1148882946833254
GameObject:
m_ObjectHideFlags: 0

Project/Assets/ML-Agents/Examples/Pyramids/Prefabs/VisualAreaPyramids.prefab (43 changes)


m_Name:
m_EditorClassIdentifier:
m_BrainParameters:
vectorObservationSize: 0
numStackedVectorObservations: 1
vectorActionSize: 05000000
vectorActionDescriptions: []
vectorActionSpaceType: 0
VectorObservationSize: 0
NumStackedVectorObservations: 1
m_ActionSpec:
m_NumContinuousActions: 0
BranchSizes: 05000000
VectorActionSize: 05000000
VectorActionDescriptions: []
VectorActionSpaceType: 0
hasUpgradedBrainParametersWithActionSpec: 1
m_InferenceDevice: 0
m_InferenceDevice: 2
m_TeamID: 0
m_useChildSensors: 1
TeamId: 0
m_UseChildSensors: 1
m_UseChildActuators: 1
m_ObservableAttributeHandling: 0
--- !u!114 &114741503533626942
MonoBehaviour:
m_ObjectHideFlags: 0

m_Script: {fileID: 11500000, guid: b8db44472779248d3be46895c4d562d5, type: 3}
m_Name:
m_EditorClassIdentifier:
maxStep: 5000
agentParameters:
maxStep: 0
hasUpgradedFromAgentParameters: 1
MaxStep: 5000
area: {fileID: 1055559745433172}
areaSwitch: {fileID: 1212218760704844}
useVectorObs: 0

m_Script: {fileID: 11500000, guid: 282f342c2ab144bf38be65d4d0c4e07d, type: 3}
m_Name:
m_EditorClassIdentifier:
camera: {fileID: 20712684238256298}
sensorName: CameraSensor
width: 84
height: 84
grayscale: 0
compression: 1
m_Camera: {fileID: 20712684238256298}
m_SensorName: CameraSensor
m_Width: 84
m_Height: 84
m_Grayscale: 0
m_ObservationStacks: 1
m_Compression: 1
--- !u!114 &9216598927300453297
MonoBehaviour:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
DecisionPeriod: 5
RepeatAction: 1
offsetStep: 0
TakeActionsBetweenDecisions: 1
--- !u!1 &1747856067778386
GameObject:
m_ObjectHideFlags: 0

Project/Assets/ML-Agents/Examples/Reacher/Prefabs/Agent.prefab (22 changes)


m_Name:
m_EditorClassIdentifier:
m_BrainParameters:
vectorObservationSize: 33
numStackedVectorObservations: 1
vectorActionSize: 04000000
vectorActionDescriptions: []
vectorActionSpaceType: 1
VectorObservationSize: 33
NumStackedVectorObservations: 1
m_ActionSpec:
m_NumContinuousActions: 4
BranchSizes:
VectorActionSize: 04000000
VectorActionDescriptions: []
VectorActionSpaceType: 1
hasUpgradedBrainParametersWithActionSpec: 1
m_InferenceDevice: 0
m_InferenceDevice: 2
m_UseChildActuators: 1
m_ObservableAttributeHandling: 0
--- !u!114 &114955921823023820
MonoBehaviour:
m_ObjectHideFlags: 0

agentParameters:
maxStep: 0
hasUpgradedFromAgentParameters: 1
maxStep: 4000
MaxStep: 4000
pendulumA: {fileID: 1644872085946016}
pendulumB: {fileID: 1053261483945176}
hand: {fileID: 1654288206095398}

m_EditorClassIdentifier:
DecisionPeriod: 4
TakeActionsBetweenDecisions: 1
offsetStep: 0
--- !u!114 &7840105453417110232
MonoBehaviour:
m_ObjectHideFlags: 0

m_Script: {fileID: 11500000, guid: 3a6da8f78a394c6ab027688eab81e04d, type: 3}
m_Name:
m_EditorClassIdentifier:
debugCommandLineOverride:
--- !u!1 &1644872085946016
GameObject:
m_ObjectHideFlags: 0

Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ModelOverrider.cs (29 changes)


const string k_CommandLineModelOverrideDirectoryFlag = "--mlagents-override-model-directory";
const string k_CommandLineModelOverrideExtensionFlag = "--mlagents-override-model-extension";
const string k_CommandLineQuitAfterEpisodesFlag = "--mlagents-quit-after-episodes";
const string k_CommandLineQuitAfterSeconds = "--mlagents-quit-after-seconds";
const string k_CommandLineQuitOnLoadFailure = "--mlagents-quit-on-load-failure";
// The attached Agent

// Max episodes to run. Only used if > 0
// Will default to 1 if override models are specified, otherwise 0.
int m_MaxEpisodes;
// Deadline - exit if the time exceeds this
DateTime m_Deadline = DateTime.MaxValue;
int m_NumSteps;
int m_PreviousNumSteps;

void GetAssetPathFromCommandLine()
{
var maxEpisodes = 0;
var timeoutSeconds = 0;
string[] commandLineArgsOverride = null;
if (!string.IsNullOrEmpty(debugCommandLineOverride) && Application.isEditor)
{

{
Int32.TryParse(args[i + 1], out maxEpisodes);
}
else if (args[i] == k_CommandLineQuitAfterSeconds && i < args.Length - 1)
{
Int32.TryParse(args[i + 1], out timeoutSeconds);
}
else if (args[i] == k_CommandLineQuitOnLoadFailure)
{
m_QuitOnLoadFailure = true;

m_MaxEpisodes = maxEpisodes > 0 ? maxEpisodes : 1;
Debug.Log($"setting m_MaxEpisodes to {maxEpisodes}");
}
if (timeoutSeconds > 0)
{
m_Deadline = DateTime.Now + TimeSpan.FromSeconds(timeoutSeconds);
Debug.Log($"setting deadline to {timeoutSeconds} from now.");
}
}
void OnEnable()

EditorApplication.isPlaying = false;
#endif
}
else if (DateTime.Now >= m_Deadline)
{
Debug.Log(
$"Deadline exceeded. " +
$"{TotalCompletedEpisodes}/{m_MaxEpisodes} episodes and " +
$"{TotalNumSteps}/{m_MaxEpisodes * m_Agent.MaxStep} steps completed. Exiting.");
Application.Quit(0);
#if UNITY_EDITOR
EditorApplication.isPlaying = false;
#endif
}
m_NumSteps++;
}
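
The flag parsing above is only shown in fragments; here is a condensed, self-contained sketch of the quit-after-seconds handling it implements. The helper class is hypothetical; the flag string is the one defined in the hunk:

```csharp
using System;

static class DeadlineSketch
{
    const string k_CommandLineQuitAfterSeconds = "--mlagents-quit-after-seconds";

    // Translate a "--mlagents-quit-after-seconds N" argument into an absolute
    // deadline; DateTime.MaxValue means "no deadline", as in ModelOverrider.
    public static DateTime ParseDeadline(string[] args)
    {
        var deadline = DateTime.MaxValue;
        for (var i = 0; i < args.Length - 1; i++)
        {
            if (args[i] == k_CommandLineQuitAfterSeconds &&
                Int32.TryParse(args[i + 1], out var timeoutSeconds) &&
                timeoutSeconds > 0)
            {
                deadline = DateTime.Now + TimeSpan.FromSeconds(timeoutSeconds);
            }
        }
        return deadline;
    }
}
```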

Project/Assets/ML-Agents/Examples/Soccer/Prefabs/SoccerFieldTwos.prefab (28 changes)


m_BrainParameters:
VectorObservationSize: 0
NumStackedVectorObservations: 1
m_ActionSpec:
m_NumContinuousActions: 0
BranchSizes: 030000000300000003000000
hasUpgradedBrainParametersWithActionSpec: 1
m_InferenceDevice: 0
m_InferenceDevice: 2
m_UseChildActuators: 1
m_ObservableAttributeHandling: 0
--- !u!114 &114492261207303438
MonoBehaviour:

m_BrainParameters:
VectorObservationSize: 0
NumStackedVectorObservations: 1
m_ActionSpec:
m_NumContinuousActions: 0
BranchSizes: 030000000300000003000000
hasUpgradedBrainParametersWithActionSpec: 1
m_InferenceDevice: 0
m_InferenceDevice: 2
m_UseChildActuators: 1
m_ObservableAttributeHandling: 0
--- !u!114 &114850431417842684
MonoBehaviour:

m_BrainParameters:
VectorObservationSize: 0
NumStackedVectorObservations: 1
m_ActionSpec:
m_NumContinuousActions: 0
BranchSizes: 030000000300000003000000
hasUpgradedBrainParametersWithActionSpec: 1
m_InferenceDevice: 0
m_InferenceDevice: 2
m_UseChildActuators: 1
m_ObservableAttributeHandling: 0
--- !u!114 &5320024511406682322
MonoBehaviour:

m_BrainParameters:
VectorObservationSize: 0
NumStackedVectorObservations: 1
m_ActionSpec:
m_NumContinuousActions: 0
BranchSizes: 030000000300000003000000
hasUpgradedBrainParametersWithActionSpec: 1
m_InferenceDevice: 0
m_InferenceDevice: 2
m_UseChildActuators: 1
m_ObservableAttributeHandling: 0
--- !u!114 &5379409612883756837
MonoBehaviour:

Project/Assets/ML-Agents/Examples/Soccer/Prefabs/StrikersVsGoalieField.prefab (21 changes)


m_BrainParameters:
VectorObservationSize: 0
NumStackedVectorObservations: 1
m_ActionSpec:
m_NumContinuousActions: 0
BranchSizes: 030000000300000003000000
hasUpgradedBrainParametersWithActionSpec: 1
m_InferenceDevice: 0
m_InferenceDevice: 2
m_UseChildActuators: 1
m_ObservableAttributeHandling: 0
--- !u!114 &114492261207303438
MonoBehaviour:

m_BrainParameters:
VectorObservationSize: 0
NumStackedVectorObservations: 1
m_ActionSpec:
m_NumContinuousActions: 0
BranchSizes: 030000000300000003000000
hasUpgradedBrainParametersWithActionSpec: 1
m_InferenceDevice: 0
m_InferenceDevice: 2
m_UseChildActuators: 1
m_ObservableAttributeHandling: 0
--- !u!114 &114850431417842684
MonoBehaviour:

m_BrainParameters:
VectorObservationSize: 0
NumStackedVectorObservations: 1
m_ActionSpec:
m_NumContinuousActions: 0
BranchSizes: 030000000300000003000000
hasUpgradedBrainParametersWithActionSpec: 1
m_InferenceDevice: 0
m_InferenceDevice: 2
m_UseChildActuators: 1
m_ObservableAttributeHandling: 0
--- !u!114 &5379409612883756837
MonoBehaviour:

Project/Assets/ML-Agents/Examples/Tennis/Prefabs/TennisArea.prefab (18 changes)


m_BrainParameters:
VectorObservationSize: 9
NumStackedVectorObservations: 3
m_ActionSpec:
m_NumContinuousActions: 3
BranchSizes:
hasUpgradedBrainParametersWithActionSpec: 1
m_InferenceDevice: 0
m_InferenceDevice: 2
m_UseChildActuators: 1
m_ObservableAttributeHandling: 0
--- !u!114 &114915946461826994
MonoBehaviour:
m_ObjectHideFlags: 0

m_Script: {fileID: 11500000, guid: 3a6da8f78a394c6ab027688eab81e04d, type: 3}
m_Name:
m_EditorClassIdentifier:
debugCommandLineOverride:
--- !u!1 &1194790474478638
GameObject:
m_ObjectHideFlags: 0

m_BrainParameters:
VectorObservationSize: 9
NumStackedVectorObservations: 3
m_ActionSpec:
m_NumContinuousActions: 3
BranchSizes:
hasUpgradedBrainParametersWithActionSpec: 1
m_InferenceDevice: 0
m_InferenceDevice: 2
m_UseChildActuators: 1
m_ObservableAttributeHandling: 0
--- !u!114 &114800310164848628
MonoBehaviour:
m_ObjectHideFlags: 0

m_Script: {fileID: 11500000, guid: 3a6da8f78a394c6ab027688eab81e04d, type: 3}
m_Name:
m_EditorClassIdentifier:
debugCommandLineOverride:
--- !u!1 &1969551055586186
GameObject:
m_ObjectHideFlags: 0

Project/Assets/ML-Agents/Examples/Walker/Prefabs/Ragdoll/WalkerRagdollBase.prefab (12 changes)


m_BrainParameters:
VectorObservationSize: 243
NumStackedVectorObservations: 1
m_ActionSpec:
m_NumContinuousActions: 39
BranchSizes:
hasUpgradedBrainParametersWithActionSpec: 1
m_InferenceDevice: 0
m_InferenceDevice: 2
m_UseChildActuators: 1
m_ObservableAttributeHandling: 0
--- !u!114 &7408209125961349353
MonoBehaviour:

maxStep: 0
hasUpgradedFromAgentParameters: 1
MaxStep: 5000
targetWalkingSpeed: 10
m_TargetWalkingSpeed: 10
walkDirectionMethod: 0
worldDirToWalk: {x: 1, y: 0, z: 0}
worldPosToWalkTo: {x: 0, y: 0, z: 0}
target: {fileID: 0}
hips: {fileID: 895268871264836332}
chest: {fileID: 7933235354845945071}

Project/Assets/ML-Agents/Examples/Walker/Prefabs/Ragdoll/WalkerRagdollDySingleSpeedVariant.prefab (5 changes)


value:
objectReference: {fileID: 11400000, guid: 47e7c480450ec4dcd9e4a04124e14ed4,
type: 3}
- target: {fileID: 895268871377934297, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_InferenceDevice
value: 2
objectReference: {fileID: 0}
- target: {fileID: 895268871377934298, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_LocalPosition.x

Project/Assets/ML-Agents/Examples/WallJump/Prefabs/WallJumpArea.prefab (7 changes)


m_BrainParameters:
VectorObservationSize: 4
NumStackedVectorObservations: 6
m_ActionSpec:
m_NumContinuousActions: 0
BranchSizes: 03000000030000000300000002000000
hasUpgradedBrainParametersWithActionSpec: 1
m_InferenceDevice: 0
m_InferenceDevice: 2
m_UseChildActuators: 1
m_ObservableAttributeHandling: 0
--- !u!114 &114925928594762506
MonoBehaviour:

Project/Assets/ML-Agents/Examples/Worm/Prefabs/WormBasePrefab.prefab (6 changes)


m_BrainParameters:
VectorObservationSize: 64
NumStackedVectorObservations: 1
m_ActionSpec:
m_NumContinuousActions: 9
BranchSizes:
hasUpgradedBrainParametersWithActionSpec: 1
m_InferenceDevice: 0
m_InferenceDevice: 2
m_BehaviorType: 0
m_BehaviorName: WormDynamic
TeamId: 0

Project/ProjectSettings/TagManager.asset (1 change)


- symbol_O_Goal
- purpleAgent
- purpleGoal
- tile
layers:
- Default
- TransparentFX

com.unity.ml-agents.extensions/Runtime/Match3/Match3ActuatorComponent.cs (2 changes)


public bool ForceHeuristic;
/// <inheritdoc/>
#pragma warning disable 672
#pragma warning restore 672
{
var board = GetComponent<AbstractBoard>();
var agent = GetComponentInParent<Agent>();

com.unity.ml-agents/CHANGELOG.md (33 changes)


and this project adheres to
[Semantic Versioning](http://semver.org/spec/v2.0.0.html).
## [Unreleased]
### Major Changes
#### com.unity.ml-agents (C#)
#### ml-agents / ml-agents-envs / gym-unity (Python)
### Minor Changes
#### com.unity.ml-agents / com.unity.ml-agents.extensions (C#)
#### ml-agents / ml-agents-envs / gym-unity (Python)
### Bug Fixes
#### com.unity.ml-agents (C#)
#### ml-agents / ml-agents-envs / gym-unity (Python)
## [1.8.0-preview] - 2021-02-17
- A plugin system for `mlagents-learn` has been added. You can now define custom
`StatsWriter` implementations and register them to be called during training.
More types of plugins will be added in the future. (#4788)
### Minor Changes
#### com.unity.ml-agents / com.unity.ml-agents.extensions (C#)

will result in the values being summed (instead of averaged) when written to
TensorBoard. Thanks to @brccabral for the contribution! (#4816)
- The upper limit for the time scale (by setting the `--time-scale` parameter in mlagents-learn) was
removed when training with a player. The Editor still requires it to be clamped to 100. (#4867)
- Added the IHeuristicProvider interface to allow IActuators, as well as Agents, to implement the Heuristic function to generate actions.
Updated the Basic example and the Match3 Example to use Actuators.
Changed the namespace and file names of classes in com.unity.ml-agents.extensions. (#4849)

- Added `ObservationWriter.AddList()` and deprecated `ObservationWriter.AddRange()`.
`AddList()` is recommended, as it does not generate any additional memory allocations. (#4887)
- The Barracuda dependency was upgraded to 1.3.0. (#4898)
- Added `ActuatorComponent.CreateActuators`, and deprecate `ActuatorComponent.CreateActuator`. The
default implementation will wrap `ActuatorComponent.CreateActuator` in an array and return that. (#4899)
- `InferenceDevice.Burst` was added, indicating that Agent's model will be run using Barracuda's Burst backend.
This is the default for new Agents, but existing ones that use `InferenceDevice.CPU` should update to
`InferenceDevice.Burst`. (#4925)
- Tensorboard now logs the Environment Reward as both a scalar and a histogram. (#4878)
- The `mlagents_envs` API has changed: `BehaviorSpec` now has an `observation_specs` property containing a list of `ObservationSpec`. For more information on `ObservationSpec` see [here](https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Python-API.md#behaviorspec). (#4763, #4825)
### Bug Fixes
#### com.unity.ml-agents (C#)

- Removed unnecessary memory allocations in `SideChannelManager.GetSideChannelMessage()` (#4886)
- Removed several memory allocations that happened during inference. On a test scene, this
reduced the amount of memory allocated by approximately 25%. (#4887)
- Removed several memory allocations that happened during inference with discrete actions. (#4922)
- Properly catch permission errors when writing timer files. (#4921)
- Unexpected exceptions during training initialization and shutdown are now logged. If you see
"noisy" logs, please let us know! (#4930, #4935)
#### ml-agents / ml-agents-envs / gym-unity (Python)
- Fixed a bug that would cause an exception when `RunOptions` was deserialized via `pickle`. (#4842)

while waiting for a connection, and raises a better error message if it crashes. (#4880)
- Passing a `-logfile` option in the `--env-args` option to `mlagents-learn` is
no longer overwritten. (#4880)
- The `load_weights` function was being called unnecessarily often in the Ghost Trainer leading to training slowdowns. (#4934)
## [1.7.2-preview] - 2020-12-22
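
Several of the entries above are small API migrations. As one concrete illustration of the `InferenceDevice.Burst` change (#4925), which is also why the prefab diffs in this commit flip `m_InferenceDevice` from 0 to 2, here is a hedged sketch; the behavior name and model asset are placeholders:

```csharp
using Unity.Barracuda;
using Unity.MLAgents;
using Unity.MLAgents.Policies;
using UnityEngine;

public class BurstMigrationSketch : MonoBehaviour
{
    public NNModel modelAsset;   // placeholder model asset reference

    void Start()
    {
        var agent = GetComponent<Agent>();
        // InferenceDevice.CPU (0) was the old default; InferenceDevice.Burst (2)
        // is the new one, hence the 0 -> 2 churn in the serialized prefabs.
        agent.SetModel("3DBall", modelAsset, InferenceDevice.Burst);
    }
}
```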

com.unity.ml-agents/Editor/BehaviorParametersEditor.cs (16 changes)


// Grab the sensor components, since we need them to determine the observation sizes.
// TODO make these methods of BehaviorParameters
SensorComponent[] sensorComponents;
if (behaviorParameters.UseChildSensors)
{
sensorComponents = behaviorParameters.GetComponentsInChildren<SensorComponent>();
}
else
{
sensorComponents = behaviorParameters.GetComponents<SensorComponent>();
}
var agent = behaviorParameters.gameObject.GetComponent<Agent>();
agent.sensors = new List<ISensor>();
agent.InitializeSensors();
var sensors = agent.sensors.ToArray();
ActuatorComponent[] actuatorComponents;
if (behaviorParameters.UseChildActuators)

// Get the total size of the sensors generated by ObservableAttributes.
// If there are any errors (e.g. unsupported type, write-only properties), display them too.
int observableAttributeSensorTotalSize = 0;
var agent = behaviorParameters.GetComponent<Agent>();
if (agent != null && behaviorParameters.ObservableAttributeHandling != ObservableAttributeOptions.Ignore)
{
List<string> observableErrors = new List<string>();

if (brainParameters != null)
{
var failedChecks = Inference.BarracudaModelParamLoader.CheckModel(
barracudaModel, brainParameters, sensorComponents, actuatorComponents,
barracudaModel, brainParameters, sensors, actuatorComponents,
observableAttributeSensorTotalSize, behaviorParameters.BehaviorType
);
foreach (var check in failedChecks)

com.unity.ml-agents/Runtime/Academy.cs (55 changes)


/// <term>1.4.0</term>
/// <description>Support training analytics sent from python trainer to the editor.</description>
/// </item>
/// <item>
/// <term>1.5.0</term>
/// <description>Support variable length observation training.</description>
/// </item>
const string k_ApiVersion = "1.4.0";
const string k_ApiVersion = "1.5.0";
/// <summary>
/// Unity package version of com.unity.ml-agents.

{
// We try to exchange the first message with Python. If this fails, it means
// no Python Process is ready to train the environment. In this case, the
//environment must use Inference.
// environment must use Inference.
bool initSuccessful = false;
var communicatorInitParams = new CommunicatorInitParameters
{
unityCommunicationVersion = k_ApiVersion,
unityPackageVersion = k_PackageVersion,
name = "AcademySingleton",
CSharpCapabilities = new UnityRLCapabilities()
};
var unityRlInitParameters = Communicator.Initialize(
new CommunicatorInitParameters
{
unityCommunicationVersion = k_ApiVersion,
unityPackageVersion = k_PackageVersion,
name = "AcademySingleton",
CSharpCapabilities = new UnityRLCapabilities()
});
UnityEngine.Random.InitState(unityRlInitParameters.seed);
// We might have inference-only Agents, so set the seed for them too.
m_InferenceSeed = unityRlInitParameters.seed;
TrainerCapabilities = unityRlInitParameters.TrainerCapabilities;
TrainerCapabilities.WarnOnPythonMissingBaseRLCapabilities();
initSuccessful = Communicator.Initialize(
communicatorInitParams,
out var unityRlInitParameters
);
if (initSuccessful)
{
UnityEngine.Random.InitState(unityRlInitParameters.seed);
// We might have inference-only Agents, so set the seed for them too.
m_InferenceSeed = unityRlInitParameters.seed;
TrainerCapabilities = unityRlInitParameters.TrainerCapabilities;
TrainerCapabilities.WarnOnPythonMissingBaseRLCapabilities();
}
else
{
Debug.Log($"Couldn't connect to trainer on port {port} using API version {k_ApiVersion}. Will perform inference instead.");
Communicator = null;
}
catch
catch (Exception ex)
Debug.Log($"" +
$"Couldn't connect to trainer on port {port} using API version {k_ApiVersion}. " +
"Will perform inference instead."
);
Debug.Log($"Unexpected exception when trying to initialize communication: {ex}\nWill perform inference instead.");
if (Communicator != null)
{
Communicator.QuitCommandReceived += OnQuitCommandReceived;
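
The new initialization path above replaces a throwing `Communicator.Initialize` with a bool-returning overload plus an `out` parameter, so a missing trainer degrades to inference instead of raising. A self-contained sketch of that pattern with hypothetical stand-in types (the real signatures live in `ICommunicator`):

```csharp
using UnityEngine;

// Hypothetical stand-in for ICommunicator, only to show the bool + out pattern.
interface ITrainerChannel
{
    bool Initialize(string apiVersion, out int seed);
}

class AcademyConnectSketch
{
    ITrainerChannel m_Channel;
    int m_InferenceSeed;   // inference-only Agents are seeded too

    public void Connect(ITrainerChannel channel, string apiVersion, int port)
    {
        m_Channel = channel;
        if (m_Channel != null && m_Channel.Initialize(apiVersion, out var seed))
        {
            UnityEngine.Random.InitState(seed);
            m_InferenceSeed = seed;
        }
        else
        {
            Debug.Log($"Couldn't connect to trainer on port {port} using API version {apiVersion}. Will perform inference instead.");
            m_Channel = null;   // fall back to inference
        }
    }
}
```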

com.unity.ml-agents/Runtime/Actuators/ActuatorComponent.cs (14 changes)


using System;
using UnityEngine;
namespace Unity.MLAgents.Actuators

/// Create the IActuator. This is called by the Agent when it is initialized.
/// </summary>
/// <returns>Created IActuator object.</returns>
[Obsolete("Use CreateActuators instead.")]
/// <summary>
/// Create a collection of <see cref="IActuator"/>s. This is called by the <see cref="Agent"/> during
/// initialization.
/// </summary>
/// <returns>A collection of <see cref="IActuator"/>s</returns>
public virtual IActuator[] CreateActuators()
{
#pragma warning disable 618
return new[] { CreateActuator() };
#pragma warning restore 618
}
/// <summary>
/// The specification of the possible actions for this ActuatorComponent.
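
The hunk above deprecates the single-actuator `CreateActuator` and adds `CreateActuators`, whose default implementation wraps the old method in a one-element array. A minimal sketch of a component opting into the plural API; `SimpleActuator` is a hypothetical stand-in, not a type from this commit:

```csharp
using Unity.MLAgents.Actuators;

// Hypothetical IActuator that does nothing, just to make the sketch compile.
class SimpleActuator : IActuator
{
    public SimpleActuator(string name) { Name = name; }
    public ActionSpec ActionSpec => ActionSpec.MakeContinuous(1);
    public string Name { get; }
    public void OnActionReceived(ActionBuffers actionBuffers) { /* apply the action */ }
    public void WriteDiscreteActionMask(IDiscreteActionMask actionMask) { }
    public void ResetData() { }
}

public class MultiActuatorComponentSketch : ActuatorComponent
{
    public override ActionSpec ActionSpec => ActionSpec.MakeContinuous(2);

    // Override the new plural method directly; components that still override
    // the obsolete CreateActuator() are wrapped by the base implementation.
    public override IActuator[] CreateActuators()
    {
        return new IActuator[] { new SimpleActuator("A"), new SimpleActuator("B") };
    }
}
```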

com.unity.ml-agents/Runtime/Actuators/ActuatorManager.cs (14 changes)


NumContinuousActions = NumDiscreteActions = SumOfDiscreteBranchSizes = 0;
}
/// <summary>
/// Add an array of <see cref="IActuator"/>s at once.
/// </summary>
/// <param name="actuators">The array of <see cref="IActuator"/>s to add.</param>
public void AddActuators(IActuator[] actuators)
{
for (var i = 0; i < actuators.Length; i++)
{
Add(actuators[i]);
}
}
/*********************************************************************************
* IList implementation that delegates to m_Actuators List. *
*********************************************************************************/

public int Count => m_Actuators.Count;
/// <inheritdoc/>
public bool IsReadOnly => m_Actuators.IsReadOnly;
public bool IsReadOnly => false;
/// <inheritdoc/>
public int IndexOf(IActuator item)

com.unity.ml-agents/Runtime/Agent.cs (6 changes)


/// </summary>
internal void InitializeSensors()
{
if (m_PolicyFactory == null)
{
m_PolicyFactory = GetComponent<BehaviorParameters>();
}
if (m_PolicyFactory.ObservableAttributeHandling != ObservableAttributeOptions.Ignore)
{
var excludeInherited =

foreach (var actuatorComponent in attachedActuators)
{
m_ActuatorManager.Add(actuatorComponent.CreateActuator());
m_ActuatorManager.AddActuators(actuatorComponent.CreateActuators());
}
}

com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs (15 changes)


{
observationProto.DimensionProperties.Add((int)dimensionProperties[i]);
}
// Checking trainer compatibility with variable length observations
if (dimensionProperties.Length == 2)
{
if (dimensionProperties[0] == DimensionProperty.VariableSize &&
dimensionProperties[1] == DimensionProperty.None)
{
var trainerCanHandleVarLenObs = Academy.Instance.TrainerCapabilities == null || Academy.Instance.TrainerCapabilities.VariableLengthObservation;
if (!trainerCanHandleVarLenObs)
{
throw new UnityAgentsException("Variable Length Observations are not supported by the trainer");
}
}
}
}
observationProto.Shape.AddRange(shape);

CompressedChannelMapping = proto.CompressedChannelMapping,
HybridActions = proto.HybridActions,
TrainingAnalytics = proto.TrainingAnalytics,
VariableLengthObservation = proto.VariableLengthObservation,
};
}

CompressedChannelMapping = rlCaps.CompressedChannelMapping,
HybridActions = rlCaps.HybridActions,
TrainingAnalytics = rlCaps.TrainingAnalytics,
VariableLengthObservation = rlCaps.VariableLengthObservation,
};
}
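
This guard enforces the new `VariableLengthObservation` capability: a rank-2 observation tagged `(VariableSize, None)`, which is what the new `BufferSensor` produces, is rejected unless the connected trainer advertises support. A usage sketch from the agent side, assuming a `BufferSensorComponent` configured in the Inspector (the observable size of 3 is illustrative):

```csharp
using Unity.MLAgents;
using Unity.MLAgents.Sensors;
using UnityEngine;

public class VariableLengthObsSketch : Agent
{
    BufferSensorComponent m_BufferSensor;   // set ObservableSize = 3 in the Inspector

    public override void Initialize()
    {
        m_BufferSensor = GetComponent<BufferSensorComponent>();
    }

    public override void CollectObservations(VectorSensor sensor)
    {
        // Each appended array is one entity "row" of the variable-length
        // observation; an old trainer without VariableLengthObservation
        // support trips the UnityAgentsException in the hunk above.
        m_BufferSensor.AppendObservation(new float[] { 1f, 0f, 0.5f });
    }
}
```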

com.unity.ml-agents/Runtime/Communicator/ICommunicator.cs (5 changes)


/// Sends the academy parameters through the Communicator.
/// Is used by the academy to send the AcademyParameters to the communicator.
/// </summary>
/// <returns>The External Initialization Parameters received.</returns>
/// <returns>Whether the connection was successful.</returns>
UnityRLInitParameters Initialize(CommunicatorInitParameters initParameters);
/// <param name="initParametersOut">The External Initialization Parameters received</param>
bool Initialize(CommunicatorInitParameters initParameters, out UnityRLInitParameters initParametersOut);
/// <summary>
/// Registers a new Brain to the Communicator.

com.unity.ml-agents/Runtime/Communicator/RpcCommunicator.cs (147 changes)


internal static bool CheckCommunicationVersionsAreCompatible(
string unityCommunicationVersion,
string pythonApiVersion,
string pythonLibraryVersion)
string pythonApiVersion
)
{
var unityVersion = new Version(unityCommunicationVersion);
var pythonVersion = new Version(pythonApiVersion);

/// Sends the initialization parameters through the Communicator.
/// Is used by the academy to send initialization parameters to the communicator.
/// </summary>
/// <returns>The External Initialization Parameters received.</returns>
/// <returns>Whether the connection was successful.</returns>
public UnityRLInitParameters Initialize(CommunicatorInitParameters initParameters)
/// <param name="initParametersOut">The External Initialization Parameters received.</param>
public bool Initialize(CommunicatorInitParameters initParameters, out UnityRLInitParameters initParametersOut)
{
var academyParameters = new UnityRLInitializationOutputProto
{

{
RlInitializationOutput = academyParameters
},
out input);
var pythonPackageVersion = initializationInput.RlInitializationInput.PackageVersion;
var pythonCommunicationVersion = initializationInput.RlInitializationInput.CommunicationVersion;
var unityCommunicationVersion = initParameters.unityCommunicationVersion;
TrainingAnalytics.SetTrainerInformation(pythonPackageVersion, pythonCommunicationVersion);
out input
);
}
catch (Exception ex)
{
if (ex is RpcException rpcException)
{
var communicationIsCompatible = CheckCommunicationVersionsAreCompatible(unityCommunicationVersion,
pythonCommunicationVersion,
pythonPackageVersion);
// Initialization succeeded part-way. The most likely cause is a mismatch between the communicator
// API strings, so log an explicit warning if that's the case.
if (initializationInput != null && input == null)
{
if (!communicationIsCompatible)
switch (rpcException.Status.StatusCode)
Debug.LogWarningFormat(
"Communication protocol between python ({0}) and Unity ({1}) have different " +
"versions which make them incompatible. Python library version: {2}.",
pythonCommunicationVersion, initParameters.unityCommunicationVersion,
pythonPackageVersion
);
case StatusCode.Unavailable:
// This is the common case where there's no trainer to connect to.
break;
case StatusCode.DeadlineExceeded:
// We don't currently set a deadline for connection, but likely will in the future.
break;
default:
Debug.Log($"Unexpected gRPC exception when trying to initialize communication: {rpcException}");
break;
else
{
Debug.LogWarningFormat(
"Unknown communication error between Python. Python communication protocol: {0}, " +
"Python library version: {1}.",
pythonCommunicationVersion,
pythonPackageVersion
);
}
throw new UnityAgentsException("ICommunicator.Initialize() failed.");
else
{
Debug.Log($"Unexpected exception when trying to initialize communication: {ex}");
}
initParametersOut = new UnityRLInitParameters();
return false;
catch
var pythonPackageVersion = initializationInput.RlInitializationInput.PackageVersion;
var pythonCommunicationVersion = initializationInput.RlInitializationInput.CommunicationVersion;
TrainingAnalytics.SetTrainerInformation(pythonPackageVersion, pythonCommunicationVersion);
var communicationIsCompatible = CheckCommunicationVersionsAreCompatible(
initParameters.unityCommunicationVersion,
pythonCommunicationVersion
);
// Initialization succeeded part-way. The most likely cause is a mismatch between the communicator
// API strings, so log an explicit warning if that's the case.
if (initializationInput != null && input == null)
var exceptionMessage = "The Communicator was unable to connect. Please make sure the External " +
"process is ready to accept communication with Unity.";
// Check for common error condition and add details to the exception message.
var httpProxy = Environment.GetEnvironmentVariable("HTTP_PROXY");
var httpsProxy = Environment.GetEnvironmentVariable("HTTPS_PROXY");
if (httpProxy != null || httpsProxy != null)
if (!communicationIsCompatible)
{
Debug.LogWarningFormat(
"Communication protocol between python ({0}) and Unity ({1}) have different " +
"versions which make them incompatible. Python library version: {2}.",
pythonCommunicationVersion, initParameters.unityCommunicationVersion,
pythonPackageVersion
);
}
else
exceptionMessage += " Try removing HTTP_PROXY and HTTPS_PROXY from the" +
"environment variables and try again.";
Debug.LogWarningFormat(
"Unknown communication error between Python. Python communication protocol: {0}, " +
"Python library version: {1}.",
pythonCommunicationVersion,
pythonPackageVersion
);
throw new UnityAgentsException(exceptionMessage);
initParametersOut = new UnityRLInitParameters();
return false;
return initializationInput.RlInitializationInput.ToUnityRLInitParameters();
initParametersOut = initializationInput.RlInitializationInput.ToUnityRLInitParameters();
return true;
}
/// <summary>

SendCommandEvent(rlInput.Command);
}
UnityInputProto Initialize(UnityOutputProto unityOutput,
out UnityInputProto unityInput)
UnityInputProto Initialize(UnityOutputProto unityOutput, out UnityInputProto unityInput)
{
#if UNITY_EDITOR || UNITY_STANDALONE_WIN || UNITY_STANDALONE_OSX || UNITY_STANDALONE_LINUX
m_IsOpen = true;

}
return result.UnityInput;
#else
throw new UnityAgentsException(
"You cannot perform training on this platform.");
throw new UnityAgentsException("You cannot perform training on this platform.");
#endif
}

{
return null;
}
try
{
var message = m_Client.Exchange(WrapMessage(unityOutput, 200));

QuitCommandReceived?.Invoke();
return message.UnityInput;
}
catch
catch (Exception ex)
if (ex is RpcException rpcException)
{
// Log more verbose errors if they're something the user can possibly do something about.
switch (rpcException.Status.StatusCode)
{
case StatusCode.Unavailable:
// This can happen when python disconnects. Ignore it to avoid noisy logs.
break;
case StatusCode.ResourceExhausted:
// This happens if the message body is too large. There's no way to
// gracefully handle this, but at least we can show the message and the
// user can try to reduce the number of agents or observation sizes.
Debug.LogError($"GRPC Exception: {rpcException.Message}. Disconnecting from trainer.");
break;
default:
// Other unknown errors. Log at INFO level.
Debug.Log($"GRPC Exception: {rpcException.Message}. Disconnecting from trainer.");
break;
}
}
else
{
// Fall-through for other error types
Debug.LogError($"Communication Exception: {ex.Message}. Disconnecting from trainer.");
}
m_IsOpen = false;
QuitCommandReceived?.Invoke();
return null;

5
com.unity.ml-agents/Runtime/Communicator/UnityRLCapabilities.cs


public bool CompressedChannelMapping;
public bool HybridActions;
public bool TrainingAnalytics;
public bool VariableLengthObservation;
/// <summary>
/// A class holding the capabilities flags for Reinforcement Learning across C# and the Trainer codebase. This

bool concatenatedPngObservations = true,
bool compressedChannelMapping = true,
bool hybridActions = true,
bool trainingAnalytics = true)
bool trainingAnalytics = true,
bool variableLengthObservation = true)
{
BaseRLCapabilities = baseRlCapabilities;
ConcatenatedPngObservations = concatenatedPngObservations;

VariableLengthObservation = variableLengthObservation;
}
/// <summary>

40
com.unity.ml-agents/Runtime/Grpc/CommunicatorObjects/Capabilities.cs


byte[] descriptorData = global::System.Convert.FromBase64String(
string.Concat(
"CjVtbGFnZW50c19lbnZzL2NvbW11bmljYXRvcl9vYmplY3RzL2NhcGFiaWxp",
"dGllcy5wcm90bxIUY29tbXVuaWNhdG9yX29iamVjdHMirwEKGFVuaXR5UkxD",
"dGllcy5wcm90bxIUY29tbXVuaWNhdG9yX29iamVjdHMi0gEKGFVuaXR5UkxD",
"ASgIEhkKEXRyYWluaW5nQW5hbHl0aWNzGAUgASgIQiWqAiJVbml0eS5NTEFn",
"ZW50cy5Db21tdW5pY2F0b3JPYmplY3RzYgZwcm90bzM="));
"ASgIEhkKEXRyYWluaW5nQW5hbHl0aWNzGAUgASgIEiEKGXZhcmlhYmxlTGVu",
"Z3RoT2JzZXJ2YXRpb24YBiABKAhCJaoCIlVuaXR5Lk1MQWdlbnRzLkNvbW11",
"bmljYXRvck9iamVjdHNiBnByb3RvMw=="));
new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.UnityRLCapabilitiesProto), global::Unity.MLAgents.CommunicatorObjects.UnityRLCapabilitiesProto.Parser, new[]{ "BaseRLCapabilities", "ConcatenatedPngObservations", "CompressedChannelMapping", "HybridActions", "TrainingAnalytics" }, null, null, null)
new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.UnityRLCapabilitiesProto), global::Unity.MLAgents.CommunicatorObjects.UnityRLCapabilitiesProto.Parser, new[]{ "BaseRLCapabilities", "ConcatenatedPngObservations", "CompressedChannelMapping", "HybridActions", "TrainingAnalytics", "VariableLengthObservation" }, null, null, null)
}));
}
#endregion

compressedChannelMapping_ = other.compressedChannelMapping_;
hybridActions_ = other.hybridActions_;
trainingAnalytics_ = other.trainingAnalytics_;
variableLengthObservation_ = other.variableLengthObservation_;
_unknownFields = pb::UnknownFieldSet.Clone(other._unknownFields);
}

}
}
/// <summary>Field number for the "variableLengthObservation" field.</summary>
public const int VariableLengthObservationFieldNumber = 6;
private bool variableLengthObservation_;
/// <summary>
/// Support for variable length observations of rank 2
/// </summary>
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public bool VariableLengthObservation {
get { return variableLengthObservation_; }
set {
variableLengthObservation_ = value;
}
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public override bool Equals(object other) {
return Equals(other as UnityRLCapabilitiesProto);

if (CompressedChannelMapping != other.CompressedChannelMapping) return false;
if (HybridActions != other.HybridActions) return false;
if (TrainingAnalytics != other.TrainingAnalytics) return false;
if (VariableLengthObservation != other.VariableLengthObservation) return false;
return Equals(_unknownFields, other._unknownFields);
}

if (CompressedChannelMapping != false) hash ^= CompressedChannelMapping.GetHashCode();
if (HybridActions != false) hash ^= HybridActions.GetHashCode();
if (TrainingAnalytics != false) hash ^= TrainingAnalytics.GetHashCode();
if (VariableLengthObservation != false) hash ^= VariableLengthObservation.GetHashCode();
if (_unknownFields != null) {
hash ^= _unknownFields.GetHashCode();
}

if (TrainingAnalytics != false) {
output.WriteRawTag(40);
output.WriteBool(TrainingAnalytics);
}
if (VariableLengthObservation != false) {
output.WriteRawTag(48);
output.WriteBool(VariableLengthObservation);
}
if (_unknownFields != null) {
_unknownFields.WriteTo(output);

if (TrainingAnalytics != false) {
size += 1 + 1;
}
if (VariableLengthObservation != false) {
size += 1 + 1;
}
if (_unknownFields != null) {
size += _unknownFields.CalculateSize();
}

if (other.TrainingAnalytics != false) {
TrainingAnalytics = other.TrainingAnalytics;
}
if (other.VariableLengthObservation != false) {
VariableLengthObservation = other.VariableLengthObservation;
}
_unknownFields = pb::UnknownFieldSet.MergeFrom(_unknownFields, other._unknownFields);
}

}
case 40: {
TrainingAnalytics = input.ReadBool();
break;
}
case 48: {
VariableLengthObservation = input.ReadBool();
break;
}
}

129
com.unity.ml-agents/Runtime/Inference/ApplierImpl.cs


using System;
using System.Collections.Generic;
using System.Linq;
using Unity.MLAgents.Inference.Utils;

{
readonly int[] m_ActionSize;
readonly Multinomial m_Multinomial;
readonly ITensorAllocator m_Allocator;
readonly int[] m_StartActionIndices;
readonly float[] m_CdfBuffer;
m_Allocator = allocator;
m_StartActionIndices = Utilities.CumSum(m_ActionSize);
// Scratch space for computing the cumulative distribution function.
// In order to reuse it, make it the size of the largest branch.
var largestBranch = Mathf.Max(m_ActionSize);
m_CdfBuffer = new float[largestBranch];
//var tensorDataProbabilities = tensorProxy.Data as float[,];
var idActionPairList = actionIds as List<int> ?? actionIds.ToList();
var batchSize = idActionPairList.Count;
var actionValues = new float[batchSize, m_ActionSize.Length];
var startActionIndices = Utilities.CumSum(m_ActionSize);
for (var actionIndex = 0; actionIndex < m_ActionSize.Length; actionIndex++)
{
var nBranchAction = m_ActionSize[actionIndex];
var actionProbs = new TensorProxy()
{
valueType = TensorProxy.TensorType.FloatingPoint,
shape = new long[] { batchSize, nBranchAction },
data = m_Allocator.Alloc(new TensorShape(batchSize, nBranchAction))
};
for (var batchIndex = 0; batchIndex < batchSize; batchIndex++)
{
for (var branchActionIndex = 0;
branchActionIndex < nBranchAction;
branchActionIndex++)
{
actionProbs.data[batchIndex, branchActionIndex] =
tensorProxy.data[batchIndex, startActionIndices[actionIndex] + branchActionIndex];
}
}
var outputTensor = new TensorProxy()
{
valueType = TensorProxy.TensorType.FloatingPoint,
shape = new long[] { batchSize, 1 },
data = m_Allocator.Alloc(new TensorShape(batchSize, 1))
};
Eval(actionProbs, outputTensor, m_Multinomial);
for (var ii = 0; ii < batchSize; ii++)
{
actionValues[ii, actionIndex] = outputTensor.data[ii, 0];
}
actionProbs.data.Dispose();
outputTensor.data.Dispose();
}
var agentIndex = 0;
for (var i = 0; i < actionIds.Count; i++)
{

var discreteBuffer = actionBuffer.DiscreteActions;
for (var j = 0; j < m_ActionSize.Length; j++)
{
discreteBuffer[j] = (int)actionValues[agentIndex, j];
ComputeCdf(tensorProxy, agentIndex, m_StartActionIndices[j], m_ActionSize[j]);
discreteBuffer[j] = m_Multinomial.Sample(m_CdfBuffer, m_ActionSize[j]);
}
}
agentIndex++;

/// <summary>
/// Draw samples from a multinomial distribution based on log-probabilities specified
/// in tensor src. The samples will be saved in the dst tensor.
/// Compute the cumulative distribution function for a given agent's action
/// given the log-probabilities.
/// The results are stored in m_CdfBuffer, which is the size of the largest action's number of branches.
/// <param name="src">2-D tensor with shape batch_size x num_classes</param>
/// <param name="dst">Allocated tensor with size batch_size x num_samples</param>
/// <param name="multinomial">Multinomial object used to sample values</param>
/// <exception cref="NotImplementedException">
/// Multinomial doesn't support integer tensors
/// </exception>
/// <exception cref="ArgumentException">Issue with tensor shape or type</exception>
/// <exception cref="ArgumentNullException">
/// At least one of the tensors is not allocated
/// </exception>
public static void Eval(TensorProxy src, TensorProxy dst, Multinomial multinomial)
/// <param name="logProbs"></param>
/// <param name="batch">Index of the agent being considered</param>
/// <param name="channelOffset">Offset into the tensor's channel.</param>
/// <param name="branchSize"></param>
internal void ComputeCdf(TensorProxy logProbs, int batch, int channelOffset, int branchSize)
if (src.DataType != typeof(float))
{
throw new NotImplementedException("Only float tensors are currently supported");
}
if (src.valueType != dst.valueType)
{
throw new ArgumentException(
"Source and destination tensors have different types!");
}
if (src.data == null || dst.data == null)
{
throw new ArgumentNullException();
}
if (src.data.batch != dst.data.batch)
// Find the class maximum
var maxProb = float.NegativeInfinity;
for (var cls = 0; cls < branchSize; ++cls)
throw new ArgumentException("Batch size for input and output data is different!");
maxProb = Mathf.Max(logProbs.data[batch, cls + channelOffset], maxProb);
var cdf = new float[src.data.channels];
for (var batch = 0; batch < src.data.batch; ++batch)
// Exponentiate the shifted log probabilities and accumulate the CDF
var sumProb = 0.0f;
for (var cls = 0; cls < branchSize; ++cls)
// Find the class maximum
var maxProb = float.NegativeInfinity;
for (var cls = 0; cls < src.data.channels; ++cls)
{
maxProb = Mathf.Max(src.data[batch, cls], maxProb);
}
// Sum the log probabilities and compute CDF
var sumProb = 0.0f;
for (var cls = 0; cls < src.data.channels; ++cls)
{
sumProb += Mathf.Exp(src.data[batch, cls] - maxProb);
cdf[cls] = sumProb;
}
// Generate the samples
for (var sample = 0; sample < dst.data.channels; ++sample)
{
dst.data[batch, sample] = multinomial.Sample(cdf);
}
sumProb += Mathf.Exp(logProbs.data[batch, cls + channelOffset] - maxProb);
m_CdfBuffer[cls] = sumProb;
}
}
}
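To make the numerically stable CDF construction above easier to follow in isolation, here is a self-contained sketch under the same assumptions (unnormalized per-branch log-probabilities, `Mathf` from UnityEngine). Subtracting the per-branch maximum before exponentiating keeps `Mathf.Exp` from underflowing to zero when all log-probabilities are very negative, and the CDF never needs normalizing because `Multinomial.Sample` scales its random draw by the last valid entry.
```c#
using UnityEngine;

internal static class CdfSketch
{
    // Sketch of the ComputeCdf pattern for a single (hypothetical) branch.
    public static float[] ComputeCdf(float[] logProbs)
    {
        // Find the largest log-probability so the exponentials stay in range.
        var maxProb = float.NegativeInfinity;
        for (var cls = 0; cls < logProbs.Length; cls++)
        {
            maxProb = Mathf.Max(logProbs[cls], maxProb);
        }
        // Accumulate the (unnormalized) cumulative distribution function.
        var cdf = new float[logProbs.Length];
        var sumProb = 0.0f;
        for (var cls = 0; cls < logProbs.Length; cls++)
        {
            sumProb += Mathf.Exp(logProbs[cls] - maxProb);
            cdf[cls] = sumProb;
        }
        return cdf;
    }
}
```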

70
com.unity.ml-agents/Runtime/Inference/BarracudaModelParamLoader.cs


/// <param name="brainParameters">
/// The BrainParameters that are used to verify the compatibility with the InferenceEngine
/// </param>
/// <param name="sensorComponents">Attached sensor components</param>
/// <param name="sensors">Attached sensor components</param>
SensorComponent[] sensorComponents, ActuatorComponent[] actuatorComponents,
ISensor[] sensors, ActuatorComponent[] actuatorComponents,
int observableAttributeTotalSize = 0,
BehaviorType behaviorType = BehaviorType.Default)
{

}
failedModelChecks.AddRange(
CheckInputTensorPresence(model, brainParameters, memorySize, sensorComponents)
CheckInputTensorPresence(model, brainParameters, memorySize, sensors)
CheckInputTensorShape(model, brainParameters, sensorComponents, observableAttributeTotalSize)
CheckInputTensorShape(model, brainParameters, sensors, observableAttributeTotalSize)
);
failedModelChecks.AddRange(
CheckOutputTensorShape(model, brainParameters, actuatorComponents)

/// <param name="memory">
/// The memory size that the model is expecting.
/// </param>
/// <param name="sensorComponents">Array of attached sensor components</param>
/// <param name="sensors">Array of attached sensor components</param>
/// <returns>
/// An IEnumerable of strings corresponding to the failed input presence checks.
/// </returns>

int memory,
SensorComponent[] sensorComponents
ISensor[] sensors
)
{
var failedModelChecks = new List<string>();

// If there are not enough visual observation inputs compared to what the
// sensors expect.
var visObsIndex = 0;
for (var sensorIndex = 0; sensorIndex < sensorComponents.Length; sensorIndex++)
for (var sensorIndex = 0; sensorIndex < sensors.Length; sensorIndex++)
var sensor = sensorComponents[sensorIndex];
var sensor = sensors[sensorIndex];
if (sensor.GetObservationShape().Length == 3)
{
if (!tensorsNames.Contains(

/// Checks that the shape of the visual observation input placeholder is the same as the corresponding sensor.
/// </summary>
/// <param name="tensorProxy">The tensor that is expected by the model</param>
/// <param name="sensorComponent">The sensor that produces the visual observation.</param>
/// <param name="sensor">The sensor that produces the visual observation.</param>
TensorProxy tensorProxy, SensorComponent sensorComponent)
TensorProxy tensorProxy, ISensor sensor)
var shape = sensorComponent.GetObservationShape();
var shape = sensor.GetObservationShape();
var heightBp = shape[0];
var widthBp = shape[1];
var pixelBp = shape[2];

/// Checks that the shape of the rank 2 observation input placeholder is the same as the corresponding sensor.
/// </summary>
/// <param name="tensorProxy">The tensor that is expected by the model</param>
/// <param name="sensorComponent">The sensor that produces the visual observation.</param>
/// <param name="sensor">The sensor that produces the visual observation.</param>
TensorProxy tensorProxy, SensorComponent sensorComponent)
TensorProxy tensorProxy, ISensor sensor)
var shape = sensorComponent.GetObservationShape();
var shape = sensor.GetObservationShape();
var dim1Bp = shape[0];
var dim2Bp = shape[1];
var dim1T = tensorProxy.Channels;

/// <param name="brainParameters">
/// The BrainParameters that are used to verify the compatibility with the InferenceEngine
/// </param>
/// <param name="sensorComponents">Attached sensors</param>
/// <param name="sensors">Attached sensors</param>
Model model, BrainParameters brainParameters, SensorComponent[] sensorComponents,
Model model, BrainParameters brainParameters, ISensor[] sensors,
new Dictionary<string, Func<BrainParameters, TensorProxy, SensorComponent[], int, string>>()
new Dictionary<string, Func<BrainParameters, TensorProxy, ISensor[], int, string>>()
{
{TensorNames.VectorObservationPlaceholder, CheckVectorObsShape},
{TensorNames.PreviousActionPlaceholder, CheckPreviousActionShape},

}
var visObsIndex = 0;
for (var sensorIndex = 0; sensorIndex < sensorComponents.Length; sensorIndex++)
for (var sensorIndex = 0; sensorIndex < sensors.Length; sensorIndex++)
var sensorComponent = sensorComponents[sensorIndex];
if (sensorComponent.GetObservationShape().Length == 3)
var sens = sensors[sensorIndex];
if (sens.GetObservationShape().Length == 3)
(bp, tensor, scs, i) => CheckVisualObsShape(tensor, sensorComponent);
(bp, tensor, scs, i) => CheckVisualObsShape(tensor, sens);
if (sensorComponent.GetObservationShape().Length == 2)
if (sens.GetObservationShape().Length == 2)
(bp, tensor, scs, i) => CheckRankTwoObsShape(tensor, sensorComponent);
(bp, tensor, scs, i) => CheckRankTwoObsShape(tensor, sens);
}
}

else
{
var tester = tensorTester[tensor.name];
var error = tester.Invoke(brainParameters, tensor, sensorComponents, observableAttributeTotalSize);
var error = tester.Invoke(brainParameters, tensor, sensors, observableAttributeTotalSize);
if (error != null)
{
failedModelChecks.Add(error);

/// The BrainParameters that are used to verify the compatibility with the InferenceEngine
/// </param>
/// <param name="tensorProxy">The tensor that is expected by the model</param>
/// <param name="sensorComponents">Array of attached sensor components</param>
/// <param name="sensors">Array of attached sensor components</param>
/// <param name="observableAttributeTotalSize">Sum of the sizes of all ObservableAttributes.</param>
/// <returns>
/// If the Check failed, returns a string containing information about why the

BrainParameters brainParameters, TensorProxy tensorProxy, SensorComponent[] sensorComponents,
BrainParameters brainParameters, TensorProxy tensorProxy, ISensor[] sensors,
int observableAttributeTotalSize)
{
var vecObsSizeBp = brainParameters.VectorObservationSize;

var totalVectorSensorSize = 0;
foreach (var sensorComp in sensorComponents)
foreach (var sens in sensors)
if (sensorComp.GetObservationShape().Length == 1)
if ((sens.GetObservationShape().Length == 1))
totalVectorSensorSize += sensorComp.GetObservationShape()[0];
totalVectorSensorSize += sens.GetObservationShape()[0];
totalVectorSensorSize += observableAttributeTotalSize;
if (vecObsSizeBp * numStackedVector + totalVectorSensorSize != totalVecObsSizeT)
if (totalVectorSensorSize != totalVecObsSizeT)
foreach (var sensorComp in sensorComponents)
foreach (var sensorComp in sensors)
{
if (sensorComp.GetObservationShape().Length == 1)
{

$"but received: \n" +
$"Vector observations: {vecObsSizeBp} x {numStackedVector}\n" +
$"Total [Observable] attributes: {observableAttributeTotalSize}\n" +
$"SensorComponent sizes: {sensorSizes}.";
$"Sensor sizes: {sensorSizes}.";
}
return null;
}

/// The BrainParameters that are used to verify the compatibility with the InferenceEngine
/// </param>
/// <param name="tensorProxy"> The tensor that is expected by the model</param>
/// <param name="sensorComponents">Array of attached sensor components (unused).</param>
/// <param name="sensors">Array of attached sensor components (unused).</param>
SensorComponent[] sensorComponents, int observableAttributeTotalSize)
ISensor[] sensors, int observableAttributeTotalSize)
{
var numberActionsBp = brainParameters.ActionSpec.NumDiscreteActions;
var numberActionsT = tensorProxy.shape[tensorProxy.shape.Length - 1];

28
com.unity.ml-agents/Runtime/Inference/ModelRunner.cs


using System;
using System.Collections.Generic;
using Unity.Barracuda;
using UnityEngine.Profiling;

SensorShapeValidator m_SensorShapeValidator = new SensorShapeValidator();
bool m_VisualObservationsInitialized;
bool m_ObservationsInitialized;
/// <summary>
/// Initializes the Brain with the Model that it will use when selecting actions for

public ModelRunner(
NNModel model,
ActionSpec actionSpec,
InferenceDevice inferenceDevice = InferenceDevice.CPU,
InferenceDevice inferenceDevice,
int seed = 0)
{
Model barracudaModel;

D.logEnabled = m_Verbose;
barracudaModel = ModelLoader.Load(model);
var executionDevice = inferenceDevice == InferenceDevice.GPU
? WorkerFactory.Type.ComputePrecompiled
: WorkerFactory.Type.CSharp;
WorkerFactory.Type executionDevice;
switch (inferenceDevice)
{
case InferenceDevice.CPU:
executionDevice = WorkerFactory.Type.CSharp;
break;
case InferenceDevice.GPU:
executionDevice = WorkerFactory.Type.ComputePrecompiled;
break;
case InferenceDevice.Burst:
executionDevice = WorkerFactory.Type.CSharpBurst;
break;
default:
executionDevice = WorkerFactory.Type.CSharpBurst;
break;
}
m_Engine = WorkerFactory.CreateWorker(executionDevice, barracudaModel, m_Verbose);
}
else

{
return;
}
if (!m_VisualObservationsInitialized)
if (!m_ObservationsInitialized)
m_VisualObservationsInitialized = true;
m_ObservationsInitialized = true;
}
Profiler.BeginSample("ModelRunner.DecideAction");

17
com.unity.ml-agents/Runtime/Inference/Utils/Multinomial.cs


/// to be monotonic (always increasing). If the CMF is scaled, then the last entry in
/// the array will be 1.0.
/// </param>
/// <returns>A sampled index from the CMF ranging from 0 to cmf.Length-1.</returns>
public int Sample(float[] cmf)
/// <param name="branchSize">The number of possible branches, i.e. the effective size of the cmf array.</param>
/// <returns>A sampled index from the CMF ranging from 0 to branchSize-1.</returns>
public int Sample(float[] cmf, int branchSize)
var p = (float)m_Random.NextDouble() * cmf[cmf.Length - 1];
var p = (float)m_Random.NextDouble() * cmf[branchSize - 1];
var cls = 0;
while (cmf[cls] < p)
{

return cls;
}
/// <summary>
/// Samples from the Multinomial distribution defined by the provided cumulative
/// mass function.
/// </summary>
/// <returns>A sampled index from the CMF ranging from 0 to cmf.Length-1.</returns>
public int Sample(float[] cmf)
{
return Sample(cmf, cmf.Length);
}
}
}
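A hypothetical call site for the new overload, matching how the applier reuses one scratch buffer sized for the largest branch (the values here are made up):
```c#
// Illustrative only: Multinomial lives in Unity.MLAgents.Inference.Utils and
// is internal, so this would run inside the ML-Agents assembly (e.g. its tests).
var multinomial = new Multinomial(2018); // fixed seed for reproducibility
// Scratch buffer sized for the largest branch; only the first three entries
// are meaningful for this hypothetical three-action branch.
float[] cdfBuffer = { 0.1f, 0.3f, 1.0f, 0.0f, 0.0f };
int sampledAction = multinomial.Sample(cdfBuffer, 3); // index in [0, 2]
```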

12
com.unity.ml-agents/Runtime/Policies/BarracudaPolicy.cs


public enum InferenceDevice
{
/// <summary>
/// CPU inference
/// CPU inference. Corresponds to WorkerFactory.Type.CSharp in Barracuda.
/// Burst is recommended instead; this is kept for legacy compatibility.
/// GPU inference
/// GPU inference. Corresponds to WorkerFactory.Type.ComputePrecompiled in Barracuda.
GPU = 1
GPU = 1,
/// <summary>
/// CPU inference using Burst. Corresponds to WorkerFactory.Type.CSharpBurst in Barracuda.
/// </summary>
Burst = 2,
}
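As a hedged usage sketch, the device can also be selected at runtime through the agent's `BehaviorParameters` component (the component lookup and the choice of Burst here are illustrative, not the only option):
```c#
using Unity.MLAgents.Policies;
using UnityEngine;

public class InferenceDeviceSketch : MonoBehaviour
{
    void Awake()
    {
        // Hypothetical: switch this agent's model execution to the Burst worker.
        var behaviorParameters = GetComponent<BehaviorParameters>();
        behaviorParameters.InferenceDevice = InferenceDevice.Burst;
    }
}
```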
/// <summary>

2
com.unity.ml-agents/Runtime/Policies/BehaviorParameters.cs


}
[HideInInspector, SerializeField]
InferenceDevice m_InferenceDevice;
InferenceDevice m_InferenceDevice = InferenceDevice.Burst;
/// <summary>
/// How inference is performed for this Agent's model.

21
com.unity.ml-agents/Runtime/Sensors/BufferSensor.cs


namespace Unity.MLAgents.Sensors
{
internal class BufferSensor : ISensor, IDimensionPropertiesSensor, IBuiltInSensor
/// <summary>
/// A sensor that allows observing a variable number of entities.
/// </summary>
public class BufferSensor : ISensor, IDimensionPropertiesSensor, IBuiltInSensor
static DimensionProperty[] s_DimensionProperties = new DimensionProperty[]{
DimensionProperty.VariableSize,
DimensionProperty.None
};
public BufferSensor(int maxNumberObs, int obsSize)
{
m_MaxNumObs = maxNumberObs;

/// <inheritdoc/>
public DimensionProperty[] GetDimensionProperties()
{
return new DimensionProperty[]{
DimensionProperty.VariableSize,
DimensionProperty.None
};
return s_DimensionProperties;
}
/// <summary>

/// <param name="obs"> The float array observation</param>
public void AppendObservation(float[] obs)
{
if (obs.Length != m_ObsSize)
{
throw new UnityAgentsException(
"The BufferSensor was expecting an observation of size " +
$"{m_ObsSize} but received {obs.Length} observations instead."
);
}
if (m_CurrentNumObservables >= m_MaxNumObs)
{
return;

14
com.unity.ml-agents/Runtime/Sensors/BufferSensorComponent.cs


{
/// <summary>
/// A component for BufferSensor.
/// A SensorComponent that creates a <see cref="BufferSensor"/>.
internal class BufferSensorComponent : SensorComponent
public class BufferSensorComponent : SensorComponent
/// <summary>
/// This is how many floats each entity will be represented with. This number
/// is fixed and all entities must have the same representation.
/// </summary>
/// <summary>
/// This is the maximum number of entities the `BufferSensor` will be able to
/// collect.
/// </summary>
private BufferSensor m_Sensor;
/// <inheritdoc/>

17
com.unity.ml-agents/Runtime/Sensors/CameraSensor.cs


/// <summary>
/// A sensor that wraps a Camera object to generate visual observations for an agent.
/// </summary>
public class CameraSensor : ISensor, IBuiltInSensor
public class CameraSensor : ISensor, IBuiltInSensor, IDimensionPropertiesSensor
{
Camera m_Camera;
int m_Width;

int[] m_Shape;
SensorCompressionType m_CompressionType;
static DimensionProperty[] s_DimensionProperties = new DimensionProperty[] {
DimensionProperty.TranslationalEquivariance,
DimensionProperty.TranslationalEquivariance,
DimensionProperty.None };
/// <summary>
/// The Camera used for rendering the sensor observations.

public int[] GetObservationShape()
{
return m_Shape;
}
/// <summary>
/// Accessor for the dimension properties of a camera sensor. A camera sensor
/// has translational equivariance along width and height, and no property along
/// the channels dimension.
/// </summary>
/// <returns></returns>
public DimensionProperty[] GetDimensionProperties()
{
return s_DimensionProperties;
}
/// <summary>

2
com.unity.ml-agents/Runtime/Sensors/IDimensionPropertiesSensor.cs


/// The Dimension property flags of the observations
/// </summary>
[System.Flags]
internal enum DimensionProperty
public enum DimensionProperty
{
/// <summary>
/// No properties specified.

14
com.unity.ml-agents/Runtime/SideChannels/SideChannel.cs


using System.Collections.Generic;
using System;
using UnityEngine;
namespace Unity.MLAgents.SideChannels
{

internal void ProcessMessage(byte[] msg)
{
using (var incomingMsg = new IncomingMessage(msg))
try
{
using (var incomingMsg = new IncomingMessage(msg))
{
OnMessageReceived(incomingMsg);
}
}
catch (Exception ex)
OnMessageReceived(incomingMsg);
// Catch all errors in the sidechannel processing, so that a single
// bad SideChannel implementation doesn't take everything down with it.
Debug.LogError($"Error processing SideChannel message: {ex}.\nThe message will be skipped.");
}
}
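The try/catch above means a throwing `OnMessageReceived` only logs and skips that one message. A minimal custom channel following the documented `SideChannel` pattern (the GUID and class name here are placeholders):
```c#
using System;
using UnityEngine;
using Unity.MLAgents.SideChannels;

public class StringLogSideChannel : SideChannel
{
    public StringLogSideChannel()
    {
        // Placeholder GUID; each channel type needs its own stable id.
        ChannelId = new Guid("621f0a70-4f87-11ea-a6bf-784f4387d1f7");
    }

    protected override void OnMessageReceived(IncomingMessage msg)
    {
        // If this throws, ProcessMessage logs the error and skips the message.
        Debug.Log("From Python: " + msg.ReadString());
    }
}
```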

7
com.unity.ml-agents/Runtime/StatsRecorder.cs


/// <summary>
/// Values within the summary period are summed up before reporting.
/// </summary>
Sum = 2
Sum = 2,
/// <summary>
/// Values within the summary period are reported as a histogram.
/// </summary>
Histogram = 3
}
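A hypothetical call recording a stat with the new aggregation mode (the stat key and value are placeholders):
```c#
using Unity.MLAgents;

public static class StatsSketch
{
    public static void RecordJump(float jumpHeight)
    {
        // Values added within a summary period are reported as a histogram.
        Academy.Instance.StatsRecorder.Add(
            "Agent/JumpHeight", jumpHeight, StatAggregationMethod.Histogram);
    }
}
```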
/// <summary>

4
com.unity.ml-agents/Runtime/Timer.cs


SaveJsonTimers(fs);
fs.Close();
}
catch (IOException)
catch (SystemException)
// It's possible we don't have write access to the directory.
// We may not have write access to the directory.
Debug.LogWarning($"Unable to save timers to file {filename}");
}
#endif

19
com.unity.ml-agents/Tests/Editor/Communicator/RpcCommunicatorTests.cs


{
var unityVerStr = "1.0.0";
var pythonVerStr = "1.0.0";
var pythonPackageVerStr = "0.16.0";
pythonVerStr,
pythonPackageVerStr));
pythonVerStr));
pythonVerStr,
pythonPackageVerStr));
pythonVerStr));
pythonVerStr,
pythonPackageVerStr));
pythonVerStr));
pythonVerStr,
pythonPackageVerStr));
pythonVerStr));
pythonVerStr,
pythonPackageVerStr));
pythonVerStr));
pythonVerStr,
pythonPackageVerStr));
pythonVerStr));
}
}

198
com.unity.ml-agents/Tests/Editor/DiscreteActionOutputApplierTest.cs


using System;
using System.Collections.Generic;
using UnityEngine;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Inference.Utils;
namespace Unity.MLAgents.Tests
{

public void TestEvalP()
{
var m = new Multinomial(2018);
var src = new TensorProxy
{
data = new Tensor(1, 3, new[] { 0.1f, 0.2f, 0.7f }),
valueType = TensorProxy.TensorType.FloatingPoint
};
var dst = new TensorProxy
{
data = new Tensor(1, 3),
valueType = TensorProxy.TensorType.FloatingPoint
};
DiscreteActionOutputApplier.Eval(src, dst, m);
float[] reference = { 2, 2, 1 };
for (var i = 0; i < dst.data.length; i++)
{
Assert.AreEqual(reference[i], dst.data[i]);
++i;
}
}
[Test]
public void TestEvalLogits()
public void TestDiscreteApply()
var m = new Multinomial(2018);
var actionSpec = ActionSpec.MakeDiscrete(3, 2);
const float smallLogProb = -1000.0f;
const float largeLogProb = -1.0f;
var src = new TensorProxy
var logProbs = new TensorProxy
1,
3,
new[] { Mathf.Log(0.1f) - 50, Mathf.Log(0.2f) - 50, Mathf.Log(0.7f) - 50 }),
valueType = TensorProxy.TensorType.FloatingPoint
};
var dst = new TensorProxy
{
data = new Tensor(1, 3),
valueType = TensorProxy.TensorType.FloatingPoint
};
DiscreteActionOutputApplier.Eval(src, dst, m);
float[] reference = { 2, 2, 2 };
for (var i = 0; i < dst.data.length; i++)
{
Assert.AreEqual(reference[i], dst.data[i]);
++i;
}
}
[Test]
public void TestEvalBatching()
{
var m = new Multinomial(2018);
var src = new TensorProxy
{
data = new Tensor(2, 3, new[]
{
Mathf.Log(0.1f) - 50, Mathf.Log(0.2f) - 50, Mathf.Log(0.7f) - 50,
Mathf.Log(0.3f) - 25, Mathf.Log(0.4f) - 25, Mathf.Log(0.3f) - 25
}),
valueType = TensorProxy.TensorType.FloatingPoint
};
var dst = new TensorProxy
{
data = new Tensor(2, 3),
valueType = TensorProxy.TensorType.FloatingPoint
};
DiscreteActionOutputApplier.Eval(src, dst, m);
float[] reference = { 2, 2, 2, 0, 1, 0 };
for (var i = 0; i < dst.data.length; i++)
{
Assert.AreEqual(reference[i], dst.data[i]);
++i;
}
}
[Test]
public void TestSrcInt()
{
var m = new Multinomial(2018);
var src = new TensorProxy
{
valueType = TensorProxy.TensorType.Integer
};
Assert.Throws<NotImplementedException>(
() => DiscreteActionOutputApplier.Eval(src, null, m));
}
[Test]
public void TestDstInt()
{
var m = new Multinomial(2018);
var src = new TensorProxy
{
2,
5,
new[]
{
smallLogProb, smallLogProb, largeLogProb, // Agent 0, branch 0
smallLogProb, largeLogProb, // Agent 0, branch 1
largeLogProb, smallLogProb, smallLogProb, // Agent 1, branch 0
largeLogProb, smallLogProb, // Agent 1, branch 1
}),
var dst = new TensorProxy
{
valueType = TensorProxy.TensorType.Integer
};
var applier = new DiscreteActionOutputApplier(actionSpec, 2020, null);
var agentIds = new List<int> { 42, 1337 };
var actionBuffers = new Dictionary<int, ActionBuffers>();
actionBuffers[42] = new ActionBuffers(actionSpec);
actionBuffers[1337] = new ActionBuffers(actionSpec);
Assert.Throws<ArgumentException>(
() => DiscreteActionOutputApplier.Eval(src, dst, m));
}
[Test]
public void TestSrcDataNull()
{
var m = new Multinomial(2018);
applier.Apply(logProbs, agentIds, actionBuffers);
Assert.AreEqual(2, actionBuffers[42].DiscreteActions[0]);
Assert.AreEqual(1, actionBuffers[42].DiscreteActions[1]);
var src = new TensorProxy
{
valueType = TensorProxy.TensorType.FloatingPoint
};
var dst = new TensorProxy
{
valueType = TensorProxy.TensorType.FloatingPoint
};
Assert.Throws<ArgumentNullException>(
() => DiscreteActionOutputApplier.Eval(src, dst, m));
}
[Test]
public void TestDstDataNull()
{
var m = new Multinomial(2018);
var src = new TensorProxy
{
valueType = TensorProxy.TensorType.FloatingPoint,
data = new Tensor(0, 1)
};
var dst = new TensorProxy
{
valueType = TensorProxy.TensorType.FloatingPoint
};
Assert.Throws<ArgumentNullException>(
() => DiscreteActionOutputApplier.Eval(src, dst, m));
}
[Test]
public void TestUnequalBatchSize()
{
var m = new Multinomial(2018);
var src = new TensorProxy
{
valueType = TensorProxy.TensorType.FloatingPoint,
data = new Tensor(1, 1)
};
var dst = new TensorProxy
{
valueType = TensorProxy.TensorType.FloatingPoint,
data = new Tensor(2, 1)
};
Assert.Throws<ArgumentException>(
() => DiscreteActionOutputApplier.Eval(src, dst, m));
Assert.AreEqual(0, actionBuffers[1337].DiscreteActions[0]);
Assert.AreEqual(0, actionBuffers[1337].DiscreteActions[1]);
}
}
}

13
com.unity.ml-agents/Tests/Editor/ModelRunnerTest.cs


[Test]
public void TestCreation()
{
var modelRunner = new ModelRunner(continuousONNXModel, GetContinuous2vis8vec2actionActionSpec());
var inferenceDevice = InferenceDevice.Burst;
var modelRunner = new ModelRunner(continuousONNXModel, GetContinuous2vis8vec2actionActionSpec(), inferenceDevice);
modelRunner = new ModelRunner(discreteONNXModel, GetDiscrete1vis0vec_2_3action_recurrModelActionSpec());
modelRunner = new ModelRunner(discreteONNXModel, GetDiscrete1vis0vec_2_3action_recurrModelActionSpec(), inferenceDevice);
modelRunner = new ModelRunner(hybridONNXModel, GetHybrid0vis53vec_3c_2dActionSpec());
modelRunner = new ModelRunner(hybridONNXModel, GetHybrid0vis53vec_3c_2dActionSpec(), inferenceDevice);
modelRunner = new ModelRunner(continuousNNModel, GetContinuous2vis8vec2actionActionSpec());
modelRunner = new ModelRunner(continuousNNModel, GetContinuous2vis8vec2actionActionSpec(), inferenceDevice);
modelRunner = new ModelRunner(discreteNNModel, GetDiscrete1vis0vec_2_3action_recurrModelActionSpec());
modelRunner = new ModelRunner(discreteNNModel, GetDiscrete1vis0vec_2_3action_recurrModelActionSpec(), inferenceDevice);
modelRunner.Dispose();
}

public void TestRunModel()
{
var actionSpec = GetDiscrete1vis0vec_2_3action_recurrModelActionSpec();
var modelRunner = new ModelRunner(discreteONNXModel, actionSpec);
var modelRunner = new ModelRunner(discreteONNXModel, actionSpec, InferenceDevice.Burst);
var info1 = new AgentInfo();
info1.episodeId = 1;
modelRunner.PutObservations(info1, new[] { sensor_21_20_3.CreateSensor() }.ToList());

30
com.unity.ml-agents/Tests/Editor/ParameterLoaderTest.cs


var errors = BarracudaModelParamLoader.CheckModel(
model, validBrainParameters,
new SensorComponent[] { sensor_21_20_3, sensor_20_22_3 }, new ActuatorComponent[0]
new ISensor[] { new VectorSensor(8), sensor_21_20_3.CreateSensor(), sensor_20_22_3.CreateSensor() }, new ActuatorComponent[0]
);
Assert.AreEqual(0, errors.Count()); // There should not be any errors
}

var errors = BarracudaModelParamLoader.CheckModel(
model, validBrainParameters,
new SensorComponent[] { sensor_21_20_3 }, new ActuatorComponent[0]
new ISensor[] { sensor_21_20_3.CreateSensor() }, new ActuatorComponent[0]
);
Assert.AreEqual(0, errors.Count()); // There should not be any errors
}

var errors = BarracudaModelParamLoader.CheckModel(
model, validBrainParameters,
new SensorComponent[] { }, new ActuatorComponent[0]
new ISensor[] { new VectorSensor(validBrainParameters.VectorObservationSize) }, new ActuatorComponent[0]
);
Assert.AreEqual(0, errors.Count()); // There should not be any errors
}

brainParameters.VectorObservationSize = 9; // Invalid observation
var errors = BarracudaModelParamLoader.CheckModel(
model, brainParameters,
new SensorComponent[] { sensor_21_20_3, sensor_20_22_3 }, new ActuatorComponent[0]
new ISensor[] { sensor_21_20_3.CreateSensor(), sensor_20_22_3.CreateSensor() }, new ActuatorComponent[0]
);
Assert.Greater(errors.Count(), 0);

model, brainParameters,
new SensorComponent[] { sensor_21_20_3, sensor_20_22_3 }, new ActuatorComponent[0]
new ISensor[] { sensor_21_20_3.CreateSensor(), sensor_20_22_3.CreateSensor() }, new ActuatorComponent[0]
);
Assert.Greater(errors.Count(), 0);
}

var brainParameters = GetDiscrete1vis0vec_2_3action_recurrModelBrainParameters();
brainParameters.VectorObservationSize = 1; // Invalid observation
var errors = BarracudaModelParamLoader.CheckModel(model, brainParameters, new SensorComponent[] { sensor_21_20_3 }, new ActuatorComponent[0]);
var errors = BarracudaModelParamLoader.CheckModel(model, brainParameters, new ISensor[] { sensor_21_20_3.CreateSensor() }, new ActuatorComponent[0]);
Assert.Greater(errors.Count(), 0);
}

brainParameters.VectorObservationSize = 9; // Invalid observation
var errors = BarracudaModelParamLoader.CheckModel(
model, brainParameters,
new SensorComponent[] { }, new ActuatorComponent[0]
new ISensor[] { }, new ActuatorComponent[0]
);
Assert.Greater(errors.Count(), 0);

model, brainParameters,
new SensorComponent[] { }, new ActuatorComponent[0]
new ISensor[] { }, new ActuatorComponent[0]
);
Assert.Greater(errors.Count(), 0);
}

var brainParameters = GetContinuous2vis8vec2actionBrainParameters();
brainParameters.ActionSpec = ActionSpec.MakeContinuous(3); // Invalid action
var errors = BarracudaModelParamLoader.CheckModel(model, brainParameters, new SensorComponent[] { sensor_21_20_3, sensor_20_22_3 }, new ActuatorComponent[0]);
var errors = BarracudaModelParamLoader.CheckModel(model, brainParameters, new ISensor[] { sensor_21_20_3.CreateSensor(), sensor_20_22_3.CreateSensor() }, new ActuatorComponent[0]);
errors = BarracudaModelParamLoader.CheckModel(model, brainParameters, new SensorComponent[] { sensor_21_20_3, sensor_20_22_3 }, new ActuatorComponent[0]);
errors = BarracudaModelParamLoader.CheckModel(model, brainParameters, new ISensor[] { sensor_21_20_3.CreateSensor(), sensor_20_22_3.CreateSensor() }, new ActuatorComponent[0]);
Assert.Greater(errors.Count(), 0);
}

var brainParameters = GetDiscrete1vis0vec_2_3action_recurrModelBrainParameters();
brainParameters.ActionSpec = ActionSpec.MakeDiscrete(3, 3); // Invalid action
var errors = BarracudaModelParamLoader.CheckModel(model, brainParameters, new SensorComponent[] { sensor_21_20_3 }, new ActuatorComponent[0]);
var errors = BarracudaModelParamLoader.CheckModel(model, brainParameters, new ISensor[] { sensor_21_20_3.CreateSensor() }, new ActuatorComponent[0]);
errors = BarracudaModelParamLoader.CheckModel(model, brainParameters, new SensorComponent[] { sensor_21_20_3 }, new ActuatorComponent[0]);
errors = BarracudaModelParamLoader.CheckModel(model, brainParameters, new ISensor[] { sensor_21_20_3.CreateSensor() }, new ActuatorComponent[0]);
Assert.Greater(errors.Count(), 0);
}

var brainParameters = GetHybridBrainParameters();
brainParameters.ActionSpec = new ActionSpec(3, new[] { 3 }); // Invalid discrete action size
var errors = BarracudaModelParamLoader.CheckModel(model, brainParameters, new SensorComponent[] { sensor_21_20_3, sensor_20_22_3 }, new ActuatorComponent[0]);
var errors = BarracudaModelParamLoader.CheckModel(model, brainParameters, new ISensor[] { sensor_21_20_3.CreateSensor(), sensor_20_22_3.CreateSensor() }, new ActuatorComponent[0]);
errors = BarracudaModelParamLoader.CheckModel(model, brainParameters, new SensorComponent[] { sensor_21_20_3, sensor_20_22_3 }, new ActuatorComponent[0]);
errors = BarracudaModelParamLoader.CheckModel(model, brainParameters, new ISensor[] { sensor_21_20_3.CreateSensor(), sensor_20_22_3.CreateSensor() }, new ActuatorComponent[0]);
Assert.Greater(errors.Count(), 0);
}

var brainParameters = GetContinuous2vis8vec2actionBrainParameters();
var errors = BarracudaModelParamLoader.CheckModel(null, brainParameters, new SensorComponent[] { sensor_21_20_3, sensor_20_22_3 }, new ActuatorComponent[0]);
var errors = BarracudaModelParamLoader.CheckModel(null, brainParameters, new ISensor[] { sensor_21_20_3.CreateSensor(), sensor_20_22_3.CreateSensor() }, new ActuatorComponent[0]);
Assert.Greater(errors.Count(), 0);
}
}

2
com.unity.ml-agents/package.json


"unity": "2018.4",
"description": "Use state-of-the-art machine learning to create intelligent character behaviors in any Unity environment (games, robotics, film, etc.).",
"dependencies": {
"com.unity.barracuda": "1.2.1-preview",
"com.unity.barracuda": "1.3.0-preview",
"com.unity.modules.imageconversion": "1.0.0",
"com.unity.modules.jsonserialize": "1.0.0",
"com.unity.modules.physics": "1.0.0",

2
docs/Installation.md


installing ML-Agents. Activate your virtual environment and run from the command line:
```sh
pip3 install torch==1.7.0 -f https://download.pytorch.org/whl/torch_stable.html
pip3 install torch~=1.7.1 -f https://download.pytorch.org/whl/torch_stable.html
```
Note that on Windows, you may also need Microsoft's

64
docs/Learning-Environment-Design-Agents.md


- [Visual Observation Summary & Best Practices](#visual-observation-summary--best-practices)
- [Raycast Observations](#raycast-observations)
- [RayCast Observation Summary & Best Practices](#raycast-observation-summary--best-practices)
- [Variable Length Observations](#variable-length-observations)
- [Variable Length Observation Summary & Best Practices](#variable-length-observation-summary--best-practices)
- [Actions and Actuators](#actions-and-actuators)
- [Continuous Actions](#continuous-actions)
- [Discrete Actions](#discrete-actions)

#### Observable Fields and Properties
Another approach is to define the relevant observations as fields or properties
on your Agent class, and annotate them with an `ObservableAttribute`. For
example, in the 3DBall example above, the rigid body velocity could be observed
example, in the Ball3DHardAgent, the difference between positions could be observed
public class Ball3DAgent : Agent {
public class Ball3DHardAgent : Agent {
[Observable]
public Vector3 RigidBodyVelocity
[Observable(numStackedObservations: 9)]
Vector3 PositionDelta
get { return m_BallRb.velocity; }
get
{
return ball.transform.position - gameObject.transform.position;
}
}
}
```

for the agent that doesn't require a fully rendered image to convey.
- Use as few rays and tags as necessary to solve the problem in order to improve
learning stability and agent performance.
### Variable Length Observations
It is possible for agents to collect observations from a varying number of
GameObjects by using a `BufferSensor`.
You can add a `BufferSensor` to your Agent by adding a `BufferSensorComponent` to
its GameObject.
The `BufferSensor` can be useful in situations in which the Agent must pay
attention to a varying number of entities (for example, a varying number of
enemies or projectiles).
On the trainer side, the `BufferSensor`
is processed using an attention module. More information about attention
mechanisms can be found [here](https://arxiv.org/abs/1706.03762). Training or
doing inference with variable length observations can be slower than using
a flat vector observation. However, attention mechanisms enable solving
problems that require comparative reasoning between entities in a scene
such as our [Sorter environment](Learning-Environment-Examples.md#sorter).
Note that even though the `BufferSensor` can process a variable number of
entities, you still need to define a maximum number of entities. This is
because our network architecture needs to know in advance what the shape of the
observations will be. If fewer entities are observed than the maximum, the
observation will be padded with zeros and the trainer will ignore
the padded observations. Note that attention layers are invariant to
the order of the entities, so there is no need to properly "order" the
entities before feeding them into the `BufferSensor`.
The `BufferSensorComponent` Editor inspector has two arguments:
- `Observation Size` : This is how many floats each entity will be
represented with. This number is fixed and all entities must
have the same representation. For example, if the entities you want to
put into the `BufferSensor` have position and speed as their relevant
information, then the `Observation Size` should be 6 floats.
- `Maximum Number of Entities` : This is the maximum number of entities
the `BufferSensor` will be able to collect.
To add an entity's observations to a `BufferSensorComponent`, you need
to call `BufferSensorComponent.AppendObservation()`
with a float array of size `Observation Size` as its argument (see the sketch below).
__Note__: Currently, the observations put into the `BufferSensor` are
not normalized; you will need to normalize your observations manually
to between -1 and 1.
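As a concrete sketch (the field names, entity type, and normalization ranges are hypothetical), an Agent tracking entities that each have a position and a velocity, with an `Observation Size` of 6, could append each entity like this:
```c#
// m_BufferSensor is a BufferSensorComponent with Observation Size = 6;
// m_Enemies and Velocity are hypothetical.
foreach (var enemy in m_Enemies)
{
    var obs = new float[6];
    // Normalize to [-1, 1] using assumed scene ranges.
    obs[0] = enemy.transform.position.x / 10f;
    obs[1] = enemy.transform.position.y / 10f;
    obs[2] = enemy.transform.position.z / 10f;
    obs[3] = enemy.Velocity.x / 5f;
    obs[4] = enemy.Velocity.y / 5f;
    obs[5] = enemy.Velocity.z / 5f;
    m_BufferSensor.AppendObservation(obs);
}
```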
#### Variable Length Observation Summary & Best Practices
- Attach a `BufferSensorComponent` to use it.
- Call `BufferSensorComponent.AppendObservation()` to add the observations
of an entity to the `BufferSensor`.
- Normalize the entities' observations before feeding them into the `BufferSensor`.
## Actions and Actuators

27
docs/Learning-Environment-Examples.md


- 37.6 for vector observations
- 34.2 for simple heuristic (pick a random valid move)
- 37.0 for greedy heuristic (pick the highest-scoring valid move)
## Sorter
![Sorter](images/sorter.png)
- Set-up: The Agent is in a circular room with numbered tiles. The values of the
tiles are random between 1 and 20. The tiles present in the room are randomized
at each episode. When the Agent visits a tile, it turns green.
- Goal: Visit all the tiles in ascending order.
- Agents: The environment contains a single Agent
- Agent Reward Function:
- -0.0002 existential penalty.
- +1 for visiting the right tile.
- -1 for visiting the wrong tile.
- BehaviorParameters:
- Vector Observations : 4 : 2 floats for Position and 2 floats for orientation
- Variable Length Observations : Between 1 and 20 entities (one for each tile)
each with 23 observations: the first 20 are a one-hot encoding of the value of the tile,
the 21st and 22nd represent the position of the tile relative to the Agent, and the 23rd
is `1` if the tile was visited and `0` otherwise.
- Actions: 3 discrete branched actions corresponding to forward, backward,
sideways movement, as well as rotation.
- Float Properties: One
- num_tiles: The maximum number of tiles to sample.
- Default: 2
- Recommended Minimum: 1
- Recommended Maximum: 20
- Benchmark Mean Reward: Depends on the number of tiles.

9
docs/Migrating.md


- `VectorSensor.AddObservation(IEnumerable<float>)` is deprecated. Use `VectorSensor.AddObservation(IList<float>)`
instead.
- `ObservationWriter.AddRange()` is deprecated. Use `ObservationWriter.AddList()` instead.
- `ActuatorComponent.CreateActuator()` is deprecated. Please override `ActuatorComponent.CreateActuators()`
instead. Since `ActuatorComponent.CreateActuator()` is abstract, you will still need to override it in your
class until it is removed. It is only ever called if you don't override `ActuatorComponent.CreateActuators`.
You can suppress the warnings by surrounding the method with the following pragma:
```c#
#pragma warning disable 672
public IActuator CreateActuator() { ... }
#pragma warning restore 672
```
# Migrating

2
docs/Training-Configuration-File.md


- LSTM does not work well with continuous actions. Please use
discrete actions for better results.
- Since the memories must be sent back and forth between Python and Unity, using
too large a `memory_size` will slow down training.
- Adding a recurrent layer increases the complexity of the neural network; it is
recommended to decrease `num_layers` when using recurrent layers.
- It is required that `memory_size` be divisible by 2.

2
gym-unity/README.md


def main():
unity_env = UnityEnvironment("./envs/GridWorld")
env = UnityToGymWrapper(unity_env, 0, uint8_visual=True)
logger.configure('./logs') # Çhange to log in a different directory
logger.configure('./logs') # Change to log in a different directory
act = deepq.learn(
env,
"cnn", # conv_only is also a good choice for GridWorld

11
ml-agents-envs/mlagents_envs/communicator_objects/capabilities_pb2.py


name='mlagents_envs/communicator_objects/capabilities.proto',
package='communicator_objects',
syntax='proto3',
serialized_pb=_b('\n5mlagents_envs/communicator_objects/capabilities.proto\x12\x14\x63ommunicator_objects\"\xaf\x01\n\x18UnityRLCapabilitiesProto\x12\x1a\n\x12\x62\x61seRLCapabilities\x18\x01 \x01(\x08\x12#\n\x1b\x63oncatenatedPngObservations\x18\x02 \x01(\x08\x12 \n\x18\x63ompressedChannelMapping\x18\x03 \x01(\x08\x12\x15\n\rhybridActions\x18\x04 \x01(\x08\x12\x19\n\x11trainingAnalytics\x18\x05 \x01(\x08\x42%\xaa\x02\"Unity.MLAgents.CommunicatorObjectsb\x06proto3')
serialized_pb=_b('\n5mlagents_envs/communicator_objects/capabilities.proto\x12\x14\x63ommunicator_objects\"\xd2\x01\n\x18UnityRLCapabilitiesProto\x12\x1a\n\x12\x62\x61seRLCapabilities\x18\x01 \x01(\x08\x12#\n\x1b\x63oncatenatedPngObservations\x18\x02 \x01(\x08\x12 \n\x18\x63ompressedChannelMapping\x18\x03 \x01(\x08\x12\x15\n\rhybridActions\x18\x04 \x01(\x08\x12\x19\n\x11trainingAnalytics\x18\x05 \x01(\x08\x12!\n\x19variableLengthObservation\x18\x06 \x01(\x08\x42%\xaa\x02\"Unity.MLAgents.CommunicatorObjectsb\x06proto3')
)

message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='variableLengthObservation', full_name='communicator_objects.UnityRLCapabilitiesProto.variableLengthObservation', index=5,
number=6, type=8, cpp_type=7, label=1,
has_default_value=False, default_value=False,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
],
extensions=[
],

oneofs=[
],
serialized_start=80,
serialized_end=255,
serialized_end=290,
)
DESCRIPTOR.message_types_by_name['UnityRLCapabilitiesProto'] = _UNITYRLCAPABILITIESPROTO

6
ml-agents-envs/mlagents_envs/communicator_objects/capabilities_pb2.pyi


compressedChannelMapping = ... # type: builtin___bool
hybridActions = ... # type: builtin___bool
trainingAnalytics = ... # type: builtin___bool
variableLengthObservation = ... # type: builtin___bool
def __init__(self,
*,

hybridActions : typing___Optional[builtin___bool] = None,
trainingAnalytics : typing___Optional[builtin___bool] = None,
variableLengthObservation : typing___Optional[builtin___bool] = None,
) -> None: ...
@classmethod
def FromString(cls, s: builtin___bytes) -> UnityRLCapabilitiesProto: ...

def ClearField(self, field_name: typing_extensions___Literal[u"baseRLCapabilities",u"compressedChannelMapping",u"concatenatedPngObservations",u"hybridActions",u"trainingAnalytics"]) -> None: ...
def ClearField(self, field_name: typing_extensions___Literal[u"baseRLCapabilities",u"compressedChannelMapping",u"concatenatedPngObservations",u"hybridActions",u"trainingAnalytics",u"variableLengthObservation"]) -> None: ...
def ClearField(self, field_name: typing_extensions___Literal[u"baseRLCapabilities",b"baseRLCapabilities",u"compressedChannelMapping",b"compressedChannelMapping",u"concatenatedPngObservations",b"concatenatedPngObservations",u"hybridActions",b"hybridActions",u"trainingAnalytics",b"trainingAnalytics"]) -> None: ...
def ClearField(self, field_name: typing_extensions___Literal[u"baseRLCapabilities",b"baseRLCapabilities",u"compressedChannelMapping",b"compressedChannelMapping",u"concatenatedPngObservations",b"concatenatedPngObservations",u"hybridActions",b"hybridActions",u"trainingAnalytics",b"trainingAnalytics",u"variableLengthObservation",b"variableLengthObservation"]) -> None: ...

4
ml-agents-envs/mlagents_envs/environment.py


# * 1.2.0 - support compression mapping for stacked compressed observations.
# * 1.3.0 - support action spaces with both continuous and discrete actions.
# * 1.4.0 - support training analytics sent from python trainer to the editor.
API_VERSION = "1.4.0"
# * 1.5.0 - support variable length observation training.
API_VERSION = "1.5.0"
# Default port that the editor listens on. If an environment executable
# isn't specified, this port will be used.

capabilities.compressedChannelMapping = True
capabilities.hybridActions = True
capabilities.trainingAnalytics = True
capabilities.variableLengthObservation = True
return capabilities
@staticmethod

16
ml-agents-envs/mlagents_envs/rpc_utils.py


@timed
def observation_to_np_array(
def _observation_to_np_array(
obs: ObservationProto, expected_shape: Optional[Iterable[int]] = None
) -> np.ndarray:
"""

@timed
def _process_visual_observation(
def _process_maybe_compressed_observation(
obs_index: int,
shape: Tuple[int, int, int],
agent_info_list: Collection[AgentInfoProto],

batched_visual = [
observation_to_np_array(agent_obs.observations[obs_index], shape)
_observation_to_np_array(agent_obs.observations[obs_index], shape)
for agent_obs in agent_info_list
]
return np.array(batched_visual, dtype=np.float32)

@timed
def _process_vector_observation(
def _process_rank_one_or_two_observation(
obs_index: int, shape: Tuple[int, ...], agent_info_list: Collection[AgentInfoProto]
) -> np.ndarray:
if len(agent_info_list) == 0:

if is_visual:
obs_shape = cast(Tuple[int, int, int], observation_specs.shape)
decision_obs_list.append(
_process_visual_observation(
_process_maybe_compressed_observation(
_process_visual_observation(
_process_maybe_compressed_observation(
_process_vector_observation(
_process_rank_one_or_two_observation(
_process_vector_observation(
_process_rank_one_or_two_observation(
obs_index, observation_specs.shape, terminal_agent_info_list
)
)

3
ml-agents-envs/mlagents_envs/side_channel/stats_side_channel.py


# Values within the summary period are summed up before reporting.
SUM = 2
# All values within a summary period are reported as a histogram.
HISTOGRAM = 3
StatList = List[Tuple[float, StatsAggregationMethod]]
EnvironmentStats = Mapping[str, StatList]

14
ml-agents-envs/mlagents_envs/tests/test_rpc_utils.py


from mlagents_envs.rpc_utils import (
behavior_spec_from_proto,
process_pixels,
_process_visual_observation,
_process_vector_observation,
_process_maybe_compressed_observation,
_process_rank_one_or_two_observation,
steps_from_proto,
)
from PIL import Image

shapes = [(3,), (4,)]
list_proto = generate_list_agent_proto(n_agents, shapes)
for obs_index, shape in enumerate(shapes):
arr = _process_vector_observation(obs_index, shape, list_proto)
arr = _process_rank_one_or_two_observation(obs_index, shape, list_proto)
assert list(arr.shape) == ([n_agents] + list(shape))
assert np.allclose(arr, 0.1, atol=0.01)

ap2 = AgentInfoProto()
ap2.observations.extend([proto_obs_2])
ap_list = [ap1, ap2]
arr = _process_visual_observation(0, (128, 64, 3), ap_list)
arr = _process_maybe_compressed_observation(0, (128, 64, 3), ap_list)
assert list(arr.shape) == [2, 128, 64, 3]
assert np.allclose(arr[0, :, :, :], in_array_1, atol=0.01)
assert np.allclose(arr[1, :, :, :], in_array_2, atol=0.01)

ap2 = AgentInfoProto()
ap2.observations.extend([proto_obs_2])
ap_list = [ap1, ap2]
arr = _process_visual_observation(0, (128, 64, 1), ap_list)
arr = _process_maybe_compressed_observation(0, (128, 64, 1), ap_list)
assert list(arr.shape) == [2, 128, 64, 1]
assert np.allclose(arr[0, :, :, :], expected_out_array_1, atol=0.01)
assert np.allclose(arr[1, :, :, :], expected_out_array_2, atol=0.01)

ap1 = AgentInfoProto()
ap1.observations.extend([proto_obs_1])
ap_list = [ap1]
arr = _process_visual_observation(0, (128, 64, 8), ap_list)
arr = _process_maybe_compressed_observation(0, (128, 64, 8), ap_list)
assert list(arr.shape) == [1, 128, 64, 8]
assert np.allclose(arr[0, :, :, :], expected_out_array_1, atol=0.01)

ap1.observations.extend([proto_obs_1])
ap_list = [ap1]
with pytest.raises(UnityObservationException):
_process_visual_observation(0, (128, 42, 3), ap_list)
_process_maybe_compressed_observation(0, (128, 42, 3), ap_list)
def test_batched_step_result_from_proto():

392
ml-agents/mlagents/trainers/buffer.py


from collections import defaultdict
from collections.abc import MutableMapping
import enum
import itertools
from typing import BinaryIO, DefaultDict, List, Tuple, Union, Optional
from typing import List, BinaryIO
import itertools
from mlagents_envs.exception import UnityException

pass
class AgentBuffer(dict):
class BufferKey(enum.Enum):
ACTION_MASK = "action_mask"
CONTINUOUS_ACTION = "continuous_action"
CONTINUOUS_LOG_PROBS = "continuous_log_probs"
DISCRETE_ACTION = "discrete_action"
DISCRETE_LOG_PROBS = "discrete_log_probs"
DONE = "done"
ENVIRONMENT_REWARDS = "environment_rewards"
MASKS = "masks"
MEMORY = "memory"
PREV_ACTION = "prev_action"
ADVANTAGES = "advantages"
DISCOUNTED_RETURNS = "discounted_returns"
class ObservationKeyPrefix(enum.Enum):
OBSERVATION = "obs"
NEXT_OBSERVATION = "next_obs"
class RewardSignalKeyPrefix(enum.Enum):
# Reward signals
REWARDS = "rewards"
VALUE_ESTIMATES = "value_estimates"
RETURNS = "returns"
ADVANTAGE = "advantage"
AgentBufferKey = Union[
BufferKey, Tuple[ObservationKeyPrefix, int], Tuple[RewardSignalKeyPrefix, str]
]
class RewardSignalUtil:
@staticmethod
def rewards_key(name: str) -> AgentBufferKey:
return RewardSignalKeyPrefix.REWARDS, name
@staticmethod
def value_estimates_key(name: str) -> AgentBufferKey:
return RewardSignalKeyPrefix.VALUE_ESTIMATES, name
@staticmethod
def returns_key(name: str) -> AgentBufferKey:
return RewardSignalKeyPrefix.RETURNS, name
@staticmethod
def advantage_key(name: str) -> AgentBufferKey:
return RewardSignalKeyPrefix.ADVANTAGE, name
class AgentBufferField(list):
AgentBuffer contains a dictionary of AgentBufferFields. Each agent has its own AgentBuffer.
The keys correspond to the names of the fields. Example: state, action
AgentBufferField is a list of numpy arrays. When an agent collects a field, you can add it to its
AgentBufferField with the append method.
class AgentBufferField(list):
def __init__(self):
self.padding_value = 0
super().__init__()
def __str__(self):
return str(np.array(self).shape)
def append(self, element: np.ndarray, padding_value: float = 0.0) -> None:
AgentBufferField is a list of numpy arrays. When an agent collects a field, you can add it to its
AgentBufferField with the append method.
Adds an element to this list. Also lets you change the padding
type, so that it can be set on append (e.g. action_masks should
be padded with 1.)
:param element: The element to append to the list.
:param padding_value: The value used to pad when get_batch is called.
super().append(element)
self.padding_value = padding_value
def __init__(self):
self.padding_value = 0
super().__init__()
def extend(self, data: np.ndarray) -> None:
"""
Adds a list of np.arrays to the end of the list of np.arrays.
:param data: The np.array list to append.
"""
self += list(np.array(data, dtype=np.float32))
def __str__(self):
return str(np.array(self).shape)
def set(self, data):
"""
Sets the list of np.array to the input data
:param data: The np.array list to be set.
"""
# Make sure we convert incoming data to float32 if it's a float
dtype = None
if data is not None and len(data) and isinstance(data[0], float):
dtype = np.float32
self[:] = []
self[:] = list(np.array(data, dtype=dtype))
def append(self, element: np.ndarray, padding_value: float = 0.0) -> None:
"""
Adds an element to this list. Also lets you change the padding
type, so that it can be set on append (e.g. action_masks should
be padded with 1.)
:param element: The element to append to the list.
:param padding_value: The value used to pad when get_batch is called.
"""
super().append(element)
self.padding_value = padding_value
    def get_batch(
        self,
        batch_size: int = None,
        training_length: Optional[int] = 1,
        sequential: bool = True,
    ) -> np.ndarray:
        """
        Retrieve the last batch_size elements of length training_length
        from the list of np.array
        :param batch_size: The number of elements to retrieve. If None:
        All elements will be retrieved.
        :param training_length: The length of the sequence to be retrieved. If
        None: only takes one element.
        :param sequential: If true and training_length is not None: the elements
        will not repeat in the sequence. [a,b,c,d,e] with training_length = 2 and
        sequential=True gives [[0,a],[b,c],[d,e]]. If sequential=False gives
        [[a,b],[b,c],[c,d],[d,e]]
        """
        if training_length is None:
            training_length = 1
        if sequential:
            # The sequences will not have overlapping elements (this involves padding)
            leftover = len(self) % training_length
            # leftover is the number of elements in the first sequence (this sequence might need 0 padding)
            if batch_size is None:
                # retrieve the maximum number of elements
                batch_size = len(self) // training_length + 1 * (leftover != 0)
            # The maximum number of sequences taken from a list of length len(self) without overlapping
            # with padding is equal to batch_size
            if batch_size > (len(self) // training_length + 1 * (leftover != 0)):
                raise BufferException(
                    "The batch size and training length requested for get_batch were"
                    " too large given the current number of data points."
                )
            if batch_size * training_length > len(self):
                padding = np.array(self[-1], dtype=np.float32) * self.padding_value
                return np.array(
                    [padding] * (training_length - leftover) + self[:], dtype=np.float32
                )
            else:
                return np.array(
                    self[len(self) - batch_size * training_length :], dtype=np.float32
                )
        else:
            # The sequences will have overlapping elements
            if batch_size is None:
                # retrieve the maximum number of elements
                batch_size = len(self) - training_length + 1
            # The number of sequences of length training_length taken from a list of len(self) elements
            # with overlapping is equal to batch_size
            if (len(self) - training_length + 1) < batch_size:
                raise BufferException(
                    "The batch size and training length requested for get_batch were"
                    " too large given the current number of data points."
                )
            tmp_list: List[np.ndarray] = []
            for end in range(len(self) - batch_size + 1, len(self) + 1):
                tmp_list += self[end - training_length : end]
            return np.array(tmp_list, dtype=np.float32)
    def reset_field(self) -> None:
        """
        Resets the AgentBufferField
        """
        self[:] = []
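# To make the get_batch docstring concrete, an illustrative run (values invented):
_field = AgentBufferField()
for _v in [1.0, 2.0, 3.0, 4.0, 5.0]:
    _field.append(np.array([_v], dtype=np.float32))

# Non-overlapping sequences of length 2; the short leading sequence is zero-padded:
assert _field.get_batch(training_length=2, sequential=True).shape == (6, 1)
# -> [[0], [1], [2], [3], [4], [5]], read as [[0,1], [2,3], [4,5]]

# Overlapping sequences of length 2:
assert _field.get_batch(training_length=2, sequential=False).shape == (8, 1)
# -> [[1], [2], [2], [3], [3], [4], [4], [5]], read as [[1,2], [2,3], [3,4], [4,5]]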
class AgentBuffer(MutableMapping):
"""
AgentBuffer contains a dictionary of AgentBufferFields. Each agent has its own AgentBuffer.
The keys correspond to the name of the field. Example: state, action
"""
    # Whether or not to validate the types of keys at runtime
    # This should be off for training, but enabled for testing
    CHECK_KEY_TYPES_AT_RUNTIME = False

    def __init__(self):
        super().__init__()
        self._fields: DefaultDict[AgentBufferKey, AgentBufferField] = defaultdict(
            AgentBufferField
        )
return ", ".join(["'{}' : {}".format(k, str(self[k])) for k in self.keys()])
return ", ".join(
["'{}' : {}".format(k, str(self[k])) for k in self._fields.keys()]
)
    def reset_agent(self) -> None:
        """
        Resets the AgentBuffer
        """
        for f in self._fields.values():
            f.reset_field()
@staticmethod
def _check_key(key):
if isinstance(key, BufferKey):
return
if isinstance(key, tuple):
key0, key1 = key
if isinstance(key0, ObservationKeyPrefix):
if isinstance(key1, int):
return
raise KeyError(f"{key} has type ({type(key0)}, {type(key1)})")
if isinstance(key0, RewardSignalKeyPrefix):
if isinstance(key1, str):
return
raise KeyError(f"{key} has type ({type(key0)}, {type(key1)})")
raise KeyError(f"{key} is a {type(key)}")
@staticmethod
def _encode_key(key: AgentBufferKey) -> str:
"""
Convert the key to a string representation so that it can be used for serialization.
"""
if isinstance(key, BufferKey):
return key.value
prefix, suffix = key
return f"{prefix.value}:{suffix}"
@staticmethod
def _decode_key(encoded_key: str) -> AgentBufferKey:
"""
Convert the string representation back to a key after serialization.
"""
# Simple case: convert the string directly to a BufferKey
try:
return BufferKey(encoded_key)
except ValueError:
pass
# Not a simple key, so split into two parts
prefix_str, _, suffix_str = encoded_key.partition(":")
# See if it's an ObservationKeyPrefix first
try:
return ObservationKeyPrefix(prefix_str), int(suffix_str)
except ValueError:
pass
# If not, it had better be a RewardSignalKeyPrefix
try:
return RewardSignalKeyPrefix(prefix_str), suffix_str
except ValueError:
raise ValueError(f"Unable to convert {encoded_key} to an AgentBufferKey")
def __getitem__(self, key: AgentBufferKey) -> AgentBufferField:
if self.CHECK_KEY_TYPES_AT_RUNTIME:
self._check_key(key)
return self._fields[key]
def __setitem__(self, key: AgentBufferKey, value: AgentBufferField) -> None:
if self.CHECK_KEY_TYPES_AT_RUNTIME:
self._check_key(key)
self._fields[key] = value
def __delitem__(self, key: AgentBufferKey) -> None:
if self.CHECK_KEY_TYPES_AT_RUNTIME:
self._check_key(key)
self._fields.__delitem__(key)
def __iter__(self):
return self._fields.__iter__()
def __len__(self) -> int:
return self._fields.__len__()
def __contains__(self, key):
if self.CHECK_KEY_TYPES_AT_RUNTIME:
self._check_key(key)
return self._fields.__contains__(key)
    def check_length(self, key_list: List[AgentBufferKey]) -> bool:
        """
        Some methods will require that some fields have the same length.
        check_length will return true if the fields in key_list
        have the same length.
        :param key_list: The fields which length will be compared
        """
        if self.CHECK_KEY_TYPES_AT_RUNTIME:
            for k in key_list:
                self._check_key(k)
        if len(key_list) < 2:
            return True
        length = None
        for key in key_list:
            if key not in self._fields:
                return False
            if (length is not None) and (length != len(self[key])):
                return False
            length = len(self[key])
        return True
    def shuffle(
        self, sequence_length: int, key_list: List[AgentBufferKey] = None
    ) -> None:
        """
        Shuffles the fields in key_list in a consistent way: The reordering will
        be the same across fields.
        :param key_list: The fields that must be shuffled.
        """
        if key_list is None:
            key_list = list(self._fields.keys())
        if not self.check_length(key_list):
            raise BufferException(
                "Unable to shuffle if the fields are not of same length"
            )
        :return: Dict of mini batch.
        """
        mini_batch = AgentBuffer()
        for key, field in self._fields.items():
            # slicing an AgentBufferField returns a List[Any]
            mini_batch[key] = field[start:end]  # type: ignore
        return mini_batch
def sample_mini_batch(

"""
with h5py.File(file_object, "w") as write_file:
for key, data in self.items():
write_file.create_dataset(key, data=data, dtype="f", compression="gzip")
write_file.create_dataset(
self._encode_key(key), data=data, dtype="f", compression="gzip"
)
    def load_from_file(self, file_object: BinaryIO) -> None:
        """
        Loads the AgentBuffer from a file-like object.
        """
        with h5py.File(file_object, "r") as read_file:
            for key in list(read_file.keys()):
                decoded_key = self._decode_key(key)
                self[decoded_key] = AgentBufferField()
                self[decoded_key].extend(read_file[key][()])
def truncate(self, max_length: int, sequence_length: int = 1) -> None:
"""

    def resequence_and_append(
        self,
        target_buffer: "AgentBuffer",
        key_list: List[AgentBufferKey] = None,
        batch_size: int = None,
        training_length: int = None,
    ) -> None:

3
ml-agents/mlagents/trainers/cli_utils.py


action=RaiseRemovedWarning,
help="(Removed) Use the TensorFlow framework.",
)
argparser.add_argument(
"--results-dir", default="results", help="Results base directory"
)
eng_conf = argparser.add_argument_group(title="Engine Configuration")
eng_conf.add_argument(

16
ml-agents/mlagents/trainers/demo_loader.py


import os
from typing import List, Tuple
import numpy as np
from mlagents.trainers.buffer import AgentBuffer, BufferKey
from mlagents_envs.communicator_objects.agent_info_action_pair_pb2 import (
AgentInfoActionPairProto,
)

else:
current_obs = list(current_decision_step.values())[0].obs
demo_raw_buffer["done"].append(next_done)
demo_raw_buffer["rewards"].append(next_reward)
demo_raw_buffer[BufferKey.DONE].append(next_done)
demo_raw_buffer[BufferKey.ENVIRONMENT_REWARDS].append(next_reward)
for i, obs in enumerate(current_obs):
demo_raw_buffer[ObsUtil.get_name_at(i)].append(obs)
if (

if behavior_spec.action_spec.continuous_size > 0:
demo_raw_buffer["continuous_action"].append(
demo_raw_buffer[BufferKey.CONTINUOUS_ACTION].append(
demo_raw_buffer["discrete_action"].append(
demo_raw_buffer[BufferKey.DISCRETE_ACTION].append(
demo_raw_buffer["continuous_action"].append(
demo_raw_buffer[BufferKey.CONTINUOUS_ACTION].append(
demo_raw_buffer["discrete_action"].append(
demo_raw_buffer[BufferKey.DISCRETE_ACTION].append(
demo_raw_buffer["prev_action"].append(previous_action)
demo_raw_buffer[BufferKey.PREV_ACTION].append(previous_action)
if next_done:
demo_raw_buffer.resequence_and_append(
demo_processed_buffer, batch_size=None, training_length=sequence_length
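# Hedged sketch of what the typed appends above amount to (values invented):
#   demo_raw_buffer = AgentBuffer()
#   demo_raw_buffer[BufferKey.DONE].append(False)
#   demo_raw_buffer[BufferKey.ENVIRONMENT_REWARDS].append(0.1)
#   demo_raw_buffer[BufferKey.PREV_ACTION].append(np.zeros(2, dtype=np.float32))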

40
ml-agents/mlagents/trainers/ghost/trainer.py


next_learning_team = self.controller.get_learning_team
# Case 1: No team change. The if statement just continues to push the policy
# into the correct queue (or not if not learning team).
for brain_name in self._internal_policy_queues:
internal_policy_queue = self._internal_policy_queues[brain_name]

except AgentManagerQueue.Empty:
pass
if (
self._learning_team == next_learning_team
and next_learning_team in self._team_to_name_to_policy_queue
):
name_to_policy_queue = self._team_to_name_to_policy_queue[
next_learning_team
]

policy = self.get_policy(behavior_id)
policy.load_weights(self.current_policy_snapshot[brain_name])
name_to_policy_queue[brain_name].put(policy)
# CASE 2: Current learning team is managed by this GhostTrainer.
# If the learning team changes, the following loop over queues will push the
# new policy into the policy queue for the new learning agent if
# that policy is managed by this GhostTrainer. Otherwise, it will save the current snapshot.
# CASE 3: Current learning team is managed by a different GhostTrainer.
# If the learning team changes to a team managed by this GhostTrainer, this loop
# will push the current_snapshot into the correct queue. Otherwise,
# it will continue skipping and swap_snapshot will continue to handle
# pushing fixed snapshots
if (
self._learning_team != next_learning_team
and next_learning_team in self._team_to_name_to_policy_queue
):
name_to_policy_queue = self._team_to_name_to_policy_queue[
next_learning_team
]
for brain_name in name_to_policy_queue:
behavior_id = create_name_behavior_id(brain_name, next_learning_team)
policy = self.get_policy(behavior_id)
policy.load_weights(self.current_policy_snapshot[brain_name])
name_to_policy_queue[brain_name].put(policy)
# Note save and swap should be on different step counters.
# We don't want to save unless the policy is learning.

41
ml-agents/mlagents/trainers/learn.py


from mlagents.trainers.environment_parameter_manager import EnvironmentParameterManager
from mlagents.trainers.trainer import TrainerFactory
from mlagents.trainers.directory_utils import validate_existing_directories
from mlagents.trainers.stats import StatsReporter
from mlagents.trainers.cli_utils import parser
from mlagents_envs.environment import UnityEnvironment
from mlagents.trainers.settings import RunOptions

add_metadata as add_timer_metadata,
)
from mlagents_envs import logging_util
from mlagents.plugins.stats_writer import register_stats_writer_plugins
logger = logging_util.get_logger(__name__)

checkpoint_settings = options.checkpoint_settings
env_settings = options.env_settings
engine_settings = options.engine_settings
base_path = "results"
write_path = os.path.join(base_path, checkpoint_settings.run_id)
maybe_init_path = (
os.path.join(base_path, checkpoint_settings.initialize_from)
if checkpoint_settings.initialize_from is not None
else None
)
run_logs_dir = os.path.join(write_path, "run_logs")
run_logs_dir = checkpoint_settings.run_logs_dir
checkpoint_settings.write_path,
checkpoint_settings.maybe_init_path,
)
# Make run logs directory
os.makedirs(run_logs_dir, exist_ok=True)

)
stats_writers = register_stats_writer_plugins(options)
for sw in stats_writers:
StatsReporter.add_writer(sw)
if env_settings.env_path is None:
port = None

trainer_factory = TrainerFactory(
trainer_config=options.behaviors,
output_path=checkpoint_settings.write_path,
init_path=checkpoint_settings.maybe_init_path,
checkpoint_settings.write_path,
checkpoint_settings.run_id,
env_parameter_manager,
not checkpoint_settings.inference,

tc.start_learning(env_manager)
finally:
env_manager.close()
write_run_options(checkpoint_settings.write_path, options)
write_timing_tree(run_logs_dir)
write_training_status(run_logs_dir)

22
ml-agents/mlagents/trainers/ppo/optimizer_torch.py


from typing import Dict, cast
from mlagents.torch_utils import torch
from mlagents.trainers.buffer import AgentBuffer, BufferKey, RewardSignalUtil
from mlagents_envs.timers import timed
from mlagents.trainers.policy.torch_policy import TorchPolicy

old_values = {}
for name in self.reward_signals:
old_values[name] = ModelUtils.list_to_tensor(
batch[f"{name}_value_estimates"]
batch[RewardSignalUtil.value_estimates_key(name)]
returns[name] = ModelUtils.list_to_tensor(batch[f"{name}_returns"])
returns[name] = ModelUtils.list_to_tensor(
batch[RewardSignalUtil.returns_key(name)]
)
n_obs = len(self.policy.behavior_spec.observation_specs)
current_obs = ObsUtil.from_buffer(batch, n_obs)

act_masks = ModelUtils.list_to_tensor(batch["action_mask"])
actions = AgentAction.from_dict(batch)
act_masks = ModelUtils.list_to_tensor(batch[BufferKey.ACTION_MASK])
actions = AgentAction.from_buffer(batch)
ModelUtils.list_to_tensor(batch["memory"][i])
for i in range(0, len(batch["memory"]), self.policy.sequence_length)
ModelUtils.list_to_tensor(batch[BufferKey.MEMORY][i])
for i in range(0, len(batch[BufferKey.MEMORY]), self.policy.sequence_length)
]
if len(memories) > 0:
memories = torch.stack(memories).unsqueeze(0)

memories=memories,
seq_len=self.policy.sequence_length,
)
old_log_probs = ActionLogProbs.from_buffer(batch).flatten()
loss_masks = ModelUtils.list_to_tensor(batch["masks"], dtype=torch.bool)
loss_masks = ModelUtils.list_to_tensor(batch[BufferKey.MASKS], dtype=torch.bool)
ModelUtils.list_to_tensor(batch["advantages"]),
ModelUtils.list_to_tensor(batch[BufferKey.ADVANTAGES]),
log_probs,
old_log_probs,
loss_masks,

33
ml-agents/mlagents/trainers/ppo/trainer.py


from mlagents_envs.logging_util import get_logger
from mlagents_envs.base_env import BehaviorSpec
from mlagents.trainers.buffer import BufferKey, RewardSignalUtil
from mlagents.trainers.trainer.rl_trainer import RLTrainer
from mlagents.trainers.policy import Policy
from mlagents.trainers.policy.torch_policy import TorchPolicy

)
for name, v in value_estimates.items():
agent_buffer_trajectory[f"{name}_value_estimates"].extend(v)
agent_buffer_trajectory[RewardSignalUtil.value_estimates_key(name)].extend(
v
)
self._stats_reporter.add_stat(
f"Policy/{self.optimizer.reward_signals[name].name.capitalize()} Value Estimate",
np.mean(v),

self.collected_rewards["environment"][agent_id] += np.sum(
agent_buffer_trajectory["environment_rewards"]
agent_buffer_trajectory[BufferKey.ENVIRONMENT_REWARDS]
agent_buffer_trajectory[f"{name}_rewards"].extend(evaluate_result)
agent_buffer_trajectory[RewardSignalUtil.rewards_key(name)].extend(
evaluate_result
)
# Report the reward signals
self.collected_rewards[name][agent_id] += np.sum(evaluate_result)

for name in self.optimizer.reward_signals:
bootstrap_value = value_next[name]
local_rewards = agent_buffer_trajectory[
RewardSignalUtil.rewards_key(name)
].get_batch()
f"{name}_value_estimates"
RewardSignalUtil.value_estimates_key(name)
].get_batch()
local_advantage = get_gae(

)
local_return = local_advantage + local_value_estimates
# This is later used as the target for the different value estimates
agent_buffer_trajectory[RewardSignalUtil.returns_key(name)].set(
local_return
)
agent_buffer_trajectory[RewardSignalUtil.advantage_key(name)].set(
local_advantage
)
tmp_advantages.append(local_advantage)
tmp_returns.append(local_return)

)
global_returns = list(np.mean(np.array(tmp_returns, dtype=np.float32), axis=0))
agent_buffer_trajectory["advantages"].set(global_advantages)
agent_buffer_trajectory["discounted_returns"].set(global_returns)
agent_buffer_trajectory[BufferKey.ADVANTAGES].set(global_advantages)
agent_buffer_trajectory[BufferKey.DISCOUNTED_RETURNS].set(global_returns)
# Append to update buffer
agent_buffer_trajectory.resequence_and_append(
self.update_buffer, training_length=self.policy.sequence_length

int(self.hyperparameters.batch_size / self.policy.sequence_length), 1
)
advantages = self.update_buffer["advantages"].get_batch()
self.update_buffer["advantages"].set(
advantages = self.update_buffer[BufferKey.ADVANTAGES].get_batch()
self.update_buffer[BufferKey.ADVANTAGES].set(
(advantages - advantages.mean()) / (advantages.std() + 1e-10)
)
num_epoch = self.hyperparameters.num_epoch
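# For context on get_gae above: a standalone sketch of generalized advantage
# estimation consistent with these call sites (parameter names assumed, not
# the trainer's exact code).
import numpy as np

def discount_rewards(r, gamma=0.99, value_next=0.0):
    # Discounted sum of future values, scanning from the last step backwards.
    discounted_r = np.zeros_like(r)
    running_add = value_next
    for t in reversed(range(r.size)):
        running_add = running_add * gamma + r[t]
        discounted_r[t] = running_add
    return discounted_r

def get_gae(rewards, value_estimates, value_next=0.0, gamma=0.99, lambd=0.95):
    # GAE is the discounted sum of TD residuals:
    # delta_t = r_t + gamma * V(s_{t+1}) - V(s_t)
    value_estimates = np.append(value_estimates, value_next)
    delta_t = rewards + gamma * value_estimates[1:] - value_estimates[:-1]
    return discount_rewards(r=delta_t, gamma=gamma * lambd)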

24
ml-agents/mlagents/trainers/sac/optimizer_torch.py


from mlagents.trainers.torch.agent_action import AgentAction
from mlagents.trainers.torch.action_log_probs import ActionLogProbs
from mlagents.trainers.torch.utils import ModelUtils
from mlagents.trainers.buffer import AgentBuffer, BufferKey, RewardSignalUtil
from mlagents_envs.timers import timed
from mlagents_envs.base_env import ActionSpec, ObservationSpec
from mlagents.trainers.exception import UnityTrainerException

"""
rewards = {}
for name in self.reward_signals:
rewards[name] = ModelUtils.list_to_tensor(batch[f"{name}_rewards"])
rewards[name] = ModelUtils.list_to_tensor(
batch[RewardSignalUtil.rewards_key(name)]
)
n_obs = len(self.policy.behavior_spec.observation_specs)
current_obs = ObsUtil.from_buffer(batch, n_obs)

# Convert to tensors
next_obs = [ModelUtils.list_to_tensor(obs) for obs in next_obs]
act_masks = ModelUtils.list_to_tensor(batch["action_mask"])
actions = AgentAction.from_dict(batch)
act_masks = ModelUtils.list_to_tensor(batch[BufferKey.ACTION_MASK])
actions = AgentAction.from_buffer(batch)
ModelUtils.list_to_tensor(batch["memory"][i])
for i in range(0, len(batch["memory"]), self.policy.sequence_length)
ModelUtils.list_to_tensor(batch[BufferKey.MEMORY][i])
for i in range(0, len(batch[BufferKey.MEMORY]), self.policy.sequence_length)
batch["memory"][i][self.policy.m_size // 2 :]
batch[BufferKey.MEMORY][i][self.policy.m_size // 2 :]
for i in range(offset, len(batch["memory"]), self.policy.sequence_length)
for i in range(
offset, len(batch[BufferKey.MEMORY]), self.policy.sequence_length
)
]
if len(memories_list) > 0:

memories=next_memories,
sequence_length=self.policy.sequence_length,
)
masks = ModelUtils.list_to_tensor(batch["masks"], dtype=torch.bool)
dones = ModelUtils.list_to_tensor(batch["done"])
masks = ModelUtils.list_to_tensor(batch[BufferKey.MASKS], dtype=torch.bool)
dones = ModelUtils.list_to_tensor(batch[BufferKey.DONE])
q1_loss, q2_loss = self.sac_q_loss(
q1_stream, q2_stream, target_values, dones, rewards, masks

7
ml-agents/mlagents/trainers/sac/trainer.py


from mlagents_envs.logging_util import get_logger
from mlagents_envs.timers import timed
from mlagents_envs.base_env import BehaviorSpec
from mlagents.trainers.buffer import BufferKey, RewardSignalUtil
from mlagents.trainers.policy import Policy
from mlagents.trainers.trainer.rl_trainer import RLTrainer
from mlagents.trainers.policy.torch_policy import TorchPolicy

# Evaluate all reward functions for reporting purposes
self.collected_rewards["environment"][agent_id] += np.sum(
agent_buffer_trajectory["environment_rewards"]
agent_buffer_trajectory[BufferKey.ENVIRONMENT_REWARDS]
)
for name, reward_signal in self.optimizer.reward_signals.items():
evaluate_result = (

last_step_obs = last_step.obs
for i, obs in enumerate(last_step_obs):
agent_buffer_trajectory[ObsUtil.get_name_at_next(i)][-1] = obs
agent_buffer_trajectory["done"][-1] = False
agent_buffer_trajectory[BufferKey.DONE][-1] = False
# Append to update buffer
agent_buffer_trajectory.resequence_and_append(

)
# Get rewards for each reward
for name, signal in self.optimizer.reward_signals.items():
sampled_minibatch[f"{name}_rewards"] = (
sampled_minibatch[RewardSignalUtil.rewards_key(name)] = (
signal.evaluate(sampled_minibatch) * signal.strength
)

18
ml-agents/mlagents/trainers/settings.py


import os.path
import warnings
import attr

force: bool = parser.get_default("force")
train_model: bool = parser.get_default("train_model")
inference: bool = parser.get_default("inference")
results_dir: str = parser.get_default("results_dir")
@property
def write_path(self) -> str:
return os.path.join(self.results_dir, self.run_id)
@property
def maybe_init_path(self) -> Optional[str]:
return (
os.path.join(self.results_dir, self.initialize_from)
if self.initialize_from is not None
else None
)
@property
def run_logs_dir(self) -> str:
return os.path.join(self.write_path, "run_logs")
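# Illustrative only: what the derived paths resolve to for a hypothetical run.
import os
_results_dir, _run_id = "results", "3dball"            # invented values
_write_path = os.path.join(_results_dir, _run_id)      # -> "results/3dball"
_run_logs_dir = os.path.join(_write_path, "run_logs")  # -> "results/3dball/run_logs"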
@attr.s(auto_attribs=True)

39
ml-agents/mlagents/trainers/stats.py


class StatsSummary(NamedTuple):
    full_dist: List[float]
    aggregation_method: StatsAggregationMethod

    @staticmethod
    def empty() -> "StatsSummary":
        return StatsSummary([], StatsAggregationMethod.AVERAGE)
    @property
    def aggregated_value(self):
        if self.aggregation_method == StatsAggregationMethod.SUM:
            return self.sum
        else:
            return self.mean
@property
def mean(self):
return np.mean(self.full_dist)
@property
def std(self):
return np.std(self.full_dist)
@property
def num(self):
return len(self.full_dist)
@property
def sum(self):
return np.sum(self.full_dist)
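# With the full distribution stored, the summary statistics are derived on
# demand; an illustrative instance (values invented):
_s = StatsSummary(full_dist=[1.0, 3.0], aggregation_method=StatsAggregationMethod.AVERAGE)
assert _s.mean == 2.0 and _s.sum == 4.0 and _s.num == 2
assert _s.aggregated_value == 2.0  # mean, since the aggregation method is AVERAGE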
class StatsPropertyType(Enum):
HYPERPARAMETERS = "hyperparameters"

def write_stats(
self, category: str, values: Dict[str, StatsSummary], step: int
) -> None:
"""
Callback to record training information
:param category: Category of the statistics. Usually this is the behavior name.
:param values: Dictionary of statistics.
:param step: The current training step.
:return:
"""
pass
def add_property(

self.summary_writers[category].add_scalar(
f"{key}", value.aggregated_value, step
)
if value.aggregation_method == StatsAggregationMethod.HISTOGRAM:
self.summary_writers[category].add_histogram(
f"{key}_hist", np.array(value.full_dist), step
)
self.summary_writers[category].flush()
def _maybe_create_summary_writer(self, category: str) -> None:

return StatsSummary.empty()
return StatsSummary(
    full_dist=stat_values,
    aggregation_method=StatsReporter.stats_aggregation[self.category][key],
)

6
ml-agents/mlagents/trainers/tests/__init__.py


np.array = np_array_no_float64
np.zeros = np_zeros_no_float64
np.ones = np_ones_no_float64
if os.getenv("TEST_ENFORCE_BUFFER_KEY_TYPES"):
from mlagents.trainers.buffer import AgentBuffer
AgentBuffer.CHECK_KEY_TYPES_AT_RUNTIME = True
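# Usage note: the flag can be enabled for a local test run without editing code,
# e.g. TEST_ENFORCE_BUFFER_KEY_TYPES=1 pytest ml-agents/mlagents/trainers/tests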

9
ml-agents/mlagents/trainers/tests/mock_brain.py


from typing import List, Tuple
import numpy as np
from mlagents.trainers.buffer import AgentBuffer, AgentBufferKey
from mlagents.trainers.torch.action_log_probs import LogProbsTuple
from mlagents.trainers.trajectory import Trajectory, AgentExperience
from mlagents_envs.base_env import (

return Trajectory(
steps=steps_list, agent_id=agent_id, behavior_id=behavior_id, next_obs=obs
)
def copy_buffer_fields(
buffer: AgentBuffer, src_key: AgentBufferKey, dst_keys: List[AgentBufferKey]
) -> None:
for dst_key in dst_keys:
buffer[dst_key] = buffer[src_key]
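# Illustrative call (keys picked arbitrarily, assuming BufferKey is imported
# from mlagents.trainers.buffer): mirror the continuous actions into the
# previous-action field of a test buffer.
_buf = AgentBuffer()
_buf[BufferKey.CONTINUOUS_ACTION].extend(np.zeros((3, 2), dtype=np.float32))
copy_buffer_fields(_buf, BufferKey.CONTINUOUS_ACTION, [BufferKey.PREV_ACTION])
assert len(_buf[BufferKey.PREV_ACTION]) == 3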
def simulate_rollout(

18
ml-agents/mlagents/trainers/tests/test_agent_processor.py


expected_stats = {
    "averaged": StatsSummary(
        full_dist=[1.0, 3.0], aggregation_method=StatsAggregationMethod.AVERAGE
    ),
    StatsSummary(
        full_dist=[4.0], aggregation_method=StatsAggregationMethod.MOST_RECENT
    ),
    StatsSummary(
        full_dist=[3.1, 1.1], aggregation_method=StatsAggregationMethod.SUM
    ),
}
stats_reporter.write_stats(123)

60
ml-agents/mlagents/trainers/tests/test_buffer.py


import numpy as np
from mlagents.trainers.buffer import (
AgentBuffer,
AgentBufferField,
BufferKey,
ObservationKeyPrefix,
RewardSignalKeyPrefix,
)
from mlagents.trainers.trajectory import ObsUtil
def assert_array(a, b):

def construct_fake_buffer(fake_agent_id):
b = AgentBuffer()
for step in range(9):
b["vector_observation"].append(
b[ObsUtil.get_name_at(0)].append(
[
100 * fake_agent_id + 10 * step + 1,
100 * fake_agent_id + 10 * step + 2,

b["action"].append(
b[BufferKey.CONTINUOUS_ACTION].append(
[100 * fake_agent_id + 10 * step + 4, 100 * fake_agent_id + 10 * step + 5]
)
return b

agent_1_buffer = construct_fake_buffer(1)
agent_2_buffer = construct_fake_buffer(2)
agent_3_buffer = construct_fake_buffer(3)
a = agent_1_buffer["vector_observation"].get_batch(
a = agent_1_buffer[ObsUtil.get_name_at(0)].get_batch(
a = agent_2_buffer["vector_observation"].get_batch(
a = agent_2_buffer[ObsUtil.get_name_at(0)].get_batch(
batch_size=2, training_length=3, sequential=True
)
assert_array(

]
),
)
a = agent_2_buffer["vector_observation"].get_batch(
a = agent_2_buffer[ObsUtil.get_name_at(0)].get_batch(
batch_size=2, training_length=3, sequential=False
)
assert_array(

agent_3_buffer.resequence_and_append(
update_buffer, batch_size=None, training_length=2
)
assert len(update_buffer["action"]) == 20
assert len(update_buffer[BufferKey.CONTINUOUS_ACTION]) == 20
assert np.array(update_buffer["action"]).shape == (20, 2)
assert np.array(update_buffer[BufferKey.CONTINUOUS_ACTION]).shape == (20, 2)
assert np.array(c["action"]).shape == (1, 2)
assert np.array(c[BufferKey.CONTINUOUS_ACTION]).shape == (1, 2)
def fakerandint(values):

# Test non-LSTM
mb = update_buffer.sample_mini_batch(batch_size=4, sequence_length=1)
assert mb.keys() == update_buffer.keys()
assert np.array(mb["action"]).shape == (4, 2)
assert np.array(mb[BufferKey.CONTINUOUS_ACTION]).shape == (4, 2)
# Test LSTM
# We need to check if we ever get a breaking start - this will maximize the probability

assert np.array(mb["action"]).shape == (19, 2)
assert np.array(mb[BufferKey.CONTINUOUS_ACTION]).shape == (19, 2)
def test_num_experiences():

assert len(update_buffer["action"]) == 0
assert len(update_buffer[BufferKey.CONTINUOUS_ACTION]) == 0
assert update_buffer.num_experiences == 0
agent_1_buffer.resequence_and_append(
update_buffer, batch_size=None, training_length=2

)
assert len(update_buffer["action"]) == 20
assert len(update_buffer[BufferKey.CONTINUOUS_ACTION]) == 20
assert update_buffer.num_experiences == 20

update_buffer.truncate(4, sequence_length=3)
assert update_buffer.num_experiences == 3
for buffer_field in update_buffer.values():
assert isinstance(buffer_field, AgentBufferField)
def test_key_encode_decode():
keys = (
list(BufferKey)
+ [(k, 42) for k in ObservationKeyPrefix]
+ [(k, "gail") for k in RewardSignalKeyPrefix]
)
for k in keys:
assert k == AgentBuffer._decode_key(AgentBuffer._encode_key(k))
def test_buffer_save_load():
original = construct_fake_buffer(3)
import io
write_buffer = io.BytesIO()
original.save_to_file(write_buffer)
loaded = AgentBuffer()
loaded.load_from_file(write_buffer)
assert len(original) == len(loaded)
for k in original.keys():
assert np.allclose(original[k], loaded[k])

9
ml-agents/mlagents/trainers/tests/test_demo_loader.py


get_demo_files,
write_delimited,
)
from mlagents.trainers.buffer import BufferKey
BEHAVIOR_SPEC = create_mock_3dball_behavior_specs()

_, demo_buffer = demo_to_buffer(path_prefix + "/test.demo", 1, BEHAVIOR_SPEC)
assert (
len(demo_buffer["continuous_action"]) == total_expected - 1
or len(demo_buffer["discrete_action"]) == total_expected - 1
len(demo_buffer[BufferKey.CONTINUOUS_ACTION]) == total_expected - 1
or len(demo_buffer[BufferKey.DISCRETE_ACTION]) == total_expected - 1
)

_, demo_buffer = demo_to_buffer(path_prefix + "/test_demo_dir", 1, BEHAVIOR_SPEC)
assert (
len(demo_buffer["continuous_action"]) == total_expected - 1
or len(demo_buffer["discrete_action"]) == total_expected - 1
len(demo_buffer[BufferKey.CONTINUOUS_ACTION]) == total_expected - 1
or len(demo_buffer[BufferKey.DISCRETE_ACTION]) == total_expected - 1
)

Some files were not shown because too many files changed in this diff
