
Merge branch 'main' into develop-superpush-int

/develop/superpush/int
Ervin Teng, 4 years ago
Current commit
c8137dcd
122 files changed, 2291 insertions(+), 1012 deletions(-)
Files changed (showing 100 of 122; changed line counts in parentheses):

1. .github/PULL_REQUEST_TEMPLATE.md (6)
2. .github/workflows/pre-commit.yml (2)
3. .github/workflows/publish_pypi.yaml (2)
4. .github/workflows/pytest.yml (2)
5. .yamato/com.unity.ml-agents-performance.yml (2)
6. .yamato/com.unity.ml-agents-test.yml (4)
7. .yamato/compressed-sensor-test.yml (2)
8. .yamato/gym-interface-test.yml (2)
9. .yamato/protobuf-generation-test.yml (2)
10. .yamato/pytest-gpu.yml (2)
11. .yamato/python-ll-api-test.yml (2)
12. .yamato/standalone-build-test.yml (2)
13. .yamato/standalone-build-webgl-test.yml (2)
14. .yamato/training-backcompat-tests.yml (2)
15. .yamato/training-int-tests.yml (2)
16. ML-Agents-Input-Example/Packages/packages-lock.json (4)
17. Project/Assets/ML-Agents/Examples/FoodCollector/Scenes/GridFoodCollector.unity (68)
18. Project/Assets/ML-Agents/Examples/Match3/Prefabs/Match3VisualObs.prefab (2)
19. Project/Assets/ML-Agents/Examples/Match3/TFModels/Match3VectorObs.onnx (1001)
20. Project/Assets/ML-Agents/Examples/Sorter/Prefabs/Area.prefab (5)
21. README.md (27)
22. com.unity.ml-agents.extensions/Documentation~/Grid-Sensor.md (16)
23. com.unity.ml-agents.extensions/Documentation~/Match3.md (2)
24. com.unity.ml-agents.extensions/Documentation~/com.unity.ml-agents.extensions.md (9)
25. com.unity.ml-agents.extensions/Runtime/Input/InputActionActuator.cs (12)
26. com.unity.ml-agents.extensions/Runtime/Input/InputActuatorComponent.cs (6)
27. com.unity.ml-agents.extensions/Runtime/Match3/Match3Actuator.cs (9)
28. com.unity.ml-agents.extensions/Runtime/Match3/Match3ActuatorComponent.cs (1)
29. com.unity.ml-agents.extensions/Runtime/Match3/Match3SensorComponent.cs (2)
30. com.unity.ml-agents.extensions/Runtime/Sensors/CountingGridSensor.cs (8)
31. com.unity.ml-agents.extensions/Runtime/Sensors/GridSensor.cs (266)
32. com.unity.ml-agents.extensions/Runtime/Unity.ML-Agents.Extensions.asmdef (4)
33. com.unity.ml-agents.extensions/Tests/Runtime/Input/Unity.ML-Agents.Extensions.Input.Tests.Runtime.asmdef (2)
34. com.unity.ml-agents.extensions/package.json (2)
35. com.unity.ml-agents/CHANGELOG.md (15)
36. com.unity.ml-agents/CONTRIBUTING.md (4)
37. com.unity.ml-agents/Documentation~/com.unity.ml-agents.md (4)
38. com.unity.ml-agents/Runtime/Academy.cs (4)
39. com.unity.ml-agents/Runtime/Actuators/ActionSpec.cs (1)
40. com.unity.ml-agents/Runtime/Actuators/IActionReceiver.cs (2)
41. com.unity.ml-agents/Runtime/Actuators/IDiscreteActionMask.cs (2)
42. com.unity.ml-agents/Runtime/Actuators/VectorActuator.cs (8)
43. com.unity.ml-agents/Runtime/Agent.cs (47)
44. com.unity.ml-agents/Runtime/Analytics/Events.cs (31)
45. com.unity.ml-agents/Runtime/Analytics/InferenceAnalytics.cs (18)
46. com.unity.ml-agents/Runtime/Analytics/TrainingAnalytics.cs (17)
47. com.unity.ml-agents/Runtime/Communicator/RpcCommunicator.cs (9)
48. com.unity.ml-agents/Runtime/Constants.cs (3)
49. com.unity.ml-agents/Runtime/Demonstrations/DemonstrationRecorder.cs (2)
50. com.unity.ml-agents/Runtime/DiscreteActionMasker.cs (2)
51. com.unity.ml-agents/Runtime/Inference/BarracudaModelParamLoader.cs (8)
52. com.unity.ml-agents/Runtime/Inference/TensorGenerator.cs (4)
53. com.unity.ml-agents/Runtime/Inference/TensorNames.cs (20)
54. com.unity.ml-agents/Runtime/Policies/BarracudaPolicy.cs (10)
55. com.unity.ml-agents/Runtime/Policies/BehaviorParameters.cs (6)
56. com.unity.ml-agents/Runtime/Policies/RemotePolicy.cs (10)
57. com.unity.ml-agents/Runtime/Sensors/BufferSensor.cs (8)
58. com.unity.ml-agents/Runtime/Sensors/BufferSensorComponent.cs (31)
59. com.unity.ml-agents/Runtime/Sensors/IBuiltInSensor.cs (38)
60. com.unity.ml-agents/Runtime/Sensors/ObservationWriter.cs (6)
61. com.unity.ml-agents/Runtime/SideChannels/TrainingAnalyticsSideChannel.cs (5)
62. com.unity.ml-agents/Tests/Editor/Analytics/InferenceAnalyticsTests.cs (18)
63. com.unity.ml-agents/Tests/Editor/Analytics/TrainingAnalyticsTest.cs (38)
64. com.unity.ml-agents/Tests/Editor/Sensor/BufferSensorTest.cs (5)
65. config/ppo/Match3.yaml (86)
66. config/ppo/PyramidsRND.yaml (1)
67. docs/Installation-Anaconda-Windows.md (6)
68. docs/Installation.md (8)
69. docs/Learning-Environment-Examples.md (4)
70. docs/ML-Agents-Overview.md (25)
71. docs/Migrating.md (2)
72. docs/Training-on-Amazon-Web-Service.md (2)
73. docs/Unity-Inference-Engine.md (4)
74. docs/localized/KR/docs/Installation-Anaconda-Windows.md (2)
75. docs/localized/RU/README.md (6)
76. docs/localized/RU/docs/Начало работы.md (30)
77. docs/localized/RU/docs/Установка.md (32)
78. ml-agents-envs/mlagents_envs/base_env.py (11)
79. ml-agents-envs/mlagents_envs/rpc_utils.py (1)
80. ml-agents-envs/mlagents_envs/tests/test_envs.py (2)
81. ml-agents-envs/mlagents_envs/tests/test_rpc_utils.py (11)
82. ml-agents/mlagents/trainers/action_info.py (12)
83. ml-agents/mlagents/trainers/agent_processor.py (259)
84. ml-agents/mlagents/trainers/behavior_id_utils.py (49)
85. ml-agents/mlagents/trainers/buffer.py (134)
86. ml-agents/mlagents/trainers/ghost/trainer.py (14)
87. ml-agents/mlagents/trainers/optimizer/torch_optimizer.py (156)
88. ml-agents/mlagents/trainers/policy/policy.py (28)
89. ml-agents/mlagents/trainers/policy/torch_policy.py (67)
90. ml-agents/mlagents/trainers/ppo/optimizer_torch.py (42)
91. ml-agents/mlagents/trainers/ppo/trainer.py (7)
92. ml-agents/mlagents/trainers/sac/optimizer_torch.py (96)
93. ml-agents/mlagents/trainers/sac/trainer.py (9)
94. ml-agents/mlagents/trainers/tests/mock_brain.py (20)
95. ml-agents/mlagents/trainers/tests/test_agent_processor.py (157)
96. ml-agents/mlagents/trainers/tests/test_buffer.py (77)
97. ml-agents/mlagents/trainers/tests/test_trajectory.py (54)
98. ml-agents/mlagents/trainers/tests/torch/saver/test_saver.py (4)
99. ml-agents/mlagents/trainers/tests/torch/test_networks.py (23)
100. ml-agents/mlagents/trainers/tests/torch/test_policy.py (4)

6
.github/PULL_REQUEST_TEMPLATE.md


### Checklist
- [ ] Added tests that prove my fix is effective or that my feature works
- [ ] Updated the [changelog](https://github.com/Unity-Technologies/ml-agents/blob/master/com.unity.ml-agents/CHANGELOG.md) (if applicable)
- [ ] Updated the [documentation](https://github.com/Unity-Technologies/ml-agents/tree/master/docs) (if applicable)
- [ ] Updated the [migration guide](https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Migrating.md) (if applicable)
- [ ] Updated the [changelog](https://github.com/Unity-Technologies/ml-agents/blob/main/com.unity.ml-agents/CHANGELOG.md) (if applicable)
- [ ] Updated the [documentation](https://github.com/Unity-Technologies/ml-agents/tree/main/docs) (if applicable)
- [ ] Updated the [migration guide](https://github.com/Unity-Technologies/ml-agents/blob/main/docs/Migrating.md) (if applicable)
### Other comments

2
.github/workflows/pre-commit.yml


on:
pull_request:
push:
branches: [master]
branches: [main]
jobs:
pre-commit:

2
.github/workflows/publish_pypi.yaml


package-path: [ml-agents, ml-agents-envs, gym-unity]
steps:
- uses: actions/checkout@master
- uses: actions/checkout@main
- name: Set up Python 3.7
uses: actions/setup-python@v1
with:

2
.github/workflows/pytest.yml


- 'test_requirements.txt'
- '.github/workflows/pytest.yml'
push:
branches: [master]
branches: [main]
jobs:
pytest:

2
.yamato/com.unity.ml-agents-performance.yml


triggers:
cancel_old_ci: true
recurring:
- branch: master
- branch: main
frequency: daily
artifacts:
logs:

4
.yamato/com.unity.ml-agents-test.yml


triggers:
cancel_old_ci: true
recurring:
- branch: master
- branch: main
frequency: daily
{% for package in packages %}

cancel_old_ci: true
{% if platform.name == "linux" %}
expression: |
(pull_request.target eq "master" OR
(pull_request.target eq "main" OR
pull_request.target match "release.+") AND
NOT pull_request.draft AND
(pull_request.changes.any match "com.unity.ml-agents/**" OR

2
.yamato/compressed-sensor-test.yml


cancel_old_ci: true
{% if editor.extra_test == "sensor" %}
expression: |
(pull_request.target eq "master" OR
(pull_request.target eq "main" OR
pull_request.target match "release.+") AND
NOT pull_request.draft AND
(pull_request.changes.any match "com.unity.ml-agents/**" OR

2
.yamato/gym-interface-test.yml


cancel_old_ci: true
{% if editor.extra_test == "gym" %}
expression: |
(pull_request.target eq "master" OR
(pull_request.target eq "main" OR
pull_request.target match "release.+") AND
NOT pull_request.draft AND
(pull_request.changes.any match "com.unity.ml-agents/**" OR

2
.yamato/protobuf-generation-test.yml


triggers:
cancel_old_ci: true
expression: |
(pull_request.target eq "master" OR
(pull_request.target eq "main" OR
pull_request.target match "release.+") AND
NOT pull_request.draft AND
(pull_request.changes.any match "protobuf-definitions/**" OR

2
.yamato/pytest-gpu.yml


triggers:
cancel_old_ci: true
recurring:
- branch: master
- branch: main
frequency: daily
artifacts:
logs:

2
.yamato/python-ll-api-test.yml


cancel_old_ci: true
{% if editor.extra_test == "llapi" %}
expression: |
(pull_request.target eq "master" OR
(pull_request.target eq "main" OR
pull_request.target match "release.+") AND
NOT pull_request.draft AND
(pull_request.changes.any match "com.unity.ml-agents/**" OR

2
.yamato/standalone-build-test.yml


triggers:
cancel_old_ci: true
expression: |
(pull_request.target eq "master" OR
(pull_request.target eq "main" OR
pull_request.target match "release.+") AND
NOT pull_request.draft AND
(pull_request.changes.any match "com.unity.ml-agents/**" OR

2
.yamato/standalone-build-webgl-test.yml


triggers:
cancel_old_ci: true
recurring:
- branch: master
- branch: main
frequency: weekly
artifacts:
logs:

2
.yamato/training-backcompat-tests.yml


triggers:
cancel_old_ci: true
recurring:
- branch: master
- branch: main
frequency: daily
artifacts:
logs:

2
.yamato/training-int-tests.yml


triggers:
cancel_old_ci: true
expression: |
(pull_request.target eq "master" OR
(pull_request.target eq "main" OR
pull_request.target match "release.+") AND
NOT pull_request.draft AND
(pull_request.changes.any match "com.unity.ml-agents/**" OR

4
ML-Agents-Input-Example/Packages/packages-lock.json


"url": "https://packages.unity.com"
},
"com.unity.barracuda": {
"version": "1.3.0-preview",
"version": "1.3.1-preview",
"depth": 1,
"source": "registry",
"dependencies": {

"depth": 0,
"source": "local",
"dependencies": {
"com.unity.barracuda": "1.3.0-preview",
"com.unity.barracuda": "1.3.1-preview",
"com.unity.modules.imageconversion": "1.0.0",
"com.unity.modules.jsonserialize": "1.0.0",
"com.unity.modules.physics": "1.0.0",

68
Project/Assets/ML-Agents/Examples/FoodCollector/Scenes/GridFoodCollector.unity


m_ReflectionIntensity: 1
m_CustomReflection: {fileID: 0}
m_Sun: {fileID: 0}
m_IndirectSpecularColor: {r: 0.4497121, g: 0.49977785, b: 0.57563704, a: 1}
m_IndirectSpecularColor: {r: 0.44971168, g: 0.4997775, b: 0.57563686, a: 1}
m_UseRadianceAmbientProbe: 0
--- !u!157 &3
LightmapSettings:

propertyPath: m_Name
value: GridFoodCollectorArea
objectReference: {fileID: 0}
- target: {fileID: 4137908820211030, guid: b5339e4b990ade14f992aadf3bf8591b, type: 3}
propertyPath: m_LocalPosition.x
value: -17.2
objectReference: {fileID: 0}
- target: {fileID: 4259834826122778, guid: b5339e4b990ade14f992aadf3bf8591b, type: 3}
propertyPath: m_LocalPosition.x
value: -23.9
objectReference: {fileID: 0}
- target: {fileID: 4419274671784554, guid: b5339e4b990ade14f992aadf3bf8591b, type: 3}
propertyPath: m_LocalPosition.x
value: -8.9
objectReference: {fileID: 0}
- target: {fileID: 4688212428263696, guid: b5339e4b990ade14f992aadf3bf8591b, type: 3}
propertyPath: m_LocalPosition.x
value: 0

propertyPath: m_LocalEulerAnglesHint.z
value: 0
objectReference: {fileID: 0}
- target: {fileID: 4756368533889646, guid: b5339e4b990ade14f992aadf3bf8591b, type: 3}
propertyPath: m_LocalPosition.x
value: -30.4
objectReference: {fileID: 0}
- target: {fileID: 4756368533889646, guid: b5339e4b990ade14f992aadf3bf8591b, type: 3}
propertyPath: m_LocalPosition.z
value: -9.9
objectReference: {fileID: 0}
- target: {fileID: 3067525015186813280, guid: b5339e4b990ade14f992aadf3bf8591b,
type: 3}
propertyPath: NumCollidersPerCell
value: 1
objectReference: {fileID: 0}
- target: {fileID: 3067525015186813280, guid: b5339e4b990ade14f992aadf3bf8591b,
type: 3}
propertyPath: EstimatedMaxCollidersPerCell
value: 4
objectReference: {fileID: 0}
- target: {fileID: 5837508007780682603, guid: b5339e4b990ade14f992aadf3bf8591b,
type: 3}
propertyPath: ChannelOffsets.Array.size
value: 1
objectReference: {fileID: 0}
- target: {fileID: 5837508007780682603, guid: b5339e4b990ade14f992aadf3bf8591b,
type: 3}
propertyPath: ShowGizmos
value: 0
objectReference: {fileID: 0}
- target: {fileID: 5837508007780682603, guid: b5339e4b990ade14f992aadf3bf8591b,
type: 3}
propertyPath: ObservationPerCell
value: 6
objectReference: {fileID: 0}
- target: {fileID: 5837508007780682603, guid: b5339e4b990ade14f992aadf3bf8591b,
type: 3}
propertyPath: NumberOfObservations
value: 9600
objectReference: {fileID: 0}
- target: {fileID: 5837508007780682603, guid: b5339e4b990ade14f992aadf3bf8591b,
type: 3}
propertyPath: m_Enabled
value: 1
objectReference: {fileID: 0}
- target: {fileID: 5837508007780682603, guid: b5339e4b990ade14f992aadf3bf8591b,
type: 3}
propertyPath: rootReference
value:
objectReference: {fileID: 190823801}
--- !u!1 &190823801 stripped
GameObject:
m_CorrespondingSourceObject: {fileID: 1706274796045088, guid: b5339e4b990ade14f992aadf3bf8591b,
type: 3}
m_PrefabInstance: {fileID: 190823800}
m_PrefabAsset: {fileID: 0}
--- !u!1001 &392794583
PrefabInstance:
m_ObjectHideFlags: 0

2
Project/Assets/ML-Agents/Examples/Match3/Prefabs/Match3VisualObs.prefab


VectorActionDescriptions: []
VectorActionSpaceType: 0
hasUpgradedBrainParametersWithActionSpec: 1
m_Model: {fileID: 11400000, guid: 48d14da88fea74d0693c691c6e3f2e34, type: 3}
m_Model: {fileID: 11400000, guid: 28ccdfd7cb3d941ce8af0ab89e06130a, type: 3}
m_InferenceDevice: 2
m_BehaviorType: 0
m_BehaviorName: Match3VisualObs

1001
Project/Assets/ML-Agents/Examples/Match3/TFModels/Match3VectorObs.onnx
The file diff is too large to display.

5
Project/Assets/ML-Agents/Examples/Sorter/Prefabs/Area.prefab


m_Script: {fileID: 11500000, guid: dd8012d5925524537b27131fef517017, type: 3}
m_Name:
m_EditorClassIdentifier:
ObservableSize: 23
MaxNumObservables: 20
m_SensorName: BufferSensor
m_ObservableSize: 23
m_MaxNumObservables: 20
--- !u!1 &6000518840957865293
GameObject:
m_ObjectHideFlags: 0

27
README.md


# Unity ML-Agents Toolkit
[![docs badge](https://img.shields.io/badge/docs-reference-blue.svg)](https://github.com/Unity-Technologies/ml-agents/tree/release_12_docs/docs/)
[![docs badge](https://img.shields.io/badge/docs-reference-blue.svg)](https://github.com/Unity-Technologies/ml-agents/tree/release_13_docs/docs/)
[![license badge](https://img.shields.io/badge/license-Apache--2.0-green.svg)](LICENSE)

## Features
- 15+ [example Unity environments](docs/Learning-Environment-Examples.md)
- 18+ [example Unity environments](docs/Learning-Environment-Examples.md)
- Built-in support for Imitation Learning through Behavioral Cloning or
Generative Adversarial Imitation Learning
- Built-in support for Imitation Learning through Behavioral Cloning (BC) or
Generative Adversarial Imitation Learning (GAIL)
- Self-play mechanism for training agents in adversarial scenarios
- Easily definable Curriculum Learning scenarios for complex tasks
- Train robust agents using environment randomization

## Releases & Documentation
**Our latest, stable release is `Release 12`. Click
[here](https://github.com/Unity-Technologies/ml-agents/tree/release_12_docs/docs/Readme.md)
**Our latest, stable release is `Release 13`. Click
[here](https://github.com/Unity-Technologies/ml-agents/tree/release_13_docs/docs/Readme.md)
The table below lists all our releases, including our `master` branch which is
The table below lists all our releases, including our `main` branch which is
under active development and may be unstable. A few helpful guidelines:
- The [Versioning page](docs/Versioning.md) overviews how we manage our GitHub
releases and the versioning process for each of the ML-Agents components.

| **Version** | **Release Date** | **Source** | **Documentation** | **Download** | **Python Package** | **Unity Package** |
|:-------:|:------:|:-------------:|:-------:|:------------:|:------------:|:------------:|
| **master (unstable)** | -- | [source](https://github.com/Unity-Technologies/ml-agents/tree/master) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/master/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/master.zip) | -- | -- |
| **Release 12** | **December 22, 2020** | **[source](https://github.com/Unity-Technologies/ml-agents/tree/release_12)** | **[docs](https://github.com/Unity-Technologies/ml-agents/tree/release_12_docs/docs/Readme.md)** | **[download](https://github.com/Unity-Technologies/ml-agents/archive/release_12.zip)** | **[0.23.0](https://pypi.org/project/mlagents/0.23.0/)** | **[1.7.2](https://docs.unity3d.com/Packages/com.unity.ml-agents@1.7/manual/index.html)** |
| **main (unstable)** | -- | [source](https://github.com/Unity-Technologies/ml-agents/tree/main) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/main/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/main.zip) | -- | -- |
| **Release 13** | **February 17, 2021** | **[source](https://github.com/Unity-Technologies/ml-agents/tree/release_13)** | **[docs](https://github.com/Unity-Technologies/ml-agents/tree/release_13_docs/docs/Readme.md)** | **[download](https://github.com/Unity-Technologies/ml-agents/archive/release_13.zip)** | **[0.24.0](https://pypi.org/project/mlagents/0.24.0/)** | **[1.8.0](https://docs.unity3d.com/Packages/com.unity.ml-agents@1.8/manual/index.html)** |
| **Release 12** | December 22, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/release_12) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/release_12_docs/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/release_12.zip) | [0.23.0](https://pypi.org/project/mlagents/0.23.0/) | [1.7.2](https://docs.unity3d.com/Packages/com.unity.ml-agents@1.7/manual/index.html) |
| **Release 11** | December 21, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/release_11) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/release_11_docs/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/release_11.zip) | [0.23.0](https://pypi.org/project/mlagents/0.23.0/) | [1.7.0](https://docs.unity3d.com/Packages/com.unity.ml-agents@1.7/manual/index.html) |
| **Release 10** | November 18, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/release_10) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/release_10_docs/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/release_10.zip) | [0.22.0](https://pypi.org/project/mlagents/0.22.0/) | [1.6.0](https://docs.unity3d.com/Packages/com.unity.ml-agents@1.6/manual/index.html) |
| **Verified Package 1.0.6** | **November 16, 2020** | **[source](https://github.com/Unity-Technologies/ml-agents/tree/com.unity.ml-agents_1.0.6)** | **[docs](https://github.com/Unity-Technologies/ml-agents/blob/release_2_verified_docs/docs/Readme.md)** | **[download](https://github.com/Unity-Technologies/ml-agents/archive/com.unity.ml-agents_1.0.6.zip)** | **[0.16.1](https://pypi.org/project/mlagents/0.16.1/)** | **[1.0.6](https://docs.unity3d.com/Packages/com.unity.ml-agents@1.0/manual/index.html)** |

| **Release 7** | September 16, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/release_7) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/release_7_docs/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/release_7.zip) | [0.20.0](https://pypi.org/project/mlagents/0.20.0/) | [1.4.0](https://docs.unity3d.com/Packages/com.unity.ml-agents@1.4/manual/index.html) |
If you are a researcher interested in a discussion of Unity as an AI platform,
see a pre-print of our

([multi-armed bandit](https://blogs.unity3d.com/2017/06/26/unity-ai-themed-blog-entries/)
and
[Q-learning](https://blogs.unity3d.com/2017/08/22/unity-ai-reinforcement-learning-with-q-learning/))
### More from Unity
- [Unity Robotics](https://github.com/Unity-Technologies/Unity-Robotics-Hub)
- [Unity Computer Vision](https://unity.com/computer-vision)
- [Unity Game Simulation](https://unity.com/products/game-simulation)
## Community and Feedback

16
com.unity.ml-agents.extensions/Documentation~/Grid-Sensor.md


# Contribution
An image can be thought of as a matrix of a predefined width (W) and height (H), where each pixel is simply an array of length 3 (in the case of RGB), `[Red, Green, Blue]`, holding the color (channel) intensities at that pixel location. Thus an image is just a 3-dimensional matrix of size WxHx3. A Grid Observation can be thought of as a generalization of this setup where, in place of a pixel, there is a "cell": an array of length N representing different channel values at that cell position. From a Convolutional Neural Network point of view, multiple channels in an "image" aren't a new concept; one example is the RGB-Depth image used in several robotics applications. What distinguishes Grid Observations is what the data within the channels represents. Instead of limiting the channels to color intensities, the channels within a cell of a Grid Observation generalize to any data that can be represented by a single number (float or int).
Before jumping into the details of the Grid Sensor, an important thing to note is the agents' performance and qualitatively different behavior compared to raycasts. Unity ML-Agents comes with a suite of example environments. One in particular, the [Food Collector](https://github.com/Unity-Technologies/ml-agents/blob/release_13_docs/docs/Learning-Environment-Examples.md#food-collector), has been the focus of the Grid Sensor development.
The Food Collector environment can be described as:
* Set-up: A multi-agent environment where agents compete to collect food.
* Goal: The agents must learn to collect as many green food spheres as possible while avoiding red spheres.
* Agents: The environment contains 5 agents with the same Behavior Parameters.
When applying the Grid Sensor to this environment, in place of the Raycast Vector Sensor or the Camera Sensor, a Mean Reward of 40-50 is observed. This performance is on par with that of agents trained with RayCasts, but a side-by-side comparison of trained agents shows a qualitative difference in behavior. A deeper study and interpretation of the qualitative differences between agents trained with Raycasts and Vector Sensors versus Grid Sensors is left to future studies.
<img src="images/gridobs-vs-vectorobs.gif" align="middle" width="3000"/>
## Overview
There are three main phases to the observation process of the Grid Sensor:

### Channel Based
The Channel Based Grid Observations represent observations in a normalized form between 0 and 1. To distinguish between categorical and continuous data, one would use the ChannelDepth array to signify the ranges that the values in the `channelValues` array could take. If one sets ChannelDepth[i] to be 1, it is assumed that the value of `channelValues[i]` is already normalized. Otherwise ChannelDepth[i] represents the total number of possible values that `channelValues[i]` can take and will be used for normalization.
The Channel Based Grid Observations are perhaps the simplest in terms of usability and similarity with other machine learning applications. Each grid is of size WxHxC where C is the number of channels. To distinguish between categorical and continuous data, one would use the ChannelDepth array to signify the ranges that the values in the `channelValues` array could take. If one sets ChannelDepth[i] to be 1, it is assumed that the value of `channelValues[i]` is already normalized. Otherwise ChannelDepth[i] represents the total number of possible values that `channelValues[i]` can take.
As the "enemy" is in the second position of the observed tags, its value can be normalized by:
For ObjectType, "weapon", "enemy" will be represented respectively as:
```
weapon = DetectableObjects.IndexOfTag("weapon")/ChannelDepth[0] = 1/2 = 0.5;
```
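To make this normalization concrete, here is a minimal, self-contained C# sketch. It is illustrative only (`ChannelNormalizationExample` and its members are hypothetical names, not part of the package), and it assumes `DetectableObjects = { "weapon", "enemy" }` and `ChannelDepth[0] = 2`, as in the example above:

```csharp
using System;

// Hypothetical sketch of the channel-based tag normalization described above.
static class ChannelNormalizationExample
{
    static readonly string[] DetectableObjects = { "weapon", "enemy" };
    static readonly int[] ChannelDepth = { 2 };

    // 1-based index of the tag, scaled by the channel's depth.
    public static float NormalizeTag(string tag)
    {
        var oneBasedIndex = Array.IndexOf(DetectableObjects, tag) + 1;
        return oneBasedIndex / (float)ChannelDepth[0];
    }

    static void Main()
    {
        Console.WriteLine(NormalizeTag("weapon")); // 1/2 = 0.5
        Console.WriteLine(NormalizeTag("enemy"));  // 2/2 = 1
    }
}
```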

2
com.unity.ml-agents.extensions/Documentation~/Match3.md


This implementation includes:
* C# implementation catered toward a Match-3 setup including concepts around encoding for moves based on [Human Like Playtesting with Deep Learning](https://www.researchgate.net/publication/328307928_Human-Like_Playtesting_with_Deep_Learning)
* An example Match-3 scene with ML-Agents implemented (located under /Project/Assets/ML-Agents/Examples/Match3). More information on the Match-3 example [here](https://github.com/Unity-Technologies/ml-agents/tree/release_12_docs/docs/docs/Learning-Environment-Examples.md#match-3).
* An example Match-3 scene with ML-Agents implemented (located under /Project/Assets/ML-Agents/Examples/Match3). More information on the Match-3 example [here](https://github.com/Unity-Technologies/ml-agents/tree/release_13_docs/docs/docs/Learning-Environment-Examples.md#match-3).
### Feedback
If you are a Match-3 developer and are trying to leverage ML-Agents for this scenario, [we want to hear from you](https://forms.gle/TBsB9jc8WshgzViU9). We are also looking for interested Match-3 teams to speak with us for 45 minutes. If you are interested, please indicate that in the [form](https://forms.gle/TBsB9jc8WshgzViU9). If selected, we will provide gift cards as a token of appreciation.

9
com.unity.ml-agents.extensions/Documentation~/com.unity.ml-agents.extensions.md


recommended ways to install the package:
### Local Installation
[Clone the repository](https://github.com/Unity-Technologies/ml-agents/tree/release_12_docs/docs/Installation.md#clone-the-ml-agents-toolkit-repository-optional) and follow the
[Local Installation for Development](https://github.com/Unity-Technologies/ml-agents/tree/release_12_docs/docs/Installation.md#advanced-local-installation-for-development-1)
[Clone the repository](https://github.com/Unity-Technologies/ml-agents/tree/release_13_docs/docs/Installation.md#clone-the-ml-agents-toolkit-repository-optional) and follow the
[Local Installation for Development](https://github.com/Unity-Technologies/ml-agents/tree/release_13_docs/docs/Installation.md#advanced-local-installation-for-development-1)
![Package Manager git URL](https://github.com/Unity-Technologies/ml-agents/blob/release_12_docs/docs/images/unity_package_manager_git_url.png)
![Package Manager git URL](https://github.com/Unity-Technologies/ml-agents/blob/release_13_docs/docs/images/unity_package_manager_git_url.png)
In the dialog that appears, enter
```
git+https://github.com/Unity-Technologies/ml-agents.git?path=com.unity.ml-agents.extensions

- No way to customize the action space of the `InputActuatorComponent`
## Need Help?
The main [README](https://github.com/Unity-Technologies/ml-agents/tree/release_12_docs/README.md) contains links for contacting the team or getting support.
The main [README](https://github.com/Unity-Technologies/ml-agents/tree/release_13_docs/README.md) contains links for contacting the team or getting support.

12
com.unity.ml-agents.extensions/Runtime/Input/InputActionActuator.cs


/// <see cref="Agent"/>'s <see cref="BehaviorParameters"/> indicate that the Agent is running in Heuristic Mode,
/// this Actuator will write actions from the <see cref="InputSystem"/> to the <see cref="ActionBuffers"/> object.
/// </summary>
public class InputActionActuator : IActuator, IHeuristicProvider
public class InputActionActuator : IActuator, IHeuristicProvider, IBuiltInActuator
{
readonly BehaviorParameters m_BehaviorParameters;
readonly InputAction m_Action;

/// <param name="adaptor">The <see cref="IRLActionInputAdaptor"/> that will convert data between ML-Agents
/// and the <see cref="InputSystem"/>.</param>
public InputActionActuator(InputDevice inputDevice, BehaviorParameters behaviorParameters,
InputAction action,
IRLActionInputAdaptor adaptor)
InputAction action,
IRLActionInputAdaptor adaptor)
{
m_BehaviorParameters = behaviorParameters;
Name = $"InputActionActuator-{action.name}";

Profiler.BeginSample("InputActionActuator.Heuristic");
m_InputAdaptor.WriteToHeuristic(m_Action, actionBuffersOut);
Profiler.EndSample();
}
/// <inheritdoc/>
public BuiltInActuatorType GetBuiltInActuatorType()
{
return BuiltInActuatorType.InputActionActuator;
}
}
}

6
com.unity.ml-agents.extensions/Runtime/Input/InputActuatorComponent.cs


/// <see cref="InputActionActuator"/>s.
/// </summary>
[RequireComponent(typeof(PlayerInput), typeof(IInputActionAssetProvider))]
[AddComponentMenu("ML Agents/Input Actuator", (int)MenuGroup.Actuators)]
public class InputActuatorComponent : ActuatorComponent
{
InputActionAsset m_InputAsset;

}
var inputControlScheme = new InputControlScheme(
mlAgentsControlSchemeName,
mlAgentsControlSchemeName,
deviceRequirements);
return inputControlScheme;

var builder = new InputControlLayout.Builder()
.WithName(layoutName)
.WithFormat(mlAgentsLayoutFormat);
for(var i = 0; i < defaultMap.actions.Count; i++)
for (var i = 0; i < defaultMap.actions.Count; i++)
{
var action = defaultMap.actions[i];
builder.AddControl(action.name)

}, layoutName);
}
}

9
com.unity.ml-agents.extensions/Runtime/Match3/Match3Actuator.cs


/// Actuator for a Match3 game. It translates valid moves (defined by AbstractBoard.IsMoveValid())
/// into action masks, and applies the action to the board via AbstractBoard.MakeMove().
/// </summary>
public class Match3Actuator : IActuator, IHeuristicProvider
public class Match3Actuator : IActuator, IHeuristicProvider, IBuiltInActuator
{
protected AbstractBoard m_Board;
protected System.Random m_Random;

/// <inheritdoc/>
public void ResetData()
{
}
/// <inheritdoc/>
public BuiltInActuatorType GetBuiltInActuatorType()
{
return BuiltInActuatorType.Match3Actuator;
}
IEnumerable<int> InvalidMoveIndices()

{
return 1;
}
}
}

1
com.unity.ml-agents.extensions/Runtime/Match3/Match3ActuatorComponent.cs


/// <summary>
/// Actuator component for a Match3 game. Generates a Match3Actuator at runtime.
/// </summary>
[AddComponentMenu("ML Agents/Match 3 Actuator", (int)MenuGroup.Actuators)]
public class Match3ActuatorComponent : ActuatorComponent
{
/// <summary>

2
com.unity.ml-agents.extensions/Runtime/Match3/Match3SensorComponent.cs


using Unity.MLAgents.Sensors;
using UnityEngine;
namespace Unity.MLAgents.Extensions.Match3
{

[AddComponentMenu("ML Agents/Match 3 Sensor", (int)MenuGroup.Sensors)]
public class Match3SensorComponent : SensorComponent
{
/// <summary>

8
com.unity.ml-agents.extensions/Runtime/Sensors/CountingGridSensor.cs


this.ChannelDepth = channelDepth;
if (DetectableObjects.Length != ChannelDepth.Length)
throw new UnityAgentsException("The number of channels of a CountingGridSensor must be equal to the number of detectableObjects");
this.gridDepthType = GridDepthType.Channel;
this.gridDepthType = gridDepthType;
this.CellScaleX = cellScaleX;
this.CellScaleZ = cellScaleZ;
this.GridNumSideX = gridWidth;

/// <param name="foundColliders">The array of colliders</param>
/// <param name="cellIndex">The cell index the collider is in</param>
/// <param name="cellCenter">the center of the cell the collider is in</param>
protected override void ParseColliders(Collider[] foundColliders, int cellIndex, Vector3 cellCenter)
protected override void ParseColliders(Collider[] foundColliders, int numFound, int cellIndex, Vector3 cellCenter)
for (int i = 0; i < foundColliders.Length; i++)
for (int i = 0; i < numFound; i++)
{
currentColliderGo = foundColliders[i].gameObject;

closestColliderPoint = foundColliders[i].ClosestPointOnBounds(cellCenter);
LoadObjectData(currentColliderGo, cellIndex,
Vector3.Distance(closestColliderPoint, transform.position) / SphereRadius);
Vector3.Distance(closestColliderPoint, transform.position) * InverseSphereRadius);
}
}

266
com.unity.ml-agents.extensions/Runtime/Sensors/GridSensor.cs


using UnityEngine;
using UnityEngine.Assertions;
using Unity.MLAgents.Sensors;
using UnityEngine.Profiling;
namespace Unity.MLAgents.Extensions.Sensors
{

[AddComponentMenu("ML Agents/Grid Sensor", (int)MenuGroup.Sensors)]
public class GridSensor : SensorComponent, ISensor, IBuiltInSensor
{
/// <summary>

[Tooltip("The reference of the root of the agent. This is used to disambiguate objects with the same tag as the agent. Defaults to current GameObject")]
public GameObject rootReference;
[Header("Collider Buffer Properties")]
[Tooltip("The absolute max size of the Collider buffer used in the non-allocating Physics calls. In other words" +
" the Collider buffer will never grow beyond this number even if there are more Colliders in the Grid Cell.")]
public int MaxColliderBufferSize = 500;
[Tooltip(
"The Estimated Max Number of Colliders to expect per cell. This number is used to " +
"pre-allocate an array of Colliders in order to take advantage of the OverlapBoxNonAlloc " +
"Physics API. If the number of colliders found is >= InitialColliderBufferSize the array " +
"will be resized to double its current size. The hard coded absolute size is 500.")]
public int InitialColliderBufferSize = 4;
Collider[] m_ColliderBuffer;
float[] m_ChannelBuffer;
//
// Hidden Parameters
//

/// <summary>
/// Radius of grid, used for normalizing the distance.
/// </summary>
protected float SphereRadius;
protected float InverseSphereRadius;
/// <summary>
/// Total Number of cells (width*height)

NumCells = GridNumSideX * GridNumSideZ;
float sphereRadiusX = (CellScaleX * GridNumSideX) / Mathf.Sqrt(2);
float sphereRadiusZ = (CellScaleZ * GridNumSideZ) / Mathf.Sqrt(2);
SphereRadius = Mathf.Max(sphereRadiusX, sphereRadiusZ);
InverseSphereRadius = 1.0f / Mathf.Max(sphereRadiusX, sphereRadiusZ);
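// Note: precomputing InverseSphereRadius turns the per-collider distance
// normalization (see ParseColliders and CountingGridSensor) into a multiply,
// distance * InverseSphereRadius, instead of a divide, distance / SphereRadius.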
ChannelOffsets = new int[ChannelDepth.Length];
DiffNumSideZX = (GridNumSideZ - GridNumSideX);
OffsetGridNumSide = (GridNumSideZ - 1f) / 2f;

InitDepthType();
InitCellPoints();
InitPerceptionBuffer();
m_ColliderBuffer = new Collider[Math.Min(MaxColliderBufferSize, InitialColliderBufferSize)];
// Default root reference to current game object
if (rootReference == null)
rootReference = gameObject;

m_perceptionTexture2D = new Texture2D(GridNumSideX, GridNumSideZ, TextureFormat.RGB24, false);
}
/// <inheritdoc cref="ISensor.Reset"/>
void ISensor.Reset() { }
public void Reset()
public void ClearPerceptionBuffer()
{
if (m_PerceptionBuffer != null)
{

else
{
m_PerceptionBuffer = new float[NumberOfObservations];
m_ColliderBuffer = new Collider[Math.Min(MaxColliderBufferSize, InitialColliderBufferSize)];
}
if (ShowGizmos)

{
return BuiltInSensorType.GridSensor;
}
/// <summary>
/// GetCompressedObservation - Calls Perceive then puts the data stored on the perception buffer

/// <returns>A float[] containing all of the information collected from the gridsensor</returns>
public float[] Perceive()
{
Reset();
if (m_ColliderBuffer == null)
{
return Array.Empty<float>();
}
ClearPerceptionBuffer();
// TODO: make these part of the class
Collider[] foundColliders = null;
Vector3 cellCenter = Vector3.zero;
var halfCellScale = new Vector3(CellScaleX / 2f, CellScaleY, CellScaleZ / 2f);
Vector3 halfCellScale = new Vector3(CellScaleX / 2f, CellScaleY, CellScaleZ / 2f);
for (int cellIndex = 0; cellIndex < NumCells; cellIndex++)
for (var cellIndex = 0; cellIndex < NumCells; cellIndex++)
int numFound;
Vector3 cellCenter;
cellCenter = transform.TransformPoint(CellPoints[cellIndex]);
foundColliders = Physics.OverlapBox(cellCenter, halfCellScale, transform.rotation, ObserveMask);
Transform transform1;
cellCenter = (transform1 = transform).TransformPoint(CellPoints[cellIndex]);
numFound = BufferResizingOverlapBoxNonAlloc(cellCenter, halfCellScale, transform1.rotation);
foundColliders = Physics.OverlapBox(cellCenter, halfCellScale, Quaternion.identity, ObserveMask);
numFound = BufferResizingOverlapBoxNonAlloc(cellCenter, halfCellScale, Quaternion.identity);
if (foundColliders != null && foundColliders.Length > 0)
if (numFound > 0)
ParseColliders(foundColliders, cellIndex, cellCenter);
ParseColliders(m_ColliderBuffer, numFound, cellIndex, cellCenter);
return m_PerceptionBuffer;
}
return m_PerceptionBuffer;
/// <summary>
/// This method attempts to perform the Physics.OverlapBoxNonAlloc and will double the size of the Collider buffer
/// if the number of Colliders in the buffer after the call is equal to the length of the buffer.
/// </summary>
/// <param name="cellCenter"></param>
/// <param name="halfCellScale"></param>
/// <param name="rotation"></param>
/// <returns></returns>
int BufferResizingOverlapBoxNonAlloc(Vector3 cellCenter, Vector3 halfCellScale, Quaternion rotation)
{
int numFound;
// Since we can only get a fixed number of results, requery
// until we're sure we can hold them all (or until we hit the max size).
while (true)
{
numFound = Physics.OverlapBoxNonAlloc(cellCenter, halfCellScale, m_ColliderBuffer, rotation, ObserveMask);
if (numFound == m_ColliderBuffer.Length && m_ColliderBuffer.Length < MaxColliderBufferSize)
{
m_ColliderBuffer = new Collider[Math.Min(MaxColliderBufferSize, m_ColliderBuffer.Length * 2)];
InitialColliderBufferSize = m_ColliderBuffer.Length;
}
else
{
break;
}
}
return numFound;
}
/// <summary>

/// <param name="numFound">Number of colliders found.</param>
protected virtual void ParseColliders(Collider[] foundColliders, int cellIndex, Vector3 cellCenter)
protected virtual void ParseColliders(Collider[] foundColliders, int numFound, int cellIndex, Vector3 cellCenter)
GameObject currentColliderGo = null;
Profiler.BeginSample("GridSensor.ParseColliders");
Vector3 closestColliderPoint = Vector3.zero;
float distance = float.MaxValue;
float currentDistance = 0f;
var minDistanceSquared = float.MaxValue;
for (int i = 0; i < foundColliders.Length; i++)
for (var i = 0; i < numFound; i++)
currentColliderGo = foundColliders[i].gameObject;
var currentColliderGo = foundColliders[i].gameObject;
if (currentColliderGo == rootReference)
if (ReferenceEquals(currentColliderGo, rootReference))
closestColliderPoint = foundColliders[i].ClosestPointOnBounds(cellCenter);
currentDistance = Vector3.Distance(closestColliderPoint, rootReference.transform.position);
var closestColliderPoint = foundColliders[i].ClosestPointOnBounds(cellCenter);
var currentDistanceSquared = (closestColliderPoint - rootReference.transform.position).sqrMagnitude;
if ((Array.IndexOf(DetectableObjects, currentColliderGo.tag) > -1) && (currentDistance < distance))
var index = -1;
for (var ii = 0; ii < DetectableObjects.Length; ii++)
{
if (currentColliderGo.CompareTag(DetectableObjects[ii]))
{
index = ii;
break;
}
}
if (index > -1 && currentDistanceSquared < minDistanceSquared)
distance = currentDistance;
minDistanceSquared = currentDistanceSquared;
if (closestColliderGo != null)
LoadObjectData(closestColliderGo, cellIndex, distance / SphereRadius);
if (!ReferenceEquals(closestColliderGo, null))
LoadObjectData(closestColliderGo, cellIndex, (float)Math.Sqrt(minDistanceSquared) * InverseSphereRadius);
Profiler.EndSample();
}
/// <summary>

/// </example>
protected virtual float[] GetObjectData(GameObject currentColliderGo, float typeIndex, float normalizedDistance)
{
float[] channelValues = new float[ChannelDepth.Length];
channelValues[0] = typeIndex;
return channelValues;
if (m_ChannelBuffer == null)
{
m_ChannelBuffer = new float[ChannelDepth.Length];
}
Array.Clear(m_ChannelBuffer, 0, m_ChannelBuffer.Length);
m_ChannelBuffer[0] = typeIndex;
return m_ChannelBuffer;
}
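// Note: m_ChannelBuffer is cleared and reused across calls, so GetObjectData no
// longer allocates a new float[] per observed object; callers must consume the
// returned values before the next call overwrites them.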
/// <summary>

/// </summary>
/// <param name="currentColliderGo">The game object that was found colliding with a certain cell</param>
/// <param name="cellIndex">The index of the current cell</param>
/// <param name="normalized_distance">A float between 0 and 1 describing the ratio of
/// <param name="normalizedDistance">A float between 0 and 1 describing the ratio of
protected virtual void LoadObjectData(GameObject currentColliderGo, int cellIndex, float normalized_distance)
protected virtual void LoadObjectData(GameObject currentColliderGo, int cellIndex, float normalizedDistance)
for (int i = 0; i < DetectableObjects.Length; i++)
Profiler.BeginSample("GridSensor.LoadObjectData");
var channelHotVals = new ArraySegment<float>(m_PerceptionBuffer, cellIndex * ObservationPerCell, ObservationPerCell);
for (var i = 0; i < DetectableObjects.Length; i++)
if (currentColliderGo != null && currentColliderGo.CompareTag(DetectableObjects[i]))
for (var ii = 0; ii < channelHotVals.Count; ii++)
{
m_PerceptionBuffer[channelHotVals.Offset + ii] = 0f;
}
if (!ReferenceEquals(currentColliderGo, null) && currentColliderGo.CompareTag(DetectableObjects[i]))
float[] channelValues = GetObjectData(currentColliderGo, (float)i + 1, normalized_distance);
float[] channelValues = GetObjectData(currentColliderGo, (float)i + 1, normalizedDistance);
if (ShowGizmos)
{
Color debugRayColor = Color.white;

}
CellActivity[cellIndex] = new Color(debugRayColor.r, debugRayColor.g, debugRayColor.b, .5f);
}

/// <remarks>
/// The observations are "channel based" so each grid is WxHxC where C is the number of channels
/// This typically means that each channel value is normalized between 0 and 1
/// If channelDepth is 1, the value is assumed normalized, else the value is normalized by the channelDepth
/// The channels are then stored consecutively in PerceptionBuffer.
/// NOTE: This is the only grid type that uses floating point values
/// For example, if a cell contains the 3rd type of 5 possible on the 2nd team of 3 possible teams:
/// channelValues = {2, 1}
/// ObservationPerCell = channelValues.Length
/// channelValues = {2f/5f, 1f/3f} = {.4, .33..}
/// Array.Copy(channelValues, 0, PerceptionBuffer, cell_id*ObservationPerCell, ObservationPerCell);
/// </remarks>
for (int j = 0; j < channelValues.Length; j++)
channelValues[j] /= ChannelDepth[j];
// The observations are "channel based" so each grid is WxHxC where C is the number of channels
// This typically means that each channel value is normalized between 0 and 1
// If channelDepth is 1, the value is assumed normalized, else the value is normalized by the channelDepth
// The channels are then stored consecutively in PerceptionBuffer.
// NOTE: This is the only grid type that uses floating point values
// For example, if a cell contains the 3rd type of 5 possible on the 2nd team of 3 possible teams:
// channelValues = {2, 1}
// ObservationPerCell = channelValues.Length
// channelValues = {2f/5f, 1f/3f} = {.4, .33..}
// Array.Copy(channelValues, 0, PerceptionBuffer, cell_id*ObservationPerCell, ObservationPerCell);
for (int j = 0; j < channelValues.Length; j++)
{
channelValues[j] /= ChannelDepth[j];
}
Array.Copy(channelValues, 0, m_PerceptionBuffer, cellIndex * ObservationPerCell, ObservationPerCell);
break;
Array.Copy(channelValues, 0, m_PerceptionBuffer, cellIndex * ObservationPerCell, ObservationPerCell);
break;
/// <remarks>
/// The observations are "channel hot" so each grid is WxHxD where D is the sum of all of the channel depths
/// The opposite of the "channel based" case, the channel values are represented as one hot vector per channel and then concatenated together
/// Thus channelDepth is assumed to be greater than 1.
/// For example, if a cell contains the 3rd type of 5 possible on the 2nd team of 3 possible teams,
/// channelValues = {2, 1}
/// channelOffsets = {0, 5}
/// ObservationPerCell = 5 + 3 = 8
/// channelHotVals = {0, 0, 1, 0, 0, 0, 1, 0}
/// Array.Copy(channelHotVals, 0, PerceptionBuffer, cell_id*ObservationPerCell, ObservationPerCell);
/// </remarks>
float[] channelHotVals = new float[ObservationPerCell];
for (int j = 0; j < channelValues.Length; j++)
if (ChannelDepth[j] > 1)
{
channelHotVals[(int)channelValues[j] + ChannelOffsets[j]] = 1f;
}
else
// The observations are "channel hot" so each grid is WxHxD where D is the sum of all of the channel depths
// The opposite of the "channel based" case, the channel values are represented as one hot vector per channel and then concatenated together
// Thus channelDepth is assumed to be greater than 1.
// For example, if a cell contains the 3rd type of 5 possible on the 2nd team of 3 possible teams,
// channelValues = {2, 1}
// channelOffsets = {0, 5}
// ObservationPerCell = 5 + 3 = 8
// channelHotVals = {0, 0, 1, 0, 0, 0, 1, 0}
// Array.Copy(channelHotVals, 0, PerceptionBuffer, cell_id*ObservationPerCell, ObservationPerCell);
for (int j = 0; j < channelValues.Length; j++)
channelHotVals[ChannelOffsets[j]] = channelValues[j];
if (ChannelDepth[j] > 1)
{
m_PerceptionBuffer[channelHotVals.Offset + (int)channelValues[j] + ChannelOffsets[j]] = 1f;
}
else
{
m_PerceptionBuffer[channelHotVals.Offset + ChannelOffsets[j]] = channelValues[j];
}
break;
}
Array.Copy(channelHotVals, 0, m_PerceptionBuffer, cellIndex * ObservationPerCell, ObservationPerCell);
break;
}
Profiler.EndSample();
}
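For reference, the two cell encodings described in the comments above can be sketched in isolation as follows. This is a hypothetical helper (`GridEncodingExample` is not part of the package) that assumes every channel depth is greater than 1, and uses the worked example of the 3rd of 5 object types on the 2nd of 3 teams, i.e. `channelValues = {2, 1}` and `ChannelDepth = {5, 3}`:

```csharp
using System;

// Hypothetical sketch of the "channel based" and "channel hot" cell encodings.
static class GridEncodingExample
{
    // Channel based: one float per channel, each normalized by its depth.
    static float[] EncodeChannelBased(int[] channelValues, int[] channelDepth)
    {
        var cell = new float[channelValues.Length];
        for (var j = 0; j < channelValues.Length; j++)
        {
            cell[j] = channelValues[j] / (float)channelDepth[j]; // {2/5f, 1/3f}
        }
        return cell;
    }

    // Channel hot: one one-hot block per channel, concatenated
    // (total length = sum of the channel depths).
    static float[] EncodeChannelHot(int[] channelValues, int[] channelDepth)
    {
        var observationPerCell = 0;
        var offsets = new int[channelDepth.Length]; // cumulative offsets: {0, 5}
        for (var j = 0; j < channelDepth.Length; j++)
        {
            offsets[j] = observationPerCell;
            observationPerCell += channelDepth[j];  // 5 + 3 = 8
        }
        var cell = new float[observationPerCell];
        for (var j = 0; j < channelValues.Length; j++)
        {
            cell[offsets[j] + channelValues[j]] = 1f;
        }
        return cell; // {0, 0, 1, 0, 0, 0, 1, 0}
    }

    static void Main()
    {
        Console.WriteLine(string.Join(", ", EncodeChannelBased(new[] { 2, 1 }, new[] { 5, 3 })));
        Console.WriteLine(string.Join(", ", EncodeChannelHot(new[] { 2, 1 }, new[] { 5, 3 })));
    }
}
```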
/// <summary>Converts the index of the cell to the 3D point (y is zero)</summary>

CellActivity[toCellID] = CellActivity[fromCellID];
}
/// <summary>Creates a copy of a float array</summary>
/// <returns>float[] of the original data</returns>
/// <param name="array">The array to copy from</parma>
private static float[] CreateCopy(float[] array)
{
float[] b = new float[array.Length];
System.Buffer.BlockCopy(array, 0, b, 0, array.Length * sizeof(float));
return b;
}
/// <summary>Utility method to find the index of a tag</summary>
/// <returns>Index of the tag in DetectableObjects, if it is in there</returns>
/// <param name="tag">The tag to search for</param>
public int IndexOfTag(string tag)
{
return Array.IndexOf(DetectableObjects, tag);
}
void OnDrawGizmos()
{
if (ShowGizmos)

Perceive();
Vector3 scale = new Vector3(CellScaleX, 1, CellScaleZ);
Vector3 offset = new Vector3(0, GizmoYOffset, 0);
Matrix4x4 oldGizmoMatrix = Gizmos.matrix;
Matrix4x4 cubeTransform = Gizmos.matrix;
for (int i = 0; i < NumCells; i++)
var scale = new Vector3(CellScaleX, 1, CellScaleZ);
var offset = new Vector3(0, GizmoYOffset, 0);
var oldGizmoMatrix = Gizmos.matrix;
for (var i = 0; i < NumCells; i++)
Matrix4x4 cubeTransform;
if (RotateToAgent)
{
cubeTransform = Matrix4x4.TRS(CellToPoint(i) + offset, transform.rotation, scale);

}
/// <inheritdoc/>
void ISensor.Update() { }
void ISensor.Update()
{
using (TimerStack.Instance.Scoped("GridSensor.Update"))
{
Perceive();
}
}
/// <summary>Gets the observation shape</summary>
/// <returns>int[] of the observation shape</returns>

{
using (TimerStack.Instance.Scoped("GridSensor.WriteToTensor"))
{
Perceive();
int index = 0;
for (var h = GridNumSideZ - 1; h >= 0; h--) // height
{

4
com.unity.ml-agents.extensions/Runtime/Unity.ML-Agents.Extensions.asmdef


"Unity.Barracuda",
"Unity.ML-Agents",
"Unity.ML-Agents.Extensions.Input"
],
"includePlatforms": [],
"excludePlatforms": []
]
}

2
com.unity.ml-agents.extensions/Tests/Runtime/Input/Unity.ML-Agents.Extensions.Input.Tests.Runtime.asmdef


"versionDefines": [
{
"name": "com.unity.inputsystem",
"expression": "1.1.0-preview",
"expression": "1.1.0",
"define": "MLA_INPUT_TESTS"
}
],

2
com.unity.ml-agents.extensions/package.json


{
"name": "com.unity.ml-agents.extensions",
"displayName": "ML Agents Extensions",
"version": "0.0.1-preview",
"version": "0.1.0-preview",
"unity": "2018.4",
"description": "A source-only package for new features based on ML-Agents",
"dependencies": {

15
com.unity.ml-agents/CHANGELOG.md


## [Unreleased]
### Major Changes
#### com.unity.ml-agents (C#)
- The `BufferSensor` and `BufferSensorComponent` have been added. They allow the Agent to observe a variable number of entities. (#4909)
#### ml-agents / ml-agents-envs / gym-unity (Python)
### Minor Changes

- The `cattrs` version dependency was updated to allow `>=1.1.0` on Python 3.8 or higher. (#4821)
### Bug Fixes
#### com.unity.ml-agents (C#)
#### ml-agents / ml-agents-envs / gym-unity (Python)

- Added a `--torch-device` commandline option to `mlagents-learn`, which sets the default
[`torch.device`](https://pytorch.org/docs/stable/tensor_attributes.html#torch.torch.device) used for training. (#4888)
- The `--cpu` commandline option had no effect and was removed. Use `--torch-device=cpu` to force CPU training. (#4888)
- The `mlagents_env` API has changed: `BehaviorSpec` now has an `observation_specs` property containing a list of `ObservationSpec`. For more information on `ObservationSpec` see [here](https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Python-API.md#behaviorspec). (#4763, #4825)
- The `mlagents_env` API has changed: `BehaviorSpec` now has an `observation_specs` property containing a list of `ObservationSpec`. For more information on `ObservationSpec` see [here](https://github.com/Unity-Technologies/ml-agents/blob/main/docs/Python-API.md#behaviorspec). (#4763, #4825)
### Bug Fixes
#### com.unity.ml-agents (C#)

#### com.unity.ml-agents (C#)
#### ml-agents / ml-agents-envs / gym-unity (Python)
- PyTorch trainers are now the default. See the
[installation docs](https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Installation.md) for
[installation docs](https://github.com/Unity-Technologies/ml-agents/blob/main/docs/Installation.md) for
more information on installing PyTorch. For the time being, TensorFlow is still available;
you can use the TensorFlow backend by adding `--tensorflow` to the CLI, or
adding `framework: tensorflow` in the configuration YAML. (#4517)

- The Barracuda dependency was upgraded to 1.1.2 (#4571)
- Utilities were added to `com.unity.ml-agents.extensions` to make it easier to
integrate with match-3 games. See the [readme](https://github.com/Unity-Technologies/ml-agents/blob/master/com.unity.ml-agents.extensions/Documentation~/Match3.md)
integrate with match-3 games. See the [readme](https://github.com/Unity-Technologies/ml-agents/blob/main/com.unity.ml-agents.extensions/Documentation~/Match3.md)
for more details. (#4515)
#### ml-agents / ml-agents-envs / gym-unity (Python)
- The `action_probs` node is no longer listed as an output in TensorFlow models (#4613).

#### ml-agents / ml-agents-envs / gym-unity (Python)
- Added the Random Network Distillation (RND) intrinsic reward signal to the Pytorch
trainers. To use RND, add a `rnd` section to the `reward_signals` section of your
yaml configuration file. [More information here](https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Training-Configuration-File.md#rnd-intrinsic-reward) (#4473)
yaml configuration file. [More information here](https://github.com/Unity-Technologies/ml-agents/blob/main/docs/Training-Configuration-File.md#rnd-intrinsic-reward) (#4473)
### Minor Changes
#### com.unity.ml-agents (C#)
- Stacking for compressed observations is now supported. An additional setting

### Major Changes
#### ml-agents / ml-agents-envs / gym-unity (Python)
- The Parameter Randomization feature has been refactored to enable sampling of new parameters per episode to improve robustness. The
`resampling-interval` parameter has been removed and the config structure updated. More information [here](https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Training-ML-Agents.md). (#4065)
`resampling-interval` parameter has been removed and the config structure updated. More information [here](https://github.com/Unity-Technologies/ml-agents/blob/main/docs/Training-ML-Agents.md). (#4065)
[here](https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Training-ML-Agents.md).(#4160)
[here](https://github.com/Unity-Technologies/ml-agents/blob/main/docs/Training-ML-Agents.md).(#4160)
### Minor Changes
#### com.unity.ml-agents (C#)

4
com.unity.ml-agents/CONTRIBUTING.md


## Communication
First, please read through our
[code of conduct](https://github.com/Unity-Technologies/ml-agents/blob/master/CODE_OF_CONDUCT.md),
[code of conduct](https://github.com/Unity-Technologies/ml-agents/blob/main/CODE_OF_CONDUCT.md),
as we expect all our contributors to follow it.
Second, before starting on a project that you intend to contribute to the

## Git Branches
The master branch corresponds to the most recent version of the project. Note
The main branch corresponds to the most recent version of the project. Note
that this may be newer than the
[latest release](https://github.com/Unity-Technologies/ml-agents/releases/tag/latest_release).

4
com.unity.ml-agents/Documentation~/com.unity.ml-agents.md


[unity ML-Agents Toolkit]: https://github.com/Unity-Technologies/ml-agents
[unity inference engine]: https://docs.unity3d.com/Packages/com.unity.barracuda@latest/index.html
[package manager documentation]: https://docs.unity3d.com/Manual/upm-ui-install.html
[installation instructions]: https://github.com/Unity-Technologies/ml-agents/blob/release_12_docs/docs/Installation.md
[installation instructions]: https://github.com/Unity-Technologies/ml-agents/blob/release_13_docs/docs/Installation.md
[ML-Agents GitHub repo]: https://github.com/Unity-Technologies/ml-agents/blob/release_12_docs/com.unity.ml-agents.extensions
[ML-Agents GitHub repo]: https://github.com/Unity-Technologies/ml-agents/blob/release_13_docs/com.unity.ml-agents.extensions

4
com.unity.ml-agents/Runtime/Academy.cs


* API. For more information on each of these entities, in addition to how to
* set-up a learning environment and train the behavior of characters in a
* Unity scene, please browse our documentation pages on GitHub:
* https://github.com/Unity-Technologies/ml-agents/tree/release_12_docs/docs/
* https://github.com/Unity-Technologies/ml-agents/tree/release_13_docs/docs/
*/
namespace Unity.MLAgents

/// fall back to inference or heuristic decisions. (You can also set agents to always use
/// inference or heuristics.)
/// </remarks>
[HelpURL("https://github.com/Unity-Technologies/ml-agents/tree/release_12_docs/" +
[HelpURL("https://github.com/Unity-Technologies/ml-agents/tree/release_13_docs/" +
"docs/Learning-Environment-Design.md")]
public class Academy : IDisposable
{

1
com.unity.ml-agents/Runtime/Actuators/ActionSpec.cs


/// <param name="numContinuousActions">The number of continuous actions available.</param>
/// <param name="discreteBranchSizes">The array of branch sizes for the discrete actions. Each index
/// contains the number of actions available for that branch.</param>
/// <returns>An ActionSpec initialized with the specified action sizes.</returns>
public ActionSpec(int numContinuousActions = 0, int[] discreteBranchSizes = null)
{
m_NumContinuousActions = numContinuousActions;

2
com.unity.ml-agents/Runtime/Actuators/IActionReceiver.cs


///
/// See [Agents - Actions] for more information on masking actions.
///
/// [Agents - Actions]: https://github.com/Unity-Technologies/ml-agents/blob/release_12_docs/docs/Learning-Environment-Design-Agents.md#actions
/// [Agents - Actions]: https://github.com/Unity-Technologies/ml-agents/blob/release_13_docs/docs/Learning-Environment-Design-Agents.md#actions
/// </remarks>
/// <seealso cref="IActionReceiver.OnActionReceived"/>
void WriteDiscreteActionMask(IDiscreteActionMask actionMask);

2
com.unity.ml-agents/Runtime/Actuators/IDiscreteActionMask.cs


///
/// See [Agents - Actions] for more information on masking actions.
///
/// [Agents - Actions]: https://github.com/Unity-Technologies/ml-agents/blob/release_12_docs/docs/Learning-Environment-Design-Agents.md#actions
/// [Agents - Actions]: https://github.com/Unity-Technologies/ml-agents/blob/release_13_docs/docs/Learning-Environment-Design-Agents.md#actions
/// </remarks>
/// <param name="branch">The branch for which the actions will be masked.</param>
/// <param name="actionIndices">The indices of the masked actions.</param>

8
com.unity.ml-agents/Runtime/Actuators/VectorActuator.cs


/// <summary>
/// IActuator implementation that forwards calls to an <see cref="IActionReceiver"/> and an <see cref="IHeuristicProvider"/>.
/// </summary>
internal class VectorActuator : IActuator, IHeuristicProvider
internal class VectorActuator : IActuator, IHeuristicProvider, IBuiltInActuator
{
IActionReceiver m_ActionReceiver;
IHeuristicProvider m_HeuristicProvider;

/// <inheritdoc />
public string Name { get; }
/// <inheritdoc />
public virtual BuiltInActuatorType GetBuiltInActuatorType()
{
return BuiltInActuatorType.VectorActuator;
}
}
}

47
com.unity.ml-agents/Runtime/Agent.cs


}
/// <summary>
/// Simple wrapper around VectorActuator that overrides GetBuiltInActuatorType
/// so that it can be distinguished from a standard VectorActuator.
/// </summary>
internal class AgentVectorActuator : VectorActuator
{
public AgentVectorActuator(IActionReceiver actionReceiver,
IHeuristicProvider heuristicProvider,
ActionSpec actionSpec,
string name = "VectorActuator"
) : base(actionReceiver, heuristicProvider, actionSpec, name)
{ }
public override BuiltInActuatorType GetBuiltInActuatorType()
{
return BuiltInActuatorType.AgentVectorActuator;
}
}
/// <summary>
/// An agent is an actor that can observe its environment, decide on the
/// best course of action using those observations, and execute those actions
/// within the environment.

/// [OnDisable()]: https://docs.unity3d.com/ScriptReference/MonoBehaviour.OnDisable.html]
/// [OnBeforeSerialize()]: https://docs.unity3d.com/ScriptReference/MonoBehaviour.OnBeforeSerialize.html
/// [OnAfterSerialize()]: https://docs.unity3d.com/ScriptReference/MonoBehaviour.OnAfterSerialize.html
/// [Agents]: https://github.com/Unity-Technologies/ml-agents/blob/release_12_docs/docs/Learning-Environment-Design-Agents.md
/// [Reinforcement Learning in Unity]: https://github.com/Unity-Technologies/ml-agents/blob/release_12_docs/docs/Learning-Environment-Design.md
/// [Agents]: https://github.com/Unity-Technologies/ml-agents/blob/release_13_docs/docs/Learning-Environment-Design-Agents.md
/// [Reinforcement Learning in Unity]: https://github.com/Unity-Technologies/ml-agents/blob/release_13_docs/docs/Learning-Environment-Design.md
/// [Unity ML-Agents Toolkit manual]: https://github.com/Unity-Technologies/ml-agents/blob/release_12_docs/docs/Readme.md
/// [Unity ML-Agents Toolkit manual]: https://github.com/Unity-Technologies/ml-agents/blob/release_13_docs/docs/Readme.md
[HelpURL("https://github.com/Unity-Technologies/ml-agents/blob/release_12_docs/" +
[HelpURL("https://github.com/Unity-Technologies/ml-agents/blob/release_13_docs/" +
"docs/Learning-Environment-Design-Agents.md")]
[Serializable]
[RequireComponent(typeof(BehaviorParameters))]

/// for information about mixing reward signals from curiosity and Generative Adversarial
/// Imitation Learning (GAIL) with rewards supplied through this method.
///
/// [Agents - Rewards]: https://github.com/Unity-Technologies/ml-agents/blob/release_12_docs/docs/Learning-Environment-Design-Agents.md#rewards
/// [Reward Signals]: https://github.com/Unity-Technologies/ml-agents/blob/release_12_docs/docs/ML-Agents-Overview.md#a-quick-note-on-reward-signals
/// [Agents - Rewards]: https://github.com/Unity-Technologies/ml-agents/blob/release_13_docs/docs/Learning-Environment-Design-Agents.md#rewards
/// [Reward Signals]: https://github.com/Unity-Technologies/ml-agents/blob/release_13_docs/docs/ML-Agents-Overview.md#a-quick-note-on-reward-signals
/// </remarks>
/// <param name="reward">The new value of the reward.</param>
public void SetReward(float reward)

/// for information about mixing reward signals from curiosity and Generative Adversarial
/// Imitation Learning (GAIL) with rewards supplied through this method.
///
/// [Agents - Rewards]: https://github.com/Unity-Technologies/ml-agents/blob/release_12_docs/docs/Learning-Environment-Design-Agents.md#rewards
/// [Reward Signals]: https://github.com/Unity-Technologies/ml-agents/blob/release_12_docs/docs/ML-Agents-Overview.md#a-quick-note-on-reward-signals
/// [Agents - Rewards]: https://github.com/Unity-Technologies/ml-agents/blob/release_13_docs/docs/Learning-Environment-Design-Agents.md#rewards
/// [Reward Signals]: https://github.com/Unity-Technologies/ml-agents/blob/release_13_docs/docs/ML-Agents-Overview.md#a-quick-note-on-reward-signals
///</remarks>
/// <param name="increment">Incremental reward value.</param>
public void AddReward(float increment)

/// implementing a simple heuristic function can aid in debugging agent actions and interactions
/// with its environment.
///
/// [Demonstration Recorder]: https://github.com/Unity-Technologies/ml-agents/blob/release_12_docs/docs/Learning-Environment-Design-Agents.md#recording-demonstrations
/// [Actions]: https://github.com/Unity-Technologies/ml-agents/blob/release_12_docs/docs/Learning-Environment-Design-Agents.md#actions
/// [Demonstration Recorder]: https://github.com/Unity-Technologies/ml-agents/blob/release_13_docs/docs/Learning-Environment-Design-Agents.md#recording-demonstrations
/// [Actions]: https://github.com/Unity-Technologies/ml-agents/blob/release_13_docs/docs/Learning-Environment-Design-Agents.md#actions
/// [GameObject]: https://docs.unity3d.com/Manual/GameObjects.html
/// </remarks>
/// <example>

// Support legacy OnActionReceived
// TODO don't set this up if the sizes are 0?
var param = m_PolicyFactory.BrainParameters;
m_VectorActuator = new VectorActuator(this, this, param.ActionSpec);
m_VectorActuator = new AgentVectorActuator(this, this, param.ActionSpec);
m_ActuatorManager = new ActuatorManager(attachedActuators.Length + 1);
m_LegacyActionCache = new float[m_VectorActuator.TotalNumberOfActions()];
m_LegacyHeuristicCache = new float[m_VectorActuator.TotalNumberOfActions()];

/// For more information about observations, see [Observations and Sensors].
///
/// [GameObject]: https://docs.unity3d.com/Manual/GameObjects.html
/// [Observations and Sensors]: https://github.com/Unity-Technologies/ml-agents/blob/release_12_docs/docs/Learning-Environment-Design-Agents.md#observations-and-sensors
/// [Observations and Sensors]: https://github.com/Unity-Technologies/ml-agents/blob/release_13_docs/docs/Learning-Environment-Design-Agents.md#observations-and-sensors
/// </remarks>
public virtual void CollectObservations(VectorSensor sensor)
{

///
/// See [Agents - Actions] for more information on masking actions.
///
/// [Agents - Actions]: https://github.com/Unity-Technologies/ml-agents/blob/release_12_docs/docs/Learning-Environment-Design-Agents.md#actions
/// [Agents - Actions]: https://github.com/Unity-Technologies/ml-agents/blob/release_13_docs/docs/Learning-Environment-Design-Agents.md#actions
/// </remarks>
/// <seealso cref="IActionReceiver.OnActionReceived"/>
public virtual void WriteDiscreteActionMask(IDiscreteActionMask actionMask)

///
/// For more information about implementing agent actions see [Agents - Actions].
///
/// [Agents - Actions]: https://github.com/Unity-Technologies/ml-agents/blob/release_12_docs/docs/Learning-Environment-Design-Agents.md#actions
/// [Agents - Actions]: https://github.com/Unity-Technologies/ml-agents/blob/release_13_docs/docs/Learning-Environment-Design-Agents.md#actions
/// </remarks>
/// <param name="actions">