
Merge branch 'develop-add-fire' into develop-add-fire-checkpoint

/develop/add-fire/ckpt-2
Ruo-Ping Dong, 4 years ago
Current commit
d3eb6c46
131 files changed, with 2959 additions and 1977 deletions
1. .circleci/config.yml (2)
2. .pre-commit-config.yaml (7)
3. .yamato/com.unity.ml-agents-performance.yml (2)
4. .yamato/com.unity.ml-agents-test.yml (2)
5. .yamato/gym-interface-test.yml (14)
6. .yamato/protobuf-generation-test.yml (5)
7. .yamato/python-ll-api-test.yml (14)
8. .yamato/standalone-build-test.yml (2)
9. .yamato/training-int-tests.yml (2)
10. Project/Assets/ML-Agents/Examples/Crawler/Prefabs/Crawler.prefab (35)
11. Project/Assets/ML-Agents/Examples/Crawler/Prefabs/FixedPlatform.prefab (5)
12. Project/Assets/ML-Agents/Examples/Crawler/Scripts/CrawlerAgent.cs (12)
13. Project/Assets/ML-Agents/Examples/Crawler/TFModels/CrawlerDynamic.nn (1001)
14. Project/Assets/ML-Agents/Examples/Crawler/TFModels/CrawlerStatic.nn (1001)
15. Project/Assets/ML-Agents/Examples/PushBlock/Prefabs/PushBlockVisualArea.prefab (37)
16. Project/ProjectSettings/ProjectVersion.txt (2)
17. README.md (10)
18. com.unity.ml-agents.extensions/README.md (2)
19. com.unity.ml-agents.extensions/Runtime/Sensors/ArticulationBodyPoseExtractor.cs (6)
20. com.unity.ml-agents.extensions/Runtime/Sensors/PhysicsBodySensor.cs (14)
21. com.unity.ml-agents.extensions/Runtime/Sensors/PhysicsSensorSettings.cs (31)
22. com.unity.ml-agents.extensions/Runtime/Sensors/PoseExtractor.cs (211)
23. com.unity.ml-agents.extensions/Runtime/Sensors/RigidBodyPoseExtractor.cs (77)
24. com.unity.ml-agents.extensions/Runtime/Sensors/RigidBodySensorComponent.cs (9)
25. com.unity.ml-agents.extensions/Tests/Editor/Sensors/ArticulationBodySensorTests.cs (11)
26. com.unity.ml-agents.extensions/Tests/Editor/Sensors/PoseExtractorTests.cs (88)
27. com.unity.ml-agents.extensions/Tests/Editor/Sensors/RigidBodyPoseExtractorTests.cs (58)
28. com.unity.ml-agents.extensions/Tests/Editor/Sensors/RigidBodySensorTests.cs (25)
29. com.unity.ml-agents.extensions/package.json (2)
30. com.unity.ml-agents/CHANGELOG.md (25)
31. com.unity.ml-agents/Documentation~/com.unity.ml-agents.md (2)
32. com.unity.ml-agents/Runtime/Academy.cs (6)
33. com.unity.ml-agents/Runtime/Agent.cs (26)
34. com.unity.ml-agents/Runtime/AssemblyInfo.cs (1)
35. com.unity.ml-agents/Runtime/Demonstrations/DemonstrationRecorder.cs (2)
36. com.unity.ml-agents/Runtime/DiscreteActionMasker.cs (2)
37. com.unity.ml-agents/package.json (2)
38. docs/Installation-Anaconda-Windows.md (4)
39. docs/Installation.md (6)
40. docs/Training-Configuration-File.md (2)
41. docs/Training-ML-Agents.md (81)
42. docs/Training-on-Amazon-Web-Service.md (2)
43. docs/Unity-Inference-Engine.md (22)
44. docs/Using-Tensorboard.md (8)
45. experiment_torch.py (4)
46. gym-unity/gym_unity/__init__.py (2)
47. ml-agents-envs/mlagents_envs/__init__.py (2)
48. ml-agents-envs/setup.py (2)
49. ml-agents/mlagents/trainers/__init__.py (2)
50. ml-agents/mlagents/trainers/buffer.py (2)
51. ml-agents/mlagents/trainers/cli_utils.py (7)
52. ml-agents/mlagents/trainers/environment_parameter_manager.py (2)
53. ml-agents/mlagents/trainers/exception.py (8)
54. ml-agents/mlagents/trainers/learn.py (14)
55. ml-agents/mlagents/trainers/optimizer/torch_optimizer.py (26)
56. ml-agents/mlagents/trainers/ppo/optimizer_tf.py (2)
57. ml-agents/mlagents/trainers/ppo/optimizer_torch.py (52)
58. ml-agents/mlagents/trainers/ppo/trainer.py (42)
59. ml-agents/mlagents/trainers/sac/optimizer_torch.py (34)
60. ml-agents/mlagents/trainers/sac/trainer.py (61)
61. ml-agents/mlagents/trainers/settings.py (26)
62. ml-agents/mlagents/trainers/stats.py (59)
63. ml-agents/mlagents/trainers/tests/test_env_param_manager.py (64)
64. ml-agents/mlagents/trainers/tests/test_ppo.py (4)
65. ml-agents/mlagents/trainers/tests/test_reward_signals.py (4)
66. ml-agents/mlagents/trainers/tests/test_rl_trainer.py (5)
67. ml-agents/mlagents/trainers/tests/test_stats.py (42)
68. ml-agents/mlagents/trainers/tests/torch/test_decoders.py (4)
69. ml-agents/mlagents/trainers/tests/torch/test_distributions.py (10)
70. ml-agents/mlagents/trainers/tests/torch/test_networks.py (12)
71. ml-agents/mlagents/trainers/tests/torch/test_utils.py (54)
72. ml-agents/mlagents/trainers/tf/model_serialization.py (18)
73. ml-agents/mlagents/trainers/torch/decoders.py (3)
74. ml-agents/mlagents/trainers/torch/distributions.py (39)
75. ml-agents/mlagents/trainers/torch/encoders.py (170)
76. ml-agents/mlagents/trainers/torch/utils.py (123)
77. ml-agents/mlagents/trainers/trainer/rl_trainer.py (44)
78. ml-agents/tests/yamato/yamato_utils.py (3)
79. test_requirements.txt (3)
80. utils/make_readme_table.py (1)
81. com.unity.ml-agents.extensions/Runtime/AssemblyInfo.cs (3)
82. com.unity.ml-agents.extensions/Runtime/AssemblyInfo.cs.meta (11)
83. com.unity.ml-agents/Runtime/Actuators.meta (8)
84. com.unity.ml-agents/Tests/Editor/Actuators.meta (8)
85. docs/images/TensorBoard-download.png (160)
86. ml-agents/mlagents/trainers/tests/torch/test_layers.py (20)
87. ml-agents/mlagents/trainers/torch/layers.py (48)
88. com.unity.ml-agents/Runtime/Actuators/ActionSegment.cs (181)
89. com.unity.ml-agents/Runtime/Actuators/ActionSegment.cs.meta (3)
90. com.unity.ml-agents/Runtime/Actuators/ActionSpec.cs (75)
91. com.unity.ml-agents/Runtime/Actuators/ActionSpec.cs.meta (3)
92. com.unity.ml-agents/Runtime/Actuators/ActuatorComponent.cs (17)
93. com.unity.ml-agents/Runtime/Actuators/ActuatorComponent.cs.meta (3)
94. com.unity.ml-agents/Runtime/Actuators/ActuatorDiscreteActionMask.cs (150)
95. com.unity.ml-agents/Runtime/Actuators/ActuatorDiscreteActionMask.cs.meta (3)
96. com.unity.ml-agents/Runtime/Actuators/ActuatorManager.cs (415)

2
.circleci/config.yml


. venv/bin/activate
mkdir test-reports
pip freeze > test-reports/pip_versions.txt
pytest -n 2 --cov=ml-agents --cov=ml-agents-envs --cov=gym-unity --cov-report html --junitxml=test-reports/junit.xml -p no:warnings
pytest --cov=ml-agents --cov=ml-agents-envs --cov=gym-unity --cov-report html --junitxml=test-reports/junit.xml -p no:warnings
- run:
name: Verify there are no hidden/missing metafiles.

7
.pre-commit-config.yaml


hooks:
- id: pyupgrade
args: [--py3-plus, --py36-plus]
exclude: .*barracuda.py
exclude: >
(?x)^(
.*barracuda.py|
.*_pb2.py|
.*_pb2_grpc.py
)$
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v2.5.0

2
.yamato/com.unity.ml-agents-performance.yml


variables:
UNITY_VERSION: {{ editor.version }}
commands:
- python -m pip install unity-downloader-cli --extra-index-url https://artifactory.eu-cph-1.unityops.net/api/pypi/common-python/simple
- python -m pip install unity-downloader-cli --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
- unity-downloader-cli -u {{ editor.version }} -c editor --wait --fast
- curl -s https://artifactory.internal.unity3d.com/core-automation/tools/utr-standalone/utr --output utr
- chmod +x ./utr

2
.yamato/com.unity.ml-agents-test.yml


image: {{ platform.image }}
flavor: {{ platform.flavor}}
commands:
- python -m pip install unity-downloader-cli --extra-index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/unity-pypi-local/simple --upgrade
- python -m pip install unity-downloader-cli --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple --upgrade
- unity-downloader-cli -u trunk -c editor --wait --fast
- npm install upm-ci-utils@stable -g --registry https://artifactory.prd.cds.internal.unity3d.com/artifactory/api/npm/upm-npm
- upm-ci project test -u {{ editor.version }} --project-path Project --package-filter {{ package.name }} {{ coverageOptions }}

14
.yamato/gym-interface-test.yml


variables:
UNITY_VERSION: {{ editor.version }}
commands:
- pip install pyyaml
- pip install pyyaml --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
- python -u -m ml-agents.tests.yamato.setup_venv
- ./venv/bin/python ml-agents/tests/yamato/scripts/run_gym.py --env=artifacts/testPlayer-Basic
dependencies:

expression: |
(pull_request.target eq "master" OR
pull_request.target match "release.+") AND
NOT pull_request.draft AND
(pull_request.changes.any match "com.unity.ml-agents/**" OR
pull_request.changes.any match "Project/**" OR
pull_request.changes.any match "ml-agents/**" OR
pull_request.changes.any match "ml-agents-envs/**" OR
pull_request.changes.any match "gym-unity/**" OR
NOT pull_request.draft AND
(pull_request.changes.any match "com.unity.ml-agents/**" OR
pull_request.changes.any match "Project/**" OR
pull_request.changes.any match "ml-agents/**" OR
pull_request.changes.any match "ml-agents-envs/**" OR
pull_request.changes.any match "gym-unity/**" OR
pull_request.changes.any match ".yamato/gym-interface-test.yml") AND
NOT pull_request.changes.all match "**/*.md"
{% endfor %}

5
.yamato/protobuf-generation-test.yml


nuget install Grpc.Tools -Version $GRPC_VERSION -OutputDirectory protobuf-definitions/
python3 -m venv venv
. venv/bin/activate
pip install --upgrade pip
pip install grpcio-tools==1.13.0 --progress-bar=off
pip install mypy-protobuf==1.16.0 --progress-bar=off
pip install --upgrade pip --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
pip install grpcio==1.28.1 grpcio-tools==1.13.0 protobuf==3.11.3 six==1.14.0 mypy-protobuf==1.16.0 --progress-bar=off --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
cd protobuf-definitions
chmod +x Grpc.Tools.$GRPC_VERSION/tools/macosx_x64/protoc
chmod +x Grpc.Tools.$GRPC_VERSION/tools/macosx_x64/grpc_csharp_plugin

14
.yamato/python-ll-api-test.yml


variables:
UNITY_VERSION: {{ editor.version }}
commands:
- pip install pyyaml
- pip install pyyaml --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
- ./venv/bin/python ml-agents/tests/yamato/scripts/run_llapi.py
- ./venv/bin/python ml-agents/tests/yamato/scripts/run_llapi.py
- ./venv/bin/python ml-agents/tests/yamato/scripts/run_llapi.py --env=artifacts/testPlayer-Basic
- ./venv/bin/python ml-agents/tests/yamato/scripts/run_llapi.py --env=artifacts/testPlayer-WallJump
- ./venv/bin/python ml-agents/tests/yamato/scripts/run_llapi.py --env=artifacts/testPlayer-Bouncer

expression: |
(pull_request.target eq "master" OR
pull_request.target match "release.+") AND
NOT pull_request.draft AND
(pull_request.changes.any match "com.unity.ml-agents/**" OR
pull_request.changes.any match "Project/**" OR
pull_request.changes.any match "ml-agents/**" OR
pull_request.changes.any match "ml-agents-envs/**" OR
NOT pull_request.draft AND
(pull_request.changes.any match "com.unity.ml-agents/**" OR
pull_request.changes.any match "Project/**" OR
pull_request.changes.any match "ml-agents/**" OR
pull_request.changes.any match "ml-agents-envs/**" OR
pull_request.changes.any match ".yamato/python-ll-api-test.yml") AND
NOT pull_request.changes.all match "**/*.md"
{% endfor %}

2
.yamato/standalone-build-test.yml


variables:
UNITY_VERSION: {{ editor.version }}
commands:
- pip install pyyaml
- pip install pyyaml --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
- python -u -m ml-agents.tests.yamato.standalone_build_tests
- python -u -m ml-agents.tests.yamato.standalone_build_tests --scene=Assets/ML-Agents/Examples/Basic/Scenes/Basic.unity
- python -u -m ml-agents.tests.yamato.standalone_build_tests --scene=Assets/ML-Agents/Examples/Bouncer/Scenes/Bouncer.unity

2
.yamato/training-int-tests.yml


variables:
UNITY_VERSION: {{ editor.version }}
commands:
- pip install pyyaml
- pip install pyyaml --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
- python -u -m ml-agents.tests.yamato.training_int_tests
# Backwards-compatibility tests.
# If we make a breaking change to the communication protocol, these will need

35
Project/Assets/ML-Agents/Examples/Crawler/Prefabs/Crawler.prefab


- component: {fileID: 4845971001715176662}
- component: {fileID: 4845971001715176663}
- component: {fileID: 4845971001715176660}
- component: {fileID: 4622120667686875944}
m_Layer: 0
m_Name: Crawler
m_TagString: Untagged

m_Name:
m_EditorClassIdentifier:
m_BrainParameters:
VectorObservationSize: 138
VectorObservationSize: 21
NumStackedVectorObservations: 1
VectorActionSize: 14000000
VectorActionDescriptions: []

m_Name:
m_EditorClassIdentifier:
debugCommandLineOverride:
--- !u!114 &4622120667686875944
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 4845971001715176661}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: df0f8be9a37d6486498061e2cbc4cd94, type: 3}
m_Name:
m_EditorClassIdentifier:
RootBody: {fileID: 4845971001588102145}
VirtualRoot: {fileID: 2270141184585723037}
Settings:
UseModelSpaceTranslations: 1
UseModelSpaceRotations: 1
UseLocalSpaceTranslations: 0
UseLocalSpaceRotations: 1
UseModelSpaceLinearVelocity: 1
UseLocalSpaceLinearVelocity: 0
UseJointPositionsAndAngles: 0
UseJointForces: 0
sensorName:
--- !u!1 &4845971001730692034
GameObject:
m_ObjectHideFlags: 0

objectReference: {fileID: 0}
m_RemovedComponents: []
m_SourcePrefab: {fileID: 100100000, guid: 72f745913c5a34df5aaadd5c1f0024cb, type: 3}
--- !u!1 &2270141184585723037 stripped
GameObject:
m_CorrespondingSourceObject: {fileID: 2591864627249999519, guid: 72f745913c5a34df5aaadd5c1f0024cb,
type: 3}
m_PrefabInstance: {fileID: 4357529801223143938}
m_PrefabAsset: {fileID: 0}
--- !u!4 &2270141184585723026 stripped
Transform:
m_CorrespondingSourceObject: {fileID: 2591864627249999504, guid: 72f745913c5a34df5aaadd5c1f0024cb,

type: 3}
m_PrefabInstance: {fileID: 4357529801223143938}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 0}
m_GameObject: {fileID: 2270141184585723037}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 771e78c5e980e440e8cd19716b55075f, type: 3}

5
Project/Assets/ML-Agents/Examples/Crawler/Prefabs/FixedPlatform.prefab


propertyPath: targetToLookAt
value:
objectReference: {fileID: 2673081981996998229}
- target: {fileID: 4622120667686875944, guid: 0456c89e8c9c243d595b039fe7aa0bf9,
type: 3}
propertyPath: Settings.UseLocalSpaceLinearVelocity
value: 1
objectReference: {fileID: 0}
- target: {fileID: 4845971000000621469, guid: 0456c89e8c9c243d595b039fe7aa0bf9,
type: 3}
propertyPath: m_ConnectedAnchor.x

12
Project/Assets/ML-Agents/Examples/Crawler/Scripts/CrawlerAgent.cs


//GROUND CHECK
sensor.AddObservation(bp.groundContact.touchingGround); // Is this bp touching the ground
//Get velocities in the context of our orientation cube's space
//Note: You can get these velocities in world space as well but it may not train as well.
sensor.AddObservation(orientationCube.transform.InverseTransformDirection(bp.rb.velocity));
sensor.AddObservation(orientationCube.transform.InverseTransformDirection(bp.rb.angularVelocity));
//Get position relative to hips in the context of our orientation cube's space
sensor.AddObservation(orientationCube.transform.InverseTransformDirection(bp.rb.position - body.position));
sensor.AddObservation(bp.rb.transform.localRotation);
sensor.AddObservation(bp.currentStrength / m_JdController.maxJointForceLimit);
}
}

/// </summary>
public override void CollectObservations(VectorSensor sensor)
{
//Add body rotation delta relative to orientation cube
sensor.AddObservation(Quaternion.FromToRotation(body.forward, orientationCube.transform.forward));
//Add pos of target relative to orientation cube
sensor.AddObservation(orientationCube.transform.InverseTransformPoint(target.transform.position));

1001
Project/Assets/ML-Agents/Examples/Crawler/TFModels/CrawlerDynamic.nn
The file diff is too large to display.

1001
Project/Assets/ML-Agents/Examples/Crawler/TFModels/CrawlerStatic.nn
The file diff is too large to display.

37
Project/Assets/ML-Agents/Examples/PushBlock/Prefabs/PushBlockVisualArea.prefab


m_Name:
m_EditorClassIdentifier:
m_BrainParameters:
vectorObservationSize: 0
numStackedVectorObservations: 1
vectorActionSize: 07000000
vectorActionDescriptions: []
vectorActionSpaceType: 0
VectorObservationSize: 0
NumStackedVectorObservations: 1
VectorActionSize: 07000000
VectorActionDescriptions: []
VectorActionSpaceType: 0
m_BehaviorName: VisualHallway
m_TeamID: 0
m_useChildSensors: 1
m_BehaviorName: VisualPushBlock
TeamId: 0
m_UseChildSensors: 1
m_ObservableAttributeHandling: 0
--- !u!114 &114812843792483960
MonoBehaviour:
m_ObjectHideFlags: 0

m_Script: {fileID: 11500000, guid: dea8c4f2604b947e6b7b97750dde87ca, type: 3}
m_Name:
m_EditorClassIdentifier:
maxStep: 5000
agentParameters:
maxStep: 0
hasUpgradedFromAgentParameters: 1
MaxStep: 5000
ground: {fileID: 1913379827958244}
area: {fileID: 1632733799967290}
areaBounds:

m_Script: {fileID: 11500000, guid: 282f342c2ab144bf38be65d4d0c4e07d, type: 3}
m_Name:
m_EditorClassIdentifier:
camera: {fileID: 20961401228419460}
sensorName: CameraSensor
width: 84
height: 84
grayscale: 0
compression: 1
m_Camera: {fileID: 20961401228419460}
m_SensorName: CameraSensor
m_Width: 84
m_Height: 84
m_Grayscale: 0
m_Compression: 1
--- !u!114 &9049837659352187721
MonoBehaviour:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
DecisionPeriod: 5
RepeatAction: 1
offsetStep: 0
TakeActionsBetweenDecisions: 1
--- !u!1 &1626651094211584
GameObject:
m_ObjectHideFlags: 0

2
Project/ProjectSettings/ProjectVersion.txt


m_EditorVersion: 2018.4.20f1
m_EditorVersion: 2018.4.24f1

10
README.md


# Unity ML-Agents Toolkit
[![docs badge](https://img.shields.io/badge/docs-reference-blue.svg)](https://github.com/Unity-Technologies/ml-agents/tree/release_4_docs/docs/)
[![docs badge](https://img.shields.io/badge/docs-reference-blue.svg)](https://github.com/Unity-Technologies/ml-agents/tree/release_5_docs/docs/)
[![license badge](https://img.shields.io/badge/license-Apache--2.0-green.svg)](LICENSE)

## Releases & Documentation
**Our latest, stable release is `Release 4`. Click
[here](https://github.com/Unity-Technologies/ml-agents/tree/release_4_docs/docs/Readme.md)
**Our latest, stable release is `Release 5`. Click
[here](https://github.com/Unity-Technologies/ml-agents/tree/release_5_docs/docs/Readme.md)
to get started with the latest release of ML-Agents.**
The table below lists all our releases, including our `master` branch which is

| **Version** | **Release Date** | **Source** | **Documentation** | **Download** |
|:-------:|:------:|:-------------:|:-------:|:------------:|
| **master (unstable)** | -- | [source](https://github.com/Unity-Technologies/ml-agents/tree/master) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/master/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/master.zip) |
| **Release 4** | **July 15, 2020** | **[source](https://github.com/Unity-Technologies/ml-agents/tree/release_4)** | **[docs](https://github.com/Unity-Technologies/ml-agents/tree/release_4_docs/docs/Readme.md)** | **[download](https://github.com/Unity-Technologies/ml-agents/archive/release_4.zip)** |
| **Release 5** | **July 31, 2020** | **[source](https://github.com/Unity-Technologies/ml-agents/tree/release_5)** | **[docs](https://github.com/Unity-Technologies/ml-agents/tree/release_5_docs/docs/Readme.md)** | **[download](https://github.com/Unity-Technologies/ml-agents/archive/release_5.zip)** |
| **Release 4** | July 15, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/release_4) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/release_4_docs/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/release_4.zip) |
| **0.14.1** | February 26, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/0.14.1) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/0.14.1/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/0.14.1.zip) |
## Citation

2
com.unity.ml-agents.extensions/README.md


# ML-Agents Extensions
This is a source-only package for new features based on ML-Agents.
More details coming soon.

6
com.unity.ml-agents.extensions/Runtime/Sensors/ArticulationBodyPoseExtractor.cs


parentIndices[i] = bodyToIndex[parentArticBody];
}
SetParentIndices(parentIndices);
Setup(parentIndices);
protected override Vector3 GetLinearVelocityAt(int index)
protected internal override Vector3 GetLinearVelocityAt(int index)
protected override Pose GetPoseAt(int index)
protected internal override Pose GetPoseAt(int index)
{
var body = m_Bodies[index];
var go = body.gameObject;

14
com.unity.ml-agents.extensions/Runtime/Sensors/PhysicsBodySensor.cs


/// <summary>
/// Construct a new PhysicsBodySensor
/// </summary>
/// <param name="rootBody"></param>
/// <param name="rootBody">The root Rigidbody. This has no Joints on it (but other Joints may connect to it).</param>
/// <param name="rootGameObject">Optional GameObject used to find Rigidbodies in the hierarchy.</param>
/// <param name="virtualRoot">Optional GameObject used to determine the root of the poses,
public PhysicsBodySensor(Rigidbody rootBody, GameObject rootGameObject, PhysicsSensorSettings settings, string sensorName=null)
public PhysicsBodySensor(
Rigidbody rootBody,
GameObject rootGameObject,
GameObject virtualRoot,
PhysicsSensorSettings settings,
string sensorName=null
)
var poseExtractor = new RigidBodyPoseExtractor(rootBody, rootGameObject);
var poseExtractor = new RigidBodyPoseExtractor(rootBody, rootGameObject, virtualRoot);
m_PoseExtractor = poseExtractor;
m_SensorName = string.IsNullOrEmpty(sensorName) ? $"PhysicsBodySensor:{rootBody?.name}" : sensorName;
m_Settings = settings;
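
As a rough sketch of how the widened constructor might be invoked directly (the `rootRb`, `ragdollRoot`, and `referenceFrame` names below are hypothetical illustrations, not part of this change):

```csharp
using UnityEngine;
using Unity.MLAgents.Extensions.Sensors;

public static class PhysicsBodySensorSketch
{
    // Sketch only: build a PhysicsBodySensor using the new optional virtualRoot argument.
    public static PhysicsBodySensor Create(Rigidbody rootRb, GameObject ragdollRoot, GameObject referenceFrame)
    {
        var settings = new PhysicsSensorSettings
        {
            UseModelSpaceTranslations = true,
            UseModelSpaceRotations = true,
            UseModelSpaceLinearVelocity = true
        };
        // referenceFrame is passed as the new virtualRoot parameter, so model-space
        // poses are computed relative to it rather than to the root Rigidbody.
        return new PhysicsBodySensor(rootRb, ragdollRoot, referenceFrame, settings, "RagdollSensor");
    }
}
```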

31
com.unity.ml-agents.extensions/Runtime/Sensors/PhysicsSensorSettings.cs


using System;
using Unity.MLAgents.Sensors;
namespace Unity.MLAgents.Extensions.Sensors

var offset = baseOffset;
if (settings.UseModelSpace)
{
var poses = poseExtractor.ModelSpacePoses;
var vels = poseExtractor.ModelSpaceVelocities;
for(var i=0; i<poseExtractor.NumPoses; i++)
foreach (var pose in poseExtractor.GetEnabledModelSpacePoses())
var pose = poses[i];
if(settings.UseModelSpaceTranslations)
if (settings.UseModelSpaceTranslations)
}
foreach(var vel in poseExtractor.GetEnabledModelSpaceVelocities())
{
writer.Add(vels[i], offset);
writer.Add(vel, offset);
offset += 3;
}
}

{
var poses = poseExtractor.LocalSpacePoses;
var vels = poseExtractor.LocalSpaceVelocities;
for(var i=0; i<poseExtractor.NumPoses; i++)
foreach (var pose in poseExtractor.GetEnabledLocalSpacePoses())
var pose = poses[i];
if(settings.UseLocalSpaceTranslations)
if (settings.UseLocalSpaceTranslations)
}
foreach(var vel in poseExtractor.GetEnabledLocalSpaceVelocities())
{
writer.Add(vels[i], offset);
writer.Add(vel, offset);
offset += 3;
}
}

211
com.unity.ml-agents.extensions/Runtime/Sensors/PoseExtractor.cs


Vector3[] m_ModelSpaceLinearVelocities;
Vector3[] m_LocalSpaceLinearVelocities;
bool[] m_PoseEnabled;
/// Read access to the model space transforms.
/// Read iterator for the enabled model space transforms.
public IList<Pose> ModelSpacePoses
public IEnumerable<Pose> GetEnabledModelSpacePoses()
get { return m_ModelSpacePoses; }
if (m_ModelSpacePoses == null)
{
yield break;
}
for (var i = 0; i < m_ModelSpacePoses.Length; i++)
{
if (m_PoseEnabled[i])
{
yield return m_ModelSpacePoses[i];
}
}
/// Read access to the local space transforms.
/// Read iterator for the enabled local space transforms.
public IList<Pose> LocalSpacePoses
public IEnumerable<Pose> GetEnabledLocalSpacePoses()
get { return m_LocalSpacePoses; }
if (m_LocalSpacePoses == null)
{
yield break;
}
for (var i = 0; i < m_LocalSpacePoses.Length; i++)
{
if (m_PoseEnabled[i])
{
yield return m_LocalSpacePoses[i];
}
}
/// Read access to the model space linear velocities.
/// Read iterator for the enabled model space linear velocities.
public IList<Vector3> ModelSpaceVelocities
public IEnumerable<Vector3> GetEnabledModelSpaceVelocities()
get { return m_ModelSpaceLinearVelocities; }
if (m_ModelSpaceLinearVelocities == null)
{
yield break;
}
for (var i = 0; i < m_ModelSpaceLinearVelocities.Length; i++)
{
if (m_PoseEnabled[i])
{
yield return m_ModelSpaceLinearVelocities[i];
}
}
/// Read access to the local space linear velocities.
/// Read iterator for the enabled local space linear velocities.
/// </summary>
public IEnumerable<Vector3> GetEnabledLocalSpaceVelocities()
{
if (m_LocalSpaceLinearVelocities == null)
{
yield break;
}
for (var i = 0; i < m_LocalSpaceLinearVelocities.Length; i++)
{
if (m_PoseEnabled[i])
{
yield return m_LocalSpaceLinearVelocities[i];
}
}
}
/// <summary>
/// Number of enabled poses in the hierarchy (read-only).
public IList<Vector3> LocalSpaceVelocities
public int NumEnabledPoses
get { return m_LocalSpaceLinearVelocities; }
get
{
if (m_PoseEnabled == null)
{
return 0;
}
var numEnabled = 0;
for (var i = 0; i < m_PoseEnabled.Length; i++)
{
numEnabled += m_PoseEnabled[i] ? 1 : 0;
}
return numEnabled;
}
/// Number of poses in the hierarchy (read-only).
/// Number of total poses in the hierarchy (read-only).
get { return m_ModelSpacePoses?.Length ?? 0; }
get { return m_ModelSpacePoses?.Length ?? 0; }
}
/// <summary>

}
return m_ParentIndices[index];
}
/// <summary>
/// Set whether the pose at the given index is enabled or disabled for observations.
/// </summary>
/// <param name="index"></param>
/// <param name="val"></param>
public void SetPoseEnabled(int index, bool val)
{
m_PoseEnabled[index] = val;
}
/// <summary>

/// <param name="parentIndices"></param>
protected void SetParentIndices(int[] parentIndices)
protected void Setup(int[] parentIndices)
#if DEBUG
if (parentIndices[0] != -1)
{
throw new UnityAgentsException($"Expected parentIndices[0] to be -1, got {parentIndices[0]}");
}
#endif
var numTransforms = parentIndices.Length;
m_ModelSpacePoses = new Pose[numTransforms];
m_LocalSpacePoses = new Pose[numTransforms];
var numPoses = parentIndices.Length;
m_ModelSpacePoses = new Pose[numPoses];
m_LocalSpacePoses = new Pose[numPoses];
m_ModelSpaceLinearVelocities = new Vector3[numPoses];
m_LocalSpaceLinearVelocities = new Vector3[numPoses];
m_ModelSpaceLinearVelocities = new Vector3[numTransforms];
m_LocalSpaceLinearVelocities = new Vector3[numTransforms];
m_PoseEnabled = new bool[numPoses];
// All poses are enabled by default. Generally we'll want to disable the root though.
for (var i = 0; i < numPoses; i++)
{
m_PoseEnabled[i] = true;
}
}
/// <summary>

/// <returns></returns>
protected abstract Pose GetPoseAt(int index);
protected internal abstract Pose GetPoseAt(int index);
/// <summary>
/// Return the world space linear velocity of the i'th object.

protected abstract Vector3 GetLinearVelocityAt(int index);
protected internal abstract Vector3 GetLinearVelocityAt(int index);
/// <summary>

{
if (m_ModelSpacePoses == null)
using (TimerStack.Instance.Scoped("UpdateModelSpacePoses"))
return;
}
if (m_ModelSpacePoses == null)
{
return;
}
var rootWorldTransform = GetPoseAt(0);
var worldToModel = rootWorldTransform.Inverse();
var rootLinearVel = GetLinearVelocityAt(0);
var rootWorldTransform = GetPoseAt(0);
var worldToModel = rootWorldTransform.Inverse();
var rootLinearVel = GetLinearVelocityAt(0);
for (var i = 0; i < m_ModelSpacePoses.Length; i++)
{
var currentWorldSpacePose = GetPoseAt(i);
var currentModelSpacePose = worldToModel.Multiply(currentWorldSpacePose);
m_ModelSpacePoses[i] = currentModelSpacePose;
for (var i = 0; i < m_ModelSpacePoses.Length; i++)
{
var currentWorldSpacePose = GetPoseAt(i);
var currentModelSpacePose = worldToModel.Multiply(currentWorldSpacePose);
m_ModelSpacePoses[i] = currentModelSpacePose;
var currentBodyLinearVel = GetLinearVelocityAt(i);
var relativeVelocity = currentBodyLinearVel - rootLinearVel;
m_ModelSpaceLinearVelocities[i] = worldToModel.rotation * relativeVelocity;
var currentBodyLinearVel = GetLinearVelocityAt(i);
var relativeVelocity = currentBodyLinearVel - rootLinearVel;
m_ModelSpaceLinearVelocities[i] = worldToModel.rotation * relativeVelocity;
}
}
}

public void UpdateLocalSpacePoses()
{
if (m_LocalSpacePoses == null)
{
return;
}
for (var i = 0; i < m_LocalSpacePoses.Length; i++)
using (TimerStack.Instance.Scoped("UpdateLocalSpacePoses"))
if (m_ParentIndices[i] != -1)
if (m_LocalSpacePoses == null)
var parentTransform = GetPoseAt(m_ParentIndices[i]);
// This is slightly inefficient, since for a body with multiple children, we'll end up inverting
// the transform multiple times. Might be able to trade space for perf here.
var invParent = parentTransform.Inverse();
var currentTransform = GetPoseAt(i);
m_LocalSpacePoses[i] = invParent.Multiply(currentTransform);
var parentLinearVel = GetLinearVelocityAt(m_ParentIndices[i]);
var currentLinearVel = GetLinearVelocityAt(i);
m_LocalSpaceLinearVelocities[i] = invParent.rotation * (currentLinearVel - parentLinearVel);
return;
else
for (var i = 0; i < m_LocalSpacePoses.Length; i++)
m_LocalSpacePoses[i] = Pose.identity;
m_LocalSpaceLinearVelocities[i] = Vector3.zero;
if (m_ParentIndices[i] != -1)
{
var parentTransform = GetPoseAt(m_ParentIndices[i]);
// This is slightly inefficient, since for a body with multiple children, we'll end up inverting
// the transform multiple times. Might be able to trade space for perf here.
var invParent = parentTransform.Inverse();
var currentTransform = GetPoseAt(i);
m_LocalSpacePoses[i] = invParent.Multiply(currentTransform);
var parentLinearVel = GetLinearVelocityAt(m_ParentIndices[i]);
var currentLinearVel = GetLinearVelocityAt(i);
m_LocalSpaceLinearVelocities[i] = invParent.rotation * (currentLinearVel - parentLinearVel);
}
else
{
m_LocalSpacePoses[i] = Pose.identity;
m_LocalSpaceLinearVelocities[i] = Vector3.zero;
}
}
}
}

obsPerPose += settings.UseModelSpaceLinearVelocity ? 3 : 0;
obsPerPose += settings.UseLocalSpaceLinearVelocity ? 3 : 0;
return NumPoses * obsPerPose;
return NumEnabledPoses * obsPerPose;
}
internal void DrawModelSpace(Vector3 offset)
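
To make the reworked API above concrete, here is a small hypothetical subclass sketch (not part of this commit) that uses the renamed `Setup()` call, disables the root pose, and exposes only the enabled poses through the new iterators:

```csharp
using UnityEngine;
using Unity.MLAgents.Extensions.Sensors;

// Hypothetical two-pose hierarchy: a fixed root plus one child offset along x.
class TwoPoseExtractor : PoseExtractor
{
    public TwoPoseExtractor()
    {
        // Index 0 is the root (parent index -1); index 1 is parented to the root.
        Setup(new[] { -1, 0 });
        // Mirror RigidBodyPoseExtractor's default: exclude the root from observations.
        SetPoseEnabled(0, false);
    }

    protected internal override Pose GetPoseAt(int index)
    {
        return new Pose(new Vector3(index, 0f, 0f), Quaternion.identity);
    }

    protected internal override Vector3 GetLinearVelocityAt(int index)
    {
        return Vector3.zero;
    }
}
```

With the root disabled, `NumEnabledPoses` is 1, and after `UpdateModelSpacePoses()` the `GetEnabledModelSpacePoses()` iterator yields only the child pose.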

77
com.unity.ml-agents.extensions/Runtime/Sensors/RigidBodyPoseExtractor.cs


Rigidbody[] m_Bodies;
/// <summary>
/// Optional game object used to determine the root of the poses, separate from the actual Rigidbodies
/// in the hierarchy. For locomotion
/// </summary>
GameObject m_VirtualRoot;
/// <summary>
/// <param name="rootBody"></param>
public RigidBodyPoseExtractor(Rigidbody rootBody, GameObject rootGameObject = null)
/// <param name="rootBody">The root Rigidbody. This has no Joints on it (but other Joints may connect to it).</param>
/// <param name="rootGameObject">Optional GameObject used to find Rigidbodies in the hierarchy.</param>
/// <param name="virtualRoot">Optional GameObject used to determine the root of the poses,
/// separate from the actual Rigidbodies in the hierarchy. For locomotion tasks, with ragdolls, this provides
/// a stabilized reference frame, which can improve learning.</param>
public RigidBodyPoseExtractor(Rigidbody rootBody, GameObject rootGameObject = null, GameObject virtualRoot = null)
{
if (rootBody == null)
{

{
rbs = rootGameObject.GetComponentsInChildren<Rigidbody>();
}
var bodyToIndex = new Dictionary<Rigidbody, int>(rbs.Length);
var parentIndices = new int[rbs.Length];
if (rbs == null || rbs.Length == 0)
{
Debug.Log("No rigid bodies found!");
return;
}
if (rbs[0] != rootBody)
if (rbs[0] != rootBody)
// Adjust the array if we have a virtual root.
// This will be at index 0, and the "real" root will be parented to it.
if (virtualRoot != null)
{
var extendedRbs = new Rigidbody[rbs.Length + 1];
for (var i = 0; i < rbs.Length; i++)
{
extendedRbs[i + 1] = rbs[i];
}
rbs = extendedRbs;
}
var bodyToIndex = new Dictionary<Rigidbody, int>(rbs.Length);
var parentIndices = new int[rbs.Length];
parentIndices[0] = -1;
bodyToIndex[rbs[i]] = i;
if(rbs[i] != null)
{
bodyToIndex[rbs[i]] = i;
}
}
var joints = rootBody.GetComponentsInChildren <Joint>();

parentIndices[childIndex] = parentIndex;
}
if (virtualRoot != null)
{
// Make sure the original root treats the virtual root as its parent.
parentIndices[1] = 0;
m_VirtualRoot = virtualRoot;
}
SetParentIndices(parentIndices);
Setup(parentIndices);
// By default, ignore the root
SetPoseEnabled(0, false);
protected override Vector3 GetLinearVelocityAt(int index)
protected internal override Vector3 GetLinearVelocityAt(int index)
if (index == 0 && m_VirtualRoot != null)
{
// No velocity on the virtual root
return Vector3.zero;
}
protected override Pose GetPoseAt(int index)
protected internal override Pose GetPoseAt(int index)
if (index == 0 && m_VirtualRoot != null)
{
// Use the GameObject's world transform
return new Pose
{
rotation = m_VirtualRoot.transform.rotation,
position = m_VirtualRoot.transform.position
};
}
var body = m_Bodies[index];
return new Pose { rotation = body.rotation, position = body.position };
}

9
com.unity.ml-agents.extensions/Runtime/Sensors/RigidBodySensorComponent.cs


public Rigidbody RootBody;
/// <summary>
/// Optional GameObject used to determine the root of the poses.
/// </summary>
public GameObject VirtualRoot;
/// <summary>
/// Settings defining what types of observations will be generated.
/// </summary>
[SerializeField]

/// <returns></returns>
public override ISensor CreateSensor()
{
return new PhysicsBodySensor(RootBody, gameObject, Settings, sensorName);
return new PhysicsBodySensor(RootBody, gameObject, VirtualRoot, Settings, sensorName);
}
/// <inheritdoc/>

// TODO static method in PhysicsBodySensor?
// TODO only update PoseExtractor when body changes?
var poseExtractor = new RigidBodyPoseExtractor(RootBody, gameObject);
var poseExtractor = new RigidBodyPoseExtractor(RootBody, gameObject, VirtualRoot);
var numPoseObservations = poseExtractor.GetNumPoseObservations(Settings);
var numJointObservations = 0;
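
At the component level, a hedged sketch of wiring up the new `VirtualRoot` field at runtime might look like the following (the MonoBehaviour and field names are illustrative assumptions, not part of this change):

```csharp
using UnityEngine;
using Unity.MLAgents.Extensions.Sensors;

// Sketch: attach a RigidBodySensorComponent to a ragdoll and point it at a
// stabilized virtual root, so CreateSensor() receives the VirtualRoot shown above.
public class RagdollSensorSetup : MonoBehaviour
{
    public Rigidbody hips;              // root Rigidbody of the ragdoll (hypothetical)
    public GameObject orientationCube;  // stabilized reference frame (hypothetical)

    void Awake()
    {
        var sensorComponent = gameObject.AddComponent<RigidBodySensorComponent>();
        sensorComponent.RootBody = hips;
        sensorComponent.VirtualRoot = orientationCube;
        sensorComponent.Settings = new PhysicsSensorSettings
        {
            UseModelSpaceTranslations = true,
            UseModelSpaceRotations = true,
            UseModelSpaceLinearVelocity = true
        };
        // Observation size counts only enabled poses; the virtual root itself is excluded.
        Debug.Log($"Observation size: {sensorComponent.GetObservationShape()[0]}");
    }
}
```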

11
com.unity.ml-agents.extensions/Tests/Editor/Sensors/ArticulationBodySensorTests.cs


// Local space
0f, 0f, 0f, // Root pos
13.37f, 0f, 0f, // Attached pos
4.2f, 0f, 0f, // Leaf pos
#endif
13.37f, 0f, 0f, // Attached pos
#if UNITY_2020_2_OR_NEWER
#endif
4.2f, 0f, 0f, // Leaf pos
#if UNITY_2020_2_OR_NEWER
0f, -1f, 1f // Leaf vel
#endif
};

88
com.unity.ml-agents.extensions/Tests/Editor/Sensors/PoseExtractorTests.cs


{
class UselessPoseExtractor : PoseExtractor
{
protected override Pose GetPoseAt(int index)
protected internal override Pose GetPoseAt(int index)
protected override Vector3 GetLinearVelocityAt(int index)
protected internal override Vector3 GetLinearVelocityAt(int index)
{
return Vector3.zero;
}

SetParentIndices(parentIndices);
Setup(parentIndices);
}
}

{
parents[i] = i - 1;
}
SetParentIndices(parents);
Setup(parents);
protected override Pose GetPoseAt(int index)
protected internal override Pose GetPoseAt(int index)
{
var rotation = Quaternion.identity;
var translation = offset + new Vector3(index, index, index);

};
}
protected override Vector3 GetLinearVelocityAt(int index)
protected internal override Vector3 GetLinearVelocityAt(int index)
{
return Vector3.zero;
}

chain.UpdateModelSpacePoses();
chain.UpdateLocalSpacePoses();
// Root transforms are currently always the identity.
Assert.IsTrue(chain.ModelSpacePoses[0] == Pose.identity);
Assert.IsTrue(chain.LocalSpacePoses[0] == Pose.identity);
// Check the non-root transforms
for (var i = 1; i < size; i++)
var modelPoseIndex = 0;
foreach (var modelSpace in chain.GetEnabledModelSpacePoses())
var modelSpace = chain.ModelSpacePoses[i];
var expectedModelTranslation = new Vector3(i, i, i);
Assert.IsTrue(expectedModelTranslation == modelSpace.position);
if (modelPoseIndex == 0)
{
// Root transforms are currently always the identity.
Assert.IsTrue(modelSpace == Pose.identity);
}
else
{
var expectedModelTranslation = new Vector3(modelPoseIndex, modelPoseIndex, modelPoseIndex);
Assert.IsTrue(expectedModelTranslation == modelSpace.position);
var localSpace = chain.LocalSpacePoses[i];
var expectedLocalTranslation = new Vector3(1, 1, 1);
Assert.IsTrue(expectedLocalTranslation == localSpace.position);
}
modelPoseIndex++;
Assert.AreEqual(size, modelPoseIndex);
var localPoseIndex = 0;
foreach (var localSpace in chain.GetEnabledLocalSpacePoses())
{
if (localPoseIndex == 0)
{
// Root transforms are currently always the identity.
Assert.IsTrue(localSpace == Pose.identity);
}
else
{
var expectedLocalTranslation = new Vector3(1, 1, 1);
Assert.IsTrue(expectedLocalTranslation == localSpace.position, $"{expectedLocalTranslation} != {localSpace.position}");
}
localPoseIndex++;
}
Assert.AreEqual(size, localPoseIndex);
class BadPoseExtractor : PoseExtractor
{
public BadPoseExtractor()
{
var size = 2;
var parents = new int[size];
// Parents are intentionally invalid - expect -1 at root
for (var i = 0; i < size; i++)
{
parents[i] = i;
}
Setup(parents);
}
protected internal override Pose GetPoseAt(int index)
{
return Pose.identity;
}
protected internal override Vector3 GetLinearVelocityAt(int index)
{
return Vector3.zero;
}
}
[Test]
public void TestExpectedRoot()
{
Assert.Throws<UnityAgentsException>(() =>
{
var bad = new BadPoseExtractor();
});
}
}
public class PoseExtensionTests

58
com.unity.ml-agents.extensions/Tests/Editor/Sensors/RigidBodyPoseExtractorTests.cs


using UnityEngine;
using NUnit.Framework;
using Unity.MLAgents.Extensions.Sensors;
using UnityEditor;
namespace Unity.MLAgents.Extensions.Tests.Sensors
{

var poseExtractor = new RigidBodyPoseExtractor(rb1);
Assert.AreEqual(2, poseExtractor.NumPoses);
rb1.position = new Vector3(1, 0, 0);
rb1.rotation = Quaternion.Euler(0, 13.37f, 0);
rb1.velocity = new Vector3(2, 0, 0);
Assert.AreEqual(rb1.position, poseExtractor.GetPoseAt(0).position);
Assert.IsTrue(rb1.rotation == poseExtractor.GetPoseAt(0).rotation);
Assert.AreEqual(rb1.velocity, poseExtractor.GetLinearVelocityAt(0));
}
[Test]
public void TestTwoBodiesVirtualRoot()
{
// * virtualRoot
// * rootObj
// - rb1
// * go2
// - rb2
// - joint
var virtualRoot = new GameObject("I am vroot");
var rootObj = new GameObject();
var rb1 = rootObj.AddComponent<Rigidbody>();
var go2 = new GameObject();
var rb2 = go2.AddComponent<Rigidbody>();
go2.transform.SetParent(rootObj.transform);
var joint = go2.AddComponent<ConfigurableJoint>();
joint.connectedBody = rb1;
var poseExtractor = new RigidBodyPoseExtractor(rb1, null, virtualRoot);
Assert.AreEqual(3, poseExtractor.NumPoses);
// "body" 0 has no parent
Assert.AreEqual(-1, poseExtractor.GetParentIndex(0));
// body 1 has parent 0
Assert.AreEqual(0, poseExtractor.GetParentIndex(1));
var virtualRootPos = new Vector3(0,2,0);
var virtualRootRot = Quaternion.Euler(0, 42, 0);
virtualRoot.transform.position = virtualRootPos;
virtualRoot.transform.rotation = virtualRootRot;
Assert.AreEqual(virtualRootPos, poseExtractor.GetPoseAt(0).position);
Assert.IsTrue(virtualRootRot == poseExtractor.GetPoseAt(0).rotation);
Assert.AreEqual(Vector3.zero, poseExtractor.GetLinearVelocityAt(0));
// Same as above test, but using index 1
rb1.position = new Vector3(1, 0, 0);
rb1.rotation = Quaternion.Euler(0, 13.37f, 0);
rb1.velocity = new Vector3(2, 0, 0);
Assert.AreEqual(rb1.position, poseExtractor.GetPoseAt(1).position);
Assert.IsTrue(rb1.rotation == poseExtractor.GetPoseAt(1).rotation);
Assert.AreEqual(rb1.velocity, poseExtractor.GetLinearVelocityAt(1));
}
}
}

25
com.unity.ml-agents.extensions/Tests/Editor/Sensors/RigidBodySensorTests.cs


var sensor = sensorComponent.CreateSensor();
sensor.Update();
var expected = new[]
{
0f, 0f, 0f, // ModelSpaceLinearVelocity
0f, 0f, 0f, // LocalSpaceTranslations
0f, 0f, 0f, 1f // LocalSpaceRotations
};
SensorTestHelper.CompareObservation(sensor, expected);
// The root body is ignored since it always generates identity values
// and there are no other bodies to generate observations.
var expected = new float[0];
SensorTestHelper.CompareObservation(sensor, expected);
}
[Test]

var joint2 = leafGameObj.AddComponent<ConfigurableJoint>();
joint2.connectedBody = middleRb;
var virtualRoot = new GameObject();
var sensorComponent = rootObj.AddComponent<RigidBodySensorComponent>();
sensorComponent.RootBody = rootRb;

UseLocalSpaceTranslations = true,
UseLocalSpaceLinearVelocity = true
};
sensorComponent.VirtualRoot = virtualRoot;
// Note that the VirtualRoot is ignored from the observations
var expected = new[]
{
// Model space

// Local space
0f, 0f, 0f, // Root pos
0f, 0f, 0f, // Root vel
-1f, 1f, 0f, // Attached vel
4.2f, 0f, 0f, // Leaf pos
4.2f, 0f, 0f, // Leaf pos
1f, 0f, 0f, // Root vel (relative to virtual root)
-1f, 1f, 0f, // Attached vel
Assert.AreEqual(expected.Length, sensorComponent.GetObservationShape()[0]);
Assert.AreEqual(expected.Length, sensorComponent.GetObservationShape()[0]);
// Update the settings to only process joint observations
sensorComponent.Settings = new PhysicsSensorSettings

2
com.unity.ml-agents.extensions/package.json


"unity": "2018.4",
"description": "A source-only package for new features based on ML-Agents",
"dependencies": {
"com.unity.ml-agents": "1.2.0-preview"
"com.unity.ml-agents": "1.3.0-preview"
}
}

25
com.unity.ml-agents/CHANGELOG.md


### Major Changes
#### com.unity.ml-agents (C#)
#### ml-agents / ml-agents-envs / gym-unity (Python)
- The minimum supported python version for ml-agents-envs was changed to 3.6.1. (#4244)
### Minor Changes
#### com.unity.ml-agents (C#)
#### ml-agents / ml-agents-envs / gym-unity (Python)
### Bug Fixes
#### com.unity.ml-agents (C#)
#### ml-agents / ml-agents-envs / gym-unity (Python)
## [1.3.0-preview] 2020-08-12
### Major Changes
#### com.unity.ml-agents (C#)
#### ml-agents / ml-agents-envs / gym-unity (Python)
- The minimum supported Python version for ml-agents-envs was changed to 3.6.1. (#4244)
- The interaction between EnvManager and TrainerController was changed; EnvManager.advance() was split into two stages,
and TrainerController now uses the results from the first stage to handle new behavior names. This change speeds up
Python training by approximately 5-10%. (#4259)

#### ml-agents / ml-agents-envs / gym-unity (Python)
#### ml-agents / ml-agents-envs / gym-unity (Python)
- The versions of `numpy` supported by ml-agents-envs were changed to disallow 1.19.0 or later. This was done to reflect
a similar change in TensorFlow's requirements. (#4274)
- CSV statistics writer was removed (#4300).
### Bug Fixes
#### com.unity.ml-agents (C#)

- `max_step` in the `TerminalStep` and `TerminalSteps` objects was renamed `interrupted`.
- `beta` and `epsilon` in `PPO` are no longer decayed by default but follow the same schedule as learning rate. (#3940)
- `get_behavior_names()` and `get_behavior_spec()` on UnityEnvironment were replaced by the `behavior_specs` property. (#3946)
- The first version of the Unity Environment Registry (Experimental) has been released. More information [here](https://github.com/Unity-Technologies/ml-agents/blob/release_4_docs/docs/Unity-Environment-Registry.md)(#3967)
- The first version of the Unity Environment Registry (Experimental) has been released. More information [here](https://github.com/Unity-Technologies/ml-agents/blob/release_5_docs/docs/Unity-Environment-Registry.md)(#3967)
- `use_visual` and `allow_multiple_visual_obs` in the `UnityToGymWrapper` constructor
were replaced by `allow_multiple_obs` which allows one or more visual observations and
vector observations to be used simultaneously. (#3981) Thank you @shakenes !

- The format for trainer configuration has changed, and the "default" behavior has been deprecated.
See the [Migration Guide](https://github.com/Unity-Technologies/ml-agents/blob/release_4_docs/docs/Migrating.md) for more details. (#3936)
See the [Migration Guide](https://github.com/Unity-Technologies/ml-agents/blob/release_5_docs/docs/Migrating.md) for more details. (#3936)
- Training artifacts (trained models, summaries) are now found in the `results/`
directory. (#3829)
- When using Curriculum, the current lesson will resume if training is quit and resumed. As such,

2
com.unity.ml-agents/Documentation~/com.unity.ml-agents.md


[unity ML-Agents Toolkit]: https://github.com/Unity-Technologies/ml-agents
[unity inference engine]: https://docs.unity3d.com/Packages/com.unity.barracuda@latest/index.html
[package manager documentation]: https://docs.unity3d.com/Manual/upm-ui-install.html
[installation instructions]: https://github.com/Unity-Technologies/ml-agents/blob/release_4_docs/docs/Installation.md
[installation instructions]: https://github.com/Unity-Technologies/ml-agents/blob/release_5_docs/docs/Installation.md
[github repository]: https://github.com/Unity-Technologies/ml-agents
[python package]: https://github.com/Unity-Technologies/ml-agents
[execution order of event functions]: https://docs.unity3d.com/Manual/ExecutionOrder.html

6
com.unity.ml-agents/Runtime/Academy.cs


* API. For more information on each of these entities, in addition to how to
* set-up a learning environment and train the behavior of characters in a
* Unity scene, please browse our documentation pages on GitHub:
* https://github.com/Unity-Technologies/ml-agents/tree/release_4_docs/docs/
* https://github.com/Unity-Technologies/ml-agents/tree/release_5_docs/docs/
*/
namespace Unity.MLAgents

/// fall back to inference or heuristic decisions. (You can also set agents to always use
/// inference or heuristics.)
/// </remarks>
[HelpURL("https://github.com/Unity-Technologies/ml-agents/tree/release_4_docs/" +
[HelpURL("https://github.com/Unity-Technologies/ml-agents/tree/release_5_docs/" +
"docs/Learning-Environment-Design.md")]
public class Academy : IDisposable
{

/// Unity package version of com.unity.ml-agents.
/// This must match the version string in package.json and is checked in a unit test.
/// </summary>
internal const string k_PackageVersion = "1.2.0-preview";
internal const string k_PackageVersion = "1.3.0-preview";
const int k_EditorTrainingPort = 5004;

26
com.unity.ml-agents/Runtime/Agent.cs


/// [OnDisable()]: https://docs.unity3d.com/ScriptReference/MonoBehaviour.OnDisable.html]
/// [OnBeforeSerialize()]: https://docs.unity3d.com/ScriptReference/MonoBehaviour.OnBeforeSerialize.html
/// [OnAfterSerialize()]: https://docs.unity3d.com/ScriptReference/MonoBehaviour.OnAfterSerialize.html
/// [Agents]: https://github.com/Unity-Technologies/ml-agents/blob/release_4_docs/docs/Learning-Environment-Design-Agents.md
/// [Reinforcement Learning in Unity]: https://github.com/Unity-Technologies/ml-agents/blob/release_4_docs/docs/Learning-Environment-Design.md
/// [Agents]: https://github.com/Unity-Technologies/ml-agents/blob/release_5_docs/docs/Learning-Environment-Design-Agents.md
/// [Reinforcement Learning in Unity]: https://github.com/Unity-Technologies/ml-agents/blob/release_5_docs/docs/Learning-Environment-Design.md
/// [Unity ML-Agents Toolkit manual]: https://github.com/Unity-Technologies/ml-agents/blob/release_4_docs/docs/Readme.md
/// [Unity ML-Agents Toolkit manual]: https://github.com/Unity-Technologies/ml-agents/blob/release_5_docs/docs/Readme.md
[HelpURL("https://github.com/Unity-Technologies/ml-agents/blob/release_4_docs/" +
[HelpURL("https://github.com/Unity-Technologies/ml-agents/blob/release_5_docs/" +
"docs/Learning-Environment-Design-Agents.md")]
[Serializable]
[RequireComponent(typeof(BehaviorParameters))]

/// for information about mixing reward signals from curiosity and Generative Adversarial
/// Imitation Learning (GAIL) with rewards supplied through this method.
///
/// [Agents - Rewards]: https://github.com/Unity-Technologies/ml-agents/blob/release_4_docs/docs/Learning-Environment-Design-Agents.md#rewards
/// [Reward Signals]: https://github.com/Unity-Technologies/ml-agents/blob/release_4_docs/docs/ML-Agents-Overview.md#a-quick-note-on-reward-signals
/// [Agents - Rewards]: https://github.com/Unity-Technologies/ml-agents/blob/release_5_docs/docs/Learning-Environment-Design-Agents.md#rewards
/// [Reward Signals]: https://github.com/Unity-Technologies/ml-agents/blob/release_5_docs/docs/ML-Agents-Overview.md#a-quick-note-on-reward-signals
/// </remarks>
/// <param name="reward">The new value of the reward.</param>
public void SetReward(float reward)

/// for information about mixing reward signals from curiosity and Generative Adversarial
/// Imitation Learning (GAIL) with rewards supplied through this method.
///
/// [Agents - Rewards]: https://github.com/Unity-Technologies/ml-agents/blob/release_4_docs/docs/Learning-Environment-Design-Agents.md#rewards
/// [Reward Signals]: https://github.com/Unity-Technologies/ml-agents/blob/release_4_docs/docs/ML-Agents-Overview.md#a-quick-note-on-reward-signals
/// [Agents - Rewards]: https://github.com/Unity-Technologies/ml-agents/blob/release_5_docs/docs/Learning-Environment-Design-Agents.md#rewards
/// [Reward Signals]: https://github.com/Unity-Technologies/ml-agents/blob/release_5_docs/docs/ML-Agents-Overview.md#a-quick-note-on-reward-signals
///</remarks>
/// <param name="increment">Incremental reward value.</param>
public void AddReward(float increment)

/// implementing a simple heuristic function can aid in debugging agent actions and interactions
/// with its environment.
///
/// [Demonstration Recorder]: https://github.com/Unity-Technologies/ml-agents/blob/release_4_docs/docs/Learning-Environment-Design-Agents.md#recording-demonstrations
/// [Actions]: https://github.com/Unity-Technologies/ml-agents/blob/release_4_docs/docs/Learning-Environment-Design-Agents.md#actions
/// [Demonstration Recorder]: https://github.com/Unity-Technologies/ml-agents/blob/release_5_docs/docs/Learning-Environment-Design-Agents.md#recording-demonstrations
/// [Actions]: https://github.com/Unity-Technologies/ml-agents/blob/release_5_docs/docs/Learning-Environment-Design-Agents.md#actions
/// [GameObject]: https://docs.unity3d.com/Manual/GameObjects.html
/// </remarks>
/// <example>

/// For more information about observations, see [Observations and Sensors].
///
/// [GameObject]: https://docs.unity3d.com/Manual/GameObjects.html
/// [Observations and Sensors]: https://github.com/Unity-Technologies/ml-agents/blob/release_4_docs/docs/Learning-Environment-Design-Agents.md#observations-and-sensors
/// [Observations and Sensors]: https://github.com/Unity-Technologies/ml-agents/blob/release_5_docs/docs/Learning-Environment-Design-Agents.md#observations-and-sensors
/// </remarks>
public virtual void CollectObservations(VectorSensor sensor)
{

///
/// See [Agents - Actions] for more information on masking actions.
///
/// [Agents - Actions]: https://github.com/Unity-Technologies/ml-agents/blob/release_4_docs/docs/Learning-Environment-Design-Agents.md#actions
/// [Agents - Actions]: https://github.com/Unity-Technologies/ml-agents/blob/release_5_docs/docs/Learning-Environment-Design-Agents.md#actions
/// </remarks>
/// <seealso cref="OnActionReceived(float[])"/>
public virtual void CollectDiscreteActionMasks(DiscreteActionMasker actionMasker)

///
/// For more information about implementing agent actions see [Agents - Actions].
///
/// [Agents - Actions]: https://github.com/Unity-Technologies/ml-agents/blob/release_4_docs/docs/Learning-Environment-Design-Agents.md#actions
/// [Agents - Actions]: https://github.com/Unity-Technologies/ml-agents/blob/release_5_docs/docs/Learning-Environment-Design-Agents.md#actions
/// </remarks>
/// <param name="vectorAction">
/// An array containing the action vector. The length of the array is specified

1
com.unity.ml-agents/Runtime/AssemblyInfo.cs


[assembly: InternalsVisibleTo("Unity.ML-Agents.Editor.Tests")]
[assembly: InternalsVisibleTo("Unity.ML-Agents.Editor")]
[assembly: InternalsVisibleTo("Unity.ML-Agents.Extensions")]

2
com.unity.ml-agents/Runtime/Demonstrations/DemonstrationRecorder.cs


/// See [Imitation Learning - Recording Demonstrations] for more information.
///
/// [GameObject]: https://docs.unity3d.com/Manual/GameObjects.html
/// [Imitation Learning - Recording Demonstrations]: https://github.com/Unity-Technologies/ml-agents/blob/release_4_docs/docs//Learning-Environment-Design-Agents.md#recording-demonstrations
/// [Imitation Learning - Recording Demonstrations]: https://github.com/Unity-Technologies/ml-agents/blob/release_5_docs/docs//Learning-Environment-Design-Agents.md#recording-demonstrations
/// </remarks>
[RequireComponent(typeof(Agent))]
[AddComponentMenu("ML Agents/Demonstration Recorder", (int)MenuGroup.Default)]

2
com.unity.ml-agents/Runtime/DiscreteActionMasker.cs


///
/// See [Agents - Actions] for more information on masking actions.
///
/// [Agents - Actions]: https://github.com/Unity-Technologies/ml-agents/blob/release_4_docs/docs/Learning-Environment-Design-Agents.md#actions
/// [Agents - Actions]: https://github.com/Unity-Technologies/ml-agents/blob/release_5_docs/docs/Learning-Environment-Design-Agents.md#actions
/// </remarks>
/// <param name="branch">The branch for which the actions will be masked.</param>
/// <param name="actionIndices">The indices of the masked actions.</param>

2
com.unity.ml-agents/package.json


{
"name": "com.unity.ml-agents",
"displayName": "ML Agents",
"version": "1.2.0-preview",
"version": "1.3.0-preview",
"unity": "2018.4",
"description": "Use state-of-the-art machine learning to create intelligent character behaviors in any Unity environment (games, robotics, film, etc.).",
"dependencies": {

4
docs/Installation-Anaconda-Windows.md


the ml-agents Conda environment by typing `activate ml-agents`)_:
```sh
git clone --branch release_4 https://github.com/Unity-Technologies/ml-agents.git
git clone --branch release_5 https://github.com/Unity-Technologies/ml-agents.git
The `--branch release_4` option will switch to the tag of the latest stable
The `--branch release_5` option will switch to the tag of the latest stable
release. Omitting that will get the `master` branch which is potentially
unstable.

6
docs/Installation.md


of our tutorials / guides assume you have access to our example environments).
```sh
git clone --branch release_4 https://github.com/Unity-Technologies/ml-agents.git
git clone --branch release_5 https://github.com/Unity-Technologies/ml-agents.git
The `--branch release_4` option will switch to the tag of the latest stable
The `--branch release_5` option will switch to the tag of the latest stable
release. Omitting that will get the `master` branch which is potentially
unstable.

ML-Agents Toolkit for your purposes. If you plan to contribute those changes
back, make sure to clone the `master` branch (by omitting `--branch release_4`
back, make sure to clone the `master` branch (by omitting `--branch release_5`
from the command above). See our
[Contributions Guidelines](../com.unity.ml-agents/CONTRIBUTING.md) for more
information on contributing to the ML-Agents Toolkit.

2
docs/Training-Configuration-File.md


| `hyperparameters -> beta` | (default = `5.0e-3`) Strength of the entropy regularization, which makes the policy "more random." This ensures that agents properly explore the action space during training. Increasing this will ensure more random actions are taken. This should be adjusted such that the entropy (measurable from TensorBoard) slowly decreases alongside increases in reward. If entropy drops too quickly, increase beta. If entropy drops too slowly, decrease `beta`. <br><br>Typical range: `1e-4` - `1e-2` |
| `hyperparameters -> epsilon` | (default = `0.2`) Influences how rapidly the policy can evolve during training. Corresponds to the acceptable threshold of divergence between the old and new policies during gradient descent updating. Setting this value small will result in more stable updates, but will also slow the training process. <br><br>Typical range: `0.1` - `0.3` |
| `hyperparameters -> lambd` | (default = `0.95`) Regularization parameter (lambda) used when calculating the Generalized Advantage Estimate ([GAE](https://arxiv.org/abs/1506.02438)). This can be thought of as how much the agent relies on its current value estimate when calculating an updated value estimate. Low values correspond to relying more on the current value estimate (which can be high bias), and high values correspond to relying more on the actual rewards received in the environment (which can be high variance). The parameter provides a trade-off between the two, and the right value can lead to a more stable training process. <br><br>Typical range: `0.9` - `0.95` |
| `hyperparameters -> num_epoch` | Number of passes to make through the experience buffer when performing gradient descent optimization. The larger the batch_size, the larger it is acceptable to make this. Decreasing this will ensure more stable updates, at the cost of slower learning. <br><br>Typical range: `3` - `10` |
| `hyperparameters -> num_epoch` | (default = `3`) Number of passes to make through the experience buffer when performing gradient descent optimization. The larger the batch_size, the larger it is acceptable to make this. Decreasing this will ensure more stable updates, at the cost of slower learning. <br><br>Typical range: `3` - `10` |
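For orientation, here is a minimal sketch (not the toolkit's implementation) of where `epsilon` and `beta` enter a PPO-style update; the tensor names `ratio`, `advantages`, and `entropy` are placeholders:

```python
import torch

def ppo_policy_objective(ratio, advantages, entropy, beta=5.0e-3, epsilon=0.2):
    # Clipped surrogate objective: epsilon bounds how far the new policy
    # may move from the old one in a single update.
    unclipped = ratio * advantages
    clipped = torch.clamp(ratio, 1.0 - epsilon, 1.0 + epsilon) * advantages
    policy_loss = -torch.min(unclipped, clipped).mean()
    # beta scales the entropy bonus that keeps the policy exploratory.
    return policy_loss - beta * entropy.mean()
```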
### SAC-specific Configurations

81
docs/Training-ML-Agents.md


mlagents-learn config/ppo/3DBall_randomize.yaml --run-id=3D-Ball-randomize
```
We can observe progress and metrics via Tensorboard.
We can observe progress and metrics via TensorBoard.
#### Curriculum
To enable curriculum learning, you need to add a `curriculum` sub-section to your environment
parameter. Here is one example with the environment parameter `my_environment_parameter`:
```yml
behaviors:
  BehaviorY:
    # < Same as above >

# Add this section
environment_parameters:
  my_environment_parameter:
    curriculum:
      - name: MyFirstLesson # The '-' is important as this is a list
        completion_criteria:
          measure: progress
          behavior: my_behavior
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.2
        value: 0.0
      - name: MySecondLesson # This is the start of the second lesson
        completion_criteria:
          measure: progress
          behavior: my_behavior
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.6
          require_reset: true
        value:
          sampler_type: uniform
          sampler_parameters:
            min_value: 4.0
            max_value: 7.0
      - name: MyLastLesson
        value: 8.0
```
Note that this curriculum __only__ applies to `my_environment_parameter`. The `curriculum` section
contains a list of `Lessons`. In the example, the lessons are named `MyFirstLesson`, `MySecondLesson`
and `MyLastLesson`.
Each `Lesson` has three fields:
- `name`, a user-defined name for the lesson (the name of the lesson will be displayed in
the console when the lesson changes)
- `completion_criteria`, which determines what needs to happen in the simulation before the lesson
can be considered complete. When that condition is met, the curriculum moves on to the next
`Lesson`. Note that you do not need to specify a `completion_criteria` for the last `Lesson`.
- `value`, which is the value the environment parameter will take during the lesson. Note that this
can be a float or a sampler.
The different settings available in `completion_criteria` are listed in the table below; a short sketch of how such a criterion might be evaluated follows the table.
| **Setting** | **Description** |
| :------------------ | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `measure` | The measure used to track learning progress and advancement through lessons.<br><br> `reward` uses a measure of the received reward, while `progress` uses the ratio of steps/max_steps. |
| `behavior` | Specifies which behavior is being tracked. There can be multiple behaviors with different names, each at different points of training. This setting allows the curriculum to track only one of them. |
| `threshold` | Determines the value of `measure` at which the lesson should advance to the next one. |
| `min_lesson_length` | The minimum number of episodes that should be completed before the lesson can change. If `measure` is set to `reward`, the average cumulative reward of the last `min_lesson_length` episodes will be used to determine if the lesson should change. Must be nonnegative. <br><br> **Important**: the average reward that is compared to the thresholds is different than the mean reward that is logged to the console. For example, if `min_lesson_length` is `100`, the lesson will increment after the average cumulative reward of the last `100` episodes exceeds the current threshold. The mean reward logged to the console is dictated by the `summary_freq` parameter defined above. |
| `signal_smoothing` | Whether to weight the current progress measure by previous values. |
| `require_reset` | Whether changing lessons requires the environment to reset (default: false) |
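As a minimal illustrative sketch (not the toolkit's implementation; the function name and arguments are hypothetical), a completion criterion of this form could be evaluated like this:

```python
def lesson_complete(measure, steps, max_steps, reward_buffer,
                    threshold, min_lesson_length):
    # A lesson can only advance once enough episodes have been observed.
    if len(reward_buffer) < min_lesson_length:
        return False
    if measure == "progress":
        value = steps / max_steps  # ratio of steps to max_steps
    else:  # "reward"
        value = sum(reward_buffer[-min_lesson_length:]) / min_lesson_length
    return value > threshold

# e.g. lesson_complete("progress", 30_000, 100_000, [1.0] * 100, 0.2, 100) -> True
```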
##### Training with a Curriculum
Once we have specified our curricula, we can launch `mlagents-learn` and point it to the
config file containing our curricula; PPO will then train using curriculum learning. For example, to
train agents in the Wall Jump environment with curriculum learning, we can run:
```sh
mlagents-learn config/ppo/WallJump_curriculum.yaml --run-id=wall-jump-curriculum
```
We can then keep track of the current lessons and progress via TensorBoard. If you've terminated
the run, you can resume it using `--resume`, and lesson progress will pick up where it
left off.
#### Curriculum

2
docs/Training-on-Amazon-Web-Service.md


2. Clone the ML-Agents repo and install the required Python packages
```sh
git clone --branch release_4 https://github.com/Unity-Technologies/ml-agents.git
git clone --branch release_5 https://github.com/Unity-Technologies/ml-agents.git
cd ml-agents/ml-agents/
pip3 install -e .
```

22
docs/Unity-Inference-Engine.md


[compute shaders](https://docs.unity3d.com/Manual/class-ComputeShader.html) to
run the neural network within Unity.
**Note**: The ML-Agents Toolkit only supports the models created with our
trainers.
## Supported devices
See the Unity Inference Engine documentation for a list of the

**Note:** For most of the models generated with the ML-Agents Toolkit, CPU will
be faster than GPU. You should use the GPU only if you use the ResNet visual
encoder or have a large number of agents with visual observations.
# Unsupported use cases
## Externally trained models
The ML-Agents Toolkit only supports the models created with our trainers. Model
loading expects certain conventions for constants and tensor names. While it is
possible to construct a model that follows these conventions, we don't provide
any additional help for this. More details can be found in
[TensorNames.cs](https://github.com/Unity-Technologies/ml-agents/blob/release_4_docs/com.unity.ml-agents/Runtime/Inference/TensorNames.cs)
and
[BarracudaModelParamLoader.cs](https://github.com/Unity-Technologies/ml-agents/blob/release_4_docs/com.unity.ml-agents/Runtime/Inference/BarracudaModelParamLoader.cs).
If you wish to run inference on an externally trained model, you should use
Barracuda directly, instead of trying to run it through ML-Agents.
## Model inference outside of Unity
We do not provide support for inference anywhere outside of Unity. The
`frozen_graph_def.pb` and `.onnx` files produced by training are open formats
for TensorFlow and ONNX respectively; if you wish to convert these to another
format or run inference with them, refer to their documentation.

8
docs/Using-Tensorboard.md


skill level between two players. In a proper training run, the ELO of the
agent should steadily increase.
## Exporting Data from TensorBoard
To export timeseries data in CSV or JSON format, check the "Show data download
links" in the upper left. This will enable download links below each chart.
![Example TensorBoard Run](images/TensorBoard-download.png)
To get custom metrics from a C# environment into Tensorboard, you can use the
To get custom metrics from a C# environment into TensorBoard, you can use the
`StatsRecorder`:
```csharp

4
experiment_torch.py


evaluate_count = evaluate["TorchPolicy.evaluate"]["count"]
else:
if algo == "ppo":
update_total = update["TFPPOOptimizer.update"]["total"]
update_count = update["TFPPOOptimizer.update"]["count"]
update_total = update["PPOOptimizer.update"]["total"]
update_count = update["PPOOptimizer.update"]["count"]
else:
update_total = update["SACTrainer._update_policy"]["total"]
update_count = update["SACTrainer._update_policy"]["count"]

2
gym-unity/gym_unity/__init__.py


# Version of the library that will be used to upload to pypi
__version__ = "0.19.0.dev0"
__version__ = "0.20.0.dev0"
# Git tag that will be checked to determine whether to trigger upload to pypi
__release_tag__ = None

2
ml-agents-envs/mlagents_envs/__init__.py


# Version of the library that will be used to upload to pypi
__version__ = "0.19.0.dev0"
__version__ = "0.20.0.dev0"
# Git tag that will be checked to determine whether to trigger upload to pypi
__release_tag__ = None

2
ml-agents-envs/setup.py


install_requires=[
"cloudpickle",
"grpcio>=1.11.0",
"numpy>=1.14.1,<2.0",
"numpy>=1.14.1,<1.19.0",
"Pillow>=4.2.1",
"protobuf>=3.6",
"pyyaml>=3.1.0",

2
ml-agents/mlagents/trainers/__init__.py


# Version of the library that will be used to upload to pypi
__version__ = "0.19.0.dev0"
__version__ = "0.20.0.dev0"
# Git tag that will be checked to determine whether to trigger upload to pypi
__release_tag__ = None

2
ml-agents/mlagents/trainers/buffer.py


Adds a list of np.arrays to the end of the list of np.arrays.
:param data: The np.array list to append.
"""
self += list(np.array(data))
self += list(np.array(data, dtype=np.float32))
def set(self, data):
"""

7
ml-agents/mlagents/trainers/cli_utils.py


action=DetectDefaultStoreTrue,
help="Forces training using CPU only",
)
argparser.add_argument(
"--torch",
default=False,
action=DetectDefaultStoreTrue,
help="(Experimental) Use the PyTorch framework instead of TensorFlow. Install PyTorch "
"before using this option",
)
eng_conf = argparser.add_argument_group(title="Engine Configuration")
eng_conf.add_argument(

2
ml-agents/mlagents/trainers/environment_parameter_manager.py


lesson = settings.curriculum[lesson_num]
if (
lesson.completion_criteria is not None
and len(settings.curriculum) > lesson_num
and len(settings.curriculum) > lesson_num + 1
):
behavior_to_consider = lesson.completion_criteria.behavior
if behavior_to_consider in trainer_steps:

8
ml-agents/mlagents/trainers/exception.py


pass
class TrainerConfigWarning(Warning):
"""
Any warning related to the configuration of trainers in the ML-Agents Toolkit.
"""
pass
class CurriculumError(TrainerError):
"""
Any error related to training with a curriculum.

14
ml-agents/mlagents/trainers/learn.py


from mlagents.trainers.trainer_util import TrainerFactory, handle_existing_directories
from mlagents.trainers.stats import (
TensorboardWriter,
CSVWriter,
StatsReporter,
GaugeWriter,
ConsoleWriter,

os.path.join(run_logs_dir, "training_status.json")
)
# Configure CSV, Tensorboard Writers and StatsReporter
# We assume reward and episode length are needed in the CSV.
csv_writer = CSVWriter(
write_path,
required_fields=[
"Environment/Cumulative Reward",
"Environment/Episode Length",
],
)
# Configure Tensorboard Writers and StatsReporter
tb_writer = TensorboardWriter(
write_path, clear_past_data=not checkpoint_settings.resume
)

StatsReporter.add_writer(csv_writer)
StatsReporter.add_writer(gauge_write)
StatsReporter.add_writer(console_writer)

add_timer_metadata("mlagents_envs_version", mlagents_envs.__version__)
add_timer_metadata("communication_protocol_version", UnityEnvironment.API_VERSION)
add_timer_metadata("tensorflow_version", tf_utils.tf.__version__)
add_timer_metadata("numpy_version", np.__version__)
logger.info(f"run_seed set to {run_seed}")
run_training(run_seed, options)

26
ml-agents/mlagents/trainers/optimizer/torch_optimizer.py


from mlagents.trainers.buffer import AgentBuffer
from mlagents.trainers.components.bc.module import BCModule
from mlagents.trainers.components.reward_signals.extrinsic.signal import (
ExtrinsicRewardSignal,
)
from mlagents.trainers.torch.components.reward_providers import create_reward_provider
from mlagents.trainers.settings import TrainerSettings, RewardSignalType
from mlagents.trainers.settings import TrainerSettings
from mlagents.trainers.trajectory import SplitObservations
from mlagents.trainers.torch.utils import ModelUtils

Create reward signals
:param reward_signal_configs: Reward signal config.
"""
extrinsic_signal = ExtrinsicRewardSignal(
self.policy, reward_signal_configs[RewardSignalType.EXTRINSIC]
)
self.reward_signals = {RewardSignalType.EXTRINSIC.value: extrinsic_signal}
# Create reward signals
# for reward_signal, config in reward_signal_configs.items():
# self.reward_signals[reward_signal] = create_reward_signal(
# self.policy, reward_signal, config
# )
# self.update_dict.update(self.reward_signals[reward_signal].update_dict)
for reward_signal, settings in reward_signal_configs.items():
# Name reward signals by string in case we have duplicates later
self.reward_signals[reward_signal.value] = create_reward_provider(
reward_signal, self.policy.behavior_spec, settings
)
def get_value_estimates(
self, decision_requests: DecisionSteps, idx: int, done: bool

# If we're done, reassign all of the value estimates that need terminal states.
if done:
for k in value_estimates:
if self.reward_signals[k].use_terminal_states:
if not self.reward_signals[k].ignore_done:
value_estimates[k] = 0.0
return value_estimates

if done:
for k in next_value_estimate:
if self.reward_signals[k].use_terminal_states:
if not self.reward_signals[k].ignore_done:
next_value_estimate[k] = 0.0
return value_estimates, next_value_estimate

2
ml-agents/mlagents/trainers/ppo/optimizer_tf.py


from mlagents.trainers.settings import TrainerSettings, PPOSettings
class TFPPOOptimizer(TFOptimizer):
class PPOOptimizer(TFOptimizer):
def __init__(self, policy: TFPolicy, trainer_params: TrainerSettings):
"""
Takes a Policy and a Dict of trainer parameters and creates an Optimizer around the policy.

52
ml-agents/mlagents/trainers/ppo/optimizer_torch.py


self.hyperparameters: PPOSettings = cast(
PPOSettings, trainer_settings.hyperparameters
)
self.decay_learning_rate = ModelUtils.DecayedValue(
self.hyperparameters.learning_rate_schedule,
self.hyperparameters.learning_rate,
1e-10,
self.trainer_settings.max_steps,
)
self.decay_epsilon = ModelUtils.DecayedValue(
self.hyperparameters.learning_rate_schedule,
self.hyperparameters.epsilon,
0.1,
self.trainer_settings.max_steps,
)
self.decay_beta = ModelUtils.DecayedValue(
self.hyperparameters.learning_rate_schedule,
self.hyperparameters.beta,
1e-5,
self.trainer_settings.max_steps,
)
self.optimizer = torch.optim.Adam(
params, lr=self.trainer_settings.hyperparameters.learning_rate

self.stream_names = list(self.reward_signals.keys())
def ppo_value_loss(self, values, old_values, returns):
def ppo_value_loss(
self,
values: Dict[str, torch.Tensor],
old_values: Dict[str, torch.Tensor],
returns: Dict[str, torch.Tensor],
epsilon: float,
) -> torch.Tensor:
"""
Creates training-specific PyTorch ops for PPO models.
:param returns:

decay_epsilon = self.hyperparameters.epsilon
head - old_val_tensor, -decay_epsilon, decay_epsilon
head - old_val_tensor, -1 * epsilon, epsilon
)
v_opt_a = (returns_tensor - head) ** 2
v_opt_b = (returns_tensor - clipped_value_estimate) ** 2
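The clamp in this hunk is the standard clipped value loss. A minimal standalone sketch of the same pattern (variable names loosely follow the hunk, but this is not the exact toolkit code):

```python
import torch

def clipped_value_loss(head, old_val, returns, epsilon):
    # Clip the new value estimate to stay within epsilon of the old one,
    # then take the worse (larger) of the clipped and unclipped squared errors.
    clipped = old_val + torch.clamp(head - old_val, -epsilon, epsilon)
    v_opt_a = (returns - head) ** 2
    v_opt_b = (returns - clipped) ** 2
    return torch.mean(torch.max(v_opt_a, v_opt_b))
```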

:param num_sequences: Number of sequences to process.
:return: Results of update.
"""
# Get decayed parameters
decay_lr = self.decay_learning_rate.get_value(self.policy.get_current_step())
decay_eps = self.decay_epsilon.get_value(self.policy.get_current_step())
decay_bet = self.decay_beta.get_value(self.policy.get_current_step())
returns = {}
old_values = {}
for name in self.reward_signals:

memories=memories,
seq_len=self.policy.sequence_length,
)
value_loss = self.ppo_value_loss(values, old_values, returns)
value_loss = self.ppo_value_loss(values, old_values, returns, decay_eps)
policy_loss = self.ppo_policy_loss(
ModelUtils.list_to_tensor(batch["advantages"]),
log_probs,

loss = (
policy_loss
+ 0.5 * value_loss
- self.hyperparameters.beta * torch.mean(entropy)
)
loss = policy_loss + 0.5 * value_loss - decay_bet * torch.mean(entropy)
# Set optimizer learning rate
ModelUtils.update_learning_rate(self.optimizer, decay_lr)
self.optimizer.zero_grad()
loss.backward()

"Losses/Value Loss": value_loss.detach().cpu().numpy(),
"Policy/Learning Rate": decay_lr,
"Policy/Epsilon": decay_eps,
"Policy/Beta": decay_bet,
for reward_provider in self.reward_signals.values():
update_stats.update(reward_provider.update(batch))
return update_stats

42
ml-agents/mlagents/trainers/ppo/trainer.py


from mlagents_envs.logging_util import get_logger
from mlagents_envs.base_env import BehaviorSpec
from mlagents.trainers.trainer.rl_trainer import RLTrainer
from mlagents.trainers.policy.torch_policy import TorchPolicy
from mlagents.trainers.ppo.optimizer_torch import TorchPPOOptimizer
from mlagents.trainers.ppo.optimizer_tf import TFPPOOptimizer
from mlagents.trainers.ppo.optimizer_tf import PPOOptimizer
from mlagents.trainers.trajectory import Trajectory
from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
from mlagents.trainers.settings import (

FrameworkType,
from mlagents.trainers.components.reward_signals import RewardSignal
try:
from mlagents.trainers.policy.torch_policy import TorchPolicy
from mlagents.trainers.ppo.optimizer_torch import TorchPPOOptimizer
except ModuleNotFoundError:
TorchPolicy = None # type: ignore
TorchPPOOptimizer = None # type: ignore
logger = get_logger(__name__)

PPOSettings, self.trainer_settings.hyperparameters
)
self.seed = seed
self.framework = "torch" if TestingConfiguration.use_torch else "tf"
if TestingConfiguration.max_steps > 0:
self.trainer_settings.max_steps = TestingConfiguration.max_steps
self.policy: Policy = None # type: ignore

for name, v in value_estimates.items():
agent_buffer_trajectory[f"{name}_value_estimates"].extend(v)
self._stats_reporter.add_stat(
self.optimizer.reward_signals[name].value_name, np.mean(v)
)
if isinstance(self.optimizer.reward_signals[name], RewardSignal):
self._stats_reporter.add_stat(
self.optimizer.reward_signals[name].value_name, np.mean(v)
)
else:
self._stats_reporter.add_stat(
f"Policy/{self.optimizer.reward_signals[name].name.capitalize()} Value Estimate",
np.mean(v),
)
# Evaluate all reward functions
self.collected_rewards["environment"][agent_id] += np.sum(

evaluate_result = reward_signal.evaluate_batch(
agent_buffer_trajectory
).scaled_reward
if isinstance(reward_signal, RewardSignal):
evaluate_result = reward_signal.evaluate_batch(
agent_buffer_trajectory
).scaled_reward
else:
evaluate_result = (
reward_signal.evaluate(agent_buffer_trajectory)
* reward_signal.strength
)
agent_buffer_trajectory[f"{name}_rewards"].extend(evaluate_result)
# Report the reward signals
self.collected_rewards[name][agent_id] += np.sum(evaluate_result)

)
self.policy = policy
self.policies[parsed_behavior_id.behavior_id] = policy
if self.framework == "torch":
if self.framework == FrameworkType.PYTORCH:
self.optimizer = TFPPOOptimizer( # type: ignore
self.optimizer = PPOOptimizer( # type: ignore
self.policy, self.trainer_settings # type: ignore
) # type: ignore
for _reward_signal in self.optimizer.reward_signals.keys():

34
ml-agents/mlagents/trainers/sac/optimizer_torch.py


def __init__(self, policy: TorchPolicy, trainer_params: TrainerSettings):
super().__init__(policy, trainer_params)
hyperparameters: SACSettings = cast(SACSettings, trainer_params.hyperparameters)
lr = hyperparameters.learning_rate
# lr_schedule = hyperparameters.learning_rate_schedule
# max_step = trainer_params.max_steps
self.tau = hyperparameters.tau
self.init_entcoef = hyperparameters.init_entcoef

# h_size = policy_network_settings.hidden_units
# num_layers = policy_network_settings.num_layers
# vis_encode_type = policy_network_settings.vis_encode_type
self.tau = hyperparameters.tau
self.burn_in_ratio = 0.0

# Use to reduce "survivor bonus" when using Curiosity or GAIL.
self.gammas = [_val.gamma for _val in trainer_params.reward_signals.values()]
self.use_dones_in_backup = {
name: int(self.reward_signals[name].use_terminal_states)
name: int(not self.reward_signals[name].ignore_done)
for name in self.stream_names
}

for param in policy_params:
logger.debug(param.shape)
self.policy_optimizer = torch.optim.Adam(policy_params, lr=lr)
self.value_optimizer = torch.optim.Adam(value_params, lr=lr)
self.entropy_optimizer = torch.optim.Adam([self._log_ent_coef], lr=lr)
self.decay_learning_rate = ModelUtils.DecayedValue(
hyperparameters.learning_rate_schedule,
hyperparameters.learning_rate,
1e-10,
self.trainer_settings.max_steps,
)
self.policy_optimizer = torch.optim.Adam(
policy_params, lr=hyperparameters.learning_rate
)
self.value_optimizer = torch.optim.Adam(
value_params, lr=hyperparameters.learning_rate
)
self.entropy_optimizer = torch.optim.Adam(
[self._log_ent_coef], lr=hyperparameters.learning_rate
)
def sac_q_loss(
self,

total_value_loss = q1_loss + q2_loss + value_loss
decay_lr = self.decay_learning_rate.get_value(self.policy.get_current_step())
ModelUtils.update_learning_rate(self.policy_optimizer, decay_lr)
ModelUtils.update_learning_rate(self.value_optimizer, decay_lr)
ModelUtils.update_learning_rate(self.entropy_optimizer, decay_lr)
self.entropy_optimizer.zero_grad()
entropy_loss.backward()
self.entropy_optimizer.step()

.detach()
.cpu()
.numpy(),
"Policy/Learning Rate": decay_lr,
for signal in self.reward_signals.values():
signal.update(batch)
return update_stats

61
ml-agents/mlagents/trainers/sac/trainer.py


from mlagents.trainers.trainer.rl_trainer import RLTrainer
from mlagents.trainers.trajectory import Trajectory, SplitObservations
from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
from mlagents.trainers.policy.torch_policy import TorchPolicy
from mlagents.trainers.sac.optimizer_torch import TorchSACOptimizer
from mlagents.trainers.settings import TrainerSettings, SACSettings
from mlagents.trainers.settings import TrainerSettings, SACSettings, FrameworkType
from mlagents.trainers.components.reward_signals import RewardSignal
try:
from mlagents.trainers.policy.torch_policy import TorchPolicy
from mlagents.trainers.sac.optimizer_torch import TorchSACOptimizer
except ModuleNotFoundError:
TorchPolicy = None # type: ignore
TorchSACOptimizer = None # type: ignore
logger = get_logger(__name__)

agent_buffer_trajectory["environment_rewards"]
)
for name, reward_signal in self.optimizer.reward_signals.items():
evaluate_result = reward_signal.evaluate_batch(
agent_buffer_trajectory
).scaled_reward
if isinstance(reward_signal, RewardSignal):
evaluate_result = reward_signal.evaluate_batch(
agent_buffer_trajectory
).scaled_reward
else:
evaluate_result = (
reward_signal.evaluate(agent_buffer_trajectory)
* reward_signal.strength
)
# Report the reward signals
self.collected_rewards[name][agent_id] += np.sum(evaluate_result)

)
for name, v in value_estimates.items():
self._stats_reporter.add_stat(
self.optimizer.reward_signals[name].value_name, np.mean(v)
)
if isinstance(self.optimizer.reward_signals[name], RewardSignal):
self._stats_reporter.add_stat(
self.optimizer.reward_signals[name].value_name, np.mean(v)
)
else:
self._stats_reporter.add_stat(
f"Policy/{self.optimizer.reward_signals[name].name.capitalize()} Value",
np.mean(v),
)
# Bootstrap using the last step rather than the bootstrap step if max step is reached.
# Set last element to duplicate obs and remove dones.

)
# Get rewards for each reward
for name, signal in self.optimizer.reward_signals.items():
sampled_minibatch[f"{name}_rewards"] = signal.evaluate_batch(
sampled_minibatch
).scaled_reward
if isinstance(signal, RewardSignal):
sampled_minibatch[f"{name}_rewards"] = signal.evaluate_batch(
sampled_minibatch
).scaled_reward
else:
sampled_minibatch[f"{name}_rewards"] = (
signal.evaluate(sampled_minibatch) * signal.strength
)
update_stats = self.optimizer.update(sampled_minibatch, n_sequences)
for stat_name, value in update_stats.items():

reward_signal_minibatches = {}
for name, signal in self.optimizer.reward_signals.items():
logger.debug(f"Updating {name} at step {self.step}")
# Some signals don't need a minibatch to be sampled - so we don't!
if signal.update_dict:
reward_signal_minibatches[name] = buffer.sample_mini_batch(
self.hyperparameters.batch_size,
sequence_length=self.policy.sequence_length,
)
if isinstance(signal, RewardSignal):
# Some signals don't need a minibatch to be sampled - so we don't!
if signal.update_dict:
reward_signal_minibatches[name] = buffer.sample_mini_batch(
self.hyperparameters.batch_size,
sequence_length=self.policy.sequence_length,
)
update_stats = self.optimizer.update_reward_signals(
reward_signal_minibatches, n_sequences
)

)
self.policy = policy
self.policies[parsed_behavior_id.behavior_id] = policy
if self.framework == "torch":
if self.framework == FrameworkType.PYTORCH:
self.optimizer = TorchSACOptimizer( # type: ignore
self.policy, self.trainer_settings # type: ignore
) # type: ignore

26
ml-agents/mlagents/trainers/settings.py


import warnings
import attr
import cattr
from typing import Dict, Optional, List, Any, DefaultDict, Mapping, Tuple, Union

from mlagents.trainers.cli_utils import StoreConfigFile, DetectDefault, parser
from mlagents.trainers.cli_utils import load_config
from mlagents.trainers.exception import TrainerConfigError
from mlagents.trainers.exception import TrainerConfigError, TrainerConfigWarning
from mlagents_envs import logging_util
from mlagents_envs.side_channel.environment_parameters_channel import (

def _check_lesson_chain(lessons, parameter_name):
"""
Ensures that when using curriculum, all non-terminal lessons have a valid
CompletionCriteria
CompletionCriteria, and that the terminal lesson does not contain a CompletionCriteria.
"""
num_lessons = len(lessons)
for index, lesson in enumerate(lessons):

)
if index == num_lessons - 1 and lesson.completion_criteria is not None:
warnings.warn(
f"Your final lesson definition contains completion_criteria for {parameter_name}."
f"It will be ignored.",
TrainerConfigWarning,
)
@staticmethod

return _mapping[self]
class FrameworkType(Enum):
TENSORFLOW: str = "tensorflow"
PYTORCH: str = "pytorch"
@attr.s(auto_attribs=True)
class TrainerSettings(ExportableSettings):
trainer_type: TrainerType = TrainerType.PPO

threaded: bool = True
self_play: Optional[SelfPlaySettings] = None
behavioral_cloning: Optional[BehavioralCloningSettings] = None
framework: FrameworkType = FrameworkType.TENSORFLOW
cattr.register_structure_hook(
Dict[RewardSignalType, RewardSignalSettings], RewardSignalSettings.structure

configured_dict["engine_settings"][key] = val
else: # Base options
configured_dict[key] = val
return RunOptions.from_dict(configured_dict)
# Apply --torch retroactively
final_runoptions = RunOptions.from_dict(configured_dict)
if "torch" in DetectDefault.non_default_args:
for trainer_set in final_runoptions.behaviors.values():
trainer_set.framework = FrameworkType.PYTORCH
return final_runoptions
@staticmethod
def from_dict(options_dict: Dict[str, Any]) -> "RunOptions":

59
ml-agents/mlagents/trainers/stats.py


from typing import List, Dict, NamedTuple, Any, Optional
import numpy as np
import abc
import csv
import os
import time
from threading import RLock

"""
Add a generic property to the StatsWriter. This could be e.g. a Dict of hyperparameters,
a max step count, a trainer type, etc. Note that not all StatsWriters need to be compatible
with all types of properties. For instance, a TB writer doesn't need a max step, nor should
we write hyperparameters to the CSV.
with all types of properties. For instance, a TB writer doesn't need a max step.
:param category: The category that the property belongs to.
:param type: The type of property.
:param value: The property itself.

return None
class CSVWriter(StatsWriter):
def __init__(self, base_dir: str, required_fields: List[str] = None):
"""
A StatsWriter that writes to a CSV file.
:param base_dir: The directory within which to place the CSV file, which will be {base_dir}/{category}.csv.
:param required_fields: If provided, the CSV writer won't write until these fields have statistics to write for
them.
"""
# We need to keep track of the fields in the CSV, as all rows need the same fields.
self.csv_fields: Dict[str, List[str]] = {}
self.required_fields = required_fields if required_fields else []
self.base_dir: str = base_dir
def write_stats(
self, category: str, values: Dict[str, StatsSummary], step: int
) -> None:
if self._maybe_create_csv_file(category, list(values.keys())):
row = [str(step)]
# Only record the stats that showed up in the first valid row
for key in self.csv_fields[category]:
_val = values.get(key, None)
row.append(str(_val.mean) if _val else "None")
with open(self._get_filepath(category), "a") as file:
writer = csv.writer(file)
writer.writerow(row)
def _maybe_create_csv_file(self, category: str, keys: List[str]) -> bool:
"""
If no CSV file exists and the keys contain the required fields,
make the CSV file and write the title row.
Returns True if there is now (or already was) a valid CSV file.
"""
if category not in self.csv_fields:
summary_dir = self.base_dir
os.makedirs(summary_dir, exist_ok=True)
# Only store if the row contains the required fields
if all(item in keys for item in self.required_fields):
self.csv_fields[category] = keys
with open(self._get_filepath(category), "w") as file:
title_row = ["Steps"]
title_row.extend(keys)
writer = csv.writer(file)
writer.writerow(title_row)
return True
return False
return True
def _get_filepath(self, category: str) -> str:
file_dir = os.path.join(self.base_dir, category + ".csv")
return file_dir
class StatsReporter:
writers: List[StatsWriter] = []
stats_dict: Dict[str, Dict[str, List]] = defaultdict(lambda: defaultdict(list))

"""
Add a generic property to the StatsReporter. This could be e.g. a Dict of hyperparameters,
a max step count, a trainer type, etc. Note that not all StatsWriters need to be compatible
with all types of properties. For instance, a TB writer doesn't need a max step, nor should
we write hyperparameters to the CSV.
with all types of properties. For instance, a TB writer doesn't need a max step.
:param key: The type of property.
:param value: The property itself.
"""

64
ml-agents/mlagents/trainers/tests/test_env_param_manager.py


import yaml
from mlagents.trainers.exception import TrainerConfigError
from mlagents.trainers.exception import TrainerConfigError, TrainerConfigWarning
from mlagents.trainers.environment_parameter_manager import EnvironmentParameterManager
from mlagents.trainers.settings import (
RunOptions,

"""
test_bad_curriculum_all_competion_criteria_config_yaml = """
environment_parameters:
  param_1:
    curriculum:
      - name: Lesson1
        completion_criteria:
          measure: reward
          behavior: fake_behavior
          threshold: 30
          min_lesson_length: 100
          require_reset: true
        value: 1
      - name: Lesson2
        completion_criteria:
          measure: reward
          behavior: fake_behavior
          threshold: 30
          min_lesson_length: 100
          require_reset: true
        value: 2
      - name: Lesson3
        completion_criteria:
          measure: reward
          behavior: fake_behavior
          threshold: 30
          min_lesson_length: 100
          require_reset: true
        value:
          sampler_type: uniform
          sampler_parameters:
            min_value: 1
            max_value: 3
"""
def test_curriculum_raises_all_completion_criteria_conversion():
    with pytest.warns(TrainerConfigWarning):
        run_options = RunOptions.from_dict(
            yaml.safe_load(test_bad_curriculum_all_competion_criteria_config_yaml)
        )
    param_manager = EnvironmentParameterManager(
        run_options.environment_parameters, 1337, False
    )
    assert param_manager.update_lessons(
        trainer_steps={"fake_behavior": 500},
        trainer_max_steps={"fake_behavior": 1000},
        trainer_reward_buffer={"fake_behavior": [1000] * 101},
    ) == (True, True)
    assert param_manager.update_lessons(
        trainer_steps={"fake_behavior": 500},
        trainer_max_steps={"fake_behavior": 1000},
        trainer_reward_buffer={"fake_behavior": [1000] * 101},
    ) == (True, True)
    assert param_manager.update_lessons(
        trainer_steps={"fake_behavior": 500},
        trainer_max_steps={"fake_behavior": 1000},
        trainer_reward_buffer={"fake_behavior": [1000] * 101},
    ) == (False, False)
    assert param_manager.get_current_lesson_number() == {"param_1": 2}
test_everything_config_yaml = """

4
ml-agents/mlagents/trainers/tests/test_ppo.py


from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
from mlagents.trainers.ppo.trainer import PPOTrainer, discount_rewards
from mlagents.trainers.ppo.optimizer_tf import TFPPOOptimizer
from mlagents.trainers.ppo.optimizer_tf import PPOOptimizer
from mlagents.trainers.policy.tf_policy import TFPolicy
from mlagents.trainers.agent_processor import AgentManagerQueue
from mlagents.trainers.tests import mock_brain as mb

policy = TFPolicy(
0, mock_specs, trainer_settings, "test", False, create_tf_graph=False
)
optimizer = TFPPOOptimizer(policy, trainer_settings)
optimizer = PPOOptimizer(policy, trainer_settings)
return optimizer

4
ml-agents/mlagents/trainers/tests/test_reward_signals.py


import mlagents.trainers.tests.mock_brain as mb
from mlagents.trainers.policy.tf_policy import TFPolicy
from mlagents.trainers.sac.optimizer import SACOptimizer
from mlagents.trainers.ppo.optimizer_tf import TFPPOOptimizer
from mlagents.trainers.ppo.optimizer_tf import PPOOptimizer
from mlagents.trainers.tests.test_simple_rl import PPO_CONFIG, SAC_CONFIG
from mlagents.trainers.settings import (
GAILSettings,

if trainer_settings.trainer_type == TrainerType.SAC:
optimizer = SACOptimizer(policy, trainer_settings)
else:
optimizer = TFPPOOptimizer(policy, trainer_settings)
optimizer = PPOOptimizer(policy, trainer_settings)
return optimizer

5
ml-agents/mlagents/trainers/tests/test_rl_trainer.py


mock_saver.save_checkpoint.side_effect = checkpoint_path
self.saver = mock_saver
def create_policy(self):
def create_tf_policy(self):
return mock.Mock()
def create_torch_policy(self):
return mock.Mock()
def _process_trajectory(self, trajectory):

42
ml-agents/mlagents/trainers/tests/test_stats.py


import pytest
import tempfile
import unittest
import csv
CSVWriter,
StatsSummary,
GaugeWriter,
ConsoleWriter,

tb_writer = TensorboardWriter(tmp_path, clear_past_data=True)
tb_writer.write_stats("category1", {"key1": statssummary1}, 10)
assert len(os.listdir(os.path.join(tmp_path, "category1"))) == 1
def test_csv_writer():
# Test write_stats
category = "category1"
with tempfile.TemporaryDirectory(prefix="unittest-") as base_dir:
csv_writer = CSVWriter(base_dir, required_fields=["key1", "key2"])
statssummary1 = StatsSummary(mean=1.0, std=1.0, num=1)
csv_writer.write_stats("category1", {"key1": statssummary1}, 10)
# Test that the filewriter has been created and the directory has been created.
filewriter_dir = "{basedir}/{category}.csv".format(
basedir=base_dir, category=category
)
# The required keys weren't in the stats
assert not os.path.exists(filewriter_dir)
csv_writer.write_stats(
"category1", {"key1": statssummary1, "key2": statssummary1}, 10
)
csv_writer.write_stats(
"category1", {"key1": statssummary1, "key2": statssummary1}, 20
)
# The required keys were in the stats
assert os.path.exists(filewriter_dir)
with open(filewriter_dir) as csv_file:
csv_reader = csv.reader(csv_file, delimiter=",")
line_count = 0
for row in csv_reader:
if line_count == 0:
assert "key1" in row
assert "key2" in row
assert "Steps" in row
line_count += 1
else:
assert len(row) == 3
line_count += 1
assert line_count == 3
def test_gauge_stat_writer_sanitize():

4
ml-agents/mlagents/trainers/tests/torch/test_decoders.py


# Test default 1 value per head
value_heads = ValueHeads(stream_names, input_size)
input_data = torch.ones((batch_size, input_size))
value_out, _ = value_heads(input_data) # Note: mean value will be removed shortly
value_out = value_heads(input_data) # Note: mean value will be removed shortly
for stream_name in stream_names:
assert value_out[stream_name].shape == (batch_size,)

output_size = 4
value_heads = ValueHeads(stream_names, input_size, output_size)
input_data = torch.ones((batch_size, input_size))
value_out, _ = value_heads(input_data)
value_out = value_heads(input_data)
for stream_name in stream_names:
assert value_out[stream_name].shape == (batch_size, output_size)

10
ml-agents/mlagents/trainers/tests/torch/test_distributions.py


assert log_prob == pytest.approx(-0.919, abs=0.01)
for ent in dist_instance.entropy().flatten():
# entropy of standard normal at 0
assert ent == pytest.approx(2.83, abs=0.01)
# entropy of standard normal at 0, based on 1/2 + ln(sqrt(2pi)sigma)
assert ent == pytest.approx(1.42, abs=0.01)
dist_instance = GaussianDistInstance(
dist_instance = TanhGaussianDistInstance(
torch.zeros(1, act_size), torch.ones(1, act_size)
)
for _ in range(10):

torch.manual_seed(0)
act_size = 4
test_prob = torch.tensor(
[1.0 - 0.1 * (act_size - 1)] + [0.1] * (act_size - 1)
[[1.0 - 0.1 * (act_size - 1)] + [0.1] * (act_size - 1)]
assert action.shape == (1,)
assert action.shape == (1, 1)
assert action < act_size
# Make sure the first action has a higher probability than the others.

12
ml-agents/mlagents/trainers/tests/torch/test_networks.py


def test_networkbody_vector():
torch.manual_seed(0)
obs_size = 4
network_settings = NetworkSettings()
obs_shapes = [(obs_size,)]

sample_obs = torch.ones((1, obs_size))
sample_act = torch.ones((1, 2))
sample_obs = 0.1 * torch.ones((1, obs_size))
sample_act = 0.1 * torch.ones((1, 2))
for _ in range(100):
for _ in range(300):
encoded, _ = networkbody([sample_obs], [], sample_act)
assert encoded.shape == (1, network_settings.hidden_units)
# Try to force output to 1

def test_networkbody_lstm():
torch.manual_seed(0)
obs_size = 4
seq_len = 16
network_settings = NetworkSettings(

def test_networkbody_visual():
torch.manual_seed(0)
vec_obs_size = 4
obs_size = (84, 84, 3)
network_settings = NetworkSettings()

sample_obs = torch.ones((1, 84, 84, 3))
sample_vec_obs = torch.ones((1, vec_obs_size))
for _ in range(100):
for _ in range(150):
encoded, _ = networkbody([sample_vec_obs], [sample_obs])
assert encoded.shape == (1, network_settings.hidden_units)
# Try to force output to 1

def test_valuenetwork():
torch.manual_seed(0)
obs_size = 4
num_outputs = 2
network_settings = NetworkSettings()

54
ml-agents/mlagents/trainers/tests/torch/test_utils.py


import torch
import numpy as np
from mlagents.trainers.settings import EncoderType
from mlagents.trainers.settings import EncoderType, ScheduleType
from mlagents.trainers.torch.utils import ModelUtils
from mlagents.trainers.exception import UnityTrainerException
from mlagents.trainers.torch.encoders import (

for encoder_type in EncoderType:
good_size = ModelUtils.MIN_RESOLUTION_FOR_ENCODER[encoder_type]
vis_input = torch.ones((1, 3, good_size, good_size))
ModelUtils._check_resolution_for_encoder(vis_input, encoder_type)
ModelUtils._check_resolution_for_encoder(good_size, good_size, encoder_type)
enc_func = ModelUtils.get_encoder_for_type(encoder_type)
enc = enc_func(good_size, good_size, 3, 1)
enc.forward(vis_input)

with pytest.raises(UnityTrainerException):
# Make sure we'd hit a friendly error during model setup time.
ModelUtils._check_resolution_for_encoder(vis_input, encoder_type)
ModelUtils._check_resolution_for_encoder(
bad_size, bad_size, encoder_type
)
enc = enc_func(bad_size, bad_size, 3, 1)
enc.forward(vis_input)

assert isinstance(enc, ModelUtils.get_encoder_for_type(encoder_type))
def test_decayed_value():
test_steps = [0, 4, 9]
# Test constant decay
param = ModelUtils.DecayedValue(ScheduleType.CONSTANT, 1.0, 0.2, test_steps[-1])
for _step in test_steps:
_param = param.get_value(_step)
assert _param == 1.0
test_results = [1.0, 0.6444, 0.2]
# Test linear decay
param = ModelUtils.DecayedValue(ScheduleType.LINEAR, 1.0, 0.2, test_steps[-1])
for _step, _result in zip(test_steps, test_results):
_param = param.get_value(_step)
assert _param == pytest.approx(_result, abs=0.01)
# Test invalid
with pytest.raises(UnityTrainerException):
ModelUtils.DecayedValue(
"SomeOtherSchedule", 1.0, 0.2, test_steps[-1]
).get_value(0)
def test_polynomial_decay():
test_steps = [0, 4, 9]
test_results = [1.0, 0.7, 0.2]
for _step, _result in zip(test_steps, test_results):
decayed = ModelUtils.polynomial_decay(
1.0, 0.2, test_steps[-1], _step, power=0.8
)
assert decayed == pytest.approx(_result, abs=0.01)
def test_list_to_tensor():
# Test converting pure list
unconverted_list = [[1, 2], [1, 3], [1, 4]]

action_size = [2, 1, 3]
oh_actions = ModelUtils.actions_to_onehot(all_actions, action_size)
expected_result = [
torch.tensor([[0, 1], [0, 1]]),
torch.tensor([[1], [1]]),
torch.tensor([[0, 0, 1], [0, 0, 1]]),
torch.tensor([[0, 1], [0, 1]], dtype=torch.float),
torch.tensor([[1], [1]], dtype=torch.float),
torch.tensor([[0, 0, 1], [0, 0, 1]], dtype=torch.float),
]
for res, exp in zip(oh_actions, expected_result):
assert torch.equal(res, exp)

for ent in entropies.flatten():
# entropy of standard normal at 0
assert ent == pytest.approx(2.83, abs=0.01)
assert ent == pytest.approx(1.42, abs=0.01)
[1.0 - 0.1 * (act_size - 1)] + [0.1] * (act_size - 1)
[[1.0 - 0.1 * (act_size - 1)] + [0.1] * (act_size - 1)]
) # High prob for first action
dist_list = [CategoricalDistInstance(test_prob), CategoricalDistInstance(test_prob)]
action_list = [torch.tensor([0]), torch.tensor([1])]

assert all_probs.shape == (len(dist_list * act_size),)
assert entropies.shape == (len(dist_list),)
assert all_probs.shape == (1, len(dist_list * act_size))
assert entropies.shape == (1, len(dist_list))
# Make sure the first action has a higher probability than the others.
assert log_probs.flatten()[0] > log_probs.flatten()[1]

18
ml-agents/mlagents/trainers/tf/model_serialization.py


from distutils.util import strtobool
import os
from typing import Any, List, Set
import shutil
from distutils.version import LooseVersion
try:

return strtobool(val)
except Exception:
return False
def copy_model_files(source_nn_path: str, destination_nn_path: str) -> None:
"""
Copy the .nn file at the given source to the destination.
Also copies the corresponding .onnx file if it exists.
"""
shutil.copyfile(source_nn_path, destination_nn_path)
logger.info(f"Copied {source_nn_path} to {destination_nn_path}.")
# Copy the onnx file if it exists
source_onnx_path = os.path.splitext(source_nn_path)[0] + ".onnx"
destination_onnx_path = os.path.splitext(destination_nn_path)[0] + ".onnx"
try:
shutil.copyfile(source_onnx_path, destination_onnx_path)
logger.info(f"Copied {source_onnx_path} to {destination_onnx_path}.")
except OSError:
pass

3
ml-agents/mlagents/trainers/torch/decoders.py


import torch
from torch import nn
from mlagents.trainers.torch.layers import linear_layer
class ValueHeads(nn.Module):

_value_heads = {}
for name in stream_names:
value = nn.Linear(input_size, output_size)
value = linear_layer(input_size, output_size)
_value_heads[name] = value
self.value_heads = nn.ModuleDict(_value_heads)

39
ml-agents/mlagents/trainers/torch/distributions.py


from torch import nn
import numpy as np
import math
from mlagents.trainers.torch.layers import linear_layer, Initialization
EPSILON = 1e-7 # Small value to avoid divide by zero

return torch.exp(log_prob)
def entropy(self):
return torch.log(2 * math.pi * math.e * self.std + EPSILON)
return 0.5 * torch.log(2 * math.pi * math.e * self.std + EPSILON)
class TanhGaussianDistInstance(GaussianDistInstance):

return torch.multinomial(self.probs, 1)
def pdf(self, value):
return torch.diag(self.probs.T[value.flatten().long()])
# This function is equivalent to torch.diag(self.probs.T[value.flatten().long()]),
# but torch.diag is not supported by ONNX export.
idx = torch.arange(start=0, end=len(value)).unsqueeze(-1)
return torch.gather(
self.probs.permute(1, 0)[value.flatten().long()], -1, idx
).squeeze(-1)
def log_prob(self, value):
return torch.log(self.pdf(value))
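Since the comment claims the `gather` expression is equivalent to the `torch.diag` version, a small standalone check (with made-up probabilities) illustrates the equivalence:

```python
import torch

probs = torch.tensor([[0.7, 0.2, 0.1],
                      [0.1, 0.8, 0.1]])  # batch of 2 categorical distributions
value = torch.tensor([[0], [1]])         # chosen action per row

# diag-based version (not exportable to ONNX)
diag_pdf = torch.diag(probs.T[value.flatten().long()])

# gather-based version used in the diff
idx = torch.arange(start=0, end=len(value)).unsqueeze(-1)
gather_pdf = torch.gather(
    probs.permute(1, 0)[value.flatten().long()], -1, idx
).squeeze(-1)

assert torch.allclose(diag_pdf, gather_pdf)  # both give [0.7, 0.8]
```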

def entropy(self):
return torch.sum(self.probs * torch.log(self.probs), dim=-1)
return -torch.sum(self.probs * torch.log(self.probs), dim=-1)
class GaussianDistribution(nn.Module):

):
super().__init__()
self.conditional_sigma = conditional_sigma
self.mu = nn.Linear(hidden_size, num_outputs)
self.mu = linear_layer(
hidden_size,
num_outputs,
kernel_init=Initialization.KaimingHeNormal,
kernel_gain=0.1,
bias_init=Initialization.Zero,
)
nn.init.xavier_uniform_(self.mu.weight, gain=0.01)
self.log_sigma = nn.Linear(hidden_size, num_outputs)
nn.init.xavier_uniform(self.log_sigma.weight, gain=0.01)
self.log_sigma = linear_layer(
hidden_size,
num_outputs,
kernel_init=Initialization.KaimingHeNormal,
kernel_gain=0.1,
bias_init=Initialization.Zero,
)
else:
self.log_sigma = nn.Parameter(
torch.zeros(1, num_outputs, requires_grad=True)

def _create_policy_branches(self, hidden_size: int) -> nn.ModuleList:
branches = []
for size in self.act_sizes:
branch_output_layer = nn.Linear(hidden_size, size)
nn.init.xavier_uniform_(branch_output_layer.weight, gain=0.01)
branch_output_layer = linear_layer(
hidden_size,
size,
kernel_init=Initialization.KaimingHeNormal,
kernel_gain=0.1,
bias_init=Initialization.Zero,
)
branches.append(branch_output_layer)
return nn.ModuleList(branches)

170
ml-agents/mlagents/trainers/torch/encoders.py


from typing import Tuple, Optional
from typing import Tuple, Optional, Union
from mlagents.trainers.torch.layers import linear_layer, Initialization, Swish
import torch
from torch import nn

self.running_variance.copy_(other_normalizer.running_variance.data)
def conv_output_shape(h_w, kernel_size=1, stride=1, pad=0, dilation=1):
def conv_output_shape(
h_w: Tuple[int, int],
kernel_size: Union[int, Tuple[int, int]] = 1,
stride: int = 1,
padding: int = 0,
dilation: int = 1,
) -> Tuple[int, int]:
"""
Calculates the output shape (height and width) of the output of a convolution layer.
kernel_size, stride, padding and dilation correspond to the inputs of the
torch.nn.Conv2d layer (https://pytorch.org/docs/stable/generated/torch.nn.Conv2d.html)
:param h_w: The height and width of the input.
:param kernel_size: The size of the kernel of the convolution (can be an int or a
tuple [width, height])
:param stride: The stride of the convolution
:param padding: The padding of the convolution
:param dilation: The dilation of the convolution
"""
if type(kernel_size) is not tuple:
kernel_size = (kernel_size, kernel_size)
if not isinstance(kernel_size, tuple):
kernel_size = (int(kernel_size), int(kernel_size))
((h_w[0] + (2 * pad) - (dilation * (kernel_size[0] - 1)) - 1) / stride) + 1
((h_w[0] + (2 * padding) - (dilation * (kernel_size[0] - 1)) - 1) / stride) + 1
((h_w[1] + (2 * pad) - (dilation * (kernel_size[1] - 1)) - 1) / stride) + 1
((h_w[1] + (2 * padding) - (dilation * (kernel_size[1] - 1)) - 1) / stride) + 1
"""
Calculates the output shape (height and width) of the output of a max pooling layer.
kernel_size corresponds to the inputs of the
torch.nn.MaxPool2d layer (https://pytorch.org/docs/stable/generated/torch.nn.MaxPool2d.html)
:param kernel_size: The size of the kernel of the convolution
"""
height = (h_w[0] - kernel_size) // 2 + 1
width = (h_w[1] - kernel_size) // 2 + 1
return height, width
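As a standalone illustration of the formula above (restated here for clarity, with the kernel and stride values used by the simple visual encoder later in this diff and an assumed 84x84 input):

```python
import math

def conv_output_shape(h_w, kernel_size=1, stride=1, padding=0, dilation=1):
    # Same output-size formula as torch.nn.Conv2d.
    if not isinstance(kernel_size, tuple):
        kernel_size = (int(kernel_size), int(kernel_size))
    h = math.floor(((h_w[0] + 2 * padding - dilation * (kernel_size[0] - 1) - 1) / stride) + 1)
    w = math.floor(((h_w[1] + 2 * padding - dilation * (kernel_size[1] - 1) - 1) / stride) + 1)
    return h, w

# 84x84 input through the two conv layers of the simple visual encoder
conv_1 = conv_output_shape((84, 84), kernel_size=8, stride=4)  # (20, 20)
conv_2 = conv_output_shape(conv_1, kernel_size=4, stride=2)    # (9, 9)
final_flat = conv_2[0] * conv_2[1] * 32                        # 2592
```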

):
self.normalizer: Optional[Normalizer] = None
super().__init__()
self.layers = [nn.Linear(input_size, hidden_size)]
self.layers = [
linear_layer(
input_size,
hidden_size,
kernel_init=Initialization.KaimingHeNormal,
kernel_gain=1.0,
)
]
self.layers.append(Swish())
self.layers.append(nn.Linear(hidden_size, hidden_size))
self.layers.append(nn.LeakyReLU())
self.layers.append(
linear_layer(
hidden_size,
hidden_size,
kernel_init=Initialization.KaimingHeNormal,
kernel_gain=1.0,
)
)
self.layers.append(Swish())
def forward(self, inputs: torch.Tensor) -> None:
def forward(self, inputs: torch.Tensor) -> torch.Tensor:
if self.normalizer is not None:
inputs = self.normalizer(inputs)
return self.seq_layers(inputs)

conv_2_hw = conv_output_shape(conv_1_hw, 4, 2)
self.final_flat = conv_2_hw[0] * conv_2_hw[1] * 32
self.conv1 = nn.Conv2d(initial_channels, 16, [8, 8], [4, 4])
self.conv2 = nn.Conv2d(16, 32, [4, 4], [2, 2])
self.dense = nn.Linear(self.final_flat, self.h_size)
self.conv_layers = nn.Sequential(
nn.Conv2d(initial_channels, 16, [8, 8], [4, 4]),
nn.LeakyReLU(),
nn.Conv2d(16, 32, [4, 4], [2, 2]),
nn.LeakyReLU(),
)
self.dense = nn.Sequential(
linear_layer(
self.final_flat,
self.h_size,
kernel_init=Initialization.KaimingHeNormal,
kernel_gain=1.0,
),
nn.LeakyReLU(),
)
conv_1 = nn.functional.leaky_relu(self.conv1(visual_obs))
conv_2 = nn.functional.leaky_relu(self.conv2(conv_1))
# hidden = torch.relu(self.dense(conv_2.view([-1, self.final_flat])))
hidden = nn.functional.leaky_relu(
self.dense(torch.reshape(conv_2, (-1, self.final_flat)))
)
hidden = self.conv_layers(visual_obs)
hidden = torch.reshape(hidden, (-1, self.final_flat))
hidden = self.dense(hidden)
return hidden

conv_3_hw = conv_output_shape(conv_2_hw, 3, 1)
self.final_flat = conv_3_hw[0] * conv_3_hw[1] * 64
self.conv1 = nn.Conv2d(initial_channels, 32, [8, 8], [4, 4])
self.conv2 = nn.Conv2d(32, 64, [4, 4], [2, 2])
self.conv3 = nn.Conv2d(64, 64, [3, 3], [1, 1])
self.dense = nn.Linear(self.final_flat, self.h_size)
self.conv_layers = nn.Sequential(
nn.Conv2d(initial_channels, 32, [8, 8], [4, 4]),
nn.LeakyReLU(),
nn.Conv2d(32, 64, [4, 4], [2, 2]),
nn.LeakyReLU(),
nn.Conv2d(64, 64, [3, 3], [1, 1]),
nn.LeakyReLU(),
)
self.dense = nn.Sequential(
linear_layer(
self.final_flat,
self.h_size,
kernel_init=Initialization.KaimingHeNormal,
kernel_gain=1.0,
),
nn.LeakyReLU(),
)
def forward(self, visual_obs: torch.Tensor) -> None:
hidden = self.conv_layers(visual_obs)
hidden = hidden.view([-1, self.final_flat])
hidden = self.dense(hidden)
return hidden
def forward(self, visual_obs):
conv_1 = nn.functional.leaky_relu(self.conv1(visual_obs))
conv_2 = nn.functional.leaky_relu(self.conv2(conv_1))
conv_3 = nn.functional.leaky_relu(self.conv3(conv_2))
hidden = nn.functional.leaky_relu(
self.dense(conv_3.view([-1, self.final_flat]))
class ResNetBlock(nn.Module):
def __init__(self, channel: int):
"""
Creates a ResNet Block.
:param channel: The number of channels in the input (and output) tensors of the
convolutions
"""
super().__init__()
self.layers = nn.Sequential(
Swish(),
nn.Conv2d(channel, channel, [3, 3], [1, 1], padding=1),
Swish(),
nn.Conv2d(channel, channel, [3, 3], [1, 1], padding=1),
return hidden
def forward(self, input_tensor: torch.Tensor) -> torch.Tensor:
return input_tensor + self.layers(input_tensor)
class ResNetVisualEncoder(nn.Module):

self.layers.append(nn.MaxPool2d([3, 3], [2, 2]))
height, width = pool_out_shape((height, width), 3)
for _ in range(n_blocks):
self.layers.append(self.make_block(channel))
self.layers.append(ResNetBlock(channel))
self.layers.append(nn.LeakyReLU())
self.dense = nn.Linear(n_channels[-1] * height * width, final_hidden)
@staticmethod
def make_block(channel):
block_layers = [
nn.LeakyReLU(),
nn.Conv2d(channel, channel, [3, 3], [1, 1], padding=1),
nn.LeakyReLU(),
nn.Conv2d(channel, channel, [3, 3], [1, 1], padding=1),
]
return block_layers
@staticmethod
def forward_block(input_hidden, block_layers):
hidden = input_hidden
for layer in block_layers:
hidden = layer(hidden)
return hidden + input_hidden
self.layers.append(Swish())
self.dense = linear_layer(
n_channels[-1] * height * width,
final_hidden,
kernel_init=Initialization.KaimingHeNormal,
kernel_gain=1.0,
)
if isinstance(layer, nn.Module):
hidden = layer(hidden)
elif isinstance(layer, list):
hidden = self.forward_block(hidden, layer)
hidden = layer(hidden)
before_out = hidden.view(batch_size, -1)
return torch.relu(self.dense(before_out))

123
ml-agents/mlagents/trainers/torch/utils.py


VectorEncoder,
VectorAndUnnormalizedInputEncoder,
)
from mlagents.trainers.settings import EncoderType
from mlagents.trainers.settings import EncoderType, ScheduleType
from mlagents_envs.base_env import BehaviorSpec
from mlagents.trainers.torch.distributions import DistInstance, DiscreteDistInstance

EncoderType.RESNET: 15,
}
class ActionFlattener:
def __init__(self, behavior_spec: BehaviorSpec):
self._specs = behavior_spec
@property
def flattened_size(self) -> int:
if self._specs.is_action_continuous():
return self._specs.action_size
else:
return sum(self._specs.discrete_action_branches)
def forward(self, action: torch.Tensor) -> torch.Tensor:
if self._specs.is_action_continuous():
return action
else:
return torch.cat(
ModelUtils.actions_to_onehot(
torch.as_tensor(action, dtype=torch.long),
self._specs.discrete_action_branches,
),
dim=1,
)
def swish(input_activation: torch.Tensor) -> torch.Tensor:
"""Swish activation function. For more info: https://arxiv.org/abs/1710.05941"""
return torch.mul(input_activation, torch.sigmoid(input_activation))
def update_learning_rate(optim: torch.optim.Optimizer, lr: float) -> None:
"""
Apply a learning rate to a torch optimizer.
:param optim: Optimizer
:param lr: Learning rate
"""
for param_group in optim.param_groups:
param_group["lr"] = lr
class DecayedValue:
def __init__(
self,
schedule: ScheduleType,
initial_value: float,
min_value: float,
max_step: int,
):
"""
Object that represents the value of a parameter that should be decayed, assuming it is a function of
global_step.
:param schedule: Type of learning rate schedule.
:param initial_value: Initial value before decay.
:param min_value: Decay value to this value by max_step.
:param max_step: The final step count where the return value should equal min_value.
:param global_step: The current step count.
:return: The value.
"""
self.schedule = schedule
self.initial_value = initial_value
self.min_value = min_value
self.max_step = max_step
def get_value(self, global_step: int) -> float:
"""
Get the value at a given global step.
:param global_step: Step count.
:returns: Decayed value at this global step.
"""
if self.schedule == ScheduleType.CONSTANT:
return self.initial_value
elif self.schedule == ScheduleType.LINEAR:
return ModelUtils.polynomial_decay(
self.initial_value, self.min_value, self.max_step, global_step
)
else:
raise UnityTrainerException(f"The schedule {self.schedule} is invalid.")
@staticmethod
def polynomial_decay(
initial_value: float,
min_value: float,
max_step: int,
global_step: int,
power: float = 1.0,
) -> float:
"""
Get a decayed value based on a polynomial schedule, with respect to the current global step.
:param initial_value: Initial value before decay.
:param min_value: Decay value to this value by max_step.
:param max_step: The final step count where the return value should equal min_value.
:param global_step: The current step count.
:param power: Power of polynomial decay. 1.0 (default) is a linear decay.
:return: The current decayed value.
"""
global_step = min(global_step, max_step)
decayed_value = (initial_value - min_value) * (
1 - float(global_step) / max_step
) ** (power) + min_value
return decayed_value
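A quick standalone restatement of the decay formula, using the same values as the `test_polynomial_decay` / `test_decayed_value` tests earlier in this diff:

```python
def polynomial_decay(initial_value, min_value, max_step, global_step, power=1.0):
    # Interpolate from initial_value down to min_value as global_step approaches max_step.
    global_step = min(global_step, max_step)
    return (initial_value - min_value) * (1 - global_step / max_step) ** power + min_value

# Linear schedule (power=1.0) from 1.0 to 0.2 over 9 steps:
print(polynomial_decay(1.0, 0.2, 9, 0))  # 1.0
print(polynomial_decay(1.0, 0.2, 9, 4))  # ~0.644
print(polynomial_decay(1.0, 0.2, 9, 9))  # 0.2
```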
@staticmethod
def get_encoder_for_type(encoder_type: EncoderType) -> nn.Module:

:return: List of one-hot tensors, one representing each branch.
"""
onehot_branches = [
torch.nn.functional.one_hot(_act.T, action_size[i])
for i, _act in enumerate(discrete_actions.T)
torch.nn.functional.one_hot(_act.T, action_size[i]).float()
for i, _act in enumerate(discrete_actions.long().T)
@staticmethod
def dynamic_partition(
data: torch.Tensor, partitions: torch.Tensor, num_partitions: int
) -> List[torch.Tensor]:
"""
Torch implementation of dynamic_partition :
https://www.tensorflow.org/api_docs/python/tf/dynamic_partition
Splits the data Tensor input into num_partitions Tensors according to the indices in
partitions.
:param data: The Tensor data that will be split into partitions.
:param partitions: An indices tensor that determines in which partition each element
of data will be in.
:param num_partitions: The number of partitions to output. Corresponds to the
maximum possible index in the partitions argument.
:return: A list of Tensor partitions (Their indices correspond to their partition index).
"""
res: List[torch.Tensor] = []
for i in range(num_partitions):
res += [data[(partitions == i).nonzero().squeeze(1)]]
return res
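A small standalone usage sketch of the same partitioning logic (the example tensors are made up):

```python
import torch

def dynamic_partition(data, partitions, num_partitions):
    # Group rows of `data` by their partition index, as in the method above.
    return [data[(partitions == i).nonzero().squeeze(1)] for i in range(num_partitions)]

data = torch.tensor([10.0, 20.0, 30.0, 40.0])
partitions = torch.tensor([0, 1, 0, 1])
parts = dynamic_partition(data, partitions, 2)
# parts[0] -> tensor([10., 30.]), parts[1] -> tensor([20., 40.])
```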
@staticmethod
def get_probs_and_entropy(

44
ml-agents/mlagents/trainers/trainer/rl_trainer.py


import abc
import time
import attr
from mlagents.model_serialization import copy_model_files
from mlagents.trainers.policy.checkpoint_manager import (
NNCheckpoint,
NNCheckpointManager,

from mlagents.trainers.optimizer import Optimizer
from mlagents.trainers.buffer import AgentBuffer
from mlagents.trainers.trainer import Trainer
from mlagents.trainers.components.reward_signals import RewardSignalResult
from mlagents.trainers.components.reward_signals import RewardSignalResult, RewardSignal
from mlagents.trainers.policy.torch_policy import TorchPolicy
from mlagents.trainers.settings import TestingConfiguration, TrainerSettings
from mlagents.trainers.settings import TestingConfiguration, TrainerSettings, FrameworkType
from mlagents.trainers.exception import UnityTrainerException
try:
from mlagents.trainers.policy.torch_policy import TorchPolicy
except ModuleNotFoundError:
TorchPolicy = None # type: ignore
RewardSignalResults = Dict[str, RewardSignalResult]

self._stats_reporter.add_property(
StatsPropertyType.HYPERPARAMETERS, self.trainer_settings.as_dict()
)
self.framework = "torch" if TestingConfiguration.use_torch else "tf"
self.framework = self.trainer_settings.framework
logger.debug(f"Using framework {self.framework.value}")
if TestingConfiguration.max_steps > 0:
self.trainer_settings.max_steps = TestingConfiguration.max_steps
self._next_save_step = 0

self.reward_buffer.appendleft(rewards.get(agent_id, 0))
rewards[agent_id] = 0
else:
self.stats_reporter.add_stat(
optimizer.reward_signals[name].stat_name, rewards.get(agent_id, 0)
)
if isinstance(optimizer.reward_signals[name], RewardSignal):
self.stats_reporter.add_stat(
optimizer.reward_signals[name].stat_name,
rewards.get(agent_id, 0),
)
else:
self.stats_reporter.add_stat(
f"Policy/{optimizer.reward_signals[name].name.capitalize()} Reward",
rewards.get(agent_id, 0),
)
rewards[agent_id] = 0
def _clear_update_buffer(self) -> None:

def create_policy(
self, parsed_behavior_id: BehaviorIdentifiers, behavior_spec: BehaviorSpec
) -> Policy:
if self.framework == "torch":
if self.framework == FrameworkType.PYTORCH and TorchPolicy is None:
raise UnityTrainerException(
"To use the experimental PyTorch backend, install the PyTorch Python package first."
)
elif self.framework == FrameworkType.PYTORCH:
return self.create_torch_policy(parsed_behavior_id, behavior_spec)
else:
return self.create_tf_policy(parsed_behavior_id, behavior_spec)

logger.warning(
"Trainer has multiple policies, but default behavior only saves the first."
)
elif n_policies == 0:
logger.warning("Trainer has no policies, not saving anything.")
return
# Copy the checkpointed model files to the final output location
copy_model_files(model_checkpoint.file_path, f"{policy.model_path}.nn")
self.saver.export(self.saver.model_path, self.brain_name)
NNCheckpointManager.track_final_checkpoint(self.brain_name, final_checkpoint)
@abc.abstractmethod

3
ml-agents/tests/yamato/yamato_utils.py


if extra_packages:
pip_commands += extra_packages
for cmd in pip_commands:
pip_index_url = "--index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple"
f"source {venv_path}/bin/activate; python -m pip install -q {cmd}",
f"source {venv_path}/bin/activate; python -m pip install -q {cmd} {pip_index_url}",
shell=True,
)
return venv_path

3
test_requirements.txt


pytest-cov==2.6.1
pytest-xdist
# PyTorch tests are here for the time being, before they are used in the codebase.
torch>=1.5.0
# onnx doesn't currently have a wheel for 3.8
tf2onnx>=1.5.5;python_version<'3.8'

1
utils/make_readme_table.py


ReleaseInfo("release_2", "1.0.2", "0.16.1", "May 20, 2020"),
ReleaseInfo("release_3", "1.1.0", "0.17.0", "June 10, 2020"),
ReleaseInfo("release_4", "1.2.0", "0.18.0", "July 15, 2020"),
ReleaseInfo("release_5", "1.2.1", "0.18.1", "July 31, 2020"),
]
MAX_DAYS = 150 # do not print releases older than this many days

3
com.unity.ml-agents.extensions/Runtime/AssemblyInfo.cs


using System.Runtime.CompilerServices;
[assembly: InternalsVisibleTo("Unity.ML-Agents.Extensions.EditorTests")]

11
com.unity.ml-agents.extensions/Runtime/AssemblyInfo.cs.meta


fileFormatVersion: 2
guid: 48c8790647c3345e19c57d6c21065112
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

8
com.unity.ml-agents/Runtime/Actuators.meta


fileFormatVersion: 2
guid: 26733e59183b6479e8f0e892a8bf09a4
folderAsset: yes
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

8
com.unity.ml-agents/Tests/Editor/Actuators.meta


fileFormatVersion: 2
guid: c7e705f7d549e43c6be18ae809cd6f54
folderAsset: yes
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

160
docs/images/TensorBoard-download.png

Before | After
Width: 709  |  Height: 393  |  Size: 46 KiB

20
ml-agents/mlagents/trainers/tests/torch/test_layers.py


import torch
from mlagents.trainers.torch.layers import Swish, linear_layer, Initialization
def test_swish():
layer = Swish()
input_tensor = torch.Tensor([[1, 2, 3], [4, 5, 6]])
target_tensor = torch.mul(input_tensor, torch.sigmoid(input_tensor))
assert torch.all(torch.eq(layer(input_tensor), target_tensor))
def test_initialization_layer():
torch.manual_seed(0)
# Test Zero
layer = linear_layer(
3, 4, kernel_init=Initialization.Zero, bias_init=Initialization.Zero
)
assert torch.all(torch.eq(layer.weight.data, torch.zeros_like(layer.weight.data)))
assert torch.all(torch.eq(layer.bias.data, torch.zeros_like(layer.bias.data)))

48
ml-agents/mlagents/trainers/torch/layers.py


import torch
from enum import Enum
class Swish(torch.nn.Module):
def forward(self, data: torch.Tensor) -> torch.Tensor:
return torch.mul(data, torch.sigmoid(data))
class Initialization(Enum):
Zero = 0
XavierGlorotNormal = 1
XavierGlorotUniform = 2
KaimingHeNormal = 3 # also known as Variance scaling
KaimingHeUniform = 4
_init_methods = {
Initialization.Zero: torch.zero_,
Initialization.XavierGlorotNormal: torch.nn.init.xavier_normal_,
Initialization.XavierGlorotUniform: torch.nn.init.xavier_uniform_,
Initialization.KaimingHeNormal: torch.nn.init.kaiming_normal_,
Initialization.KaimingHeUniform: torch.nn.init.kaiming_uniform_,
}
def linear_layer(
input_size: int,
output_size: int,
kernel_init: Initialization = Initialization.XavierGlorotUniform,
kernel_gain: float = 1.0,
bias_init: Initialization = Initialization.Zero,
) -> torch.nn.Module:
"""
Creates a torch.nn.Linear module and initializes its weights.
:param input_size: The size of the input tensor
:param output_size: The size of the output tensor
:param kernel_init: The Initialization to use for the weights of the layer
:param kernel_gain: The multiplier for the weights of the kernel. Note that in
TensorFlow, calling variance_scaling with scale 0.01 is equivalent to calling
KaimingHeNormal with kernel_gain of 0.1
:param bias_init: The Initialization to use for the bias of the layer
"""
layer = torch.nn.Linear(input_size, output_size)
_init_methods[kernel_init](layer.weight.data)
layer.weight.data *= kernel_gain
_init_methods[bias_init](layer.bias.data)
return layer
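A brief usage sketch of the helpers above (assumes the ml-agents Python package and PyTorch are installed; the layer sizes are hypothetical):

import torch
from mlagents.trainers.torch.layers import Swish, linear_layer, Initialization

layer = linear_layer(
    64,
    32,
    kernel_init=Initialization.KaimingHeNormal,
    kernel_gain=0.1,  # comparable to TF variance_scaling(0.01), per the docstring above
)
hidden = Swish()(layer(torch.randn(8, 64)))  # shape: (8, 32)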

181
com.unity.ml-agents/Runtime/Actuators/ActionSegment.cs


using System;
using System.Collections;
using System.Collections.Generic;
using System.Diagnostics;
namespace Unity.MLAgents.Actuators
{
/// <summary>
/// ActionSegment{T} is a data structure that allows access to a segment of an underlying array
/// in order to avoid the copying and allocation of sub-arrays. The segment is defined by
/// an offset into the original array and a length.
/// </summary>
/// <typeparam name="T">The type of object stored in the underlying <see cref="Array"/></typeparam>
internal readonly struct ActionSegment<T> : IEnumerable<T>, IEquatable<ActionSegment<T>>
where T : struct
{
/// <summary>
/// The zero-based offset into the original array at which this segment starts.
/// </summary>
public readonly int Offset;
/// <summary>
/// The number of items this segment can access in the underlying array.
/// </summary>
public readonly int Length;
/// <summary>
/// An Empty segment which has an offset of 0, a Length of 0, and its underlying array
/// is also empty.
/// </summary>
public static ActionSegment<T> Empty = new ActionSegment<T>(System.Array.Empty<T>(), 0, 0);
static void CheckParameters(T[] actionArray, int offset, int length)
{
#if DEBUG
if (offset + length > actionArray.Length)
{
throw new ArgumentOutOfRangeException(nameof(offset),
$"Arguments offset: {offset} and length: {length} " +
$"are out of bounds of actionArray: {actionArray.Length}.");
}
#endif
}
/// <summary>
/// Construct an <see cref="ActionSegment{T}"/> with an underlying array
/// and offset, and a length.
/// </summary>
/// <param name="actionArray">The underlying array which this segment has a view into</param>
/// <param name="offset">The zero-based offset into the underlying array.</param>
/// <param name="length">The length of the segment.</param>
public ActionSegment(T[] actionArray, int offset, int length)
{
CheckParameters(actionArray, offset, length);
Array = actionArray;
Offset = offset;
Length = length;
}
/// <summary>
/// Get the underlying <see cref="Array"/> of this segment.
/// </summary>
public T[] Array { get; }
/// <summary>
/// Allows access to the underlying array using array syntax.
/// </summary>
/// <param name="index">The zero-based index of the segment.</param>
/// <exception cref="IndexOutOfRangeException">Thrown when the index is less than 0 or
/// greater than or equal to <see cref="Length"/></exception>
public T this[int index]
{
get
{
if (index < 0 || index >= Length)
{
throw new IndexOutOfRangeException($"Index out of bounds, expected a number between 0 and {Length}");
}
return Array[Offset + index];
}
}
/// <inheritdoc cref="IEnumerable{T}.GetEnumerator"/>
IEnumerator<T> IEnumerable<T>.GetEnumerator()
{
return new Enumerator(this);
}
/// <inheritdoc cref="IEnumerable{T}"/>
public IEnumerator GetEnumerator()
{
return new Enumerator(this);
}
/// <inheritdoc cref="ValueType.Equals(object)"/>
public override bool Equals(object obj)
{
if (!(obj is ActionSegment<T>))
{
return false;
}
return Equals((ActionSegment<T>)obj);
}
/// <inheritdoc cref="IEquatable{T}.Equals(T)"/>
public bool Equals(ActionSegment<T> other)
{
return Offset == other.Offset && Length == other.Length && Equals(Array, other.Array);
}
/// <inheritdoc cref="ValueType.GetHashCode"/>
public override int GetHashCode()
{
unchecked
{
var hashCode = Offset;
hashCode = (hashCode * 397) ^ Length;
hashCode = (hashCode * 397) ^ (Array != null ? Array.GetHashCode() : 0);
return hashCode;
}
}
/// <summary>
/// A private <see cref="IEnumerator{T}"/> for the <see cref="ActionSegment{T}"/> value type which follows its
/// rules of being a view into an underlying <see cref="Array"/>.
/// </summary>
struct Enumerator : IEnumerator<T>
{
readonly T[] m_Array;
readonly int m_Start;
readonly int m_End; // cache Offset + Count, since it's a little slow
int m_Current;
internal Enumerator(ActionSegment<T> arraySegment)
{
Debug.Assert(arraySegment.Array != null);
Debug.Assert(arraySegment.Offset >= 0);
Debug.Assert(arraySegment.Length >= 0);
Debug.Assert(arraySegment.Offset + arraySegment.Length <= arraySegment.Array.Length);
m_Array = arraySegment.Array;
m_Start = arraySegment.Offset;
m_End = arraySegment.Offset + arraySegment.Length;
m_Current = arraySegment.Offset - 1;
}
public bool MoveNext()
{
if (m_Current < m_End)
{
m_Current++;
return m_Current < m_End;
}
return false;
}
public T Current
{
get
{
if (m_Current < m_Start)
throw new InvalidOperationException("Enumerator not started.");
if (m_Current >= m_End)
throw new InvalidOperationException("Enumerator has reached the end already.");
return m_Array[m_Current];
}
}
object IEnumerator.Current => Current;
void IEnumerator.Reset()
{
m_Current = m_Start - 1;
}
public void Dispose()
{
}
}
}
}
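Conceptually, ActionSegment{T} is just an (array, offset, length) view that avoids copying sub-arrays. A minimal Python sketch of the same idea (illustrative only, not the C# API):

class Segment:
    def __init__(self, array, offset, length):
        assert offset + length <= len(array), "segment is out of bounds of the array"
        self.array, self.offset, self.length = array, offset, length

    def __getitem__(self, index):
        if not 0 <= index < self.length:
            raise IndexError(f"expected an index between 0 and {self.length - 1}")
        return self.array[self.offset + index]

    def __iter__(self):
        return (self.array[i] for i in range(self.offset, self.offset + self.length))

actions = [0.1, 0.2, 0.3, 0.4, 0.5]
view = Segment(actions, offset=2, length=2)
assert list(view) == [0.3, 0.4]  # reads through to the original list, no copy made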

3
com.unity.ml-agents/Runtime/Actuators/ActionSegment.cs.meta


fileFormatVersion: 2
guid: 4fa1432c1ba3460caaa84303a9011ef2
timeCreated: 1595869823

75
com.unity.ml-agents/Runtime/Actuators/ActionSpec.cs


using System;
using System.Collections.Generic;
using System.Linq;
using Unity.MLAgents.Policies;
namespace Unity.MLAgents.Actuators
{
/// <summary>
/// Defines the structure of an Action Space to be used by the Actuator system.
/// </summary>
internal readonly struct ActionSpec
{
/// <summary>
/// An array of branch sizes for our action space.
///
/// For an IActuator that uses a Discrete <see cref="SpaceType"/>, the number of
/// branches is the Length of the Array and each index contains the branch size.
/// The total number of discrete actions (the sum of all branch sizes) can be
/// retrieved from the <see cref="SumOfDiscreteBranchSizes"/> property.
///
/// For an IActuator with a Continuous <see cref="SpaceType"/>, this array will be null.
/// </summary>
public readonly int[] BranchSizes;
/// <summary>
/// The number of actions for a Continuous <see cref="SpaceType"/>.
/// </summary>
public int NumContinuousActions { get; }
/// <summary>
/// The number of branches for a Discrete <see cref="SpaceType"/>.
/// </summary>
public int NumDiscreteActions { get; }
/// <summary>
/// Get the total number of Discrete Actions that can be taken by calculating the Sum
/// of all of the Discrete Action branch sizes.
/// </summary>
public int SumOfDiscreteBranchSizes { get; }
/// <summary>
/// Creates a Continuous <see cref="ActionSpec"/> with the number of actions available.
/// </summary>
/// <param name="numActions">The number of actions available.</param>
/// <returns>A Continuous ActionSpec initialized with the number of actions available.</returns>
public static ActionSpec MakeContinuous(int numActions)
{
var actuatorSpace = new ActionSpec(numActions, 0);
return actuatorSpace;
}
/// <summary>
/// Creates a Discrete <see cref="ActionSpec"/> with the array of branch sizes that
/// represents the action space.
/// </summary>
/// <param name="branchSizes">The array of branch sizes for the discrete action space. Each index
/// contains the number of actions available for that branch.</param>
/// <returns>A Discrete ActionSpec initialized with the array of branch sizes.</returns>
public static ActionSpec MakeDiscrete(int[] branchSizes)
{
var numActions = branchSizes.Length;
var actuatorSpace = new ActionSpec(0, numActions, branchSizes);
return actuatorSpace;
}
ActionSpec(int numContinuousActions, int numDiscreteActions, int[] branchSizes = null)
{
NumContinuousActions = numContinuousActions;
NumDiscreteActions = numDiscreteActions;
BranchSizes = branchSizes;
SumOfDiscreteBranchSizes = branchSizes?.Sum() ?? 0;
}
}
}

3
com.unity.ml-agents/Runtime/Actuators/ActionSpec.cs.meta


fileFormatVersion: 2
guid: ecdd6deefba1416ca149fe09d2a5afd8
timeCreated: 1595892361

17
com.unity.ml-agents/Runtime/Actuators/ActuatorComponent.cs


using UnityEngine;
namespace Unity.MLAgents.Actuators
{
/// <summary>
/// Editor components for creating Actuators. Generally an IActuator component should
/// have a corresponding ActuatorComponent.
/// </summary>
internal abstract class ActuatorComponent : MonoBehaviour
{
/// <summary>
/// Create the IActuator. This is called by the Agent when it is initialized.
/// </summary>
/// <returns>Created IActuator object.</returns>
public abstract IActuator CreateActuator();
}
}

3
com.unity.ml-agents/Runtime/Actuators/ActuatorComponent.cs.meta


fileFormatVersion: 2
guid: 77cefae5f6d841be9ff80b41293d271b
timeCreated: 1593017318

150
com.unity.ml-agents/Runtime/Actuators/ActuatorDiscreteActionMask.cs


using System;
using System.Collections.Generic;
using System.Linq;
namespace Unity.MLAgents.Actuators
{
/// <summary>
/// Implementation of IDiscreteActionMask that allows writing to the action mask from an <see cref="IActuator"/>.
/// </summary>
internal class ActuatorDiscreteActionMask : IDiscreteActionMask
{
/// When using discrete control, holds the starting indices of the actions
/// when all the branches are concatenated with each other.
int[] m_StartingActionIndices;
int[] m_BranchSizes;
bool[] m_CurrentMask;
IList<IActuator> m_Actuators;
readonly int m_SumOfDiscreteBranchSizes;
readonly int m_NumBranches;
/// <summary>
/// The offset into the branches array that is used when actuators are writing to the action mask.
/// </summary>
public int CurrentBranchOffset { get; set; }
internal ActuatorDiscreteActionMask(IList<IActuator> actuators, int sumOfDiscreteBranchSizes, int numBranches)
{
m_Actuators = actuators;
m_SumOfDiscreteBranchSizes = sumOfDiscreteBranchSizes;
m_NumBranches = numBranches;
}
/// <inheritdoc cref="IDiscreteActionMask.WriteMask"/>
public void WriteMask(int branch, IEnumerable<int> actionIndices)
{
LazyInitialize();
// Perform the masking
foreach (var actionIndex in actionIndices)
{
#if DEBUG
if (branch >= m_NumBranches || actionIndex >= m_BranchSizes[CurrentBranchOffset + branch])
{
throw new UnityAgentsException(
"Invalid Action Masking: Action Mask is too large for specified branch.");
}
#endif
m_CurrentMask[actionIndex + m_StartingActionIndices[CurrentBranchOffset + branch]] = true;
}
}
void LazyInitialize()
{
if (m_BranchSizes == null)
{
m_BranchSizes = new int[m_NumBranches];
var start = 0;
for (var i = 0; i < m_Actuators.Count; i++)
{
var actuator = m_Actuators[i];
var branchSizes = actuator.ActionSpec.BranchSizes;
Array.Copy(branchSizes, 0, m_BranchSizes, start, branchSizes.Length);
start += branchSizes.Length;
}
}
// By default, the masks are null. If we want to specify a new mask, we allocate
// the action mask array (all entries start unmasked).
if (m_CurrentMask == null)
{
m_CurrentMask = new bool[m_SumOfDiscreteBranchSizes];
}
// If this is the first time the masked actions are used, we generate the starting
// indices for each branch.
if (m_StartingActionIndices == null)
{
m_StartingActionIndices = Utilities.CumSum(m_BranchSizes);
}
}
/// <inheritdoc cref="IDiscreteActionMask.GetMask"/>
public bool[] GetMask()
{
#if DEBUG
if (m_CurrentMask != null)
{
AssertMask();
}
#endif
return m_CurrentMask;
}
/// <summary>
/// Makes sure that the current mask is usable.
/// </summary>
void AssertMask()
{
#if DEBUG
for (var branchIndex = 0; branchIndex < m_NumBranches; branchIndex++)
{
if (AreAllActionsMasked(branchIndex))
{
throw new UnityAgentsException(
"Invalid Action Masking : All the actions of branch " + branchIndex +
" are masked.");
}
}
#endif
}
/// <summary>
/// Resets the current mask for an agent.
/// </summary>
public void ResetMask()
{
if (m_CurrentMask != null)
{
Array.Clear(m_CurrentMask, 0, m_CurrentMask.Length);
}
}
/// <summary>
/// Checks if all the actions in the input branch are masked.
/// </summary>
/// <param name="branch"> The index of the branch to check.</param>
/// <returns> True if all the actions of the branch are masked.</returns>
bool AreAllActionsMasked(int branch)
{
if (m_CurrentMask == null)
{
return false;
}
var start = m_StartingActionIndices[branch];
var end = m_StartingActionIndices[branch + 1];
for (var i = start; i < end; i++)
{
if (!m_CurrentMask[i])
{
return false;
}
}
return true;
}
}
}
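The bookkeeping above boils down to: the starting indices are the cumulative sum of the branch sizes, and masking action a on branch b sets the flat index start[b] + a. A small Python sketch with hypothetical branch sizes (illustrative only, not the C# API):

from itertools import accumulate

branch_sizes = [3, 2, 4]
starting_indices = [0] + list(accumulate(branch_sizes))  # [0, 3, 5, 9], like Utilities.CumSum
mask = [False] * sum(branch_sizes)

def write_mask(branch, action_indices):
    for action_index in action_indices:
        assert action_index < branch_sizes[branch], "mask index too large for the branch"
        mask[starting_indices[branch] + action_index] = True

write_mask(1, [0])  # masks flat index 3
write_mask(2, [3])  # masks flat index 8
assert [i for i, masked in enumerate(mask) if masked] == [3, 8]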

3
com.unity.ml-agents/Runtime/Actuators/ActuatorDiscreteActionMask.cs.meta


fileFormatVersion: 2
guid: d2a19e2f43fd4637a38d42b2a5f989f3
timeCreated: 1595459316

415
com.unity.ml-agents/Runtime/Actuators/ActuatorManager.cs


using System;
using System.Collections;
using System.Collections.Generic;
using UnityEngine;
namespace Unity.MLAgents.Actuators
{
/// <summary>
/// A class that manages the delegation of events, action buffers, and action mask for a list of IActuators.
/// </summary>
internal class ActuatorManager : IList<IActuator>
{
// IActuators managed by this object.
IList<IActuator> m_Actuators;
// An implementation of IDiscreteActionMask that allows for writing to it based on an offset.
ActuatorDiscreteActionMask m_DiscreteActionMask;
/// <summary>
/// Flag used to check if our IActuators are ready for execution.
/// </summary>
/// <seealso cref="ReadyActuatorsForExecution(IList{IActuator}, int, int, int)"/>
bool m_ReadyForExecution;
/// <summary>
/// The sum of all of the discrete branches for all of the <see cref="IActuator"/>s in this manager.
/// </summary>
internal int SumOfDiscreteBranchSizes { get; private set; }
/// <summary>
/// The number of the discrete branches for all of the <see cref="IActuator"/>s in this manager.
/// </summary>
internal int NumDiscreteActions { get; private set; }
/// <summary>
/// The number of continuous actions for all of the <see cref="IActuator"/>s in this manager.
/// </summary>
internal int NumContinuousActions { get; private set; }
/// <summary>
/// Returns the total number of actions, calculated as <see cref="NumContinuousActions"/> + <see cref="NumDiscreteActions"/>.
/// </summary>
public int TotalNumberOfActions => NumContinuousActions + NumDiscreteActions;
/// <summary>
/// Gets the <see cref="IDiscreteActionMask"/> managed by this object.
/// </summary>
public ActuatorDiscreteActionMask DiscreteActionMask => m_DiscreteActionMask;
/// <summary>
/// Returns the previously stored continuous actions for the actuators in this list.
/// </summary>
public float[] StoredContinuousActions { get; private set; }
/// <summary>
/// Returns the previously stored discrete actions for the actuators in this list.
/// </summary>
public int[] StoredDiscreteActions { get; private set; }
/// <summary>
/// Create an ActuatorList with a preset capacity.
/// </summary>
/// <param name="capacity">The capacity of the list to create.</param>
public ActuatorManager(int capacity = 0)
{
m_Actuators = new List<IActuator>(capacity);
}
/// <summary>
/// <see cref="ReadyActuatorsForExecution(IList{IActuator}, int, int, int)"/>
/// </summary>
void ReadyActuatorsForExecution()
{
ReadyActuatorsForExecution(m_Actuators, NumContinuousActions, SumOfDiscreteBranchSizes,
NumDiscreteActions);
}
/// <summary>
/// This method validates that all <see cref="IActuator"/>s have unique names and equivalent action space types
/// if the `DEBUG` preprocessor macro is defined, and allocates the appropriate buffers to manage the actions for
/// all of the <see cref="IActuator"/>s that may live on a particular object.
/// </summary>
/// <param name="actuators">The list of actuators to validate and allocate buffers for.</param>
/// <param name="numContinuousActions">The total number of continuous actions for all of the actuators.</param>
/// <param name="sumOfDiscreteBranches">The total sum of the discrete branches for all of the actuators in order
/// to be able to allocate an <see cref="IDiscreteActionMask"/>.</param>
/// <param name="numDiscreteBranches">The number of discrete branches for all of the actuators.</param>
internal void ReadyActuatorsForExecution(IList<IActuator> actuators, int numContinuousActions, int sumOfDiscreteBranches, int numDiscreteBranches)
{
if (m_ReadyForExecution)
{
return;
}
#if DEBUG
// Make sure the names are actually unique
// Make sure all Actuators have the same SpaceType
ValidateActuators();
#endif
// Sort the Actuators by name to ensure determinism
SortActuators();
StoredContinuousActions = numContinuousActions == 0 ? Array.Empty<float>() : new float[numContinuousActions];
StoredDiscreteActions = numDiscreteBranches == 0 ? Array.Empty<int>() : new int[numDiscreteBranches];
m_DiscreteActionMask = new ActuatorDiscreteActionMask(actuators, sumOfDiscreteBranches, numDiscreteBranches);
m_ReadyForExecution = true;
}
/// <summary>
/// Updates the local action buffer with the action buffer passed in. If the buffer
/// passed in is null, the local action buffer will be cleared.
/// </summary>
/// <param name="continuousActionBuffer">The action buffer which contains all of the
/// continuous actions for the IActuators in this list.</param>
/// <param name="discreteActionBuffer">The action buffer which contains all of the
/// discrete actions for the IActuators in this list.</param>
public void UpdateActions(float[] continuousActionBuffer, int[] discreteActionBuffer)
{
ReadyActuatorsForExecution();
UpdateActionArray(continuousActionBuffer, StoredContinuousActions);
UpdateActionArray(discreteActionBuffer, StoredDiscreteActions);
}
static void UpdateActionArray<T>(T[] sourceActionBuffer, T[] destination)
{
if (sourceActionBuffer == null || sourceActionBuffer.Length == 0)
{
Array.Clear(destination, 0, destination.Length);
}
else
{
Debug.Assert(sourceActionBuffer.Length == destination.Length,
$"sourceActionBuffer:{sourceActionBuffer.Length} is a different" +
$" size than destination: {destination.Length}.");
Array.Copy(sourceActionBuffer, destination, destination.Length);
}
}
/// <summary>
/// This method will trigger the writing to the <see cref="IDiscreteActionMask"/> by all of the actuators
/// managed by this object.
/// </summary>
public void WriteActionMask()
{
ReadyActuatorsForExecution();
m_DiscreteActionMask.ResetMask();
var offset = 0;
for (var i = 0; i < m_Actuators.Count; i++)
{
var actuator = m_Actuators[i];
m_DiscreteActionMask.CurrentBranchOffset = offset;
actuator.WriteDiscreteActionMask(m_DiscreteActionMask);
offset += actuator.ActionSpec.NumDiscreteActions;
}
}
/// <summary>
/// Iterates through all of the IActuators in this list and calls their
/// <see cref="IActionReceiver.OnActionReceived"/> method on them with the appropriate
/// <see cref="ActionSegment{T}"/>s depending on their <see cref="IActionReceiver.ActionSpec"/>.
/// </summary>
public void ExecuteActions()
{
ReadyActuatorsForExecution();
var continuousStart = 0;
var discreteStart = 0;
for (var i = 0; i < m_Actuators.Count; i++)
{
var actuator = m_Actuators[i];
var numContinuousActions = actuator.ActionSpec.NumContinuousActions;
var numDiscreteActions = actuator.ActionSpec.NumDiscreteActions;
var continuousActions = ActionSegment<float>.Empty;
if (numContinuousActions > 0)
{
continuousActions = new ActionSegment<float>(StoredContinuousActions,
continuousStart,
numContinuousActions);
}
var discreteActions = ActionSegment<int>.Empty;
if (numDiscreteActions > 0)
{
discreteActions = new ActionSegment<int>(StoredDiscreteActions,
discreteStart,
numDiscreteActions);
}
actuator.OnActionReceived(new ActionBuffers(continuousActions, discreteActions));
continuousStart += numContinuousActions;
discreteStart += numDiscreteActions;
}
}
/// <summary>
/// Resets the <see cref="StoredContinuousActions"/> and <see cref="StoredDiscreteActions"/> buffers to be all
/// zeros and calls <see cref="IActuator.ResetData"/> on each <see cref="IActuator"/> managed by this object.
/// </summary>
public void ResetData()
{
if (!m_ReadyForExecution)
{
return;
}
Array.Clear(StoredContinuousActions, 0, StoredContinuousActions.Length);
Array.Clear(StoredDiscreteActions, 0, StoredDiscreteActions.Length);
for (var i = 0; i < m_Actuators.Count; i++)
{
m_Actuators[i].ResetData();
}
}
/// <summary>
/// Sorts the <see cref="IActuator"/>s according to their <see cref="IActuator.GetName"/> value.
/// </summary>
void SortActuators()
{
((List<IActuator>)m_Actuators).Sort((x, y) => x.Name.CompareTo(y.Name));
}
/// <summary>
/// Validates that the IActuators managed by this object have unique names and equivalent action space types.
/// Each Actuator needs to have a unique name in order for this object to ensure that the storage of action
/// buffers, and execution of Actuators remains deterministic across different sessions of running.
/// </summary>
void ValidateActuators()
{
for (var i = 0; i < m_Actuators.Count - 1; i++)
{
Debug.Assert(
!m_Actuators[i].Name.Equals(m_Actuators[i + 1].Name),
"Actuator names must be unique.");
var first = m_Actuators[i].ActionSpec;
var second = m_Actuators[i + 1].ActionSpec;
Debug.Assert(first.NumContinuousActions > 0 == second.NumContinuousActions > 0,
"Actuators on the same Agent must have the same action SpaceType.");
}
}
/// <summary>
/// Helper method to update bookkeeping items around buffer management for actuators added to this object.
/// </summary>
/// <param name="actuatorItem">The IActuator to keep bookkeeping for.</param>
void AddToBufferSizes(IActuator actuatorItem)
{
if (actuatorItem == null)
{
return;
}
NumContinuousActions += actuatorItem.ActionSpec.NumContinuousActions;
NumDiscreteActions += actuatorItem.ActionSpec.NumDiscreteActions;
SumOfDiscreteBranchSizes += actuatorItem.ActionSpec.SumOfDiscreteBranchSizes;
}
/// <summary>
/// Helper method to update bookkeeping items around buffer management for actuators removed from this object.
/// </summary>
/// <param name="actuatorItem">The IActuator to keep bookkeeping for.</param>
void SubtractFromBufferSize(IActuator actuatorItem)
{
if (actuatorItem == null)
{
return;
}
NumContinuousActions -= actuatorItem.ActionSpec.NumContinuousActions;
NumDiscreteActions -= actuatorItem.ActionSpec.NumDiscreteActions;
SumOfDiscreteBranchSizes -= actuatorItem.ActionSpec.SumOfDiscreteBranchSizes;
}
/// <summary>
/// Sets all of the bookkeeping items back to 0.
/// </summary>
void ClearBufferSizes()
{
NumContinuousActions = NumDiscreteActions = SumOfDiscreteBranchSizes = 0;
}
/*********************************************************************************
* IList implementation that delegates to m_Actuators List. *
*********************************************************************************/
/// <summary>
/// <inheritdoc cref="IEnumerable{T}.GetEnumerator"/>
/// </summary>
public IEnumerator<IActuator> GetEnumerator()
{
return m_Actuators.GetEnumerator();
}
/// <summary>
/// <inheritdoc cref="IList{T}.GetEnumerator"/>
/// </summary>
IEnumerator IEnumerable.GetEnumerator()
{
return ((IEnumerable)m_Actuators).GetEnumerator();
}
/// <summary>
/// <inheritdoc cref="ICollection{T}.Add"/>
/// </summary>
/// <param name="item"></param>
public void Add(IActuator item)
{
Debug.Assert(m_ReadyForExecution == false,
"Cannot add to the ActuatorManager after its buffers have been initialized");
m_Actuators.Add(item);
AddToBufferSizes(item);
}
/// <summary>
/// <inheritdoc cref="ICollection{T}.Clear"/>
/// </summary>
public void Clear()
{
Debug.Assert(m_ReadyForExecution == false,
"Cannot clear the ActuatorManager after its buffers have been initialized");
m_Actuators.Clear();
ClearBufferSizes();
}
/// <summary>
/// <inheritdoc cref="ICollection{T}.Contains"/>
/// </summary>
public bool Contains(IActuator item)
{
return m_Actuators.Contains(item);
}
/// <summary>
/// <inheritdoc cref="ICollection{T}.CopyTo"/>
/// </summary>
public void CopyTo(IActuator[] array, int arrayIndex)
{
m_Actuators.CopyTo(array, arrayIndex);
}
/// <summary>
/// <inheritdoc cref="ICollection{T}.Remove"/>
/// </summary>
public bool Remove(IActuator item)
{
Debug.Assert(m_ReadyForExecution == false,
"Cannot remove from the ActuatorManager after its buffers have been initialized");
if (m_Actuators.Remove(item))
{
SubtractFromBufferSize(item);
return true;
}
return false;
}
/// <summary>
/// <inheritdoc cref="ICollection{T}.Count"/>
/// </summary>
public int Count => m_Actuators.Count;
/// <summary>
/// <inheritdoc cref="ICollection{T}.IsReadOnly"/>
/// </summary>
public bool IsReadOnly => m_Actuators.IsReadOnly;
/// <summary>
/// <inheritdoc cref="IList{T}.IndexOf"/>
/// </summary>
public int IndexOf(IActuator item)
{
return m_Actuators.IndexOf(item);
}
/// <summary>
/// <inheritdoc cref="IList{T}.Insert"/>
/// </summary>
public void Insert(int index, IActuator item)
{
Debug.Assert(m_ReadyForExecution == false,
"Cannot insert into the ActuatorManager after its buffers have been initialized");
m_Actuators.Insert(index, item);
AddToBufferSizes(item);
}
/// <summary>
/// <inheritdoc cref="IList{T}.RemoveAt"/>
/// </summary>
public void RemoveAt(int index)
{
Debug.Assert(m_ReadyForExecution == false,
"Cannot remove from the ActuatorManager after its buffers have been initialized");
var actuator = m_Actuators[index];
SubtractFromBufferSize(actuator);
m_Actuators.RemoveAt(index);
}
/// <summary>
/// <inheritdoc cref="IList{T}.this"/>
/// </summary>
public IActuator this[int index]
{
get => m_Actuators[index];
set
{
Debug.Assert(m_ReadyForExecution == false,
"Cannot modify the ActuatorManager after its buffers have been initialized");
var old = m_Actuators[index];
SubtractFromBufferSize(old);
m_Actuators[index] = value;
AddToBufferSizes(value);
}
}
}
}
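ExecuteActions above walks the flat stored buffers and hands each actuator a contiguous slice sized by its ActionSpec. A short Python sketch of that slicing with hypothetical actuator specs (illustrative only, not the C# API):

specs = [
    {"name": "wheels", "num_continuous": 2, "num_discrete_branches": 0},
    {"name": "gripper", "num_continuous": 0, "num_discrete_branches": 2},
]
stored_continuous = [0.5, -0.25]  # all continuous actions, concatenated
stored_discrete = [1, 0]          # one chosen action per discrete branch, concatenated

continuous_start = discrete_start = 0
for spec in specs:
    cont = stored_continuous[continuous_start:continuous_start + spec["num_continuous"]]
    disc = stored_discrete[discrete_start:discrete_start + spec["num_discrete_branches"]]
    print(spec["name"], cont, disc)  # wheels [0.5, -0.25] [] / gripper [] [1, 0]
    continuous_start += spec["num_continuous"]
    discrete_start += spec["num_discrete_branches"]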

Some files were not shown because too many files changed in this diff
