
Merge branch 'master' into develop-hybrid-actions-singleton

/develop/actionmodel-csharp
Andrew Cohen, 4 years ago
Current commit: e5f14400
216 files changed, 1771 insertions, 848 deletions
Changed files (first 100 of 216 shown; the number in parentheses is lines changed):
  1. .circleci/config.yml (12)
  2. .github/workflows/pre-commit.yml (4)
  3. .yamato/com.unity.ml-agents-promotion.yml (47)
  4. .yamato/com.unity.ml-agents-test.yml (14)
  5. .yamato/gym-interface-test.yml (3)
  6. .yamato/python-ll-api-test.yml (3)
  7. .yamato/standalone-build-test.yml (9)
  8. .yamato/training-int-tests.yml (8)
  9. DevProject/ProjectSettings/EditorBuildSettings.asset (5)
  10. Dockerfile (159)
  11. Project/Assets/ML-Agents/Examples/3DBall/Prefabs/3DBall.prefab (26)
  12. Project/Assets/ML-Agents/Examples/3DBall/Scenes/3DBall.unity (19)
  13. Project/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DAgent.cs (14)
  14. Project/Assets/ML-Agents/Examples/Crawler/Scenes/CrawlerStaticVariableSpeed.unity (23)
  15. Project/Assets/ML-Agents/Examples/FoodCollector/Prefabs/VisualFoodCollectorArea.prefab (154)
  16. Project/Assets/ML-Agents/Examples/FoodCollector/Scenes/VisualFoodCollector.unity (73)
  17. Project/Assets/ML-Agents/Examples/FoodCollector/Scripts/FoodCollectorAgent.cs (8)
  18. Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ModelOverrider.cs (125)
  19. README.md (10)
  20. com.unity.ml-agents.extensions/Documentation~/Grid-Sensor.md (1)
  21. com.unity.ml-agents.extensions/Documentation~/com.unity.ml-agents.extensions.md (31)
  22. com.unity.ml-agents.extensions/README.md (4)
  23. com.unity.ml-agents.extensions/Runtime/Sensors/GridSensor.cs (4)
  24. com.unity.ml-agents.extensions/package.json (2)
  25. com.unity.ml-agents/CHANGELOG.md (38)
  26. com.unity.ml-agents/Documentation~/com.unity.ml-agents.md (2)
  27. com.unity.ml-agents/Editor/CameraSensorComponentEditor.cs (1)
  28. com.unity.ml-agents/Editor/RenderTextureSensorComponentEditor.cs (1)
  29. com.unity.ml-agents/Runtime/Academy.cs (49)
  30. com.unity.ml-agents/Runtime/Actuators/IActionReceiver.cs (2)
  31. com.unity.ml-agents/Runtime/Actuators/IDiscreteActionMask.cs (2)
  32. com.unity.ml-agents/Runtime/Agent.cs (26)
  33. com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs (62)
  34. com.unity.ml-agents/Runtime/Communicator/UnityRLCapabilities.cs (4)
  35. com.unity.ml-agents/Runtime/Demonstrations/DemonstrationRecorder.cs (2)
  36. com.unity.ml-agents/Runtime/DiscreteActionMasker.cs (2)
  37. com.unity.ml-agents/Runtime/Grpc/CommunicatorObjects/Capabilities.cs (40)
  38. com.unity.ml-agents/Runtime/Grpc/CommunicatorObjects/Observation.cs (34)
  39. com.unity.ml-agents/Runtime/SensorHelper.cs (64)
  40. com.unity.ml-agents/Runtime/Sensors/CameraSensor.cs (2)
  41. com.unity.ml-agents/Runtime/Sensors/CameraSensorComponent.cs (28)
  42. com.unity.ml-agents/Runtime/Sensors/ObservationWriter.cs (52)
  43. com.unity.ml-agents/Runtime/Sensors/RenderTextureSensor.cs (2)
  44. com.unity.ml-agents/Runtime/Sensors/RenderTextureSensorComponent.cs (28)
  45. com.unity.ml-agents/Runtime/Sensors/StackingSensor.cs (198)
  46. com.unity.ml-agents/Runtime/Utilities.cs (47)
  47. com.unity.ml-agents/Tests/Editor/Communicator/GrpcExtensionsTests.cs (31)
  48. com.unity.ml-agents/Tests/Editor/Sensor/StackingSensorTests.cs (156)
  49. com.unity.ml-agents/Tests/Editor/Sensor/VectorSensorTests.cs (10)
  50. com.unity.ml-agents/Tests/Runtime/RuntimeAPITest.cs (4)
  51. com.unity.ml-agents/package.json (2)
  52. docs/Installation-Anaconda-Windows.md (4)
  53. docs/Installation.md (6)
  54. docs/Learning-Environment-Create-New.md (4)
  55. docs/Learning-Environment-Design-Agents.md (2)
  56. docs/Learning-Environment-Examples.md (10)
  57. docs/ML-Agents-Overview.md (24)
  58. docs/Migrating.md (16)
  59. docs/Training-Configuration-File.md (13)
  60. docs/Training-on-Amazon-Web-Service.md (2)
  61. docs/Unity-Inference-Engine.md (4)
  62. gym-unity/gym_unity/__init__.py (2)
  63. gym-unity/setup.py (4)
  64. ml-agents-envs/mlagents_envs/__init__.py (2)
  65. ml-agents-envs/mlagents_envs/communicator_objects/capabilities_pb2.py (11)
  66. ml-agents-envs/mlagents_envs/communicator_objects/capabilities_pb2.pyi (6)
  67. ml-agents-envs/mlagents_envs/communicator_objects/observation_pb2.py (19)
  68. ml-agents-envs/mlagents_envs/communicator_objects/observation_pb2.pyi (6)
  69. ml-agents-envs/mlagents_envs/environment.py (4)
  70. ml-agents-envs/mlagents_envs/rpc_utils.py (81)
  71. ml-agents-envs/mlagents_envs/tests/test_rpc_utils.py (76)
  72. ml-agents-envs/setup.py (4)
  73. ml-agents/mlagents/torch_utils/cpu_utils.py (7)
  74. ml-agents/mlagents/trainers/__init__.py (2)
  75. ml-agents/mlagents/trainers/buffer.py (9)
  76. ml-agents/mlagents/trainers/environment_parameter_manager.py (12)
  77. ml-agents/mlagents/trainers/learn.py (5)
  78. ml-agents/mlagents/trainers/model_saver/tf_model_saver.py (3)
  79. ml-agents/mlagents/trainers/optimizer/tf_optimizer.py (4)
  80. ml-agents/mlagents/trainers/policy/checkpoint_manager.py (10)
  81. ml-agents/mlagents/trainers/policy/torch_policy.py (7)
  82. ml-agents/mlagents/trainers/ppo/trainer.py (2)
  83. ml-agents/mlagents/trainers/sac/optimizer_torch.py (151)
  84. ml-agents/mlagents/trainers/sac/trainer.py (6)
  85. ml-agents/mlagents/trainers/settings.py (38)
  86. ml-agents/mlagents/trainers/tests/test_learn.py (2)
  87. ml-agents/mlagents/trainers/tests/test_rl_trainer.py (8)
  88. ml-agents/mlagents/trainers/tests/test_settings.py (16)
  89. ml-agents/mlagents/trainers/tests/test_subprocess_env_manager.py (10)
  90. ml-agents/mlagents/trainers/tests/test_trainer_controller.py (1)
  91. ml-agents/mlagents/trainers/tests/test_trainer_util.py (27)
  92. ml-agents/mlagents/trainers/tests/test_training_status.py (24)
  93. ml-agents/mlagents/trainers/tests/torch/saver/test_saver.py (4)
  94. ml-agents/mlagents/trainers/tests/torch/saver/test_saver_reward_providers.py (20)
  95. ml-agents/mlagents/trainers/tests/torch/test_policy.py (10)
  96. ml-agents/mlagents/trainers/tests/torch/test_ppo.py (12)
  97. ml-agents/mlagents/trainers/tests/torch/test_sac.py (10)
  98. ml-agents/mlagents/trainers/tests/torch/test_simple_rl.py (253)
  99. ml-agents/mlagents/trainers/tests/torch/test_utils.py (17)
  100. ml-agents/mlagents/trainers/torch/components/bc/module.py (2)

.circleci/config.yml (12 lines changed)


filters:
tags:
# Matches e.g. "release_123"
only: /^release_[0-9]+$/
only: /^DEPRECATED_release_[0-9]+$/
branches:
ignore: /.*/
- deploy:

tags:
# Matches e.g. "release_123"
only: /^release_[0-9]+$/
only: /^DEPRECATED_release_[0-9]+$/
branches:
ignore: /.*/
- deploy:

tags:
# Matches e.g. "release_123"
only: /^release_[0-9]+$/
only: /^DEPRECATED_release_[0-9]+$/
branches:
ignore: /.*/
# These deploy jobs upload to the pypi test repo. They have different tag triggers than the real ones.

filters:
tags:
# Matches e.g. "release_123_test456
only: /^release_[0-9]+_test[0-9]+$/
only: /^DEPRECATED_release_[0-9]+_test[0-9]+$/
branches:
ignore: /.*/
- deploy:

filters:
tags:
# Matches e.g. "release_123_test456
only: /^release_[0-9]+_test[0-9]+$/
only: /^DEPRECATED_release_[0-9]+_test[0-9]+$/
branches:
ignore: /.*/
- deploy:

filters:
tags:
# Matches e.g. "release_123_test456
only: /^release_[0-9]+_test[0-9]+$/
only: /^DEPRECATED_release_[0-9]+_test[0-9]+$/
branches:
ignore: /.*/

.github/workflows/pre-commit.yml (4 lines changed)


runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions/setup-python@v1
- uses: actions/setup-python@v2
with:
python-version: 3.7.x
- uses: actions/setup-ruby@v1
with:
ruby-version: '2.6'

.yamato/com.unity.ml-agents-promotion.yml (47 lines changed)


test_editors:
- version: 2019.3
test_platforms:
- name: win
type: Unity::VM
image: package-ci/win10:stable
flavor: b1.large
---
{% for editor in test_editors %}
{% for platform in test_platforms %}
promotion_test_{{ platform.name }}_{{ editor.version }}:
name : Promotion Test {{ editor.version }} on {{ platform.name }}
agent:
type: {{ platform.type }}
image: {{ platform.image }}
flavor: {{ platform.flavor}}
variables:
UPMCI_PROMOTION: 1
commands:
- npm install upm-ci-utils@stable -g --registry https://artifactory.prd.cds.internal.unity3d.com/artifactory/api/npm/upm-npm
- upm-ci package test --unity-version {{ editor.version }} --package-path com.unity.ml-agents
artifacts:
logs:
paths:
- "upm-ci~/test-results/**/*"
dependencies:
- .yamato/com.unity.ml-agents-pack.yml#pack
{% endfor %}
{% endfor %}
promotion_test_trigger:
name: Promotion Tests Trigger
dependencies:
{% for editor in test_editors %}
{% for platform in test_platforms %}
- .yamato/com.unity.ml-agents-promotion.yml#promotion_test_{{platform.name}}_{{editor.version}}
{% endfor %}
{% endfor %}
promote:
name: Promote to Production
agent:

- "upm-ci~/packages/*.tgz"
dependencies:
- .yamato/com.unity.ml-agents-pack.yml#pack
- .yamato/com.unity.ml-agents-test.yml#all_package_tests
{% for editor in test_editors %}
{% for platform in test_platforms %}
- .yamato/com.unity.ml-agents-promotion.yml#promotion_test_{{ platform.name }}_{{ editor.version }}
{% endfor %}
{% endfor %}

.yamato/com.unity.ml-agents-test.yml (14 lines changed)


- version: 2018.4
# 2018.4 doesn't support code-coverage
enableCodeCoverage: !!bool false
# We want some scene tests to run in the DevProject, but packages there only support 2019+
testProject: Project
testProject: DevProject
testProject: DevProject
testProject: DevProject
testProject: DevProject
test_platforms:
- name: win
type: Unity::VM

type: Unity::VM
image: package-ci/ubuntu:stable
flavor: b1.medium
packages:
- name: com.unity.ml-agents
assembly: Unity.ML-Agents

flavor: {{ platform.flavor}}
commands:
- npm install upm-ci-utils@stable -g --registry https://artifactory.prd.cds.internal.unity3d.com/artifactory/api/npm/upm-npm
- upm-ci project test -u {{ editor.version }} --project-path Project --package-filter {{ package.name }} {{ coverageOptions }} --extra-utr-arg "reruncount=2"
- upm-ci project test -u {{ editor.version }} --project-path {{ editor.testProject }} --package-filter {{ package.name }} {{ coverageOptions }} --extra-utr-arg "reruncount=2"
{% if editor.enableCodeCoverage %}
- python3 ml-agents/tests/yamato/check_coverage_percent.py upm-ci~/test-results/ {{ package.minCoveragePct }}
{% endif %}

pull_request.target match "release.+") AND
NOT pull_request.draft AND
(pull_request.changes.any match "com.unity.ml-agents/**" OR
pull_request.changes.any match " {{ editor.testProject }}/**" OR
{% if package.name == "com.unity.ml-agents.extensions" %}
pull_request.changes.any match "com.unity.ml-agents.extensions/**" OR
{% endif %}

- python -m pip install unity-downloader-cli --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple --upgrade
- unity-downloader-cli -u trunk -c editor --wait --fast
- npm install upm-ci-utils@stable -g --registry https://artifactory.prd.cds.internal.unity3d.com/artifactory/api/npm/upm-npm
- upm-ci project test -u {{ editor.version }} --project-path Project --package-filter {{ package.name }} {{ coverageOptions }} --extra-utr-arg "reruncount=2"
- upm-ci project test -u {{ editor.version }} --project-path {{ editor.testProject }} --package-filter {{ package.name }} {{ coverageOptions }} --extra-utr-arg "reruncount=2"
{% if editor.enableCodeCoverage %}
- python3 ml-agents/tests/yamato/check_coverage_percent.py upm-ci~/test-results/ {{ package.minCoveragePct }}
{% endif %}

.yamato/gym-interface-test.yml (3 lines changed)


test_editors:
- version: 2019.4
{% metadata_file .yamato/test_versions.metafile %}
---
{% for editor in test_editors %}
test_gym_interface_{{ editor.version }}:

.yamato/python-ll-api-test.yml (3 lines changed)


test_editors:
- version: 2019.4
{% metadata_file .yamato/test_versions.metafile %}
---
{% for editor in test_editors %}
test_mac_ll_api_{{ editor.version }}:

.yamato/standalone-build-test.yml (9 lines changed)


test_editors:
- version: 2018.4
- version: 2019.3
{% metadata_file .yamato/test_versions.metafile %}
---
{% for editor in test_editors %}
test_mac_standalone_{{ editor.version }}:

UNITY_VERSION: {{ editor.version }}
commands:
- pip install pyyaml --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
# TODO remove the "--user" command and the path prefix when we can migrate away from the custom bokken image
- python -m pip install unity-downloader-cli --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple --upgrade --user
- /Users/bokken/Library/Python/3.7/bin/unity-downloader-cli -u {{ editor.version }} -c editor --wait --fast
- python -u -m ml-agents.tests.yamato.standalone_build_tests --scene=Assets/ML-Agents/TestScenes/TestCompressedGrid/TestGridCompressed.unity
- python -u -m ml-agents.tests.yamato.standalone_build_tests --scene=Assets/ML-Agents/TestScenes/TestCompressedTexture/TestTextureCompressed.unity
triggers:
cancel_old_ci: true
expression: |

.yamato/training-int-tests.yml (8 lines changed)


test_editors:
- version: 2018.4
- version: 2019.4
- version: 2020.1
{% metadata_file .yamato/test_versions.metafile %}
---
{% for editor in test_editors %}
test_mac_training_int_{{ editor.version }}:

UNITY_VERSION: {{ editor.version }}
commands:
- pip install pyyaml --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
# TODO remove the "--user" command and the path prefix when we can migrate away from the custom bokken image
- python -m pip install unity-downloader-cli --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple --upgrade --user
- /Users/bokken/Library/Python/3.7/bin/unity-downloader-cli -u {{ editor.version }} -c editor --wait --fast
- python -u -m ml-agents.tests.yamato.training_int_tests
# Backwards-compatibility tests.
# If we make a breaking change to the communication protocol, these will need

DevProject/ProjectSettings/EditorBuildSettings.asset (5 lines changed)


EditorBuildSettings:
m_ObjectHideFlags: 0
serializedVersion: 2
m_Scenes: []
m_Scenes:
- enabled: 1
path: Assets/ML-Agents/Scripts/Tests/Runtime/AcademyTest/AcademyStepperTestScene.unity
guid: 9bafc50b1e55b43b2b1ae9620f1f8311
m_configObjects: {}

Dockerfile (159 lines changed)


# Based off of python:3.6-slim, except that we are using ubuntu instead of debian.
FROM ubuntu:16.04
# ensure local python is preferred over distribution python
ENV PATH /usr/local/bin:$PATH
FROM nvidia/cuda:10.0-cudnn7-devel-ubuntu18.04
# http://bugs.python.org/issue19846
# > At the moment, setting "LANG=C" on a Linux system *fundamentally breaks Python 3*, and that's not OK.
ENV LANG C.UTF-8
RUN yes | unminimize
# runtime dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
ca-certificates \
libexpat1 \
libffi6 \
libgdbm3 \
libreadline6 \
libsqlite3-0 \
libssl1.0.0 \
&& rm -rf /var/lib/apt/lists/*
RUN echo "deb http://packages.cloud.google.com/apt cloud-sdk-xenial main" | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list
RUN wget https://packages.cloud.google.com/apt/doc/apt-key.gpg && apt-key add apt-key.gpg
RUN apt-get update && \
apt-get install -y --no-install-recommends wget curl tmux vim git gdebi-core \
build-essential python3-pip unzip google-cloud-sdk htop mesa-utils xorg-dev xorg \
libglvnd-dev libgl1-mesa-dev libegl1-mesa-dev libgles2-mesa-dev && \
wget http://security.ubuntu.com/ubuntu/pool/main/libx/libxfont/libxfont1_1.5.1-1ubuntu0.16.04.4_amd64.deb && \
wget http://security.ubuntu.com/ubuntu/pool/universe/x/xorg-server/xvfb_1.18.4-0ubuntu0.10_amd64.deb && \
yes | gdebi libxfont1_1.5.1-1ubuntu0.16.04.4_amd64.deb && \
yes | gdebi xvfb_1.18.4-0ubuntu0.10_amd64.deb
RUN python3 -m pip install --upgrade pip
RUN pip install setuptools==41.0.0
ENV GPG_KEY 0D96DF4D4110E5C43FBFB17F2D347EA6AA65421D
ENV PYTHON_VERSION 3.6.4
ENV LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH
RUN set -ex \
&& buildDeps=" \
dpkg-dev \
gcc \
libbz2-dev \
libc6-dev \
libexpat1-dev \
libffi-dev \
libgdbm-dev \
liblzma-dev \
libncursesw5-dev \
libreadline-dev \
libsqlite3-dev \
libssl-dev \
make \
tcl-dev \
tk-dev \
wget \
xz-utils \
zlib1g-dev \
# as of Stretch, "gpg" is no longer included by default
$(command -v gpg > /dev/null || echo 'gnupg dirmngr') \
" \
&& apt-get update && apt-get install -y $buildDeps --no-install-recommends && rm -rf /var/lib/apt/lists/* \
\
&& wget -O python.tar.xz "https://www.python.org/ftp/python/${PYTHON_VERSION%%[a-z]*}/Python-$PYTHON_VERSION.tar.xz" \
&& wget -O python.tar.xz.asc "https://www.python.org/ftp/python/${PYTHON_VERSION%%[a-z]*}/Python-$PYTHON_VERSION.tar.xz.asc" \
&& export GNUPGHOME="$(mktemp -d)" \
&& gpg --keyserver ha.pool.sks-keyservers.net --recv-keys "$GPG_KEY" \
&& gpg --batch --verify python.tar.xz.asc python.tar.xz \
&& rm -rf "$GNUPGHOME" python.tar.xz.asc \
&& mkdir -p /usr/src/python \
&& tar -xJC /usr/src/python --strip-components=1 -f python.tar.xz \
&& rm python.tar.xz \
\
&& cd /usr/src/python \
&& gnuArch="$(dpkg-architecture --query DEB_BUILD_GNU_TYPE)" \
&& ./configure \
--build="$gnuArch" \
--enable-loadable-sqlite-extensions \
--enable-shared \
--with-system-expat \
--with-system-ffi \
--without-ensurepip \
&& make -j "$(nproc)" \
&& make install \
&& ldconfig \
\
&& apt-get purge -y --auto-remove $buildDeps \
\
&& find /usr/local -depth \
\( \
\( -type d -a \( -name test -o -name tests \) \) \
-o \
\( -type f -a \( -name '*.pyc' -o -name '*.pyo' \) \) \
\) -exec rm -rf '{}' + \
&& rm -rf /usr/src/python
# make some useful symlinks that are expected to exist
RUN cd /usr/local/bin \
&& ln -s idle3 idle \
&& ln -s pydoc3 pydoc \
&& ln -s python3 python \
&& ln -s python3-config python-config
# if this is called "PIP_VERSION", pip explodes with "ValueError: invalid truth value '<VERSION>'"
ENV PYTHON_PIP_VERSION 9.0.3
RUN set -ex; \
\
apt-get update; \
apt-get install -y --no-install-recommends wget; \
rm -rf /var/lib/apt/lists/*; \
\
wget -O get-pip.py 'https://bootstrap.pypa.io/get-pip.py'; \
\
apt-get purge -y --auto-remove wget; \
\
python get-pip.py \
--disable-pip-version-check \
--no-cache-dir \
"pip==$PYTHON_PIP_VERSION" \
; \
pip --version; \
\
find /usr/local -depth \
\( \
\( -type d -a \( -name test -o -name tests \) \) \
-o \
\( -type f -a \( -name '*.pyc' -o -name '*.pyo' \) \) \
\) -exec rm -rf '{}' +; \
rm -f get-pip.py
RUN apt-get update && apt-get -y upgrade
# xvfb is used to do CPU based rendering of Unity
RUN apt-get install -y xvfb
# Install ml-agents-envs package locally
COPY ml-agents-envs /ml-agents-envs
WORKDIR /ml-agents-envs
RUN pip install -e .
# Install ml-agents package next
COPY ml-agents /ml-agents
#checkout ml-agents for SHA
RUN mkdir /ml-agents
RUN pip install -e .
# Port 5004 is the port used in Editor training.
# Environments will start from port 5005,
# so allow enough ports for several environments.
EXPOSE 5004-5050
ENTRYPOINT ["xvfb-run", "--auto-servernum", "--server-args='-screen 0 640x480x24'", "mlagents-learn"]
ARG SHA
RUN git init
RUN git remote add origin https://github.com/Unity-Technologies/ml-agents.git
RUN git fetch --depth 1 origin $SHA
RUN git checkout FETCH_HEAD
RUN pip install -e /ml-agents/ml-agents-envs
RUN pip install -e /ml-agents/ml-agents

Project/Assets/ML-Agents/Examples/3DBall/Prefabs/3DBall.prefab (26 lines changed)


m_Name:
m_EditorClassIdentifier:
m_BrainParameters:
vectorObservationSize: 8
numStackedVectorObservations: 1
vectorActionSize: 02000000
vectorActionDescriptions: []
vectorActionSpaceType: 1
VectorObservationSize: 8
NumStackedVectorObservations: 1
VectorActionSize: 02000000
VectorActionDescriptions: []
VectorActionSpaceType: 1
m_TeamID: 0
m_useChildSensors: 1
TeamId: 0
m_UseChildSensors: 1
m_UseChildActuators: 1
m_ObservableAttributeHandling: 0
--- !u!114 &114715123104194396
MonoBehaviour:
m_ObjectHideFlags: 0

m_Script: {fileID: 11500000, guid: aaba48bf82bee4751aa7b89569e57f73, type: 3}
m_Name:
m_EditorClassIdentifier:
maxStep: 5000
agentParameters:
maxStep: 0
hasUpgradedFromAgentParameters: 1
MaxStep: 5000
useVecObs: 1
--- !u!114 &1306725529891448089
MonoBehaviour:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
DecisionPeriod: 5
RepeatAction: 1
offsetStep: 0
TakeActionsBetweenDecisions: 1
--- !u!114 &1758424554059689351
MonoBehaviour:
m_ObjectHideFlags: 0

m_Script: {fileID: 11500000, guid: 3a6da8f78a394c6ab027688eab81e04d, type: 3}
m_Name:
m_EditorClassIdentifier:
debugCommandLineOverride:
--- !u!1 &1533320402322554
GameObject:
m_ObjectHideFlags: 0

Project/Assets/ML-Agents/Examples/3DBall/Scenes/3DBall.unity (19 lines changed)


m_ReflectionIntensity: 1
m_CustomReflection: {fileID: 0}
m_Sun: {fileID: 0}
m_IndirectSpecularColor: {r: 0.44971162, g: 0.49977726, b: 0.5756362, a: 1}
m_IndirectSpecularColor: {r: 0.44971168, g: 0.4997775, b: 0.57563686, a: 1}
m_UseRadianceAmbientProbe: 0
--- !u!157 &3
LightmapSettings:

m_Modification:
m_TransformParent: {fileID: 0}
m_Modifications:
- target: {fileID: 1321468028730240, guid: cfa81c019162c4e3caf6e2999c6fdf48, type: 3}
propertyPath: m_Name
value: 3DBall
objectReference: {fileID: 0}
- target: {fileID: 4679453577574622, guid: cfa81c019162c4e3caf6e2999c6fdf48, type: 3}
propertyPath: m_LocalPosition.x
value: 0

- target: {fileID: 4679453577574622, guid: cfa81c019162c4e3caf6e2999c6fdf48, type: 3}
propertyPath: m_RootOrder
value: 5
objectReference: {fileID: 0}
- target: {fileID: 4679453577574622, guid: cfa81c019162c4e3caf6e2999c6fdf48, type: 3}
propertyPath: m_LocalEulerAnglesHint.x
value: 0
objectReference: {fileID: 0}
- target: {fileID: 4679453577574622, guid: cfa81c019162c4e3caf6e2999c6fdf48, type: 3}
propertyPath: m_LocalEulerAnglesHint.y
value: 0
objectReference: {fileID: 0}
- target: {fileID: 4679453577574622, guid: cfa81c019162c4e3caf6e2999c6fdf48, type: 3}
propertyPath: m_LocalEulerAnglesHint.z
value: 0
objectReference: {fileID: 0}
m_RemovedComponents: []
m_SourcePrefab: {fileID: 100100000, guid: cfa81c019162c4e3caf6e2999c6fdf48, type: 3}

maximumDeltaTime: 0.33333334
solverIterations: 6
solverVelocityIterations: 1
reuseCollisionCallbacks: 1
--- !u!1 &1746325439
GameObject:
m_ObjectHideFlags: 0

Project/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DAgent.cs (14 lines changed)


{
[Header("Specific to Ball3D")]
public GameObject ball;
[Tooltip("Whether to use vector observation. This option should be checked " +
"in 3DBall scene, and unchecked in Visual3DBall scene. ")]
public bool useVecObs;
Rigidbody m_BallRb;
EnvironmentParameters m_ResetParams;

public override void CollectObservations(VectorSensor sensor)
{
sensor.AddObservation(gameObject.transform.rotation.z);
sensor.AddObservation(gameObject.transform.rotation.x);
sensor.AddObservation(ball.transform.position - gameObject.transform.position);
sensor.AddObservation(m_BallRb.velocity);
if (useVecObs)
{
sensor.AddObservation(gameObject.transform.rotation.z);
sensor.AddObservation(gameObject.transform.rotation.x);
sensor.AddObservation(ball.transform.position - gameObject.transform.position);
sensor.AddObservation(m_BallRb.velocity);
}
}
public override void OnActionReceived(ActionBuffers actionBuffers)

Project/Assets/ML-Agents/Examples/Crawler/Scenes/CrawlerStaticVariableSpeed.unity (23 lines changed)


objectReference: {fileID: 0}
m_RemovedComponents: []
m_SourcePrefab: {fileID: 100100000, guid: 0456c89e8c9c243d595b039fe7aa0bf9, type: 3}
--- !u!1 &441460235 stripped
GameObject:
m_CorrespondingSourceObject: {fileID: 4845971001715176661, guid: 0456c89e8c9c243d595b039fe7aa0bf9,
type: 3}
m_PrefabInstance: {fileID: 71447557}
m_PrefabAsset: {fileID: 0}
--- !u!114 &441460236
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 441460235}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: f2902496c0120472b90269f94a0aec7e, type: 3}
m_Name:
m_EditorClassIdentifier:
Record: 1
NumStepsToRecord: 10000
DemonstrationName: ExpCrawlerStaVS
DemonstrationDirectory:
--- !u!1001 &455366880
PrefabInstance:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
target: {fileID: 1053322438}
smoothingTime: 0
--- !u!81 &914210116
AudioListener:
m_ObjectHideFlags: 0

Project/Assets/ML-Agents/Examples/FoodCollector/Prefabs/VisualFoodCollectorArea.prefab (154 lines changed)


m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 1145096862361766}
m_LocalRotation: {x: -0, y: -0, z: -0, w: 1}
m_LocalRotation: {x: 0, y: 0, z: 0, w: 1}
m_LocalPosition: {x: 0, y: 0, z: 0}
m_LocalScale: {x: 1, y: 1, z: 1}
m_Children:

m_Name:
m_EditorClassIdentifier:
m_BrainParameters:
vectorObservationSize: 0
numStackedVectorObservations: 1
vectorActionSize: 03000000030000000300000002000000
vectorActionDescriptions: []
vectorActionSpaceType: 0
m_Model: {fileID: 0}
VectorObservationSize: 1
NumStackedVectorObservations: 1
VectorActionSize: 03000000030000000300000002000000
VectorActionDescriptions: []
VectorActionSpaceType: 0
m_Model: {fileID: 11400000, guid: c3b1eb0bcf06b4c0488599c7ab806de7, type: 3}
m_TeamID: 0
TeamId: 0
m_UseChildActuators: 1
m_ObservableAttributeHandling: 0
--- !u!114 &114380897261200276
MonoBehaviour:
m_ObjectHideFlags: 0

m_Script: {fileID: 11500000, guid: c66e6845309d241c78a6d77ee2567928, type: 3}
m_Name:
m_EditorClassIdentifier:
maxStep: 5000
agentParameters:
maxStep: 0
hasUpgradedFromAgentParameters: 1
MaxStep: 5000
area: {fileID: 1145096862361766}
turnSpeed: 300
moveSpeed: 2

myLaser: {fileID: 1900094563283840}
contribute: 0
useVectorObs: 0
useVectorFrozenFlag: 1
--- !u!114 &114326390494230518
MonoBehaviour:
m_ObjectHideFlags: 0

m_Script: {fileID: 11500000, guid: 282f342c2ab144bf38be65d4d0c4e07d, type: 3}
m_Name:
m_EditorClassIdentifier:
camera: {fileID: 20380145723616022}
sensorName: CameraSensor
width: 84
height: 84
grayscale: 0
compression: 1
m_Camera: {fileID: 20380145723616022}
m_SensorName: CameraSensor
m_Width: 84
m_Height: 84
m_Grayscale: 0
m_ObservationStacks: 1
m_Compression: 1
--- !u!114 &4034342608499629224
MonoBehaviour:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
DecisionPeriod: 5
RepeatAction: 1
offsetStep: 0
TakeActionsBetweenDecisions: 1
--- !u!1 &1179319070824364
GameObject:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
m_BrainParameters:
vectorObservationSize: 0
numStackedVectorObservations: 1
vectorActionSize: 03000000030000000300000002000000
vectorActionDescriptions: []
vectorActionSpaceType: 0
m_Model: {fileID: 0}
VectorObservationSize: 1
NumStackedVectorObservations: 1
VectorActionSize: 03000000030000000300000002000000
VectorActionDescriptions: []
VectorActionSpaceType: 0
m_Model: {fileID: 11400000, guid: c3b1eb0bcf06b4c0488599c7ab806de7, type: 3}
m_TeamID: 0
TeamId: 0
m_UseChildActuators: 1
m_ObservableAttributeHandling: 0
--- !u!114 &114869844339180154
MonoBehaviour:
m_ObjectHideFlags: 0

m_Script: {fileID: 11500000, guid: c66e6845309d241c78a6d77ee2567928, type: 3}
m_Name:
m_EditorClassIdentifier:
maxStep: 5000
agentParameters:
maxStep: 0
hasUpgradedFromAgentParameters: 1
MaxStep: 5000
area: {fileID: 1145096862361766}
turnSpeed: 300
moveSpeed: 2

myLaser: {fileID: 1307818939507544}
contribute: 0
useVectorObs: 0
useVectorFrozenFlag: 1
--- !u!114 &114429222608880102
MonoBehaviour:
m_ObjectHideFlags: 0

m_Script: {fileID: 11500000, guid: 282f342c2ab144bf38be65d4d0c4e07d, type: 3}
m_Name:
m_EditorClassIdentifier:
camera: {fileID: 20863703825242712}
sensorName: CameraSensor
width: 84
height: 84
grayscale: 0
compression: 1
m_Camera: {fileID: 20863703825242712}
m_SensorName: CameraSensor
m_Width: 84
m_Height: 84
m_Grayscale: 0
m_ObservationStacks: 1
m_Compression: 1
--- !u!114 &7234640249101665162
MonoBehaviour:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
DecisionPeriod: 5
RepeatAction: 1
offsetStep: 0
TakeActionsBetweenDecisions: 1
--- !u!1 &1353209702154624
GameObject:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
m_BrainParameters:
vectorObservationSize: 0
numStackedVectorObservations: 1
vectorActionSize: 03000000030000000300000002000000
vectorActionDescriptions: []
vectorActionSpaceType: 0
m_Model: {fileID: 0}
VectorObservationSize: 1
NumStackedVectorObservations: 1
VectorActionSize: 03000000030000000300000002000000
VectorActionDescriptions: []
VectorActionSpaceType: 0
m_Model: {fileID: 11400000, guid: c3b1eb0bcf06b4c0488599c7ab806de7, type: 3}
m_TeamID: 0
TeamId: 0
m_UseChildActuators: 1
m_ObservableAttributeHandling: 0
--- !u!114 &114484596947519388
MonoBehaviour:
m_ObjectHideFlags: 0

m_Script: {fileID: 11500000, guid: c66e6845309d241c78a6d77ee2567928, type: 3}
m_Name:
m_EditorClassIdentifier:
maxStep: 5000
agentParameters:
maxStep: 0
hasUpgradedFromAgentParameters: 1
MaxStep: 5000
area: {fileID: 1145096862361766}
turnSpeed: 300
moveSpeed: 2

myLaser: {fileID: 1898252046043334}
contribute: 0
useVectorObs: 0
useVectorFrozenFlag: 1
--- !u!114 &114036270357198286
MonoBehaviour:
m_ObjectHideFlags: 0

m_Script: {fileID: 11500000, guid: 282f342c2ab144bf38be65d4d0c4e07d, type: 3}
m_Name:
m_EditorClassIdentifier:
camera: {fileID: 20696931947702132}
sensorName: CameraSensor
width: 84
height: 84
grayscale: 0
compression: 1
m_Camera: {fileID: 20696931947702132}
m_SensorName: CameraSensor
m_Width: 84
m_Height: 84
m_Grayscale: 0
m_ObservationStacks: 1
m_Compression: 1
--- !u!114 &3164735207755090463
MonoBehaviour:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
DecisionPeriod: 5
RepeatAction: 1
offsetStep: 0
TakeActionsBetweenDecisions: 1
--- !u!1 &1399553220224106
GameObject:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
m_BrainParameters:
vectorObservationSize: 0
numStackedVectorObservations: 1
vectorActionSize: 03000000030000000300000002000000
vectorActionDescriptions: []
vectorActionSpaceType: 0
m_Model: {fileID: 0}
VectorObservationSize: 1
NumStackedVectorObservations: 1
VectorActionSize: 03000000030000000300000002000000
VectorActionDescriptions: []
VectorActionSpaceType: 0
m_Model: {fileID: 11400000, guid: c3b1eb0bcf06b4c0488599c7ab806de7, type: 3}
m_TeamID: 0
TeamId: 0
m_UseChildActuators: 1
m_ObservableAttributeHandling: 0
--- !u!114 &114729119221978826
MonoBehaviour:
m_ObjectHideFlags: 0

m_Script: {fileID: 11500000, guid: c66e6845309d241c78a6d77ee2567928, type: 3}
m_Name:
m_EditorClassIdentifier:
maxStep: 5000
agentParameters:
maxStep: 0
hasUpgradedFromAgentParameters: 1
MaxStep: 5000
area: {fileID: 1145096862361766}
turnSpeed: 300
moveSpeed: 2

myLaser: {fileID: 1779831409734062}
contribute: 0
useVectorObs: 0
useVectorFrozenFlag: 1
--- !u!114 &114322691115031348
MonoBehaviour:
m_ObjectHideFlags: 0

m_Script: {fileID: 11500000, guid: 282f342c2ab144bf38be65d4d0c4e07d, type: 3}
m_Name:
m_EditorClassIdentifier:
camera: {fileID: 20363738094913048}
sensorName: CameraSensor
width: 84
height: 84
grayscale: 0
compression: 1
m_Camera: {fileID: 20363738094913048}
m_SensorName: CameraSensor
m_Width: 84
m_Height: 84
m_Grayscale: 0
m_ObservationStacks: 1
m_Compression: 1
--- !u!114 &5903164052970896384
MonoBehaviour:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
DecisionPeriod: 5
RepeatAction: 1
offsetStep: 0
TakeActionsBetweenDecisions: 1
--- !u!1 &1971119195936814
GameObject:
m_ObjectHideFlags: 0

Project/Assets/ML-Agents/Examples/FoodCollector/Scenes/VisualFoodCollector.unity (73 lines changed)


m_Modification:
m_TransformParent: {fileID: 0}
m_Modifications:
- target: {fileID: 1145096862361766, guid: c85b585836e104587b4efdc4d8b9d62b, type: 3}
propertyPath: m_Name
value: VisualFoodCollectorArea
objectReference: {fileID: 0}
- target: {fileID: 4307641258646068, guid: c85b585836e104587b4efdc4d8b9d62b, type: 3}
propertyPath: m_LocalPosition.x
value: 0

- target: {fileID: 4307641258646068, guid: c85b585836e104587b4efdc4d8b9d62b, type: 3}
propertyPath: m_RootOrder
value: 5
objectReference: {fileID: 0}
- target: {fileID: 4307641258646068, guid: c85b585836e104587b4efdc4d8b9d62b, type: 3}
propertyPath: m_LocalEulerAnglesHint.x
value: 0
objectReference: {fileID: 0}
- target: {fileID: 4307641258646068, guid: c85b585836e104587b4efdc4d8b9d62b, type: 3}
propertyPath: m_LocalEulerAnglesHint.y
value: 0
objectReference: {fileID: 0}
- target: {fileID: 4307641258646068, guid: c85b585836e104587b4efdc4d8b9d62b, type: 3}
propertyPath: m_LocalEulerAnglesHint.z
value: 0
objectReference: {fileID: 0}
m_RemovedComponents: []
m_SourcePrefab: {fileID: 100100000, guid: c85b585836e104587b4efdc4d8b9d62b, type: 3}

m_AnchoredPosition: {x: -1000, y: -239.57645}
m_SizeDelta: {x: 160, y: 30}
m_Pivot: {x: 0.5, y: 0.5}
--- !u!1001 &1094805673
PrefabInstance:
m_ObjectHideFlags: 0
serializedVersion: 2
m_Modification:
m_TransformParent: {fileID: 0}
m_Modifications:
- target: {fileID: 1145096862361766, guid: c85b585836e104587b4efdc4d8b9d62b, type: 3}
propertyPath: m_Name
value: VisualFoodCollectorArea (1)
objectReference: {fileID: 0}
- target: {fileID: 4307641258646068, guid: c85b585836e104587b4efdc4d8b9d62b, type: 3}
propertyPath: m_LocalPosition.x
value: 0
objectReference: {fileID: 0}
- target: {fileID: 4307641258646068, guid: c85b585836e104587b4efdc4d8b9d62b, type: 3}
propertyPath: m_LocalPosition.y
value: 60
objectReference: {fileID: 0}
- target: {fileID: 4307641258646068, guid: c85b585836e104587b4efdc4d8b9d62b, type: 3}
propertyPath: m_LocalPosition.z
value: 0
objectReference: {fileID: 0}
- target: {fileID: 4307641258646068, guid: c85b585836e104587b4efdc4d8b9d62b, type: 3}
propertyPath: m_LocalRotation.x
value: -0
objectReference: {fileID: 0}
- target: {fileID: 4307641258646068, guid: c85b585836e104587b4efdc4d8b9d62b, type: 3}
propertyPath: m_LocalRotation.y
value: -0
objectReference: {fileID: 0}
- target: {fileID: 4307641258646068, guid: c85b585836e104587b4efdc4d8b9d62b, type: 3}
propertyPath: m_LocalRotation.z
value: -0
objectReference: {fileID: 0}
- target: {fileID: 4307641258646068, guid: c85b585836e104587b4efdc4d8b9d62b, type: 3}
propertyPath: m_LocalRotation.w
value: 1
objectReference: {fileID: 0}
- target: {fileID: 4307641258646068, guid: c85b585836e104587b4efdc4d8b9d62b, type: 3}
propertyPath: m_RootOrder
value: 6
objectReference: {fileID: 0}
- target: {fileID: 4307641258646068, guid: c85b585836e104587b4efdc4d8b9d62b, type: 3}
propertyPath: m_LocalEulerAnglesHint.x
value: 0
objectReference: {fileID: 0}
- target: {fileID: 4307641258646068, guid: c85b585836e104587b4efdc4d8b9d62b, type: 3}
propertyPath: m_LocalEulerAnglesHint.y
value: 0
objectReference: {fileID: 0}
- target: {fileID: 4307641258646068, guid: c85b585836e104587b4efdc4d8b9d62b, type: 3}
propertyPath: m_LocalEulerAnglesHint.z
value: 0
objectReference: {fileID: 0}
m_RemovedComponents: []
m_SourcePrefab: {fileID: 100100000, guid: c85b585836e104587b4efdc4d8b9d62b, type: 3}
--- !u!1001 &1232021009
PrefabInstance:
m_ObjectHideFlags: 0

Project/Assets/ML-Agents/Examples/FoodCollector/Scripts/FoodCollectorAgent.cs (8 lines changed)


public GameObject myLaser;
public bool contribute;
public bool useVectorObs;
[Tooltip("Use only the frozen flag in vector observations. If \"Use Vector Obs\" " +
"is checked, this option has no effect. This option is necessary for the " +
"VisualFoodCollector scene.")]
public bool useVectorFrozenFlag;
EnvironmentParameters m_ResetParams;

sensor.AddObservation(localVelocity.z);
sensor.AddObservation(m_Frozen);
sensor.AddObservation(m_Shoot);
}
else if (useVectorFrozenFlag)
{
sensor.AddObservation(m_Frozen);
}
}

Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ModelOverrider.cs (125 lines changed)


public class ModelOverrider : MonoBehaviour
{
HashSet<string> k_SupportedExtensions = new HashSet<string> { "nn", "onnx" };
const string k_CommandLineModelOverrideFlag = "--mlagents-override-model";
const string k_CommandLineModelOverrideDirectoryFlag = "--mlagents-override-model-directory";
const string k_CommandLineModelOverrideExtensionFlag = "--mlagents-override-model-extension";
const string k_CommandLineQuitAfterEpisodesFlag = "--mlagents-quit-after-episodes";

Agent m_Agent;
// Assets paths to use, with the behavior name as the key.
Dictionary<string, string> m_BehaviorNameOverrides = new Dictionary<string, string>();
string m_OverrideExtension = "nn";
private List<string> m_OverrideExtensions = new List<string>();
// Cached loaded NNModels, with the behavior name as the key.
Dictionary<string, NNModel> m_CachedModels = new Dictionary<string, NNModel>();

public bool HasOverrides
{
get { return m_BehaviorNameOverrides.Count > 0 || !string.IsNullOrEmpty(m_BehaviorNameOverrideDirectory); }
get
{
GetAssetPathFromCommandLine();
return !string.IsNullOrEmpty(m_BehaviorNameOverrideDirectory);
}
}
public static string GetOverrideBehaviorName(string originalBehaviorName)

/// <returns></returns>
void GetAssetPathFromCommandLine()
{
m_BehaviorNameOverrides.Clear();
var maxEpisodes = 0;
string[] commandLineArgsOverride = null;
if (!string.IsNullOrEmpty(debugCommandLineOverride) && Application.isEditor)

var args = commandLineArgsOverride ?? Environment.GetCommandLineArgs();
for (var i = 0; i < args.Length; i++)
{
if (args[i] == k_CommandLineModelOverrideFlag && i < args.Length - 2)
{
var key = args[i + 1].Trim();
var value = args[i + 2].Trim();
m_BehaviorNameOverrides[key] = value;
}
else if (args[i] == k_CommandLineModelOverrideDirectoryFlag && i < args.Length - 1)
if (args[i] == k_CommandLineModelOverrideDirectoryFlag && i < args.Length - 1)
m_OverrideExtension = args[i + 1].Trim().ToLower();
var isKnownExtension = k_SupportedExtensions.Contains(m_OverrideExtension);
var overrideExtension = args[i + 1].Trim().ToLower();
var isKnownExtension = k_SupportedExtensions.Contains(overrideExtension);
Debug.LogError($"loading unsupported format: {m_OverrideExtension}");
Debug.LogError($"loading unsupported format: {overrideExtension}");
m_OverrideExtensions.Add(overrideExtension);
}
else if (args[i] == k_CommandLineQuitAfterEpisodesFlag && i < args.Length - 1)
{

}
}
if (HasOverrides)
if (!string.IsNullOrEmpty(m_BehaviorNameOverrideDirectory))
{
// If overriding models, set maxEpisodes to 1 or the command line value
m_MaxEpisodes = maxEpisodes > 0 ? maxEpisodes : 1;

return m_CachedModels[behaviorName];
}
string assetPath = null;
if (m_BehaviorNameOverrides.ContainsKey(behaviorName))
if (string.IsNullOrEmpty(m_BehaviorNameOverrideDirectory))
assetPath = m_BehaviorNameOverrides[behaviorName];
}
else if (!string.IsNullOrEmpty(m_BehaviorNameOverrideDirectory))
{
assetPath = Path.Combine(m_BehaviorNameOverrideDirectory, $"{behaviorName}.{m_OverrideExtension}");
Debug.Log($"No override directory set.");
return null;
if (string.IsNullOrEmpty(assetPath))
{
Debug.Log($"No override for BehaviorName {behaviorName}, and no directory set.");
return null;
}
// Try the override extensions in order. If they weren't set, try .nn first, then .onnx.
var overrideExtensions = (m_OverrideExtensions.Count > 0)
? m_OverrideExtensions.ToArray()
: new[] { "nn", "onnx" };
try
bool isOnnx = false;
string assetName = null;
foreach (var overrideExtension in overrideExtensions)
rawModel = File.ReadAllBytes(assetPath);
var assetPath = Path.Combine(m_BehaviorNameOverrideDirectory, $"{behaviorName}.{overrideExtension}");
try
{
rawModel = File.ReadAllBytes(assetPath);
isOnnx = overrideExtension.Equals("onnx");
assetName = "Override - " + Path.GetFileName(assetPath);
break;
}
catch (IOException)
{
// Do nothing - try the next extension, or we'll exit if nothing loaded.
}
catch (IOException)
if (rawModel == null)
Debug.Log($"Couldn't load file {assetPath} at full path {Path.GetFullPath(assetPath)}", this);
Debug.Log($"Couldn't load model file(s) for {behaviorName} in {m_BehaviorNameOverrideDirectory} (full path: {Path.GetFullPath(m_BehaviorNameOverrideDirectory)}");
NNModel asset;
var isOnnx = m_OverrideExtension.Equals("onnx");
if (isOnnx)
{
var converter = new ONNXModelConverter(true);
var onnxModel = converter.Convert(rawModel);
var asset = isOnnx ? LoadOnnxModel(rawModel) : LoadBarracudaModel(rawModel);
asset.name = assetName;
m_CachedModels[behaviorName] = asset;
return asset;
}
NNModelData assetData = ScriptableObject.CreateInstance<NNModelData>();
using (var memoryStream = new MemoryStream())
using (var writer = new BinaryWriter(memoryStream))
{
ModelWriter.Save(writer, onnxModel);
assetData.Value = memoryStream.ToArray();
}
assetData.name = "Data";
assetData.hideFlags = HideFlags.HideInHierarchy;
NNModel LoadBarracudaModel(byte[] rawModel)
{
var asset = ScriptableObject.CreateInstance<NNModel>();
asset.modelData = ScriptableObject.CreateInstance<NNModelData>();
asset.modelData.Value = rawModel;
return asset;
}
asset = ScriptableObject.CreateInstance<NNModel>();
asset.modelData = assetData;
}
else
NNModel LoadOnnxModel(byte[] rawModel)
{
var converter = new ONNXModelConverter(true);
var onnxModel = converter.Convert(rawModel);
NNModelData assetData = ScriptableObject.CreateInstance<NNModelData>();
using (var memoryStream = new MemoryStream())
using (var writer = new BinaryWriter(memoryStream))
// Note - this approach doesn't work for onnx files. Need to replace with
// the equivalent of ONNXModelImporter.OnImportAsset()
asset = ScriptableObject.CreateInstance<NNModel>();
asset.modelData = ScriptableObject.CreateInstance<NNModelData>();
asset.modelData.Value = rawModel;
ModelWriter.Save(writer, onnxModel);
assetData.Value = memoryStream.ToArray();
assetData.name = "Data";
assetData.hideFlags = HideFlags.HideInHierarchy;
asset.name = "Override - " + Path.GetFileName(assetPath);
m_CachedModels[behaviorName] = asset;
var asset = ScriptableObject.CreateInstance<NNModel>();
asset.modelData = assetData;
/// <summary>
/// Load the NNModel file from the specified path, and give it to the attached agent.

README.md (10 lines changed)


# Unity ML-Agents Toolkit
[![docs badge](https://img.shields.io/badge/docs-reference-blue.svg)](https://github.com/Unity-Technologies/ml-agents/tree/release_7_docs/docs/)
[![docs badge](https://img.shields.io/badge/docs-reference-blue.svg)](https://github.com/Unity-Technologies/ml-agents/tree/release_8_docs/docs/)
[![license badge](https://img.shields.io/badge/license-Apache--2.0-green.svg)](LICENSE)

## Releases & Documentation
**Our latest, stable release is `Release 7`. Click
[here](https://github.com/Unity-Technologies/ml-agents/tree/release_7_docs/docs/Readme.md)
**Our latest, stable release is `Release 8`. Click
[here](https://github.com/Unity-Technologies/ml-agents/tree/release_8_docs/docs/Readme.md)
to get started with the latest release of ML-Agents.**
The table below lists all our releases, including our `master` branch which is

| **Version** | **Release Date** | **Source** | **Documentation** | **Download** |
|:-------:|:------:|:-------------:|:-------:|:------------:|
| **master (unstable)** | -- | [source](https://github.com/Unity-Technologies/ml-agents/tree/master) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/master/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/master.zip) |
| **Release 7** | **September 16, 2020** | **[source](https://github.com/Unity-Technologies/ml-agents/tree/release_7)** | **[docs](https://github.com/Unity-Technologies/ml-agents/tree/release_7_docs/docs/Readme.md)** | **[download](https://github.com/Unity-Technologies/ml-agents/archive/release_7.zip)** |
| **Release 8** | **October 14, 2020** | **[source](https://github.com/Unity-Technologies/ml-agents/tree/release_8)** | **[docs](https://github.com/Unity-Technologies/ml-agents/tree/release_8_docs/docs/Readme.md)** | **[download](https://github.com/Unity-Technologies/ml-agents/archive/release_8.zip)** |
| **Release 7** | September 16, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/release_7) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/release_7_docs/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/release_7.zip) |
| **Release 1** | April 30, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/release_1) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/release_1_docs/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/release_1.zip) |
## Citation

com.unity.ml-agents.extensions/Documentation~/Grid-Sensor.md (1 line changed)


The Grid Sensor combines the generality of data extraction from Raycasts with the image processing power of Convolutional Neural Networks. The Grid Sensor can be used to collect data in the general form of a "Width x Height x Channel" matrix which can be used for training Reinforcement Learning agents or for data analysis.
<img src="images/gridsensor-debug.png" align="middle" width="3000"/>
# Motivation

com.unity.ml-agents.extensions/Documentation~/com.unity.ml-agents.extensions.md (31 lines changed)


# About ML-Agents Extensions package (`com.unity.ml-agents.extensions`)
The Unity ML-Agents Extensions package optional add-ons to the C# SDK for the
The Unity ML-Agents Extensions package contains optional add-ons to the C# SDK for the
[Unity ML-Agents Toolkit](https://github.com/Unity-Technologies/ml-agents).
These extensions are all considered experimental, and their API or behavior

| _Runtime_ | Contains core C# APIs for integrating ML-Agents into your Unity scene. |
| _Tests_ | Contains the unit tests for the package. |
<a name="Installation"></a>
## Installation
The ML-Agents Extensions package is not currently available in the Package Manager. There are two
recommended ways to install the package:
## Installation
### Local Installation
[Clone the repository](../../docs/Installation.md#clone-the-ml-agents-toolkit-repository-optional) and follow the
[Local Installation for Development](../../docs/Installation.md#advanced-local-installation-for-development-1)
directions (substituting `com.unity.ml-agents.extensions` for the package name).
### Github via Package Manager
In Unity 2019.4 or later, open the Package Manager, hit the "+" button, and select "Add package from git URL".
![Package Manager git URL](../../docs/images/unity_package_manager_git_url.png)
In the dialog that appears, enter
```
git+https://github.com/Unity-Technologies/ml-agents.git?path=com.unity.ml-agents.extensions
```
To install this _ML-Agents_ package, follow the instructions in the [Package
Manager documentation](https://docs.unity3d.com/Manual/upm-ui-install.html).
You can also edit your project's `manifest.json` directly and add the following line to the `dependencies`
section:
```
"com.unity.ml-agents.extensions": "git+https://github.com/Unity-Technologies/ml-agents.git?path=com.unity.ml-agents.extensions",
```
See [Git dependencies](https://docs.unity3d.com/Manual/upm-git.html#subfolder) for more information.
## Requirements

- 2018.4 and later
## Known Limitations
none
## Need Help?
The main [README](../../README.md) contains links for contacting the team or getting support.

com.unity.ml-agents.extensions/README.md (4 lines changed)


# ML-Agents Extensions
This is a source-only package for new features based on ML-Agents.
More details coming soon.
See the [package documentation](Documentation~/com.unity.ml-agents.extensions.md) for more information

com.unity.ml-agents.extensions/Runtime/Sensors/GridSensor.cs (4 lines changed)


[Tooltip("Whether to show gizmos or not")]
public bool ShowGizmos = false;
public SensorCompressionType CompressionType = SensorCompressionType.PNG;
/// <summary>
/// Array of colors displaying the DebugColors for each cell in OnDrawGizmos. Only updated if ShowGizmos.
/// </summary>

/// <inheritdoc/>
public virtual SensorCompressionType GetCompressionType()
{
return SensorCompressionType.PNG;
return CompressionType;
}
/// <summary>

com.unity.ml-agents.extensions/package.json (2 lines changed)


"unity": "2018.4",
"description": "A source-only package for new features based on ML-Agents",
"dependencies": {
"com.unity.ml-agents": "1.4.0-preview"
"com.unity.ml-agents": "1.5.0-preview"
}
}

com.unity.ml-agents/CHANGELOG.md (38 lines changed)


and this project adheres to
[Semantic Versioning](http://semver.org/spec/v2.0.0.html).
### Major Changes
#### com.unity.ml-agents (C#)
#### ml-agents / ml-agents-envs / gym-unity (Python)

### Bug Fixes
#### com.unity.ml-agents (C#)
#### ml-agents / ml-agents-envs / gym-unity (Python)
## [1.5.0-preview] - 2020-10-14
### Major Changes
#### com.unity.ml-agents (C#)
#### ml-agents / ml-agents-envs / gym-unity (Python)
- Added the Random Network Distillation (RND) intrinsic reward signal to the Pytorch
trainers. To use RND, add a `rnd` section to the `reward_signals` section of your
yaml configuration file. [More information here](https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Training-Configuration-File.md#rnd-intrinsic-reward) (#4473)
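Purely as an illustrative aside (not part of this diff): enabling the RND signal described in the entry above amounts to adding an `rnd` block under `reward_signals` in the trainer YAML. The behavior name and parameter names below are assumptions for the sketch; the authoritative option list is in the Training-Configuration-File.md link above.
```
behaviors:
  MyBehavior:            # hypothetical behavior name
    trainer_type: ppo
    reward_signals:
      extrinsic:
        strength: 1.0
        gamma: 0.99
      rnd:               # assumed parameter names; check the linked docs
        strength: 0.01
        gamma: 0.99
```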
### Minor Changes
#### com.unity.ml-agents (C#)
- Stacking for compressed observations is now supported. An additional setting
option `Observation Stacks` is added in editor to sensor components that support
compressed observations. A new class `ISparseChannelSensor` with an
additional method `GetCompressedChannelMapping()` is added to generate a mapping
of the channels in compressed data to the actual channel after decompression,
for the python side to decompress correctly. (#4476)
- Added a new visual 3DBall environment. (#4513)
#### ml-agents / ml-agents-envs / gym-unity (Python)
- The Communication API was changed to 1.2.0 to indicate support for stacked
compressed observation. A new entry `compressed_channel_mapping` is added to the
proto to handle decompression correctly. Newer versions of the package that wish to
make use of this will also need a compatible version of the Python trainers. (#4476)
- In the `VisualFoodCollector` scene, a vector flag representing the frozen state of
the agent is added to the input observations in addition to the original first-person
camera frame. The scene is able to train with the provided default config file. (#4511)
- Added conversion to string for sampler classes to increase the verbosity of
the curriculum lesson changes. The lesson updates would now output the sampler
stats in addition to the lesson and parameter name to the console. (#4484)
- Localized documentation in Russian is added. Thanks to @SergeyMatrosov for
the contribution. (#4529)
### Bug Fixes
#### com.unity.ml-agents (C#)
- Fixed a bug where accessing the Academy outside of play mode would cause the
Academy to get stepped multiple times when in play mode. (#4532)
#### ml-agents / ml-agents-envs / gym-unity (Python)

com.unity.ml-agents/Documentation~/com.unity.ml-agents.md (2 lines changed)


[unity ML-Agents Toolkit]: https://github.com/Unity-Technologies/ml-agents
[unity inference engine]: https://docs.unity3d.com/Packages/com.unity.barracuda@latest/index.html
[package manager documentation]: https://docs.unity3d.com/Manual/upm-ui-install.html
[installation instructions]: https://github.com/Unity-Technologies/ml-agents/blob/release_7_docs/docs/Installation.md
[installation instructions]: https://github.com/Unity-Technologies/ml-agents/blob/release_8_docs/docs/Installation.md
[github repository]: https://github.com/Unity-Technologies/ml-agents
[python package]: https://github.com/Unity-Technologies/ml-agents
[execution order of event functions]: https://docs.unity3d.com/Manual/ExecutionOrder.html

com.unity.ml-agents/Editor/CameraSensorComponentEditor.cs (1 line changed)


EditorGUILayout.PropertyField(so.FindProperty("m_Width"), true);
EditorGUILayout.PropertyField(so.FindProperty("m_Height"), true);
EditorGUILayout.PropertyField(so.FindProperty("m_Grayscale"), true);
EditorGUILayout.PropertyField(so.FindProperty("m_ObservationStacks"), true);
}
EditorGUI.EndDisabledGroup();
EditorGUILayout.PropertyField(so.FindProperty("m_Compression"), true);

com.unity.ml-agents/Editor/RenderTextureSensorComponentEditor.cs (1 line changed)


EditorGUILayout.PropertyField(so.FindProperty("m_RenderTexture"), true);
EditorGUILayout.PropertyField(so.FindProperty("m_SensorName"), true);
EditorGUILayout.PropertyField(so.FindProperty("m_Grayscale"), true);
EditorGUILayout.PropertyField(so.FindProperty("m_ObservationStacks"), true);
}
EditorGUI.EndDisabledGroup();

com.unity.ml-agents/Runtime/Academy.cs (49 lines changed)


* API. For more information on each of these entities, in addition to how to
* set-up a learning environment and train the behavior of characters in a
* Unity scene, please browse our documentation pages on GitHub:
* https://github.com/Unity-Technologies/ml-agents/tree/release_7_docs/docs/
* https://github.com/Unity-Technologies/ml-agents/tree/release_8_docs/docs/
*/
namespace Unity.MLAgents

{
void FixedUpdate()
{
Academy.Instance.EnvironmentStep();
// Check if the stepper belongs to the current Academy and destroy it if it's not.
// This is to prevent from having leaked stepper from previous runs.
if (!Academy.IsInitialized || !Academy.Instance.IsStepperOwner(this))
{
Destroy(this.gameObject);
}
else
{
Academy.Instance.EnvironmentStep();
}
}
}

/// fall back to inference or heuristic decisions. (You can also set agents to always use
/// inference or heuristics.)
/// </remarks>
[HelpURL("https://github.com/Unity-Technologies/ml-agents/tree/release_7_docs/" +
[HelpURL("https://github.com/Unity-Technologies/ml-agents/tree/release_8_docs/" +
"docs/Learning-Environment-Design.md")]
public class Academy : IDisposable
{

/// <term>1.1.0</term>
/// <description>Support concatenated PNGs for compressed observations.</description>
/// </item>
/// <item>
/// <term>1.2.0</term>
/// <description>Support compression mapping for stacked compressed observations.</description>
/// </item>
const string k_ApiVersion = "1.1.0";
const string k_ApiVersion = "1.2.0";
internal const string k_PackageVersion = "1.4.0-preview";
internal const string k_PackageVersion = "1.5.0-preview";
const int k_EditorTrainingPort = 5004;

Application.quitting += Dispose;
LazyInitialize();
#if UNITY_EDITOR
EditorApplication.playModeStateChanged += HandleOnPlayModeChanged;
#endif
#if UNITY_EDITOR
/// <summary>
/// Clean up the Academy when switching from edit mode to play mode
/// </summary>
/// <param name="state">State.</param>
void HandleOnPlayModeChanged(PlayModeStateChange state)
{
if (state == PlayModeStateChange.ExitingEditMode)
{
Dispose();
}
}
#endif
/// <summary>
/// Initialize the Academy if it hasn't already been initialized.

// Reset the Lazy instance
s_Lazy = new Lazy<Academy>(() => new Academy());
}
/// <summary>
/// Check if the input AcademyFixedUpdateStepper belongs to this Academy.
/// </summary>
internal bool IsStepperOwner(AcademyFixedUpdateStepper stepper)
{
return GameObject.ReferenceEquals(stepper.gameObject, Academy.Instance.m_StepperObject);
}
}
}

com.unity.ml-agents/Runtime/Actuators/IActionReceiver.cs (2 lines changed)


///
/// See [Agents - Actions] for more information on masking actions.
///
/// [Agents - Actions]: https://github.com/Unity-Technologies/ml-agents/blob/release_7_docs/docs/Learning-Environment-Design-Agents.md#actions
/// [Agents - Actions]: https://github.com/Unity-Technologies/ml-agents/blob/release_8_docs/docs/Learning-Environment-Design-Agents.md#actions
/// </remarks>
/// <seealso cref="IActionReceiver.OnActionReceived"/>
void WriteDiscreteActionMask(IDiscreteActionMask actionMask);

com.unity.ml-agents/Runtime/Actuators/IDiscreteActionMask.cs (2 lines changed)


///
/// See [Agents - Actions] for more information on masking actions.
///
/// [Agents - Actions]: https://github.com/Unity-Technologies/ml-agents/blob/release_7_docs/docs/Learning-Environment-Design-Agents.md#actions
/// [Agents - Actions]: https://github.com/Unity-Technologies/ml-agents/blob/release_8_docs/docs/Learning-Environment-Design-Agents.md#actions
/// </remarks>
/// <param name="branch">The branch for which the actions will be masked.</param>
/// <param name="actionIndices">The indices of the masked actions.</param>

com.unity.ml-agents/Runtime/Agent.cs (26 lines changed)


/// [OnDisable()]: https://docs.unity3d.com/ScriptReference/MonoBehaviour.OnDisable.html]
/// [OnBeforeSerialize()]: https://docs.unity3d.com/ScriptReference/MonoBehaviour.OnBeforeSerialize.html
/// [OnAfterSerialize()]: https://docs.unity3d.com/ScriptReference/MonoBehaviour.OnAfterSerialize.html
/// [Agents]: https://github.com/Unity-Technologies/ml-agents/blob/release_7_docs/docs/Learning-Environment-Design-Agents.md
/// [Reinforcement Learning in Unity]: https://github.com/Unity-Technologies/ml-agents/blob/release_7_docs/docs/Learning-Environment-Design.md
/// [Agents]: https://github.com/Unity-Technologies/ml-agents/blob/release_8_docs/docs/Learning-Environment-Design-Agents.md
/// [Reinforcement Learning in Unity]: https://github.com/Unity-Technologies/ml-agents/blob/release_8_docs/docs/Learning-Environment-Design.md
/// [Unity ML-Agents Toolkit manual]: https://github.com/Unity-Technologies/ml-agents/blob/release_7_docs/docs/Readme.md
/// [Unity ML-Agents Toolkit manual]: https://github.com/Unity-Technologies/ml-agents/blob/release_8_docs/docs/Readme.md
[HelpURL("https://github.com/Unity-Technologies/ml-agents/blob/release_7_docs/" +
[HelpURL("https://github.com/Unity-Technologies/ml-agents/blob/release_8_docs/" +
"docs/Learning-Environment-Design-Agents.md")]
[Serializable]
[RequireComponent(typeof(BehaviorParameters))]

/// for information about mixing reward signals from curiosity and Generative Adversarial
/// Imitation Learning (GAIL) with rewards supplied through this method.
///
/// [Agents - Rewards]: https://github.com/Unity-Technologies/ml-agents/blob/release_7_docs/docs/Learning-Environment-Design-Agents.md#rewards
/// [Reward Signals]: https://github.com/Unity-Technologies/ml-agents/blob/release_7_docs/docs/ML-Agents-Overview.md#a-quick-note-on-reward-signals
/// [Agents - Rewards]: https://github.com/Unity-Technologies/ml-agents/blob/release_8_docs/docs/Learning-Environment-Design-Agents.md#rewards
/// [Reward Signals]: https://github.com/Unity-Technologies/ml-agents/blob/release_8_docs/docs/ML-Agents-Overview.md#a-quick-note-on-reward-signals
/// </remarks>
/// <param name="reward">The new value of the reward.</param>
public void SetReward(float reward)

/// for information about mixing reward signals from curiosity and Generative Adversarial
/// Imitation Learning (GAIL) with rewards supplied through this method.
///
/// [Agents - Rewards]: https://github.com/Unity-Technologies/ml-agents/blob/release_7_docs/docs/Learning-Environment-Design-Agents.md#rewards
/// [Reward Signals]: https://github.com/Unity-Technologies/ml-agents/blob/release_7_docs/docs/ML-Agents-Overview.md#a-quick-note-on-reward-signals
/// [Agents - Rewards]: https://github.com/Unity-Technologies/ml-agents/blob/release_8_docs/docs/Learning-Environment-Design-Agents.md#rewards
/// [Reward Signals]: https://github.com/Unity-Technologies/ml-agents/blob/release_8_docs/docs/ML-Agents-Overview.md#a-quick-note-on-reward-signals
///</remarks>
/// <param name="increment">Incremental reward value.</param>
public void AddReward(float increment)

/// implementing a simple heuristic function can aid in debugging agent actions and interactions
/// with its environment.
///
/// [Demonstration Recorder]: https://github.com/Unity-Technologies/ml-agents/blob/release_7_docs/docs/Learning-Environment-Design-Agents.md#recording-demonstrations
/// [Actions]: https://github.com/Unity-Technologies/ml-agents/blob/release_7_docs/docs/Learning-Environment-Design-Agents.md#actions
/// [Demonstration Recorder]: https://github.com/Unity-Technologies/ml-agents/blob/release_8_docs/docs/Learning-Environment-Design-Agents.md#recording-demonstrations
/// [Actions]: https://github.com/Unity-Technologies/ml-agents/blob/release_8_docs/docs/Learning-Environment-Design-Agents.md#actions
/// [GameObject]: https://docs.unity3d.com/Manual/GameObjects.html
/// </remarks>
/// <example>

/// For more information about observations, see [Observations and Sensors].
///
/// [GameObject]: https://docs.unity3d.com/Manual/GameObjects.html
/// [Observations and Sensors]: https://github.com/Unity-Technologies/ml-agents/blob/release_7_docs/docs/Learning-Environment-Design-Agents.md#observations-and-sensors
/// [Observations and Sensors]: https://github.com/Unity-Technologies/ml-agents/blob/release_8_docs/docs/Learning-Environment-Design-Agents.md#observations-and-sensors
/// </remarks>
public virtual void CollectObservations(VectorSensor sensor)
{

///
/// See [Agents - Actions] for more information on masking actions.
///
/// [Agents - Actions]: https://github.com/Unity-Technologies/ml-agents/blob/release_7_docs/docs/Learning-Environment-Design-Agents.md#actions
/// [Agents - Actions]: https://github.com/Unity-Technologies/ml-agents/blob/release_8_docs/docs/Learning-Environment-Design-Agents.md#actions
/// </remarks>
/// <seealso cref="IActionReceiver.OnActionReceived"/>
public virtual void WriteDiscreteActionMask(IDiscreteActionMask actionMask)

///
/// For more information about implementing agent actions see [Agents - Actions].
///
/// [Agents - Actions]: https://github.com/Unity-Technologies/ml-agents/blob/release_7_docs/docs/Learning-Environment-Design-Agents.md#actions
/// [Agents - Actions]: https://github.com/Unity-Technologies/ml-agents/blob/release_8_docs/docs/Learning-Environment-Design-Agents.md#actions
/// </remarks>
/// <param name="actions">
/// Struct containing the buffers of actions to be executed at this step.

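The Agent.cs hunks above are documentation-only, but they cover the core callback surface: `SetReward`/`AddReward`, `CollectObservations`, `WriteDiscreteActionMask`, and the `ActionBuffers`-based `OnActionReceived`. As a hedged sketch (not part of this change; the observation layout, the single discrete branch, and the `WriteMask` call are assumptions about this package version), a minimal agent wiring those callbacks together might look like:

```csharp
using Unity.MLAgents;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Sensors;

// Illustrative only: a tiny agent exercising the callbacks documented above.
public class MinimalAgent : Agent
{
    public override void CollectObservations(VectorSensor sensor)
    {
        // Two vector observations (values chosen arbitrarily for the sketch).
        sensor.AddObservation(transform.localPosition.x);
        sensor.AddObservation(transform.localPosition.z);
    }

    public override void WriteDiscreteActionMask(IDiscreteActionMask actionMask)
    {
        // Hypothetical rule: mask action index 2 of branch 0 near the boundary.
        if (transform.localPosition.x > 4f)
        {
            actionMask.WriteMask(0, new[] { 2 });
        }
    }

    public override void OnActionReceived(ActionBuffers actions)
    {
        var move = actions.DiscreteActions[0];
        AddReward(-0.001f);      // small per-step time penalty
        if (move == 1)
        {
            SetReward(1.0f);     // overwrite the step reward on success
            EndEpisode();
        }
    }
}
```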
62
com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs


/// <summary>
/// Static flag to make sure that we only fire the warning once.
/// </summary>
private static bool s_HaveWarnedAboutTrainerCapabilities = false;
private static bool s_HaveWarnedTrainerCapabilitiesMultiPng = false;
private static bool s_HaveWarnedTrainerCapabilitiesMapping = false;
/// <summary>
/// Generate an ObservationProto for the sensor using the provided ObservationWriter.

var trainerCanHandle = Academy.Instance.TrainerCapabilities == null || Academy.Instance.TrainerCapabilities.ConcatenatedPngObservations;
if (!trainerCanHandle)
{
if (!s_HaveWarnedAboutTrainerCapabilities)
if (!s_HaveWarnedTrainerCapabilitiesMultiPng)
s_HaveWarnedAboutTrainerCapabilities = true;
s_HaveWarnedTrainerCapabilitiesMultiPng = true;
}
compressionType = SensorCompressionType.None;
}
}
// Check capabilities if we need mapping for compressed observations
if (compressionType != SensorCompressionType.None && shape.Length == 3 && shape[2] > 3)
{
var trainerCanHandleMapping = Academy.Instance.TrainerCapabilities == null || Academy.Instance.TrainerCapabilities.CompressedChannelMapping;
var isTrivialMapping = IsTrivialMapping(sensor);
if (!trainerCanHandleMapping && !isTrivialMapping)
{
if (!s_HaveWarnedTrainerCapabilitiesMapping)
{
Debug.LogWarning($"The sensor {sensor.GetName()} is using non-trivial mapping and " +
"the attached trainer doesn't support compression mapping. " +
"Switching to uncompressed observations.");
s_HaveWarnedTrainerCapabilitiesMapping = true;
}
compressionType = SensorCompressionType.None;
}

"return SensorCompressionType.None from GetCompressionType()."
);
}
var compressibleSensor = sensor as ISparseChannelSensor;
if (compressibleSensor != null)
{
observationProto.CompressedChannelMapping.AddRange(compressibleSensor.GetCompressedChannelMapping());
}
}
observationProto.Shape.AddRange(shape);
return observationProto;

return new UnityRLCapabilities
{
BaseRLCapabilities = proto.BaseRLCapabilities,
ConcatenatedPngObservations = proto.ConcatenatedPngObservations
ConcatenatedPngObservations = proto.ConcatenatedPngObservations,
CompressedChannelMapping = proto.CompressedChannelMapping,
};
}

{
BaseRLCapabilities = rlCaps.BaseRLCapabilities,
ConcatenatedPngObservations = rlCaps.ConcatenatedPngObservations,
CompressedChannelMapping = rlCaps.CompressedChannelMapping,
}
internal static bool IsTrivialMapping(ISensor sensor)
{
var compressibleSensor = sensor as ISparseChannelSensor;
if (compressibleSensor is null)
{
return true;
}
var mapping = compressibleSensor.GetCompressedChannelMapping();
if (mapping == null)
{
return true;
}
// check if mapping equals zero mapping
if (mapping.Length == 3 && mapping.All(m => m == 0))
{
return true;
}
// check if mapping equals identity mapping
for (var i = 0; i < mapping.Length; i++)
{
if (mapping[i] != i)
{
return false;
}
}
return true;
}
}
}

4
com.unity.ml-agents/Runtime/Communicator/UnityRLCapabilities.cs


{
public bool BaseRLCapabilities;
public bool ConcatenatedPngObservations;
public bool CompressedChannelMapping;
public UnityRLCapabilities(bool baseRlCapabilities = true, bool concatenatedPngObservations = true)
public UnityRLCapabilities(bool baseRlCapabilities = true, bool concatenatedPngObservations = true, bool compressedChannelMapping = true)
CompressedChannelMapping = compressedChannelMapping;
}
/// <summary>

2
com.unity.ml-agents/Runtime/Demonstrations/DemonstrationRecorder.cs


/// See [Imitation Learning - Recording Demonstrations] for more information.
///
/// [GameObject]: https://docs.unity3d.com/Manual/GameObjects.html
/// [Imitation Learning - Recording Demonstrations]: https://github.com/Unity-Technologies/ml-agents/blob/release_7_docs/docs//Learning-Environment-Design-Agents.md#recording-demonstrations
/// [Imitation Learning - Recording Demonstrations]: https://github.com/Unity-Technologies/ml-agents/blob/release_8_docs/docs//Learning-Environment-Design-Agents.md#recording-demonstrations
/// </remarks>
[RequireComponent(typeof(Agent))]
[AddComponentMenu("ML Agents/Demonstration Recorder", (int)MenuGroup.Default)]

2
com.unity.ml-agents/Runtime/DiscreteActionMasker.cs


///
/// See [Agents - Actions] for more information on masking actions.
///
/// [Agents - Actions]: https://github.com/Unity-Technologies/ml-agents/blob/release_7_docs/docs/Learning-Environment-Design-Agents.md#actions
/// [Agents - Actions]: https://github.com/Unity-Technologies/ml-agents/blob/release_8_docs/docs/Learning-Environment-Design-Agents.md#actions
/// </remarks>
/// <param name="branch">The branch for which the actions will be masked.</param>
/// <param name="actionIndices">The indices of the masked actions.</param>

40
com.unity.ml-agents/Runtime/Grpc/CommunicatorObjects/Capabilities.cs


byte[] descriptorData = global::System.Convert.FromBase64String(
string.Concat(
"CjVtbGFnZW50c19lbnZzL2NvbW11bmljYXRvcl9vYmplY3RzL2NhcGFiaWxp",
"dGllcy5wcm90bxIUY29tbXVuaWNhdG9yX29iamVjdHMiWwoYVW5pdHlSTENh",
"dGllcy5wcm90bxIUY29tbXVuaWNhdG9yX29iamVjdHMifQoYVW5pdHlSTENh",
"Chtjb25jYXRlbmF0ZWRQbmdPYnNlcnZhdGlvbnMYAiABKAhCJaoCIlVuaXR5",
"Lk1MQWdlbnRzLkNvbW11bmljYXRvck9iamVjdHNiBnByb3RvMw=="));
"Chtjb25jYXRlbmF0ZWRQbmdPYnNlcnZhdGlvbnMYAiABKAgSIAoYY29tcHJl",
"c3NlZENoYW5uZWxNYXBwaW5nGAMgASgIQiWqAiJVbml0eS5NTEFnZW50cy5D",
"b21tdW5pY2F0b3JPYmplY3RzYgZwcm90bzM="));
new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.UnityRLCapabilitiesProto), global::Unity.MLAgents.CommunicatorObjects.UnityRLCapabilitiesProto.Parser, new[]{ "BaseRLCapabilities", "ConcatenatedPngObservations" }, null, null, null)
new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.UnityRLCapabilitiesProto), global::Unity.MLAgents.CommunicatorObjects.UnityRLCapabilitiesProto.Parser, new[]{ "BaseRLCapabilities", "ConcatenatedPngObservations", "CompressedChannelMapping" }, null, null, null)
}));
}
#endregion

public UnityRLCapabilitiesProto(UnityRLCapabilitiesProto other) : this() {
baseRLCapabilities_ = other.baseRLCapabilities_;
concatenatedPngObservations_ = other.concatenatedPngObservations_;
compressedChannelMapping_ = other.compressedChannelMapping_;
_unknownFields = pb::UnknownFieldSet.Clone(other._unknownFields);
}

}
}
/// <summary>Field number for the "compressedChannelMapping" field.</summary>
public const int CompressedChannelMappingFieldNumber = 3;
private bool compressedChannelMapping_;
/// <summary>
/// compression mapping for stacking compressed observations.
/// </summary>
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public bool CompressedChannelMapping {
get { return compressedChannelMapping_; }
set {
compressedChannelMapping_ = value;
}
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public override bool Equals(object other) {
return Equals(other as UnityRLCapabilitiesProto);

}
if (BaseRLCapabilities != other.BaseRLCapabilities) return false;
if (ConcatenatedPngObservations != other.ConcatenatedPngObservations) return false;
if (CompressedChannelMapping != other.CompressedChannelMapping) return false;
return Equals(_unknownFields, other._unknownFields);
}

if (BaseRLCapabilities != false) hash ^= BaseRLCapabilities.GetHashCode();
if (ConcatenatedPngObservations != false) hash ^= ConcatenatedPngObservations.GetHashCode();
if (CompressedChannelMapping != false) hash ^= CompressedChannelMapping.GetHashCode();
if (_unknownFields != null) {
hash ^= _unknownFields.GetHashCode();
}

if (ConcatenatedPngObservations != false) {
output.WriteRawTag(16);
output.WriteBool(ConcatenatedPngObservations);
}
if (CompressedChannelMapping != false) {
output.WriteRawTag(24);
output.WriteBool(CompressedChannelMapping);
}
if (_unknownFields != null) {
_unknownFields.WriteTo(output);

if (ConcatenatedPngObservations != false) {
size += 1 + 1;
}
if (CompressedChannelMapping != false) {
size += 1 + 1;
}
if (_unknownFields != null) {
size += _unknownFields.CalculateSize();
}

if (other.ConcatenatedPngObservations != false) {
ConcatenatedPngObservations = other.ConcatenatedPngObservations;
}
if (other.CompressedChannelMapping != false) {
CompressedChannelMapping = other.CompressedChannelMapping;
}
_unknownFields = pb::UnknownFieldSet.MergeFrom(_unknownFields, other._unknownFields);
}

}
case 16: {
ConcatenatedPngObservations = input.ReadBool();
break;
}
case 24: {
CompressedChannelMapping = input.ReadBool();
break;
}
}

34
com.unity.ml-agents/Runtime/Grpc/CommunicatorObjects/Observation.cs


byte[] descriptorData = global::System.Convert.FromBase64String(
string.Concat(
"CjRtbGFnZW50c19lbnZzL2NvbW11bmljYXRvcl9vYmplY3RzL29ic2VydmF0",
"aW9uLnByb3RvEhRjb21tdW5pY2F0b3Jfb2JqZWN0cyL5AQoQT2JzZXJ2YXRp",
"aW9uLnByb3RvEhRjb21tdW5pY2F0b3Jfb2JqZWN0cyKdAgoQT2JzZXJ2YXRp",
"RmxvYXREYXRhSAAaGQoJRmxvYXREYXRhEgwKBGRhdGEYASADKAJCEgoQb2Jz",
"ZXJ2YXRpb25fZGF0YSopChRDb21wcmVzc2lvblR5cGVQcm90bxIICgROT05F",
"EAASBwoDUE5HEAFCJaoCIlVuaXR5Lk1MQWdlbnRzLkNvbW11bmljYXRvck9i",
"amVjdHNiBnByb3RvMw=="));
"RmxvYXREYXRhSAASIgoaY29tcHJlc3NlZF9jaGFubmVsX21hcHBpbmcYBSAD",
"KAUaGQoJRmxvYXREYXRhEgwKBGRhdGEYASADKAJCEgoQb2JzZXJ2YXRpb25f",
"ZGF0YSopChRDb21wcmVzc2lvblR5cGVQcm90bxIICgROT05FEAASBwoDUE5H",
"EAFCJaoCIlVuaXR5Lk1MQWdlbnRzLkNvbW11bmljYXRvck9iamVjdHNiBnBy",
"b3RvMw=="));
new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.ObservationProto), global::Unity.MLAgents.CommunicatorObjects.ObservationProto.Parser, new[]{ "Shape", "CompressionType", "CompressedData", "FloatData" }, new[]{ "ObservationData" }, null, new pbr::GeneratedClrTypeInfo[] { new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.ObservationProto.Types.FloatData), global::Unity.MLAgents.CommunicatorObjects.ObservationProto.Types.FloatData.Parser, new[]{ "Data" }, null, null, null)})
new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.ObservationProto), global::Unity.MLAgents.CommunicatorObjects.ObservationProto.Parser, new[]{ "Shape", "CompressionType", "CompressedData", "FloatData", "CompressedChannelMapping" }, new[]{ "ObservationData" }, null, new pbr::GeneratedClrTypeInfo[] { new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.ObservationProto.Types.FloatData), global::Unity.MLAgents.CommunicatorObjects.ObservationProto.Types.FloatData.Parser, new[]{ "Data" }, null, null, null)})
}));
}
#endregion

public ObservationProto(ObservationProto other) : this() {
shape_ = other.shape_.Clone();
compressionType_ = other.compressionType_;
compressedChannelMapping_ = other.compressedChannelMapping_.Clone();
switch (other.ObservationDataCase) {
case ObservationDataOneofCase.CompressedData:
CompressedData = other.CompressedData;

}
}
/// <summary>Field number for the "compressed_channel_mapping" field.</summary>
public const int CompressedChannelMappingFieldNumber = 5;
private static readonly pb::FieldCodec<int> _repeated_compressedChannelMapping_codec
= pb::FieldCodec.ForInt32(42);
private readonly pbc::RepeatedField<int> compressedChannelMapping_ = new pbc::RepeatedField<int>();
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public pbc::RepeatedField<int> CompressedChannelMapping {
get { return compressedChannelMapping_; }
}
private object observationData_;
/// <summary>Enum of possible cases for the "observation_data" oneof.</summary>
public enum ObservationDataOneofCase {

if (CompressionType != other.CompressionType) return false;
if (CompressedData != other.CompressedData) return false;
if (!object.Equals(FloatData, other.FloatData)) return false;
if(!compressedChannelMapping_.Equals(other.compressedChannelMapping_)) return false;
if (ObservationDataCase != other.ObservationDataCase) return false;
return Equals(_unknownFields, other._unknownFields);
}

if (CompressionType != 0) hash ^= CompressionType.GetHashCode();
if (observationDataCase_ == ObservationDataOneofCase.CompressedData) hash ^= CompressedData.GetHashCode();
if (observationDataCase_ == ObservationDataOneofCase.FloatData) hash ^= FloatData.GetHashCode();
hash ^= compressedChannelMapping_.GetHashCode();
hash ^= (int) observationDataCase_;
if (_unknownFields != null) {
hash ^= _unknownFields.GetHashCode();

output.WriteRawTag(34);
output.WriteMessage(FloatData);
}
compressedChannelMapping_.WriteTo(output, _repeated_compressedChannelMapping_codec);
if (_unknownFields != null) {
_unknownFields.WriteTo(output);
}

if (observationDataCase_ == ObservationDataOneofCase.FloatData) {
size += 1 + pb::CodedOutputStream.ComputeMessageSize(FloatData);
}
size += compressedChannelMapping_.CalculateSize(_repeated_compressedChannelMapping_codec);
if (_unknownFields != null) {
size += _unknownFields.CalculateSize();
}

if (other.CompressionType != 0) {
CompressionType = other.CompressionType;
}
compressedChannelMapping_.Add(other.compressedChannelMapping_);
switch (other.ObservationDataCase) {
case ObservationDataOneofCase.CompressedData:
CompressedData = other.CompressedData;

}
input.ReadMessage(subBuilder);
FloatData = subBuilder;
break;
}
case 42:
case 40: {
compressedChannelMapping_.AddEntriesFrom(input, _repeated_compressedChannelMapping_codec);
break;
}
}

64
com.unity.ml-agents/Runtime/SensorHelper.cs


using UnityEngine;
using Unity.Barracuda;
namespace Unity.MLAgents.Sensors
{

}
}
errorMessage = null;
return true;
}
/// <summary>
/// Generates the observations for the provided sensor, and returns true if they equal the
/// expected values. If they are unequal, errorMessage is also set.
/// This should not generally be used in production code. It is only intended for
/// simplifying unit tests.
/// </summary>
/// <param name="sensor"></param>
/// <param name="expected"></param>
/// <param name="errorMessage"></param>
/// <returns></returns>
public static bool CompareObservation(ISensor sensor, float[,,] expected, out string errorMessage)
{
var tensorShape = new TensorShape(0, expected.GetLength(0), expected.GetLength(1), expected.GetLength(2));
var numExpected = tensorShape.height * tensorShape.width * tensorShape.channels;
const float fill = -1337f;
var output = new float[numExpected];
for (var i = 0; i < numExpected; i++)
{
output[i] = fill;
}
if (numExpected > 0)
{
if (fill != output[0])
{
errorMessage = "Error setting output buffer.";
return false;
}
}
ObservationWriter writer = new ObservationWriter();
writer.SetTarget(output, sensor.GetObservationShape(), 0);
// Make sure ObservationWriter didn't touch anything
if (numExpected > 0)
{
if (fill != output[0])
{
errorMessage = "ObservationWriter.SetTarget modified a buffer it shouldn't have.";
return false;
}
}
sensor.Write(writer);
for (var h = 0; h < tensorShape.height; h++)
{
for (var w = 0; w < tensorShape.width; w++)
{
for (var c = 0; c < tensorShape.channels; c++)
{
if (expected[h, w, c] != output[tensorShape.Index(0, h, w, c)])
{
errorMessage = $"Expected and actual differed in position [{h}, {w}, {c}]. " +
"Expected: {expected[h, w, c]} Actual: {output[tensorShape.Index(0, h, w, c)]} ";
return false;
}
}
}
}
errorMessage = null;
return true;
}

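For context (not introduced by this change), `SensorHelper.CompareObservation` is a test utility; the hunk above adds a `float[,,]` overload for 3D observations alongside the existing `float[]` one. A hedged sketch of the 1D overload in an NUnit test, mirroring the `SensorTestHelper` usage further down:

```csharp
using NUnit.Framework;
using Unity.MLAgents.Sensors;

public class SensorHelperSketch
{
    [Test]
    public void CompareObservationExample()
    {
        // A VectorSensor holding two observations.
        var sensor = new VectorSensor(2);
        sensor.AddObservation(1f);
        sensor.AddObservation(2f);

        // Returns false and fills errorMessage if the written observation differs.
        string errorMessage;
        bool matches = SensorHelper.CompareObservation(sensor, new[] { 1f, 2f }, out errorMessage);
        Assert.IsTrue(matches, errorMessage);
    }
}
```

The new `float[,,]` overload works the same way for height x width x channel sensors.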
2
com.unity.ml-agents/Runtime/Sensors/CameraSensor.cs


using (TimerStack.Instance.Scoped("CameraSensor.WriteToTensor"))
{
var texture = ObservationToTexture(m_Camera, m_Width, m_Height);
var numWritten = Utilities.TextureToTensorProxy(texture, writer, m_Grayscale);
var numWritten = writer.WriteTexture(texture, m_Grayscale);
DestroyTexture(texture);
return numWritten;
}

28
com.unity.ml-agents/Runtime/Sensors/CameraSensorComponent.cs


set { m_Grayscale = value; }
}
[HideInInspector, SerializeField]
[Range(1, 50)]
[Tooltip("Number of camera frames that will be stacked before being fed to the neural network.")]
int m_ObservationStacks = 1;
[HideInInspector, SerializeField, FormerlySerializedAs("compression")]
SensorCompressionType m_Compression = SensorCompressionType.PNG;

}
/// <summary>
/// Whether to stack previous observations. Using 1 means no previous observations.
/// Note that changing this after the sensor is created has no effect.
/// </summary>
public int ObservationStacks
{
get { return m_ObservationStacks; }
set { m_ObservationStacks = value; }
}
/// <summary>
/// Creates the <see cref="CameraSensor"/>
/// </summary>
/// <returns>The created <see cref="CameraSensor"/> object for this component.</returns>

if (ObservationStacks != 1)
{
return new StackingSensor(m_Sensor, ObservationStacks);
}
return m_Sensor;
}

/// <returns>The observation shape of the associated <see cref="CameraSensor"/> object.</returns>
public override int[] GetObservationShape()
{
return CameraSensor.GenerateShape(m_Width, m_Height, Grayscale);
var stacks = ObservationStacks > 1 ? ObservationStacks : 1;
var cameraSensorshape = CameraSensor.GenerateShape(m_Width, m_Height, Grayscale);
if (stacks > 1)
{
cameraSensorshape[cameraSensorshape.Length - 1] *= stacks;
}
return cameraSensorshape;
}
/// <summary>

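The hunk above adds an `ObservationStacks` option to `CameraSensorComponent` and folds the stack count into the reported observation shape. A minimal setup sketch (the component lookup and the stack count of 4 are assumptions for illustration; as noted above, changing the value after the sensor is created has no effect):

```csharp
using Unity.MLAgents.Sensors;
using UnityEngine;

// Illustrative only: configure camera-frame stacking before the Agent initializes.
public class StackedCameraSetup : MonoBehaviour
{
    void Awake()
    {
        var cameraSensor = GetComponent<CameraSensorComponent>();
        cameraSensor.ObservationStacks = 4;   // stack the 4 most recent frames
    }
}
```

With grayscale off and an 84x84 camera, `GetObservationShape()` would then report `{ 84, 84, 12 }`, since the channel dimension is multiplied by the stack count in the code above.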
52
com.unity.ml-agents/Runtime/Sensors/ObservationWriter.cs


}
}
}
public static class ObservationWriterExtension
{
/// <summary>
/// Writes a Texture2D into an ObservationWriter.
/// </summary>
/// <param name="obsWriter">
/// Writer to fill with Texture data.
/// </param>
/// <param name="texture">
/// The texture to be put into the tensor.
/// </param>
/// <param name="grayScale">
/// If set to <c>true</c> the textures will be converted to grayscale before
/// being stored in the tensor.
/// </param>
/// <returns>The number of floats written</returns>
public static int WriteTexture(
this ObservationWriter obsWriter,
Texture2D texture,
bool grayScale)
{
var width = texture.width;
var height = texture.height;
var texturePixels = texture.GetPixels32();
// During training, we convert from Texture to PNG before sending to the trainer, which has the
// effect of flipping the image. We need another flip here at inference time to match this.
for (var h = height - 1; h >= 0; h--)
{
for (var w = 0; w < width; w++)
{
var currentPixel = texturePixels[(height - h - 1) * width + w];
if (grayScale)
{
obsWriter[h, w, 0] =
(currentPixel.r + currentPixel.g + currentPixel.b) / 3f / 255.0f;
}
else
{
// For Color32, the r, g and b values are between 0 and 255.
obsWriter[h, w, 0] = currentPixel.r / 255.0f;
obsWriter[h, w, 1] = currentPixel.g / 255.0f;
obsWriter[h, w, 2] = currentPixel.b / 255.0f;
}
}
}
return height * width * (grayScale ? 1 : 3);
}
}
}

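The `WriteTexture` extension above replaces the former `Utilities.TextureToTensorProxy` helper (removed further down). A hedged sketch of calling it against a plain float buffer, using the `SetTarget` pattern shown in `SensorHelper`:

```csharp
using Unity.MLAgents.Sensors;
using UnityEngine;

// Illustrative only: write an RGB Texture2D into a float buffer shaped (height, width, 3).
public static class WriteTextureSketch
{
    public static float[] ToObservation(Texture2D texture)
    {
        var shape = new[] { texture.height, texture.width, 3 };
        var buffer = new float[texture.height * texture.width * 3];

        var writer = new ObservationWriter();
        writer.SetTarget(buffer, shape, 0);

        // Writes height * width * 3 floats; pass grayScale: true for a single channel.
        writer.WriteTexture(texture, grayScale: false);
        return buffer;
    }
}
```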
2
com.unity.ml-agents/Runtime/Sensors/RenderTextureSensor.cs


using (TimerStack.Instance.Scoped("RenderTextureSensor.Write"))
{
var texture = ObservationToTexture(m_RenderTexture);
var numWritten = Utilities.TextureToTensorProxy(texture, writer, m_Grayscale);
var numWritten = writer.WriteTexture(texture, m_Grayscale);
DestroyTexture(texture);
return numWritten;
}

28
com.unity.ml-agents/Runtime/Sensors/RenderTextureSensorComponent.cs


set { m_Grayscale = value; }
}
[HideInInspector, SerializeField]
[Range(1, 50)]
[Tooltip("Number of frames that will be stacked before being fed to the neural network.")]
int m_ObservationStacks = 1;
[HideInInspector, SerializeField, FormerlySerializedAs("compression")]
SensorCompressionType m_Compression = SensorCompressionType.PNG;

set { m_Compression = value; UpdateSensor(); }
}
/// <summary>
/// Whether to stack previous observations. Using 1 means no previous observations.
/// Note that changing this after the sensor is created has no effect.
/// </summary>
public int ObservationStacks
{
get { return m_ObservationStacks; }
set { m_ObservationStacks = value; }
}
if (ObservationStacks != 1)
{
return new StackingSensor(m_Sensor, ObservationStacks);
}
return m_Sensor;
}

var width = RenderTexture != null ? RenderTexture.width : 0;
var height = RenderTexture != null ? RenderTexture.height : 0;
var observationShape = new[] { height, width, Grayscale ? 1 : 3 };
return new[] { height, width, Grayscale ? 1 : 3 };
var stacks = ObservationStacks > 1 ? ObservationStacks : 1;
if (stacks > 1)
{
observationShape[2] *= stacks;
}
return observationShape;
}
/// <summary>

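`RenderTextureSensorComponent` gets the same stacking option as the camera sensor above, with the stack count multiplied into the channel dimension of the shape. A brief sketch (texture size and stack count are illustrative; they match the 24x16 texture used in the tests below):

```csharp
using Unity.MLAgents.Sensors;
using UnityEngine;

// Illustrative only: an RGB RenderTexture sensor with 2 stacked frames.
// GetObservationShape() would report { 16, 24, 6 } for a 24x16 texture.
public class StackedRenderTextureSetup : MonoBehaviour
{
    public RenderTexture texture;

    void Awake()
    {
        var sensorComponent = GetComponent<RenderTextureSensorComponent>();
        sensorComponent.RenderTexture = texture;
        sensorComponent.ObservationStacks = 2;
    }
}
```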
198
com.unity.ml-agents/Runtime/Sensors/StackingSensor.cs


using System;
using System.Linq;
using System.Runtime.CompilerServices;
using UnityEngine;
using Unity.Barracuda;
[assembly: InternalsVisibleTo("Unity.ML-Agents.Editor.Tests")]
namespace Unity.MLAgents.Sensors
{

/// For example, 4 stacked sets of observations would be output like
/// | t = now - 3 | t = now - 2 | t = now - 1 | t = now |
/// Internally, a circular buffer of arrays is used. The m_CurrentIndex represents the most recent observation.
///
/// Currently, compressed and multidimensional observations are not supported.
/// Currently, observations are stacked on the last dimension.
public class StackingSensor : ISensor
public class StackingSensor : ISparseChannelSensor
{
/// <summary>
/// The wrapped sensor.

string m_Name;
int[] m_Shape;
int[] m_WrappedShape;
/// <summary>
/// Buffer of previous observations

byte[][] m_StackedCompressedObservations;
byte[] m_EmptyCompressedObservation;
int[] m_CompressionMapping;
TensorShape m_tensorShape;
/// <summary>
/// Initializes the sensor.

m_Name = $"StackingSensor_size{numStackedObservations}_{wrapped.GetName()}";
if (wrapped.GetCompressionType() != SensorCompressionType.None)
throw new UnityAgentsException("StackingSensor doesn't support compressed observations.'");
var shape = wrapped.GetObservationShape();
if (shape.Length != 1)
throw new UnityAgentsException("Only 1-D observations are supported by StackingSensor");
m_Shape = new int[shape.Length];
m_UnstackedObservationSize = wrapped.ObservationSize();
for (int d = 0; d < shape.Length; d++)
m_Shape[d] = shape[d];
// TODO support arbitrary stacking dimension
m_Shape[0] *= numStackedObservations;
m_StackedObservations = new float[numStackedObservations][];
for (var i = 0; i < numStackedObservations; i++)
m_StackedObservations[i] = new float[m_UnstackedObservationSize];
m_WrappedShape = wrapped.GetObservationShape();
m_Shape = new int[m_WrappedShape.Length];
m_UnstackedObservationSize = wrapped.ObservationSize();
for (int d = 0; d < m_WrappedShape.Length; d++)
m_Shape[d] = m_WrappedShape[d];
// TODO support arbitrary stacking dimension
m_Shape[m_Shape.Length - 1] *= numStackedObservations;
// Initialize uncompressed buffer anyway in case python trainer does not
// support the compression mapping and has to fall back to uncompressed obs.
m_StackedObservations = new float[numStackedObservations][];
for (var i = 0; i < numStackedObservations; i++)
m_StackedObservations[i] = new float[m_UnstackedObservationSize];
if (m_WrappedSensor.GetCompressionType() != SensorCompressionType.None)
m_StackedCompressedObservations = new byte[numStackedObservations][];
m_EmptyCompressedObservation = CreateEmptyPNG();
for (var i = 0; i < numStackedObservations; i++)
{
m_StackedCompressedObservations[i] = m_EmptyCompressedObservation;
}
m_CompressionMapping = ConstructStackedCompressedChannelMapping(wrapped);
if (m_Shape.Length != 1)
m_tensorShape = new TensorShape(0, m_WrappedShape[0], m_WrappedShape[1], m_WrappedShape[2]);
}
}

// First, call the wrapped sensor's write method. Make sure to use our own writer, not the passed one.
var wrappedShape = m_WrappedSensor.GetObservationShape();
m_LocalWriter.SetTarget(m_StackedObservations[m_CurrentIndex], wrappedShape, 0);
m_LocalWriter.SetTarget(m_StackedObservations[m_CurrentIndex], m_WrappedShape, 0);
for (var i = 0; i < m_NumStackedObservations; i++)
if (m_WrappedShape.Length == 1)
{
for (var i = 0; i < m_NumStackedObservations; i++)
{
var obsIndex = (m_CurrentIndex + 1 + i) % m_NumStackedObservations;
writer.AddRange(m_StackedObservations[obsIndex], numWritten);
numWritten += m_UnstackedObservationSize;
}
}
else
var obsIndex = (m_CurrentIndex + 1 + i) % m_NumStackedObservations;
writer.AddRange(m_StackedObservations[obsIndex], numWritten);
numWritten += m_UnstackedObservationSize;
for (var i = 0; i < m_NumStackedObservations; i++)
{
var obsIndex = (m_CurrentIndex + 1 + i) % m_NumStackedObservations;
for (var h = 0; h < m_WrappedShape[0]; h++)
{
for (var w = 0; w < m_WrappedShape[1]; w++)
{
for (var c = 0; c < m_WrappedShape[2]; c++)
{
writer[h, w, i * m_WrappedShape[2] + c] = m_StackedObservations[obsIndex][m_tensorShape.Index(0, h, w, c)];
}
}
}
}
numWritten = m_WrappedShape[0] * m_WrappedShape[1] * m_WrappedShape[2] * m_NumStackedObservations;
}
return numWritten;

{
Array.Clear(m_StackedObservations[i], 0, m_StackedObservations[i].Length);
}
if (m_WrappedSensor.GetCompressionType() != SensorCompressionType.None)
{
for (var i = 0; i < m_NumStackedObservations; i++)
{
m_StackedCompressedObservations[i] = m_EmptyCompressedObservation;
}
}
}
/// <inheritdoc/>

}
/// <inheritdoc/>
public virtual byte[] GetCompressedObservation()
public byte[] GetCompressedObservation()
return null;
var compressed = m_WrappedSensor.GetCompressedObservation();
m_StackedCompressedObservations[m_CurrentIndex] = compressed;
int bytesLength = 0;
foreach (byte[] compressedObs in m_StackedCompressedObservations)
{
bytesLength += compressedObs.Length;
}
byte[] outputBytes = new byte[bytesLength];
int offset = 0;
for (var i = 0; i < m_NumStackedObservations; i++)
{
var obsIndex = (m_CurrentIndex + 1 + i) % m_NumStackedObservations;
Buffer.BlockCopy(m_StackedCompressedObservations[obsIndex],
0, outputBytes, offset, m_StackedCompressedObservations[obsIndex].Length);
offset += m_StackedCompressedObservations[obsIndex].Length;
}
return outputBytes;
public virtual SensorCompressionType GetCompressionType()
public int[] GetCompressedChannelMapping()
return SensorCompressionType.None;
return m_CompressionMapping;
// TODO support stacked compressed observations (byte stream)
/// <inheritdoc/>
public SensorCompressionType GetCompressionType()
{
return m_WrappedSensor.GetCompressionType();
}
/// <summary>
/// Create Empty PNG for initializing the buffer for stacking.
/// </summary>
internal byte[] CreateEmptyPNG()
{
int height = m_WrappedSensor.GetObservationShape()[0];
int width = m_WrappedSensor.GetObservationShape()[1];
var texture2D = new Texture2D(width, height, TextureFormat.RGB24, false);
Color32[] resetColorArray = texture2D.GetPixels32();
Color32 black = new Color32(0, 0, 0, 0);
for (int i = 0; i < resetColorArray.Length; i++)
{
resetColorArray[i] = black;
}
texture2D.SetPixels32(resetColorArray);
texture2D.Apply();
return texture2D.EncodeToPNG();
}
/// <summary>
/// Construct stacked CompressedChannelMapping.
/// </summary>
internal int[] ConstructStackedCompressedChannelMapping(ISensor wrappedSenesor)
{
// Get CompressedChannelMapping of the wrapped sensor. If the
// wrapped sensor doesn't have one, use default mapping.
// Default mapping: {0, 0, 0} for grayscale, identity mapping {0, 1, ..., n-1} otherwise.
int[] wrappedMapping = null;
int wrappedNumChannel = wrappedSenesor.GetObservationShape()[2];
var sparseChannelSensor = m_WrappedSensor as ISparseChannelSensor;
if (sparseChannelSensor != null)
{
wrappedMapping = sparseChannelSensor.GetCompressedChannelMapping();
}
if (wrappedMapping == null)
{
if (wrappedNumChannel == 1)
{
wrappedMapping = new int[] { 0, 0, 0 };
}
else
{
wrappedMapping = Enumerable.Range(0, wrappedNumChannel).ToArray();
}
}
// Construct stacked mapping using the mapping of wrapped sensor.
// First pad the wrapped mapping to multiple of 3, then repeat
// and add offset to each copy to form the stacked mapping.
int paddedMapLength = (wrappedMapping.Length + 2) / 3 * 3;
var compressionMapping = new int[paddedMapLength * m_NumStackedObservations];
for (var i = 0; i < m_NumStackedObservations; i++)
{
var offset = wrappedNumChannel * i;
for (var j = 0; j < paddedMapLength; j++)
{
if (j < wrappedMapping.Length)
{
compressionMapping[j + paddedMapLength * i] = wrappedMapping[j] >= 0 ? wrappedMapping[j] + offset : -1;
}
else
{
compressionMapping[j + paddedMapLength * i] = -1;
}
}
}
return compressionMapping;
}
}
}

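To make the padding-and-offset arithmetic in `ConstructStackedCompressedChannelMapping` concrete, here is a worked sketch; the numbers mirror the `TestStackingMapping` case in the tests further down:

```csharp
// Wrapped sensor: 4 channels with mapping {0, 1, 2, 3}, stacked twice.
//   padded length   = ((4 + 2) / 3) * 3 = 6   (each PNG carries 3 channels)
//   per-copy offset = 4                        (the wrapped channel count)
// -1 marks padding channels that the trainer should drop.
int[] stackedMapping =
{
    0, 1, 2, 3, -1, -1,   // copy 0
    4, 5, 6, 7, -1, -1,   // copy 1, offset by 4
};
```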
47
com.unity.ml-agents/Runtime/Utilities.cs


{
internal static class Utilities
{
/// <summary>
/// Puts a Texture2D into a ObservationWriter.
/// </summary>
/// <param name="texture">
/// The texture to be put into the tensor.
/// </param>
/// <param name="obsWriter">
/// Writer to fill with Texture data.
/// </param>
/// <param name="grayScale">
/// If set to <c>true</c> the textures will be converted to grayscale before
/// being stored in the tensor.
/// </param>
/// <returns>The number of floats written</returns>
internal static int TextureToTensorProxy(
Texture2D texture,
ObservationWriter obsWriter,
bool grayScale)
{
var width = texture.width;
var height = texture.height;
var texturePixels = texture.GetPixels32();
// During training, we convert from Texture to PNG before sending to the trainer, which has the
// effect of flipping the image. We need another flip here at inference time to match this.
for (var h = height - 1; h >= 0; h--)
{
for (var w = 0; w < width; w++)
{
var currentPixel = texturePixels[(height - h - 1) * width + w];
if (grayScale)
{
obsWriter[h, w, 0] =
(currentPixel.r + currentPixel.g + currentPixel.b) / 3f / 255.0f;
}
else
{
// For Color32, the r, g and b values are between 0 and 255.
obsWriter[h, w, 0] = currentPixel.r / 255.0f;
obsWriter[h, w, 1] = currentPixel.g / 255.0f;
obsWriter[h, w, 2] = currentPixel.b / 255.0f;
}
}
}
return height * width * (grayScale ? 1 : 3);
}
/// <summary>
/// Calculates the cumulative sum of an integer array. The result array will be one element

31
com.unity.ml-agents/Tests/Editor/Communicator/GrpcExtensionsTests.cs


}
}
class DummySparseChannelSensor : DummySensor, ISparseChannelSensor
{
public int[] Mapping;
internal DummySparseChannelSensor()
{
}
public int[] GetCompressedChannelMapping()
{
return Mapping;
}
}
[Test]
public void TestGetObservationProtoCapabilities()
{

}
}
[Test]
public void TestIsTrivialMapping()
{
Assert.AreEqual(GrpcExtensions.IsTrivialMapping(new DummySensor()), true);
var sparseChannelSensor = new DummySparseChannelSensor();
sparseChannelSensor.Mapping = null;
Assert.AreEqual(GrpcExtensions.IsTrivialMapping(sparseChannelSensor), true);
sparseChannelSensor.Mapping = new int[] { 0, 0, 0 };
Assert.AreEqual(GrpcExtensions.IsTrivialMapping(sparseChannelSensor), true);
sparseChannelSensor.Mapping = new int[] { 0, 1, 2, 3, 4 };
Assert.AreEqual(GrpcExtensions.IsTrivialMapping(sparseChannelSensor), true);
sparseChannelSensor.Mapping = new int[] { 1, 2, 3, 4, -1, -1 };
Assert.AreEqual(GrpcExtensions.IsTrivialMapping(sparseChannelSensor), false);
sparseChannelSensor.Mapping = new int[] { 0, 0, 0, 1, 1, 1 };
Assert.AreEqual(GrpcExtensions.IsTrivialMapping(sparseChannelSensor), false);
}
}
}

156
com.unity.ml-agents/Tests/Editor/Sensor/StackingSensorTests.cs


using NUnit.Framework;
using System;
using System.Linq;
using UnityEngine;
using Unity.MLAgents.Sensors;
namespace Unity.MLAgents.Tests

}
[Test]
public void TestStacking()
public void TestVectorStacking()
{
VectorSensor wrapped = new VectorSensor(2);
ISensor sensor = new StackingSensor(wrapped, 3);

}
[Test]
public void TestStackingReset()
public void TestVectorStackingReset()
{
VectorSensor wrapped = new VectorSensor(2);
ISensor sensor = new StackingSensor(wrapped, 3);

sensor.Reset();
wrapped.AddObservation(new[] { 5f, 6f });
SensorTestHelper.CompareObservation(sensor, new[] { 0f, 0f, 0f, 0f, 5f, 6f });
}
class Dummy3DSensor : ISparseChannelSensor
{
public SensorCompressionType CompressionType = SensorCompressionType.PNG;
public int[] Mapping;
public int[] Shape;
public float[,,] CurrentObservation;
internal Dummy3DSensor()
{
}
public int[] GetObservationShape()
{
return Shape;
}
public int Write(ObservationWriter writer)
{
for (var h = 0; h < Shape[0]; h++)
{
for (var w = 0; w < Shape[1]; w++)
{
for (var c = 0; c < Shape[2]; c++)
{
writer[h, w, c] = CurrentObservation[h, w, c];
}
}
}
return Shape[0] * Shape[1] * Shape[2];
}
public byte[] GetCompressedObservation()
{
var writer = new ObservationWriter();
var flattenedObservation = new float[Shape[0] * Shape[1] * Shape[2]];
writer.SetTarget(flattenedObservation, Shape, 0);
Write(writer);
byte[] bytes = Array.ConvertAll(flattenedObservation, (z) => (byte)z);
return bytes;
}
public void Update() { }
public void Reset() { }
public SensorCompressionType GetCompressionType()
{
return CompressionType;
}
public string GetName()
{
return "Dummy";
}
public int[] GetCompressedChannelMapping()
{
return Mapping;
}
}
[Test]
public void TestStackingMapping()
{
// Test grayscale stacked mapping with CameraSensor
var cameraSensor = new CameraSensor(new Camera(), 64, 64,
true, "grayscaleCamera", SensorCompressionType.PNG);
var stackedCameraSensor = new StackingSensor(cameraSensor, 2);
Assert.AreEqual(stackedCameraSensor.GetCompressedChannelMapping(), new[] { 0, 0, 0, 1, 1, 1 });
// Test RGB stacked mapping with RenderTextureSensor
var renderTextureSensor = new RenderTextureSensor(new RenderTexture(24, 16, 0),
false, "renderTexture", SensorCompressionType.PNG);
var stackedRenderTextureSensor = new StackingSensor(renderTextureSensor, 2);
Assert.AreEqual(stackedRenderTextureSensor.GetCompressedChannelMapping(), new[] { 0, 1, 2, 3, 4, 5 });
// Test mapping with number of layers not being multiple of 3
var dummySensor = new Dummy3DSensor();
dummySensor.Shape = new int[] { 2, 2, 4 };
dummySensor.Mapping = new int[] { 0, 1, 2, 3 };
var stackedDummySensor = new StackingSensor(dummySensor, 2);
Assert.AreEqual(stackedDummySensor.GetCompressedChannelMapping(), new[] { 0, 1, 2, 3, -1, -1, 4, 5, 6, 7, -1, -1 });
// Test mapping with dummy layers that should be dropped
var paddedDummySensor = new Dummy3DSensor();
paddedDummySensor.Shape = new int[] { 2, 2, 4 };
paddedDummySensor.Mapping = new int[] { 0, 1, 2, 3, -1, -1 };
var stackedPaddedDummySensor = new StackingSensor(paddedDummySensor, 2);
Assert.AreEqual(stackedPaddedDummySensor.GetCompressedChannelMapping(), new[] { 0, 1, 2, 3, -1, -1, 4, 5, 6, 7, -1, -1 });
}
[Test]
public void Test3DStacking()
{
var wrapped = new Dummy3DSensor();
wrapped.Shape = new int[] { 2, 1, 2 };
var sensor = new StackingSensor(wrapped, 2);
// Check the stacking is on the last dimension
wrapped.CurrentObservation = new[, ,] { { { 1f, 2f } }, { { 3f, 4f } } };
SensorTestHelper.CompareObservation(sensor, new[, ,] { { { 0f, 0f, 1f, 2f } }, { { 0f, 0f, 3f, 4f } } });
sensor.Update();
wrapped.CurrentObservation = new[, ,] { { { 5f, 6f } }, { { 7f, 8f } } };
SensorTestHelper.CompareObservation(sensor, new[, ,] { { { 1f, 2f, 5f, 6f } }, { { 3f, 4f, 7f, 8f } } });
sensor.Update();
wrapped.CurrentObservation = new[, ,] { { { 9f, 10f } }, { { 11f, 12f } } };
SensorTestHelper.CompareObservation(sensor, new[, ,] { { { 5f, 6f, 9f, 10f } }, { { 7f, 8f, 11f, 12f } } });
// Check that if we don't call Update(), the same observations are produced
SensorTestHelper.CompareObservation(sensor, new[, ,] { { { 5f, 6f, 9f, 10f } }, { { 7f, 8f, 11f, 12f } } });
// Test reset
sensor.Reset();
wrapped.CurrentObservation = new[, ,] { { { 13f, 14f } }, { { 15f, 16f } } };
SensorTestHelper.CompareObservation(sensor, new[, ,] { { { 0f, 0f, 13f, 14f } }, { { 0f, 0f, 15f, 16f } } });
}
[Test]
public void TestStackedGetCompressedObservation()
{
var wrapped = new Dummy3DSensor();
wrapped.Shape = new int[] { 1, 1, 3 };
var sensor = new StackingSensor(wrapped, 2);
wrapped.CurrentObservation = new[, ,] { { { 1f, 2f, 3f } } };
var expected1 = sensor.CreateEmptyPNG();
expected1 = expected1.Concat(Array.ConvertAll(new[] { 1f, 2f, 3f }, (z) => (byte)z)).ToArray();
Assert.AreEqual(sensor.GetCompressedObservation(), expected1);
sensor.Update();
wrapped.CurrentObservation = new[, ,] { { { 4f, 5f, 6f } } };
var expected2 = Array.ConvertAll(new[] { 1f, 2f, 3f, 4f, 5f, 6f }, (z) => (byte)z);
Assert.AreEqual(sensor.GetCompressedObservation(), expected2);
sensor.Update();
wrapped.CurrentObservation = new[, ,] { { { 7f, 8f, 9f } } };
var expected3 = Array.ConvertAll(new[] { 4f, 5f, 6f, 7f, 8f, 9f }, (z) => (byte)z);
Assert.AreEqual(sensor.GetCompressedObservation(), expected3);
// Test reset
sensor.Reset();
wrapped.CurrentObservation = new[, ,] { { { 10f, 11f, 12f } } };
var expected4 = sensor.CreateEmptyPNG();
expected4 = expected4.Concat(Array.ConvertAll(new[] { 10f, 11f, 12f }, (z) => (byte)z)).ToArray();
Assert.AreEqual(sensor.GetCompressedObservation(), expected4);
}
}
}

10
com.unity.ml-agents/Tests/Editor/Sensor/VectorSensorTests.cs


namespace Unity.MLAgents.Tests
{
public static class SensorTestHelper
{
public static void CompareObservation(ISensor sensor, float[] expected)
{
string errorMessage;
bool isOK = SensorHelper.CompareObservation(sensor, expected, out errorMessage);
Assert.IsTrue(isOK, errorMessage);
}
}
public class VectorSensorTests
{
[Test]

4
com.unity.ml-agents/Tests/Runtime/RuntimeAPITest.cs


[SetUp]
public static void Setup()
{
if (Academy.IsInitialized)
{
Academy.Instance.Dispose();
}
Academy.Instance.AutomaticSteppingEnabled = false;
}

2
com.unity.ml-agents/package.json


{
"name": "com.unity.ml-agents",
"displayName": "ML Agents",
"version": "1.4.0-preview",
"version": "1.5.0-preview",
"unity": "2018.4",
"description": "Use state-of-the-art machine learning to create intelligent character behaviors in any Unity environment (games, robotics, film, etc.).",
"dependencies": {

4
docs/Installation-Anaconda-Windows.md


the ml-agents Conda environment by typing `activate ml-agents`)_:
```sh
git clone --branch release_7 https://github.com/Unity-Technologies/ml-agents.git
git clone --branch release_8 https://github.com/Unity-Technologies/ml-agents.git
The `--branch release_7` option will switch to the tag of the latest stable
The `--branch release_8` option will switch to the tag of the latest stable
release. Omitting that will get the `master` branch which is potentially
unstable.

6
docs/Installation.md


of our tutorials / guides assume you have access to our example environments).
```sh
git clone --branch release_7 https://github.com/Unity-Technologies/ml-agents.git
git clone --branch release_8 https://github.com/Unity-Technologies/ml-agents.git
The `--branch release_7` option will switch to the tag of the latest stable
The `--branch release_8` option will switch to the tag of the latest stable
release. Omitting that will get the `master` branch which is potentially
unstable.

ML-Agents Toolkit for your purposes. If you plan to contribute those changes
back, make sure to clone the `master` branch (by omitting `--branch release_7`
back, make sure to clone the `master` branch (by omitting `--branch release_8`
from the command above). See our
[Contributions Guidelines](../com.unity.ml-agents/CONTRIBUTING.md) for more
information on contributing to the ML-Agents Toolkit.

4
docs/Learning-Environment-Create-New.md


}
// Fell off platform
if (this.transform.localPosition.y < 0)
else if (this.transform.localPosition.y < 0)
{
EndEpisode();
}

1. Add the `Decision Requester` script with the Add Component button from the
RollerAgent Inspector.
1. Change **Decision Period** to `10`. For more information on decisions, see [the Agent documentation](Learning-Environment-Design-Agents.md#decisions)
1. Drag the Target GameObject from the Hierarchy window to the RollerAgent
Target field.
1. Add the `Behavior Parameters` script with the Add Component button from the
RollerAgent Inspector.
1. Modify the Behavior Parameters of the Agent :

2
docs/Learning-Environment-Design-Agents.md


AddReward(1.0f);
EndEpisode();
}
if (hitObjects.Where(col => col.gameObject.tag == "pit").ToArray().Length == 1)
else if (hitObjects.Where(col => col.gameObject.tag == "pit").ToArray().Length == 1)
{
AddReward(-1f);
EndEpisode();

10
docs/Learning-Environment-Examples.md


rotation of the agent cube and position of ball.
- Vector Action space: (Continuous) Size of 2, with one value corresponding to
X-rotation, and the other to Z-rotation.
- Visual Observations: None.
- Visual Observations: Third-person view from the upper-front of the agent. Use
`Visual3DBall` scene.
- Float Properties: Three
- scale: Specifies the scale of the ball in the 3 dimensions (equal across the
three dimensions)

- Side Motion (3 possible actions: Left, Right, No Action)
- Rotation (3 possible actions: Rotate Left, Rotate Right, No Action)
- Laser (2 possible actions: Laser, No Action)
- Visual Observations (Optional): First-person camera per-agent. Use
`VisualFoodCollector` scene. **The visual observation version of this
environment does not train with the provided default training parameters.**
- Visual Observations (Optional): First-person camera per-agent, plus one vector
flag representing the frozen state of the agent. This scene uses a combination
of vector and visual observations and the training will not succeed without
the frozen vector flag. Use `VisualFoodCollector` scene.
- Float Properties: Two
- laser_length: Length of the laser used by the agent
- Default: 1

24
docs/ML-Agents-Overview.md


- [A Quick Note on Reward Signals](#a-quick-note-on-reward-signals)
- [Deep Reinforcement Learning](#deep-reinforcement-learning)
- [Curiosity for Sparse-reward Environments](#curiosity-for-sparse-reward-environments)
- [RND for Sparse-reward Environments](#rnd-for-sparse-reward-environments)
- [Imitation Learning](#imitation-learning)
- [GAIL (Generative Adversarial Imitation Learning)](#gail-generative-adversarial-imitation-learning)
- [Behavioral Cloning (BC)](#behavioral-cloning-bc)

and intrinsic reward signals.
The ML-Agents Toolkit allows reward signals to be defined in a modular way, and
we provide three reward signals that can be mixed and matched to help shape
we provide four reward signals that can be mixed and matched to help shape
your agent's behavior:
- `extrinsic`: represents the rewards defined in your environment, and is

- `curiosity`: represents an intrinsic reward signal that encourages exploration
in sparse-reward environments that is defined by the Curiosity module (see
below).
- `rnd`: represents an intrinsic reward signal that encourages exploration
in sparse-reward environments that is defined by the RND module (see
below). (Not available for TensorFlow trainers)
### Deep Reinforcement Learning

For more information, see our dedicated
[blog post on the Curiosity module](https://blogs.unity3d.com/2018/06/26/solving-sparse-reward-tasks-with-curiosity/).
#### RND for Sparse-reward Environments
Similarly to Curiosity, Random Network Distillation (RND) is useful in sparse or rare
reward environments as it helps the Agent explore. The RND Module is implemented following
the paper [Exploration by Random Network Distillation](https://arxiv.org/abs/1810.12894).
RND uses two networks:
- The first is a network with fixed random weights that takes observations as inputs and
generates an encoding
- The second is a network with similar architecture that is trained to predict the
outputs of the first network and uses the observations the Agent collects as training data.
The loss (the squared difference between the predicted and actual encoded observations)
of the trained model is used as intrinsic reward. The more an Agent visits a state, the
more accurate the predictions and the lower the rewards, which encourages the Agent to
explore new states with higher prediction errors.
__Note:__ RND is not available for TensorFlow trainers (only PyTorch trainers)
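As a rough sketch of the idea (notation ours, not taken from the toolkit docs): with a fixed, randomly initialized encoder $f$ and a trained predictor $\hat{f}_\theta$, the intrinsic reward for an observation $o_t$ is the predictor's error,

$$ r^{\text{RND}}_t = \lVert \hat{f}_\theta(o_t) - f(o_t) \rVert^2 , $$

which shrinks for observations the Agent visits often and stays high for novel ones.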
### Imitation Learning

16
docs/Migrating.md


# Migrating
## Migrating from Release 3 to latest
## Migrating from Release 7 to latest
### Important changes
- Some trainer files were moved. If you were using the `TrainerFactory` class, it was moved to
the `trainers/trainer` folder.
- The `components` folder containing `bc` and `reward_signals` code was moved to the `trainers/tf`
folder
### Steps to Migrate
- Replace calls to `from mlagents.trainers.trainer_util import TrainerFactory` to `from mlagents.trainers.trainer import TrainerFactory`
- Replace calls to `from mlagents.trainers.trainer_util import handle_existing_directories` to `from mlagents.trainers.directory_utils import validate_existing_directories`
- Replace `mlagents.trainers.components` with `mlagents.trainers.tf.components` in your import statements.
## Migrating from Release 3 to Release 7
### Important changes
- The Parameter Randomization feature has been merged with the Curriculum feature. It is now possible to specify a sampler

13
docs/Training-Configuration-File.md


- [Extrinsic Rewards](#extrinsic-rewards)
- [Curiosity Intrinsic Reward](#curiosity-intrinsic-reward)
- [GAIL Intrinsic Reward](#gail-intrinsic-reward)
- [RND Intrinsic Reward](#rnd-intrinsic-reward)
- [Reward Signal Settings for SAC](#reward-signal-settings-for-sac)
- [Behavioral Cloning](#behavioral-cloning)
- [Memory-enhanced Agents using Recurrent Neural Networks](#memory-enhanced-agents-using-recurrent-neural-networks)

| `gail -> learning_rate` | (Optional, default = `3e-4`) Learning rate used to update the discriminator. This should typically be decreased if training is unstable, and the GAIL loss is unstable. <br><br>Typical range: `1e-5` - `1e-3` |
| `gail -> use_actions` | (default = `false`) Determines whether the discriminator should discriminate based on both observations and actions, or just observations. Set to True if you want the agent to mimic the actions from the demonstrations, and False if you'd rather have the agent visit the same states as in the demonstrations but with possibly different actions. Setting to False is more likely to be stable, especially with imperfect demonstrations, but may learn slower. |
| `gail -> use_vail` | (default = `false`) Enables a variational bottleneck within the GAIL discriminator. This forces the discriminator to learn a more general representation and reduces its tendency to be "too good" at discriminating, making learning more stable. However, it does increase training time. Enable this if you notice your imitation learning is unstable, or unable to learn the task at hand. |
### RND Intrinsic Reward
Random Network Distillation (RND) is only available for the PyTorch trainers.
To enable RND, provide these settings:
| **Setting** | **Description** |
| :--------------------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| `rnd -> strength` | (default = `1.0`) Magnitude of the reward generated by the intrinsic RND module. This should be scaled in order to ensure it is large enough to not be overwhelmed by extrinsic reward signals in the environment. Likewise it should not be too large to overwhelm the extrinsic reward signal. <br><br>Typical range: `0.001` - `0.01` |
| `rnd -> gamma` | (default = `0.99`) Discount factor for future rewards. <br><br>Typical range: `0.8` - `0.995` |
| `rnd -> encoding_size` | (default = `64`) Size of the encoding used by the intrinsic RND model. <br><br>Typical range: `64` - `256` |
| `rnd -> learning_rate` | (default = `3e-4`) Learning rate used to update the RND module. This should be large enough for the RND module to quickly learn the state representation, but small enough to allow for stable learning. <br><br>Typical range: `1e-5` - `1e-3` |
## Behavioral Cloning

2
docs/Training-on-Amazon-Web-Service.md


2. Clone the ML-Agents repo and install the required Python packages
```sh
git clone --branch release_7 https://github.com/Unity-Technologies/ml-agents.git
git clone --branch release_8 https://github.com/Unity-Technologies/ml-agents.git
cd ml-agents/ml-agents/
pip3 install -e .
```

4
docs/Unity-Inference-Engine.md


loading expects certain conventions for constants and tensor names. While it is
possible to construct a model that follows these conventions, we don't provide
any additional help for this. More details can be found in
[TensorNames.cs](https://github.com/Unity-Technologies/ml-agents/blob/release_7_docs/com.unity.ml-agents/Runtime/Inference/TensorNames.cs)
[TensorNames.cs](https://github.com/Unity-Technologies/ml-agents/blob/release_8_docs/com.unity.ml-agents/Runtime/Inference/TensorNames.cs)
[BarracudaModelParamLoader.cs](https://github.com/Unity-Technologies/ml-agents/blob/release_7_docs/com.unity.ml-agents/Runtime/Inference/BarracudaModelParamLoader.cs).
[BarracudaModelParamLoader.cs](https://github.com/Unity-Technologies/ml-agents/blob/release_8_docs/com.unity.ml-agents/Runtime/Inference/BarracudaModelParamLoader.cs).
If you wish to run inference on an externally trained model, you should use
Barracuda directly, instead of trying to run it through ML-Agents.

2
gym-unity/gym_unity/__init__.py


# Version of the library that will be used to upload to pypi
__version__ = "0.21.0.dev0"
__version__ = "0.22.0.dev0"
# Git tag that will be checked to determine whether to trigger upload to pypi
__release_tag__ = None

4
gym-unity/setup.py


class VerifyVersionCommand(install):
"""
Custom command to verify that the git tag is the expected one for the release.
Based on https://circleci.com/blog/continuously-deploying-python-packages-to-pypi-with-circleci/
Originally based on https://circleci.com/blog/continuously-deploying-python-packages-to-pypi-with-circleci/
This differs slightly because our tags and versions are different.
"""

tag = os.getenv("CIRCLE_TAG")
tag = os.getenv("GITHUB_REF", "NO GITHUB TAG!").replace("refs/tags/", "")
if tag != EXPECTED_TAG:
info = "Git tag: {} does not match the expected tag of this app: {}".format(

2
ml-agents-envs/mlagents_envs/__init__.py


# Version of the library that will be used to upload to pypi
__version__ = "0.21.0.dev0"
__version__ = "0.22.0.dev0"
# Git tag that will be checked to determine whether to trigger upload to pypi
__release_tag__ = None

11
ml-agents-envs/mlagents_envs/communicator_objects/capabilities_pb2.py


name='mlagents_envs/communicator_objects/capabilities.proto',
package='communicator_objects',
syntax='proto3',
serialized_pb=_b('\n5mlagents_envs/communicator_objects/capabilities.proto\x12\x14\x63ommunicator_objects\"[\n\x18UnityRLCapabilitiesProto\x12\x1a\n\x12\x62\x61seRLCapabilities\x18\x01 \x01(\x08\x12#\n\x1b\x63oncatenatedPngObservations\x18\x02 \x01(\x08\x42%\xaa\x02\"Unity.MLAgents.CommunicatorObjectsb\x06proto3')
serialized_pb=_b('\n5mlagents_envs/communicator_objects/capabilities.proto\x12\x14\x63ommunicator_objects\"}\n\x18UnityRLCapabilitiesProto\x12\x1a\n\x12\x62\x61seRLCapabilities\x18\x01 \x01(\x08\x12#\n\x1b\x63oncatenatedPngObservations\x18\x02 \x01(\x08\x12 \n\x18\x63ompressedChannelMapping\x18\x03 \x01(\x08\x42%\xaa\x02\"Unity.MLAgents.CommunicatorObjectsb\x06proto3')
)

message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='compressedChannelMapping', full_name='communicator_objects.UnityRLCapabilitiesProto.compressedChannelMapping', index=2,
number=3, type=8, cpp_type=7, label=1,
has_default_value=False, default_value=False,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
],
extensions=[
],

oneofs=[
],
serialized_start=79,
serialized_end=170,
serialized_end=204,
)
DESCRIPTOR.message_types_by_name['UnityRLCapabilitiesProto'] = _UNITYRLCAPABILITIESPROTO

6
ml-agents-envs/mlagents_envs/communicator_objects/capabilities_pb2.pyi


DESCRIPTOR: google___protobuf___descriptor___Descriptor = ...
baseRLCapabilities = ... # type: builtin___bool
concatenatedPngObservations = ... # type: builtin___bool
compressedChannelMapping = ... # type: builtin___bool
compressedChannelMapping : typing___Optional[builtin___bool] = None,
) -> None: ...
@classmethod
def FromString(cls, s: builtin___bytes) -> UnityRLCapabilitiesProto: ...

def ClearField(self, field_name: typing_extensions___Literal[u"baseRLCapabilities",u"concatenatedPngObservations"]) -> None: ...
def ClearField(self, field_name: typing_extensions___Literal[u"baseRLCapabilities",u"compressedChannelMapping",u"concatenatedPngObservations"]) -> None: ...
def ClearField(self, field_name: typing_extensions___Literal[u"baseRLCapabilities",b"baseRLCapabilities",u"concatenatedPngObservations",b"concatenatedPngObservations"]) -> None: ...
def ClearField(self, field_name: typing_extensions___Literal[u"baseRLCapabilities",b"baseRLCapabilities",u"compressedChannelMapping",b"compressedChannelMapping",u"concatenatedPngObservations",b"concatenatedPngObservations"]) -> None: ...

19
ml-agents-envs/mlagents_envs/communicator_objects/observation_pb2.py


name='mlagents_envs/communicator_objects/observation.proto',
package='communicator_objects',
syntax='proto3',
serialized_pb=_b('\n4mlagents_envs/communicator_objects/observation.proto\x12\x14\x63ommunicator_objects\"\xf9\x01\n\x10ObservationProto\x12\r\n\x05shape\x18\x01 \x03(\x05\x12\x44\n\x10\x63ompression_type\x18\x02 \x01(\x0e\x32*.communicator_objects.CompressionTypeProto\x12\x19\n\x0f\x63ompressed_data\x18\x03 \x01(\x0cH\x00\x12\x46\n\nfloat_data\x18\x04 \x01(\x0b\x32\x30.communicator_objects.ObservationProto.FloatDataH\x00\x1a\x19\n\tFloatData\x12\x0c\n\x04\x64\x61ta\x18\x01 \x03(\x02\x42\x12\n\x10observation_data*)\n\x14\x43ompressionTypeProto\x12\x08\n\x04NONE\x10\x00\x12\x07\n\x03PNG\x10\x01\x42%\xaa\x02\"Unity.MLAgents.CommunicatorObjectsb\x06proto3')
serialized_pb=_b('\n4mlagents_envs/communicator_objects/observation.proto\x12\x14\x63ommunicator_objects\"\x9d\x02\n\x10ObservationProto\x12\r\n\x05shape\x18\x01 \x03(\x05\x12\x44\n\x10\x63ompression_type\x18\x02 \x01(\x0e\x32*.communicator_objects.CompressionTypeProto\x12\x19\n\x0f\x63ompressed_data\x18\x03 \x01(\x0cH\x00\x12\x46\n\nfloat_data\x18\x04 \x01(\x0b\x32\x30.communicator_objects.ObservationProto.FloatDataH\x00\x12\"\n\x1a\x63ompressed_channel_mapping\x18\x05 \x03(\x05\x1a\x19\n\tFloatData\x12\x0c\n\x04\x64\x61ta\x18\x01 \x03(\x02\x42\x12\n\x10observation_data*)\n\x14\x43ompressionTypeProto\x12\x08\n\x04NONE\x10\x00\x12\x07\n\x03PNG\x10\x01\x42%\xaa\x02\"Unity.MLAgents.CommunicatorObjectsb\x06proto3')
)
_COMPRESSIONTYPEPROTO = _descriptor.EnumDescriptor(

],
containing_type=None,
options=None,
serialized_start=330,
serialized_end=371,
serialized_start=366,
serialized_end=407,
)
_sym_db.RegisterEnumDescriptor(_COMPRESSIONTYPEPROTO)

extension_ranges=[],
oneofs=[
],
serialized_start=283,
serialized_end=308,
serialized_start=319,
serialized_end=344,
)
_OBSERVATIONPROTO = _descriptor.Descriptor(

message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='compressed_channel_mapping', full_name='communicator_objects.ObservationProto.compressed_channel_mapping', index=4,
number=5, type=5, cpp_type=1, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
],
extensions=[
],

index=0, containing_type=None, fields=[]),
],
serialized_start=79,
serialized_end=328,
serialized_end=364,
)
_OBSERVATIONPROTO_FLOATDATA.containing_type = _OBSERVATIONPROTO

6
ml-agents-envs/mlagents_envs/communicator_objects/observation_pb2.pyi


shape = ... # type: google___protobuf___internal___containers___RepeatedScalarFieldContainer[builtin___int]
compression_type = ... # type: CompressionTypeProto
compressed_data = ... # type: builtin___bytes
compressed_channel_mapping = ... # type: google___protobuf___internal___containers___RepeatedScalarFieldContainer[builtin___int]
@property
def float_data(self) -> ObservationProto.FloatData: ...

compression_type : typing___Optional[CompressionTypeProto] = None,
compressed_data : typing___Optional[builtin___bytes] = None,
float_data : typing___Optional[ObservationProto.FloatData] = None,
compressed_channel_mapping : typing___Optional[typing___Iterable[builtin___int]] = None,
) -> None: ...
@classmethod
def FromString(cls, s: builtin___bytes) -> ObservationProto: ...

def HasField(self, field_name: typing_extensions___Literal[u"compressed_data",u"float_data",u"observation_data"]) -> builtin___bool: ...
def ClearField(self, field_name: typing_extensions___Literal[u"compressed_data",u"compression_type",u"float_data",u"observation_data",u"shape"]) -> None: ...
def ClearField(self, field_name: typing_extensions___Literal[u"compressed_channel_mapping",u"compressed_data",u"compression_type",u"float_data",u"observation_data",u"shape"]) -> None: ...
def ClearField(self, field_name: typing_extensions___Literal[u"compressed_data",b"compressed_data",u"compression_type",b"compression_type",u"float_data",b"float_data",u"observation_data",b"observation_data",u"shape",b"shape"]) -> None: ...
def ClearField(self, field_name: typing_extensions___Literal[u"compressed_channel_mapping",b"compressed_channel_mapping",u"compressed_data",b"compressed_data",u"compression_type",b"compression_type",u"float_data",b"float_data",u"observation_data",b"observation_data",u"shape",b"shape"]) -> None: ...
def WhichOneof(self, oneof_group: typing_extensions___Literal[u"observation_data",b"observation_data"]) -> typing_extensions___Literal["compressed_data","float_data"]: ...

4
ml-agents-envs/mlagents_envs/environment.py


# Revision history:
# * 1.0.0 - initial version
# * 1.1.0 - support concatenated PNGs for compressed observations.
API_VERSION = "1.1.0"
# * 1.2.0 - support compression mapping for stacked compressed observations.
API_VERSION = "1.2.0"
# Default port that the editor listens on. If an environment executable
# isn't specified, this port will be used.

capabilities = UnityRLCapabilitiesProto()
capabilities.baseRLCapabilities = True
capabilities.concatenatedPngObservations = True
capabilities.compressedChannelMapping = True
return capabilities
@staticmethod

81
ml-agents-envs/mlagents_envs/rpc_utils.py


@timed
def process_pixels(image_bytes: bytes, expected_channels: int) -> np.ndarray:
def process_pixels(
image_bytes: bytes, expected_channels: int, mappings: Optional[List[int]] = None
) -> np.ndarray:
"""
Converts a byte array observation image into a numpy array, re-sizes it,
and optionally converts it to grayscale. If a compressed channel mapping is provided, the decoded channels are re-grouped according to it.

"""
image_fp = OffsetBytesIO(image_bytes)
if expected_channels == 1:
# Convert to grayscale
with hierarchical_timer("image_decompress"):
image = Image.open(image_fp)
# Normally Image loads lazily, load() forces it to do loading in the timer scope.
image.load()
s = np.array(image, dtype=np.float32) / 255.0
s = np.mean(s, axis=2)
s = np.reshape(s, [s.shape[0], s.shape[1], 1])
return s
# Normally Image loads lazily, load() forces it to do loading in the timer scope.
image.load()
image_arrays.append(np.array(image, dtype=np.float32) / 255.0)

# Didn't find the header, so must be at the end.
break
img = np.concatenate(image_arrays, axis=2)
# Drop any extra channels: padding channels may have been added so that the
# total channel count is divisible by 3 (each PNG holds three channels).
actual_channels = list(img.shape)[2]
if actual_channels > expected_channels:
img = img[..., 0:expected_channels]
if mappings is not None and len(mappings) > 0:
return _process_images_mapping(image_arrays, mappings)
else:
return _process_images_num_channels(image_arrays, expected_channels)
def _process_images_mapping(image_arrays, mappings):
"""
Helper function for processing decompressed images with compressed channel mappings.
"""
image_arrays = np.concatenate(image_arrays, axis=2).transpose((2, 0, 1))
if len(mappings) != len(image_arrays):
raise UnityObservationException(
f"Compressed observation and its mapping had different number of channels - "
f"observation had {len(image_arrays)} channels but its mapping had {len(mappings)} channels"
)
if len({m for m in mappings if m > -1}) != max(mappings) + 1:
raise UnityObservationException(
f"Invalid Compressed Channel Mapping: the mapping {mappings} does not have the correct format."
)
if max(mappings) >= len(image_arrays):
raise UnityObservationException(
f"Invalid Compressed Channel Mapping: the mapping has index larger than the total "
f"number of channels in observation - mapping index {max(mappings)} is"
f"invalid for input observation with {len(image_arrays)} channels."
)
processed_image_arrays: List[np.array] = [[] for _ in range(max(mappings) + 1)]
for mapping_idx, img in zip(mappings, image_arrays):
if mapping_idx > -1:
processed_image_arrays[mapping_idx].append(img)
for i, img_array in enumerate(processed_image_arrays):
processed_image_arrays[i] = np.mean(img_array, axis=0)
img = np.stack(processed_image_arrays, axis=2)
return img
def _process_images_num_channels(image_arrays, expected_channels):
"""
Helper function for processing decompressed images given an expected number of channels.
This is for the old API where no mapping is provided. Use the first n channels, where n = expected_channels.
"""
if expected_channels == 1:
# Convert to grayscale
img = np.mean(image_arrays[0], axis=2)
img = np.reshape(img, [img.shape[0], img.shape[1], 1])
else:
img = np.concatenate(image_arrays, axis=2)
# Drop any extra channels: padding channels may have been added so that the
# total channel count is divisible by 3 (each PNG holds three channels).
actual_channels = list(img.shape)[2]
if actual_channels > expected_channels:
img = img[..., 0:expected_channels]
return img
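As a side note on _process_images_mapping above, here is a minimal sketch of how a compressed channel mapping is interpreted (shapes, values, and variable names are illustrative only, not the mlagents_envs API): each entry names the observation channel its decoded PNG channel belongs to, channels that share an index are averaged, and -1 marks padding channels that are dropped.

```python
import numpy as np

# Illustrative only: six decoded PNG channels (H, W) folded into four observation
# channels by the mapping [0, 1, 2, 3, -1, -1]; the last two channels are padding.
height, width = 2, 2
channels = [np.full((height, width), float(i)) for i in range(6)]
mapping = [0, 1, 2, 3, -1, -1]

grouped = [[] for _ in range(max(mapping) + 1)]
for target, channel in zip(mapping, channels):
    if target > -1:
        grouped[target].append(channel)

# Channels mapped to the same index are averaged; here every group holds one channel.
obs = np.stack([np.mean(group, axis=0) for group in grouped], axis=2)
print(obs.shape)  # (2, 2, 4)
```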

img = np.reshape(img, obs.shape)
return img
else:
img = process_pixels(obs.compressed_data, expected_channels)
img = process_pixels(
obs.compressed_data, expected_channels, list(obs.compressed_channel_mapping)
)
# Compare decompressed image size to observation shape and make sure they match
if list(obs.shape) != list(img.shape):
raise UnityObservationException(

76
ml-agents-envs/mlagents_envs/tests/test_rpc_utils.py


return bytes_out
def generate_compressed_proto_obs(in_array: np.ndarray) -> ObservationProto:
# test helper function for old C# API (no compressed channel mapping)
def generate_compressed_proto_obs(
in_array: np.ndarray, grayscale: bool = False
) -> ObservationProto:
obs_proto.shape.extend(in_array.shape)
if grayscale:
# grayscale flag is only used for old API without mapping
expected_shape = [in_array.shape[0], in_array.shape[1], 1]
obs_proto.shape.extend(expected_shape)
else:
obs_proto.shape.extend(in_array.shape)
return obs_proto
# test helper function for new C# API (with compressed channel mapping)
def generate_compressed_proto_obs_with_mapping(
in_array: np.ndarray, mapping: List[int]
) -> ObservationProto:
obs_proto = ObservationProto()
obs_proto.compressed_data = generate_compressed_data(in_array)
obs_proto.compression_type = PNG
if mapping is not None:
obs_proto.compressed_channel_mapping.extend(mapping)
expected_shape = [
in_array.shape[0],
in_array.shape[1],
len({m for m in mapping if m >= 0}),
]
obs_proto.shape.extend(expected_shape)
else:
obs_proto.shape.extend(in_array.shape)
return obs_proto

in_array_1 = np.random.rand(128, 64, 3)
proto_obs_1 = generate_compressed_proto_obs(in_array_1)
in_array_2 = np.random.rand(128, 64, 3)
proto_obs_2 = generate_uncompressed_proto_obs(in_array_2)
in_array_2_mapping = [0, 1, 2]
proto_obs_2 = generate_compressed_proto_obs_with_mapping(
in_array_2, in_array_2_mapping
)
ap1 = AgentInfoProto()
ap1.observations.extend([proto_obs_1])
ap2 = AgentInfoProto()

assert list(arr.shape) == [2, 128, 64, 3]
assert np.allclose(arr[0, :, :, :], in_array_1, atol=0.01)
assert np.allclose(arr[1, :, :, :], in_array_2, atol=0.01)
def test_process_visual_observation_grayscale():
in_array_1 = np.random.rand(128, 64, 3)
proto_obs_1 = generate_compressed_proto_obs(in_array_1, grayscale=True)
expected_out_array_1 = np.mean(in_array_1, axis=2, keepdims=True)
in_array_2 = np.random.rand(128, 64, 3)
in_array_2_mapping = [0, 0, 0]
proto_obs_2 = generate_compressed_proto_obs_with_mapping(
in_array_2, in_array_2_mapping
)
expected_out_array_2 = np.mean(in_array_2, axis=2, keepdims=True)
ap1 = AgentInfoProto()
ap1.observations.extend([proto_obs_1])
ap2 = AgentInfoProto()
ap2.observations.extend([proto_obs_2])
ap_list = [ap1, ap2]
arr = _process_visual_observation(0, (128, 64, 1), ap_list)
assert list(arr.shape) == [2, 128, 64, 1]
assert np.allclose(arr[0, :, :, :], expected_out_array_1, atol=0.01)
assert np.allclose(arr[1, :, :, :], expected_out_array_2, atol=0.01)
def test_process_visual_observation_padded_channels():
in_array_1 = np.random.rand(128, 64, 12)
in_array_1_mapping = [0, 1, 2, 3, -1, -1, 4, 5, 6, 7, -1, -1]
proto_obs_1 = generate_compressed_proto_obs_with_mapping(
in_array_1, in_array_1_mapping
)
expected_out_array_1 = np.take(in_array_1, [0, 1, 2, 3, 6, 7, 8, 9], axis=2)
ap1 = AgentInfoProto()
ap1.observations.extend([proto_obs_1])
ap_list = [ap1]
arr = _process_visual_observation(0, (128, 64, 8), ap_list)
assert list(arr.shape) == [1, 128, 64, 8]
assert np.allclose(arr[0, :, :, :], expected_out_array_1, atol=0.01)
def test_process_visual_observation_bad_shape():

4
ml-agents-envs/setup.py


class VerifyVersionCommand(install):
"""
Custom command to verify that the git tag is the expected one for the release.
Based on https://circleci.com/blog/continuously-deploying-python-packages-to-pypi-with-circleci/
Originally based on https://circleci.com/blog/continuously-deploying-python-packages-to-pypi-with-circleci/
This differs slightly because our tags and versions are different.
"""

tag = os.getenv("CIRCLE_TAG")
tag = os.getenv("GITHUB_REF", "NO GITHUB TAG!").replace("refs/tags/", "")
if tag != EXPECTED_TAG:
info = "Git tag: {} does not match the expected tag of this app: {}".format(

7
ml-agents/mlagents/torch_utils/cpu_utils.py


"""
period = _read_in_integer_file("/sys/fs/cgroup/cpu/cpu.cfs_period_us")
quota = _read_in_integer_file("/sys/fs/cgroup/cpu/cpu.cfs_quota_us")
share = _read_in_integer_file("/sys/fs/cgroup/cpu/cpu.shares")
is_kubernetes = os.getenv("KUBERNETES_SERVICE_HOST") is not None
elif period > 0 and share > 0 and is_kubernetes:
# In kubernetes, each requested CPU is 1024 CPU shares
# https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/#how-pods-with-resource-limits-are-run
return int(share // 1024)
else:
return os.cpu_count()
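For reference, a minimal sketch of the arithmetic used here (hypothetical helper name, assuming the usual cgroup-v1 semantics where cfs_quota_us / cfs_period_us gives the CPU limit and one Kubernetes CPU request equals 1024 cpu.shares):

```python
import os

# Sketch only, not the actual cpu_utils implementation.
def sketch_cpu_count(quota: int, period: int, share: int, is_kubernetes: bool) -> int:
    if quota > 0 and period > 0:
        return int(quota // period)      # e.g. quota=200_000, period=100_000 -> 2 CPUs
    if period > 0 and share > 0 and is_kubernetes:
        return int(share // 1024)        # e.g. share=2048 -> 2 requested CPUs
    return os.cpu_count() or 1           # fall back to the host CPU count

print(sketch_cpu_count(200_000, 100_000, 0, False))  # 2
print(sketch_cpu_count(-1, 100_000, 2048, True))     # 2
```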

2
ml-agents/mlagents/trainers/__init__.py


# Version of the library that will be used to upload to pypi
__version__ = "0.21.0.dev0"
__version__ = "0.22.0.dev0"
# Git tag that will be checked to determine whether to trigger upload to pypi
__release_tag__ = None

9
ml-agents/mlagents/trainers/buffer.py


import numpy as np
import h5py
from typing import List, BinaryIO
import itertools
from mlagents_envs.exception import UnityException

np.random.randint(num_sequences_in_buffer, size=num_seq_to_sample)
* sequence_length
) # Sample random sequence starts
for i in start_idxes:
for key in self:
mini_batch[key].extend(self[key][i : i + sequence_length])
for key in self:
mb_list = [self[key][i : i + sequence_length] for i in start_idxes]
# See comparison of ways to make a list from a list of lists here:
# https://stackoverflow.com/questions/952914/how-to-make-a-flat-list-out-of-list-of-lists
mini_batch[key].set(list(itertools.chain.from_iterable(mb_list)))
return mini_batch
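A small standalone illustration of the flattening trick referenced above, with plain lists standing in for the buffer fields:

```python
import itertools

sequence_length = 3
data = list(range(12))      # stand-in for self[key]
start_idxes = [0, 6, 9]     # sampled sequence starts
mb_list = [data[i : i + sequence_length] for i in start_idxes]
flat = list(itertools.chain.from_iterable(mb_list))
print(flat)                 # [0, 1, 2, 6, 7, 8, 9, 10, 11]
```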
def save_to_file(self, file_object: BinaryIO) -> None:

12
ml-agents/mlagents/trainers/environment_parameter_manager.py


lesson_num = GlobalTrainingStatus.get_parameter_state(
param_name, StatusType.LESSON_NUM
)
next_lesson_num = lesson_num + 1
and len(settings.curriculum) > lesson_num + 1
and len(settings.curriculum) > next_lesson_num
):
behavior_to_consider = lesson.completion_criteria.behavior
if behavior_to_consider in trainer_steps:

self._smoothed_values[param_name] = new_smoothing
if must_increment:
GlobalTrainingStatus.set_parameter_state(
param_name, StatusType.LESSON_NUM, lesson_num + 1
param_name, StatusType.LESSON_NUM, next_lesson_num
new_lesson_name = settings.curriculum[lesson_num + 1].name
new_lesson_name = settings.curriculum[next_lesson_num].name
new_lesson_value = settings.curriculum[next_lesson_num].value
f"Parameter '{param_name}' has changed. Now in lesson '{new_lesson_name}'"
f"Parameter '{param_name}' has been updated to {new_lesson_value}."
+ f" Now in lesson '{new_lesson_name}'"
)
updated = True
if lesson.completion_criteria.require_reset:

5
ml-agents/mlagents/trainers/learn.py


from mlagents import tf_utils
from mlagents.trainers.trainer_controller import TrainerController
from mlagents.trainers.environment_parameter_manager import EnvironmentParameterManager
from mlagents.trainers.trainer_util import TrainerFactory, handle_existing_directories
from mlagents.trainers.trainer import TrainerFactory
from mlagents.trainers.directory_utils import validate_existing_directories
from mlagents.trainers.stats import (
TensorboardWriter,
StatsReporter,

run_logs_dir = os.path.join(write_path, "run_logs")
port: Optional[int] = env_settings.base_port
# Check if directory exists
handle_existing_directories(
validate_existing_directories(
write_path,
checkpoint_settings.resume,
checkpoint_settings.force,

3
ml-agents/mlagents/trainers/model_saver/tf_model_saver.py


# only on worker-0 if there are multiple workers
if self.policy and self.policy.rank is not None and self.policy.rank != 0:
return
if self.graph is None:
logger.info("No model to export")
return
export_policy_model(
self.model_path, output_filepath, behavior_name, self.graph, self.sess
)

4
ml-agents/mlagents/trainers/optimizer/tf_optimizer.py


from mlagents.trainers.policy.tf_policy import TFPolicy
from mlagents.trainers.optimizer import Optimizer
from mlagents.trainers.trajectory import SplitObservations
from mlagents.trainers.components.reward_signals.reward_signal_factory import (
from mlagents.trainers.tf.components.reward_signals.reward_signal_factory import (
from mlagents.trainers.components.bc.module import BCModule
from mlagents.trainers.tf.components.bc.module import BCModule
class TFOptimizer(Optimizer): # pylint: disable=W0223

10
ml-agents/mlagents/trainers/policy/checkpoint_manager.py


@attr.s(auto_attribs=True)
class NNCheckpoint:
class ModelCheckpoint:
steps: int
file_path: str
reward: Optional[float]

class NNCheckpointManager:
class ModelCheckpointManager:
@staticmethod
def get_checkpoints(behavior_name: str) -> List[Dict[str, Any]]:
checkpoint_list = GlobalTrainingStatus.get_parameter_state(

while len(checkpoints) > keep_checkpoints:
if keep_checkpoints <= 0 or len(checkpoints) == 0:
break
NNCheckpointManager.remove_checkpoint(checkpoints.pop(0))
ModelCheckpointManager.remove_checkpoint(checkpoints.pop(0))
cls, behavior_name: str, new_checkpoint: NNCheckpoint, keep_checkpoints: int
cls, behavior_name: str, new_checkpoint: ModelCheckpoint, keep_checkpoints: int
) -> None:
"""
Make room for new checkpoint if needed and insert new checkpoint information.

@classmethod
def track_final_checkpoint(
cls, behavior_name: str, final_checkpoint: NNCheckpoint
cls, behavior_name: str, final_checkpoint: ModelCheckpoint
) -> None:
"""
Ensures number of checkpoints stored is within the max number of checkpoints

7
ml-agents/mlagents/trainers/policy/torch_policy.py


self.actor_critic = ac_class(
observation_shapes=self.behavior_spec.observation_shapes,
network_settings=trainer_settings.network_settings,
continuous_act_size=self.continuous_act_size,
discrete_act_size=self.discrete_act_size,
action_spec=self.behavior_spec.action_spec,
stream_names=reward_signal_names,
conditional_sigma=self.condition_sigma_on_obs,
tanh_squash=tanh_squash,

) -> Tuple[SplitObservations, np.ndarray]:
vec_vis_obs = SplitObservations.from_observations(decision_requests.obs)
mask = None
if len(self.discrete_act_size) > 0:
mask = torch.ones([len(decision_requests), np.sum(self.discrete_act_size)])
if self.discrete_act_size > 0:
mask = torch.ones([len(decision_requests), np.sum(self.discrete_act_branches)])
if decision_requests.action_mask is not None:
mask = torch.as_tensor(
1 - np.concatenate(decision_requests.action_mask, axis=1)

2
ml-agents/mlagents/trainers/ppo/trainer.py


from mlagents.trainers.trajectory import Trajectory
from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
from mlagents.trainers.settings import TrainerSettings, PPOSettings, FrameworkType
from mlagents.trainers.components.reward_signals import RewardSignal
from mlagents.trainers.tf.components.reward_signals import RewardSignal
from mlagents import torch_utils
if torch_utils.is_available():

151
ml-agents/mlagents/trainers/sac/optimizer_torch.py


from mlagents_envs.timers import timed
from mlagents.trainers.exception import UnityTrainerException
from mlagents.trainers.settings import TrainerSettings, SACSettings
from contextlib import ExitStack
EPSILON = 1e-6 # Small value to avoid divide by zero

actions: Optional[torch.Tensor] = None,
memories: Optional[torch.Tensor] = None,
sequence_length: int = 1,
q1_grad: bool = True,
q2_grad: bool = True,
q1_out, _ = self.q1_network(
vec_inputs,
vis_inputs,
actions=actions,
memories=memories,
sequence_length=sequence_length,
)
q2_out, _ = self.q2_network(
vec_inputs,
vis_inputs,
actions=actions,
memories=memories,
sequence_length=sequence_length,
)
"""
Performs a forward pass on the value network, which consists of a Q1 and Q2
network. Optionally does not evaluate gradients for either the Q1, Q2, or both.
:param vec_inputs: List of vector observation tensors.
:param vis_input: List of visual observation tensors.
:param actions: For a continuous Q function (has actions), tensor of actions.
Otherwise, None.
:param memories: Initial memories if using memory. Otherwise, None.
:param sequence_length: Sequence length if using memory.
:param q1_grad: Whether or not to compute gradients for the Q1 network.
:param q2_grad: Whether or not to compute gradients for the Q2 network.
:return: Tuple of two dictionaries, which both map {reward_signal: Q} for Q1 and Q2,
respectively.
"""
# ExitStack allows us to enter the torch.no_grad() context conditionally
with ExitStack() as stack:
if not q1_grad:
stack.enter_context(torch.no_grad())
q1_out, _ = self.q1_network(
vec_inputs,
vis_inputs,
actions=actions,
memories=memories,
sequence_length=sequence_length,
)
with ExitStack() as stack:
if not q2_grad:
stack.enter_context(torch.no_grad())
q2_out, _ = self.q2_network(
vec_inputs,
vis_inputs,
actions=actions,
memories=memories,
sequence_length=sequence_length,
)
return q1_out, q2_out
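A minimal standalone sketch of the ExitStack pattern used here, with a toy linear layer rather than the SAC Q-networks:

```python
from contextlib import ExitStack
import torch

layer = torch.nn.Linear(4, 1)
x = torch.randn(3, 4)

def forward(compute_grad: bool) -> torch.Tensor:
    # Enter torch.no_grad() only when gradients are not wanted.
    with ExitStack() as stack:
        if not compute_grad:
            stack.enter_context(torch.no_grad())
        return layer(x)

print(forward(True).requires_grad)   # True
print(forward(False).requires_grad)  # False
```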
def __init__(self, policy: TorchPolicy, trainer_params: TrainerSettings):

self.policy.behavior_spec.observation_shapes,
policy_network_settings,
)
self.soft_update(self.policy.actor_critic.critic, self.target_network, 1.0)
ModelUtils.soft_update(
self.policy.actor_critic.critic, self.target_network, 1.0
)
self._log_ent_coef = torch.nn.Parameter(
torch.log(torch.as_tensor([self.init_entcoef] * len(self.act_size))),

q2_loss = torch.mean(torch.stack(q2_losses))
return q1_loss, q2_loss
def soft_update(self, source: nn.Module, target: nn.Module, tau: float) -> None:
for source_param, target_param in zip(source.parameters(), target.parameters()):
target_param.data.copy_(
target_param.data * (1.0 - tau) + source_param.data * tau
)
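For intuition, one numeric step of this Polyak averaging with a small tau (toy scalars, not real parameters):

```python
# target <- target * (1 - tau) + source * tau
tau = 0.005
target_value, source_value = 1.0, 0.0
target_value = target_value * (1.0 - tau) + source_value * tau
print(target_value)  # 0.995
```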
def sac_value_loss(
self,
log_probs: torch.Tensor,

min_policy_qs = {}
with torch.no_grad():
_ent_coef = torch.exp(self._log_ent_coef)
for name in values.keys():
if not discrete:
min_policy_qs[name] = torch.min(q1p_out[name], q2p_out[name])
else:
action_probs = log_probs.exp()
_branched_q1p = ModelUtils.break_into_branches(
q1p_out[name] * action_probs, self.act_size
)
_branched_q2p = ModelUtils.break_into_branches(
q2p_out[name] * action_probs, self.act_size
)
_q1p_mean = torch.mean(
torch.stack(
[torch.sum(_br, dim=1, keepdim=True) for _br in _branched_q1p]
),
dim=0,
)
_q2p_mean = torch.mean(
torch.stack(
[torch.sum(_br, dim=1, keepdim=True) for _br in _branched_q2p]
),
dim=0,
)
for name in values.keys():
if not discrete:
min_policy_qs[name] = torch.min(q1p_out[name], q2p_out[name])
else:
action_probs = log_probs.exp()
_branched_q1p = ModelUtils.break_into_branches(
q1p_out[name] * action_probs, self.act_size
)
_branched_q2p = ModelUtils.break_into_branches(
q2p_out[name] * action_probs, self.act_size
)
_q1p_mean = torch.mean(
torch.stack(
[
torch.sum(_br, dim=1, keepdim=True)
for _br in _branched_q1p
]
),
dim=0,
)
_q2p_mean = torch.mean(
torch.stack(
[
torch.sum(_br, dim=1, keepdim=True)
for _br in _branched_q2p
]
),
dim=0,
)
min_policy_qs[name] = torch.min(_q1p_mean, _q2p_mean)
min_policy_qs[name] = torch.min(_q1p_mean, _q2p_mean)
value_losses = []
if not discrete:

self.target_network.network_body.copy_normalization(
self.policy.actor_critic.network_body
)
(
sampled_actions,
log_probs,
entropies,
sampled_values,
_,
) = self.policy.sample_actions(
(sampled_actions, log_probs, _, _) = self.policy.sample_actions(
vec_obs,
vis_obs,
masks=act_masks,

)
value_estimates, _ = self.policy.actor_critic.critic_pass(
vec_obs, vis_obs, memories, sequence_length=self.policy.sequence_length
)
# Only need grad for q1, as that is used for policy.
q1p_out, q2p_out = self.value_network(
vec_obs,
vis_obs,

q2_grad=False,
)
q1_out, q2_out = self.value_network(
vec_obs,

)
q1_stream, q2_stream = q1_out, q2_out
else:
with torch.no_grad():
q1p_out, q2p_out = self.value_network(
vec_obs,
vis_obs,
memories=q_memories,
sequence_length=self.policy.sequence_length,
)
# For discrete, you don't need to backprop through the Q for the policy
q1p_out, q2p_out = self.value_network(
vec_obs,
vis_obs,
memories=q_memories,
sequence_length=self.policy.sequence_length,
q1_grad=False,
q2_grad=False,
)
q1_out, q2_out = self.value_network(
vec_obs,
vis_obs,

q1_stream, q2_stream, target_values, dones, rewards, masks
)
value_loss = self.sac_value_loss(
log_probs, sampled_values, q1p_out, q2p_out, masks, use_discrete
log_probs, value_estimates, q1p_out, q2p_out, masks, use_discrete
)
policy_loss = self.sac_policy_loss(log_probs, q1p_out, masks, use_discrete)
entropy_loss = self.sac_entropy_loss(log_probs, masks, use_discrete)

self.entropy_optimizer.step()
# Update target network
self.soft_update(self.policy.actor_critic.critic, self.target_network, self.tau)
ModelUtils.soft_update(
self.policy.actor_critic.critic, self.target_network, self.tau
)
update_stats = {
"Losses/Policy Loss": policy_loss.item(),
"Losses/Value Loss": value_loss.item(),

6
ml-agents/mlagents/trainers/sac/trainer.py


import os
import numpy as np
from mlagents.trainers.policy.checkpoint_manager import NNCheckpoint
from mlagents.trainers.policy.checkpoint_manager import ModelCheckpoint
from mlagents_envs.logging_util import get_logger
from mlagents_envs.timers import timed

from mlagents.trainers.trajectory import Trajectory, SplitObservations
from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
from mlagents.trainers.settings import TrainerSettings, SACSettings, FrameworkType
from mlagents.trainers.components.reward_signals import RewardSignal
from mlagents.trainers.tf.components.reward_signals import RewardSignal
from mlagents import torch_utils
if torch_utils.is_available():

self.checkpoint_replay_buffer = self.hyperparameters.save_replay_buffer
def _checkpoint(self) -> NNCheckpoint:
def _checkpoint(self) -> ModelCheckpoint:
"""
Writes a checkpoint model to memory
Overrides the default to save the replay buffer.

38
ml-agents/mlagents/trainers/settings.py


EXTRINSIC: str = "extrinsic"
GAIL: str = "gail"
CURIOSITY: str = "curiosity"
RND: str = "rnd"
def to_settings(self) -> type:
_mapping = {

RewardSignalType.RND: RNDSettings,
}
return _mapping[self]

learning_rate: float = 3e-4
@attr.s(auto_attribs=True)
class RNDSettings(RewardSignalSettings):
encoding_size: int = 64
learning_rate: float = 1e-4
# SAMPLERS #############################################################################
class ParameterRandomizationType(Enum):
UNIFORM: str = "uniform"

class ParameterRandomizationSettings(abc.ABC):
seed: int = parser.get_default("seed")
def __str__(self) -> str:
"""
Helper method to output sampler stats to console.
"""
raise TrainerConfigError(f"__str__ not implemented for type {self.__class__}.")
@staticmethod
def structure(
d: Union[Mapping, float], t: type

class ConstantSettings(ParameterRandomizationSettings):
value: float = 0.0
def __str__(self) -> str:
"""
Helper method to output sampler stats to console.
"""
return f"Float: value={self.value}"
def apply(self, key: str, env_channel: EnvironmentParametersChannel) -> None:
"""
Helper method to send sampler settings over EnvironmentParametersChannel

class UniformSettings(ParameterRandomizationSettings):
min_value: float = attr.ib()
max_value: float = 1.0
def __str__(self) -> str:
"""
Helper method to output sampler stats to console.
"""
return f"Uniform sampler: min={self.min_value}, max={self.max_value}"
@min_value.default
def _min_value_default(self):

mean: float = 1.0
st_dev: float = 1.0
def __str__(self) -> str:
"""
Helper method to output sampler stats to console.
"""
return f"Gaussian sampler: mean={self.mean}, stddev={self.st_dev}"
def apply(self, key: str, env_channel: EnvironmentParametersChannel) -> None:
"""
Helper method to send sampler settings over EnvironmentParametersChannel

@attr.s(auto_attribs=True)
class MultiRangeUniformSettings(ParameterRandomizationSettings):
intervals: List[Tuple[float, float]] = attr.ib()
def __str__(self) -> str:
"""
Helper method to output sampler stats to console.
"""
return f"MultiRangeUniform sampler: intervals={self.intervals}"
@intervals.default
def _intervals_default(self):

2
ml-agents/mlagents/trainers/tests/test_learn.py


@patch("mlagents.trainers.learn.write_timing_tree")
@patch("mlagents.trainers.learn.write_run_options")
@patch("mlagents.trainers.learn.handle_existing_directories")
@patch("mlagents.trainers.learn.validate_existing_directories")
@patch("mlagents.trainers.learn.TrainerFactory")
@patch("mlagents.trainers.learn.SubprocessEnvManager")
@patch("mlagents.trainers.learn.create_environment_factory")

8
ml-agents/mlagents/trainers/tests/test_rl_trainer.py


from unittest import mock
import pytest
import mlagents.trainers.tests.mock_brain as mb
from mlagents.trainers.policy.checkpoint_manager import NNCheckpoint
from mlagents.trainers.policy.checkpoint_manager import ModelCheckpoint
from mlagents.trainers.trainer.rl_trainer import RLTrainer
from mlagents.trainers.tests.test_buffer import construct_fake_buffer
from mlagents.trainers.agent_processor import AgentManagerQueue

"framework", [FrameworkType.TENSORFLOW, FrameworkType.PYTORCH], ids=["tf", "torch"]
)
@mock.patch("mlagents.trainers.trainer.trainer.StatsReporter.write_stats")
@mock.patch("mlagents.trainers.trainer.rl_trainer.NNCheckpointManager.add_checkpoint")
@mock.patch(
"mlagents.trainers.trainer.rl_trainer.ModelCheckpointManager.add_checkpoint"
)
def test_summary_checkpoint(mock_add_checkpoint, mock_write_summary, framework):
trainer = create_rl_trainer(framework)
mock_policy = mock.Mock()

add_checkpoint_calls = [
mock.call(
trainer.brain_name,
NNCheckpoint(
ModelCheckpoint(
step,
f"{trainer.model_saver.model_path}/{trainer.brain_name}-{step}.{export_ext}",
None,

16
ml-agents/mlagents/trainers/tests/test_settings.py


assert isinstance(
env_param_settings["length"].curriculum[0].value, MultiRangeUniformSettings
)
# Check __str__ is correct
assert (
str(env_param_settings["mass"].curriculum[0].value)
== "Uniform sampler: min=1.0, max=2.0"
)
assert (
str(env_param_settings["scale"].curriculum[0].value)
== "Gaussian sampler: mean=1.0, stddev=2.0"
)
assert (
str(env_param_settings["length"].curriculum[0].value)
== "MultiRangeUniform sampler: intervals=[(1.0, 2.0), (3.0, 4.0)]"
)
assert str(env_param_settings["gravity"].curriculum[0].value) == "Float: value=1"
assert isinstance(
env_param_settings["wall_height"].curriculum[0].value, ConstantSettings
)

10
ml-agents/mlagents/trainers/tests/test_subprocess_env_manager.py


from mlagents.trainers.tests.simple_test_envs import SimpleEnvironment
from mlagents.trainers.stats import StatsReporter
from mlagents.trainers.agent_processor import AgentManagerQueue
from mlagents.trainers.tests.test_simple_rl import (
_check_environment_trains,
PPO_CONFIG,
from mlagents.trainers.tests.check_env_trains import (
check_environment_trains,
from mlagents.trainers.tests.dummy_config import ppo_dummy_config
def mock_env_factory(worker_id):

simple_env_factory, EngineConfig.default_config(), num_envs
)
# Run PPO using env_manager
_check_environment_trains(
check_environment_trains(
{"1D": PPO_CONFIG},
{"1D": ppo_dummy_config()},
env_manager=env_manager,
success_threshold=None,
)

1
ml-agents/mlagents/trainers/tests/test_trainer_controller.py


trainer_mock.write_tensorboard_text = MagicMock()
tc = basic_trainer_controller
tc.initialize_trainers = MagicMock()
tc.trainers = {"testbrain": trainer_mock}
tc.advance = MagicMock()
tc.trainers["testbrain"].get_step = 0

27
ml-agents/mlagents/trainers/tests/test_trainer_util.py


import os
from unittest.mock import patch
from mlagents.trainers import trainer_util
from mlagents.trainers.trainer import TrainerFactory
from mlagents.trainers.tests.test_simple_rl import PPO_CONFIG
from mlagents.trainers.tests.dummy_config import ppo_dummy_config
from mlagents.trainers.directory_utils import validate_existing_directories
return RunOptions(behaviors={"testbrain": PPO_CONFIG})
return RunOptions(behaviors={"testbrain": ppo_dummy_config()})
@patch("mlagents_envs.base_env.BehaviorSpec")

expected_reward_buff_cap = 1
base_config = dummy_config.behaviors
expected_config = PPO_CONFIG
expected_config = ppo_dummy_config()
def mock_constructor(
self,

assert artifact_path == os.path.join(output_path, brain_name)
with patch.object(PPOTrainer, "__init__", mock_constructor):
trainer_factory = trainer_util.TrainerFactory(
trainer_factory = TrainerFactory(
trainer_config=base_config,
output_path=output_path,
train_model=train_model,

brain_name = "testbrain"
no_default_config = RunOptions().behaviors
trainer_factory = trainer_util.TrainerFactory(
trainer_factory = TrainerFactory(
trainer_config=no_default_config,
output_path="output_path",
train_model=True,

def test_existing_directories(tmp_path):
output_path = os.path.join(tmp_path, "runid")
# Test fresh new unused path - should do nothing.
trainer_util.handle_existing_directories(output_path, False, False)
validate_existing_directories(output_path, False, False)
trainer_util.handle_existing_directories(output_path, True, False)
validate_existing_directories(output_path, True, False)
trainer_util.handle_existing_directories(output_path, False, False)
validate_existing_directories(output_path, False, False)
trainer_util.handle_existing_directories(output_path, True, False)
validate_existing_directories(output_path, True, False)
trainer_util.handle_existing_directories(output_path, False, True)
validate_existing_directories(output_path, False, True)
trainer_util.handle_existing_directories(output_path, False, True, init_path)
validate_existing_directories(output_path, False, True, init_path)
trainer_util.handle_existing_directories(output_path, False, True, init_path)
validate_existing_directories(output_path, False, True, init_path)

24
ml-agents/mlagents/trainers/tests/test_training_status.py


GlobalTrainingStatus,
)
from mlagents.trainers.policy.checkpoint_manager import (
NNCheckpointManager,
NNCheckpoint,
ModelCheckpointManager,
ModelCheckpoint,
)

brain_name, StatusType.CHECKPOINTS, test_checkpoint_list
)
new_checkpoint_4 = NNCheckpoint(
new_checkpoint_4 = ModelCheckpoint(
NNCheckpointManager.add_checkpoint(brain_name, new_checkpoint_4, 4)
assert len(NNCheckpointManager.get_checkpoints(brain_name)) == 4
ModelCheckpointManager.add_checkpoint(brain_name, new_checkpoint_4, 4)
assert len(ModelCheckpointManager.get_checkpoints(brain_name)) == 4
new_checkpoint_5 = NNCheckpoint(
new_checkpoint_5 = ModelCheckpoint(
NNCheckpointManager.add_checkpoint(brain_name, new_checkpoint_5, 4)
assert len(NNCheckpointManager.get_checkpoints(brain_name)) == 4
ModelCheckpointManager.add_checkpoint(brain_name, new_checkpoint_5, 4)
assert len(ModelCheckpointManager.get_checkpoints(brain_name)) == 4
final_model = NNCheckpoint(current_step, final_model_path, 3.294, final_model_time)
final_model = ModelCheckpoint(
current_step, final_model_path, 3.294, final_model_time
)
NNCheckpointManager.track_final_checkpoint(brain_name, final_model)
assert len(NNCheckpointManager.get_checkpoints(brain_name)) == 4
ModelCheckpointManager.track_final_checkpoint(brain_name, final_model)
assert len(ModelCheckpointManager.get_checkpoints(brain_name)) == 4
check_checkpoints = GlobalTrainingStatus.saved_state[brain_name][
StatusType.CHECKPOINTS.value

4
ml-agents/mlagents/trainers/tests/torch/saver/test_saver.py


).unsqueeze(0)
with torch.no_grad():
_, log_probs1, _, _, _ = policy1.sample_actions(
_, log_probs1, _, _ = policy1.sample_actions(
_, log_probs2, _, _, _ = policy2.sample_actions(
_, log_probs2, _, _ = policy2.sample_actions(
vec_obs, vis_obs, masks=masks, memories=memories, all_log_probs=True
)

20
ml-agents/mlagents/trainers/tests/torch/saver/test_saver_reward_providers.py


import pytest
import os
import numpy as np
from mlagents.trainers.ppo.optimizer_torch import TorchPPOOptimizer
from mlagents.trainers.sac.optimizer_torch import TorchSACOptimizer
from mlagents.trainers.model_saver.torch_model_saver import TorchModelSaver

CuriositySettings,
GAILSettings,
RNDSettings,
from mlagents.trainers.tests.torch.test_reward_providers.utils import (
create_agent_buffer,
)
DEMO_PATH = (
os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, os.pardir)

trainer_settings.reward_signals = {
RewardSignalType.CURIOSITY: CuriositySettings(),
RewardSignalType.GAIL: GAILSettings(demo_path=DEMO_PATH),
RewardSignalType.RND: RNDSettings(),
}
policy = create_policy_mock(trainer_settings, use_discrete=False)
optimizer = OptimizerClass(policy, trainer_settings)

module_dict_2 = optimizer2.get_modules()
assert "Module:GAIL" in module_dict_1
assert "Module:GAIL" in module_dict_2
assert "Module:Curiosity" in module_dict_1
assert "Module:Curiosity" in module_dict_2
assert "Module:RND-pred" in module_dict_1
assert "Module:RND-pred" in module_dict_2
assert "Module:RND-target" in module_dict_1
assert "Module:RND-target" in module_dict_2
for name, module1 in module_dict_1.items():
assert name in module_dict_2
module2 = module_dict_2[name]

# Run some rewards
data = create_agent_buffer(policy.behavior_spec, 1)
for reward_name in optimizer.reward_signals.keys():
rp_1 = optimizer.reward_signals[reward_name]
rp_2 = optimizer2.reward_signals[reward_name]
assert np.array_equal(rp_1.evaluate(data), rp_2.evaluate(data))

10
ml-agents/mlagents/trainers/tests/torch/test_policy.py


if len(memories) > 0:
memories = torch.stack(memories).unsqueeze(0)
(
sampled_actions,
log_probs,
entropies,
sampled_values,
memories,
) = policy.sample_actions(
(sampled_actions, log_probs, entropies, memories) = policy.sample_actions(
vec_obs,
vis_obs,
masks=act_masks,

else:
assert log_probs.shape == (64, policy.behavior_spec.action_shape)
assert entropies.shape == (64, policy.behavior_spec.action_size)
for val in sampled_values.values():
assert val.shape == (64,)
if rnn:
assert memories.shape == (1, 1, policy.m_size)

12
ml-agents/mlagents/trainers/tests/torch/test_ppo.py


import numpy as np
from mlagents.tf_utils import tf
import copy
import attr
from mlagents.trainers.ppo.optimizer_torch import TorchPPOOptimizer

from mlagents.trainers.settings import NetworkSettings
from mlagents.trainers.tests.test_simple_rl import PPO_CONFIG
from mlagents.trainers.tests.test_reward_signals import ( # noqa: F401; pylint: disable=unused-variable
from mlagents.trainers.settings import NetworkSettings, FrameworkType
from mlagents.trainers.tests.dummy_config import ( # noqa: F401; pylint: disable=unused-variable
ppo_dummy_config,
curiosity_dummy_config,
gail_dummy_config,
)

def dummy_config():
return copy.deepcopy(PPO_CONFIG)
return attr.evolve(ppo_dummy_config(), framework=FrameworkType.PYTORCH)
VECTOR_ACTION_SPACE = 2

def test_ppo_optimizer_update_gail(gail_dummy_config, dummy_config): # noqa: F811
# Test evaluate
dummy_config.reward_signals = gail_dummy_config
config = attr.evolve(ppo_dummy_config(), framework=FrameworkType.PYTORCH)
PPO_CONFIG, use_rnn=False, use_discrete=False, use_visual=False
config, use_rnn=False, use_discrete=False, use_visual=False
)
# Test update
update_buffer = mb.simulate_rollout(

10
ml-agents/mlagents/trainers/tests/torch/test_sac.py


import pytest
import copy
import attr
from mlagents.trainers.tests.torch.test_simple_rl import SAC_CONFIG
from mlagents.trainers.settings import NetworkSettings
from mlagents.trainers.tests.test_reward_signals import ( # noqa: F401; pylint: disable=unused-variable
from mlagents.trainers.settings import NetworkSettings, FrameworkType
from mlagents.trainers.tests.dummy_config import ( # noqa: F401; pylint: disable=unused-variable
sac_dummy_config,
curiosity_dummy_config,
)

return copy.deepcopy(SAC_CONFIG)
return attr.evolve(sac_dummy_config(), framework=FrameworkType.PYTORCH)
VECTOR_ACTION_SPACE = 2

253
ml-agents/mlagents/trainers/tests/torch/test_simple_rl.py


import math
import tempfile
import numpy as np
from typing import Dict
from mlagents.trainers.tests.simple_test_envs import (
SimpleEnvironment,

)
from mlagents.trainers.trainer_controller import TrainerController
from mlagents.trainers.trainer_util import TrainerFactory
from mlagents.trainers.simple_env_manager import SimpleEnvManager
from mlagents.trainers.stats import StatsReporter, StatsWriter, StatsSummary
TrainerSettings,
PPOSettings,
SACSettings,
TrainerType,
ScheduleType,
from mlagents.trainers.environment_parameter_manager import EnvironmentParameterManager
from mlagents_envs.side_channel.environment_parameters_channel import (
EnvironmentParametersChannel,
)
from mlagents_envs.communicator_objects.demonstration_meta_pb2 import (
DemonstrationMetaProto,
)

BRAIN_NAME = "1D"
PPO_CONFIG = TrainerSettings(
trainer_type=TrainerType.PPO,
hyperparameters=PPOSettings(
learning_rate=5.0e-3,
learning_rate_schedule=ScheduleType.CONSTANT,
batch_size=16,
buffer_size=64,
),
network_settings=NetworkSettings(num_layers=1, hidden_units=32),
summary_freq=500,
max_steps=3000,
threaded=False,
framework=FrameworkType.PYTORCH,
from mlagents.trainers.tests.dummy_config import ppo_dummy_config, sac_dummy_config
from mlagents.trainers.tests.check_env_trains import (
check_environment_trains,
default_reward_processor,
SAC_CONFIG = TrainerSettings(
trainer_type=TrainerType.SAC,
hyperparameters=SACSettings(
learning_rate=5.0e-3,
learning_rate_schedule=ScheduleType.CONSTANT,
batch_size=8,
buffer_init_steps=100,
buffer_size=5000,
tau=0.01,
init_entcoef=0.01,
),
network_settings=NetworkSettings(num_layers=1, hidden_units=16),
summary_freq=100,
max_steps=1000,
threaded=False,
framework=FrameworkType.PYTORCH,
)
BRAIN_NAME = "1D"
PPO_TORCH_CONFIG = attr.evolve(ppo_dummy_config(), framework=FrameworkType.PYTORCH)
SAC_TORCH_CONFIG = attr.evolve(sac_dummy_config(), framework=FrameworkType.PYTORCH)
# The reward processor is passed as an argument to _check_environment_trains.
# It is applied to the list of all final rewards for each brain individually.
# This is so that we can process all final rewards in different ways for different algorithms.
# Custom reward processors should be built within the test function and passed to _check_environment_trains.
# The default is the average over the last 5 final rewards.
def default_reward_processor(rewards, last_n_rewards=5):
rewards_to_use = rewards[-last_n_rewards:]
# For debugging tests
print(f"Last {last_n_rewards} rewards:", rewards_to_use)
return np.array(rewards[-last_n_rewards:], dtype=np.float32).mean()
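A quick standalone check of what that default does, using made-up reward values:

```python
import numpy as np

# Average of the last 5 final rewards, mirroring default_reward_processor.
rewards = [0.1, 0.2, 0.9, 1.0, 1.0, 1.0, 1.0]
print(np.array(rewards[-5:], dtype=np.float32).mean())  # ~0.98
```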
class DebugWriter(StatsWriter):
"""
Print to stdout so stats can be viewed in pytest
"""
def __init__(self):
self._last_reward_summary: Dict[str, float] = {}
def get_last_rewards(self):
return self._last_reward_summary
def write_stats(
self, category: str, values: Dict[str, StatsSummary], step: int
) -> None:
for val, stats_summary in values.items():
if val == "Environment/Cumulative Reward":
print(step, val, stats_summary.mean)
self._last_reward_summary[category] = stats_summary.mean
def _check_environment_trains(
env,
trainer_config,
reward_processor=default_reward_processor,
env_parameter_manager=None,
success_threshold=0.9,
env_manager=None,
):
if env_parameter_manager is None:
env_parameter_manager = EnvironmentParameterManager()
# Create controller and begin training.
with tempfile.TemporaryDirectory() as dir:
run_id = "id"
seed = 1337
StatsReporter.writers.clear() # Clear StatsReporters so we don't write to file
debug_writer = DebugWriter()
StatsReporter.add_writer(debug_writer)
if env_manager is None:
env_manager = SimpleEnvManager(env, EnvironmentParametersChannel())
trainer_factory = TrainerFactory(
trainer_config=trainer_config,
output_path=dir,
train_model=True,
load_model=False,
seed=seed,
param_manager=env_parameter_manager,
multi_gpu=False,
)
tc = TrainerController(
trainer_factory=trainer_factory,
output_path=dir,
run_id=run_id,
param_manager=env_parameter_manager,
train=True,
training_seed=seed,
)
# Begin training
tc.start_learning(env_manager)
if (
success_threshold is not None
): # For tests where we are just checking setup and not reward
processed_rewards = [
reward_processor(rewards) for rewards in env.final_rewards.values()
]
assert all(not math.isnan(reward) for reward in processed_rewards)
assert all(reward > success_threshold for reward in processed_rewards)
#@pytest.mark.parametrize("use_discrete", [True, False])
#def test_simple_ppo(use_discrete):
# @pytest.mark.parametrize("use_discrete", [True, False])
# def test_simple_ppo(use_discrete):
env = HybridEnvironment([BRAIN_NAME], continuous_action_size=1, discrete_action_size=1, step_size=0.8)
env = HybridEnvironment(
[BRAIN_NAME], continuous_action_size=1, discrete_action_size=1, step_size=0.8
)
new_hyperparams = attr.evolve(
PPO_CONFIG.hyperparameters, batch_size=32, buffer_size=1280
)

env = HybridEnvironment([BRAIN_NAME], continuous_action_size=1, discrete_action_size=0, step_size=0.8)
env = HybridEnvironment(
[BRAIN_NAME], continuous_action_size=1, discrete_action_size=0, step_size=0.8
)
env = HybridEnvironment([BRAIN_NAME], continuous_action_size=0, discrete_action_size=1, step_size=0.8)
env = HybridEnvironment(
[BRAIN_NAME], continuous_action_size=0, discrete_action_size=1, step_size=0.8
)
env = HybridEnvironment([BRAIN_NAME], continuous_action_size=2, discrete_action_size=1, step_size=0.8)
env = HybridEnvironment(
[BRAIN_NAME], continuous_action_size=2, discrete_action_size=1, step_size=0.8
)
PPO_CONFIG.hyperparameters, batch_size=128, buffer_size=1280, beta=.01
PPO_CONFIG.hyperparameters, batch_size=128, buffer_size=1280, beta=0.01
env = HybridEnvironment([BRAIN_NAME], continuous_action_size=1, discrete_action_size=2, step_size=0.8)
env = HybridEnvironment(
[BRAIN_NAME], continuous_action_size=1, discrete_action_size=2, step_size=0.8
)
PPO_CONFIG.hyperparameters, batch_size=128, buffer_size=1280, beta=.05
PPO_CONFIG.hyperparameters, batch_size=128, buffer_size=1280, beta=0.05
#@pytest.mark.parametrize("use_discrete", [True, False])
#def test_2d_ppo(use_discrete):
# @pytest.mark.parametrize("use_discrete", [True, False])
# def test_2d_ppo(use_discrete):
# env = SimpleEnvironment(
# [BRAIN_NAME], use_discrete=use_discrete, action_size=2, step_size=0.8
# )

# _check_environment_trains(env, {BRAIN_NAME: config})
#
#
#@pytest.mark.parametrize("use_discrete", [True, False])
#@pytest.mark.parametrize("num_visual", [1, 2])
#def test_visual_ppo(num_visual, use_discrete):
# @pytest.mark.parametrize("use_discrete", [True, False])
# @pytest.mark.parametrize("num_visual", [1, 2])
# def test_visual_ppo(num_visual, use_discrete):
# env = SimpleEnvironment(
# [BRAIN_NAME],
# use_discrete=use_discrete,

# _check_environment_trains(env, {BRAIN_NAME: config})
#
#
#@pytest.mark.parametrize("num_visual", [1, 2])
#@pytest.mark.parametrize("vis_encode_type", ["resnet", "nature_cnn", "match3"])
#def test_visual_advanced_ppo(vis_encode_type, num_visual):
# @pytest.mark.parametrize("num_visual", [1, 2])
# @pytest.mark.parametrize("vis_encode_type", ["resnet", "nature_cnn", "match3"])
# def test_visual_advanced_ppo(vis_encode_type, num_visual):
# env = SimpleEnvironment(
# [BRAIN_NAME],
# use_discrete=True,

# _check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.5)
#
#
#@pytest.mark.parametrize("use_discrete", [True, False])
#def test_recurrent_ppo(use_discrete):
# @pytest.mark.parametrize("use_discrete", [True, False])
# def test_recurrent_ppo(use_discrete):
# env = MemoryEnvironment([BRAIN_NAME], use_discrete=use_discrete)
# new_network_settings = attr.evolve(
# PPO_CONFIG.network_settings,

# _check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
#
#
#@pytest.mark.parametrize("use_discrete", [True, False])
#def test_simple_sac(use_discrete):
# @pytest.mark.parametrize("use_discrete", [True, False])
# def test_simple_sac(use_discrete):
#@pytest.mark.parametrize("use_discrete", [True, False])
#def test_2d_sac(use_discrete):
# @pytest.mark.parametrize("use_discrete", [True, False])
# def test_2d_sac(use_discrete):
# env = SimpleEnvironment(
# [BRAIN_NAME], use_discrete=use_discrete, action_size=2, step_size=0.8
# )

#
#
#@pytest.mark.parametrize("use_discrete", [True, False])
#@pytest.mark.parametrize("num_visual", [1, 2])
#def test_visual_sac(num_visual, use_discrete):
# @pytest.mark.parametrize("use_discrete", [True, False])
# @pytest.mark.parametrize("num_visual", [1, 2])
# def test_visual_sac(num_visual, use_discrete):
# env = SimpleEnvironment(
# [BRAIN_NAME],
# use_discrete=use_discrete,

# _check_environment_trains(env, {BRAIN_NAME: config})
#
#
#@pytest.mark.parametrize("num_visual", [1, 2])
#@pytest.mark.parametrize("vis_encode_type", ["resnet", "nature_cnn", "match3"])
#def test_visual_advanced_sac(vis_encode_type, num_visual):
# @pytest.mark.parametrize("num_visual", [1, 2])
# @pytest.mark.parametrize("vis_encode_type", ["resnet", "nature_cnn", "match3"])
# def test_visual_advanced_sac(vis_encode_type, num_visual):
# env = SimpleEnvironment(
# [BRAIN_NAME],
# use_discrete=True,

# _check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.5)
#
#
#@pytest.mark.parametrize("use_discrete", [True, False])
#def test_recurrent_sac(use_discrete):
# @pytest.mark.parametrize("use_discrete", [True, False])
# def test_recurrent_sac(use_discrete):
# step_size = 0.2 if use_discrete else 0.5
# env = MemoryEnvironment(
# [BRAIN_NAME], use_discrete=use_discrete, step_size=step_size

# _check_environment_trains(env, {BRAIN_NAME: config})
#
#
#@pytest.mark.parametrize("use_discrete", [True, False])
#def test_simple_ghost(use_discrete):
# @pytest.mark.parametrize("use_discrete", [True, False])
# def test_simple_ghost(use_discrete):
# env = SimpleEnvironment(
# [BRAIN_NAME + "?team=0", BRAIN_NAME + "?team=1"], use_discrete=use_discrete
# )

# _check_environment_trains(env, {BRAIN_NAME: config})
#
#
#@pytest.mark.parametrize("use_discrete", [True, False])
#def test_simple_ghost_fails(use_discrete):
# @pytest.mark.parametrize("use_discrete", [True, False])
# def test_simple_ghost_fails(use_discrete):
# env = SimpleEnvironment(
# [BRAIN_NAME + "?team=0", BRAIN_NAME + "?team=1"], use_discrete=use_discrete
# )

# )
#
#
#@pytest.mark.parametrize("use_discrete", [True, False])
#def test_simple_asymm_ghost(use_discrete):
# @pytest.mark.parametrize("use_discrete", [True, False])
# def test_simple_asymm_ghost(use_discrete):
# # Make opponent for asymmetric case
# brain_name_opp = BRAIN_NAME + "Opp"
# env = SimpleEnvironment(

# _check_environment_trains(env, {BRAIN_NAME: config, brain_name_opp: config})
#
#
#@pytest.mark.parametrize("use_discrete", [True, False])
#def test_simple_asymm_ghost_fails(use_discrete):
# @pytest.mark.parametrize("use_discrete", [True, False])
# def test_simple_asymm_ghost_fails(use_discrete):
# # Make opponent for asymmetric case
# brain_name_opp = BRAIN_NAME + "Opp"
# env = SimpleEnvironment(

# )
#
#
#@pytest.fixture(scope="session")
#def simple_record(tmpdir_factory):
# @pytest.fixture(scope="session")
# def simple_record(tmpdir_factory):
# def record_demo(use_discrete, num_visual=0, num_vector=1):
# env = RecordEnvironment(
# [BRAIN_NAME],

# return record_demo
#
#
#@pytest.mark.parametrize("use_discrete", [True, False])
#@pytest.mark.parametrize("trainer_config", [PPO_CONFIG, SAC_CONFIG])
#def test_gail(simple_record, use_discrete, trainer_config):
# @pytest.mark.parametrize("use_discrete", [True, False])
# @pytest.mark.parametrize("trainer_config", [PPO_CONFIG, SAC_CONFIG])
# def test_gail(simple_record, use_discrete, trainer_config):
# demo_path = simple_record(use_discrete)
# env = SimpleEnvironment([BRAIN_NAME], use_discrete=use_discrete, step_size=0.2)
# bc_settings = BehavioralCloningSettings(demo_path=demo_path, steps=1000)

# _check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
#
#
#@pytest.mark.parametrize("use_discrete", [True, False])
#def test_gail_visual_ppo(simple_record, use_discrete):
# @pytest.mark.parametrize("use_discrete", [True, False])
# def test_gail_visual_ppo(simple_record, use_discrete):
# demo_path = simple_record(use_discrete, num_visual=1, num_vector=0)
# env = SimpleEnvironment(
# [BRAIN_NAME],

# _check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
#
#
#@pytest.mark.parametrize("use_discrete", [True, False])
#def test_gail_visual_sac(simple_record, use_discrete):
# @pytest.mark.parametrize("use_discrete", [True, False])
# def test_gail_visual_sac(simple_record, use_discrete):
# demo_path = simple_record(use_discrete, num_visual=1, num_vector=0)
# env = SimpleEnvironment(
# [BRAIN_NAME],

17
ml-agents/mlagents/trainers/tests/torch/test_utils.py


masks = torch.tensor([False, False, True, True, True])
mean = ModelUtils.masked_mean(test_input, masks=masks)
assert mean == 4.0
def test_soft_update():
class TestModule(torch.nn.Module):
def __init__(self, vals):
super().__init__()
self.parameter = torch.nn.Parameter(torch.ones(5, 5, 5) * vals)
tm1 = TestModule(0)
tm2 = TestModule(1)
tm3 = TestModule(2)
ModelUtils.soft_update(tm1, tm3, tau=0.5)
assert torch.equal(tm3.parameter, torch.ones(5, 5, 5))
ModelUtils.soft_update(tm1, tm2, tau=1.0)
assert torch.equal(tm2.parameter, tm1.parameter)

2
ml-agents/mlagents/trainers/torch/components/bc/module.py


else:
vis_obs = []
selected_actions, all_log_probs, _, _, _ = self.policy.sample_actions(
selected_actions, all_log_probs, _, _ = self.policy.sample_actions(
vec_obs,
vis_obs,
masks=act_masks,

Some files were not shown because too many files changed in this diff
