Compare commits

...
This merge request has changes that conflict with the target branch.
/test_requirements.txt
/.pre-commit-config.yaml
/DevProject/ProjectSettings/EditorBuildSettings.asset
/DevProject/ProjectSettings/ProjectVersion.txt
/DevProject/Packages/manifest.json
/utils/validate_versions.py
/utils/make_readme_table.py
/.yamato/gym-interface-test.yml
/.yamato/protobuf-generation-test.yml
/.yamato/training-int-tests.yml
/.yamato/python-ll-api-test.yml
/.yamato/standalone-build-test.yml
/.yamato/com.unity.ml-agents-test.yml
/gym-unity/setup.py
/gym-unity/gym_unity/envs/__init__.py
/gym-unity/gym_unity/__init__.py
/Project/Packages/manifest.json
/Project/ProjectSettings/ProjectVersion.txt
/Project/Assets/ML-Agents/Examples/WallJump/Scripts/WallJumpAgent.cs
/Project/Assets/ML-Agents/Examples/FoodCollector/Scripts/FoodCollectorAgent.cs
/Project/Assets/ML-Agents/Examples/Soccer/Scripts/AgentSoccer.cs
/com.unity.ml-agents/package.json
/com.unity.ml-agents/CONTRIBUTING.md
/com.unity.ml-agents/Documentation~/com.unity.ml-agents.md
/com.unity.ml-agents/Editor/DemonstrationImporter.cs
/com.unity.ml-agents/Editor/BrainParametersDrawer.cs
/com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs
/com.unity.ml-agents/Tests/Editor/Communicator/RpcCommunicatorTests.cs
/com.unity.ml-agents/Tests/Runtime/RuntimeAPITest.cs
/com.unity.ml-agents/Runtime/Constants.cs
/com.unity.ml-agents/Runtime/SideChannels/SideChannelsManager.cs
/com.unity.ml-agents/Runtime/Inference/TensorProxy.cs
/com.unity.ml-agents/Runtime/Inference/ApplierImpl.cs
/com.unity.ml-agents/Runtime/Inference/TensorApplier.cs
/com.unity.ml-agents/Runtime/Inference/BarracudaModelParamLoader.cs
/com.unity.ml-agents/Runtime/Inference/TensorGenerator.cs
/com.unity.ml-agents/Runtime/Inference/GeneratorImpl.cs
/com.unity.ml-agents/Runtime/Inference/ModelRunner.cs
/com.unity.ml-agents/Runtime/Communicator/RpcCommunicator.cs
/com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs
/com.unity.ml-agents/Runtime/Academy.cs
/com.unity.ml-agents/Runtime/Agent.cs
/com.unity.ml-agents/Runtime/Demonstrations/DemonstrationRecorder.cs
/com.unity.ml-agents/Runtime/Policies/BarracudaPolicy.cs
/com.unity.ml-agents/Runtime/Policies/BehaviorParameters.cs
/com.unity.ml-agents/Runtime/Policies/RemotePolicy.cs
/com.unity.ml-agents/Runtime/Sensors/RayPerceptionSensorComponentBase.cs
/com.unity.ml-agents/Runtime/Sensors/SensorShapeValidator.cs
/com.unity.ml-agents/Runtime/Sensors/RayPerceptionSensor.cs
/com.unity.ml-agents/Runtime/Sensors/StackingSensor.cs
/com.unity.ml-agents/Runtime/Sensors/ObservationWriter.cs
/com.unity.ml-agents/Runtime/Sensors/VectorSensor.cs
/com.unity.ml-agents/CHANGELOG.md
/ml-agents-envs/setup.py
/ml-agents-envs/mlagents_envs/communicator.py
/ml-agents-envs/mlagents_envs/rpc_communicator.py
/ml-agents-envs/mlagents_envs/exception.py
/ml-agents-envs/mlagents_envs/side_channel/environment_parameters_channel.py
/ml-agents-envs/mlagents_envs/base_env.py
/ml-agents-envs/mlagents_envs/environment.py
/ml-agents-envs/mlagents_envs/tests/test_side_channel.py
/ml-agents-envs/mlagents_envs/__init__.py
/docs/Using-Tensorboard.md
/docs/Learning-Environment-Create-New.md
/docs/Training-ML-Agents.md
/docs/Installation-Anaconda-Windows.md
/docs/Installation.md
/ml-agents/tests/yamato/check_coverage_percent.py
/ml-agents/tests/yamato/scripts/run_gym.py
/ml-agents/tests/yamato/scripts/run_llapi.py
/ml-agents/tests/yamato/yamato_utils.py
/ml-agents/setup.py
/ml-agents/mlagents/trainers/trainer_controller.py
/ml-agents/mlagents/trainers/stats.py
/ml-agents/mlagents/trainers/subprocess_env_manager.py
/ml-agents/mlagents/trainers/ghost/trainer.py
/ml-agents/mlagents/trainers/ppo/trainer.py
/ml-agents/mlagents/trainers/sac/trainer.py
/ml-agents/mlagents/trainers/trainer/trainer.py
/ml-agents/mlagents/trainers/trainer/rl_trainer.py
/ml-agents/mlagents/trainers/buffer.py
/ml-agents/mlagents/trainers/__init__.py
/README.md
/com.unity.ml-agents/Tests/Editor/Sensor/RayPerceptionSensorTests.cs
/com.unity.ml-agents/Tests/Editor/Sensor/ObservationWriterTests.cs
/com.unity.ml-agents/Tests/Editor/Sensor/SensorShapeValidatorTests.cs
/com.unity.ml-agents/Tests/Editor/TensorUtilsTest.cs
/com.unity.ml-agents/Tests/Editor/Communicator/GrpcExtensionsTests.cs
/com.unity.ml-agents/Tests/Editor/RecursionCheckerTests.cs
/.github/workflows
/docs/Versioning.md
/DevProject/Assets/ML-Agents
/com.unity.ml-agents/Runtime/Analytics.meta
/com.unity.ml-agents/Runtime/Analytics
/com.unity.ml-agents/Tests/Editor/Communicator/GrpcExtensionsTests.cs.meta
/com.unity.ml-agents/Tests/Editor/Analytics.meta
/com.unity.ml-agents/Tests/Editor/Analytics
/.circleci/config.yml
/com.unity.ml-agents/Runtime/DiscreteActionMasker.cs
/ml-agents/mlagents/trainers/components/bc/model.py
/ml-agents/mlagents/trainers/components/bc/module.py
/ml-agents/mlagents/trainers/components/reward_signals/__init__.py
/ml-agents/mlagents/trainers/components/reward_signals/curiosity/model.py
/ml-agents/mlagents/trainers/components/reward_signals/gail/model.py
/ml-agents/mlagents/trainers/components/reward_signals/reward_signal_factory.py
/ml-agents/mlagents/trainers/curriculum.py
/ml-agents/mlagents/trainers/models.py
/ml-agents/mlagents/trainers/policy/tf_policy.py
/ml-agents/mlagents/trainers/ppo/optimizer.py
/ml-agents/mlagents/trainers/sac/network.py
/ml-agents/mlagents/trainers/sac/optimizer.py
/ml-agents/mlagents/trainers/sampler_class.py
/ml-agents/mlagents/trainers/tests/test_nn_policy.py
/ml-agents/mlagents/trainers/tests/test_simple_rl.py
/ml-agents/mlagents/trainers/trainer_util.py

10 commits

Author  SHA1  Message  Commit date
GitHub ea26ba4f update to 1.0.8 (#5395) 3 years ago
GitHub 9c3b179a [MLA-1981] Don't allow connections to newer python trainers (#5370) 3 years ago
GitHub 5f8d255c fix null reference in model validation code, better error message (#5350) 4 years ago
Chris Elion e2d1263f fix NPE in model validation code, better error message 4 years ago
GitHub 741d75d5 [verified branch] better message for new model format (#5245) 4 years ago
GitHub 1188d424 Bump code coverage for yamato's sake. (#4906) (#5032) 4 years ago
GitHub ae854781 Update package version to 1.0.7 (#5029) 4 years ago
GitHub 8052ee6a update master to main, remove pylint check (#5031) 4 years ago
GitHub 8d3e2225 [MLA-1743] Backport inference GC Optimizations (#4916) 4 years ago
GitHub f6e79bdb [MLA-1742] backport SideChannel GC reduction (#4915) 4 years ago
154 files changed, with 1262 insertions and 650 deletions. The first 100 changed files are listed below with their changed-line counts:
  1. 201
      .circleci/config.yml
  2. 49
      .pre-commit-config.yaml
  3. 2
      test_requirements.txt
  4. 4
      gym-unity/gym_unity/__init__.py
  5. 13
      gym-unity/gym_unity/envs/__init__.py
  6. 4
      gym-unity/setup.py
  7. 4
      ml-agents-envs/mlagents_envs/__init__.py
  8. 8
      ml-agents-envs/mlagents_envs/base_env.py
  9. 2
      ml-agents-envs/mlagents_envs/communicator.py
  10. 24
      ml-agents-envs/mlagents_envs/environment.py
  11. 2
      ml-agents-envs/mlagents_envs/exception.py
  12. 2
      ml-agents-envs/mlagents_envs/rpc_communicator.py
  13. 2
      ml-agents-envs/mlagents_envs/side_channel/environment_parameters_channel.py
  14. 2
      ml-agents-envs/mlagents_envs/side_channel/float_properties_channel.py
  15. 4
      ml-agents-envs/mlagents_envs/tests/test_side_channel.py
  16. 2
      ml-agents-envs/setup.py
  17. 51
      README.md
  18. 8
      docs/Using-Tensorboard.md
  19. 1
      docs/Learning-Environment-Create-New.md
  20. 11
      docs/Training-ML-Agents.md
  21. 10
      docs/Installation-Anaconda-Windows.md
  22. 10
      docs/Installation.md
  23. 36
      utils/validate_versions.py
  24. 66
      utils/validate_meta_files.py
  25. 3
      utils/make_readme_table.py
  26. 5
      DevProject/ProjectSettings/EditorBuildSettings.asset
  27. 4
      DevProject/ProjectSettings/ProjectVersion.txt
  28. 19
      DevProject/Packages/manifest.json
  29. 5
      .yamato/protobuf-generation-test.yml
  30. 47
      .yamato/com.unity.ml-agents-promotion.yml
  31. 24
      .yamato/gym-interface-test.yml
  32. 25
      .yamato/python-ll-api-test.yml
  33. 48
      .yamato/com.unity.ml-agents-test.yml
  34. 17
      .yamato/standalone-build-test.yml
  35. 4
      .yamato/training-int-tests.yml
  36. 3
      Project/Assets/ML-Agents/Examples/FoodCollector/Scripts/FoodCollectorAgent.cs
  37. 1
      Project/Assets/ML-Agents/Examples/Soccer/Scripts/AgentSoccer.cs
  38. 1
      Project/Assets/ML-Agents/Examples/WallJump/Scripts/WallJumpAgent.cs
  39. 3
      Project/Packages/manifest.json
  40. 2
      Project/ProjectSettings/ProjectVersion.txt
  41. 2
      Project/ProjectSettings/UnityConnectSettings.asset
  42. 4
      com.unity.ml-agents/CONTRIBUTING.md
  43. 20
      com.unity.ml-agents/Editor/BrainParametersDrawer.cs
  44. 4
      com.unity.ml-agents/Editor/DemonstrationImporter.cs
  45. 18
      com.unity.ml-agents/Documentation~/com.unity.ml-agents.md
  46. 86
      com.unity.ml-agents/CHANGELOG.md
  47. 26
      com.unity.ml-agents/Runtime/SideChannels/SideChannelsManager.cs
  48. 52
      com.unity.ml-agents/Runtime/Agent.cs
  49. 2
      com.unity.ml-agents/Runtime/Demonstrations/DemonstrationRecorder.cs
  50. 2
      com.unity.ml-agents/Runtime/DiscreteActionMasker.cs
  51. 27
      com.unity.ml-agents/Runtime/Policies/BarracudaPolicy.cs
  52. 4
      com.unity.ml-agents/Runtime/Policies/BehaviorParameters.cs
  53. 17
      com.unity.ml-agents/Runtime/Policies/RemotePolicy.cs
  54. 9
      com.unity.ml-agents/Runtime/Sensors/SensorShapeValidator.cs
  55. 5
      com.unity.ml-agents/Runtime/Sensors/RayPerceptionSensorComponentBase.cs
  56. 29
      com.unity.ml-agents/Runtime/Sensors/ObservationWriter.cs
  57. 21
      com.unity.ml-agents/Runtime/Sensors/RayPerceptionSensor.cs
  58. 8
      com.unity.ml-agents/Runtime/Sensors/StackingSensor.cs
  59. 2
      com.unity.ml-agents/Runtime/Sensors/VectorSensor.cs
  60. 18
      com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs
  61. 88
      com.unity.ml-agents/Runtime/Communicator/RpcCommunicator.cs
  62. 15
      com.unity.ml-agents/Runtime/Constants.cs
  63. 21
      com.unity.ml-agents/Runtime/Inference/ApplierImpl.cs
  64. 46
      com.unity.ml-agents/Runtime/Inference/GeneratorImpl.cs
  65. 7
      com.unity.ml-agents/Runtime/Inference/TensorApplier.cs
  66. 7
      com.unity.ml-agents/Runtime/Inference/TensorGenerator.cs
  67. 23
      com.unity.ml-agents/Runtime/Inference/TensorProxy.cs
  68. 40
      com.unity.ml-agents/Runtime/Inference/ModelRunner.cs
  69. 31
      com.unity.ml-agents/Runtime/Inference/BarracudaModelParamLoader.cs
  70. 97
      com.unity.ml-agents/Runtime/Academy.cs
  71. 4
      com.unity.ml-agents/Tests/Runtime/RuntimeAPITest.cs
  72. 2
      com.unity.ml-agents/Tests/Editor/PublicAPI/Unity.ML-Agents.Editor.Tests.PublicAPI.asmdef
  73. 38
      com.unity.ml-agents/Tests/Editor/Communicator/RpcCommunicatorTests.cs
  74. 69
      com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs
  75. 8
      com.unity.ml-agents/Tests/Editor/Sensor/SensorShapeValidatorTests.cs
  76. 89
      com.unity.ml-agents/Tests/Editor/Sensor/RayPerceptionSensorTests.cs
  77. 12
      com.unity.ml-agents/Tests/Editor/Sensor/ObservationWriterTests.cs
  78. 82
      com.unity.ml-agents/Tests/Editor/TensorUtilsTest.cs
  79. 9
      com.unity.ml-agents/package.json
  80. 4
      ml-agents/mlagents/trainers/__init__.py
  81. 2
      ml-agents/mlagents/trainers/subprocess_env_manager.py
  82. 4
      ml-agents/mlagents/trainers/buffer.py
  83. 2
      ml-agents/mlagents/trainers/components/bc/model.py
  84. 2
      ml-agents/mlagents/trainers/components/bc/module.py
  85. 2
      ml-agents/mlagents/trainers/components/reward_signals/__init__.py
  86. 6
      ml-agents/mlagents/trainers/components/reward_signals/curiosity/model.py
  87. 6
      ml-agents/mlagents/trainers/components/reward_signals/gail/model.py
  88. 4
      ml-agents/mlagents/trainers/components/reward_signals/reward_signal_factory.py
  89. 12
      ml-agents/mlagents/trainers/curriculum.py
  90. 4
      ml-agents/mlagents/trainers/ghost/controller.py
  91. 2
      ml-agents/mlagents/trainers/ghost/trainer.py
  92. 6
      ml-agents/mlagents/trainers/models.py
  93. 13
      ml-agents/mlagents/trainers/policy/tf_policy.py
  94. 12
      ml-agents/mlagents/trainers/ppo/optimizer.py
  95. 20
      ml-agents/mlagents/trainers/ppo/trainer.py
  96. 6
      ml-agents/mlagents/trainers/sac/network.py
  97. 4
      ml-agents/mlagents/trainers/sac/optimizer.py
  98. 14
      ml-agents/mlagents/trainers/sac/trainer.py
  99. 2
      ml-agents/mlagents/trainers/sampler_class.py
  100. 12
      ml-agents/mlagents/trainers/stats.py

201
.circleci/config.yml


- image: circleci/python:3.8.2
jobs:
build_python:
parameters:
executor:
type: executor
pyversion:
type: string
description: python version being used (currently only affects caching).
pip_constraints:
type: string
description: Constraints file that is passed to "pip install". We constrain older versions of libraries for older python runtimes, in order to help ensure compatibility.
enforce_onnx_conversion:
type: integer
default: 0
description: Whether to raise an exception if ONNX models couldn't be saved.
executor: << parameters.executor >>
working_directory: ~/repo
# Run additional numpy checks on unit tests
environment:
TEST_ENFORCE_NUMPY_FLOAT32: 1
TEST_ENFORCE_ONNX_CONVERSION: << parameters.enforce_onnx_conversion >>
steps:
- checkout
- run:
# Combine all the python dependencies into one file so that we can use that for the cache checksum
name: Combine pip dependencies for caching
command: cat ml-agents/setup.py ml-agents-envs/setup.py gym-unity/setup.py test_requirements.txt << parameters.pip_constraints >> > python_deps.txt
- restore_cache:
keys:
# Parameterize the cache so that different python versions can get different versions of the packages
- v1-dependencies-py<< parameters.pyversion >>-{{ checksum "python_deps.txt" }}
- run:
name: Install Dependencies
command: |
python3 -m venv venv
. venv/bin/activate
pip install --upgrade pip
pip install --upgrade setuptools
pip install --progress-bar=off -e ./ml-agents-envs -c << parameters.pip_constraints >>
pip install --progress-bar=off -e ./ml-agents -c << parameters.pip_constraints >>
pip install --progress-bar=off -r test_requirements.txt -c << parameters.pip_constraints >>
pip install --progress-bar=off -e ./gym-unity -c << parameters.pip_constraints >>
- save_cache:
paths:
- ./venv
key: v1-dependencies-py<< parameters.pyversion >>-{{ checksum "python_deps.txt" }}
- run:
name: Run Tests for ml-agents and gym_unity
# This also dumps the installed pip packages to a file, so we can see what versions are actually being used.
command: |
. venv/bin/activate
mkdir test-reports
pip freeze > test-reports/pip_versions.txt
pytest -n 2 --cov=ml-agents --cov=ml-agents-envs --cov=gym-unity --cov-report html --junitxml=test-reports/junit.xml -p no:warnings
- run:
name: Verify there are no hidden/missing metafiles.
# Renaming or deleting files can leave metafiles behind that make Unity very unhappy.
command: |
. venv/bin/activate
python utils/validate_meta_files.py
- store_test_results:
path: test-reports
- store_artifacts:
path: test-reports
destination: test-reports
- store_artifacts:
path: htmlcov
destination: htmlcov
pre-commit:
docker:
- image: circleci/python:3.7.3
working_directory: ~/repo/
steps:
- checkout
- run:
name: Combine precommit config and python versions for caching
command: |
cat .pre-commit-config.yaml > pre-commit-deps.txt
python -VV >> pre-commit-deps.txt
- restore_cache:
keys:
- v1-precommit-deps-{{ checksum "pre-commit-deps.txt" }}
- run:
name: Install Dependencies
command: |
# Need ruby for search-and-replace
sudo apt-get update
sudo apt-get install ruby-full
python3 -m venv venv
. venv/bin/activate
pip install --upgrade pip
pip install --upgrade setuptools
pip install pre-commit
# Install the hooks now so that they'll be cached
pre-commit install-hooks
- save_cache:
paths:
- ~/.cache/pre-commit
- ./venv
key: v1-precommit-deps-{{ checksum "pre-commit-deps.txt" }}
- run:
name: Check Code Style using pre-commit
command: |
. venv/bin/activate
pre-commit run --show-diff-on-failure --all-files
markdown_link_check:
parameters:
precommit_command:
type: string
description: precommit hook to run
default: markdown-link-check
docker:
- image: circleci/node:12.6.0
working_directory: ~/repo
steps:
- checkout
- restore_cache:
keys:
- v1-node-dependencies-{{ checksum ".pre-commit-config.yaml" }}
# fallback to using the latest cache if no exact match is found
- v1-node-dependencies-
- run:
name: Install Dependencies
command: |
sudo apt-get install python3-venv
python3 -m venv venv
. venv/bin/activate
pip install pre-commit
- run: sudo npm install -g markdown-link-check
- save_cache:
paths:
- ./venv
key: v1-node-dependencies-{{ checksum ".pre-commit-config.yaml" }}
- run:
name: Run markdown-link-check via precommit
command: |
. venv/bin/activate
pre-commit run --hook-stage manual << parameters.precommit_command >> --all-files
deploy:
parameters:
directory:

version: 2
workflow:
jobs:
- build_python:
name: python_3.6.1
executor: python361
pyversion: 3.6.1
# Test python 3.6 with the oldest supported versions
pip_constraints: test_constraints_min_version.txt
- build_python:
name: python_3.7.3
executor: python373
pyversion: 3.7.3
# Test python 3.7 with the newest supported versions
pip_constraints: test_constraints_max_tf1_version.txt
# Make sure ONNX conversion passes here (recent version of tensorflow 1.x)
enforce_onnx_conversion: 1
- build_python:
name: python_3.7.3+tf2
executor: python373
pyversion: 3.7.3
# Test python 3.7 with the newest supported versions
pip_constraints: test_constraints_max_tf2_version.txt
- build_python:
name: python_3.8.2+tf2.2
executor: python382
pyversion: 3.8.2
# Test python 3.8 with the newest edge versions
pip_constraints: test_constraints_max_tf2_version.txt
- markdown_link_check
- pre-commit
# The first deploy jobs are the "real" ones that upload to pypi
- deploy:
name: deploy ml-agents-envs

only: /^release_[0-9]+_test[0-9]+$/
branches:
ignore: /.*/
nightly:
triggers:
- schedule:
cron: "0 0 * * *"
filters:
branches:
only:
- develop
jobs:
- markdown_link_check:
name: markdown-link-check full
precommit_command: markdown-link-check-full
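
The caching scheme in the `build_python` job above is worth spelling out: it concatenates every Python dependency manifest into `python_deps.txt` and keys the pip cache on a checksum of that file, parameterized by python version, so touching any `setup.py` or the test requirements invalidates the cache for exactly the affected interpreter. A minimal Python sketch of the same idea (CircleCI's `{{ checksum }}` helper computes its own digest; the hash choice here is only illustrative):

```python
import hashlib

def cache_key(pyversion, dep_files):
    """One key per python version and per digest of the concatenated
    dependency manifests, mirroring:
    v1-dependencies-py<< parameters.pyversion >>-{{ checksum "python_deps.txt" }}
    """
    digest = hashlib.sha256()
    for path in dep_files:
        with open(path, "rb") as f:
            digest.update(f.read())
    return f"v1-dependencies-py{pyversion}-{digest.hexdigest()}"

print(cache_key("3.8.2", [
    "ml-agents/setup.py",
    "ml-agents-envs/setup.py",
    "gym-unity/setup.py",
    "test_requirements.txt",
]))
```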

49
.pre-commit-config.yaml


files: "gym-unity/.*"
args: [--ignore-missing-imports, --disallow-incomplete-defs]
- repo: https://gitlab.com/pycqa/flake8
rev: 3.8.1
hooks:
- id: flake8
exclude: >
(?x)^(
.*_pb2.py|
.*_pb2_grpc.py
)$
# flake8-tidy-imports is used for banned-modules, not actually tidying
additional_dependencies: [flake8-comprehensions==3.2.2, flake8-tidy-imports==4.1.0, flake8-bugbear==20.1.4]
- repo: https://github.com/asottile/pyupgrade
rev: v2.7.0
hooks:
- id: pyupgrade
args: [--py3-plus, --py36-plus]
exclude: >
(?x)^(
.*barracuda.py|
.*_pb2.py|
.*_pb2_grpc.py
)$
rev: v2.4.0
rev: v2.5.0
hooks:
- id: mixed-line-ending
exclude: >

.*.meta
)$
args: [--fix=lf]
- id: flake8
exclude: >
(?x)^(
.*_pb2.py|
.*_pb2_grpc.py
)$
# flake8-tidy-imports is used for banned-modules, not actually tidying
additional_dependencies: [flake8-comprehensions==3.1.4, flake8-tidy-imports==4.0.0, flake8-bugbear==20.1.2]
- id: trailing-whitespace
name: trailing-whitespace-markdown
types: [markdown]

# Won't handle the templating in yamato
exclude: \.yamato/*
exclude: \.yamato/.*
- repo: https://github.com/pre-commit/mirrors-pylint
rev: v2.4.4
hooks:
- id: pylint
exclude: >
(?x)^(
.*_pb2.py|
.*_pb2_grpc.py|
.*/tests/.*
)$
args: [--score=n]
- repo: https://github.com/mattlqx/pre-commit-search-and-replace
rev: v1.0.3

2
test_requirements.txt


# Test-only dependencies should go here, not in setup.py
pytest>4.0.0,<6.0.0
pytest-cov==2.6.1
pytest-xdist
pytest-xdist==1.34.0
# onnx doesn't currently have a wheel for 3.8
tf2onnx>=1.5.5;python_version<'3.8'

4
gym-unity/gym_unity/__init__.py


# Version of the library that will be used to upload to pypi
__version__ = "0.16.0"
__version__ = "0.16.1"
__release_tag__ = "release_1"
__release_tag__ = "release_2"

13
gym-unity/gym_unity/envs/__init__.py


self._env.step()
self.visual_obs = None
self._n_agents = -1
# Save the step result from the last time all Agents requested decisions.
self._previous_decision_step: DecisionSteps = None

self._env.step()
decision_step, terminal_step = self._env.get_steps(self.name)
self._check_agents(max(len(decision_step), len(terminal_step)))
if len(terminal_step) != 0:
# The agent is done
self.game_over = True

logger.warning("Could not seed environment %s", self.name)
return
def _check_agents(self, n_agents: int) -> None:
if self._n_agents > 1:
@staticmethod
def _check_agents(n_agents: int) -> None:
if n_agents > 1:
"There can only be one Agent in the environment but {n_agents} were detected."
f"There can only be one Agent in the environment but {n_agents} were detected."
)
@property

@property
def observation_space(self):
return self._observation_space
@property
def number_agents(self):
return self._n_agents
class ActionFlattener:
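
The substantive fix in this hunk is the missing `f` prefix on the error message: as originally written, `{n_agents}` was emitted literally instead of being interpolated. A self-contained illustration:

```python
n_agents = 3
# Old (broken): a plain string keeps the braces literally.
print("There can only be one Agent in the environment but {n_agents} were detected.")
# -> There can only be one Agent in the environment but {n_agents} were detected.

# Fixed: the f prefix interpolates the local variable.
print(f"There can only be one Agent in the environment but {n_agents} were detected.")
# -> There can only be one Agent in the environment but 3 were detected.
```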

4
gym-unity/setup.py


tag = os.getenv("CIRCLE_TAG")
if tag != EXPECTED_TAG:
info = "Git tag: {0} does not match the expected tag of this app: {1}".format(
info = "Git tag: {} does not match the expected tag of this app: {}".format(
tag, EXPECTED_TAG
)
sys.exit(info)

author_email="ML-Agents@unity3d.com",
url="https://github.com/Unity-Technologies/ml-agents",
packages=find_packages(),
install_requires=["gym", "mlagents_envs=={}".format(VERSION)],
install_requires=["gym", f"mlagents_envs=={VERSION}"],
cmdclass={"verify": VerifyVersionCommand},
)
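
For context, the `VerifyVersionCommand` these hunks touch is a deploy-time guard: it compares the CI-provided git tag against the tag expected for the package version and aborts the upload on mismatch. A minimal sketch under that assumption (the tag value here is illustrative):

```python
import os
import sys

EXPECTED_TAG = "release_2"  # illustrative; the real value is derived from the package version

def verify_release_tag():
    """Abort a deploy if the CI tag doesn't match the expected release tag."""
    tag = os.getenv("CIRCLE_TAG")
    if tag != EXPECTED_TAG:
        sys.exit(f"Git tag: {tag} does not match the expected tag of this app: {EXPECTED_TAG}")
```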

4
ml-agents-envs/mlagents_envs/__init__.py


# Version of the library that will be used to upload to pypi
__version__ = "0.16.0"
__version__ = "0.16.1"
__release_tag__ = "release_1"
__release_tag__ = "release_2"

8
ml-agents-envs/mlagents_envs/base_env.py


:returns: The DecisionStep
"""
if agent_id not in self.agent_id_to_index:
raise KeyError(
"agent_id {} is not present in the DecisionSteps".format(agent_id)
)
raise KeyError(f"agent_id {agent_id} is not present in the DecisionSteps")
agent_index = self._agent_id_to_index[agent_id] # type: ignore
agent_obs = []
for batched_obs in self.obs:

specific agent
"""
if agent_id not in self.agent_id_to_index:
raise KeyError(
"agent_id {} is not present in the TerminalSteps".format(agent_id)
)
raise KeyError(f"agent_id {agent_id} is not present in the TerminalSteps")
agent_index = self._agent_id_to_index[agent_id] # type: ignore
agent_obs = []
for batched_obs in self.obs:

2
ml-agents-envs/mlagents_envs/communicator.py


from mlagents_envs.communicator_objects.unity_input_pb2 import UnityInputProto
class Communicator(object):
class Communicator:
def __init__(self, worker_id=0, base_port=5005):
"""
Python side of the communication. Must be used in pair with the right Unity Communicator equivalent.

24
ml-agents-envs/mlagents_envs/environment.py


for _sc in side_channels:
if _sc.channel_id in self.side_channels:
raise UnityEnvironmentException(
"There cannot be two side channels with the same channel id {0}.".format(
"There cannot be two side channels with the same channel id {}.".format(
_sc.channel_id
)
)

.replace(".x86", "")
)
true_filename = os.path.basename(os.path.normpath(env_path))
logger.debug("The true file name is {}".format(true_filename))
logger.debug(f"The true file name is {true_filename}")
if not (glob.glob(env_path) or glob.glob(env_path + ".*")):
return None

f"Couldn't launch the {file_name} environment. Provided filename does not match any environments."
)
else:
logger.debug("This is the launch string {}".format(launch_string))
logger.debug(f"This is the launch string {launch_string}")
# Launch Unity environment
subprocess_args = [launch_string]
if no_graphics:

def _assert_behavior_exists(self, behavior_name: str) -> None:
if behavior_name not in self._env_specs:
raise UnityActionException(
"The group {0} does not correspond to an existing agent group "
"The group {} does not correspond to an existing agent group "
"in the environment".format(behavior_name)
)

expected_shape = (len(self._env_state[behavior_name][0]), spec.action_size)
if action.shape != expected_shape:
raise UnityActionException(
"The behavior {0} needs an input of dimension {1} but received input of dimension {2}".format(
behavior_name, expected_shape, action.shape
)
"The behavior {} needs an input of dimension {} for "
"(<number of agents>, <action size>) but received input of "
"dimension {}".format(behavior_name, expected_shape, action.shape)
)
if action.dtype != expected_type:
action = action.astype(expected_type)

expected_shape = (spec.action_size,)
if action.shape != expected_shape:
raise UnityActionException(
f"The Agent {0} with BehaviorName {1} needs an input of dimension "
f"{2} but received input of dimension {3}".format(
agent_id, behavior_name, expected_shape, action.shape
)
f"The Agent {agent_id} with BehaviorName {behavior_name} needs an input of dimension "
f"{expected_shape} but received input of dimension {action.shape}"
)
expected_type = np.float32 if spec.is_action_continuous() else np.int32
if action.dtype != expected_type:

)
if len(message_data) != message_len:
raise UnityEnvironmentException(
"The message received by the side channel {0} was "
"The message received by the side channel {} was "
"unexpectedly short. Make sure your Unity Environment "
"sending side channel data properly.".format(channel_id)
)

else:
logger.warning(
"Unknown side channel data received. Channel type "
": {0}.".format(channel_id)
": {}.".format(channel_id)
)
@staticmethod
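
One hunk above fixes a subtler bug than the cosmetic `{0}` → `{}` changes: an `f`-string containing `{0}`, `{1}`, ... interpolates those integer literals immediately, leaving the chained `.format(...)` call with no placeholders to fill. A short demonstration with made-up values:

```python
agent_id, behavior_name = 7, "Walker"
expected_shape, actual_shape = (2,), (3,)

# Broken pattern removed by this diff: the f prefix evaluates {0}..{3} as
# integer literals up front, so .format() finds no placeholders left.
broken = (
    f"The Agent {0} with BehaviorName {1} needs an input of dimension "
    f"{2} but received input of dimension {3}"
).format(agent_id, behavior_name, expected_shape, actual_shape)
print(broken)
# -> The Agent 0 with BehaviorName 1 needs an input of dimension 2 but received input of dimension 3

# Fixed form, as in the new code:
print(
    f"The Agent {agent_id} with BehaviorName {behavior_name} needs an input of "
    f"dimension {expected_shape} but received input of dimension {actual_shape}"
)
```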

2
ml-agents-envs/mlagents_envs/exception.py


def __init__(self, worker_id):
message = self.MESSAGE_TEMPLATE.format(str(worker_id))
super(UnityWorkerInUseException, self).__init__(message)
super().__init__(message)
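
The `exception.py` hunk is pure Python 3 modernization: the zero-argument `super()` resolves the class and instance automatically. In context (the message template text is an assumption; only the attribute name appears in the diff):

```python
class UnityWorkerInUseException(Exception):
    # Template text is illustrative, not taken from the source.
    MESSAGE_TEMPLATE = "Worker number {} is already in use."

    def __init__(self, worker_id):
        message = self.MESSAGE_TEMPLATE.format(str(worker_id))
        # Python 3 zero-argument form; equivalent to the removed
        # super(UnityWorkerInUseException, self).__init__(message)
        super().__init__(message)
```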

2
ml-agents-envs/mlagents_envs/rpc_communicator.py


s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
try:
s.bind(("localhost", port))
except socket.error:
except OSError:
raise UnityWorkerInUseException(self.worker_id)
finally:
s.close()
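
Catching `OSError` rather than `socket.error` is behavior-preserving: `socket.error` has been an alias of `OSError` since Python 3.3, so the new spelling just drops the legacy name. A self-contained port probe in the same shape as the diffed code:

```python
import socket

def port_is_free(port):
    """Return True if localhost:port can be bound (i.e., nothing is using it)."""
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        s.bind(("localhost", port))
    except OSError:  # was `except socket.error`; identical on Python 3.3+
        return False
    finally:
        s.close()
    return True

print(port_is_free(5005))
```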

2
ml-agents-envs/mlagents_envs/side_channel/environment_parameters_channel.py


FLOAT = 0
def __init__(self) -> None:
channel_id = uuid.UUID(("534c891e-810f-11ea-a9d0-822485860400"))
channel_id = uuid.UUID("534c891e-810f-11ea-a9d0-822485860400")
super().__init__(channel_id)
def on_message_received(self, msg: IncomingMessage) -> None:

2
ml-agents-envs/mlagents_envs/side_channel/float_properties_channel.py


def __init__(self, channel_id: uuid.UUID = None) -> None:
self._float_properties: Dict[str, float] = {}
if channel_id is None:
channel_id = uuid.UUID(("60ccf7d0-4f7e-11ea-b238-784f4387d1f7"))
channel_id = uuid.UUID("60ccf7d0-4f7e-11ea-b238-784f4387d1f7")
super().__init__(channel_id)
def on_message_received(self, msg: IncomingMessage) -> None:

4
ml-agents-envs/mlagents_envs/tests/test_side_channel.py


sender = RawBytesChannel(guid)
receiver = RawBytesChannel(guid)
sender.send_raw_data("foo".encode("ascii"))
sender.send_raw_data("bar".encode("ascii"))
sender.send_raw_data(b"foo")
sender.send_raw_data(b"bar")
data = UnityEnvironment._generate_side_channel_data({sender.channel_id: sender})
UnityEnvironment._parse_side_channel_message({receiver.channel_id: receiver}, data)

2
ml-agents-envs/setup.py


tag = os.getenv("CIRCLE_TAG")
if tag != EXPECTED_TAG:
info = "Git tag: {0} does not match the expected tag of this app: {1}".format(
info = "Git tag: {} does not match the expected tag of this app: {}".format(
tag, EXPECTED_TAG
)
sys.exit(info)

51
README.md


# Unity ML-Agents Toolkit
[![docs badge](https://img.shields.io/badge/docs-reference-blue.svg)](https://github.com/Unity-Technologies/ml-agents/tree/release_1_docs/docs/)
[![docs badge](https://img.shields.io/badge/docs-reference-blue.svg)](https://github.com/Unity-Technologies/ml-agents/tree/release_2_verified_docs/docs/)
[![license badge](https://img.shields.io/badge/license-Apache--2.0-green.svg)](LICENSE)

## Releases & Documentation
**Our latest, stable release is `Release 1`. Click [here](docs/Readme.md) to
get started with the latest release of ML-Agents.**
**Our latest, stable release is `Release 2`. Click
[here](https://github.com/Unity-Technologies/ml-agents/tree/release_2_verified_docs/docs/Readme.md)
to get started with the latest release of ML-Agents.**
The table below lists all our releases, including our `master` branch which is under active
development and may be unstable. A few helpful guidelines:
* The docs links in the table below include installation and usage instructions specific to each
release. Remember to always use the documentation that corresponds to the release version you're
using.
* See the [GitHub releases](https://github.com/Unity-Technologies/ml-agents/releases) for more
details of the changes between versions.
* If you have used an earlier version of the ML-Agents Toolkit, we strongly recommend our
[guide on migrating from earlier versions](docs/Migrating.md).
The table below lists all our releases, including our `main` branch which is
under active development and may be unstable. A few helpful guidelines:
- The [Versioning page](docs/Versioning.md) overviews how we manage our GitHub
releases and the versioning process for each of the ML-Agents components.
- The [Releases page](https://github.com/Unity-Technologies/ml-agents/releases)
contains details of the changes between releases.
- The [Migration page](docs/Migrating.md) contains details on how to upgrade
from earlier releases of the ML-Agents Toolkit.
- The **Documentation** links in the table below include installation and usage
instructions specific to each release. Remember to always use the
documentation that corresponds to the release version you're using.
| **master (unstable)** | -- | [source](https://github.com/Unity-Technologies/ml-agents/tree/master) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/master/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/master.zip) |
| **Release 1** | **April 30, 2020** | **[source](https://github.com/Unity-Technologies/ml-agents/tree/release_1)** | **[docs](https://github.com/Unity-Technologies/ml-agents/tree/release_1/docs/Readme.md)** | **[download](https://github.com/Unity-Technologies/ml-agents/archive/release_1.zip)** |
| **main (unstable)** | -- | [source](https://github.com/Unity-Technologies/ml-agents/tree/main) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/main/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/main.zip) |
| **Release 2** | **May 19, 2020** | **[source](https://github.com/Unity-Technologies/ml-agents/tree/release_2)** | **[docs](https://github.com/Unity-Technologies/ml-agents/tree/release_2/docs/Readme.md)** | **[download](https://github.com/Unity-Technologies/ml-agents/archive/release_2.zip)** |
| **Release 1** | April 30, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/release_1) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/release_1/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/release_1.zip) |
| **0.15.1** | March 30, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/0.15.1) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/0.15.1/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/0.15.1.zip) |
| **0.15.0** | March 18, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/0.15.0) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/0.15.0/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/0.15.0.zip) |
| **0.14.1** | February 26, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/0.14.1) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/0.14.1/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/0.14.1.zip) |

| **0.12.1** | December 11, 2019 | [source](https://github.com/Unity-Technologies/ml-agents/tree/0.12.1) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/0.12.1/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/0.12.1.zip) |
| **0.12.0** | December 2, 2019 | [source](https://github.com/Unity-Technologies/ml-agents/tree/0.12.0) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/0.12.0/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/0.12.0.zip) |
## Citation
If you are a researcher interested in a discussion of Unity as an AI platform,

If you use Unity or the ML-Agents Toolkit to conduct research, we ask that you
cite the following paper as a reference:
Juliani, A., Berges, V., Vckay, E., Gao, Y., Henry, H., Mattar, M., Lange, D.
(2018). Unity: A General Platform for Intelligent Agents. _arXiv preprint
arXiv:1809.02627._ https://github.com/Unity-Technologies/ml-agents.
Juliani, A., Berges, V., Teng, E., Cohen, A., Harper, J., Elion, C., Goy, C.,
Gao, Y., Henry, H., Mattar, M., Lange, D. (2020). Unity: A General Platform for
Intelligent Agents. _arXiv preprint
[arXiv:1809.02627](https://arxiv.org/abs/1809.02627)._
https://github.com/Unity-Technologies/ml-agents.
- (May 12, 2020)
[Announcing ML-Agents Unity Package v1.0!](https://blogs.unity3d.com/2020/05/12/announcing-ml-agents-unity-package-v1-0/)
- (February 28, 2020)
[Training intelligent adversaries using self-play with ML-Agents](https://blogs.unity3d.com/2020/02/28/training-intelligent-adversaries-using-self-play-with-ml-agents/)
- (November 11, 2019)

For any other questions or feedback, connect directly with the ML-Agents team at
ml-agents@unity3d.com.
## Privacy
In order to improve the developer experience for Unity ML-Agents Toolkit, we have added in-editor analytics.
Please refer to "Information that is passively collected by Unity" in the
[Unity Privacy Policy](https://unity3d.com/legal/privacy-policy).
## License

8
docs/Using-Tensorboard.md


the --port option.
**Note:** If you don't assign a `run-id` identifier, `mlagents-learn` uses the
default string, "ppo". All the statistics will be saved to the same sub-folder
and displayed as one session in TensorBoard. After a few runs, the displays can
become difficult to interpret in this situation. You can delete the folders
under the `summaries` directory to clear out old statistics.
default string, "ppo". You can delete the folders under the `results` directory
to clear out old statistics.
On the left side of the TensorBoard window, you can select which of the training
runs you want to display. You can select multiple run-ids to compare statistics.

```csharp
var statsRecorder = Academy.Instance.StatsRecorder;
statsSideChannel.Add("MyMetric", 1.0);
statsRecorder.Add("MyMetric", 1.0);
```

1
docs/Learning-Environment-Create-New.md


learning_rate: 3.0e-4
learning_rate_schedule: linear
max_steps: 5.0e4
memory_size: 128
normalize: false
num_epoch: 3
num_layers: 2

11
docs/Training-ML-Agents.md


normalize: false
num_layers: 2
time_horizon: 64
summary_freq: 10000
init_path: null
# PPO-specific configs
beta: 5.0e-3

batch_size: 512
num_epoch: 3
samples_per_update: 0
init_path:
reward_signals:
# environment reward

strength: 0.02
gamma: 0.99
encoding_size: 256
learning_rate: 3e-4
learning_rate: 3.0e-4
# GAIL
gail:

demo_path: Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
learning_rate: 3e-4
learning_rate: 3.0e-4
use_actions: false
use_vail: false

`interval_2_max`], ...]
- **sub-arguments** - `intervals`
The implementation of the samplers can be found at
`ml-agents-envs/mlagents_envs/sampler_class.py`.
The implementation of the samplers can be found in the
[sampler_class.py file](../ml-agents/mlagents/trainers/sampler_class.py).
#### Defining a New Sampler Type
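
The `learning_rate: 3e-4` → `3.0e-4` edits above are likely more than cosmetic: PyYAML implements YAML 1.1, where a float literal requires a dot before the exponent, so `3e-4` loads as the string `"3e-4"` while `3.0e-4` loads as a float. A quick check (assuming PyYAML is the loader for these trainer configs):

```python
import yaml

print(type(yaml.safe_load("lr: 3e-4")["lr"]))    # <class 'str'> under YAML 1.1 rules
print(type(yaml.safe_load("lr: 3.0e-4")["lr"]))  # <class 'float'>
```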

10
docs/Installation-Anaconda-Windows.md


the ml-agents Conda environment by typing `activate ml-agents`)_:
```sh
git clone --branch release_1 https://github.com/Unity-Technologies/ml-agents.git
git clone --branch release_2 https://github.com/Unity-Technologies/ml-agents.git
The `--branch release_1` option will switch to the tag of the latest stable
release. Omitting that will get the `master` branch which is potentially
The `--branch release_2` option will switch to the tag of the latest stable
release. Omitting that will get the `main` branch which is potentially
unstable.
If you don't want to use Git, you can find download links on the

connected to the Internet and then type in the Anaconda Prompt:
```console
pip install mlagents
pip install mlagents==0.16.1
```
This will complete the installation of all the required Python packages to run

this, you can try:
```console
pip install mlagents --no-cache-dir
pip install mlagents==0.16.1 --no-cache-dir
```
This `--no-cache-dir` tells the pip to disable the cache.

10
docs/Installation.md


of our tutorials / guides assume you have access to our example environments).
```sh
git clone --branch release_1 https://github.com/Unity-Technologies/ml-agents.git
git clone --branch release_2 https://github.com/Unity-Technologies/ml-agents.git
The `--branch release_1` option will switch to the tag of the latest stable
release. Omitting that will get the `master` branch which is potentially
The `--branch release_2` option will switch to the tag of the latest stable
release. Omitting that will get the `main` branch which is potentially
unstable.
#### Advanced: Local Installation for Development

back, make sure to clone the `master` branch (by omitting `--branch release_1`
back, make sure to clone the `main` branch (by omitting `--branch release_2`
from the command above). See our
[Contributions Guidelines](../com.unity.ml-agents/CONTRIBUTING.md) for more
information on contributing to the ML-Agents Toolkit.

run from the command line:
```sh
pip3 install mlagents
pip3 install mlagents==0.16.1
```
Note that this will install `mlagents` from PyPi, _not_ from the cloned

36
utils/validate_versions.py


def extract_version_string(filename):
with open(filename) as f:
for l in f.readlines():
if l.startswith(VERSION_LINE_START):
return l.replace(VERSION_LINE_START, "").strip()
for line in f.readlines():
if line.startswith(VERSION_LINE_START):
return line.replace(VERSION_LINE_START, "").strip()
return None

def set_package_version(new_version: str) -> None:
with open(UNITY_PACKAGE_JSON_PATH, "r") as f:
with open(UNITY_PACKAGE_JSON_PATH) as f:
package_json = json.load(f)
if "version" in package_json:
package_json["version"] = new_version

f.writelines(lines)
def print_release_tag_commands(
python_version: str, csharp_version: str, release_tag: str
):
python_tag = f"python-packages_{python_version}"
csharp_tag = f"com.unity.ml-agents_{csharp_version}"
docs_tag = f"{release_tag}_docs"
print(
f"""
###
Use these commands to create the tags after the release:
###
git checkout {release_tag}
git tag -f latest_release
git push -f origin latest_release
git tag -f {docs_tag}
git push -f origin {docs_tag}
git tag {python_tag}
git push -f origin {python_tag}
git tag {csharp_tag}
git push -f origin {csharp_tag}
"""
)
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--python-version", default=None)

if args.csharp_version:
print(f"Updating C# package to version {args.csharp_version}")
set_version(args.python_version, args.csharp_version, args.release_tag)
if args.release_tag is not None:
print_release_tag_commands(
args.python_version, args.csharp_version, args.release_tag
)
else:
ok = check_versions()
return_code = 0 if ok else 1

66
utils/validate_meta_files.py


def main():
asset_path = "Project/Assets"
asset_paths = [
"Project/Assets",
"DevProject/Assets",
"com.unity.ml-agents",
"com.unity.ml-agents.extensions",
]
allow_list = frozenset(
[
"com.unity.ml-agents/.editorconfig",
"com.unity.ml-agents/.gitignore",
"com.unity.ml-agents/.npmignore",
"com.unity.ml-agents/Tests/.tests.json",
"com.unity.ml-agents.extensions/.gitignore",
"com.unity.ml-agents.extensions/.npmignore",
"com.unity.ml-agents.extensions/Tests/.tests.json",
]
)
ignored_dirs = {"Documentation~"}
for root, dirs, files in os.walk(asset_path):
dirs = set(dirs)
files = set(files)
for asset_path in asset_paths:
for root, dirs, files in os.walk(asset_path):
# Modifying the dirs list with topdown=True (the default) will prevent us from recursing those directories
for ignored in ignored_dirs:
try:
dirs.remove(ignored)
except ValueError:
pass
combined = dirs | files
for f in combined:
if f.endswith(python_suffix):
# Probably this script; skip it
continue
dirs = set(dirs)
files = set(files)
combined = dirs | files
for f in combined:
if f.endswith(python_suffix):
# Probably this script; skip it
continue
# We expect each non-.meta file to have a .meta file, and each .meta file to have a non-.meta file
if f.endswith(meta_suffix):
expected = f.replace(meta_suffix, "")
else:
expected = f + meta_suffix
full_path = os.path.join(root, f)
if full_path in allow_list:
continue
# We expect each non-.meta file to have a .meta file, and each .meta file to have a non-.meta file
if f.endswith(meta_suffix):
expected = f.replace(meta_suffix, "")
else:
expected = f + meta_suffix
if expected not in combined:
unmatched.add(os.path.join(root, f))
else:
num_matched += 1
if expected not in combined:
unmatched.add(full_path)
else:
num_matched += 1
if unmatched:
raise Exception(
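
The rewrite above extends the walk from a single `Project/Assets` root to several roots while keeping the same invariant: every asset file or directory must have a sibling `.meta` file, and every `.meta` file must have a matching asset. A condensed sketch of that pairing check (allow-list and ignored-directory handling omitted):

```python
import os

META_SUFFIX = ".meta"

def find_unmatched(root_dir):
    """Return paths whose .meta/asset counterpart is missing under root_dir."""
    unmatched = set()
    for root, dirs, files in os.walk(root_dir):
        combined = set(dirs) | set(files)
        for name in combined:
            if name.endswith(META_SUFFIX):
                expected = name[: -len(META_SUFFIX)]  # asset the .meta describes
            else:
                expected = name + META_SUFFIX  # .meta the asset requires
            if expected not in combined:
                unmatched.add(os.path.join(root, name))
    return unmatched
```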

3
utils/make_readme_table.py


ReleaseInfo.from_simple_tag("0.15.0", "March 18, 2020"),
ReleaseInfo.from_simple_tag("0.15.1", "March 30, 2020"),
ReleaseInfo("release_1", "1.0.0", "0.16.0", "April 30, 2020"),
ReleaseInfo("release_2", "1.0.1", "0.16.1", "May 19, 2020"),
print(table_line("master (unstable)", "master", "--"))
print(table_line("main (unstable)", "main", "--"))
highlight = True # whether to bold the line or not
for version_info in sorted_versions:
if version_info.elapsed_days <= MAX_DAYS:

5
DevProject/ProjectSettings/EditorBuildSettings.asset


EditorBuildSettings:
m_ObjectHideFlags: 0
serializedVersion: 2
m_Scenes: []
m_Scenes:
- enabled: 1
path: Assets/ML-Agents/Scripts/Tests/Runtime/AcademyTest/AcademyStepperTestScene.unity
guid: 9bafc50b1e55b43b2b1ae9620f1f8311
m_configObjects: {}

4
DevProject/ProjectSettings/ProjectVersion.txt


m_EditorVersion: 2019.3.3f1
m_EditorVersionWithRevision: 2019.3.3f1 (7ceaae5f7503)
m_EditorVersion: 2019.4.17f1
m_EditorVersionWithRevision: 2019.4.17f1 (667c8606c536)

19
DevProject/Packages/manifest.json


"dependencies": {
"com.unity.2d.sprite": "1.0.0",
"com.unity.2d.tilemap": "1.0.0",
"com.unity.ads": "2.0.8",
"com.unity.ads": "3.5.2",
"com.unity.ide.vscode": "1.1.4",
"com.unity.ide.vscode": "1.2.3",
"com.unity.multiplayer-hlapi": "1.0.4",
"com.unity.package-manager-doctools": "1.1.1-preview.3",
"com.unity.package-validation-suite": "0.7.15-preview",
"com.unity.purchasing": "2.0.6",
"com.unity.test-framework": "1.1.11",
"com.unity.testtools.codecoverage": "0.2.2-preview",
"com.unity.multiplayer-hlapi": "1.0.6",
"com.unity.package-manager-doctools": "1.7.0-preview",
"com.unity.package-validation-suite": "0.19.0-preview",
"com.unity.purchasing": "2.2.1",
"com.unity.test-framework": "1.1.19",
"com.unity.test-framework.performance": "2.2.0-preview",
"com.unity.testtools.codecoverage": "1.0.0-pre.3",
"com.unity.xr.legacyinputhelpers": "1.3.8",
"com.unity.xr.legacyinputhelpers": "2.1.6",
"com.unity.modules.ai": "1.0.0",
"com.unity.modules.androidjni": "1.0.0",
"com.unity.modules.animation": "1.0.0",

5
.yamato/protobuf-generation-test.yml


nuget install Grpc.Tools -Version $GRPC_VERSION -OutputDirectory protobuf-definitions/
python3 -m venv venv
. venv/bin/activate
pip install --upgrade pip
pip install grpcio-tools==1.13.0 --progress-bar=off
pip install mypy-protobuf==1.16.0 --progress-bar=off
pip install --upgrade pip --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
pip install grpcio==1.28.1 grpcio-tools==1.13.0 protobuf==3.11.3 six==1.14.0 mypy-protobuf==1.16.0 --progress-bar=off --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
cd protobuf-definitions
chmod +x Grpc.Tools.$GRPC_VERSION/tools/macosx_x64/protoc
chmod +x Grpc.Tools.$GRPC_VERSION/tools/macosx_x64/grpc_csharp_plugin

47
.yamato/com.unity.ml-agents-promotion.yml


test_editors:
- version: 2019.3
test_platforms:
- name: win
type: Unity::VM
image: package-ci/win10:stable
flavor: b1.large
---
{% for editor in test_editors %}
{% for platform in test_platforms %}
promotion_test_{{ platform.name }}_{{ editor.version }}:
name : Promotion Test {{ editor.version }} on {{ platform.name }}
agent:
type: {{ platform.type }}
image: {{ platform.image }}
flavor: {{ platform.flavor}}
variables:
UPMCI_PROMOTION: 1
commands:
- npm install upm-ci-utils@stable -g --registry https://artifactory.prd.cds.internal.unity3d.com/artifactory/api/npm/upm-npm
- upm-ci package test --unity-version {{ editor.version }} --package-path com.unity.ml-agents
artifacts:
logs:
paths:
- "upm-ci~/test-results/**/*"
dependencies:
- .yamato/com.unity.ml-agents-pack.yml#pack
{% endfor %}
{% endfor %}
promotion_test_trigger:
name: Promotion Tests Trigger
dependencies:
{% for editor in test_editors %}
{% for platform in test_platforms %}
- .yamato/com.unity.ml-agents-promotion.yml#promotion_test_{{platform.name}}_{{editor.version}}
{% endfor %}
{% endfor %}
promote:
name: Promote to Production
agent:

- "upm-ci~/packages/*.tgz"
dependencies:
- .yamato/com.unity.ml-agents-pack.yml#pack
- .yamato/com.unity.ml-agents-test.yml#all_package_tests
{% for editor in test_editors %}
{% for platform in test_platforms %}
- .yamato/com.unity.ml-agents-promotion.yml#promotion_test_{{ platform.name }}_{{ editor.version }}
{% endfor %}
{% endfor %}

24
.yamato/gym-interface-test.yml


variables:
UNITY_VERSION: {{ editor.version }}
commands:
- pip install pyyaml
- pip install pyyaml --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
- python -u -m ml-agents.tests.yamato.setup_venv
- ./venv/bin/python ml-agents/tests/yamato/scripts/run_gym.py --env=artifacts/testPlayer-Basic
dependencies:

changes:
only:
- "com.unity.ml-agents/**"
- "Project/**"
- "ml-agents/**"
- "ml-agents-envs/**"
- ".yamato/gym-interface-test.yml"
except:
- "*.md"
- "com.unity.ml-agents/*.md"
- "com.unity.ml-agents/**/*.md"
expression: |
(pull_request.target eq "main" OR
pull_request.target match "release.+") AND
NOT pull_request.draft AND
(pull_request.changes.any match "com.unity.ml-agents/**" OR
pull_request.changes.any match "Project/**" OR
pull_request.changes.any match "ml-agents/**" OR
pull_request.changes.any match "ml-agents-envs/**" OR
pull_request.changes.any match "gym-unity/**" OR
pull_request.changes.any match ".yamato/gym-interface-test.yml") AND
NOT pull_request.changes.all match "**/*.md"
{% endfor %}

25
.yamato/python-ll-api-test.yml


variables:
UNITY_VERSION: {{ editor.version }}
commands:
- pip install pyyaml
- pip install pyyaml --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
- ./venv/bin/python ml-agents/tests/yamato/scripts/run_llapi.py
- ./venv/bin/python ml-agents/tests/yamato/scripts/run_llapi.py
- ./venv/bin/python ml-agents/tests/yamato/scripts/run_llapi.py --env=artifacts/testPlayer-Basic
- ./venv/bin/python ml-agents/tests/yamato/scripts/run_llapi.py --env=artifacts/testPlayer-WallJump
- ./venv/bin/python ml-agents/tests/yamato/scripts/run_llapi.py --env=artifacts/testPlayer-Bouncer

cancel_old_ci: true
changes:
only:
- "com.unity.ml-agents/**"
- "Project/**"
- "ml-agents/**"
- "ml-agents-envs/**"
- ".yamato/python-ll-api-test.yml"
except:
- "*.md"
- "com.unity.ml-agents/*.md"
- "com.unity.ml-agents/**/*.md"
expression: |
(pull_request.target eq "main" OR
pull_request.target match "release.+") AND
NOT pull_request.draft AND
(pull_request.changes.any match "com.unity.ml-agents/**" OR
pull_request.changes.any match "Project/**" OR
pull_request.changes.any match "ml-agents/**" OR
pull_request.changes.any match "ml-agents-envs/**" OR
pull_request.changes.any match ".yamato/python-ll-api-test.yml") AND
NOT pull_request.changes.all match "**/*.md"
{% endfor %}

48
.yamato/com.unity.ml-agents-test.yml


# 2018.4 doesn't support code-coverage
coverageOptions:
minCoveragePct: 0
# We want some scene tests to run in the DevProject, but packages there only support 2019+
testProject: Project
testProject: DevProject
- version: 2020.2
testProject: DevProject
- version: 2020.3
trunk_editor:
- version: trunk
coverageOptions: --enable-code-coverage --code-coverage-options 'generateHtmlReport;assemblyFilters:+Unity.ML-Agents'
minCoveragePct: 72
testProject: DevProject
test_platforms:
- name: win
type: Unity::VM

{% endfor %}
{% endfor %}
{% for editor in trunk_editor %}
{% for platform in test_platforms %}
- .yamato/com.unity.ml-agents-test.yml#test_{{ platform.name }}_{{ editor.version }}
{% endfor %}
{% endfor %}
- branch: master
- branch: main
frequency: daily
{% for editor in test_editors %}

flavor: {{ platform.flavor}}
commands:
- npm install upm-ci-utils@stable -g --registry https://artifactory.prd.cds.internal.unity3d.com/artifactory/api/npm/upm-npm
- upm-ci package test -u {{ editor.version }} --package-path com.unity.ml-agents {{ editor.coverageOptions }}
- python ml-agents/tests/yamato/check_coverage_percent.py upm-ci~/test-results/ {{ editor.minCoveragePct }}
- upm-ci project test -u {{ editor.version }} --project-path {{ editor.testProject }} {{ editor.coverageOptions }} --extra-utr-arg "reruncount=2"
- python3 ml-agents/tests/yamato/check_coverage_percent.py upm-ci~/test-results/ {{ editor.minCoveragePct }}
artifacts:
logs:
paths:

changes:
only:
- "com.unity.ml-agents/**"
- "{{ editor.testProject }}/**"
{% for editor in trunk_editor %}
{% for platform in test_platforms %}
test_{{ platform.name }}_trunk:
name : com.unity.ml-agents test {{ editor.version }} on {{ platform.name }}
agent:
type: {{ platform.type }}
image: {{ platform.image }}
flavor: {{ platform.flavor}}
commands:
- python -m pip install unity-downloader-cli --extra-index-url https://artifactory.eu-cph-1.unityops.net/api/pypi/common-python/simple
- unity-downloader-cli -u trunk -c editor --wait --fast
- npm install upm-ci-utils@stable -g --registry https://artifactory.prd.cds.internal.unity3d.com/artifactory/api/npm/upm-npm
- upm-ci package test -u {{ editor.version }} --package-path com.unity.ml-agents {{ editor.coverageOptions }}
- python ml-agents/tests/yamato/check_coverage_percent.py upm-ci~/test-results/ {{ editor.minCoveragePct }}
artifacts:
logs:
paths:
- "upm-ci~/test-results/**/*"
dependencies:
- .yamato/com.unity.ml-agents-pack.yml#pack
triggers:
cancel_old_ci: true
{% endfor %}
{% endfor %}

17
.yamato/standalone-build-test.yml


test_editors:
- version: 2018.4
- version: 2020.3
---
{% for editor in test_editors %}
test_mac_standalone_{{ editor.version }}:

image: ml-agents/ml-agents-bokken-mac:0.1.4-492264
image: package-ci/mac:stable
- pip install pyyaml
- python -u -m ml-agents.tests.yamato.standalone_build_tests
- python -u -m ml-agents.tests.yamato.standalone_build_tests --scene=Assets/ML-Agents/Examples/Basic/Scenes/Basic.unity
- python -u -m ml-agents.tests.yamato.standalone_build_tests --scene=Assets/ML-Agents/Examples/Bouncer/Scenes/Bouncer.unity
- python -u -m ml-agents.tests.yamato.standalone_build_tests --scene=Assets/ML-Agents/Examples/WallJump/Scenes/WallJump.unity
- python3 -m pip install pyyaml --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
- python3 -m pip install unity-downloader-cli --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple --upgrade
- unity-downloader-cli -u {{ editor.version }} -c editor --wait --fast
- python3 -u -m ml-agents.tests.yamato.standalone_build_tests
- python3 -u -m ml-agents.tests.yamato.standalone_build_tests --scene=Assets/ML-Agents/Examples/Basic/Scenes/Basic.unity
- python3 -u -m ml-agents.tests.yamato.standalone_build_tests --scene=Assets/ML-Agents/Examples/Bouncer/Scenes/Bouncer.unity
- python3 -u -m ml-agents.tests.yamato.standalone_build_tests --scene=Assets/ML-Agents/Examples/WallJump/Scenes/WallJump.unity
triggers:
cancel_old_ci: true
changes:

4
.yamato/training-int-tests.yml


test_editors:
- version: 2018.4
- version: 2020.3
---
{% for editor in test_editors %}
test_mac_training_int_{{ editor.version }}:

variables:
UNITY_VERSION: {{ editor.version }}
commands:
- pip install pyyaml
- pip install pyyaml --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
- python -u -m ml-agents.tests.yamato.training_int_tests
# Backwards-compatibility tests.
# If we make a breaking change to the communication protocol, these will need

3
Project/Assets/ML-Agents/Examples/FoodCollector/Scripts/FoodCollectorAgent.cs


public override void Heuristic(float[] actionsOut)
{
actionsOut[0] = 0f;
actionsOut[1] = 0f;
actionsOut[2] = 0f;
if (Input.GetKey(KeyCode.D))
{
actionsOut[2] = 2f;

1
Project/Assets/ML-Agents/Examples/Soccer/Scripts/AgentSoccer.cs


public override void Heuristic(float[] actionsOut)
{
Array.Clear(actionsOut, 0, actionsOut.Length);
//forward
if (Input.GetKey(KeyCode.W))
{

1
Project/Assets/ML-Agents/Examples/WallJump/Scripts/WallJumpAgent.cs


public override void Heuristic(float[] actionsOut)
{
System.Array.Clear(actionsOut, 0, actionsOut.Length);
if (Input.GetKey(KeyCode.D))
{
actionsOut[1] = 2f;

3
Project/Packages/manifest.json


"com.unity.analytics": "3.2.3",
"com.unity.collab-proxy": "1.2.15",
"com.unity.ml-agents": "file:../../com.unity.ml-agents",
"com.unity.package-manager-ui": "2.0.8",
"com.unity.package-manager-ui": "2.0.13",
"com.unity.purchasing": "2.0.3",
"com.unity.textmeshpro": "1.4.1",
"com.unity.modules.ai": "1.0.0",
"com.unity.modules.animation": "1.0.0",

2
Project/ProjectSettings/ProjectVersion.txt


m_EditorVersion: 2018.4.17f1
m_EditorVersion: 2018.4.35f1

2
Project/ProjectSettings/UnityConnectSettings.asset


UnityConnectSettings:
m_ObjectHideFlags: 0
serializedVersion: 1
m_Enabled: 1
m_Enabled: 0
m_TestMode: 0
m_EventOldUrl: https://api.uca.cloud.unity3d.com/v1/events
m_EventUrl: https://cdp.cloud.unity3d.com/v1/events

4
com.unity.ml-agents/CONTRIBUTING.md


## Communication
First, please read through our
[code of conduct](https://github.com/Unity-Technologies/ml-agents/blob/master/CODE_OF_CONDUCT.md),
[code of conduct](https://github.com/Unity-Technologies/ml-agents/blob/main/CODE_OF_CONDUCT.md),
as we expect all our contributors to follow it.
Second, before starting on a project that you intend to contribute to the

## Git Branches
The master branch corresponds to the most recent version of the project. Note
The `main` branch corresponds to the most recent version of the project. Note
that this may be newer than the
[latest release](https://github.com/Unity-Technologies/ml-agents/releases/tag/latest_release).

20
com.unity.ml-agents/Editor/BrainParametersDrawer.cs


static void DrawContinuousVectorAction(Rect position, SerializedProperty property)
{
var vecActionSize = property.FindPropertyRelative(k_ActionSizePropName);
vecActionSize.arraySize = 1;
// This check is here due to:
// https://fogbugz.unity3d.com/f/cases/1246524/
// If this case has been resolved, please remove this if condition.
if (vecActionSize.arraySize != 1)
{
vecActionSize.arraySize = 1;
}
var continuousActionSize =
vecActionSize.GetArrayElementAtIndex(0);
EditorGUI.PropertyField(

static void DrawDiscreteVectorAction(Rect position, SerializedProperty property)
{
var vecActionSize = property.FindPropertyRelative(k_ActionSizePropName);
vecActionSize.arraySize = EditorGUI.IntField(
var newSize = EditorGUI.IntField(
// This check is here due to:
// https://fogbugz.unity3d.com/f/cases/1246524/
// If this case has been resolved, please remove this if condition.
if (newSize != vecActionSize.arraySize)
{
vecActionSize.arraySize = newSize;
}
position.y += k_LineHeight;
position.x += 20;
position.width -= 20;

4
com.unity.ml-agents/Editor/DemonstrationImporter.cs


using Unity.MLAgents.CommunicatorObjects;
using UnityEditor;
using UnityEngine;
#if UNITY_2020_2_OR_NEWER
using UnityEditor.AssetImporters;
#else
#endif
using Unity.MLAgents.Demonstrations;
namespace Unity.MLAgents.Editor

18
com.unity.ml-agents/Documentation~/com.unity.ml-agents.md


Manager documentation].
To install the companion Python package to enable training behaviors, follow the
[installation instructions] on our [GitHub repository].
[installation instructions] on our [GitHub repository]. It is strongly recommended that you
use the Python package that corresponds to this release (version 0.16.1) for the best experience;
versions between 0.16.1 and 0.20.0 are supported. Versions after 0.25.1 cannot be used.
## Requirements

the documentation, you can check out our [GitHub Repository], which also includes
a number of ways to [connect with us] including our [ML-Agents Forum].
[unity ML-Agents Toolkit]: https://github.com/Unity-Technologies/ml-agents
In order to improve the developer experience for Unity ML-Agents Toolkit, we have added in-editor analytics.
Please refer to "Information that is passively collected by Unity" in the
[Unity Privacy Policy](https://unity3d.com/legal/privacy-policy).
[unity ML-Agents Toolkit]: https://github.com/Unity-Technologies/ml-agents/tree/release_2_verified_docs
[installation instructions]: https://github.com/Unity-Technologies/ml-agents/blob/release_1_docs/docs/Installation.md
[github repository]: https://github.com/Unity-Technologies/ml-agents
[python package]: https://github.com/Unity-Technologies/ml-agents
[installation instructions]: https://github.com/Unity-Technologies/ml-agents/blob/release_2_verified_docs/docs/Installation.md
[github repository]: https://github.com/Unity-Technologies/ml-agents/tree/release_2_verified_docs
[python package]: https://github.com/Unity-Technologies/ml-agents/tree/release_2_verified_docs
[connect with us]: https://github.com/Unity-Technologies/ml-agents#community-and-feedback
[connect with us]: https://github.com/Unity-Technologies/ml-agents/tree/release_2_verified_docs#community-and-feedback
[ml-agents forum]: https://forum.unity.com/forums/ml-agents.453/

86
com.unity.ml-agents/CHANGELOG.md


and this project adheres to
[Semantic Versioning](http://semver.org/spec/v2.0.0.html).
## [1.0.8] - 2021-05-26
### Bug Fixes
#### com.unity.ml-agents (C#)
- Fixed a null reference exception that occurred when loading an ONNX model file that was generated with a new
version of the Python trainer (0.26.0 or newer). (#5350)
- Added a check to prevent training with incompatible versions of the Python trainer (0.26.0 or newer). (#5370)
## [1.0.7] - 2021-03-04
### Minor Changes
#### com.unity.ml-agents (C#)
In order to improve the developer experience for Unity ML-Agents Toolkit, we have added in-editor analytics.
Please refer to "Information that is passively collected by Unity" in the
[Unity Privacy Policy](https://unity3d.com/legal/privacy-policy).
### Bug Fixes
#### com.unity.ml-agents (C#)
- Removed unnecessary memory allocations in `SensorShapeValidator.ValidateSensors()` (#4915)
- Removed unnecessary memory allocations in `SideChannelManager.GetSideChannelMessage()` (#4915)
- Removed several memory allocations that happened during inference. On a test scene, this
reduced the amount of memory allocated by approximately 25%. (#4916)
## [1.0.6] - 2020-11-13
### Minor Changes
#### com.unity.ml-agents (C#)
- Update documentation with recommended version of Python trainer. (#4535)
- Log a warning if a version of the Python trainer is used that is newer than expected. (#4535)
- Update Barracuda to 1.0.4. (#4644)
### Bug Fixes
#### com.unity.ml-agents (C#)
- Fixed a bug with visual observations using .onnx model files and newer versions of Barracuda (1.1.0 or later). (#4533)
- `Agent.CollectObservations()`, `Agent.EndEpisode()`, and `Academy.EnvironmentStep()` will now throw an exception
if they are called recursively (for example, if they call `Agent.EndEpisode()`).
Previously, this would result in an infinite loop and cause the editor to hang (a sketch of this kind of guard follows these notes). (#4638)
- Fixed a bug where accessing the Academy outside of play mode would cause the Academy to get stepped multiple times when in play mode. (#4637)
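The re-entrancy guard behind that fix can be pictured as follows. This is a minimal sketch written for this document, not the package's verbatim source; the real `RecursionChecker` (see the Agent.cs and Academy.cs hunks below) throws the package's own `UnityAgentsException`, as the edit-mode tests near the end of this diff confirm.

using System;

// Minimal re-entrancy guard: Start() throws if the guarded method is already
// on the call stack; Dispose() re-arms the guard when the using block exits.
internal class RecursionChecker : IDisposable
{
    readonly string m_MethodName;
    bool m_IsRunning;

    public RecursionChecker(string methodName)
    {
        m_MethodName = methodName;
    }

    public IDisposable Start()
    {
        if (m_IsRunning)
        {
            throw new InvalidOperationException($"{m_MethodName} called recursively.");
        }
        m_IsRunning = true;
        return this;
    }

    public void Dispose()
    {
        m_IsRunning = false;
    }
}

Call sites then wrap the user callback in using (checker.Start()) { ... }, which is exactly the shape of the Agent.cs and Academy.cs changes below.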
## [1.0.5] - 2020-09-23
### Minor Changes
#### com.unity.ml-agents (C#)
- Update Barracuda to 1.0.3. (#4506)
## [1.0.4] - 2020-08-19
### Minor Changes
#### com.unity.ml-agents (C#)
- Update Barracuda to 1.0.2. (#4385)
- Explicitly call out dependencies in package.json.
## [1.0.3] - 2020-07-07
### Minor Changes
#### com.unity.ml-agents (C#)
- Update Barracuda to 1.0.1. (#4187)
### Bug Fixes
#### com.unity.ml-agents (C#)
- Fixed an issue where RayPerceptionSensor would raise an exception when the
list of tags was empty, or a tag in the list was invalid (unknown, null, or
empty string). (#4155)
#### ml-agents / ml-agents-envs / gym-unity (Python)
- Fixed issue with FoodCollector, Soccer, and WallJump when playing with keyboard. (#4147, #4174)
## [1.0.2] - 2020-06-04
### Minor Changes
#### com.unity.ml-agents (C#)
- Remove 'preview' tag.
## [1.0.2-preview] - 2020-05-19
### Bug Fixes
#### com.unity.ml-agents (C#)
- Fix missing .meta file
## [1.0.1-preview] - 2020-05-19
### Bug Fixes
#### com.unity.ml-agents (C#)
- A bug that would cause the editor to go into a loop when a prefab was selected was fixed. (#3949)
- BrainParameters.ToProto() no longer throws an exception if none of the fields have been set. (#3930)
- The Barracuda dependency was upgraded to 0.7.1-preview. (#3977)
#### ml-agents / ml-agents-envs / gym-unity (Python)
- An issue was fixed where using `--initialize-from` would resume from the past step count. (#3962)
- The gym wrapper error for the wrong number of agents now fires more consistently, and more details
were added to the error message when the input dimension is wrong. (#3963)
## [1.0.0-preview] - 2020-05-06
### Major Changes

- Introduced the `SideChannelManager` to register, unregister and access side
channels. (#3807)
- `Academy.FloatProperties` was replaced by `Academy.EnvironmentParameters`.
See the [Migration Guide](https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Migrating.md)
See the [Migration Guide](https://github.com/Unity-Technologies/ml-agents/blob/main/docs/Migrating.md)
for more details on upgrading. (#3807)
- `SideChannel.OnMessageReceived` is now a protected method (was public)
- SideChannel IncomingMessages methods now take an optional default argument,

`--load`. (#3705)
- The Jupyter notebooks have been removed from the repository. (#3704)
- The multi-agent gym option was removed from the gym wrapper. For multi-agent
scenarios, use the [Low Level Python API](https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Python-API.md). (#3681)
scenarios, use the [Low Level Python API](https://github.com/Unity-Technologies/ml-agents/blob/main/docs/Python-API.md). (#3681)
[Low Level Python API](https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Python-API.md)
[Low Level Python API](https://github.com/Unity-Technologies/ml-agents/blob/main/docs/Python-API.md)
documentation for more information. If you use `mlagents-learn` for training, this should be a
transparent change. (#3681)
- Added ability to start training (initialize model weights) from a previous run

26
com.unity.ml-agents/Runtime/SideChannels/SideChannelsManager.cs


/// <returns></returns>
internal static byte[] GetSideChannelMessage(Dictionary<Guid, SideChannel> sideChannels)
{
if (!HasOutgoingMessages(sideChannels))
{
// Early out so that we don't create the MemoryStream or BinaryWriter.
// This is the most common case.
return Array.Empty<byte>();
}
using (var memStream = new MemoryStream())
{
using (var binaryWriter = new BinaryWriter(memStream))

return memStream.ToArray();
}
}
}
/// <summary>
/// Check whether any of the sidechannels have queued messages.
/// </summary>
/// <param name="sideChannels"></param>
/// <returns></returns>
static bool HasOutgoingMessages(Dictionary<Guid, SideChannel> sideChannels)
{
foreach (var sideChannel in sideChannels.Values)
{
var messageList = sideChannel.MessageQueue;
if (messageList.Count > 0)
{
return true;
}
}
return false;
}
/// <summary>

52
com.unity.ml-agents/Runtime/Agent.cs


/// [OnDisable()]: https://docs.unity3d.com/ScriptReference/MonoBehaviour.OnDisable.html
/// [OnBeforeSerialize()]: https://docs.unity3d.com/ScriptReference/MonoBehaviour.OnBeforeSerialize.html
/// [OnAfterSerialize()]: https://docs.unity3d.com/ScriptReference/MonoBehaviour.OnAfterSerialize.html
/// [Agents]: https://github.com/Unity-Technologies/ml-agents/blob/release_1_docs/docs/Learning-Environment-Design-Agents.md
/// [Reinforcement Learning in Unity]: https://github.com/Unity-Technologies/ml-agents/blob/release_1_docs/docs/Learning-Environment-Design.md
/// [Agents]: https://github.com/Unity-Technologies/ml-agents/blob/release_2_verified_docs/docs/Learning-Environment-Design-Agents.md
/// [Reinforcement Learning in Unity]: https://github.com/Unity-Technologies/ml-agents/blob/release_2_verified_docs/docs/Learning-Environment-Design.md
/// [Unity ML-Agents Toolkit manual]: https://github.com/Unity-Technologies/ml-agents/blob/release_1_docs/docs/Readme.md
/// [Unity ML-Agents Toolkit manual]: https://github.com/Unity-Technologies/ml-agents/blob/release_2_verified_docs/docs/Readme.md
[HelpURL("https://github.com/Unity-Technologies/ml-agents/blob/release_1_docs/" +
[HelpURL("https://github.com/Unity-Technologies/ml-agents/blob/release_2_verified_docs/" +
"docs/Learning-Environment-Design-Agents.md")]
[Serializable]
[RequireComponent(typeof(BehaviorParameters))]

/// </summary>
internal VectorSensor collectObservationsSensor;
private RecursionChecker m_CollectObservationsChecker = new RecursionChecker("CollectObservations");
private RecursionChecker m_OnEpisodeBeginChecker = new RecursionChecker("OnEpisodeBegin");
/// <summary>
/// Called when the attached [GameObject] becomes enabled and active.
/// [GameObject]: https://docs.unity3d.com/Manual/GameObjects.html

// episode when initializing until after the Academy had its first reset.
if (Academy.Instance.TotalStepCount != 0)
{
OnEpisodeBegin();
using (m_OnEpisodeBeginChecker.Start())
{
OnEpisodeBegin();
}
}
}

{
// Make sure the latest observations are being passed to training.
collectObservationsSensor.Reset();
CollectObservations(collectObservationsSensor);
using (m_CollectObservationsChecker.Start())
{
CollectObservations(collectObservationsSensor);
}
}
// Request the last decision with no callbacks
// We request a decision so Python knows the Agent is done immediately

/// for information about mixing reward signals from curiosity and Generative Adversarial
/// Imitation Learning (GAIL) with rewards supplied through this method.
///
/// [Agents - Rewards]: https://github.com/Unity-Technologies/ml-agents/blob/release_1_docs/docs/Learning-Environment-Design-Agents.md#rewards
/// [Reward Signals]: https://github.com/Unity-Technologies/ml-agents/blob/release_1_docs/docs/ML-Agents-Overview.md#a-quick-note-on-reward-signals
/// [Agents - Rewards]: https://github.com/Unity-Technologies/ml-agents/blob/release_2_verified_docs/docs/Learning-Environment-Design-Agents.md#rewards
/// [Reward Signals]: https://github.com/Unity-Technologies/ml-agents/blob/release_2_verified_docs/docs/ML-Agents-Overview.md#a-quick-note-on-reward-signals
/// </remarks>
/// <param name="reward">The new value of the reward.</param>
public void SetReward(float reward)

/// for information about mixing reward signals from curiosity and Generative Adversarial
/// Imitation Learning (GAIL) with rewards supplied through this method.
///
/// [Agents - Rewards]: https://github.com/Unity-Technologies/ml-agents/blob/release_1_docs/docs/Learning-Environment-Design-Agents.md#rewards
/// [Reward Signals]: https://github.com/Unity-Technologies/ml-agents/blob/release_1_docs/docs/ML-Agents-Overview.md#a-quick-note-on-reward-signals
/// [Agents - Rewards]: https://github.com/Unity-Technologies/ml-agents/blob/release_2_verified_docs/docs/Learning-Environment-Design-Agents.md#rewards
/// [Reward Signals]: https://github.com/Unity-Technologies/ml-agents/blob/release_2_verified_docs/docs/ML-Agents-Overview.md#a-quick-note-on-reward-signals
///</remarks>
/// <param name="increment">Incremental reward value.</param>
public void AddReward(float increment)

///
/// Your heuristic implementation can use any decision making logic you specify. Assign decision
/// values to the float[] array, <paramref name="actionsOut"/>, passed to your function as a parameter.
/// The same array will be reused between steps. It is up to the user to initialize
/// the values on each call, for example by calling `Array.Clear(actionsOut, 0, actionsOut.Length);`.
/// Add values to the array at the same indexes as they are used in your
/// <seealso cref="OnActionReceived(float[])"/> function, which receives this array and
/// implements the corresponding agent behavior. See [Actions] for more information
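Put together, those remarks suggest a heuristic of the following shape. This is an illustrative sketch only (the class name, action count, and input axes are assumptions, not taken from this diff), using the 1.0-era float[] signature:

using System;
using UnityEngine;
using Unity.MLAgents;

public class ManualDriveAgent : Agent // hypothetical example agent
{
    public override void Heuristic(float[] actionsOut)
    {
        // The buffer is reused between steps, so clear stale values first.
        Array.Clear(actionsOut, 0, actionsOut.Length);
        actionsOut[0] = Input.GetAxis("Horizontal"); // consumed at index 0 in OnActionReceived
        actionsOut[1] = Input.GetAxis("Vertical");   // consumed at index 1 in OnActionReceived
    }
}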

/// implementing a simple heuristic function can aid in debugging agent actions and interactions
/// with its environment.
///
/// [Demonstration Recorder]: https://github.com/Unity-Technologies/ml-agents/blob/release_1_docs/docs/Learning-Environment-Design-Agents.md#recording-demonstrations
/// [Actions]: https://github.com/Unity-Technologies/ml-agents/blob/release_1_docs/docs/Learning-Environment-Design-Agents.md#actions
/// [Demonstration Recorder]: https://github.com/Unity-Technologies/ml-agents/blob/release_2_verified_docs/docs/Learning-Environment-Design-Agents.md#recording-demonstrations
/// [Actions]: https://github.com/Unity-Technologies/ml-agents/blob/release_2_verified_docs/docs/Learning-Environment-Design-Agents.md#actions
/// [GameObject]: https://docs.unity3d.com/Manual/GameObjects.html
/// </remarks>
/// <example>

UpdateSensors();
using (TimerStack.Instance.Scoped("CollectObservations"))
{
CollectObservations(collectObservationsSensor);
using (m_CollectObservationsChecker.Start())
{
CollectObservations(collectObservationsSensor);
}
}
using (TimerStack.Instance.Scoped("CollectDiscreteActionMasks"))
{

/// For more information about observations, see [Observations and Sensors].
///
/// [GameObject]: https://docs.unity3d.com/Manual/GameObjects.html
/// [Observations and Sensors]: https://github.com/Unity-Technologies/ml-agents/blob/release_1_docs/docs/Learning-Environment-Design-Agents.md#observations-and-sensors
/// [Observations and Sensors]: https://github.com/Unity-Technologies/ml-agents/blob/release_2_verified_docs/docs/Learning-Environment-Design-Agents.md#observations-and-sensors
/// </remarks>
public virtual void CollectObservations(VectorSensor sensor)
{

///
/// See [Agents - Actions] for more information on masking actions.
///
/// [Agents - Actions]: https://github.com/Unity-Technologies/ml-agents/blob/release_1_docs/docs/Learning-Environment-Design-Agents.md#actions
/// [Agents - Actions]: https://github.com/Unity-Technologies/ml-agents/blob/release_2_verified_docs/docs/Learning-Environment-Design-Agents.md#actions
/// </remarks>
/// <seealso cref="OnActionReceived(float[])"/>
public virtual void CollectDiscreteActionMasks(DiscreteActionMasker actionMasker)

///
/// For more information about implementing agent actions see [Agents - Actions].
///
/// [Agents - Actions]: https://github.com/Unity-Technologies/ml-agents/blob/release_1_docs/docs/Learning-Environment-Design-Agents.md#actions
/// [Agents - Actions]: https://github.com/Unity-Technologies/ml-agents/blob/release_2_verified_docs/docs/Learning-Environment-Design-Agents.md#actions
/// </remarks>
/// <param name="vectorAction">
/// An array containing the action vector. The length of the array is specified

{
ResetData();
m_StepCount = 0;
OnEpisodeBegin();
using (m_OnEpisodeBeginChecker.Start())
{
OnEpisodeBegin();
}
}
/// <summary>

2
com.unity.ml-agents/Runtime/Demonstrations/DemonstrationRecorder.cs


/// See [Imitation Learning - Recording Demonstrations] for more information.
///
/// [GameObject]: https://docs.unity3d.com/Manual/GameObjects.html
/// [Imitation Learning - Recording Demonstrations]: https://github.com/Unity-Technologies/ml-agents/blob/release_1_docs/docs//Learning-Environment-Design-Agents.md#recording-demonstrations
/// [Imitation Learning - Recording Demonstrations]: https://github.com/Unity-Technologies/ml-agents/blob/release_2_verified_docs/docs//Learning-Environment-Design-Agents.md#recording-demonstrations
/// </remarks>
[RequireComponent(typeof(Agent))]
[AddComponentMenu("ML Agents/Demonstration Recorder", (int)MenuGroup.Default)]

2
com.unity.ml-agents/Runtime/DiscreteActionMasker.cs


///
/// See [Agents - Actions] for more information on masking actions.
///
/// [Agents - Actions]: https://github.com/Unity-Technologies/ml-agents/blob/release_1_docs/docs/Learning-Environment-Design-Agents.md#actions
/// [Agents - Actions]: https://github.com/Unity-Technologies/ml-agents/blob/release_2_verified_docs/docs/Learning-Environment-Design-Agents.md#actions
/// </remarks>
/// <param name="branch">The branch for which the actions will be masked.</param>
/// <param name="actionIndices">The indices of the masked actions.</param>

27
com.unity.ml-agents/Runtime/Policies/BarracudaPolicy.cs


/// </summary>
List<int[]> m_SensorShapes;
private string m_BehaviorName;
private BrainParameters m_BrainParameters;
/// <summary>
/// Whether or not we've tried to send analytics for this model. We only ever try to send once per policy,
/// and do additional deduplication in the analytics code.
/// </summary>
private bool m_AnalyticsSent;
InferenceDevice inferenceDevice)
InferenceDevice inferenceDevice,
string behaviorName
)
m_BehaviorName = behaviorName;
m_BrainParameters = brainParameters;
if (!m_AnalyticsSent)
{
m_AnalyticsSent = true;
Analytics.InferenceAnalytics.InferenceModelSet(
m_ModelRunner.Model,
m_BehaviorName,
m_ModelRunner.InferenceDevice,
sensors,
m_BrainParameters
);
}
m_AgentId = info.episodeId;
m_ModelRunner?.PutObservations(info, sensors);
}

4
com.unity.ml-agents/Runtime/Policies/BehaviorParameters.cs


"Either assign a model, or change to a different Behavior Type."
);
}
return new BarracudaPolicy(m_BrainParameters, m_Model, m_InferenceDevice);
return new BarracudaPolicy(m_BrainParameters, m_Model, m_InferenceDevice, m_BehaviorName);
}
case BehaviorType.Default:
if (Academy.Instance.IsCommunicatorOn)

if (m_Model != null)
{
return new BarracudaPolicy(m_BrainParameters, m_Model, m_InferenceDevice);
return new BarracudaPolicy(m_BrainParameters, m_Model, m_InferenceDevice, m_BehaviorName);
}
else
{

17
com.unity.ml-agents/Runtime/Policies/RemotePolicy.cs


using UnityEngine;
using System.Collections.Generic;
using System;
using Unity.MLAgents.Analytics;
using Unity.MLAgents.Sensors;
namespace Unity.MLAgents.Policies

int m_AgentId;
string m_FullyQualifiedBehaviorName;
private bool m_AnalyticsSent = false;
private BrainParameters m_BrainParameters;
internal ICommunicator m_Communicator;
/// <inheritdoc />

{
m_FullyQualifiedBehaviorName = fullyQualifiedBehaviorName;
m_Communicator = Academy.Instance.Communicator;
m_Communicator.SubscribeBrain(m_FullyQualifiedBehaviorName, brainParameters);
m_Communicator?.SubscribeBrain(m_FullyQualifiedBehaviorName, brainParameters);
m_BrainParameters = brainParameters;
if (!m_AnalyticsSent)
{
m_AnalyticsSent = true;
TrainingAnalytics.RemotePolicyInitialized(
m_FullyQualifiedBehaviorName,
sensors,
m_BrainParameters
);
}
m_AgentId = info.episodeId;
m_Communicator?.PutObservations(m_FullyQualifiedBehaviorName, info, sensors);
}

9
com.unity.ml-agents/Runtime/Sensors/SensorShapeValidator.cs


{
// Check for compatibility with the other Agents' Sensors
// TODO make sure this only checks once per agent
Debug.Assert(m_SensorShapes.Count == sensors.Count, $"Number of Sensors must match. {m_SensorShapes.Count} != {sensors.Count}");
Debug.AssertFormat(
m_SensorShapes.Count == sensors.Count,
"Number of Sensors must match. {0} != {1}",
m_SensorShapes.Count,
sensors.Count
);
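// Hedged rationale, consistent with the ValidateSensors allocation fix listed
// under 1.0.7 above: AssertFormat builds its message only when the assertion
// fails, whereas the interpolated string in the old call was allocated on
// every check.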
for (var i = 0; i < Mathf.Min(m_SensorShapes.Count, sensors.Count); i++)
{
var cachedShape = m_SensorShapes[i];

{
Debug.Assert(cachedShape[j] == sensorShape[j], "Sensor sizes much match.");
Debug.Assert(cachedShape[j] == sensorShape[j], "Sensor sizes must match.");
}
}
}

5
com.unity.ml-agents/Runtime/Sensors/RayPerceptionSensorComponentBase.cs


else
{
var rayInput = GetRayPerceptionInput();
// We don't actually need the tags here, since they don't affect the display of the rays.
// Additionally, the user might be in the middle of typing the tag name when this is called,
// and there's no way to turn off the "Tag ... is not defined" error logs.
// So just don't use any tags here.
rayInput.DetectableTags = null;
for (var rayIndex = 0; rayIndex < rayInput.Angles.Count; rayIndex++)
{
DebugDisplayInfo.RayInfo debugRay;

29
com.unity.ml-agents/Runtime/Sensors/ObservationWriter.cs


}
/// <summary>
/// 1D write access at a specified index. Use AddRange if possible instead.
/// 1D write access at a specified index. Use AddList if possible instead.
/// </summary>
/// <param name="index">Index to write to.</param>
public float this[int index]

{
m_Proxy.data[m_Batch, index + m_Offset + writeOffset] = val;
index++;
}
}
}
/// <summary>
/// Write the range of floats
/// </summary>
/// <param name="data"></param>
/// <param name="writeOffset">Optional write offset.</param>
internal void AddList(IList<float> data, int writeOffset = 0)
{
if (m_Data != null)
{
for (var index = 0; index < data.Count; index++)
{
var val = data[index];
m_Data[index + m_Offset + writeOffset] = val;
}
}
else
{
for (var index = 0; index < data.Count; index++)
{
var val = data[index];
m_Proxy.data[m_Batch, index + m_Offset + writeOffset] = val;
}
}
}
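A usage sketch of the offset semantics (illustrative; `writer` stands for an ObservationWriter already pointed at a float buffer via SetTarget, as in the ObservationWriter tests later in this diff):

var obs = new List<float> { 4f, 5f, 6f }; // requires System.Collections.Generic
writer.AddList(obs);     // writes 4,5,6 starting at the writer's base offset
writer.AddList(obs, 3);  // writes the same values again, shifted 3 floats later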

21
com.unity.ml-agents/Runtime/Sensors/RayPerceptionSensor.cs


rayOutput.ToFloatArray(numDetectableTags, rayIndex, m_Observations);
}
// Finally, add the observations to the ObservationWriter
writer.AddRange(m_Observations);
writer.AddList(m_Observations);
}
return m_Observations.Length;
}

if (castHit)
{
// Find the index of the tag of the object that was hit.
for (var i = 0; i < input.DetectableTags.Count; i++)
var numTags = input.DetectableTags?.Count ?? 0;
for (var i = 0; i < numTags; i++)
if (hitObject.CompareTag(input.DetectableTags[i]))
var tagsEqual = false;
try
{
var tag = input.DetectableTags[i];
if (!string.IsNullOrEmpty(tag))
{
tagsEqual = hitObject.CompareTag(tag);
}
}
catch (UnityException)
{
// If the tag is null, empty, or not a valid tag, just ignore it.
}
if (tagsEqual)
{
rayOutput.HitTaggedObject = true;
rayOutput.HitTagIndex = i;

8
com.unity.ml-agents/Runtime/Sensors/StackingSensor.cs


for (var i = 0; i < m_NumStackedObservations; i++)
{
var obsIndex = (m_CurrentIndex + 1 + i) % m_NumStackedObservations;
writer.AddRange(m_StackedObservations[obsIndex], numWritten);
writer.AddList(m_StackedObservations[obsIndex], numWritten);
numWritten += m_UnstackedObservationSize;
}

{
return SensorCompressionType.None;
}
internal ISensor GetWrappedSensor()
{
return m_WrappedSensor;
}
// TODO support stacked compressed observations (byte stream)
}

2
com.unity.ml-agents/Runtime/Sensors/VectorSensor.cs


m_Observations.Add(0);
}
}
writer.AddRange(m_Observations);
writer.AddList(m_Observations);
return expectedObservations;
}

18
com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs


{
var agentInfoProto = ai.ToAgentInfoProto();
var agentActionProto = new AgentActionProto
var agentActionProto = new AgentActionProto();
if (ai.storedVectorActions != null)
VectorActions = { ai.storedVectorActions }
};
agentActionProto.VectorActions.AddRange(ai.storedVectorActions);
}
return new AgentInfoActionPairProto
{

var brainParametersProto = new BrainParametersProto
{
VectorActionSize = { bp.VectorActionSize },
VectorActionSpaceType =
(SpaceTypeProto)bp.VectorActionSpaceType,
VectorActionSpaceType = (SpaceTypeProto) bp.VectorActionSpaceType,
brainParametersProto.VectorActionDescriptions.AddRange(bp.VectorActionDescriptions);
if (bp.VectorActionDescriptions != null)
{
brainParametersProto.VectorActionDescriptions.AddRange(bp.VectorActionDescriptions);
}
return brainParametersProto;
}

/// </summary>
public static DemonstrationMetaProto ToProto(this DemonstrationMetaData dm)
{
var demonstrationName = dm.demonstrationName ?? "";
var demoProto = new DemonstrationMetaProto
{
ApiVersion = DemonstrationMetaData.ApiVersion,

DemonstrationName = dm.demonstrationName
DemonstrationName = demonstrationName
};
return demoProto;
}

88
com.unity.ml-agents/Runtime/Communicator/RpcCommunicator.cs


using System;
using System.Collections.Generic;
using System.Linq;
using System.Text.RegularExpressions;
using Unity.MLAgents.Analytics;
using Unity.MLAgents.CommunicatorObjects;
using Unity.MLAgents.Sensors;
using Unity.MLAgents.Policies;

/// Responsible for communication with External using gRPC.
internal class RpcCommunicator : ICommunicator
{
public event QuitCommandHandler QuitCommandReceived;
public event ResetCommandHandler ResetCommandReceived;

return true;
}
internal static bool CheckPythonPackageVersionIsCompatible(string pythonLibraryVersion)
{
// Try to extract the numerical values from the pythonLibraryVersion, e.g. remove the ".dev0" suffix
var versionMatch = Regex.Match(pythonLibraryVersion, @"[0-9]+\.[0-9]+\.[0-9]");
if (versionMatch.Success)
{
pythonLibraryVersion = versionMatch.Value;
}
Version pythonVersion;
try
{
pythonVersion = new Version(pythonLibraryVersion);
}
catch
{
// Unparseable version.
// Anything like 0.42.0.dev0 should have been caught with the regex above, so anything here
// is totally bogus. For now, ignore these and let CheckPythonPackageVersionIsSupported handle it.
return true;
}
if (pythonVersion > PythonTrainerVersions.s_MaxCompatibleVersion)
{
return false;
}
return true;
}
/// <summary>
/// Check if the package is in the supported range. Note that some versions might be unsupported but
/// still compatible.
/// </summary>
/// <param name="pythonLibraryVersion"></param>
/// <returns></returns>
internal static bool CheckPythonPackageVersionIsSupported(string pythonLibraryVersion)
{
Version pythonVersion;
try
{
pythonVersion = new Version(pythonLibraryVersion);
}
catch
{
// Unparseable - this also catches things like "0.20.0.dev0" which we don't want to support
return false;
}
if (pythonVersion < PythonTrainerVersions.s_MinSupportedVersion ||
pythonVersion > PythonTrainerVersions.s_MaxSupportedVersion)
{
return false;
}
return true;
}
/// <summary>
/// Sends the initialization parameters through the Communicator.
Used by the Academy to send initialization parameters to the communicator.

var pythonCommunicationVersion = initializationInput.RlInitializationInput.CommunicationVersion;
var pythonPackageVersion = initializationInput.RlInitializationInput.PackageVersion;
var unityCommunicationVersion = initParameters.unityCommunicationVersion;
TrainingAnalytics.SetTrainerInformation(pythonPackageVersion, pythonCommunicationVersion);
var communicationIsCompatible = CheckCommunicationVersionsAreCompatible(unityCommunicationVersion,
pythonCommunicationVersion,

}
throw new UnityAgentsException("ICommunicator.Initialize() failed.");
}
var packageVersionCompatible = CheckPythonPackageVersionIsCompatible(pythonPackageVersion);
if (!packageVersionCompatible)
{
Debug.LogErrorFormat(
"Python package version ({0}) will produce model files that are incompatible with this " +
"version of the com.unity.ml-agents Unity package. Please downgrade to a Python package " +
"between {1} and {2}, or update to a new version of com.unity.ml-agents.",
pythonPackageVersion,
PythonTrainerVersions.s_MinSupportedVersion,
PythonTrainerVersions.s_MaxSupportedVersion
);
throw new UnityAgentsException("Incompatible trainer version.");
}
var packageVersionSupported = CheckPythonPackageVersionIsSupported(pythonPackageVersion);
if (!packageVersionSupported)
{
Debug.LogWarningFormat(
"Python package version ({0}) is out of the supported range or not from an official release. " +
"It is strongly recommended that you use a Python package between {1} and {2}. " +
"Training will proceed, but the output format may be different.",
pythonPackageVersion,
PythonTrainerVersions.s_MinSupportedVersion,
PythonTrainerVersions.s_MaxSupportedVersion
);
}
}
catch

15
com.unity.ml-agents/Runtime/Constants.cs


using System;
namespace Unity.MLAgents
{
/// <summary>

Default = 0,
Sensors = 50
}
internal static class PythonTrainerVersions
{
// The python package version should be >= s_MinSupportedVersion
// and <= s_MaxSupportedVersion.
internal static Version s_MinSupportedVersion = new Version("0.16.1");
internal static Version s_MaxSupportedVersion = new Version("0.20.0");
// Any version greater than this is known to be incompatible, and we will block training.
// Covers any patch to the release before the 2.0.0 package release.
internal static Version s_MaxCompatibleVersion = new Version("0.25.999");
}
}
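Putting the three thresholds together (a hedged walkthrough; the concrete outcomes below match the RpcCommunicatorTests later in this diff):

// 0.13.37          -> compatible but unsupported (below s_MinSupportedVersion): warning only
// 0.16.1 .. 0.20.0 -> compatible and supported: no warning
// 0.25.2           -> compatible (<= s_MaxCompatibleVersion) but unsupported: warning only
// 0.26.0 and later -> incompatible: training is blocked with an error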

21
com.unity.ml-agents/Runtime/Inference/ApplierImpl.cs


using System;
using System.Collections;
using System.Collections.Generic;
using System.Linq;
using Unity.MLAgents.Inference.Utils;

/// </summary>
internal class ContinuousActionOutputApplier : TensorApplier.IApplier
{
public void Apply(TensorProxy tensorProxy, IEnumerable<int> actionIds, Dictionary<int, float[]> lastActions)
public void Apply(TensorProxy tensorProxy, IList<int> actionIds, Dictionary<int, float[]> lastActions)
foreach (int agentId in actionIds)
for (var i = 0; i < actionIds.Count; i++)
var agentId = actionIds[i];
if (lastActions.ContainsKey(agentId))
{
var actionValue = lastActions[agentId];

m_Allocator = allocator;
}
public void Apply(TensorProxy tensorProxy, IEnumerable<int> actionIds, Dictionary<int, float[]> lastActions)
public void Apply(TensorProxy tensorProxy, IList<int> actionIds, Dictionary<int, float[]> lastActions)
{
//var tensorDataProbabilities = tensorProxy.Data as float[,];
var idActionPairList = actionIds as List<int> ?? actionIds.ToList();

outputTensor.data.Dispose();
}
var agentIndex = 0;
foreach (int agentId in actionIds)
for (var i = 0; i < actionIds.Count; i++)
var agentId = actionIds[i];
if (lastActions.ContainsKey(agentId))
{
var actionVal = lastActions[agentId];

m_Memories = memories;
}
public void Apply(TensorProxy tensorProxy, IEnumerable<int> actionIds, Dictionary<int, float[]> lastActions)
public void Apply(TensorProxy tensorProxy, IList<int> actionIds, Dictionary<int, float[]> lastActions)
foreach (int agentId in actionIds)
for (var i = 0; i < actionIds.Count; i++)
var agentId = actionIds[i];
List<float> memory;
if (!m_Memories.TryGetValue(agentId, out memory)
|| memory.Count < memorySize)

m_Memories = memories;
}
public void Apply(TensorProxy tensorProxy, IEnumerable<int> actionIds, Dictionary<int, float[]> lastActions)
public void Apply(TensorProxy tensorProxy, IList<int> actionIds, Dictionary<int, float[]> lastActions)
foreach (int agentId in actionIds)
for (var i = 0; i < actionIds.Count; i++)
var agentId = actionIds[i];
List<float> memory;
if (!m_Memories.TryGetValue(agentId, out memory)
|| memory.Count < memorySize * m_MemoriesCount)

46
com.unity.ml-agents/Runtime/Inference/GeneratorImpl.cs


m_Allocator = allocator;
}
public void Generate(TensorProxy tensorProxy, int batchSize, IEnumerable<AgentInfoSensorsPair> infos)
public void Generate(TensorProxy tensorProxy, int batchSize, IList<AgentInfoSensorsPair> infos)
{
TensorUtils.ResizeTensor(tensorProxy, batchSize, m_Allocator);
}

m_Allocator = allocator;
}
public void Generate(TensorProxy tensorProxy, int batchSize, IEnumerable<AgentInfoSensorsPair> infos)
public void Generate(TensorProxy tensorProxy, int batchSize, IList<AgentInfoSensorsPair> infos)
{
tensorProxy.data?.Dispose();
tensorProxy.data = m_Allocator.Alloc(new TensorShape(1, 1));

m_Allocator = allocator;
}
public void Generate(TensorProxy tensorProxy, int batchSize, IEnumerable<AgentInfoSensorsPair> infos)
public void Generate(TensorProxy tensorProxy, int batchSize, IList<AgentInfoSensorsPair> infos)
{
tensorProxy.shape = new long[0];
tensorProxy.data?.Dispose();

m_SensorIndices.Add(sensorIndex);
}
public void Generate(TensorProxy tensorProxy, int batchSize, IEnumerable<AgentInfoSensorsPair> infos)
public void Generate(TensorProxy tensorProxy, int batchSize, IList<AgentInfoSensorsPair> infos)
foreach (var info in infos)
for (var infoIndex = 0; infoIndex < infos.Count; infoIndex++)
var info = infos[infoIndex];
if (info.agentInfo.done)
{
// If the agent is done, we might have a stale reference to the sensors

{
var tensorOffset = 0;
// Write each sensor consecutively to the tensor
foreach (var sensorIndex in m_SensorIndices)
// TODO
for (var sensorIndexIndex = 0; sensorIndexIndex < m_SensorIndices.Count; sensorIndexIndex++)
var sensorIndex = m_SensorIndices[sensorIndexIndex];
var sensor = info.sensors[sensorIndex];
m_ObservationWriter.SetTarget(tensorProxy, agentIndex, tensorOffset);
var numWritten = sensor.Write(m_ObservationWriter);

}
public void Generate(
TensorProxy tensorProxy, int batchSize, IEnumerable<AgentInfoSensorsPair> infos)
TensorProxy tensorProxy, int batchSize, IList<AgentInfoSensorsPair> infos)
foreach (var infoSensorPair in infos)
for (var infoIndex = 0; infoIndex < infos.Count; infoIndex++)
var infoSensorPair = infos[infoIndex];
var info = infoSensorPair.agentInfo;
List<float> memory;

m_Memories = memories;
}
public void Generate(TensorProxy tensorProxy, int batchSize, IEnumerable<AgentInfoSensorsPair> infos)
public void Generate(TensorProxy tensorProxy, int batchSize, IList<AgentInfoSensorsPair> infos)
foreach (var infoSensorPair in infos)
for (var infoIndex = 0; infoIndex < infos.Count; infoIndex++)
var infoSensorPair = infos[infoIndex];
var info = infoSensorPair.agentInfo;
var offset = memorySize * m_MemoryIndex;
List<float> memory;

m_Allocator = allocator;
}
public void Generate(TensorProxy tensorProxy, int batchSize, IEnumerable<AgentInfoSensorsPair> infos)
public void Generate(TensorProxy tensorProxy, int batchSize, IList<AgentInfoSensorsPair> infos)
foreach (var infoSensorPair in infos)
for (var infoIndex = 0; infoIndex < infos.Count; infoIndex++)
var infoSensorPair = infos[infoIndex];
var info = infoSensorPair.agentInfo;
var pastAction = info.storedVectorActions;
if (pastAction != null)

m_Allocator = allocator;
}
public void Generate(TensorProxy tensorProxy, int batchSize, IEnumerable<AgentInfoSensorsPair> infos)
public void Generate(TensorProxy tensorProxy, int batchSize, IList<AgentInfoSensorsPair> infos)
foreach (var infoSensorPair in infos)
for (var infoIndex = 0; infoIndex < infos.Count; infoIndex++)
var infoSensorPair = infos[infoIndex];
var agentInfo = infoSensorPair.agentInfo;
var maskList = agentInfo.discreteActionMasks;
for (var j = 0; j < maskSize; j++)

m_Allocator = allocator;
}
public void Generate(TensorProxy tensorProxy, int batchSize, IEnumerable<AgentInfoSensorsPair> infos)
public void Generate(TensorProxy tensorProxy, int batchSize, IList<AgentInfoSensorsPair> infos)
{
TensorUtils.ResizeTensor(tensorProxy, batchSize, m_Allocator);
TensorUtils.FillTensorWithRandomNormal(tensorProxy, m_RandomNormal);

m_Allocator = allocator;
}
public void Generate(TensorProxy tensorProxy, int batchSize, IEnumerable<AgentInfoSensorsPair> infos)
public void Generate(TensorProxy tensorProxy, int batchSize, IList<AgentInfoSensorsPair> infos)
foreach (var infoSensorPair in infos)
for (var infoIndex = 0; infoIndex < infos.Count; infoIndex++)
var infoSensorPair = infos[infoIndex];
var sensor = infoSensorPair.sensors[m_SensorIndex];
if (infoSensorPair.agentInfo.done)
{

7
com.unity.ml-agents/Runtime/Inference/TensorApplier.cs


/// </param>
/// <param name="actionIds"> List of Agents Ids that will be updated using the tensor's data</param>
/// <param name="lastActions"> Dictionary of AgentId to Actions to be updated</param>
void Apply(TensorProxy tensorProxy, IEnumerable<int> actionIds, Dictionary<int, float[]> lastActions);
void Apply(TensorProxy tensorProxy, IList<int> actionIds, Dictionary<int, float[]> lastActions);
}
readonly Dictionary<string, IApplier> m_Dict = new Dictionary<string, IApplier>();

/// <exception cref="UnityAgentsException"> One of the tensors does not have an
/// associated applier.</exception>
public void ApplyTensors(
IEnumerable<TensorProxy> tensors, IEnumerable<int> actionIds, Dictionary<int, float[]> lastActions)
IReadOnlyList<TensorProxy> tensors, IList<int> actionIds, Dictionary<int, float[]> lastActions)
foreach (var tensor in tensors)
for (var tensorIndex = 0; tensorIndex < tensors.Count; tensorIndex++)
var tensor = tensors[tensorIndex];
if (!m_Dict.ContainsKey(tensor.name))
{
throw new UnityAgentsException(

7
com.unity.ml-agents/Runtime/Inference/TensorGenerator.cs


/// the tensor's data.
/// </param>
void Generate(
TensorProxy tensorProxy, int batchSize, IEnumerable<AgentInfoSensorsPair> infos);
TensorProxy tensorProxy, int batchSize, IList<AgentInfoSensorsPair> infos);
}
readonly Dictionary<string, IGenerator> m_Dict = new Dictionary<string, IGenerator>();

/// <exception cref="UnityAgentsException"> One of the tensors does not have an
/// associated generator.</exception>
public void GenerateTensors(
IEnumerable<TensorProxy> tensors, int currentBatchSize, IEnumerable<AgentInfoSensorsPair> infos)
IReadOnlyList<TensorProxy> tensors, int currentBatchSize, IList<AgentInfoSensorsPair> infos)
foreach (var tensor in tensors)
for (var tensorIndex = 0; tensorIndex < tensors.Count; tensorIndex++)
var tensor = tensors[tensorIndex];
if (!m_Dict.ContainsKey(tensor.name))
{
throw new UnityAgentsException(

23
com.unity.ml-agents/Runtime/Inference/TensorProxy.cs


public Type DataType => k_TypeMap[valueType];
public long[] shape;
public Tensor data;
public long Height
{
get { return shape.Length == 4 ? shape[1] : shape[5]; }
}
public long Width
{
get { return shape.Length == 4 ? shape[2] : shape[6]; }
}
public long Channels
{
get { return shape.Length == 4 ? shape[3] : shape[7]; }
}
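// Note on the indices above (an inference from Barracuda's tensor layouts,
// not stated in this diff): rank-4 shapes are (N, H, W, C) and rank-8 shapes
// are (S, R, N, T, D, H, W, C), so H/W/C sit at 1/2/3 or 5/6/7 respectively.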
}
internal static class TensorUtils

tensor.data?.Dispose();
tensor.shape[0] = batch;
if (tensor.shape.Length == 4)
if (tensor.shape.Length == 4 || tensor.shape.Length == 8)
(int)tensor.shape[1],
(int)tensor.shape[2],
(int)tensor.shape[3]));
(int)tensor.Height,
(int)tensor.Width,
(int)tensor.Channels));
}
else
{

40
com.unity.ml-agents/Runtime/Inference/ModelRunner.cs


bool m_Verbose = false;
string[] m_OutputNames;
IReadOnlyList<TensorProxy> m_InferenceInputs;
IReadOnlyList<TensorProxy> m_InferenceOutputs;
List<TensorProxy> m_InferenceOutputs;
Dictionary<string, Tensor> m_InputsByName;
Dictionary<int, List<float>> m_Memories = new Dictionary<int, List<float>>();
SensorShapeValidator m_SensorShapeValidator = new SensorShapeValidator();

seed, m_TensorAllocator, m_Memories, barracudaModel);
m_TensorApplier = new TensorApplier(
brainParameters, seed, m_TensorAllocator, m_Memories, barracudaModel);
m_InputsByName = new Dictionary<string, Tensor>();
m_InferenceOutputs = new List<TensorProxy>();
static Dictionary<string, Tensor> PrepareBarracudaInputs(IEnumerable<TensorProxy> infInputs)
public InferenceDevice InferenceDevice
{
get { return m_InferenceDevice; }
}
public NNModel Model
{
get { return m_Model; }
}
void PrepareBarracudaInputs(IReadOnlyList<TensorProxy> infInputs)
var inputs = new Dictionary<string, Tensor>();
foreach (var inp in infInputs)
m_InputsByName.Clear();
for (var i = 0; i < infInputs.Count; i++)
inputs[inp.name] = inp.data;
var inp = infInputs[i];
m_InputsByName[inp.name] = inp.data;
return inputs;
}
public void Dispose()

m_TensorAllocator?.Reset(false);
}
List<TensorProxy> FetchBarracudaOutputs(string[] names)
void FetchBarracudaOutputs(string[] names)
var outputs = new List<TensorProxy>();
m_InferenceOutputs.Clear();
outputs.Add(TensorUtils.TensorProxyFromBarracuda(output, n));
m_InferenceOutputs.Add(TensorUtils.TensorProxyFromBarracuda(output, n));
return outputs;
}
public void PutObservations(AgentInfo info, List<ISensor> sensors)

Profiler.EndSample();
Profiler.BeginSample($"MLAgents.{m_Model.name}.PrepareBarracudaInputs");
var inputs = PrepareBarracudaInputs(m_InferenceInputs);
PrepareBarracudaInputs(m_InferenceInputs);
m_Engine.Execute(inputs);
m_Engine.Execute(m_InputsByName);
m_InferenceOutputs = FetchBarracudaOutputs(m_OutputNames);
FetchBarracudaOutputs(m_OutputNames);
Profiler.EndSample();
Profiler.BeginSample($"MLAgents.{m_Model.name}.ApplyTensors");

31
com.unity.ml-agents/Runtime/Inference/BarracudaModelParamLoader.cs


}
var modelApiVersion = (int)model.GetTensorByName(TensorNames.VersionNumber)[0];
var memorySize = (int)model.GetTensorByName(TensorNames.MemorySize)[0];
var isContinuousInt = (int)model.GetTensorByName(TensorNames.IsContinuousControl)[0];
var isContinuous = GetActionType(isContinuousInt);
var actionSize = (int)model.GetTensorByName(TensorNames.ActionOutputShape)[0];
if (modelApiVersion == -1)
{
failedModelChecks.Add(

}
if (modelApiVersion != k_ApiVersion)
if (modelApiVersion < k_ApiVersion)
{
failedModelChecks.Add(
$"Version of the trainer the model was trained with ({modelApiVersion}) " +

if (modelApiVersion > k_ApiVersion)
{
var minTrainerVersion = PythonTrainerVersions.s_MinSupportedVersion;
var maxTrainerVersion = PythonTrainerVersions.s_MaxSupportedVersion;
failedModelChecks.Add(
"Model was trained with a newer version of the trainer than is supported. " +
$"Either retrain with an older trainer (between versions {minTrainerVersion} and {maxTrainerVersion}), " +
$"or update to a newer version of com.unity.ml-agents.\n" +
$"Model version: {modelApiVersion} Supported version: {k_ApiVersion}"
);
return failedModelChecks;
}
var memorySize = (int)model.GetTensorByName(TensorNames.MemorySize)[0];
var isContinuousInt = (int)model.GetTensorByName(TensorNames.IsContinuousControl)[0];
var isContinuous = GetActionType(isContinuousInt);
var actionSize = (int)model.GetTensorByName(TensorNames.ActionOutputShape)[0];
failedModelChecks.AddRange(
CheckIntScalarPresenceHelper(new Dictionary<string, int>()

var heightBp = shape[0];
var widthBp = shape[1];
var pixelBp = shape[2];
var heightT = tensorProxy.shape[1];
var widthT = tensorProxy.shape[2];
var pixelT = tensorProxy.shape[3];
var heightT = tensorProxy.Height;
var widthT = tensorProxy.Width;
var pixelT = tensorProxy.Channels;
if ((widthBp != widthT) || (heightBp != heightT) || (pixelBp != pixelT))
{
return $"The visual Observation of the model does not match. " +

97
com.unity.ml-agents/Runtime/Academy.cs


* API. For more information on each of these entities, in addition to how to
* set-up a learning environment and train the behavior of characters in a
* Unity scene, please browse our documentation pages on GitHub:
* https://github.com/Unity-Technologies/ml-agents/tree/release_1_docs/docs/
* https://github.com/Unity-Technologies/ml-agents/tree/release_2_verified_docs/docs/
*/
namespace Unity.MLAgents

{
void FixedUpdate()
{
Academy.Instance.EnvironmentStep();
// Check if the stepper belongs to the current Academy and destroy it if it does not.
// This prevents leaked steppers from previous runs.
if (!Academy.IsInitialized || !Academy.Instance.IsStepperOwner(this))
{
Destroy(this.gameObject);
}
else
{
Academy.Instance.EnvironmentStep();
}
}
}

/// fall back to inference or heuristic decisions. (You can also set agents to always use
/// inference or heuristics.)
/// </remarks>
[HelpURL("https://github.com/Unity-Technologies/ml-agents/tree/release_1_docs/" +
[HelpURL("https://github.com/Unity-Technologies/ml-agents/tree/release_2_verified_docs/" +
"docs/Learning-Environment-Design.md")]
public class Academy : IDisposable
{

/// Unity package version of com.unity.ml-agents.
/// This must match the version string in package.json and is checked in a unit test.
/// </summary>
internal const string k_PackageVersion = "1.0.0-preview";
internal const string k_PackageVersion = "1.0.8";
const int k_EditorTrainingPort = 5004;

// Flag used to keep track of the first time the Academy is reset.
bool m_HadFirstReset;
// Detect a recursive Academy step, i.e. user code calling EnvironmentStep while the Academy is already stepping.
private RecursionChecker m_StepRecursionChecker = new RecursionChecker("EnvironmentStep");
// Random seed used for inference.
int m_InferenceSeed;

Application.quitting += Dispose;
LazyInitialize();
#if UNITY_EDITOR
EditorApplication.playModeStateChanged += HandleOnPlayModeChanged;
#endif
#if UNITY_EDITOR
/// <summary>
/// Clean up the Academy when switching from edit mode to play mode
/// </summary>
/// <param name="state">State.</param>
void HandleOnPlayModeChanged(PlayModeStateChange state)
{
if (state == PlayModeStateChange.ExitingEditMode)
{
Dispose();
}
}
#endif
/// <summary>
/// Initialize the Academy if it hasn't already been initialized.

/// </summary>
public void EnvironmentStep()
{
if (!m_HadFirstReset)
using (m_StepRecursionChecker.Start())
{
if (!m_HadFirstReset)
{
ForcedFullReset();
}
AgentPreStep?.Invoke(m_StepCount);
m_StepCount += 1;
m_TotalStepCount += 1;
AgentIncrementStep?.Invoke();
using (TimerStack.Instance.Scoped("AgentSendState"))
{
AgentSendState?.Invoke();
}
using (TimerStack.Instance.Scoped("DecideAction"))
{
DecideAction?.Invoke();
}
// If the communicator is not on, we need to clear the SideChannel sending queue
if (!IsCommunicatorOn)
{
SideChannelsManager.GetSideChannelMessage();
}
using (TimerStack.Instance.Scoped("AgentAct"))
{
AgentAct?.Invoke();
}
}
}

// Reset the Lazy instance
s_Lazy = new Lazy<Academy>(() => new Academy());
}
/// <summary>
/// Check if the input AcademyFixedUpdateStepper belongs to this Academy.
/// </summary>
internal bool IsStepperOwner(AcademyFixedUpdateStepper stepper)
{
return GameObject.ReferenceEquals(stepper.gameObject, Academy.Instance.m_StepperObject);
}
}
}

4
com.unity.ml-agents/Tests/Runtime/RuntimeAPITest.cs


[SetUp]
public static void Setup()
{
if (Academy.IsInitialized)
{
Academy.Instance.Dispose();
}
Academy.Instance.AutomaticSteppingEnabled = false;
}

2
com.unity.ml-agents/Tests/Editor/PublicAPI/Unity.ML-Agents.Editor.Tests.PublicAPI.asmdef


"references": [
"Unity.ML-Agents.Editor",
"Unity.ML-Agents",
"Barracuda",
"Unity.Barracuda",
"Unity.ML-Agents.CommunicatorObjects"
],
"optionalUnityReferences": [

38
com.unity.ml-agents/Tests/Editor/Communicator/RpcCommunicatorTests.cs


pythonPackageVerStr));
}
[Test]
public void TestCheckPythonPackageVersionIsCompatible()
{
Assert.IsTrue(RpcCommunicator.CheckPythonPackageVersionIsCompatible("0.13.37")); // low is OK
Assert.IsFalse(RpcCommunicator.CheckPythonPackageVersionIsCompatible("0.26.0")); // too high
// These are fine
Assert.IsTrue(RpcCommunicator.CheckPythonPackageVersionIsCompatible("0.16.1"));
Assert.IsTrue(RpcCommunicator.CheckPythonPackageVersionIsCompatible("0.17.17"));
Assert.IsTrue(RpcCommunicator.CheckPythonPackageVersionIsCompatible("0.25.0"));
Assert.IsTrue(RpcCommunicator.CheckPythonPackageVersionIsCompatible("0.25.1"));
Assert.IsTrue(RpcCommunicator.CheckPythonPackageVersionIsCompatible("0.25.2"));
// "dev" strings should get removed before parsing
Assert.IsTrue(RpcCommunicator.CheckPythonPackageVersionIsCompatible("0.17.0.dev0"));
Assert.IsTrue(RpcCommunicator.CheckPythonPackageVersionIsCompatible("0.25.0.dev0"));
Assert.IsFalse(RpcCommunicator.CheckPythonPackageVersionIsCompatible("0.26.0.dev0"));
// otherwise unparseable - keep support for these
Assert.IsTrue(RpcCommunicator.CheckPythonPackageVersionIsCompatible("the airspeed velocity of an unladen swallow"));
}
[Test]
public void TestCheckPythonPackageVersionIsSupported()
{
Assert.IsFalse(RpcCommunicator.CheckPythonPackageVersionIsSupported("0.13.37")); // too low
Assert.IsFalse(RpcCommunicator.CheckPythonPackageVersionIsSupported("0.42.0")); // too high
// These are fine
Assert.IsTrue(RpcCommunicator.CheckPythonPackageVersionIsSupported("0.16.1"));
Assert.IsTrue(RpcCommunicator.CheckPythonPackageVersionIsSupported("0.17.17"));
Assert.IsTrue(RpcCommunicator.CheckPythonPackageVersionIsSupported("0.20.0"));
// "dev" string or otherwise unparseable
Assert.IsFalse(RpcCommunicator.CheckPythonPackageVersionIsSupported("0.17.0.dev0"));
Assert.IsFalse(RpcCommunicator.CheckPythonPackageVersionIsSupported("oh point seventeen point oh"));
}
}
}

69
com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs


{
public Action OnRequestDecision;
ObservationWriter m_ObsWriter = new ObservationWriter();
public void RequestDecision(AgentInfo info, List<ISensor> sensors) {
foreach(var sensor in sensors){
public void RequestDecision(AgentInfo info, List<ISensor> sensors)
{
foreach (var sensor in sensors)
{
sensor.GetObservationProto(m_ObsWriter);
}
OnRequestDecision?.Invoke();

agent1.SetPolicy(policy);
StackingSensor sensor = null;
foreach(ISensor s in agent1.sensors){
if (s is StackingSensor){
foreach (ISensor s in agent1.sensors)
{
if (s is StackingSensor)
{
sensor = s as StackingSensor;
}
}

{
agent1.RequestDecision();
aca.EnvironmentStep();
}
policy.OnRequestDecision = () => SensorTestHelper.CompareObservation(sensor, new[] {18f, 19f, 21f});

public void TestAgentDontCallBaseOnEnable()
{
_InnerAgentTestOnEnableOverride();
}
}
[TestFixture]
public class AgentRecursionTests
{
[SetUp]
public void SetUp()
{
if (Academy.IsInitialized)
{
Academy.Instance.Dispose();
}
}
class CollectObsEndEpisodeAgent : Agent
{
public override void CollectObservations(VectorSensor sensor)
{
// NEVER DO THIS IN REAL CODE!
EndEpisode();
}
}
class OnEpisodeBeginEndEpisodeAgent : Agent
{
public override void OnEpisodeBegin()
{
// NEVER DO THIS IN REAL CODE!
EndEpisode();
}
}
void TestRecursiveThrows<T>() where T : Agent
{
var gameObj = new GameObject();
var agent = gameObj.AddComponent<T>();
agent.LazyInitialize();
agent.RequestDecision();
Assert.Throws<UnityAgentsException>(() =>
{
Academy.Instance.EnvironmentStep();
});
}
[Test]
public void TestRecursiveCollectObsEndEpisodeThrows()
{
TestRecursiveThrows<CollectObsEndEpisodeAgent>();
}
[Test]
public void TestRecursiveOnEpisodeBeginEndEpisodeThrows()
{
TestRecursiveThrows<OnEpisodeBeginEndEpisodeAgent>();
}
}
}

8
com.unity.ml-agents/Tests/Editor/Sensor/SensorShapeValidatorTests.cs


validator.ValidateSensors(sensorList1);
var sensorList2 = new List<ISensor>() { new DummySensor(1), new DummySensor(2, 3), new DummySensor(4, 5, 7) };
LogAssert.Expect(LogType.Assert, "Sensor sizes much match.");
LogAssert.Expect(LogType.Assert, "Sensor sizes must match.");
LogAssert.Expect(LogType.Assert, "Sensor sizes much match.");
LogAssert.Expect(LogType.Assert, "Sensor sizes must match.");
validator.ValidateSensors(sensorList1);
}

var sensorList2 = new List<ISensor>() { new DummySensor(1), new DummySensor(9) };
LogAssert.Expect(LogType.Assert, "Number of Sensors must match. 3 != 2");
LogAssert.Expect(LogType.Assert, "Sensor dimensions must match.");
LogAssert.Expect(LogType.Assert, "Sensor sizes much match.");
LogAssert.Expect(LogType.Assert, "Sensor sizes must match.");
validator.ValidateSensors(sensorList2);
// Add the sensors in the other order

LogAssert.Expect(LogType.Assert, "Sensor dimensions must match.");
LogAssert.Expect(LogType.Assert, "Sensor sizes much match.");
LogAssert.Expect(LogType.Assert, "Sensor sizes must match.");
validator.ValidateSensors(sensorList1);
}
}

89
com.unity.ml-agents/Tests/Editor/Sensor/RayPerceptionSensorTests.cs


using System.Collections.Generic;
using NUnit.Framework;
using UnityEngine;
using UnityEngine.TestTools;
using Unity.MLAgents.Sensors;
namespace Unity.MLAgents.Tests

// hit fraction is arbitrary but should be finite in [0,1]
Assert.GreaterOrEqual(outputBuffer[2], 0.0f);
Assert.LessOrEqual(outputBuffer[2], 1.0f);
}
}
[Test]
public void TestStaticPerceive()
{
SetupScene();
var obj = new GameObject("agent");
var perception = obj.AddComponent<RayPerceptionSensorComponent3D>();
perception.RaysPerDirection = 0; // single ray
perception.MaxRayDegrees = 45;
perception.RayLength = 20;
perception.DetectableTags = new List<string>();
perception.DetectableTags.Add(k_CubeTag);
perception.DetectableTags.Add(k_SphereTag);
var radii = new[] { 0f, .5f };
foreach (var castRadius in radii)
{
perception.SphereCastRadius = castRadius;
var castInput = perception.GetRayPerceptionInput();
var castOutput = RayPerceptionSensor.Perceive(castInput);
Assert.AreEqual(1, castOutput.RayOutputs.Length);
// Expected to hit the cube
Assert.AreEqual(0, castOutput.RayOutputs[0].HitTagIndex);
}
}
[Test]
public void TestStaticPerceiveInvalidTags()
{
SetupScene();
var obj = new GameObject("agent");
var perception = obj.AddComponent<RayPerceptionSensorComponent3D>();
perception.RaysPerDirection = 0; // single ray
perception.MaxRayDegrees = 45;
perception.RayLength = 20;
perception.DetectableTags = new List<string>();
perception.DetectableTags.Add("Bad tag");
perception.DetectableTags.Add(null);
perception.DetectableTags.Add("");
perception.DetectableTags.Add(k_CubeTag);
var radii = new[] { 0f, .5f };
foreach (var castRadius in radii)
{
perception.SphereCastRadius = castRadius;
var castInput = perception.GetRayPerceptionInput();
// There's no clean way that I can find to check for a defined tag without
// logging an error.
LogAssert.Expect(LogType.Error, "Tag: Bad tag is not defined.");
var castOutput = RayPerceptionSensor.Perceive(castInput);
Assert.AreEqual(1, castOutput.RayOutputs.Length);
// Expected to hit the cube
Assert.AreEqual(3, castOutput.RayOutputs[0].HitTagIndex);
}
}
[Test]
public void TestStaticPerceiveNoTags()
{
SetupScene();
var obj = new GameObject("agent");
var perception = obj.AddComponent<RayPerceptionSensorComponent3D>();
perception.RaysPerDirection = 0; // single ray
perception.MaxRayDegrees = 45;
perception.RayLength = 20;
perception.DetectableTags = null;
var radii = new[] { 0f, .5f };
foreach (var castRadius in radii)
{
perception.SphereCastRadius = castRadius;
var castInput = perception.GetRayPerceptionInput();
var castOutput = RayPerceptionSensor.Perceive(castInput);
Assert.AreEqual(1, castOutput.RayOutputs.Length);
// Expected to hit the cube
Assert.AreEqual(-1, castOutput.RayOutputs[0].HitTagIndex);
}
}
}

12
com.unity.ml-agents/Tests/Editor/Sensor/ObservationWriterTests.cs


writer[0] = 3f;
Assert.AreEqual(new[] { 1f, 3f, 2f }, buffer);
// AddRange
// AddList
writer.AddRange(new[] {4f, 5f});
writer.AddList(new[] {4f, 5f});
// AddRange with offset
// AddList with offset
writer.AddRange(new[] {6f, 7f});
writer.AddList(new[] {6f, 7f});
Assert.AreEqual(new[] { 4f, 6f, 7f }, buffer);
}

Assert.AreEqual(2f, t.data[1, 1]);
Assert.AreEqual(3f, t.data[1, 2]);
// AddRange
// AddList
t = new TensorProxy
{
valueType = TensorProxy.TensorType.FloatingPoint,

writer.SetTarget(t, 1, 1);
writer.AddRange(new[] {-1f, -2f});
writer.AddList(new[] {-1f, -2f});
Assert.AreEqual(0f, t.data[0, 0]);
Assert.AreEqual(0f, t.data[0, 1]);
Assert.AreEqual(0f, t.data[0, 2]);

82
com.unity.ml-agents/Tests/Editor/TensorUtilsTest.cs


using System;
using System.Text.RegularExpressions;
using Unity.MLAgents.Inference;
using Unity.MLAgents.Inference.Utils;
using Unity.Barracuda;

{
public class TensorUtilsTest
{
[TestCase(4, TestName = "TestResizeTensor_4D")]
[TestCase(8, TestName = "TestResizeTensor_8D")]
public void TestResizeTensor(int dimension)
{
if (dimension == 8)
{
// Barracuda 1.0.x doesn't support 8D tensors
// Barracuda 1.1.x does but it initially broke ML-Agents support
// Unfortunately, the PackageInfo methods don't exist in earlier versions of the editor,
// so just skip that variant of the test then.
// It's unlikely, but possible that we'll upgrade to a newer dependency of Barracuda,
// in which case we should make sure this test is run then.
#if UNITY_2019_3_OR_NEWER
var packageInfo = UnityEditor.PackageManager.PackageInfo.FindForAssembly(typeof(Tensor).Assembly);
Assert.AreEqual("com.unity.barracuda", packageInfo.name);
var barracuda8DSupport = new Version(1, 1, 0);
var versionMatch = Regex.Match(packageInfo.version, @"[0-9]+\.[0-9]+\.[0-9]");
if (!versionMatch.Success)
{
// Couldn't parse barracuda version
return;
}
var strippedBarracudaVersion = versionMatch.Value;
var version = new Version(strippedBarracudaVersion);
if (version <= barracuda8DSupport)
{
return;
}
#else
return;
#endif
}
var alloc = new TensorCachingAllocator();
var height = 64;
var width = 84;
var channels = 3;
// Set shape to {1, ..., height, width, channels}
// For 8D, the ... are all 1's
var shape = new long[dimension];
for (var i = 0; i < dimension; i++)
{
shape[i] = 1;
}
shape[dimension - 3] = height;
shape[dimension - 2] = width;
shape[dimension - 1] = channels;
var intShape = new int[dimension];
for (var i = 0; i < dimension; i++)
{
intShape[i] = (int)shape[i];
}
var tensorProxy = new TensorProxy
{
valueType = TensorProxy.TensorType.Integer,
data = new Tensor(intShape),
shape = shape,
};
// These should be invariant after the resize.
Assert.AreEqual(height, tensorProxy.data.shape.height);
Assert.AreEqual(width, tensorProxy.data.shape.width);
Assert.AreEqual(channels, tensorProxy.data.shape.channels);
TensorUtils.ResizeTensor(tensorProxy, 42, alloc);
Assert.AreEqual(height, tensorProxy.shape[dimension - 3]);
Assert.AreEqual(width, tensorProxy.shape[dimension - 2]);
Assert.AreEqual(channels, tensorProxy.shape[dimension - 1]);
Assert.AreEqual(height, tensorProxy.data.shape.height);
Assert.AreEqual(width, tensorProxy.data.shape.width);
Assert.AreEqual(channels, tensorProxy.data.shape.channels);
alloc.Dispose();
}
[Test]
public void RandomNormalTestTensorInt()
{

9
com.unity.ml-agents/package.json


  {
  "name": "com.unity.ml-agents",
  "displayName": "ML Agents",
- "version": "1.0.0-preview",
+ "version": "1.0.8",
- "com.unity.barracuda": "0.7.0-preview"
+ "com.unity.barracuda": "1.0.4",
  "com.unity.modules.imageconversion": "1.0.0",
  "com.unity.modules.jsonserialize": "1.0.0",
  "com.unity.modules.physics": "1.0.0",
  "com.unity.modules.physics2d": "1.0.0",
  "com.unity.modules.unityanalytics": "1.0.0"
  }
  }
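
The pins above move both the package and its Barracuda dependency from preview builds to fixed releases. A hedged sketch of the kind of check a release script could run against the manifest; this is illustrative and not the actual logic of utils/validate_versions.py, which is not shown in this diff:

    import json

    # In practice the manifest would be read from com.unity.ml-agents/package.json.
    manifest = json.loads(
        '{"version": "1.0.8", "dependencies": {"com.unity.barracuda": "1.0.4"}}'
    )
    # After a release bump, no pin should still carry a "-preview" suffix.
    assert "-preview" not in manifest["version"]
    assert "-preview" not in manifest["dependencies"]["com.unity.barracuda"]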

ml-agents/mlagents/trainers/__init__.py (4 changed lines)

  # Version of the library that will be used to upload to pypi
- __version__ = "0.16.0"
+ __version__ = "0.16.1"
- __release_tag__ = "release_1"
+ __release_tag__ = "release_2"
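
Both constants are bumped together. A minimal consistency check in the spirit of utils/validate_versions.py (its real logic is not shown here; the names below are hypothetical):

    EXPECTED = {"__version__": "0.16.1", "__release_tag__": "release_2"}

    def validate(module_globals):
        # Every released Python package should advertise the same version
        # and release tag after a bump.
        for attr, expected in EXPECTED.items():
            actual = module_globals.get(attr)
            assert actual == expected, f"{attr}: {actual!r} != {expected!r}"

    # e.g. validate(vars(mlagents.trainers)) for each package in the repo
    validate({"__version__": "0.16.1", "__release_tag__": "release_2"})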

ml-agents/mlagents/trainers/subprocess_env_manager.py (2 changed lines)

  return self.env_workers[0].recv().payload
  def close(self) -> None:
- logger.debug(f"SubprocessEnvManager closing.")
+ logger.debug("SubprocessEnvManager closing.")
  self.step_queue.close()
  self.step_queue.join_thread()
  for env_worker in self.env_workers:
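
This hunk drops an f-prefix from a string with no replacement fields, which flake8 flags as F541; the prefix is inert there, as a quick check shows:

    # An f-string without placeholders is just a plain string constant,
    # so the prefix only earns the lint warning without changing the value.
    assert f"SubprocessEnvManager closing." == "SubprocessEnvManager closing."  # noqa: F541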

ml-agents/mlagents/trainers/buffer.py (4 changed lines)

  super().__init__()
  def __str__(self):
- return ", ".join(["'{0}' : {1}".format(k, str(self[k])) for k in self.keys()])
+ return ", ".join(["'{}' : {}".format(k, str(self[k])) for k in self.keys()])
  def reset_agent(self) -> None:
  """

  key_list = list(self.keys())
  if not self.check_length(key_list):
  raise BufferException(
- "The length of the fields {0} were not of same length".format(key_list)
+ f"The length of the fields {key_list} were not of same length"
  )
  for field_key in key_list:
  target_buffer[field_key].extend(
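
Most of the remaining hunks in this diff are the same mechanical migration from str.format to f-strings (or from numbered {0} fields to auto-numbered {} fields). The spellings produce identical strings, e.g. for the __str__ pattern above:

    # Both forms render each key/value pair the same way.
    data = {"rewards": [1.0, 2.0], "actions": [0, 1]}
    old = ", ".join(["'{}' : {}".format(k, str(data[k])) for k in data])
    new = ", ".join([f"'{k}' : {data[k]}" for k in data])
    assert old == new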

ml-agents/mlagents/trainers/components/bc/model.py (2 changed lines)

  from mlagents.trainers.policy.tf_policy import TFPolicy
- class BCModel(object):
+ class BCModel:
  def __init__(
  self, policy: TFPolicy, learning_rate: float = 3e-4, anneal_steps: int = 0
  ):
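
Dropping the explicit object base changes nothing in Python 3, where every class is new-style; a quick demonstration:

    class WithBase(object):
        pass

    class WithoutBase:
        pass

    # Both implicitly inherit from object in Python 3.
    assert WithBase.__mro__[-1] is object
    assert WithoutBase.__mro__[-1] is object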

ml-agents/mlagents/trainers/components/bc/module.py (2 changed lines)

  for k in param_keys:
  if k not in config_dict:
  raise UnityTrainerException(
- "The required pre-training hyper-parameter {0} was not defined. Please check your \
+ "The required pre-training hyper-parameter {} was not defined. Please check your \
  trainer YAML file.".format(
  k
  )

ml-agents/mlagents/trainers/components/reward_signals/__init__.py (2 changed lines)

  for k in param_keys:
  if k not in config_dict:
  raise UnityTrainerException(
- "The hyper-parameter {0} could not be found for {1}.".format(
+ "The hyper-parameter {} could not be found for {}.".format(
  k, cls.__name__
  )
  )

ml-agents/mlagents/trainers/components/reward_signals/curiosity/model.py (6 changed lines)

  from mlagents.trainers.policy.tf_policy import TFPolicy
- class CuriosityModel(object):
+ class CuriosityModel:
  def __init__(
  self, policy: TFPolicy, encoding_size: int = 128, learning_rate: float = 3e-4
  ):

  self.encoding_size,
  ModelUtils.swish,
  1,
- "curiosity_stream_{}_visual_obs_encoder".format(i),
+ f"curiosity_stream_{i}_visual_obs_encoder",
  False,
  )

  ModelUtils.swish,
  1,
- "curiosity_stream_{}_visual_obs_encoder".format(i),
+ f"curiosity_stream_{i}_visual_obs_encoder",
  True,
  )
  visual_encoders.append(encoded_visual)

ml-agents/mlagents/trainers/components/reward_signals/gail/model.py (6 changed lines)

  EPSILON = 1e-7
- class GAILModel(object):
+ class GAILModel:
  def __init__(
  self,
  policy: TFPolicy,

  self.encoding_size,
  ModelUtils.swish,
  1,
- "gail_stream_{}_visual_obs_encoder".format(i),
+ f"gail_stream_{i}_visual_obs_encoder",
  False,
  )

  ModelUtils.swish,
  1,
- "gail_stream_{}_visual_obs_encoder".format(i),
+ f"gail_stream_{i}_visual_obs_encoder",
  True,
  )
  visual_policy_encoders.append(encoded_policy_visual)

ml-agents/mlagents/trainers/components/reward_signals/reward_signal_factory.py (4 changed lines)

"""
rcls = NAME_TO_CLASS.get(name)
if not rcls:
raise UnityTrainerException("Unknown reward signal type {0}".format(name))
raise UnityTrainerException(f"Unknown reward signal type {name}")
"Unknown parameters given for reward signal {0}".format(name)
f"Unknown parameters given for reward signal {name}"
)
return class_inst

ml-agents/mlagents/trainers/curriculum.py (12 changed lines)

  for key in parameters:
  config[key] = parameters[key][self.lesson_num]
  logger.info(
- "{0} lesson changed. Now in lesson {1}: {2}".format(
+ "{} lesson changed. Now in lesson {}: {}".format(
  self.brain_name,
  self.lesson_num,
  ", ".join([str(x) + " -> " + str(config[x]) for x in config]),

  try:
  with open(config_path) as data_file:
  return Curriculum._load_curriculum(data_file)
- except IOError:
- raise CurriculumLoadingError(
- "The file {0} could not be found.".format(config_path)
- )
+ except OSError:
+ raise CurriculumLoadingError(f"The file {config_path} could not be found.")
- raise CurriculumLoadingError(
- "There was an error decoding {}".format(config_path)
- )
+ raise CurriculumLoadingError(f"There was an error decoding {config_path}")
  @staticmethod
  def _load_curriculum(fp: TextIO) -> Dict:
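
Switching the handler from IOError to OSError is likewise purely cosmetic: since Python 3.3, IOError is an alias of OSError, and file errors such as FileNotFoundError are OSError subclasses, so the new clause catches exactly what the old one did:

    # IOError is literally the same class as OSError in Python 3.
    assert IOError is OSError

    try:
        open("/nonexistent/curriculum.yaml")
    except OSError as e:
        # FileNotFoundError subclasses OSError, so this handler still fires.
        assert isinstance(e, FileNotFoundError)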

ml-agents/mlagents/trainers/ghost/controller.py (4 changed lines)

"""
self._queue.append(self._learning_team)
self._learning_team = self._queue.popleft()
logger.debug(
"Learning team {} swapped on step {}".format(self._learning_team, step)
)
logger.debug(f"Learning team {self._learning_team} swapped on step {step}")
self._changed_training_team = True
# Adapted from https://github.com/Unity-Technologies/ml-agents/pull/1975 and

ml-agents/mlagents/trainers/ghost/trainer.py (2 changed lines)

  :param run_id: The identifier of the current run
  """
- super(GhostTrainer, self).__init__(
+ super().__init__(
  brain_name, trainer_parameters, training, run_id, reward_buff_cap
  )
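
The zero-argument form of super() resolves the class and instance automatically inside a method, so it is equivalent to the Python 2 spelling without repeating the class name. A self-contained demonstration (GhostLikeTrainer is an illustrative stand-in, not the real GhostTrainer signature):

    class Trainer:
        def __init__(self, brain_name):
            self.brain_name = brain_name

    class GhostLikeTrainer(Trainer):
        def __init__(self, brain_name):
            # Equivalent to super(GhostLikeTrainer, self).__init__(brain_name).
            super().__init__(brain_name)

    assert GhostLikeTrainer("soccer").brain_name == "soccer"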

ml-agents/mlagents/trainers/models.py (6 changed lines)

  )
  else:
  raise UnityTrainerException(
- "The learning rate schedule {} is invalid.".format(lr_schedule)
+ f"The learning rate schedule {lr_schedule} is invalid."
  )
  return learning_rate

  h_size,
  activation=activation,
  reuse=reuse,
- name="hidden_{}".format(i),
+ name=f"hidden_{i}",
  kernel_initializer=tf.initializers.variance_scaling(1.0),
  )
  return hidden

  """
  value_heads = {}
  for name in stream_names:
- value = tf.layers.dense(hidden_input, 1, name="{}_value".format(name))
+ value = tf.layers.dense(hidden_input, 1, name=f"{name}_value")
  value_heads[name] = value
  value = tf.reduce_mean(list(value_heads.values()), 0)
  return value_heads, value

ml-agents/mlagents/trainers/policy/tf_policy.py (13 changed lines)

  self.sequence_length = trainer_parameters["sequence_length"]
  if self.m_size == 0:
  raise UnityPolicyException(
- "The memory size for brain {0} is 0 even "
+ "The memory size for brain {} is 0 even "
- "The memory size for brain {0} is {1} "
+ "The memory size for brain {} is {} "
  "but it must be divisible by 2.".format(
  brain.brain_name, self.m_size
  )

  ckpt = tf.train.get_checkpoint_state(model_path)
  if ckpt is None:
  raise UnityPolicyException(
- "The model {0} could not be loaded. Make "
+ "The model {} could not be loaded. Make "
  "sure you specified the right "
  "--run-id and that the previous run you are loading from had the same "
  "behavior names.".format(model_path)

  except tf.errors.NotFoundError:
  raise UnityPolicyException(
- "The model {0} was found but could not be loaded. Make "
+ "The model {} was found but could not be loaded. Make "
  "sure the model is from the same version of ML-Agents, has the same behavior parameters, "
  "and is using the same trainer configuration as the current run.".format(
  model_path

  self._set_step(0)
  logger.info(
  "Starting training from step 0 and saving to {}.".format(
  self.model_path

- logger.info(
- "Resuming training from step {}.".format(self.get_current_step())
- )
+ logger.info(f"Resuming training from step {self.get_current_step()}.")
  def initialize_or_load(self):
  # If there is an initialize path, load from that. Else, load from the set model path.

ml-agents/mlagents/trainers/ppo/optimizer.py (12 changed lines)

  self.old_values = {}
  for name in value_heads.keys():
  returns_holder = tf.placeholder(
- shape=[None], dtype=tf.float32, name="{}_returns".format(name)
+ shape=[None], dtype=tf.float32, name=f"{name}_returns"
- shape=[None], dtype=tf.float32, name="{}_value_estimate".format(name)
+ shape=[None], dtype=tf.float32, name=f"{name}_value_estimate"
  )
  self.returns_holders[name] = returns_holder
  self.old_values[name] = old_value

  self.all_old_log_probs: mini_batch["action_probs"],
  }
  for name in self.reward_signals:
- feed_dict[self.returns_holders[name]] = mini_batch[
- "{}_returns".format(name)
- ]
- feed_dict[self.old_values[name]] = mini_batch[
- "{}_value_estimates".format(name)
- ]
+ feed_dict[self.returns_holders[name]] = mini_batch[f"{name}_returns"]
+ feed_dict[self.old_values[name]] = mini_batch[f"{name}_value_estimates"]
  if self.policy.output_pre is not None and "actions_pre" in mini_batch:
  feed_dict[self.policy.output_pre] = mini_batch["actions_pre"]

ml-agents/mlagents/trainers/ppo/trainer.py (20 changed lines)

  :param seed: The seed the model will be initialized with
  :param run_id: The identifier of the current run
  """
- super(PPOTrainer, self).__init__(
+ super().__init__(
  brain_name, trainer_parameters, training, run_id, reward_buff_cap
  )
  self.param_keys = [

  trajectory.done_reached and not trajectory.max_step_reached,
  )
  for name, v in value_estimates.items():
- agent_buffer_trajectory["{}_value_estimates".format(name)].extend(v)
+ agent_buffer_trajectory[f"{name}_value_estimates"].extend(v)
  self._stats_reporter.add_stat(
  self.optimizer.reward_signals[name].value_name, np.mean(v)
  )

  evaluate_result = reward_signal.evaluate_batch(
  agent_buffer_trajectory
  ).scaled_reward
- agent_buffer_trajectory["{}_rewards".format(name)].extend(evaluate_result)
+ agent_buffer_trajectory[f"{name}_rewards"].extend(evaluate_result)
  # Report the reward signals
  self.collected_rewards[name][agent_id] += np.sum(evaluate_result)

  for name in self.optimizer.reward_signals:
  bootstrap_value = value_next[name]
- local_rewards = agent_buffer_trajectory[
- "{}_rewards".format(name)
- ].get_batch()
+ local_rewards = agent_buffer_trajectory[f"{name}_rewards"].get_batch()
- "{}_value_estimates".format(name)
+ f"{name}_value_estimates"
  ].get_batch()
  local_advantage = get_gae(
  rewards=local_rewards,

  )
  local_return = local_advantage + local_value_estimates
  # This is later used as the target for the different value estimates
- agent_buffer_trajectory["{}_returns".format(name)].set(local_return)
- agent_buffer_trajectory["{}_advantage".format(name)].set(local_advantage)
+ agent_buffer_trajectory[f"{name}_returns"].set(local_return)
+ agent_buffer_trajectory[f"{name}_advantage"].set(local_advantage)
  tmp_advantages.append(local_advantage)
  tmp_returns.append(local_return)

  self.update_buffer.shuffle(sequence_length=self.policy.sequence_length)
  buffer = self.update_buffer
  max_num_batch = buffer_length // batch_size
- for l in range(0, max_num_batch * batch_size, batch_size):
+ for i in range(0, max_num_batch * batch_size, batch_size):
- buffer.make_mini_batch(l, l + batch_size), n_sequences
+ buffer.make_mini_batch(i, i + batch_size), n_sequences
  )
  for stat_name, value in update_stats.items():
  batch_update_stats[stat_name].append(value)
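
Besides the f-string conversions, the last hunk renames the loop variable l to i: flake8's E741 flags l as ambiguous because it is easily misread as 1. The loop itself walks the shuffled buffer in whole mini-batches; a standalone sketch of that slicing:

    def mini_batches(buffer_length, batch_size):
        # Yield (start, end) index pairs covering only complete batches,
        # matching the renamed loop in ppo/trainer.py.
        max_num_batch = buffer_length // batch_size
        for i in range(0, max_num_batch * batch_size, batch_size):
            yield i, i + batch_size

    # A buffer of 10 samples with batch size 4 yields two full batches.
    assert list(mini_batches(10, 4)) == [(0, 4), (4, 8)]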

ml-agents/mlagents/trainers/sac/network.py (6 changed lines)

"""
self.value_heads = {}
for name in stream_names:
value = tf.layers.dense(hidden_input, 1, name="{}_value".format(name))
value = tf.layers.dense(hidden_input, 1, name=f"{name}_value")
self.value_heads[name] = value
self.value = tf.reduce_mean(list(self.value_heads.values()), 0)

q1_heads = {}
for name in stream_names:
_q1 = tf.layers.dense(q1_hidden, num_outputs, name="{}_q1".format(name))
_q1 = tf.layers.dense(q1_hidden, num_outputs, name=f"{name}_q1")
q1_heads[name] = _q1
q1 = tf.reduce_mean(list(q1_heads.values()), axis=0)

q2_heads = {}
for name in stream_names:
_q2 = tf.layers.dense(q2_hidden, num_outputs, name="{}_q2".format(name))
_q2 = tf.layers.dense(q2_hidden, num_outputs, name=f"{name}_q2")
q2_heads[name] = _q2
q2 = tf.reduce_mean(list(q2_heads.values()), axis=0)
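
The pattern here builds one head per reward-signal name, keyed by an f-string such as f"{name}_q1", then averages the heads into a single estimate. A framework-free sketch of that wiring, with numpy standing in for the TF1 dense layers (build_heads is illustrative, not the library API):

    import numpy as np

    def build_heads(hidden, stream_names, rng):
        # One linear "head" per reward signal; the combined estimate is the
        # mean over all heads, as in sac/network.py.
        heads = {}
        for name in stream_names:
            w = rng.standard_normal(hidden.shape[-1])
            heads[f"{name}_q1"] = hidden @ w
        return heads, np.mean(list(heads.values()), axis=0)

    rng = np.random.default_rng(0)
    heads, q1 = build_heads(rng.standard_normal((5, 8)), ["extrinsic", "gail"], rng)
    assert set(heads) == {"extrinsic_q1", "gail_q1"} and q1.shape == (5,)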

ml-agents/mlagents/trainers/sac/optimizer.py (4 changed lines)

  )
  rewards_holder = tf.placeholder(
- shape=[None], dtype=tf.float32, name="{}_rewards".format(name)
+ shape=[None], dtype=tf.float32, name=f"{name}_rewards"
  )
  self.rewards_holders[name] = rewards_holder

  self.policy.mask_input: batch["masks"] * burn_in_mask,
  }
  for name in self.reward_signals:
- feed_dict[self.rewards_holders[name]] = batch["{}_rewards".format(name)]
+ feed_dict[self.rewards_holders[name]] = batch[f"{name}_rewards"]
  if self.policy.use_continuous_act:
  feed_dict[self.policy_network.external_action_in] = batch["actions"]

ml-agents/mlagents/trainers/sac/trainer.py (14 changed lines)

  filename = os.path.join(
  self.trainer_parameters["model_path"], "last_replay_buffer.hdf5"
  )
- logger.info("Saving Experience Replay Buffer to {}".format(filename))
+ logger.info(f"Saving Experience Replay Buffer to {filename}")
  with open(filename, "wb") as file_object:
  self.update_buffer.save_to_file(file_object)
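
SAC persists its replay buffer between runs so a resumed run keeps its off-policy experience; the real buffer serializes to HDF5 through its own save_to_file/load_from_file, as shown above and below. A dependency-free stand-in using pickle, only to illustrate the save/resume round trip (the function names are hypothetical):

    import os
    import pickle

    def save_replay_buffer(buffer, model_path):
        # Illustrative only: ML-Agents writes HDF5, not pickle.
        filename = os.path.join(model_path, "last_replay_buffer.pkl")
        with open(filename, "wb") as file_object:
            pickle.dump(buffer, file_object)
        return filename

    def load_replay_buffer(filename):
        with open(filename, "rb") as file_object:
            return pickle.load(file_object)

    path = save_replay_buffer([("obs", "action", 1.0)], ".")
    assert load_replay_buffer(path) == [("obs", "action", 1.0)]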

  filename = os.path.join(
  self.trainer_parameters["model_path"], "last_replay_buffer.hdf5"
  )
- logger.info("Loading Experience Replay Buffer from {}".format(filename))
+ logger.info(f"Loading Experience Replay Buffer from {filename}")
  with open(filename, "rb+") as file_object:
  self.update_buffer.load_from_file(file_object)
  logger.info(

  batch_update_stats: Dict[str, list] = defaultdict(list)
  while self.step / self.update_steps > self.steps_per_update:
- logger.debug("Updating SAC policy at step {}".format(self.step))
+ logger.debug(f"Updating SAC policy at step {self.step}")
  buffer = self.update_buffer
  if (
  self.update_buffer.num_experiences

  )
  # Get rewards for each reward signal
  for name, signal in self.optimizer.reward_signals.items():
- sampled_minibatch[
- "{}_rewards".format(name)
- ] = signal.evaluate_batch(sampled_minibatch).scaled_reward
+ sampled_minibatch[f"{name}_rewards"] = signal.evaluate_batch(
+ sampled_minibatch
+ ).scaled_reward
  update_stats = self.optimizer.update(sampled_minibatch, n_sequences)
  for stat_name, value in update_stats.items():

  # Get minibatches for reward signal update if needed
  reward_signal_minibatches = {}
  for name, signal in self.optimizer.reward_signals.items():
- logger.debug("Updating {} at step {}".format(name, self.step))
+ logger.debug(f"Updating {name} at step {self.step}")
  # Some signals don't need a minibatch to be sampled - so we don't!
  if signal.update_dict:
  reward_signal_minibatches[name] = buffer.sample_mini_batch(

ml-agents/mlagents/trainers/sampler_class.py (2 changed lines)

  for param_name, cur_param_dict in self.reset_param_dict.items():
  if "sampler-type" not in cur_param_dict:
  raise SamplerException(
- "'sampler_type' argument hasn't been supplied for the {0} parameter".format(
+ "'sampler_type' argument hasn't been supplied for the {} parameter".format(
  param_name
  )
  )

ml-agents/mlagents/trainers/stats.py (12 changed lines)

  )
  if self.self_play and "Self-play/ELO" in values:
  elo_stats = values["Self-play/ELO"]
- logger.info("{} ELO: {:0.3f}. ".format(category, elo_stats.mean))
+ logger.info(f"{category} ELO: {elo_stats.mean:0.3f}. ")
  else:
  logger.info(
  "{}: Step: {}. No episode was completed since last summary. {}".format(

  ) -> None:
  if property_type == StatsPropertyType.HYPERPARAMETERS:
  logger.info(
- """Hyperparameters for behavior name {0}: \n{1}""".format(
+ """Hyperparameters for behavior name {}: \n{}""".format(
  category, self._dict_to_str(value, 0)
  )
  )

  [
  "\t"
  + " " * num_tabs
- + "{0}:\t{1}".format(
+ + "{}:\t{}".format(
  x, self._dict_to_str(param_dict[x], num_tabs + 1)
  )
  for x in param_dict

  self._maybe_create_summary_writer(category)
  for key, value in values.items():
  summary = tf.Summary()
- summary.value.add(tag="{}".format(key), simple_value=value.mean)
+ summary.value.add(tag=f"{key}", simple_value=value.mean)
  self.summary_writers[category].add_summary(summary, step)
  self.summary_writers[category].flush()

  for file_name in os.listdir(directory_name):
  if file_name.startswith("events.out"):
  logger.warning(
- "{} was left over from a previous run. Deleting.".format(file_name)
+ f"{file_name} was left over from a previous run. Deleting."
  )
  full_fname = os.path.join(directory_name, file_name)
  try:

  s_op = tf.summary.text(
  name,
  tf.convert_to_tensor(
- ([[str(x), str(input_dict[x])] for x in input_dict])
+ [[str(x), str(input_dict[x])] for x in input_dict]
  ),
  )
  s = sess.run(s_op)

Some files were not shown because too many files changed in this diff.
