
Merge branch 'master' into release_9_branch_merge

/release_9_branch
Ruo-Ping Dong, 4 years ago
Current commit: 9e08be87
134 changed files with 962 additions and 945 deletions
  1. .github/ISSUE_TEMPLATE/bug_report.md (2 changes)
  2. .yamato/gym-interface-test.yml (3 changes)
  3. .yamato/python-ll-api-test.yml (3 changes)
  4. .yamato/standalone-build-test.yml (9 changes)
  5. .yamato/training-int-tests.yml (10 changes)
  6. Dockerfile (159 changes)
  7. Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ModelOverrider.cs (22 changes)
  8. README.md (8 changes)
  9. com.unity.ml-agents.extensions/Runtime/Sensors/GridSensor.cs (4 changes)
  10. com.unity.ml-agents/CHANGELOG.md (16 changes)
  11. com.unity.ml-agents/Runtime/Academy.cs (27 changes)
  12. com.unity.ml-agents/Runtime/Agent.cs (24 changes)
  13. com.unity.ml-agents/Runtime/Sensors/CameraSensor.cs (2 changes)
  14. com.unity.ml-agents/Runtime/Sensors/ObservationWriter.cs (52 changes)
  15. com.unity.ml-agents/Runtime/Sensors/RenderTextureSensor.cs (2 changes)
  16. com.unity.ml-agents/Runtime/Sensors/StackingSensor.cs (8 changes)
  17. com.unity.ml-agents/Runtime/Utilities.cs (47 changes)
  18. com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs (56 changes)
  19. com.unity.ml-agents/package.json (2 changes)
  20. docs/Background-Machine-Learning.md (2 changes)
  21. docs/Getting-Started.md (10 changes)
  22. docs/Installation.md (24 changes)
  23. docs/Learning-Environment-Create-New.md (4 changes)
  24. docs/Learning-Environment-Design-Agents.md (2 changes)
  25. docs/Learning-Environment-Executable.md (4 changes)
  26. docs/ML-Agents-Overview.md (8 changes)
  27. docs/Readme.md (2 changes)
  28. docs/Training-Configuration-File.md (2 changes)
  29. docs/Training-ML-Agents.md (35 changes)
  30. docs/Training-on-Amazon-Web-Service.md (2 changes)
  31. docs/Unity-Inference-Engine.md (5 changes)
  32. gym-unity/gym_unity/__init__.py (4 changes)
  33. gym-unity/gym_unity/envs/__init__.py (21 changes)
  34. gym-unity/gym_unity/tests/test_gym.py (8 changes)
  35. ml-agents-envs/mlagents_envs/__init__.py (4 changes)
  36. ml-agents-envs/mlagents_envs/base_env.py (154 changes)
  37. ml-agents-envs/mlagents_envs/environment.py (35 changes)
  38. ml-agents-envs/mlagents_envs/rpc_utils.py (27 changes)
  39. ml-agents-envs/mlagents_envs/tests/test_envs.py (15 changes)
  40. ml-agents-envs/mlagents_envs/tests/test_rpc_utils.py (30 changes)
  41. ml-agents-envs/mlagents_envs/tests/test_steps.py (60 changes)
  42. ml-agents/mlagents/tf_utils/__init__.py (1 change)
  43. ml-agents/mlagents/tf_utils/tf.py (63 changes)
  44. ml-agents/mlagents/torch_utils/__init__.py (1 change)
  45. ml-agents/mlagents/torch_utils/torch.py (66 changes)
  46. ml-agents/mlagents/trainers/__init__.py (4 changes)
  47. ml-agents/mlagents/trainers/cli_utils.py (11 changes)
  48. ml-agents/mlagents/trainers/demo_loader.py (13 changes)
  49. ml-agents/mlagents/trainers/learn.py (6 changes)
  50. ml-agents/mlagents/trainers/policy/policy.py (19 changes)
  51. ml-agents/mlagents/trainers/policy/tf_policy.py (6 changes)
  52. ml-agents/mlagents/trainers/policy/torch_policy.py (18 changes)
  53. ml-agents/mlagents/trainers/ppo/optimizer_torch.py (4 changes)
  54. ml-agents/mlagents/trainers/ppo/trainer.py (41 changes)
  55. ml-agents/mlagents/trainers/sac/optimizer_torch.py (27 changes)
  56. ml-agents/mlagents/trainers/sac/trainer.py (61 changes)
  57. ml-agents/mlagents/trainers/settings.py (2 changes)
  58. ml-agents/mlagents/trainers/stats.py (86 changes)
  59. ml-agents/mlagents/trainers/tests/mock_brain.py (60 changes)
  60. ml-agents/mlagents/trainers/tests/simple_test_envs.py (15 changes)
  61. ml-agents/mlagents/trainers/tests/tensorflow/test_ghost.py (2 changes)
  62. ml-agents/mlagents/trainers/tests/tensorflow/test_models.py (5 changes)
  63. ml-agents/mlagents/trainers/tests/tensorflow/test_nn_policy.py (8 changes)
  64. ml-agents/mlagents/trainers/tests/tensorflow/test_ppo.py (12 changes)
  65. ml-agents/mlagents/trainers/tests/tensorflow/test_sac.py (9 changes)
  66. ml-agents/mlagents/trainers/tests/tensorflow/test_saver.py (4 changes)
  67. ml-agents/mlagents/trainers/tests/tensorflow/test_simple_rl.py (8 changes)
  68. ml-agents/mlagents/trainers/tests/tensorflow/test_tf_policy.py (25 changes)
  69. ml-agents/mlagents/trainers/tests/test_agent_processor.py (19 changes)
  70. ml-agents/mlagents/trainers/tests/test_rl_trainer.py (6 changes)
  71. ml-agents/mlagents/trainers/tests/test_stats.py (20 changes)
  72. ml-agents/mlagents/trainers/tests/test_trajectory.py (4 changes)
  73. ml-agents/mlagents/trainers/tests/torch/test_ghost.py (2 changes)
  74. ml-agents/mlagents/trainers/tests/torch/test_networks.py (41 changes)
  75. ml-agents/mlagents/trainers/tests/torch/test_policy.py (15 changes)
  76. ml-agents/mlagents/trainers/tests/torch/test_ppo.py (8 changes)
  77. ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_curiosity.py (32 changes)
  78. ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_extrinsic.py (18 changes)
  79. ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_gail.py (29 changes)
  80. ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_rnd.py (25 changes)
  81. ml-agents/mlagents/trainers/tests/torch/test_reward_providers/utils.py (8 changes)
  82. ml-agents/mlagents/trainers/tests/torch/test_simple_rl.py (6 changes)
  83. ml-agents/mlagents/trainers/tests/torch/test_utils.py (4 changes)
  84. ml-agents/mlagents/trainers/tf/components/bc/model.py (2 changes)
  85. ml-agents/mlagents/trainers/tf/components/bc/module.py (4 changes)
  86. ml-agents/mlagents/trainers/tf/components/reward_signals/curiosity/model.py (2 changes)
  87. ml-agents/mlagents/trainers/tf/components/reward_signals/gail/model.py (2 changes)
  88. ml-agents/mlagents/trainers/tf/model_serialization.py (4 changes)
  89. ml-agents/mlagents/trainers/torch/components/bc/module.py (2 changes)
  90. ml-agents/mlagents/trainers/torch/components/reward_providers/curiosity_reward_provider.py (16 changes)
  91. ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py (3 changes)
  92. ml-agents/mlagents/trainers/torch/components/reward_providers/rnd_reward_provider.py (1 change)
  93. ml-agents/mlagents/trainers/torch/distributions.py (5 changes)
  94. ml-agents/mlagents/trainers/torch/model_serialization.py (4 changes)
  95. ml-agents/mlagents/trainers/torch/networks.py (42 changes)
  96. ml-agents/mlagents/trainers/torch/utils.py (18 changes)
  97. ml-agents/mlagents/trainers/trainer/rl_trainer.py (34 changes)
  98. ml-agents/mlagents/trainers/trainer/trainer_factory.py (18 changes)
  99. ml-agents/mlagents/trainers/trainer_controller.py (10 changes)
  100. ml-agents/mlagents/trainers/training_status.py (12 changes)

.github/ISSUE_TEMPLATE/bug_report.md (2 changes)


- Unity Version: [e.g. Unity 2020.1f1]
- OS + version: [e.g. Windows 10]
- _ML-Agents version_: (e.g. ML-Agents v0.8, or latest `develop` branch from source)
- _TensorFlow version_: (you can run `pip3 show tensorflow` to get this)
- _Torch version_: (you can run `pip3 show torch` to get this)
- _Environment_: (which example environment you used to reproduce the error)
**NOTE:** We are unable to help reproduce bugs with custom environments. Please attempt to reproduce your issue with one of the example environments, or provide a minimal patch to one of the environments needed to reproduce the issue.

.yamato/gym-interface-test.yml (3 changes)


test_editors:
- version: 2019.4
{% metadata_file .yamato/test_versions.metafile %}
---
{% for editor in test_editors %}
test_gym_interface_{{ editor.version }}:

.yamato/python-ll-api-test.yml (3 changes)


test_editors:
- version: 2019.4
{% metadata_file .yamato/test_versions.metafile %}
---
{% for editor in test_editors %}
test_mac_ll_api_{{ editor.version }}:

.yamato/standalone-build-test.yml (9 changes)


test_editors:
- version: 2018.4
- version: 2019.3
{% metadata_file .yamato/test_versions.metafile %}
---
{% for editor in test_editors %}
test_mac_standalone_{{ editor.version }}:

UNITY_VERSION: {{ editor.version }}
commands:
- pip install pyyaml --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
# TODO remove the "--user" command and the path prefix when we can migrate away from the custom bokken image
- python -m pip install unity-downloader-cli --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple --upgrade --user
- /Users/bokken/Library/Python/3.7/bin/unity-downloader-cli -u {{ editor.version }} -c editor --wait --fast
- python -u -m ml-agents.tests.yamato.standalone_build_tests --scene=Assets/ML-Agents/TestScenes/TestCompressedGrid/TestGridCompressed.unity
- python -u -m ml-agents.tests.yamato.standalone_build_tests --scene=Assets/ML-Agents/TestScenes/TestCompressedTexture/TestTextureCompressed.unity
triggers:
cancel_old_ci: true
expression: |

.yamato/training-int-tests.yml (10 changes)


test_editors:
- version: 2018.4
- version: 2019.4
- version: 2020.1
{% metadata_file .yamato/test_versions.metafile %}
---
{% for editor in test_editors %}
test_mac_training_int_{{ editor.version }}:

UNITY_VERSION: {{ editor.version }}
commands:
- pip install pyyaml --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
# TODO remove the "--user" command and the path prefix when we can migrate away from the custom bokken image
- python -m pip install unity-downloader-cli --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple --upgrade --user
- /Users/bokken/Library/Python/3.7/bin/unity-downloader-cli -u {{ editor.version }} -c editor --wait --fast
- python -u -m ml-agents.tests.yamato.training_int_tests --csharp=1.0.0
- python -u -m ml-agents.tests.yamato.training_int_tests --csharp={{ editor.csharp_backcompat_version }}
dependencies:
- .yamato/standalone-build-test.yml#test_mac_standalone_{{ editor.version }}
triggers:

Dockerfile (159 changes)


# Based off of python:3.6-slim, except that we are using ubuntu instead of debian.
FROM ubuntu:16.04
# ensure local python is preferred over distribution python
ENV PATH /usr/local/bin:$PATH
FROM nvidia/cuda:10.0-cudnn7-devel-ubuntu18.04
# http://bugs.python.org/issue19846
# > At the moment, setting "LANG=C" on a Linux system *fundamentally breaks Python 3*, and that's not OK.
ENV LANG C.UTF-8
RUN yes | unminimize
# runtime dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
ca-certificates \
libexpat1 \
libffi6 \
libgdbm3 \
libreadline6 \
libsqlite3-0 \
libssl1.0.0 \
&& rm -rf /var/lib/apt/lists/*
RUN echo "deb http://packages.cloud.google.com/apt cloud-sdk-xenial main" | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list
RUN wget https://packages.cloud.google.com/apt/doc/apt-key.gpg && apt-key add apt-key.gpg
RUN apt-get update && \
apt-get install -y --no-install-recommends wget curl tmux vim git gdebi-core \
build-essential python3-pip unzip google-cloud-sdk htop mesa-utils xorg-dev xorg \
libglvnd-dev libgl1-mesa-dev libegl1-mesa-dev libgles2-mesa-dev && \
wget http://security.ubuntu.com/ubuntu/pool/main/libx/libxfont/libxfont1_1.5.1-1ubuntu0.16.04.4_amd64.deb && \
wget http://security.ubuntu.com/ubuntu/pool/universe/x/xorg-server/xvfb_1.18.4-0ubuntu0.10_amd64.deb && \
yes | gdebi libxfont1_1.5.1-1ubuntu0.16.04.4_amd64.deb && \
yes | gdebi xvfb_1.18.4-0ubuntu0.10_amd64.deb
RUN python3 -m pip install --upgrade pip
RUN pip install setuptools==41.0.0
ENV GPG_KEY 0D96DF4D4110E5C43FBFB17F2D347EA6AA65421D
ENV PYTHON_VERSION 3.6.4
ENV LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH
RUN set -ex \
&& buildDeps=" \
dpkg-dev \
gcc \
libbz2-dev \
libc6-dev \
libexpat1-dev \
libffi-dev \
libgdbm-dev \
liblzma-dev \
libncursesw5-dev \
libreadline-dev \
libsqlite3-dev \
libssl-dev \
make \
tcl-dev \
tk-dev \
wget \
xz-utils \
zlib1g-dev \
# as of Stretch, "gpg" is no longer included by default
$(command -v gpg > /dev/null || echo 'gnupg dirmngr') \
" \
&& apt-get update && apt-get install -y $buildDeps --no-install-recommends && rm -rf /var/lib/apt/lists/* \
\
&& wget -O python.tar.xz "https://www.python.org/ftp/python/${PYTHON_VERSION%%[a-z]*}/Python-$PYTHON_VERSION.tar.xz" \
&& wget -O python.tar.xz.asc "https://www.python.org/ftp/python/${PYTHON_VERSION%%[a-z]*}/Python-$PYTHON_VERSION.tar.xz.asc" \
&& export GNUPGHOME="$(mktemp -d)" \
&& gpg --keyserver ha.pool.sks-keyservers.net --recv-keys "$GPG_KEY" \
&& gpg --batch --verify python.tar.xz.asc python.tar.xz \
&& rm -rf "$GNUPGHOME" python.tar.xz.asc \
&& mkdir -p /usr/src/python \
&& tar -xJC /usr/src/python --strip-components=1 -f python.tar.xz \
&& rm python.tar.xz \
\
&& cd /usr/src/python \
&& gnuArch="$(dpkg-architecture --query DEB_BUILD_GNU_TYPE)" \
&& ./configure \
--build="$gnuArch" \
--enable-loadable-sqlite-extensions \
--enable-shared \
--with-system-expat \
--with-system-ffi \
--without-ensurepip \
&& make -j "$(nproc)" \
&& make install \
&& ldconfig \
\
&& apt-get purge -y --auto-remove $buildDeps \
\
&& find /usr/local -depth \
\( \
\( -type d -a \( -name test -o -name tests \) \) \
-o \
\( -type f -a \( -name '*.pyc' -o -name '*.pyo' \) \) \
\) -exec rm -rf '{}' + \
&& rm -rf /usr/src/python
# make some useful symlinks that are expected to exist
RUN cd /usr/local/bin \
&& ln -s idle3 idle \
&& ln -s pydoc3 pydoc \
&& ln -s python3 python \
&& ln -s python3-config python-config
# if this is called "PIP_VERSION", pip explodes with "ValueError: invalid truth value '<VERSION>'"
ENV PYTHON_PIP_VERSION 9.0.3
RUN set -ex; \
\
apt-get update; \
apt-get install -y --no-install-recommends wget; \
rm -rf /var/lib/apt/lists/*; \
\
wget -O get-pip.py 'https://bootstrap.pypa.io/get-pip.py'; \
\
apt-get purge -y --auto-remove wget; \
\
python get-pip.py \
--disable-pip-version-check \
--no-cache-dir \
"pip==$PYTHON_PIP_VERSION" \
; \
pip --version; \
\
find /usr/local -depth \
\( \
\( -type d -a \( -name test -o -name tests \) \) \
-o \
\( -type f -a \( -name '*.pyc' -o -name '*.pyo' \) \) \
\) -exec rm -rf '{}' +; \
rm -f get-pip.py
RUN apt-get update && apt-get -y upgrade
# xvfb is used to do CPU based rendering of Unity
RUN apt-get install -y xvfb
# Install ml-agents-envs package locally
COPY ml-agents-envs /ml-agents-envs
WORKDIR /ml-agents-envs
RUN pip install -e .
# Install ml-agents package next
COPY ml-agents /ml-agents
#checkout ml-agents for SHA
RUN mkdir /ml-agents
RUN pip install -e .
# Port 5004 is the port used in Editor training.
# Environments will start from port 5005,
# so allow enough ports for several environments.
EXPOSE 5004-5050
ENTRYPOINT ["xvfb-run", "--auto-servernum", "--server-args='-screen 0 640x480x24'", "mlagents-learn"]
ARG SHA
RUN git init
RUN git remote add origin https://github.com/Unity-Technologies/ml-agents.git
RUN git fetch --depth 1 origin $SHA
RUN git checkout FETCH_HEAD
RUN pip install -e /ml-agents/ml-agents-envs
RUN pip install -e /ml-agents/ml-agents

Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ModelOverrider.cs (22 changes)


var bp = m_Agent.GetComponent<BehaviorParameters>();
var behaviorName = bp.BehaviorName;
var nnModel = GetModelForBehaviorName(behaviorName);
NNModel nnModel = null;
try
{
nnModel = GetModelForBehaviorName(behaviorName);
}
catch (Exception e)
{
overrideError = $"Exception calling GetModelForBehaviorName: {e}";
}
overrideError =
$"Didn't find a model for behaviorName {behaviorName}. Make " +
$"sure the behaviorName is set correctly in the commandline " +
$"and that the model file exists";
if (string.IsNullOrEmpty(overrideError))
{
overrideError =
$"Didn't find a model for behaviorName {behaviorName}. Make " +
"sure the behaviorName is set correctly in the commandline " +
"and that the model file exists";
}
}
else
{

README.md (8 changes)


**The Unity Machine Learning Agents Toolkit** (ML-Agents) is an open-source
project that enables games and simulations to serve as environments for
training intelligent agents. Agents can be trained using reinforcement learning,
imitation learning, neuroevolution, or other machine learning methods through a
simple-to-use Python API. We also provide implementations (based on TensorFlow)
training intelligent agents. We provide implementations (based on PyTorch)
train intelligent agents for 2D, 3D and VR/AR games. These trained agents can be
train intelligent agents for 2D, 3D and VR/AR games. Researchers can also use the
provided simple-to-use Python API to train Agents using reinforcement learning,
imitation learning, neuroevolution, or any other methods. These trained agents can be
used for multiple purposes, including controlling NPC behavior (in a variety of
settings such as multi-agent and adversarial), automated testing of game builds
and evaluating different game design decisions pre-release. The ML-Agents

com.unity.ml-agents.extensions/Runtime/Sensors/GridSensor.cs (4 changes)


[Tooltip("Whether to show gizmos or not")]
public bool ShowGizmos = false;
public SensorCompressionType CompressionType = SensorCompressionType.PNG;
/// <summary>
/// Array of colors displaying the DebugColors for each cell in OnDrawGizmos. Only updated if ShowGizmos.
/// </summary>

/// <inheritdoc/>
public virtual SensorCompressionType GetCompressionType()
{
return SensorCompressionType.PNG;
return CompressionType;
}
/// <summary>

com.unity.ml-agents/CHANGELOG.md (16 changes)


[Semantic Versioning](http://semver.org/spec/v2.0.0.html).
## [Unreleased] - 2020-11-04
## [Unreleased]
- PyTorch trainers are now the default. See the
[installation docs](https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Installation.md) for
more information on installing PyTorch. For the time being, TensorFlow is still available;
you can use the TensorFlow backend by adding `--tensorflow` to the CLI, or
adding `framework: tensorflow` in the configuration YAML. (#4517)
- The Barracuda dependency was upgraded to 1.1.2 (#4571)
- The `action_probs` node is no longer listed as an output in TensorFlow models (#4613).
- `Agent.CollectObservations()` and `Agent.EndEpisode()` will now throw an exception
if they are called recursively (for example, if they call `Agent.EndEpisode()`).
Previously, this would result in an infinite loop and cause the editor to hang. (#4573)
- Fixed an issue where runs could not be resumed when using TensorFlow and Ghost Training. (#4593)
- Fixed an issue where runs could not be resumed when using TensorFlow and Ghost Training. (#4593)
## [1.5.0-preview] - 2020-10-14
### Major Changes

com.unity.ml-agents/Runtime/Academy.cs (27 changes)


// Flag used to keep track of the first time the Academy is reset.
bool m_HadFirstReset;
// Whether the Academy is in the middle of a step. This is used to detect and Academy
// step called by user code that is also called by the Academy.
bool m_IsStepping;
// Detect an Academy step called by user code that is also called by the Academy.
private RecursionChecker m_StepRecursionChecker = new RecursionChecker("EnvironmentStep");
// Random seed used for inference.
int m_InferenceSeed;

/// </summary>
public void EnvironmentStep()
{
// Check whether we're already in the middle of a step.
// This shouldn't happen generally, but could happen if user code (e.g. CollectObservations)
// that is called by EnvironmentStep() also calls EnvironmentStep(). This would result
// in an infinite loop and/or stack overflow, so stop it before it happens.
if (m_IsStepping)
{
throw new UnityAgentsException(
"Academy.EnvironmentStep() called recursively. " +
"This might happen if you call EnvironmentStep() from custom code such as " +
"CollectObservations() or OnActionReceived()."
);
}
m_IsStepping = true;
try
using (m_StepRecursionChecker.Start())
{
if (!m_HadFirstReset)
{

{
AgentAct?.Invoke();
}
}
finally
{
// Reset m_IsStepping when we're done (or if an exception occurred).
m_IsStepping = false;
}
}

com.unity.ml-agents/Runtime/Agent.cs (24 changes)


/// </summary>
internal VectorSensor collectObservationsSensor;
private RecursionChecker m_CollectObservationsChecker = new RecursionChecker("CollectObservations");
private RecursionChecker m_OnEpisodeBeginChecker = new RecursionChecker("OnEpisodeBegin");
/// <summary>
/// List of IActuators that this Agent will delegate actions to if any exist.
/// </summary>

// episode when initializing until after the Academy had its first reset.
if (Academy.Instance.TotalStepCount != 0)
{
OnEpisodeBegin();
using (m_OnEpisodeBeginChecker.Start())
{
OnEpisodeBegin();
}
}
}

{
// Make sure the latest observations are being passed to training.
collectObservationsSensor.Reset();
CollectObservations(collectObservationsSensor);
using (m_CollectObservationsChecker.Start())
{
CollectObservations(collectObservationsSensor);
}
}
// Request the last decision with no callbacks
// We request a decision so Python knows the Agent is done immediately

UpdateSensors();
using (TimerStack.Instance.Scoped("CollectObservations"))
{
CollectObservations(collectObservationsSensor);
using (m_CollectObservationsChecker.Start())
{
CollectObservations(collectObservationsSensor);
}
}
using (TimerStack.Instance.Scoped("CollectDiscreteActionMasks"))
{

{
ResetData();
m_StepCount = 0;
OnEpisodeBegin();
using (m_OnEpisodeBeginChecker.Start())
{
OnEpisodeBegin();
}
}
/// <summary>

com.unity.ml-agents/Runtime/Sensors/CameraSensor.cs (2 changes)


using (TimerStack.Instance.Scoped("CameraSensor.WriteToTensor"))
{
var texture = ObservationToTexture(m_Camera, m_Width, m_Height);
var numWritten = Utilities.TextureToTensorProxy(texture, writer, m_Grayscale);
var numWritten = writer.WriteTexture(texture, m_Grayscale);
DestroyTexture(texture);
return numWritten;
}

com.unity.ml-agents/Runtime/Sensors/ObservationWriter.cs (52 changes)


}
}
}
public static class ObservationWriterExtension
{
/// <summary>
/// Writes a Texture2D into a ObservationWriter.
/// </summary>
/// <param name="obsWriter">
/// Writer to fill with Texture data.
/// </param>
/// <param name="texture">
/// The texture to be put into the tensor.
/// </param>
/// <param name="grayScale">
/// If set to <c>true</c> the textures will be converted to grayscale before
/// being stored in the tensor.
/// </param>
/// <returns>The number of floats written</returns>
public static int WriteTexture(
this ObservationWriter obsWriter,
Texture2D texture,
bool grayScale)
{
var width = texture.width;
var height = texture.height;
var texturePixels = texture.GetPixels32();
// During training, we convert from Texture to PNG before sending to the trainer, which has the
// effect of flipping the image. We need another flip here at inference time to match this.
for (var h = height - 1; h >= 0; h--)
{
for (var w = 0; w < width; w++)
{
var currentPixel = texturePixels[(height - h - 1) * width + w];
if (grayScale)
{
obsWriter[h, w, 0] =
(currentPixel.r + currentPixel.g + currentPixel.b) / 3f / 255.0f;
}
else
{
// For Color32, the r, g and b values are between 0 and 255.
obsWriter[h, w, 0] = currentPixel.r / 255.0f;
obsWriter[h, w, 1] = currentPixel.g / 255.0f;
obsWriter[h, w, 2] = currentPixel.b / 255.0f;
}
}
}
return height * width * (grayScale ? 1 : 3);
}
}
}

com.unity.ml-agents/Runtime/Sensors/RenderTextureSensor.cs (2 changes)


using (TimerStack.Instance.Scoped("RenderTextureSensor.Write"))
{
var texture = ObservationToTexture(m_RenderTexture);
var numWritten = Utilities.TextureToTensorProxy(texture, writer, m_Grayscale);
var numWritten = writer.WriteTexture(texture, m_Grayscale);
DestroyTexture(texture);
return numWritten;
}

com.unity.ml-agents/Runtime/Sensors/StackingSensor.cs (8 changes)


int height = m_WrappedSensor.GetObservationShape()[0];
int width = m_WrappedSensor.GetObservationShape()[1];
var texture2D = new Texture2D(width, height, TextureFormat.RGB24, false);
Color32[] resetColorArray = texture2D.GetPixels32();
Color32 black = new Color32(0, 0, 0, 0);
for (int i = 0; i < resetColorArray.Length; i++)
{
resetColorArray[i] = black;
}
texture2D.SetPixels32(resetColorArray);
texture2D.Apply();
return texture2D.EncodeToPNG();
}

com.unity.ml-agents/Runtime/Utilities.cs (47 changes)


{
internal static class Utilities
{
/// <summary>
/// Puts a Texture2D into a ObservationWriter.
/// </summary>
/// <param name="texture">
/// The texture to be put into the tensor.
/// </param>
/// <param name="obsWriter">
/// Writer to fill with Texture data.
/// </param>
/// <param name="grayScale">
/// If set to <c>true</c> the textures will be converted to grayscale before
/// being stored in the tensor.
/// </param>
/// <returns>The number of floats written</returns>
internal static int TextureToTensorProxy(
Texture2D texture,
ObservationWriter obsWriter,
bool grayScale)
{
var width = texture.width;
var height = texture.height;
var texturePixels = texture.GetPixels32();
// During training, we convert from Texture to PNG before sending to the trainer, which has the
// effect of flipping the image. We need another flip here at inference time to match this.
for (var h = height - 1; h >= 0; h--)
{
for (var w = 0; w < width; w++)
{
var currentPixel = texturePixels[(height - h - 1) * width + w];
if (grayScale)
{
obsWriter[h, w, 0] =
(currentPixel.r + currentPixel.g + currentPixel.b) / 3f / 255.0f;
}
else
{
// For Color32, the r, g and b values are between 0 and 255.
obsWriter[h, w, 0] = currentPixel.r / 255.0f;
obsWriter[h, w, 1] = currentPixel.g / 255.0f;
obsWriter[h, w, 2] = currentPixel.b / 255.0f;
}
}
}
return height * width * (grayScale ? 1 : 3);
}
/// <summary>
/// Calculates the cumulative sum of an integer array. The result array will be one element

com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs (56 changes)


}
}
}
[TestFixture]
public class AgentRecursionTests
{
[SetUp]
public void SetUp()
{
if (Academy.IsInitialized)
{
Academy.Instance.Dispose();
}
}
class CollectObsEndEpisodeAgent : Agent
{
public override void CollectObservations(VectorSensor sensor)
{
// NEVER DO THIS IN REAL CODE!
EndEpisode();
}
}
class OnEpisodeBeginEndEpisodeAgent : Agent
{
public override void OnEpisodeBegin()
{
// NEVER DO THIS IN REAL CODE!
EndEpisode();
}
}
void TestRecursiveThrows<T>() where T : Agent
{
var gameObj = new GameObject();
var agent = gameObj.AddComponent<T>();
agent.LazyInitialize();
agent.RequestDecision();
Assert.Throws<UnityAgentsException>(() =>
{
Academy.Instance.EnvironmentStep();
});
}
[Test]
public void TestRecursiveCollectObsEndEpisodeThrows()
{
TestRecursiveThrows<CollectObsEndEpisodeAgent>();
}
[Test]
public void TestRecursiveOnEpisodeBeginEndEpisodeThrows()
{
TestRecursiveThrows<OnEpisodeBeginEndEpisodeAgent>();
}
}
}

com.unity.ml-agents/package.json (2 changes)


"unity": "2018.4",
"description": "Use state-of-the-art machine learning to create intelligent character behaviors in any Unity environment (games, robotics, film, etc.).",
"dependencies": {
"com.unity.barracuda": "1.1.1-preview",
"com.unity.barracuda": "1.1.2-preview",
"com.unity.modules.imageconversion": "1.0.0",
"com.unity.modules.jsonserialize": "1.0.0",
"com.unity.modules.physics": "1.0.0",

docs/Background-Machine-Learning.md (2 changes)


one where the number of observations an agent perceives and the number of
actions they can take are large). Many of the algorithms we provide in ML-Agents
use some form of deep learning, built on top of the open-source library,
[TensorFlow](Background-TensorFlow.md).
[PyTorch](Background-PyTorch.md).

docs/Getting-Started.md (10 changes)


## Running a pre-trained model
We include pre-trained models for our agents (`.nn` files) and we use the
We include pre-trained models for our agents (`.onnx` files) and we use the
[Unity Inference Engine](Unity-Inference-Engine.md) to run these models inside
Unity. In this section, we will use the pre-trained model for the 3D Ball
example.

## Training a new model with Reinforcement Learning
While we provide pre-trained `.nn` files for the agents in this environment, any
While we provide pre-trained models for the agents in this environment, any
environment you make yourself will require training agents from scratch to
generate a new model file. In this section we will demonstrate how to use the
reinforcement learning algorithms that are part of the ML-Agents Python package

use it with compatible Agents (the Agents that generated the model). **Note:**
Do not just close the Unity Window once the `Saved Model` message appears.
Either wait for the training process to close the window or press `Ctrl+C` at
the command-line prompt. If you close the window manually, the `.nn` file
the command-line prompt. If you close the window manually, the `.onnx` file
containing the trained model is not exported into the ml-agents folder.
If you've quit the training early using `Ctrl+C` and want to resume training,

mlagents-learn config/ppo/3DBall.yaml --run-id=first3DBallRun --resume
```
Your trained model will be at `results/<run-identifier>/<behavior_name>.nn` where
Your trained model will be at `results/<run-identifier>/<behavior_name>.onnx` where
`<behavior_name>` is the name of the `Behavior Name` of the agents corresponding
to the model. This file corresponds to your model's latest checkpoint. You can
now embed this trained model into your Agents by following the steps below,

`Project/Assets/ML-Agents/Examples/3DBall/TFModels/`.
1. Open the Unity Editor, and select the **3DBall** scene as described above.
1. Select the **3DBall** prefab Agent object.
1. Drag the `<behavior_name>.nn` file from the Project window of the Editor to
1. Drag the `<behavior_name>.onnx` file from the Project window of the Editor to
the **Model** placeholder in the **Ball3DAgent** inspector window.
1. Press the **Play** button at the top of the Editor.

docs/Installation.md (24 changes)


[instructions](https://packaging.python.org/guides/installing-using-linux-tools/#installing-pip-setuptools-wheel-with-linux-package-managers)
on installing it.
Although we do not provide support for Anaconda installation on Windows, the
previous
[Windows Anaconda Installation (Deprecated) guide](Installation-Anaconda-Windows.md)
is still available.
### Clone the ML-Agents Toolkit Repository (Optional)
Now that you have installed Unity and Python, you can now install the Unity and

dependencies for each project and are supported on Mac / Windows / Linux. We
offer a dedicated [guide on Virtual Environments](Using-Virtual-Environment.md).
#### (Windows) Installing PyTorch
On Windows, you'll have to install the PyTorch package separately prior to
installing ML-Agents. Activate your virtual environment and run from the command line:
```sh
pip3 install torch==1.7.0 -f https://download.pytorch.org/whl/torch_stable.html
```
Note that on Windows, you may also need Microsoft's
[Visual C++ Redistributable](https://support.microsoft.com/en-us/help/2977003/the-latest-supported-visual-c-downloads)
if you don't have it already. See the [PyTorch installation guide](https://pytorch.org/get-started/locally/)
for more installation options and versions.
#### Installing `mlagents`
To install the `mlagents` Python package, activate your virtual environment and
run from the command line:

By installing the `mlagents` package, the dependencies listed in the
[setup.py file](../ml-agents/setup.py) are also installed. These include
[TensorFlow](Background-TensorFlow.md) (Requires a CPU w/ AVX support).
[PyTorch](Background-PyTorch.md) (Requires a CPU w/ AVX support).
#### Advanced: Local Installation for Development

the repository's root directory, run:
```sh
pip3 install torch -f https://download.pytorch.org/whl/torch_stable.html
pip3 install -e ./ml-agents-envs
pip3 install -e ./ml-agents
```

docs/Learning-Environment-Create-New.md (4 changes)


}
// Fell off platform
if (this.transform.localPosition.y < 0)
else if (this.transform.localPosition.y < 0)
{
EndEpisode();
}

1. Add the `Decision Requester` script with the Add Component button from the
RollerAgent Inspector.
1. Change **Decision Period** to `10`. For more information on decisions, see [the Agent documentation](Learning-Environment-Design-Agents.md#decisions)
1. Drag the Target GameObject from the Hierarchy window to the RollerAgent
Target field.
1. Add the `Behavior Parameters` script with the Add Component button from the
RollerAgent Inspector.
1. Modify the Behavior Parameters of the Agent :

docs/Learning-Environment-Design-Agents.md (2 changes)


AddReward(1.0f);
EndEpisode();
}
if (hitObjects.Where(col => col.gameObject.tag == "pit").ToArray().Length == 1)
else if (hitObjects.Where(col => col.gameObject.tag == "pit").ToArray().Length == 1)
{
AddReward(-1f);
EndEpisode();

docs/Learning-Environment-Executable.md (4 changes)


```
You can press Ctrl+C to stop the training, and your trained model will be at
`results/<run-identifier>/<behavior_name>.nn`, which corresponds to your model's
`results/<run-identifier>/<behavior_name>.onnx`, which corresponds to your model's
latest checkpoint. (**Note:** There is a known bug on Windows that causes the
saving of the model to fail when you early terminate the training, it's
recommended to wait until Step has reached the max_steps parameter you set in

`Project/Assets/ML-Agents/Examples/3DBall/TFModels/`.
1. Open the Unity Editor, and select the **3DBall** scene as described above.
1. Select the **3DBall** prefab from the Project window and select **Agent**.
1. Drag the `<behavior_name>.nn` file from the Project window of the Editor to
1. Drag the `<behavior_name>.onnx` file from the Project window of the Editor to
the **Model** placeholder in the **Ball3DAgent** inspector window.
1. Press the **Play** button at the top of the Editor.

docs/ML-Agents-Overview.md (8 changes)


for training intelligent agents. Agents can be trained using reinforcement
learning, imitation learning, neuroevolution, or other machine learning methods
through a simple-to-use Python API. We also provide implementations (based on
TensorFlow) of state-of-the-art algorithms to enable game developers and
PyTorch) of state-of-the-art algorithms to enable game developers and
hobbyists to easily train intelligent agents for 2D, 3D and VR/AR games. These
trained agents can be used for multiple purposes, including controlling NPC
behavior (in a variety of settings such as multi-agent and adversarial),

that include overviews and helpful resources on the
[Unity Engine](Background-Unity.md),
[machine learning](Background-Machine-Learning.md) and
[TensorFlow](Background-TensorFlow.md). We **strongly** recommend browsing the
[PyTorch](Background-PyTorch.md). We **strongly** recommend browsing the
machine learning concepts or have not previously heard of TensorFlow.
machine learning concepts or have not previously heard of PyTorch.
The remainder of this page contains a deep dive into ML-Agents, its key
components, different training modes and scenarios. By the end of it, you should

### Custom Training and Inference
In the previous mode, the Agents were used for training to generate a TensorFlow
In the previous mode, the Agents were used for training to generate a PyTorch
model that the Agents can later use. However, any user of the ML-Agents Toolkit
can leverage their own algorithms for training. In this case, the behaviors of
all the Agents in the scene will be controlled within Python. You can even turn

docs/Readme.md (2 changes)


- [ML-Agents Toolkit Overview](ML-Agents-Overview.md)
- [Background: Unity](Background-Unity.md)
- [Background: Machine Learning](Background-Machine-Learning.md)
- [Background: TensorFlow](Background-TensorFlow.md)
- [Background: PyTorch](Background-PyTorch.md)
- [Example Environments](Learning-Environment-Examples.md)
## Creating Learning Environments

docs/Training-Configuration-File.md (2 changes)


| `time_horizon` | (default = `64`) How many steps of experience to collect per-agent before adding it to the experience buffer. When this limit is reached before the end of an episode, a value estimate is used to predict the overall expected reward from the agent's current state. As such, this parameter trades off between a less biased, but higher variance estimate (long time horizon) and more biased, but less varied estimate (short time horizon). In cases where there are frequent rewards within an episode, or episodes are prohibitively large, a smaller number can be more ideal. This number should be large enough to capture all the important behavior within a sequence of an agent's actions. <br><br> Typical range: `32` - `2048` |
| `max_steps` | (default = `500000`) Total number of steps (i.e., observation collected and action taken) that must be taken in the environment (or across all environments if using multiple in parallel) before ending the training process. If you have multiple agents with the same behavior name within your environment, all steps taken by those agents will contribute to the same `max_steps` count. <br><br>Typical range: `5e5` - `1e7` |
| `keep_checkpoints` | (default = `5`) The maximum number of model checkpoints to keep. Checkpoints are saved after the number of steps specified by the checkpoint_interval option. Once the maximum number of checkpoints has been reached, the oldest checkpoint is deleted when saving a new checkpoint. |
| `checkpoint_interval` | (default = `500000`) The number of experiences collected between each checkpoint by the trainer. A maximum of `keep_checkpoints` checkpoints are saved before old ones are deleted. Each checkpoint saves the `.nn` (and `.onnx` if applicable) files in `results/` folder.|
| `checkpoint_interval` | (default = `500000`) The number of experiences collected between each checkpoint by the trainer. A maximum of `keep_checkpoints` checkpoints are saved before old ones are deleted. Each checkpoint saves the `.onnx` (and `.nn` if using TensorFlow) files in `results/` folder.|
| `init_path` | (default = None) Initialize trainer from a previously saved model. Note that the prior run should have used the same trainer configurations as the current run, and have been saved with the same version of ML-Agents. <br><br>You should provide the full path to the folder where the checkpoints were saved, e.g. `./models/{run-id}/{behavior_name}`. This option is provided in case you want to initialize different behaviors from different runs; in most cases, it is sufficient to use the `--initialize-from` CLI parameter to initialize all models from the same run. |
| `threaded` | (default = `true`) By default, model updates can happen while the environment is being stepped. This violates the [on-policy](https://spinningup.openai.com/en/latest/user/algorithms.html#the-on-policy-algorithms) assumption of PPO slightly in exchange for a training speedup. To maintain the strict on-policyness of PPO, you can disable parallel updates by setting `threaded` to `false`. There is usually no reason to turn `threaded` off for SAC. |
| `hyperparameters -> learning_rate` | (default = `3e-4`) Initial learning rate for gradient descent. Corresponds to the strength of each gradient descent update step. This should typically be decreased if training is unstable, and the reward does not consistently increase. <br><br>Typical range: `1e-5` - `1e-3` |

docs/Training-ML-Agents.md (35 changes)


- [Curriculum Learning](#curriculum)
- [Training with a Curriculum](#training-with-a-curriculum)
- [Training Using Concurrent Unity Instances](#training-using-concurrent-unity-instances)
- [Using PyTorch (Experimental)](#using-pytorch-experimental)
For a broad overview of reinforcement learning, imitation learning and all the
training scenarios, methods and options within the ML-Agents Toolkit, see

values. See [Using TensorBoard](Using-Tensorboard.md) for more details on how
to visualize the training metrics.
1. Models: these contain the model checkpoints that
are updated throughout training and the final model file (`.nn`). This final
are updated throughout training and the final model file (`.onnx`). This final
model file is generated once either when training completes or is
interrupted.
1. Timers file (under `results/<run-identifier>/run_logs`): this contains aggregated

- **Result Variation Using Concurrent Unity Instances** - If you keep all the
hyperparameters the same, but change `--num-envs=<n>`, the results and model
would likely change.
### Using PyTorch (Experimental)
ML-Agents, by default, uses TensorFlow as its backend, but experimental support
for PyTorch has been added. To use PyTorch, the `torch` Python package must
be installed, and PyTorch must be enabled for your trainer.
#### Installing PyTorch
If you've already installed ML-Agents, follow the
[official PyTorch install instructions](https://pytorch.org/get-started/locally/) for
your platform and configuration. Note that on Windows, you may also need Microsoft's
[Visual C++ Redistributable](https://support.microsoft.com/en-us/help/2977003/the-latest-supported-visual-c-downloads) if you don't have it already.
If you're installing or upgrading ML-Agents on Linux or Mac, you can also run
`pip3 install mlagents[torch]` instead of `pip3 install mlagents`
during [installation](Installation.md). On Windows, install ML-Agents first and then
separately install PyTorch.
#### Enabling PyTorch
PyTorch can be enabled in one of two ways. First, by adding `--torch` to the
`mlagents-learn` command. This will make all behaviors train with PyTorch.
Second, by changing the `framework` option for your agent behavior in the
configuration YAML as below. This will use PyTorch just for that behavior.
```yaml
behaviors:
YourAgentBehavior:
framework: pytorch
```

docs/Training-on-Amazon-Web-Service.md (2 changes)


# Download and install the latest Nvidia driver for ubuntu
# Please refer to http://download.nvidia.com/XFree86/Linux-x86_64/latest.txt
$ wget http://download.nvidia.com/XFree86/Linux-x86_64/390.87/NVIDIA-Linux-x86_64-390.87.run
$ sudo /bin/bash ./NVIDIA-Linux-x86_64-390.67.run --accept-license --no-questions --ui=none
$ sudo /bin/bash ./NVIDIA-Linux-x86_64-390.87.run --accept-license --no-questions --ui=none
# Disable Nouveau as it will clash with the Nvidia driver
$ sudo echo 'blacklist nouveau' | sudo tee -a /etc/modprobe.d/blacklist.conf

docs/Unity-Inference-Engine.md (5 changes)


[industry-standard open format](https://onnx.ai/about.html) produced by the
[tf2onnx package](https://github.com/onnx/tensorflow-onnx).
Export to ONNX is currently considered beta. To enable it, make sure
`tf2onnx>=1.5.5` is installed in pip. tf2onnx does not currently support
tensorflow 2.0.0 or later, or earlier than 1.12.0.
Export to ONNX is used if using PyTorch (the default). To enable it
while using TensorFlow, make sure `tf2onnx>=1.6.1` is installed in pip.
## Using the Unity Inference Engine

gym-unity/gym_unity/__init__.py (4 changes)


# Version of the library that will be used to upload to pypi
__version__ = "0.21.1"
__version__ = "0.22.0.dev0"
__release_tag__ = "release_9"
__release_tag__ = None

gym-unity/gym_unity/envs/__init__.py (21 changes)


self._previous_decision_step = decision_steps
# Set action spaces
if self.group_spec.is_action_discrete():
branches = self.group_spec.discrete_action_branches
if self.group_spec.action_size == 1:
if self.group_spec.action_spec.is_discrete():
self.action_size = self.group_spec.action_spec.discrete_size
branches = self.group_spec.action_spec.discrete_branches
if self.group_spec.action_spec.discrete_size == 1:
self._action_space = spaces.Discrete(branches[0])
else:
if flatten_branched:

self._action_space = spaces.MultiDiscrete(branches)
else:
elif self.group_spec.action_spec.is_continuous():
high = np.array([1] * self.group_spec.action_shape)
self.action_size = self.group_spec.action_spec.continuous_size
high = np.array([1] * self.group_spec.action_spec.continuous_size)
else:
raise UnityGymException(
"The gym wrapper does not provide explicit support for both discrete "
"and continuous actions."
)
# Set observations space
list_spaces: List[gym.Space] = []

# Translate action into list
action = self._flattener.lookup_action(action)
spec = self.group_spec
action = np.array(action).reshape((1, spec.action_size))
action = np.array(action).reshape((1, self.action_size))
self._env.set_actions(self.name, action)
self._env.step()
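
The hunk above moves the gym wrapper from the old `ActionType`/`action_size` checks to the new `ActionSpec` API. As a reading aid, here is a minimal sketch of how that branch logic maps an `ActionSpec` onto a Gym action space; the helper name `make_action_space` and the `flatten_branched` handling are illustrative, not the wrapper's actual structure.

```python
import numpy as np
from gym import spaces

def make_action_space(action_spec, flatten_branched=False):
    # Hypothetical helper mirroring the branching shown in the diff above;
    # it is not part of gym_unity itself.
    if action_spec.is_discrete():
        branches = action_spec.discrete_branches
        if action_spec.discrete_size == 1:
            # One branch: an ordinary Discrete space over its choices.
            return spaces.Discrete(branches[0])
        if flatten_branched:
            # The real wrapper uses a flattener (self._flattener) to map a
            # single flat index back to a multi-branch action; the flattened
            # space has one entry per combination of branch choices.
            return spaces.Discrete(int(np.prod(branches)))
        return spaces.MultiDiscrete(branches)
    elif action_spec.is_continuous():
        # Continuous actions are bounded to [-1, 1] per component.
        high = np.array([1.0] * action_spec.continuous_size)
        return spaces.Box(-high, high, dtype=np.float32)
    else:
        # Mirrors the UnityGymException raised for mixed action spaces.
        raise ValueError("Hybrid discrete + continuous actions are not supported.")
```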

gym-unity/gym_unity/tests/test_gym.py (8 changes)


from gym_unity.envs import UnityToGymWrapper
from mlagents_envs.base_env import (
BehaviorSpec,
ActionType,
ActionSpec,
DecisionSteps,
TerminalSteps,
BehaviorMapping,

Creates a mock BrainParameters object with parameters.
"""
# Avoid using mutable object as default param
act_type = ActionType.DISCRETE
act_type = ActionType.CONTINUOUS
action_spec = ActionSpec.create_continuous(vector_action_space_size)
action_spec = ActionSpec.create_discrete(vector_action_space_size)
return BehaviorSpec(obs_shapes, act_type, vector_action_space_size)
return BehaviorSpec(obs_shapes, action_spec)
def create_mock_vector_steps(specs, num_agents=1, number_visual_observations=0):

ml-agents-envs/mlagents_envs/__init__.py (4 changes)


# Version of the library that will be used to upload to pypi
__version__ = "0.21.1"
__version__ = "0.22.0.dev0"
__release_tag__ = "release_9"
__release_tag__ = None

ml-agents-envs/mlagents_envs/base_env.py (154 changes)


NamedTuple,
Tuple,
Optional,
Union,
Dict,
Iterator,
Any,

from enum import Enum
from mlagents_envs.exception import UnityActionException
AgentId = int
BehaviorName = str

)
class ActionType(Enum):
DISCRETE = 0
CONTINUOUS = 1
class BehaviorSpec(NamedTuple):
class ActionSpec(NamedTuple):
A NamedTuple to containing information about the observations and actions
spaces for a group of Agents under the same behavior.
- observation_shapes is a List of Tuples of int : Each Tuple corresponds
to an observation's dimensions. The shape tuples have the same ordering as
the ordering of the DecisionSteps and TerminalSteps.
- action_type is the type of data of the action. it can be discrete or
continuous. If discrete, the action tensors are expected to be int32. If
continuous, the actions are expected to be float32.
- action_shape is:
- An int in continuous action space corresponding to the number of
floats that constitute the action.
- A Tuple of int in discrete action space where each int corresponds to
the number of discrete actions available to the agent.
A NamedTuple containing utility functions and information about the action spaces
for a group of Agents under the same behavior.
- num_continuous_actions is an int corresponding to the number of floats which
constitute the action.
- discrete_branch_sizes is a Tuple of int where each int corresponds to
the number of discrete actions available to the agent on an independent action branch.
observation_shapes: List[Tuple]
action_type: ActionType
action_shape: Union[int, Tuple[int, ...]]
continuous_size: int
discrete_branches: Tuple[int, ...]
def __eq__(self, other):
return (
self.continuous_size == other.continuous_size
and self.discrete_branches == other.discrete_branches
)
def __str__(self):
return f"Continuous: {self.continuous_size}, Discrete: {self.discrete_branches}"
def is_action_discrete(self) -> bool:
# For backwards compatibility
def is_discrete(self) -> bool:
return self.action_type == ActionType.DISCRETE
return self.discrete_size > 0 and self.continuous_size == 0
def is_action_continuous(self) -> bool:
# For backwards compatibility
def is_continuous(self) -> bool:
return self.action_type == ActionType.CONTINUOUS
return self.discrete_size == 0 and self.continuous_size > 0
def action_size(self) -> int:
def discrete_size(self) -> int:
Returns the dimension of the action.
- In the continuous case, will return the number of continuous actions.
- In the (multi-)discrete case, will return the number of action.
branches.
Returns a an int corresponding to the number of discrete branches.
if self.action_type == ActionType.DISCRETE:
return len(self.action_shape) # type: ignore
else:
return self.action_shape # type: ignore
@property
def discrete_action_branches(self) -> Optional[Tuple[int, ...]]:
"""
Returns a Tuple of int corresponding to the number of possible actions
for each branch (only for discrete actions). Will return None in
for continuous actions.
"""
if self.action_type == ActionType.DISCRETE:
return self.action_shape # type: ignore
else:
return None
return len(self.discrete_branches)
def create_empty_action(self, n_agents: int) -> np.ndarray:
def empty_action(self, n_agents: int) -> np.ndarray:
if self.action_type == ActionType.DISCRETE:
return np.zeros((n_agents, self.action_size), dtype=np.int32)
else:
return np.zeros((n_agents, self.action_size), dtype=np.float32)
if self.is_continuous():
return np.zeros((n_agents, self.continuous_size), dtype=np.float32)
return np.zeros((n_agents, self.discrete_size), dtype=np.int32)
def create_random_action(self, n_agents: int) -> np.ndarray:
def random_action(self, n_agents: int) -> np.ndarray:
:param generator: The random number generator used for creating random action
if self.is_action_continuous():
if self.is_continuous():
low=-1.0, high=1.0, size=(n_agents, self.action_size)
low=-1.0, high=1.0, size=(n_agents, self.continuous_size)
return action
elif self.is_action_discrete():
branch_size = self.discrete_action_branches
else:
branch_size = self.discrete_branches
action = np.column_stack(
[
np.random.randint(

dtype=np.int32,
)
for i in range(self.action_size)
for i in range(self.discrete_size)
return action
return action
def _validate_action(
self, actions: np.ndarray, n_agents: int, name: str
) -> np.ndarray:
"""
Validates that action has the correct action dim
for the correct number of agents and ensures the type.
"""
if self.continuous_size > 0:
_size = self.continuous_size
else:
_size = self.discrete_size
_expected_shape = (n_agents, _size)
if actions.shape != _expected_shape:
raise UnityActionException(
f"The behavior {name} needs an input of dimension "
f"{_expected_shape} for (<number of agents>, <action size>) but "
f"received input of dimension {actions.shape}"
)
_expected_type = np.float32 if self.is_continuous() else np.int32
if actions.dtype != _expected_type:
actions = actions.astype(_expected_type)
return actions
@staticmethod
def create_continuous(continuous_size: int) -> "ActionSpec":
"""
Creates an ActionSpec that is homogenously continuous
"""
return ActionSpec(continuous_size, ())
@staticmethod
def create_discrete(discrete_branches: Tuple[int]) -> "ActionSpec":
"""
Creates an ActionSpec that is homogenously discrete
"""
return ActionSpec(0, discrete_branches)
class BehaviorSpec(NamedTuple):
"""
A NamedTuple containing information about the observation and action
spaces for a group of Agents under the same behavior.
- observation_shapes is a List of Tuples of int : Each Tuple corresponds
to an observation's dimensions. The shape tuples have the same ordering as
the ordering of the DecisionSteps and TerminalSteps.
- action_spec is an ActionSpec NamedTuple
"""
observation_shapes: List[Tuple]
action_spec: ActionSpec
class BehaviorMapping(Mapping):
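
Since `ActionSpec` replaces the old `ActionType`/`action_shape` pair, a short usage sketch may help when reading the rest of the diff. It only uses names defined in this hunk (`create_continuous`, `create_discrete`, `empty_action`, `random_action`, `discrete_size`, `discrete_branches`, and the new two-field `BehaviorSpec`); the concrete sizes are arbitrary.

```python
from mlagents_envs.base_env import ActionSpec, BehaviorSpec

# A purely continuous spec: 3 floats per agent, expected dtype float32.
continuous_spec = ActionSpec.create_continuous(3)
assert continuous_spec.is_continuous() and not continuous_spec.is_discrete()
print(continuous_spec.empty_action(n_agents=2).shape)  # (2, 3)
print(continuous_spec.random_action(n_agents=2))        # uniform samples in [-1, 1]

# A purely discrete spec: two branches with 3 and 2 choices, expected dtype int32.
discrete_spec = ActionSpec.create_discrete((3, 2))
assert discrete_spec.discrete_size == 2
assert discrete_spec.discrete_branches == (3, 2)
print(discrete_spec.empty_action(n_agents=2).shape)     # (2, 2)

# BehaviorSpec now simply pairs observation shapes with an ActionSpec.
behavior_spec = BehaviorSpec(observation_shapes=[(8,)], action_spec=discrete_spec)
```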

ml-agents-envs/mlagents_envs/environment.py (35 changes)


n_agents = len(self._env_state[group_name][0])
self._env_actions[group_name] = self._env_specs[
group_name
].create_empty_action(n_agents)
].action_spec.empty_action(n_agents)
step_input = self._generate_step_input(self._env_actions)
with hierarchical_timer("communicator.exchange"):
outputs = self._communicator.exchange(step_input)

self._assert_behavior_exists(behavior_name)
if behavior_name not in self._env_state:
return
spec = self._env_specs[behavior_name]
expected_type = np.float32 if spec.is_action_continuous() else np.int32
expected_shape = (len(self._env_state[behavior_name][0]), spec.action_size)
if action.shape != expected_shape:
raise UnityActionException(
f"The behavior {behavior_name} needs an input of dimension "
f"{expected_shape} for (<number of agents>, <action size>) but "
f"received input of dimension {action.shape}"
)
if action.dtype != expected_type:
action = action.astype(expected_type)
action_spec = self._env_specs[behavior_name].action_spec
num_agents = len(self._env_state[behavior_name][0])
action = action_spec._validate_action(action, num_agents, behavior_name)
self._env_actions[behavior_name] = action
def set_action_for_agent(

if behavior_name not in self._env_state:
return
spec = self._env_specs[behavior_name]
expected_shape = (spec.action_size,)
if action.shape != expected_shape:
raise UnityActionException(
f"The Agent {agent_id} with BehaviorName {behavior_name} needs "
f"an input of dimension {expected_shape} but received input of "
f"dimension {action.shape}"
)
expected_type = np.float32 if spec.is_action_continuous() else np.int32
if action.dtype != expected_type:
action = action.astype(expected_type)
action_spec = self._env_specs[behavior_name].action_spec
num_agents = len(self._env_state[behavior_name][0])
action = action_spec._validate_action(action, num_agents, behavior_name)
self._env_actions[behavior_name] = spec.create_empty_action(
len(self._env_state[behavior_name][0])
)
self._env_actions[behavior_name] = action_spec.empty_action(num_agents)
try:
index = np.where(self._env_state[behavior_name][0].agent_id == agent_id)[0][
0
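
With validation now delegated to `ActionSpec._validate_action` and default actions coming from `ActionSpec.empty_action`, the typical calling pattern from user code looks roughly like the sketch below. It assumes the standard low-level API entry points (`UnityEnvironment`, `behavior_specs`, `get_steps`, `set_actions`) also exercised by the tests further down; the behavior name and sizes are placeholders.

```python
from mlagents_envs.environment import UnityEnvironment

# Connect to an Editor instance waiting to train (file_name=None),
# or pass the path to a built environment instead.
env = UnityEnvironment(file_name=None)
env.reset()

behavior_name = list(env.behavior_specs)[0]
spec = env.behavior_specs[behavior_name]

decision_steps, _ = env.get_steps(behavior_name)
n_agents = len(decision_steps)

# empty_action returns a zero action of the right shape and dtype, so the
# _validate_action check inside set_actions passes without any conversion.
env.set_actions(behavior_name, spec.action_spec.empty_action(n_agents))
env.step()
env.close()
```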

ml-agents-envs/mlagents_envs/rpc_utils.py (27 changes)


from mlagents_envs.base_env import (
BehaviorSpec,
ActionType,
ActionSpec,
DecisionSteps,
TerminalSteps,
)

from mlagents_envs.communicator_objects.brain_parameters_pb2 import BrainParametersProto
import numpy as np
import io
from typing import cast, List, Tuple, Union, Collection, Optional, Iterable
from typing import cast, List, Tuple, Collection, Optional, Iterable
from PIL import Image

:return: BehaviorSpec object.
"""
observation_shape = [tuple(obs.shape) for obs in agent_info.observations]
action_type = (
ActionType.DISCRETE
if brain_param_proto.vector_action_space_type == 0
else ActionType.CONTINUOUS
)
if action_type == ActionType.CONTINUOUS:
action_shape: Union[
int, Tuple[int, ...]
] = brain_param_proto.vector_action_size[0]
if brain_param_proto.vector_action_space_type == 1:
action_spec = ActionSpec(brain_param_proto.vector_action_size[0], ())
action_shape = tuple(brain_param_proto.vector_action_size)
return BehaviorSpec(observation_shape, action_type, action_shape)
action_spec = ActionSpec(0, tuple(brain_param_proto.vector_action_size))
return BehaviorSpec(observation_shape, action_spec)
class OffsetBytesIO:

[agent_info.id for agent_info in terminal_agent_info_list], dtype=np.int32
)
action_mask = None
if behavior_spec.is_action_discrete():
if behavior_spec.action_spec.discrete_size > 0:
a_size = np.sum(behavior_spec.discrete_action_branches)
a_size = np.sum(behavior_spec.action_spec.discrete_branches)
mask_matrix = np.ones((n_agents, a_size), dtype=np.bool)
for agent_index, agent_info in enumerate(decision_agent_info_list):
if agent_info.action_mask is not None:

for k in range(a_size)
]
action_mask = (1 - mask_matrix).astype(np.bool)
indices = _generate_split_indices(behavior_spec.discrete_action_branches)
indices = _generate_split_indices(
behavior_spec.action_spec.discrete_branches
)
action_mask = np.split(action_mask, indices, axis=1)
return (
DecisionSteps(

ml-agents-envs/mlagents_envs/tests/test_envs.py (15 changes)


from unittest import mock
import pytest
import numpy as np
from mlagents_envs.environment import UnityEnvironment
from mlagents_envs.base_env import DecisionSteps, TerminalSteps
from mlagents_envs.exception import UnityEnvironmentException, UnityActionException

env.step()
decision_steps, terminal_steps = env.get_steps("RealFakeBrain")
n_agents = len(decision_steps)
env.set_actions(
"RealFakeBrain", np.zeros((n_agents, spec.action_size), dtype=np.float32)
)
env.set_actions("RealFakeBrain", spec.action_spec.empty_action(n_agents))
env.set_actions(
"RealFakeBrain",
np.zeros((n_agents - 1, spec.action_size), dtype=np.float32),
)
env.set_actions("RealFakeBrain", spec.action_spec.empty_action(n_agents - 1))