
Merge branch 'master' into develop-hybrid-actions-singleton

/develop/actionmodel-csharp
Andrew Cohen, 4 years ago
Current commit: e5f14400
216 files changed, 1771 insertions, 848 deletions
Changed files (first 100 of 216 shown; the number in parentheses is lines changed):
  1. .circleci/config.yml (12)
  2. .github/workflows/pre-commit.yml (4)
  3. .yamato/com.unity.ml-agents-promotion.yml (47)
  4. .yamato/com.unity.ml-agents-test.yml (14)
  5. .yamato/gym-interface-test.yml (3)
  6. .yamato/python-ll-api-test.yml (3)
  7. .yamato/standalone-build-test.yml (9)
  8. .yamato/training-int-tests.yml (8)
  9. DevProject/ProjectSettings/EditorBuildSettings.asset (5)
  10. Dockerfile (159)
  11. Project/Assets/ML-Agents/Examples/3DBall/Prefabs/3DBall.prefab (26)
  12. Project/Assets/ML-Agents/Examples/3DBall/Scenes/3DBall.unity (19)
  13. Project/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DAgent.cs (14)
  14. Project/Assets/ML-Agents/Examples/Crawler/Scenes/CrawlerStaticVariableSpeed.unity (23)
  15. Project/Assets/ML-Agents/Examples/FoodCollector/Prefabs/VisualFoodCollectorArea.prefab (154)
  16. Project/Assets/ML-Agents/Examples/FoodCollector/Scenes/VisualFoodCollector.unity (73)
  17. Project/Assets/ML-Agents/Examples/FoodCollector/Scripts/FoodCollectorAgent.cs (8)
  18. Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ModelOverrider.cs (125)
  19. README.md (10)
  20. com.unity.ml-agents.extensions/Documentation~/Grid-Sensor.md (1)
  21. com.unity.ml-agents.extensions/Documentation~/com.unity.ml-agents.extensions.md (31)
  22. com.unity.ml-agents.extensions/README.md (4)
  23. com.unity.ml-agents.extensions/Runtime/Sensors/GridSensor.cs (4)
  24. com.unity.ml-agents.extensions/package.json (2)
  25. com.unity.ml-agents/CHANGELOG.md (38)
  26. com.unity.ml-agents/Documentation~/com.unity.ml-agents.md (2)
  27. com.unity.ml-agents/Editor/CameraSensorComponentEditor.cs (1)
  28. com.unity.ml-agents/Editor/RenderTextureSensorComponentEditor.cs (1)
  29. com.unity.ml-agents/Runtime/Academy.cs (49)
  30. com.unity.ml-agents/Runtime/Actuators/IActionReceiver.cs (2)
  31. com.unity.ml-agents/Runtime/Actuators/IDiscreteActionMask.cs (2)
  32. com.unity.ml-agents/Runtime/Agent.cs (26)
  33. com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs (62)
  34. com.unity.ml-agents/Runtime/Communicator/UnityRLCapabilities.cs (4)
  35. com.unity.ml-agents/Runtime/Demonstrations/DemonstrationRecorder.cs (2)
  36. com.unity.ml-agents/Runtime/DiscreteActionMasker.cs (2)
  37. com.unity.ml-agents/Runtime/Grpc/CommunicatorObjects/Capabilities.cs (40)
  38. com.unity.ml-agents/Runtime/Grpc/CommunicatorObjects/Observation.cs (34)
  39. com.unity.ml-agents/Runtime/SensorHelper.cs (64)
  40. com.unity.ml-agents/Runtime/Sensors/CameraSensor.cs (2)
  41. com.unity.ml-agents/Runtime/Sensors/CameraSensorComponent.cs (28)
  42. com.unity.ml-agents/Runtime/Sensors/ObservationWriter.cs (52)
  43. com.unity.ml-agents/Runtime/Sensors/RenderTextureSensor.cs (2)
  44. com.unity.ml-agents/Runtime/Sensors/RenderTextureSensorComponent.cs (28)
  45. com.unity.ml-agents/Runtime/Sensors/StackingSensor.cs (198)
  46. com.unity.ml-agents/Runtime/Utilities.cs (47)
  47. com.unity.ml-agents/Tests/Editor/Communicator/GrpcExtensionsTests.cs (31)
  48. com.unity.ml-agents/Tests/Editor/Sensor/StackingSensorTests.cs (156)
  49. com.unity.ml-agents/Tests/Editor/Sensor/VectorSensorTests.cs (10)
  50. com.unity.ml-agents/Tests/Runtime/RuntimeAPITest.cs (4)
  51. com.unity.ml-agents/package.json (2)
  52. docs/Installation-Anaconda-Windows.md (4)
  53. docs/Installation.md (6)
  54. docs/Learning-Environment-Create-New.md (4)
  55. docs/Learning-Environment-Design-Agents.md (2)
  56. docs/Learning-Environment-Examples.md (10)
  57. docs/ML-Agents-Overview.md (24)
  58. docs/Migrating.md (16)
  59. docs/Training-Configuration-File.md (13)
  60. docs/Training-on-Amazon-Web-Service.md (2)
  61. docs/Unity-Inference-Engine.md (4)
  62. gym-unity/gym_unity/__init__.py (2)
  63. gym-unity/setup.py (4)
  64. ml-agents-envs/mlagents_envs/__init__.py (2)
  65. ml-agents-envs/mlagents_envs/communicator_objects/capabilities_pb2.py (11)
  66. ml-agents-envs/mlagents_envs/communicator_objects/capabilities_pb2.pyi (6)
  67. ml-agents-envs/mlagents_envs/communicator_objects/observation_pb2.py (19)
  68. ml-agents-envs/mlagents_envs/communicator_objects/observation_pb2.pyi (6)
  69. ml-agents-envs/mlagents_envs/environment.py (4)
  70. ml-agents-envs/mlagents_envs/rpc_utils.py (81)
  71. ml-agents-envs/mlagents_envs/tests/test_rpc_utils.py (76)
  72. ml-agents-envs/setup.py (4)
  73. ml-agents/mlagents/torch_utils/cpu_utils.py (7)
  74. ml-agents/mlagents/trainers/__init__.py (2)
  75. ml-agents/mlagents/trainers/buffer.py (9)
  76. ml-agents/mlagents/trainers/environment_parameter_manager.py (12)
  77. ml-agents/mlagents/trainers/learn.py (5)
  78. ml-agents/mlagents/trainers/model_saver/tf_model_saver.py (3)
  79. ml-agents/mlagents/trainers/optimizer/tf_optimizer.py (4)
  80. ml-agents/mlagents/trainers/policy/checkpoint_manager.py (10)
  81. ml-agents/mlagents/trainers/policy/torch_policy.py (7)
  82. ml-agents/mlagents/trainers/ppo/trainer.py (2)
  83. ml-agents/mlagents/trainers/sac/optimizer_torch.py (151)
  84. ml-agents/mlagents/trainers/sac/trainer.py (6)
  85. ml-agents/mlagents/trainers/settings.py (38)
  86. ml-agents/mlagents/trainers/tests/test_learn.py (2)
  87. ml-agents/mlagents/trainers/tests/test_rl_trainer.py (8)
  88. ml-agents/mlagents/trainers/tests/test_settings.py (16)
  89. ml-agents/mlagents/trainers/tests/test_subprocess_env_manager.py (10)
  90. ml-agents/mlagents/trainers/tests/test_trainer_controller.py (1)
  91. ml-agents/mlagents/trainers/tests/test_trainer_util.py (27)
  92. ml-agents/mlagents/trainers/tests/test_training_status.py (24)
  93. ml-agents/mlagents/trainers/tests/torch/saver/test_saver.py (4)
  94. ml-agents/mlagents/trainers/tests/torch/saver/test_saver_reward_providers.py (20)
  95. ml-agents/mlagents/trainers/tests/torch/test_policy.py (10)
  96. ml-agents/mlagents/trainers/tests/torch/test_ppo.py (12)
  97. ml-agents/mlagents/trainers/tests/torch/test_sac.py (10)
  98. ml-agents/mlagents/trainers/tests/torch/test_simple_rl.py (253)
  99. ml-agents/mlagents/trainers/tests/torch/test_utils.py (17)
  100. ml-agents/mlagents/trainers/torch/components/bc/module.py (2)

.circleci/config.yml (12 lines changed)


filters:
tags:
# Matches e.g. "release_123"
only: /^release_[0-9]+$/
only: /^DEPRECATED_release_[0-9]+$/
branches:
ignore: /.*/
- deploy:

tags:
# Matches e.g. "release_123"
only: /^release_[0-9]+$/
only: /^DEPRECATED_release_[0-9]+$/
branches:
ignore: /.*/
- deploy:

tags:
# Matches e.g. "release_123"
only: /^release_[0-9]+$/
only: /^DEPRECATED_release_[0-9]+$/
branches:
ignore: /.*/
# These deploy jobs upload to the pypi test repo. They have different tag triggers than the real ones.

filters:
tags:
# Matches e.g. "release_123_test456
only: /^release_[0-9]+_test[0-9]+$/
only: /^DEPRECATED_release_[0-9]+_test[0-9]+$/
branches:
ignore: /.*/
- deploy:

filters:
tags:
# Matches e.g. "release_123_test456
only: /^release_[0-9]+_test[0-9]+$/
only: /^DEPRECATED_release_[0-9]+_test[0-9]+$/
branches:
ignore: /.*/
- deploy:

filters:
tags:
# Matches e.g. "release_123_test456
only: /^release_[0-9]+_test[0-9]+$/
only: /^DEPRECATED_release_[0-9]+_test[0-9]+$/
branches:
ignore: /.*/

.github/workflows/pre-commit.yml (4 lines changed)


runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions/setup-python@v1
- uses: actions/setup-python@v2
with:
python-version: 3.7.x
- uses: actions/setup-ruby@v1
with:
ruby-version: '2.6'

.yamato/com.unity.ml-agents-promotion.yml (47 lines changed)


test_editors:
- version: 2019.3
test_platforms:
- name: win
type: Unity::VM
image: package-ci/win10:stable
flavor: b1.large
---
{% for editor in test_editors %}
{% for platform in test_platforms %}
promotion_test_{{ platform.name }}_{{ editor.version }}:
name : Promotion Test {{ editor.version }} on {{ platform.name }}
agent:
type: {{ platform.type }}
image: {{ platform.image }}
flavor: {{ platform.flavor}}
variables:
UPMCI_PROMOTION: 1
commands:
- npm install upm-ci-utils@stable -g --registry https://artifactory.prd.cds.internal.unity3d.com/artifactory/api/npm/upm-npm
- upm-ci package test --unity-version {{ editor.version }} --package-path com.unity.ml-agents
artifacts:
logs:
paths:
- "upm-ci~/test-results/**/*"
dependencies:
- .yamato/com.unity.ml-agents-pack.yml#pack
{% endfor %}
{% endfor %}
promotion_test_trigger:
name: Promotion Tests Trigger
dependencies:
{% for editor in test_editors %}
{% for platform in test_platforms %}
- .yamato/com.unity.ml-agents-promotion.yml#promotion_test_{{platform.name}}_{{editor.version}}
{% endfor %}
{% endfor %}
promote:
name: Promote to Production
agent:

- "upm-ci~/packages/*.tgz"
dependencies:
- .yamato/com.unity.ml-agents-pack.yml#pack
- .yamato/com.unity.ml-agents-test.yml#all_package_tests
{% for editor in test_editors %}
{% for platform in test_platforms %}
- .yamato/com.unity.ml-agents-promotion.yml#promotion_test_{{ platform.name }}_{{ editor.version }}
{% endfor %}
{% endfor %}

.yamato/com.unity.ml-agents-test.yml (14 lines changed)


- version: 2018.4
# 2018.4 doesn't support code-coverage
enableCodeCoverage: !!bool false
# We want some scene tests to run in the DevProject, but packages there only support 2019+
testProject: Project
testProject: DevProject
testProject: DevProject
testProject: DevProject
testProject: DevProject
test_platforms:
- name: win
type: Unity::VM

type: Unity::VM
image: package-ci/ubuntu:stable
flavor: b1.medium
packages:
- name: com.unity.ml-agents
assembly: Unity.ML-Agents

flavor: {{ platform.flavor}}
commands:
- npm install upm-ci-utils@stable -g --registry https://artifactory.prd.cds.internal.unity3d.com/artifactory/api/npm/upm-npm
- upm-ci project test -u {{ editor.version }} --project-path Project --package-filter {{ package.name }} {{ coverageOptions }} --extra-utr-arg "reruncount=2"
- upm-ci project test -u {{ editor.version }} --project-path {{ editor.testProject }} --package-filter {{ package.name }} {{ coverageOptions }} --extra-utr-arg "reruncount=2"
{% if editor.enableCodeCoverage %}
- python3 ml-agents/tests/yamato/check_coverage_percent.py upm-ci~/test-results/ {{ package.minCoveragePct }}
{% endif %}

pull_request.target match "release.+") AND
NOT pull_request.draft AND
(pull_request.changes.any match "com.unity.ml-agents/**" OR
pull_request.changes.any match " {{ editor.testProject }}/**" OR
{% if package.name == "com.unity.ml-agents.extensions" %}
pull_request.changes.any match "com.unity.ml-agents.extensions/**" OR
{% endif %}

- python -m pip install unity-downloader-cli --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple --upgrade
- unity-downloader-cli -u trunk -c editor --wait --fast
- npm install upm-ci-utils@stable -g --registry https://artifactory.prd.cds.internal.unity3d.com/artifactory/api/npm/upm-npm
- upm-ci project test -u {{ editor.version }} --project-path Project --package-filter {{ package.name }} {{ coverageOptions }} --extra-utr-arg "reruncount=2"
- upm-ci project test -u {{ editor.version }} --project-path {{ editor.testProject }} --package-filter {{ package.name }} {{ coverageOptions }} --extra-utr-arg "reruncount=2"
{% if editor.enableCodeCoverage %}
- python3 ml-agents/tests/yamato/check_coverage_percent.py upm-ci~/test-results/ {{ package.minCoveragePct }}
{% endif %}

.yamato/gym-interface-test.yml (3 lines changed)


test_editors:
- version: 2019.4
{% metadata_file .yamato/test_versions.metafile %}
---
{% for editor in test_editors %}
test_gym_interface_{{ editor.version }}:

.yamato/python-ll-api-test.yml (3 lines changed)


test_editors:
- version: 2019.4
{% metadata_file .yamato/test_versions.metafile %}
---
{% for editor in test_editors %}
test_mac_ll_api_{{ editor.version }}:

.yamato/standalone-build-test.yml (9 lines changed)


test_editors:
- version: 2018.4
- version: 2019.3
{% metadata_file .yamato/test_versions.metafile %}
---
{% for editor in test_editors %}
test_mac_standalone_{{ editor.version }}:

UNITY_VERSION: {{ editor.version }}
commands:
- pip install pyyaml --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
# TODO remove the "--user" command and the path prefix when we can migrate away from the custom bokken image
- python -m pip install unity-downloader-cli --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple --upgrade --user
- /Users/bokken/Library/Python/3.7/bin/unity-downloader-cli -u {{ editor.version }} -c editor --wait --fast
- python -u -m ml-agents.tests.yamato.standalone_build_tests --scene=Assets/ML-Agents/TestScenes/TestCompressedGrid/TestGridCompressed.unity
- python -u -m ml-agents.tests.yamato.standalone_build_tests --scene=Assets/ML-Agents/TestScenes/TestCompressedTexture/TestTextureCompressed.unity
triggers:
cancel_old_ci: true
expression: |

.yamato/training-int-tests.yml (8 lines changed)


test_editors:
- version: 2018.4
- version: 2019.4
- version: 2020.1
{% metadata_file .yamato/test_versions.metafile %}
---
{% for editor in test_editors %}
test_mac_training_int_{{ editor.version }}:

UNITY_VERSION: {{ editor.version }}
commands:
- pip install pyyaml --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
# TODO remove the "--user" command and the path prefix when we can migrate away from the custom bokken image
- python -m pip install unity-downloader-cli --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple --upgrade --user
- /Users/bokken/Library/Python/3.7/bin/unity-downloader-cli -u {{ editor.version }} -c editor --wait --fast
- python -u -m ml-agents.tests.yamato.training_int_tests
# Backwards-compatibility tests.
# If we make a breaking change to the communication protocol, these will need

DevProject/ProjectSettings/EditorBuildSettings.asset (5 lines changed)


EditorBuildSettings:
m_ObjectHideFlags: 0
serializedVersion: 2
m_Scenes: []
m_Scenes:
- enabled: 1
path: Assets/ML-Agents/Scripts/Tests/Runtime/AcademyTest/AcademyStepperTestScene.unity
guid: 9bafc50b1e55b43b2b1ae9620f1f8311
m_configObjects: {}

Dockerfile (159 lines changed)


# Based off of python:3.6-slim, except that we are using ubuntu instead of debian.
FROM ubuntu:16.04
# ensure local python is preferred over distribution python
ENV PATH /usr/local/bin:$PATH
FROM nvidia/cuda:10.0-cudnn7-devel-ubuntu18.04
# http://bugs.python.org/issue19846
# > At the moment, setting "LANG=C" on a Linux system *fundamentally breaks Python 3*, and that's not OK.
ENV LANG C.UTF-8
RUN yes | unminimize
# runtime dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
ca-certificates \
libexpat1 \
libffi6 \
libgdbm3 \
libreadline6 \
libsqlite3-0 \
libssl1.0.0 \
&& rm -rf /var/lib/apt/lists/*
RUN echo "deb http://packages.cloud.google.com/apt cloud-sdk-xenial main" | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list
RUN wget https://packages.cloud.google.com/apt/doc/apt-key.gpg && apt-key add apt-key.gpg
RUN apt-get update && \
apt-get install -y --no-install-recommends wget curl tmux vim git gdebi-core \
build-essential python3-pip unzip google-cloud-sdk htop mesa-utils xorg-dev xorg \
libglvnd-dev libgl1-mesa-dev libegl1-mesa-dev libgles2-mesa-dev && \
wget http://security.ubuntu.com/ubuntu/pool/main/libx/libxfont/libxfont1_1.5.1-1ubuntu0.16.04.4_amd64.deb && \
wget http://security.ubuntu.com/ubuntu/pool/universe/x/xorg-server/xvfb_1.18.4-0ubuntu0.10_amd64.deb && \
yes | gdebi libxfont1_1.5.1-1ubuntu0.16.04.4_amd64.deb && \
yes | gdebi xvfb_1.18.4-0ubuntu0.10_amd64.deb
RUN python3 -m pip install --upgrade pip
RUN pip install setuptools==41.0.0
ENV GPG_KEY 0D96DF4D4110E5C43FBFB17F2D347EA6AA65421D
ENV PYTHON_VERSION 3.6.4
ENV LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH
RUN set -ex \
&& buildDeps=" \
dpkg-dev \
gcc \
libbz2-dev \
libc6-dev \
libexpat1-dev \
libffi-dev \
libgdbm-dev \
liblzma-dev \
libncursesw5-dev \
libreadline-dev \
libsqlite3-dev \
libssl-dev \
make \
tcl-dev \
tk-dev \
wget \
xz-utils \
zlib1g-dev \
# as of Stretch, "gpg" is no longer included by default
$(command -v gpg > /dev/null || echo 'gnupg dirmngr') \
" \
&& apt-get update && apt-get install -y $buildDeps --no-install-recommends && rm -rf /var/lib/apt/lists/* \
\
&& wget -O python.tar.xz "https://www.python.org/ftp/python/${PYTHON_VERSION%%[a-z]*}/Python-$PYTHON_VERSION.tar.xz" \
&& wget -O python.tar.xz.asc "https://www.python.org/ftp/python/${PYTHON_VERSION%%[a-z]*}/Python-$PYTHON_VERSION.tar.xz.asc" \
&& export GNUPGHOME="$(mktemp -d)" \
&& gpg --keyserver ha.pool.sks-keyservers.net --recv-keys "$GPG_KEY" \
&& gpg --batch --verify python.tar.xz.asc python.tar.xz \
&& rm -rf "$GNUPGHOME" python.tar.xz.asc \
&& mkdir -p /usr/src/python \
&& tar -xJC /usr/src/python --strip-components=1 -f python.tar.xz \
&& rm python.tar.xz \
\
&& cd /usr/src/python \
&& gnuArch="$(dpkg-architecture --query DEB_BUILD_GNU_TYPE)" \
&& ./configure \
--build="$gnuArch" \
--enable-loadable-sqlite-extensions \
--enable-shared \
--with-system-expat \
--with-system-ffi \
--without-ensurepip \
&& make -j "$(nproc)" \
&& make install \
&& ldconfig \
\
&& apt-get purge -y --auto-remove $buildDeps \
\
&& find /usr/local -depth \
\( \
\( -type d -a \( -name test -o -name tests \) \) \
-o \
\( -type f -a \( -name '*.pyc' -o -name '*.pyo' \) \) \
\) -exec rm -rf '{}' + \
&& rm -rf /usr/src/python
# make some useful symlinks that are expected to exist
RUN cd /usr/local/bin \
&& ln -s idle3 idle \
&& ln -s pydoc3 pydoc \
&& ln -s python3 python \
&& ln -s python3-config python-config
# if this is called "PIP_VERSION", pip explodes with "ValueError: invalid truth value '<VERSION>'"
ENV PYTHON_PIP_VERSION 9.0.3
RUN set -ex; \
\
apt-get update; \
apt-get install -y --no-install-recommends wget; \
rm -rf /var/lib/apt/lists/*; \
\
wget -O get-pip.py 'https://bootstrap.pypa.io/get-pip.py'; \
\
apt-get purge -y --auto-remove wget; \
\
python get-pip.py \
--disable-pip-version-check \
--no-cache-dir \
"pip==$PYTHON_PIP_VERSION" \
; \
pip --version; \
\
find /usr/local -depth \
\( \
\( -type d -a \( -name test -o -name tests \) \) \
-o \
\( -type f -a \( -name '*.pyc' -o -name '*.pyo' \) \) \
\) -exec rm -rf '{}' +; \
rm -f get-pip.py
RUN apt-get update && apt-get -y upgrade
# xvfb is used to do CPU based rendering of Unity
RUN apt-get install -y xvfb
# Install ml-agents-envs package locally
COPY ml-agents-envs /ml-agents-envs
WORKDIR /ml-agents-envs
RUN pip install -e .
# Install ml-agents package next
COPY ml-agents /ml-agents
#checkout ml-agents for SHA
RUN mkdir /ml-agents
RUN pip install -e .
# Port 5004 is the port used in Editor training.
# Environments will start from port 5005,
# so allow enough ports for several environments.
EXPOSE 5004-5050
ENTRYPOINT ["xvfb-run", "--auto-servernum", "--server-args='-screen 0 640x480x24'", "mlagents-learn"]
ARG SHA
RUN git init
RUN git remote add origin https://github.com/Unity-Technologies/ml-agents.git
RUN git fetch --depth 1 origin $SHA
RUN git checkout FETCH_HEAD
RUN pip install -e /ml-agents/ml-agents-envs
RUN pip install -e /ml-agents/ml-agents

Project/Assets/ML-Agents/Examples/3DBall/Prefabs/3DBall.prefab (26 lines changed)


m_Name:
m_EditorClassIdentifier:
m_BrainParameters:
vectorObservationSize: 8
numStackedVectorObservations: 1
vectorActionSize: 02000000
vectorActionDescriptions: []
vectorActionSpaceType: 1
VectorObservationSize: 8
NumStackedVectorObservations: 1
VectorActionSize: 02000000
VectorActionDescriptions: []
VectorActionSpaceType: 1
m_TeamID: 0
m_useChildSensors: 1
TeamId: 0
m_UseChildSensors: 1
m_UseChildActuators: 1
m_ObservableAttributeHandling: 0
--- !u!114 &114715123104194396
MonoBehaviour:
m_ObjectHideFlags: 0

m_Script: {fileID: 11500000, guid: aaba48bf82bee4751aa7b89569e57f73, type: 3}
m_Name:
m_EditorClassIdentifier:
maxStep: 5000
agentParameters:
maxStep: 0
hasUpgradedFromAgentParameters: 1
MaxStep: 5000
useVecObs: 1
--- !u!114 &1306725529891448089
MonoBehaviour:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
DecisionPeriod: 5
RepeatAction: 1
offsetStep: 0
TakeActionsBetweenDecisions: 1
--- !u!114 &1758424554059689351
MonoBehaviour:
m_ObjectHideFlags: 0

m_Script: {fileID: 11500000, guid: 3a6da8f78a394c6ab027688eab81e04d, type: 3}
m_Name:
m_EditorClassIdentifier:
debugCommandLineOverride:
--- !u!1 &1533320402322554
GameObject:
m_ObjectHideFlags: 0

Project/Assets/ML-Agents/Examples/3DBall/Scenes/3DBall.unity (19 lines changed)


m_ReflectionIntensity: 1
m_CustomReflection: {fileID: 0}
m_Sun: {fileID: 0}
m_IndirectSpecularColor: {r: 0.44971162, g: 0.49977726, b: 0.5756362, a: 1}
m_IndirectSpecularColor: {r: 0.44971168, g: 0.4997775, b: 0.57563686, a: 1}
m_UseRadianceAmbientProbe: 0
--- !u!157 &3
LightmapSettings:

m_Modification:
m_TransformParent: {fileID: 0}
m_Modifications:
- target: {fileID: 1321468028730240, guid: cfa81c019162c4e3caf6e2999c6fdf48, type: 3}
propertyPath: m_Name
value: 3DBall
objectReference: {fileID: 0}
- target: {fileID: 4679453577574622, guid: cfa81c019162c4e3caf6e2999c6fdf48, type: 3}
propertyPath: m_LocalPosition.x
value: 0

- target: {fileID: 4679453577574622, guid: cfa81c019162c4e3caf6e2999c6fdf48, type: 3}
propertyPath: m_RootOrder
value: 5
objectReference: {fileID: 0}
- target: {fileID: 4679453577574622, guid: cfa81c019162c4e3caf6e2999c6fdf48, type: 3}
propertyPath: m_LocalEulerAnglesHint.x
value: 0
objectReference: {fileID: 0}
- target: {fileID: 4679453577574622, guid: cfa81c019162c4e3caf6e2999c6fdf48, type: 3}
propertyPath: m_LocalEulerAnglesHint.y
value: 0
objectReference: {fileID: 0}
- target: {fileID: 4679453577574622, guid: cfa81c019162c4e3caf6e2999c6fdf48, type: 3}
propertyPath: m_LocalEulerAnglesHint.z
value: 0
objectReference: {fileID: 0}
m_RemovedComponents: []
m_SourcePrefab: {fileID: 100100000, guid: cfa81c019162c4e3caf6e2999c6fdf48, type: 3}

maximumDeltaTime: 0.33333334
solverIterations: 6
solverVelocityIterations: 1
reuseCollisionCallbacks: 1
--- !u!1 &1746325439
GameObject:
m_ObjectHideFlags: 0

Project/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DAgent.cs (14 lines changed)


{
[Header("Specific to Ball3D")]
public GameObject ball;
[Tooltip("Whether to use vector observation. This option should be checked " +
"in 3DBall scene, and unchecked in Visual3DBall scene. ")]
public bool useVecObs;
Rigidbody m_BallRb;
EnvironmentParameters m_ResetParams;

public override void CollectObservations(VectorSensor sensor)
{
sensor.AddObservation(gameObject.transform.rotation.z);
sensor.AddObservation(gameObject.transform.rotation.x);
sensor.AddObservation(ball.transform.position - gameObject.transform.position);
sensor.AddObservation(m_BallRb.velocity);
if (useVecObs)
{
sensor.AddObservation(gameObject.transform.rotation.z);
sensor.AddObservation(gameObject.transform.rotation.x);
sensor.AddObservation(ball.transform.position - gameObject.transform.position);
sensor.AddObservation(m_BallRb.velocity);
}
}
public override void OnActionReceived(ActionBuffers actionBuffers)

Project/Assets/ML-Agents/Examples/Crawler/Scenes/CrawlerStaticVariableSpeed.unity (23 lines changed)


objectReference: {fileID: 0}
m_RemovedComponents: []
m_SourcePrefab: {fileID: 100100000, guid: 0456c89e8c9c243d595b039fe7aa0bf9, type: 3}
--- !u!1 &441460235 stripped
GameObject:
m_CorrespondingSourceObject: {fileID: 4845971001715176661, guid: 0456c89e8c9c243d595b039fe7aa0bf9,
type: 3}
m_PrefabInstance: {fileID: 71447557}
m_PrefabAsset: {fileID: 0}
--- !u!114 &441460236
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 441460235}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: f2902496c0120472b90269f94a0aec7e, type: 3}
m_Name:
m_EditorClassIdentifier:
Record: 1
NumStepsToRecord: 10000
DemonstrationName: ExpCrawlerStaVS
DemonstrationDirectory:
--- !u!1001 &455366880
PrefabInstance:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
target: {fileID: 1053322438}
smoothingTime: 0
--- !u!81 &914210116
AudioListener:
m_ObjectHideFlags: 0

Project/Assets/ML-Agents/Examples/FoodCollector/Prefabs/VisualFoodCollectorArea.prefab (154 lines changed)


m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 1145096862361766}
m_LocalRotation: {x: -0, y: -0, z: -0, w: 1}
m_LocalRotation: {x: 0, y: 0, z: 0, w: 1}
m_LocalPosition: {x: 0, y: 0, z: 0}
m_LocalScale: {x: 1, y: 1, z: 1}
m_Children:

m_Name:
m_EditorClassIdentifier:
m_BrainParameters:
vectorObservationSize: 0
numStackedVectorObservations: 1
vectorActionSize: 03000000030000000300000002000000
vectorActionDescriptions: []
vectorActionSpaceType: 0
m_Model: {fileID: 0}
VectorObservationSize: 1
NumStackedVectorObservations: 1
VectorActionSize: 03000000030000000300000002000000
VectorActionDescriptions: []
VectorActionSpaceType: 0
m_Model: {fileID: 11400000, guid: c3b1eb0bcf06b4c0488599c7ab806de7, type: 3}
m_TeamID: 0
TeamId: 0
m_UseChildActuators: 1
m_ObservableAttributeHandling: 0
--- !u!114 &114380897261200276
MonoBehaviour:
m_ObjectHideFlags: 0

m_Script: {fileID: 11500000, guid: c66e6845309d241c78a6d77ee2567928, type: 3}
m_Name:
m_EditorClassIdentifier:
maxStep: 5000
agentParameters:
maxStep: 0
hasUpgradedFromAgentParameters: 1
MaxStep: 5000
area: {fileID: 1145096862361766}
turnSpeed: 300
moveSpeed: 2

myLaser: {fileID: 1900094563283840}
contribute: 0
useVectorObs: 0
useVectorFrozenFlag: 1
--- !u!114 &114326390494230518
MonoBehaviour:
m_ObjectHideFlags: 0

m_Script: {fileID: 11500000, guid: 282f342c2ab144bf38be65d4d0c4e07d, type: 3}
m_Name:
m_EditorClassIdentifier:
camera: {fileID: 20380145723616022}
sensorName: CameraSensor
width: 84
height: 84
grayscale: 0
compression: 1
m_Camera: {fileID: 20380145723616022}
m_SensorName: CameraSensor
m_Width: 84
m_Height: 84
m_Grayscale: 0
m_ObservationStacks: 1
m_Compression: 1
--- !u!114 &4034342608499629224
MonoBehaviour:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
DecisionPeriod: 5
RepeatAction: 1
offsetStep: 0
TakeActionsBetweenDecisions: 1
--- !u!1 &1179319070824364
GameObject:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
m_BrainParameters:
vectorObservationSize: 0
numStackedVectorObservations: 1
vectorActionSize: 03000000030000000300000002000000
vectorActionDescriptions: []
vectorActionSpaceType: 0
m_Model: {fileID: 0}
VectorObservationSize: 1
NumStackedVectorObservations: 1
VectorActionSize: 03000000030000000300000002000000
VectorActionDescriptions: []
VectorActionSpaceType: 0
m_Model: {fileID: 11400000, guid: c3b1eb0bcf06b4c0488599c7ab806de7, type: 3}
m_TeamID: 0
TeamId: 0
m_UseChildActuators: 1
m_ObservableAttributeHandling: 0
--- !u!114 &114869844339180154
MonoBehaviour:
m_ObjectHideFlags: 0

m_Script: {fileID: 11500000, guid: c66e6845309d241c78a6d77ee2567928, type: 3}
m_Name:
m_EditorClassIdentifier:
maxStep: 5000
agentParameters:
maxStep: 0
hasUpgradedFromAgentParameters: 1
MaxStep: 5000
area: {fileID: 1145096862361766}
turnSpeed: 300
moveSpeed: 2

myLaser: {fileID: 1307818939507544}
contribute: 0
useVectorObs: 0
useVectorFrozenFlag: 1
--- !u!114 &114429222608880102
MonoBehaviour:
m_ObjectHideFlags: 0

m_Script: {fileID: 11500000, guid: 282f342c2ab144bf38be65d4d0c4e07d, type: 3}
m_Name:
m_EditorClassIdentifier:
camera: {fileID: 20863703825242712}
sensorName: CameraSensor
width: 84
height: 84
grayscale: 0
compression: 1
m_Camera: {fileID: 20863703825242712}
m_SensorName: CameraSensor
m_Width: 84
m_Height: 84
m_Grayscale: 0
m_ObservationStacks: 1
m_Compression: 1
--- !u!114 &7234640249101665162
MonoBehaviour:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
DecisionPeriod: 5
RepeatAction: 1
offsetStep: 0
TakeActionsBetweenDecisions: 1
--- !u!1 &1353209702154624
GameObject:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
m_BrainParameters:
vectorObservationSize: 0
numStackedVectorObservations: 1
vectorActionSize: 03000000030000000300000002000000
vectorActionDescriptions: []
vectorActionSpaceType: 0
m_Model: {fileID: 0}
VectorObservationSize: 1
NumStackedVectorObservations: 1
VectorActionSize: 03000000030000000300000002000000
VectorActionDescriptions: []
VectorActionSpaceType: 0
m_Model: {fileID: 11400000, guid: c3b1eb0bcf06b4c0488599c7ab806de7, type: 3}
m_TeamID: 0
TeamId: 0
m_UseChildActuators: 1
m_ObservableAttributeHandling: 0
--- !u!114 &114484596947519388
MonoBehaviour:
m_ObjectHideFlags: 0

m_Script: {fileID: 11500000, guid: c66e6845309d241c78a6d77ee2567928, type: 3}
m_Name:
m_EditorClassIdentifier:
maxStep: 5000
agentParameters:
maxStep: 0
hasUpgradedFromAgentParameters: 1
MaxStep: 5000
area: {fileID: 1145096862361766}
turnSpeed: 300
moveSpeed: 2

myLaser: {fileID: 1898252046043334}
contribute: 0
useVectorObs: 0
useVectorFrozenFlag: 1
--- !u!114 &114036270357198286
MonoBehaviour:
m_ObjectHideFlags: 0

m_Script: {fileID: 11500000, guid: 282f342c2ab144bf38be65d4d0c4e07d, type: 3}
m_Name:
m_EditorClassIdentifier:
camera: {fileID: 20696931947702132}
sensorName: CameraSensor
width: 84
height: 84
grayscale: 0
compression: 1
m_Camera: {fileID: 20696931947702132}
m_SensorName: CameraSensor
m_Width: 84
m_Height: 84
m_Grayscale: 0
m_ObservationStacks: 1
m_Compression: 1
--- !u!114 &3164735207755090463
MonoBehaviour:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
DecisionPeriod: 5
RepeatAction: 1
offsetStep: 0
TakeActionsBetweenDecisions: 1
--- !u!1 &1399553220224106
GameObject:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
m_BrainParameters:
vectorObservationSize: 0
numStackedVectorObservations: 1
vectorActionSize: 03000000030000000300000002000000
vectorActionDescriptions: []
vectorActionSpaceType: 0
m_Model: {fileID: 0}
VectorObservationSize: 1
NumStackedVectorObservations: 1
VectorActionSize: 03000000030000000300000002000000
VectorActionDescriptions: []
VectorActionSpaceType: 0
m_Model: {fileID: 11400000, guid: c3b1eb0bcf06b4c0488599c7ab806de7, type: 3}
m_TeamID: 0
TeamId: 0
m_UseChildActuators: 1
m_ObservableAttributeHandling: 0
--- !u!114 &114729119221978826
MonoBehaviour:
m_ObjectHideFlags: 0

m_Script: {fileID: 11500000, guid: c66e6845309d241c78a6d77ee2567928, type: 3}
m_Name:
m_EditorClassIdentifier:
maxStep: 5000
agentParameters:
maxStep: 0
hasUpgradedFromAgentParameters: 1
MaxStep: 5000
area: {fileID: 1145096862361766}
turnSpeed: 300
moveSpeed: 2

myLaser: {fileID: 1779831409734062}
contribute: 0
useVectorObs: 0
useVectorFrozenFlag: 1
--- !u!114 &114322691115031348
MonoBehaviour:
m_ObjectHideFlags: 0

m_Script: {fileID: 11500000, guid: 282f342c2ab144bf38be65d4d0c4e07d, type: 3}
m_Name:
m_EditorClassIdentifier:
camera: {fileID: 20363738094913048}
sensorName: CameraSensor
width: 84
height: 84
grayscale: 0
compression: 1
m_Camera: {fileID: 20363738094913048}
m_SensorName: CameraSensor
m_Width: 84
m_Height: 84
m_Grayscale: 0
m_ObservationStacks: 1
m_Compression: 1
--- !u!114 &5903164052970896384
MonoBehaviour:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
DecisionPeriod: 5
RepeatAction: 1
offsetStep: 0
TakeActionsBetweenDecisions: 1
--- !u!1 &1971119195936814
GameObject:
m_ObjectHideFlags: 0

Project/Assets/ML-Agents/Examples/FoodCollector/Scenes/VisualFoodCollector.unity (73 lines changed)


m_Modification:
m_TransformParent: {fileID: 0}
m_Modifications:
- target: {fileID: 1145096862361766, guid: c85b585836e104587b4efdc4d8b9d62b, type: 3}
propertyPath: m_Name
value: VisualFoodCollectorArea
objectReference: {fileID: 0}
- target: {fileID: 4307641258646068, guid: c85b585836e104587b4efdc4d8b9d62b, type: 3}
propertyPath: m_LocalPosition.x
value: 0

- target: {fileID: 4307641258646068, guid: c85b585836e104587b4efdc4d8b9d62b, type: 3}
propertyPath: m_RootOrder
value: 5
objectReference: {fileID: 0}
- target: {fileID: 4307641258646068, guid: c85b585836e104587b4efdc4d8b9d62b, type: 3}
propertyPath: m_LocalEulerAnglesHint.x
value: 0
objectReference: {fileID: 0}
- target: {fileID: 4307641258646068, guid: c85b585836e104587b4efdc4d8b9d62b, type: 3}
propertyPath: m_LocalEulerAnglesHint.y
value: 0
objectReference: {fileID: 0}
- target: {fileID: 4307641258646068, guid: c85b585836e104587b4efdc4d8b9d62b, type: 3}
propertyPath: m_LocalEulerAnglesHint.z
value: 0
objectReference: {fileID: 0}
m_RemovedComponents: []
m_SourcePrefab: {fileID: 100100000, guid: c85b585836e104587b4efdc4d8b9d62b, type: 3}

m_AnchoredPosition: {x: -1000, y: -239.57645}
m_SizeDelta: {x: 160, y: 30}
m_Pivot: {x: 0.5, y: 0.5}
--- !u!1001 &1094805673
PrefabInstance:
m_ObjectHideFlags: 0
serializedVersion: 2
m_Modification:
m_TransformParent: {fileID: 0}
m_Modifications:
- target: {fileID: 1145096862361766, guid: c85b585836e104587b4efdc4d8b9d62b, type: 3}
propertyPath: m_Name
value: VisualFoodCollectorArea (1)
objectReference: {fileID: 0}
- target: {fileID: 4307641258646068, guid: c85b585836e104587b4efdc4d8b9d62b, type: 3}
propertyPath: m_LocalPosition.x
value: 0
objectReference: {fileID: 0}
- target: {fileID: 4307641258646068, guid: c85b585836e104587b4efdc4d8b9d62b, type: 3}
propertyPath: m_LocalPosition.y
value: 60
objectReference: {fileID: 0}
- target: {fileID: 4307641258646068, guid: c85b585836e104587b4efdc4d8b9d62b, type: 3}
propertyPath: m_LocalPosition.z
value: 0
objectReference: {fileID: 0}
- target: {fileID: 4307641258646068, guid: c85b585836e104587b4efdc4d8b9d62b, type: 3}
propertyPath: m_LocalRotation.x
value: -0
objectReference: {fileID: 0}
- target: {fileID: 4307641258646068, guid: c85b585836e104587b4efdc4d8b9d62b, type: 3}
propertyPath: m_LocalRotation.y
value: -0
objectReference: {fileID: 0}
- target: {fileID: 4307641258646068, guid: c85b585836e104587b4efdc4d8b9d62b, type: 3}
propertyPath: m_LocalRotation.z
value: -0
objectReference: {fileID: 0}
- target: {fileID: 4307641258646068, guid: c85b585836e104587b4efdc4d8b9d62b, type: 3}
propertyPath: m_LocalRotation.w
value: 1
objectReference: {fileID: 0}
- target: {fileID: 4307641258646068, guid: c85b585836e104587b4efdc4d8b9d62b, type: 3}
propertyPath: m_RootOrder
value: 6
objectReference: {fileID: 0}
- target: {fileID: 4307641258646068, guid: c85b585836e104587b4efdc4d8b9d62b, type: 3}
propertyPath: m_LocalEulerAnglesHint.x
value: 0
objectReference: {fileID: 0}
- target: {fileID: 4307641258646068, guid: c85b585836e104587b4efdc4d8b9d62b, type: 3}
propertyPath: m_LocalEulerAnglesHint.y
value: 0
objectReference: {fileID: 0}
- target: {fileID: 4307641258646068, guid: c85b585836e104587b4efdc4d8b9d62b, type: 3}
propertyPath: m_LocalEulerAnglesHint.z
value: 0
objectReference: {fileID: 0}
m_RemovedComponents: []
m_SourcePrefab: {fileID: 100100000, guid: c85b585836e104587b4efdc4d8b9d62b, type: 3}
--- !u!1001 &1232021009
PrefabInstance:
m_ObjectHideFlags: 0

Project/Assets/ML-Agents/Examples/FoodCollector/Scripts/FoodCollectorAgent.cs (8 lines changed)


public GameObject myLaser;
public bool contribute;
public bool useVectorObs;
[Tooltip("Use only the frozen flag in vector observations. If \"Use Vector Obs\" " +
"is checked, this option has no effect. This option is necessary for the " +
"VisualFoodCollector scene.")]
public bool useVectorFrozenFlag;
EnvironmentParameters m_ResetParams;

sensor.AddObservation(localVelocity.z);
sensor.AddObservation(m_Frozen);
sensor.AddObservation(m_Shoot);
}
else if (useVectorFrozenFlag)
{
sensor.AddObservation(m_Frozen);
}
}

Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ModelOverrider.cs (125 lines changed)


public class ModelOverrider : MonoBehaviour
{
HashSet<string> k_SupportedExtensions = new HashSet<string> { "nn", "onnx" };
const string k_CommandLineModelOverrideFlag = "--mlagents-override-model";
const string k_CommandLineModelOverrideDirectoryFlag = "--mlagents-override-model-directory";
const string k_CommandLineModelOverrideExtensionFlag = "--mlagents-override-model-extension";
const string k_CommandLineQuitAfterEpisodesFlag = "--mlagents-quit-after-episodes";

Agent m_Agent;
// Assets paths to use, with the behavior name as the key.
Dictionary<string, string> m_BehaviorNameOverrides = new Dictionary<string, string>();
string m_OverrideExtension = "nn";
private List<string> m_OverrideExtensions = new List<string>();
// Cached loaded NNModels, with the behavior name as the key.
Dictionary<string, NNModel> m_CachedModels = new Dictionary<string, NNModel>();

public bool HasOverrides
{
get { return m_BehaviorNameOverrides.Count > 0 || !string.IsNullOrEmpty(m_BehaviorNameOverrideDirectory); }
get
{
GetAssetPathFromCommandLine();
return !string.IsNullOrEmpty(m_BehaviorNameOverrideDirectory);
}
}
public static string GetOverrideBehaviorName(string originalBehaviorName)

/// <returns></returns>
void GetAssetPathFromCommandLine()
{
m_BehaviorNameOverrides.Clear();
var maxEpisodes = 0;
string[] commandLineArgsOverride = null;
if (!string.IsNullOrEmpty(debugCommandLineOverride) && Application.isEditor)

var args = commandLineArgsOverride ?? Environment.GetCommandLineArgs();
for (var i = 0; i < args.Length; i++)
{
if (args[i] == k_CommandLineModelOverrideFlag && i < args.Length - 2)
{
var key = args[i + 1].Trim();
var value = args[i + 2].Trim();
m_BehaviorNameOverrides[key] = value;
}
else if (args[i] == k_CommandLineModelOverrideDirectoryFlag && i < args.Length - 1)
if (args[i] == k_CommandLineModelOverrideDirectoryFlag && i < args.Length - 1)
m_OverrideExtension = args[i + 1].Trim().ToLower();
var isKnownExtension = k_SupportedExtensions.Contains(m_OverrideExtension);
var overrideExtension = args[i + 1].Trim().ToLower();
var isKnownExtension = k_SupportedExtensions.Contains(overrideExtension);
Debug.LogError($"loading unsupported format: {m_OverrideExtension}");
Debug.LogError($"loading unsupported format: {overrideExtension}");
m_OverrideExtensions.Add(overrideExtension);
}
else if (args[i] == k_CommandLineQuitAfterEpisodesFlag && i < args.Length - 1)
{

}
}
if (HasOverrides)
if (!string.IsNullOrEmpty(m_BehaviorNameOverrideDirectory))
{
// If overriding models, set maxEpisodes to 1 or the command line value
m_MaxEpisodes = maxEpisodes > 0 ? maxEpisodes : 1;

return m_CachedModels[behaviorName];
}
string assetPath = null;
if (m_BehaviorNameOverrides.ContainsKey(behaviorName))
if (string.IsNullOrEmpty(m_BehaviorNameOverrideDirectory))
assetPath = m_BehaviorNameOverrides[behaviorName];
}
else if (!string.IsNullOrEmpty(m_BehaviorNameOverrideDirectory))
{
assetPath = Path.Combine(m_BehaviorNameOverrideDirectory, $"{behaviorName}.{m_OverrideExtension}");
Debug.Log($"No override directory set.");
return null;
if (string.IsNullOrEmpty(assetPath))
{
Debug.Log($"No override for BehaviorName {behaviorName}, and no directory set.");
return null;
}
// Try the override extensions in order. If they weren't set, try .nn first, then .onnx.
var overrideExtensions = (m_OverrideExtensions.Count > 0)
? m_OverrideExtensions.ToArray()
: new[] { "nn", "onnx" };
try
bool isOnnx = false;
string assetName = null;
foreach (var overrideExtension in overrideExtensions)
rawModel = File.ReadAllBytes(assetPath);
var assetPath = Path.Combine(m_BehaviorNameOverrideDirectory, $"{behaviorName}.{overrideExtension}");
try
{
rawModel = File.ReadAllBytes(assetPath);
isOnnx = overrideExtension.Equals("onnx");
assetName = "Override - " + Path.GetFileName(assetPath);
break;
}
catch (IOException)
{
// Do nothing - try the next extension, or we'll exit if nothing loaded.
}
catch (IOException)
if (rawModel == null)
Debug.Log($"Couldn't load file {assetPath} at full path {Path.GetFullPath(assetPath)}", this);
Debug.Log($"Couldn't load model file(s) for {behaviorName} in {m_BehaviorNameOverrideDirectory} (full path: {Path.GetFullPath(m_BehaviorNameOverrideDirectory)}");
NNModel asset;
var isOnnx = m_OverrideExtension.Equals("onnx");
if (isOnnx)
{
var converter = new ONNXModelConverter(true);
var onnxModel = converter.Convert(rawModel);
var asset = isOnnx ? LoadOnnxModel(rawModel) : LoadBarracudaModel(rawModel);
asset.name = assetName;
m_CachedModels[behaviorName] = asset;
return asset;
}
NNModelData assetData = ScriptableObject.CreateInstance<NNModelData>();
using (var memoryStream = new MemoryStream())
using (var writer = new BinaryWriter(memoryStream))
{
ModelWriter.Save(writer, onnxModel);
assetData.Value = memoryStream.ToArray();
}
assetData.name = "Data";
assetData.hideFlags = HideFlags.HideInHierarchy;
NNModel LoadBarracudaModel(byte[] rawModel)
{
var asset = ScriptableObject.CreateInstance<NNModel>();
asset.modelData = ScriptableObject.CreateInstance<NNModelData>();
asset.modelData.Value = rawModel;
return asset;
}
asset = ScriptableObject.CreateInstance<NNModel>();
asset.modelData = assetData;
}
else
NNModel LoadOnnxModel(byte[] rawModel)
{
var converter = new ONNXModelConverter(true);
var onnxModel = converter.Convert(rawModel);
NNModelData assetData = ScriptableObject.CreateInstance<NNModelData>();
using (var memoryStream = new MemoryStream())
using (var writer = new BinaryWriter(memoryStream))
// Note - this approach doesn't work for onnx files. Need to replace with
// the equivalent of ONNXModelImporter.OnImportAsset()
asset = ScriptableObject.CreateInstance<NNModel>();
asset.modelData = ScriptableObject.CreateInstance<NNModelData>();
asset.modelData.Value = rawModel;
ModelWriter.Save(writer, onnxModel);
assetData.Value = memoryStream.ToArray();
assetData.name = "Data";
assetData.hideFlags = HideFlags.HideInHierarchy;
asset.name = "Override - " + Path.GetFileName(assetPath);
m_CachedModels[behaviorName] = asset;
var asset = ScriptableObject.CreateInstance<NNModel>();
asset.modelData = assetData;
/// <summary>
/// Load the NNModel file from the specified path, and give it to the attached agent.

README.md (10 lines changed)


# Unity ML-Agents Toolkit
[![docs badge](https://img.shields.io/badge/docs-reference-blue.svg)](https://github.com/Unity-Technologies/ml-agents/tree/release_7_docs/docs/)
[![docs badge](https://img.shields.io/badge/docs-reference-blue.svg)](https://github.com/Unity-Technologies/ml-agents/tree/release_8_docs/docs/)
[![license badge](https://img.shields.io/badge/license-Apache--2.0-green.svg)](LICENSE)

## Releases & Documentation
**Our latest, stable release is `Release 7`. Click
[here](https://github.com/Unity-Technologies/ml-agents/tree/release_7_docs/docs/Readme.md)
**Our latest, stable release is `Release 8`. Click
[here](https://github.com/Unity-Technologies/ml-agents/tree/release_8_docs/docs/Readme.md)
to get started with the latest release of ML-Agents.**
The table below lists all our releases, including our `master` branch which is

| **Version** | **Release Date** | **Source** | **Documentation** | **Download** |
|:-------:|:------:|:-------------:|:-------:|:------------:|
| **master (unstable)** | -- | [source](https://github.com/Unity-Technologies/ml-agents/tree/master) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/master/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/master.zip) |
| **Release 7** | **September 16, 2020** | **[source](https://github.com/Unity-Technologies/ml-agents/tree/release_7)** | **[docs](https://github.com/Unity-Technologies/ml-agents/tree/release_7_docs/docs/Readme.md)** | **[download](https://github.com/Unity-Technologies/ml-agents/archive/release_7.zip)** |
| **Release 8** | **October 14, 2020** | **[source](https://github.com/Unity-Technologies/ml-agents/tree/release_8)** | **[docs](https://github.com/Unity-Technologies/ml-agents/tree/release_8_docs/docs/Readme.md)** | **[download](https://github.com/Unity-Technologies/ml-agents/archive/release_8.zip)** |
| **Release 7** | September 16, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/release_7) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/release_7_docs/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/release_7.zip) |
| **Release 1** | April 30, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/release_1) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/release_1_docs/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/release_1.zip) |
## Citation

com.unity.ml-agents.extensions/Documentation~/Grid-Sensor.md (1 line changed)


The Grid Sensor combines the generality of data extraction from Raycasts with the image processing power of Convolutional Neural Networks. The Grid Sensor can be used to collect data in the general form of a "Width x Height x Channel" matrix which can be used for training Reinforcement Learning agents or for data analysis.
<img src="images/gridsensor-debug.png" align="middle" width="3000"/>
# Motivation

com.unity.ml-agents.extensions/Documentation~/com.unity.ml-agents.extensions.md (31 lines changed)


# About ML-Agents Extensions package (`com.unity.ml-agents.extensions`)
The Unity ML-Agents Extensions package optional add-ons to the C# SDK for the
The Unity ML-Agents Extensions package contains optional add-ons to the C# SDK for the
[Unity ML-Agents Toolkit](https://github.com/Unity-Technologies/ml-agents).
These extensions are all considered experimental, and their API or behavior

| _Runtime_ | Contains core C# APIs for integrating ML-Agents into your Unity scene. |
| _Tests_ | Contains the unit tests for the package. |
<a name="Installation"></a>
## Installation
The ML-Agents Extensions package is not currently available in the Package Manager. There are two
recommended ways to install the package:
## Installation
### Local Installation
[Clone the repository](../../docs/Installation.md#clone-the-ml-agents-toolkit-repository-optional) and follow the
[Local Installation for Development](../../docs/Installation.md#advanced-local-installation-for-development-1)
directions (substituting `com.unity.ml-agents.extensions` for the package name).
### Github via Package Manager
In Unity 2019.4 or later, open the Package Manager, hit the "+" button, and select "Add package from git URL".
![Package Manager git URL](../../docs/images/unity_package_manager_git_url.png)
In the dialog that appears, enter
```
git+https://github.com/Unity-Technologies/ml-agents.git?path=com.unity.ml-agents.extensions
```
To install this _ML-Agents_ package, follow the instructions in the [Package
Manager documentation](https://docs.unity3d.com/Manual/upm-ui-install.html).
You can also edit your project's `manifest.json` directly and add the following line to the `dependencies`
section:
```
"com.unity.ml-agents.extensions": "git+https://github.com/Unity-Technologies/ml-agents.git?path=com.unity.ml-agents.extensions",
```
See [Git dependencies](https://docs.unity3d.com/Manual/upm-git.html#subfolder) for more information.
## Requirements

- 2018.4 and later
## Known Limitations
none
## Need Help?
The main [README](../../README.md) contains links for contacting the team or getting support.

com.unity.ml-agents.extensions/README.md (4 lines changed)


# ML-Agents Extensions
This is a source-only package for new features based on ML-Agents.
More details coming soon.
See the [package documentation](Documentation~/com.unity.ml-agents.extensions.md) for more information

com.unity.ml-agents.extensions/Runtime/Sensors/GridSensor.cs (4 lines changed)


[Tooltip("Whether to show gizmos or not")]
public bool ShowGizmos = false;
public SensorCompressionType CompressionType = SensorCompressionType.PNG;
/// <summary>
/// Array of colors displaying the DebugColors for each cell in OnDrawGizmos. Only updated if ShowGizmos.
/// </summary>

/// <inheritdoc/>
public virtual SensorCompressionType GetCompressionType()
{
return SensorCompressionType.PNG;
return CompressionType;
}
/// <summary>

com.unity.ml-agents.extensions/package.json (2 lines changed)


"unity": "2018.4",
"description": "A source-only package for new features based on ML-Agents",
"dependencies": {
"com.unity.ml-agents": "1.4.0-preview"
"com.unity.ml-agents": "1.5.0-preview"
}
}

com.unity.ml-agents/CHANGELOG.md (38 lines changed)


and this project adheres to
[Semantic Versioning](http://semver.org/spec/v2.0.0.html).
### Major Changes
#### com.unity.ml-agents (C#)
#### ml-agents / ml-agents-envs / gym-unity (Python)

### Bug Fixes
#### com.unity.ml-agents (C#)
#### ml-agents / ml-agents-envs / gym-unity (Python)
## [1.5.0-preview] - 2020-10-14
### Major Changes
#### com.unity.ml-agents (C#)
#### ml-agents / ml-agents-envs / gym-unity (Python)
- Added the Random Network Distillation (RND) intrinsic reward signal to the Pytorch
trainers. To use RND, add a `rnd` section to the `reward_signals` section of your
yaml configuration file. [More information here](https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Training-Configuration-File.md#rnd-intrinsic-reward) (#4473)
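Purely as an illustrative aside (not part of this diff): enabling the RND signal described in the entry above amounts to adding an `rnd` block under `reward_signals` in the trainer YAML. The behavior name and parameter names below are assumptions for the sketch; the authoritative option list is in the Training-Configuration-File.md link above.
```
behaviors:
  MyBehavior:            # hypothetical behavior name
    trainer_type: ppo
    reward_signals:
      extrinsic:
        strength: 1.0
        gamma: 0.99
      rnd:               # assumed parameter names; check the linked docs
        strength: 0.01
        gamma: 0.99
```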
### Minor Changes
#### com.unity.ml-agents (C#)
- Stacking for compressed observations is now supported. An additional setting
option `Observation Stacks` is added in editor to sensor components that support
compressed observations. A new class `ISparseChannelSensor` with an
additional method `GetCompressedChannelMapping()` is added to generate a mapping
of the channels in compressed data to the actual channel after decompression,
for the python side to decompress correctly. (#4476)
- Added a new visual 3DBall environment. (#4513)
#### ml-agents / ml-agents-envs / gym-unity (Python)
- The Communication API was changed to 1.2.0 to indicate support for stacked
compressed observation. A new entry `compressed_channel_mapping` is added to the
proto to handle decompression correctly. Newer versions of the package that wish to
make use of this will also need a compatible version of the Python trainers. (#4476)
- In the `VisualFoodCollector` scene, a vector flag representing the frozen state of
the agent is added to the input observations in addition to the original first-person
camera frame. The scene is able to train with the provided default config file. (#4511)
- Added conversion to string for sampler classes to increase the verbosity of
the curriculum lesson changes. The lesson updates would now output the sampler
stats in addition to the lesson and parameter name to the console. (#4484)
- Localized documentation in Russian is added. Thanks to @SergeyMatrosov for
the contribution. (#4529)
### Bug Fixes
#### com.unity.ml-agents (C#)
- Fixed a bug where accessing the Academy outside of play mode would cause the
Academy to get stepped multiple times when in play mode. (#4532)
#### ml-agents / ml-agents-envs / gym-unity (Python)

com.unity.ml-agents/Documentation~/com.unity.ml-agents.md (2 lines changed)


[unity ML-Agents Toolkit]: https://github.com/Unity-Technologies/ml-agents
[unity inference engine]: https://docs.unity3d.com/Packages/com.unity.barracuda@latest/index.html
[package manager documentation]: https://docs.unity3d.com/Manual/upm-ui-install.html
[installation instructions]: https://github.com/Unity-Technologies/ml-agents/blob/release_7_docs/docs/Installation.md
[installation instructions]: https://github.com/Unity-Technologies/ml-agents/blob/release_8_docs/docs/Installation.md
[github repository]: https://github.com/Unity-Technologies/ml-agents
[python package]: https://github.com/Unity-Technologies/ml-agents
[execution order of event functions]: https://docs.unity3d.com/Manual/ExecutionOrder.html

com.unity.ml-agents/Editor/CameraSensorComponentEditor.cs (1 line changed)


EditorGUILayout.PropertyField(so.FindProperty("m_Width"), true);
EditorGUILayout.PropertyField(so.FindProperty("m_Height"), true);
EditorGUILayout.PropertyField(so.FindProperty("m_Grayscale"), true);
EditorGUILayout.PropertyField(so.FindProperty("m_ObservationStacks"), true);
}
EditorGUI.EndDisabledGroup();
EditorGUILayout.PropertyField(so.FindProperty("m_Compression"), true);

com.unity.ml-agents/Editor/RenderTextureSensorComponentEditor.cs (1 line changed)


EditorGUILayout.PropertyField(so.FindProperty("m_RenderTexture"), true);
EditorGUILayout.PropertyField(so.FindProperty("m_SensorName"), true);
EditorGUILayout.PropertyField(so.FindProperty("m_Grayscale"), true);
EditorGUILayout.PropertyField(so.FindProperty("m_ObservationStacks"), true);
}
EditorGUI.EndDisabledGroup();

com.unity.ml-agents/Runtime/Academy.cs (49 lines changed)


* API. For more information on each of these entities, in addition to how to
* set-up a learning environment and train the behavior of characters in a
* Unity scene, please browse our documentation pages on GitHub:
* https://github.com/Unity-Technologies/ml-agents/tree/release_7_docs/docs/
* https://github.com/Unity-Technologies/ml-agents/tree/release_8_docs/docs/
*/
namespace Unity.MLAgents

{
void FixedUpdate()
{
Academy.Instance.EnvironmentStep();
// Check if the stepper belongs to the current Academy and destroy it if it's not.
// This is to prevent from having leaked stepper from previous runs.
if (!Academy.IsInitialized || !Academy.Instance.IsStepperOwner(this))
{
Destroy(this.gameObject);
}
else
{
Academy.Instance.EnvironmentStep();
}
}
}

/// fall back to inference or heuristic decisions. (You can also set agents to always use
/// inference or heuristics.)
/// </remarks>
[HelpURL("https://github.com/Unity-Technologies/ml-agents/tree/release_7_docs/" +
[HelpURL("https://github.com/Unity-Technologies/ml-agents/tree/release_8_docs/" +
"docs/Learning-Environment-Design.md")]
public class Academy : IDisposable
{

/// <term>1.1.0</term>
/// <description>Support concatenated PNGs for compressed observations.</description>
/// </item>
/// <item>
/// <term>1.2.0</term>
/// <description>Support compression mapping for stacked compressed observations.</description>
/// </item>
const string k_ApiVersion = "1.1.0";
const string k_ApiVersion = "1.2.0";
internal const string k_PackageVersion = "1.4.0-preview";
internal const string k_PackageVersion = "1.5.0-preview";
const int k_EditorTrainingPort = 5004;

Application.quitting += Dispose;
LazyInitialize();
#if UNITY_EDITOR
EditorApplication.playModeStateChanged += HandleOnPlayModeChanged;
#endif
#if UNITY_EDITOR
/// <summary>
/// Clean up the Academy when switching from edit mode to play mode
/// </summary>
/// <param name="state">State.</param>
void HandleOnPlayModeChanged(PlayModeStateChange state)
{
if (state == PlayModeStateChange.ExitingEditMode)
{
Dispose();
}
}
#endif
/// <summary>
/// Initialize the Academy if it hasn't already been initialized.

// Reset the Lazy instance
s_Lazy = new Lazy<Academy>(() => new Academy());
}
/// <summary>
/// Check if the input AcademyFixedUpdateStepper belongs to this Academy.
/// </summary>
internal bool IsStepperOwner(AcademyFixedUpdateStepper stepper)
{
return GameObject.ReferenceEquals(stepper.gameObject, Academy.Instance.m_StepperObject);
}
}
}

com.unity.ml-agents/Runtime/Actuators/IActionReceiver.cs (2 lines changed)


///
/// See [Agents - Actions] for more information on masking actions.
///
/// [Agents - Actions]: https://github.com/Unity-Technologies/ml-agents/blob/release_7_docs/docs/Learning-Environment-Design-Agents.md#actions
/// [Agents - Actions]: https://github.com/Unity-Technologies/ml-agents/blob/release_8_docs/docs/Learning-Environment-Design-Agents.md#actions
/// </remarks>
/// <seealso cref="IActionReceiver.OnActionReceived"/>
void WriteDiscreteActionMask(IDiscreteActionMask actionMask);

com.unity.ml-agents/Runtime/Actuators/IDiscreteActionMask.cs (2 lines changed)


///
/// See [Agents - Actions] for more information on masking actions.
///
/// [Agents - Actions]: https://github.com/Unity-Technologies/ml-agents/blob/release_7_docs/docs/Learning-Environment-Design-Agents.md#actions
/// [Agents - Actions]: https://github.com/Unity-Technologies/ml-agents/blob/release_8_docs/docs/Learning-Environment-Design-Agents.md#actions
/// </remarks>
/// <param name="branch">The branch for which the actions will be masked.</param>
/// <param name="actionIndices">The indices of the masked actions.</param>

com.unity.ml-agents/Runtime/Agent.cs (26 lines changed)


/// [OnDisable()]: https://docs.unity3d.com/ScriptReference/MonoBehaviour.OnDisable.html]
/// [OnBeforeSerialize()]: https://docs.unity3d.com/ScriptReference/MonoBehaviour.OnBeforeSerialize.html
/// [OnAfterSerialize()]: https://docs.unity3d.com/ScriptReference/MonoBehaviour.OnAfterSerialize.html
/// [Agents]: https://github.com/Unity-Technologies/ml-agents/blob/release_7_docs/docs/Learning-Environment-Design-Agents.md
/// [Reinforcement Learning in Unity]: https://github.com/Unity-Technologies/ml-agents/blob/release_7_docs/docs/Learning-Environment-Design.md
/// [Agents]: https://github.com/Unity-Technologies/ml-agents/blob/release_8_docs/docs/Learning-Environment-Design-Agents.md
/// [Reinforcement Learning in Unity]: https://github.com/Unity-Technologies/ml-agents/blob/release_8_docs/docs/Learning-Environment-Design.md
/// [Unity ML-Agents Toolkit manual]: https://github.com/Unity-Technologies/ml-agents/blob/release_7_docs/docs/Readme.md
/// [Unity ML-Agents Toolkit manual]: https://github.com/Unity-Technologies/ml-agents/blob/release_8_docs/docs/Readme.md
[HelpURL("https://github.com/Unity-Technologies/ml-agents/blob/release_7_docs/" +
[HelpURL("https://github.com/Unity-Technologies/ml-agents/blob/release_8_docs/" +
"docs/Learning-Environment-Design-Agents.md")]
[Serializable]
[RequireComponent(typeof(BehaviorParameters))]

/// for information about mixing reward signals from curiosity and Generative Adversarial
/// Imitation Learning (GAIL) with rewards supplied through this method.
///
/// [Agents - Rewards]: https://github.com/Unity-Technologies/ml-agents/blob/release_7_docs/docs/Learning-Environment-Design-Agents.md#rewards
/// [Reward Signals]: https://github.com/Unity-Technologies/ml-agents/blob/release_7_docs/docs/ML-Agents-Overview.md#a-quick-note-on-reward-signals
/// [Agents - Rewards]: https://github.com/Unity-Technologies/ml-agents/blob/release_8_docs/docs/Learning-Environment-Design-Agents.md#rewards
/// [Reward Signals]: https://github.com/Unity-Technologies/ml-agents/blob/release_8_docs/docs/ML-Agents-Overview.md#a-quick-note-on-reward-signals
/// </remarks>
/// <param name="reward">The new value of the reward.</param>
public void SetReward(float reward)

/// for information about mixing reward signals from curiosity and Generative Adversarial
/// Imitation Learning (GAIL) with rewards supplied through this method.
///
/// [Agents - Rewards]: https://github.com/Unity-Technologies/ml-agents/blob/release_7_docs/docs/Learning-Environment-Design-Agents.md#rewards
/// [Reward Signals]: https://github.com/Unity-Technologies/ml-agents/blob/release_7_docs/docs/ML-Agents-Overview.md#a-quick-note-on-reward-signals
/// [Agents - Rewards]: https://github.com/Unity-Technologies/ml-agents/blob/release_8_docs/docs/Learning-Environment-Design-Agents.md#rewards
/// [Reward Signals]: https://github.com/Unity-Technologies/ml-agents/blob/release_8_docs/docs/ML-Agents-Overview.md#a-quick-note-on-reward-signals
///</remarks>
/// <param name="increment">Incremental reward value.</param>
public void AddReward(float increment)

/// implementing a simple heuristic function can aid in debugging agent actions and interactions
/// with its environment.
///
/// [Demonstration Recorder]: https://github.com/Unity-Technologies/ml-agents/blob/release_7_docs/docs/Learning-Environment-Design-Agents.md#recording-demonstrations
/// [Actions]: https://github.com/Unity-Technologies/ml-agents/blob/release_7_docs/docs/Learning-Environment-Design-Agents.md#actions
/// [Demonstration Recorder]: https://github.com/Unity-Technologies/ml-agents/blob/release_8_docs/docs/Learning-Environment-Design-Agents.md#recording-demonstrations
/// [Actions]: https://github.com/Unity-Technologies/ml-agents/blob/release_8_docs/docs/Learning-Environment-Design-Agents.md#actions
/// [GameObject]: https://docs.unity3d.com/Manual/GameObjects.html
/// </remarks>
/// <example>

/// For more information about observations, see [Observations and Sensors].
///
/// [GameObject]: https://docs.unity3d.com/Manual/GameObjects.html
/// [Observations and Sensors]: https://github.com/Unity-Technologies/ml-agents/blob/release_7_docs/docs/Learning-Environment-Design-Agents.md#observations-and-sensors
/// [Observations and Sensors]: https://github.com/Unity-Technologies/ml-agents/blob/release_8_docs/docs/Learning-Environment-Design-Agents.md#observations-and-sensors
/// </remarks>
public virtual void CollectObservations(VectorSensor sensor)
{

///
/// See [Agents - Actions] for more information on masking actions.
///
/// [Agents - Actions]: https://github.com/Unity-Technologies/ml-agents/blob/release_7_docs/docs/Learning-Environment-Design-Agents.md#actions
/// [Agents - Actions]: https://github.com/Unity-Technologies/ml-agents/blob/release_8_docs/docs/Learning-Environment-Design-Agents.md#actions
/// </remarks>
/// <seealso cref="IActionReceiver.OnActionReceived"/>
public virtual void WriteDiscreteActionMask(IDiscreteActionMask actionMask)

///
/// For more information about implementing agent actions see [Agents - Actions].
///
/// [Agents - Actions]: https://github.com/Unity-Technologies/ml-agents/blob/release_7_docs/docs/Learning-Environment-Design-Agents.md#actions
/// [Agents - Actions]: https://github.com/Unity-Technologies/ml-agents/blob/release_8_docs/docs/Learning-Environment-Design-Agents.md#actions
/// </remarks>
/// <param name="actions">
/// Struct containing the buffers of actions to be executed at this step.

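The Agent.cs hunks above are documentation-only, but they cover the core callback surface: `SetReward`/`AddReward`, `CollectObservations`, `WriteDiscreteActionMask`, and the `ActionBuffers`-based `OnActionReceived`. As a hedged sketch (not part of this change; the observation layout, the single discrete branch, and the `WriteMask` call are assumptions about this package version), a minimal agent wiring those callbacks together might look like:

```csharp
using Unity.MLAgents;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Sensors;

// Illustrative only: a tiny agent exercising the callbacks documented above.
public class MinimalAgent : Agent
{
    public override void CollectObservations(VectorSensor sensor)
    {
        // Two vector observations (values chosen arbitrarily for the sketch).
        sensor.AddObservation(transform.localPosition.x);
        sensor.AddObservation(transform.localPosition.z);
    }

    public override void WriteDiscreteActionMask(IDiscreteActionMask actionMask)
    {
        // Hypothetical rule: mask action index 2 of branch 0 near the boundary.
        if (transform.localPosition.x > 4f)
        {
            actionMask.WriteMask(0, new[] { 2 });
        }
    }

    public override void OnActionReceived(ActionBuffers actions)
    {
        var move = actions.DiscreteActions[0];
        AddReward(-0.001f);      // small per-step time penalty
        if (move == 1)
        {
            SetReward(1.0f);     // overwrite the step reward on success
            EndEpisode();
        }
    }
}
```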
62
com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs


/// <summary>
/// Static flag to make sure that we only fire the warning once.
/// </summary>
private static bool s_HaveWarnedAboutTrainerCapabilities = false;
private static bool s_HaveWarnedTrainerCapabilitiesMultiPng = false;
private static bool s_HaveWarnedTrainerCapabilitiesMapping = false;
/// <summary>
/// Generate an ObservationProto for the sensor using the provided ObservationWriter.

var trainerCanHandle = Academy.Instance.TrainerCapabilities == null || Academy.Instance.TrainerCapabilities.ConcatenatedPngObservations;
if (!trainerCanHandle)
{
if (!s_HaveWarnedAboutTrainerCapabilities)
if (!s_HaveWarnedTrainerCapabilitiesMultiPng)
s_HaveWarnedAboutTrainerCapabilities = true;
s_HaveWarnedTrainerCapabilitiesMultiPng = true;
}
compressionType = SensorCompressionType.None;
}
}
// Check capabilities if we need mapping for compressed observations
if (compressionType != SensorCompressionType.None && shape.Length == 3 && shape[2] > 3)
{
var trainerCanHandleMapping = Academy.Instance.TrainerCapabilities == null || Academy.Instance.TrainerCapabilities.CompressedChannelMapping;
var isTrivialMapping = IsTrivialMapping(sensor);
if (!trainerCanHandleMapping && !isTrivialMapping)
{
if (!s_HaveWarnedTrainerCapabilitiesMapping)
{
Debug.LogWarning($"The sensor {sensor.GetName()} is using non-trivial mapping and " +
"the attached trainer doesn't support compression mapping. " +
"Switching to uncompressed observations.");
s_HaveWarnedTrainerCapabilitiesMapping = true;
}
compressionType = SensorCompressionType.None;
}

"return SensorCompressionType.None from GetCompressionType()."
);
}
var compressibleSensor = sensor as ISparseChannelSensor;
if (compressibleSensor != null)
{
observationProto.CompressedChannelMapping.AddRange(compressibleSensor.GetCompressedChannelMapping());
}
}
observationProto.Shape.AddRange(shape);
return observationProto;

return new UnityRLCapabilities
{
BaseRLCapabilities = proto.BaseRLCapabilities,
ConcatenatedPngObservations = proto.ConcatenatedPngObservations
ConcatenatedPngObservations = proto.ConcatenatedPngObservations,
CompressedChannelMapping = proto.CompressedChannelMapping,
};
}

{
BaseRLCapabilities = rlCaps.BaseRLCapabilities,
ConcatenatedPngObservations = rlCaps.ConcatenatedPngObservations,
CompressedChannelMapping = rlCaps.CompressedChannelMapping,
}
internal static bool IsTrivialMapping(ISensor sensor)
{
var compressibleSensor = sensor as ISparseChannelSensor;
if (compressibleSensor is null)
{
return true;
}
var mapping = compressibleSensor.GetCompressedChannelMapping();
if (mapping == null)
{
return true;
}
// check if mapping equals zero mapping
if (mapping.Length == 3 && mapping.All(m => m == 0))
{
return true;
}
// check if mapping equals identity mapping
for (var i = 0; i < mapping.Length; i++)
{
if (mapping[i] != i)
{
return false;
}
}
return true;
}
}
}

4
com.unity.ml-agents/Runtime/Communicator/UnityRLCapabilities.cs


{
public bool BaseRLCapabilities;
public bool ConcatenatedPngObservations;
public bool CompressedChannelMapping;
public UnityRLCapabilities(bool baseRlCapabilities = true, bool concatenatedPngObservations = true)
public UnityRLCapabilities(bool baseRlCapabilities = true, bool concatenatedPngObservations = true, bool compressedChannelMapping = true)
CompressedChannelMapping = compressedChannelMapping;
}
/// <summary>

2
com.unity.ml-agents/Runtime/Demonstrations/DemonstrationRecorder.cs


/// See [Imitation Learning - Recording Demonstrations] for more information.
///
/// [GameObject]: https://docs.unity3d.com/Manual/GameObjects.html
/// [Imitation Learning - Recording Demonstrations]: https://github.com/Unity-Technologies/ml-agents/blob/release_7_docs/docs//Learning-Environment-Design-Agents.md#recording-demonstrations
/// [Imitation Learning - Recording Demonstrations]: https://github.com/Unity-Technologies/ml-agents/blob/release_8_docs/docs//Learning-Environment-Design-Agents.md#recording-demonstrations
/// </remarks>
[RequireComponent(typeof(Agent))]
[AddComponentMenu("ML Agents/Demonstration Recorder", (int)MenuGroup.Default)]

2
com.unity.ml-agents/Runtime/DiscreteActionMasker.cs


///
/// See [Agents - Actions] for more information on masking actions.
///
/// [Agents - Actions]: https://github.com/Unity-Technologies/ml-agents/blob/release_7_docs/docs/Learning-Environment-Design-Agents.md#actions
/// [Agents - Actions]: https://github.com/Unity-Technologies/ml-agents/blob/release_8_docs/docs/Learning-Environment-Design-Agents.md#actions
/// </remarks>
/// <param name="branch">The branch for which the actions will be masked.</param>
/// <param name="actionIndices">The indices of the masked actions.</param>

40
com.unity.ml-agents/Runtime/Grpc/CommunicatorObjects/Capabilities.cs


byte[] descriptorData = global::System.Convert.FromBase64String(
string.Concat(
"CjVtbGFnZW50c19lbnZzL2NvbW11bmljYXRvcl9vYmplY3RzL2NhcGFiaWxp",
"dGllcy5wcm90bxIUY29tbXVuaWNhdG9yX29iamVjdHMiWwoYVW5pdHlSTENh",
"dGllcy5wcm90bxIUY29tbXVuaWNhdG9yX29iamVjdHMifQoYVW5pdHlSTENh",
"Chtjb25jYXRlbmF0ZWRQbmdPYnNlcnZhdGlvbnMYAiABKAhCJaoCIlVuaXR5",
"Lk1MQWdlbnRzLkNvbW11bmljYXRvck9iamVjdHNiBnByb3RvMw=="));
"Chtjb25jYXRlbmF0ZWRQbmdPYnNlcnZhdGlvbnMYAiABKAgSIAoYY29tcHJl",
"c3NlZENoYW5uZWxNYXBwaW5nGAMgASgIQiWqAiJVbml0eS5NTEFnZW50cy5D",
"b21tdW5pY2F0b3JPYmplY3RzYgZwcm90bzM="));
new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.UnityRLCapabilitiesProto), global::Unity.MLAgents.CommunicatorObjects.UnityRLCapabilitiesProto.Parser, new[]{ "BaseRLCapabilities", "ConcatenatedPngObservations" }, null, null, null)
new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.UnityRLCapabilitiesProto), global::Unity.MLAgents.CommunicatorObjects.UnityRLCapabilitiesProto.Parser, new[]{ "BaseRLCapabilities", "ConcatenatedPngObservations", "CompressedChannelMapping" }, null, null, null)
}));
}
#endregion

public UnityRLCapabilitiesProto(UnityRLCapabilitiesProto other) : this() {
baseRLCapabilities_ = other.baseRLCapabilities_;
concatenatedPngObservations_ = other.concatenatedPngObservations_;
compressedChannelMapping_ = other.compressedChannelMapping_;
_unknownFields = pb::UnknownFieldSet.Clone(other._unknownFields);
}

}
}
/// <summary>Field number for the "compressedChannelMapping" field.</summary>
public const int CompressedChannelMappingFieldNumber = 3;
private bool compressedChannelMapping_;
/// <summary>
/// compression mapping for stacking compressed observations.
/// </summary>
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public bool CompressedChannelMapping {
get { return compressedChannelMapping_; }
set {
compressedChannelMapping_ = value;
}
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public override bool Equals(object other) {
return Equals(other as UnityRLCapabilitiesProto);

}
if (BaseRLCapabilities != other.BaseRLCapabilities) return false;
if (ConcatenatedPngObservations != other.ConcatenatedPngObservations) return false;
if (CompressedChannelMapping != other.CompressedChannelMapping) return false;
return Equals(_unknownFields, other._unknownFields);
}

if (BaseRLCapabilities != false) hash ^= BaseRLCapabilities.GetHashCode();
if (ConcatenatedPngObservations != false) hash ^= ConcatenatedPngObservations.GetHashCode();
if (CompressedChannelMapping != false) hash ^= CompressedChannelMapping.GetHashCode();
if (_unknownFields != null) {
hash ^= _unknownFields.GetHashCode();
}

if (ConcatenatedPngObservations != false) {
output.WriteRawTag(16);
output.WriteBool(ConcatenatedPngObservations);
}
if (CompressedChannelMapping != false) {
output.WriteRawTag(24);
output.WriteBool(CompressedChannelMapping);
}
if (_unknownFields != null) {
_unknownFields.WriteTo(output);

if (ConcatenatedPngObservations != false) {
size += 1 + 1;
}
if (CompressedChannelMapping != false) {
size += 1 + 1;
}
if (_unknownFields != null) {
size += _unknownFields.CalculateSize();
}

if (other.ConcatenatedPngObservations != false) {
ConcatenatedPngObservations = other.ConcatenatedPngObservations;
}
if (other.CompressedChannelMapping != false) {
CompressedChannelMapping = other.CompressedChannelMapping;
}
_unknownFields = pb::UnknownFieldSet.MergeFrom(_unknownFields, other._unknownFields);
}

}
case 16: {
ConcatenatedPngObservations = input.ReadBool();
break;
}
case 24: {
CompressedChannelMapping = input.ReadBool();
break;
}
}

34
com.unity.ml-agents/Runtime/Grpc/CommunicatorObjects/Observation.cs


byte[] descriptorData = global::System.Convert.FromBase64String(
string.Concat(
"CjRtbGFnZW50c19lbnZzL2NvbW11bmljYXRvcl9vYmplY3RzL29ic2VydmF0",
"aW9uLnByb3RvEhRjb21tdW5pY2F0b3Jfb2JqZWN0cyL5AQoQT2JzZXJ2YXRp",
"aW9uLnByb3RvEhRjb21tdW5pY2F0b3Jfb2JqZWN0cyKdAgoQT2JzZXJ2YXRp",
"RmxvYXREYXRhSAAaGQoJRmxvYXREYXRhEgwKBGRhdGEYASADKAJCEgoQb2Jz",
"ZXJ2YXRpb25fZGF0YSopChRDb21wcmVzc2lvblR5cGVQcm90bxIICgROT05F",
"EAASBwoDUE5HEAFCJaoCIlVuaXR5Lk1MQWdlbnRzLkNvbW11bmljYXRvck9i",
"amVjdHNiBnByb3RvMw=="));
"RmxvYXREYXRhSAASIgoaY29tcHJlc3NlZF9jaGFubmVsX21hcHBpbmcYBSAD",
"KAUaGQoJRmxvYXREYXRhEgwKBGRhdGEYASADKAJCEgoQb2JzZXJ2YXRpb25f",
"ZGF0YSopChRDb21wcmVzc2lvblR5cGVQcm90bxIICgROT05FEAASBwoDUE5H",
"EAFCJaoCIlVuaXR5Lk1MQWdlbnRzLkNvbW11bmljYXRvck9iamVjdHNiBnBy",
"b3RvMw=="));
new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.ObservationProto), global::Unity.MLAgents.CommunicatorObjects.ObservationProto.Parser, new[]{ "Shape", "CompressionType", "CompressedData", "FloatData" }, new[]{ "ObservationData" }, null, new pbr::GeneratedClrTypeInfo[] { new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.ObservationProto.Types.FloatData), global::Unity.MLAgents.CommunicatorObjects.ObservationProto.Types.FloatData.Parser, new[]{ "Data" }, null, null, null)})
new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.ObservationProto), global::Unity.MLAgents.CommunicatorObjects.ObservationProto.Parser, new[]{ "Shape", "CompressionType", "CompressedData", "FloatData", "CompressedChannelMapping" }, new[]{ "ObservationData" }, null, new pbr::GeneratedClrTypeInfo[] { new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.ObservationProto.Types.FloatData), global::Unity.MLAgents.CommunicatorObjects.ObservationProto.Types.FloatData.Parser, new[]{ "Data" }, null, null, null)})
}));
}
#endregion

public ObservationProto(ObservationProto other) : this() {
shape_ = other.shape_.Clone();
compressionType_ = other.compressionType_;
compressedChannelMapping_ = other.compressedChannelMapping_.Clone();
switch (other.ObservationDataCase) {
case ObservationDataOneofCase.CompressedData:
CompressedData = other.CompressedData;

}
}
/// <summary>Field number for the "compressed_channel_mapping" field.</summary>
public const int CompressedChannelMappingFieldNumber = 5;
private static readonly pb::FieldCodec<int> _repeated_compressedChannelMapping_codec
= pb::FieldCodec.ForInt32(42);
private readonly pbc::RepeatedField<int> compressedChannelMapping_ = new pbc::RepeatedField<int>();
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public pbc::RepeatedField<int> CompressedChannelMapping {
get { return compressedChannelMapping_; }
}
private object observationData_;
/// <summary>Enum of possible cases for the "observation_data" oneof.</summary>
public enum ObservationDataOneofCase {

if (CompressionType != other.CompressionType) return false;
if (CompressedData != other.CompressedData) return false;
if (!object.Equals(FloatData, other.FloatData)) return false;
if(!compressedChannelMapping_.Equals(other.compressedChannelMapping_)) return false;
if (ObservationDataCase != other.ObservationDataCase) return false;
return Equals(_unknownFields, other._unknownFields);
}

if (CompressionType != 0) hash ^= CompressionType.GetHashCode();
if (observationDataCase_ == ObservationDataOneofCase.CompressedData) hash ^= CompressedData.GetHashCode();
if (observationDataCase_ == ObservationDataOneofCase.FloatData) hash ^= FloatData.GetHashCode();
hash ^= compressedChannelMapping_.GetHashCode();
hash ^= (int) observationDataCase_;
if (_unknownFields != null) {
hash ^= _unknownFields.GetHashCode();

output.WriteRawTag(34);
output.WriteMessage(FloatData);
}
compressedChannelMapping_.WriteTo(output, _repeated_compressedChannelMapping_codec);
if (_unknownFields != null) {
_unknownFields.WriteTo(output);
}

if (observationDataCase_ == ObservationDataOneofCase.FloatData) {
size += 1 + pb::CodedOutputStream.ComputeMessageSize(FloatData);
}
size += compressedChannelMapping_.CalculateSize(_repeated_compressedChannelMapping_codec);
if (_unknownFields != null) {
size += _unknownFields.CalculateSize();
}

if (other.CompressionType != 0) {
CompressionType = other.CompressionType;
}
compressedChannelMapping_.Add(other.compressedChannelMapping_);
switch (other.ObservationDataCase) {
case ObservationDataOneofCase.CompressedData:
CompressedData = other.CompressedData;

}
input.ReadMessage(subBuilder);
FloatData = subBuilder;
break;
}
case 42:
case 40: {
compressedChannelMapping_.AddEntriesFrom(input, _repeated_compressedChannelMapping_codec);
break;
}
}

64
com.unity.ml-agents/Runtime/SensorHelper.cs


using UnityEngine;
using Unity.Barracuda;
namespace Unity.MLAgents.Sensors
{

}
}
errorMessage = null;
return true;
}
/// <summary>
/// Generates the observations for the provided sensor, and returns true if they equal the
/// expected values. If they are unequal, errorMessage is also set.
/// This should not generally be used in production code. It is only intended for
/// simplifying unit tests.
/// </summary>
/// <param name="sensor"></param>
/// <param name="expected"></param>
/// <param name="errorMessage"></param>
/// <returns></returns>
public static bool CompareObservation(ISensor sensor, float[,,] expected, out string errorMessage)
{
var tensorShape = new TensorShape(0, expected.GetLength(0), expected.GetLength(1), expected.GetLength(2));
var numExpected = tensorShape.height * tensorShape.width * tensorShape.channels;
const float fill = -1337f;
var output = new float[numExpected];
for (var i = 0; i < numExpected; i++)
{
output[i] = fill;
}
if (numExpected > 0)
{
if (fill != output[0])
{
errorMessage = "Error setting output buffer.";
return false;
}
}
ObservationWriter writer = new ObservationWriter();
writer.SetTarget(output, sensor.GetObservationShape(), 0);
// Make sure ObservationWriter didn't touch anything
if (numExpected > 0)
{
if (fill != output[0])
{
errorMessage = "ObservationWriter.SetTarget modified a buffer it shouldn't have.";
return false;
}
}
sensor.Write(writer);
for (var h = 0; h < tensorShape.height; h++)
{
for (var w = 0; w < tensorShape.width; w++)
{
for (var c = 0; c < tensorShape.channels; c++)
{
if (expected[h, w, c] != output[tensorShape.Index(0, h, w, c)])
{
errorMessage = $"Expected and actual differed in position [{h}, {w}, {c}]. " +
"Expected: {expected[h, w, c]} Actual: {output[tensorShape.Index(0, h, w, c)]} ";
return false;
}
}
}
}
errorMessage = null;
return true;
}

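For context (not introduced by this change), `SensorHelper.CompareObservation` is a test utility; the hunk above adds a `float[,,]` overload for 3D observations alongside the existing `float[]` one. A hedged sketch of the 1D overload in an NUnit test, mirroring the `SensorTestHelper` usage further down:

```csharp
using NUnit.Framework;
using Unity.MLAgents.Sensors;

public class SensorHelperSketch
{
    [Test]
    public void CompareObservationExample()
    {
        // A VectorSensor holding two observations.
        var sensor = new VectorSensor(2);
        sensor.AddObservation(1f);
        sensor.AddObservation(2f);

        // Returns false and fills errorMessage if the written observation differs.
        string errorMessage;
        bool matches = SensorHelper.CompareObservation(sensor, new[] { 1f, 2f }, out errorMessage);
        Assert.IsTrue(matches, errorMessage);
    }
}
```

The new `float[,,]` overload works the same way for height x width x channel sensors.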
2
com.unity.ml-agents/Runtime/Sensors/CameraSensor.cs


using (TimerStack.Instance.Scoped("CameraSensor.WriteToTensor"))
{
var texture = ObservationToTexture(m_Camera, m_Width, m_Height);
var numWritten = Utilities.TextureToTensorProxy(texture, writer, m_Grayscale);
var numWritten = writer.WriteTexture(texture, m_Grayscale);
DestroyTexture(texture);
return numWritten;
}

28
com.unity.ml-agents/Runtime/Sensors/CameraSensorComponent.cs


set { m_Grayscale = value; }
}
[HideInInspector, SerializeField]
[Range(1, 50)]
[Tooltip("Number of camera frames that will be stacked before being fed to the neural network.")]
int m_ObservationStacks = 1;
[HideInInspector, SerializeField, FormerlySerializedAs("compression")]
SensorCompressionType m_Compression = SensorCompressionType.PNG;

}
/// <summary>
/// Whether to stack previous observations. Using 1 means no previous observations.
/// Note that changing this after the sensor is created has no effect.
/// </summary>
public int ObservationStacks
{
get { return m_ObservationStacks; }
set { m_ObservationStacks = value; }
}
/// <summary>
/// Creates the <see cref="CameraSensor"/>
/// </summary>
/// <returns>The created <see cref="CameraSensor"/> object for this component.</returns>

if (ObservationStacks != 1)
{
return new StackingSensor(m_Sensor, ObservationStacks);
}
return m_Sensor;
}

/// <returns>The observation shape of the associated <see cref="CameraSensor"/> object.</returns>
public override int[] GetObservationShape()
{
return CameraSensor.GenerateShape(m_Width, m_Height, Grayscale);
var stacks = ObservationStacks > 1 ? ObservationStacks : 1;
var cameraSensorshape = CameraSensor.GenerateShape(m_Width, m_Height, Grayscale);
if (stacks > 1)
{
cameraSensorshape[cameraSensorshape.Length - 1] *= stacks;
}
return cameraSensorshape;
}
/// <summary>

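The hunk above adds an `ObservationStacks` option to `CameraSensorComponent` and folds the stack count into the reported observation shape. A minimal setup sketch (the component lookup and the stack count of 4 are assumptions for illustration; as noted above, changing the value after the sensor is created has no effect):

```csharp
using Unity.MLAgents.Sensors;
using UnityEngine;

// Illustrative only: configure camera-frame stacking before the Agent initializes.
public class StackedCameraSetup : MonoBehaviour
{
    void Awake()
    {
        var cameraSensor = GetComponent<CameraSensorComponent>();
        cameraSensor.ObservationStacks = 4;   // stack the 4 most recent frames
    }
}
```

With grayscale off and an 84x84 camera, `GetObservationShape()` would then report `{ 84, 84, 12 }`, since the channel dimension is multiplied by the stack count in the code above.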
52
com.unity.ml-agents/Runtime/Sensors/ObservationWriter.cs


}
}
}
public static class ObservationWriterExtension
{
/// <summary>
/// Writes a Texture2D into an ObservationWriter.
/// </summary>
/// <param name="obsWriter">
/// Writer to fill with Texture data.
/// </param>
/// <param name="texture">
/// The texture to be put into the tensor.
/// </param>
/// <param name="grayScale">
/// If set to <c>true</c> the textures will be converted to grayscale before
/// being stored in the tensor.
/// </param>
/// <returns>The number of floats written</returns>
public static int WriteTexture(
this ObservationWriter obsWriter,
Texture2D texture,
bool grayScale)
{
var width = texture.width;
var height = texture.height;
var texturePixels = texture.GetPixels32();
// During training, we convert from Texture to PNG before sending to the trainer, which has the
// effect of flipping the image. We need another flip here at inference time to match this.
for (var h = height - 1; h >= 0; h--)
{
for (var w = 0; w < width; w++)
{
var currentPixel = texturePixels[(height - h - 1) * width + w];
if (grayScale)
{
obsWriter[h, w, 0] =
(currentPixel.r + currentPixel.g + currentPixel.b) / 3f / 255.0f;
}
else
{
// For Color32, the r, g and b values are between 0 and 255.
obsWriter[h, w, 0] = currentPixel.r / 255.0f;
obsWriter[h, w, 1] = currentPixel.g / 255.0f;
obsWriter[h, w, 2] = currentPixel.b / 255.0f;
}
}
}
return height * width * (grayScale ? 1 : 3);
}
}
}

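The `WriteTexture` extension above replaces the former `Utilities.TextureToTensorProxy` helper (removed further down). A hedged sketch of calling it against a plain float buffer, using the `SetTarget` pattern shown in `SensorHelper`:

```csharp
using Unity.MLAgents.Sensors;
using UnityEngine;

// Illustrative only: write an RGB Texture2D into a float buffer shaped (height, width, 3).
public static class WriteTextureSketch
{
    public static float[] ToObservation(Texture2D texture)
    {
        var shape = new[] { texture.height, texture.width, 3 };
        var buffer = new float[texture.height * texture.width * 3];

        var writer = new ObservationWriter();
        writer.SetTarget(buffer, shape, 0);

        // Writes height * width * 3 floats; pass grayScale: true for a single channel.
        writer.WriteTexture(texture, grayScale: false);
        return buffer;
    }
}
```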
2
com.unity.ml-agents/Runtime/Sensors/RenderTextureSensor.cs


using (TimerStack.Instance.Scoped("RenderTextureSensor.Write"))
{
var texture = ObservationToTexture(m_RenderTexture);
var numWritten = Utilities.TextureToTensorProxy(texture, writer, m_Grayscale);
var numWritten = writer.WriteTexture(texture, m_Grayscale);
DestroyTexture(texture);
return numWritten;
}

28
com.unity.ml-agents/Runtime/Sensors/RenderTextureSensorComponent.cs


set { m_Grayscale = value; }
}
[HideInInspector, SerializeField]
[Range(1, 50)]
[Tooltip("Number of frames that will be stacked before being fed to the neural network.")]
int m_ObservationStacks = 1;
[HideInInspector, SerializeField, FormerlySerializedAs("compression")]
SensorCompressionType m_Compression = SensorCompressionType.PNG;

set { m_Compression = value; UpdateSensor(); }
}
/// <summary>
/// Whether to stack previous observations. Using 1 means no previous observations.
/// Note that changing this after the sensor is created has no effect.
/// </summary>
public int ObservationStacks
{
get { return m_ObservationStacks; }
set { m_ObservationStacks = value; }
}
if (ObservationStacks != 1)
{
return new StackingSensor(m_Sensor, ObservationStacks);
}
return m_Sensor;
}

var width = RenderTexture != null ? RenderTexture.width : 0;
var height = RenderTexture != null ? RenderTexture.height : 0;
var observationShape = new[] { height, width, Grayscale ? 1 : 3 };
return new[] { height, width, Grayscale ? 1 : 3 };
var stacks = ObservationStacks > 1 ? ObservationStacks : 1;
if (stacks > 1)
{
observationShape[2] *= stacks;
}
return observationShape;
}
/// <summary>

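`RenderTextureSensorComponent` gets the same stacking option as the camera sensor above, with the stack count multiplied into the channel dimension of the shape. A brief sketch (texture size and stack count are illustrative; they match the 24x16 texture used in the tests below):

```csharp
using Unity.MLAgents.Sensors;
using UnityEngine;

// Illustrative only: an RGB RenderTexture sensor with 2 stacked frames.
// GetObservationShape() would report { 16, 24, 6 } for a 24x16 texture.
public class StackedRenderTextureSetup : MonoBehaviour
{
    public RenderTexture texture;

    void Awake()
    {
        var sensorComponent = GetComponent<RenderTextureSensorComponent>();
        sensorComponent.RenderTexture = texture;
        sensorComponent.ObservationStacks = 2;
    }
}
```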
198
com.unity.ml-agents/Runtime/Sensors/StackingSensor.cs


using System;
using System.Linq;
using System.Runtime.CompilerServices;
using UnityEngine;
using Unity.Barracuda;
[assembly: InternalsVisibleTo("Unity.ML-Agents.Editor.Tests")]
namespace Unity.MLAgents.Sensors
{

/// For example, 4 stacked sets of observations would be output like
/// | t = now - 3 | t = now - 2 | t = now - 1 | t = now |
/// Internally, a circular buffer of arrays is used. The m_CurrentIndex represents the most recent observation.
///
/// Currently, compressed and multidimensional observations are not supported.
/// Currently, observations are stacked on the last dimension.
public class StackingSensor : ISensor
public class StackingSensor : ISparseChannelSensor
{
/// <summary>
/// The wrapped sensor.

string m_Name;
int[] m_Shape;
int[] m_WrappedShape;
/// <summary>
/// Buffer of previous observations

byte[][] m_StackedCompressedObservations;
byte[] m_EmptyCompressedObservation;
int[] m_CompressionMapping;
TensorShape m_tensorShape;
/// <summary>
/// Initializes the sensor.

m_Name = $"StackingSensor_size{numStackedObservations}_{wrapped.GetName()}";
if (wrapped.GetCompressionType() != SensorCompressionType.None)
throw new UnityAgentsException("StackingSensor doesn't support compressed observations.'");
var shape = wrapped.GetObservationShape();
if (shape.Length != 1)
throw new UnityAgentsException("Only 1-D observations are supported by StackingSensor");
m_Shape = new int[shape.Length];
m_UnstackedObservationSize = wrapped.ObservationSize();
for (int d = 0; d < shape.Length; d++)
m_Shape[d] = shape[d];
// TODO support arbitrary stacking dimension
m_Shape[0] *= numStackedObservations;
m_StackedObservations = new float[numStackedObservations][];
for (var i = 0; i < numStackedObservations; i++)
m_StackedObservations[i] = new float[m_UnstackedObservationSize];
m_WrappedShape = wrapped.GetObservationShape();
m_Shape = new int[m_WrappedShape.Length];
m_UnstackedObservationSize = wrapped.ObservationSize();
for (int d = 0; d < m_WrappedShape.Length; d++)
m_Shape[d] = m_WrappedShape[d];
// TODO support arbitrary stacking dimension
m_Shape[m_Shape.Length - 1] *= numStackedObservations;
// Initialize uncompressed buffer anyway in case python trainer does not
// support the compression mapping and has to fall back to uncompressed obs.
m_StackedObservations = new float[numStackedObservations][];
for (var i = 0; i < numStackedObservations; i++)
m_StackedObservations[i] = new float[m_UnstackedObservationSize];
if (m_WrappedSensor.GetCompressionType() != SensorCompressionType.None)
m_StackedCompressedObservations = new byte[numStackedObservations][];
m_EmptyCompressedObservation = CreateEmptyPNG();
for (var i = 0; i < numStackedObservations; i++)
{
m_StackedCompressedObservations[i] = m_EmptyCompressedObservation;
}
m_CompressionMapping = ConstructStackedCompressedChannelMapping(wrapped);
if (m_Shape.Length != 1)
m_tensorShape = new TensorShape(0, m_WrappedShape[0], m_WrappedShape[1], m_WrappedShape[2]);
}
}

// First, call the wrapped sensor's write method. Make sure to use our own writer, not the passed one.
var wrappedShape = m_WrappedSensor.GetObservationShape();
m_LocalWriter.SetTarget(m_StackedObservations[m_CurrentIndex], wrappedShape, 0);
m_LocalWriter.SetTarget(m_StackedObservations[m_CurrentIndex], m_WrappedShape, 0);
for (var i = 0; i < m_NumStackedObservations; i++)
if (m_WrappedShape.Length == 1)
{
for (var i = 0; i < m_NumStackedObservations; i++)
{
var obsIndex = (m_CurrentIndex + 1 + i) % m_NumStackedObservations;
writer.AddRange(m_StackedObservations[obsIndex], numWritten);
numWritten += m_UnstackedObservationSize;
}
}
else
var obsIndex = (m_CurrentIndex + 1 + i) % m_NumStackedObservations;
writer.AddRange(m_StackedObservations[obsIndex], numWritten);
numWritten += m_UnstackedObservationSize;
for (var i = 0; i < m_NumStackedObservations; i++)
{
var obsIndex = (m_CurrentIndex + 1 + i) % m_NumStackedObservations;
for (var h = 0; h < m_WrappedShape[0]; h++)
{
for (var w = 0; w < m_WrappedShape[1]; w++)
{
for (var c = 0; c < m_WrappedShape[2]; c++)
{
writer[h, w, i * m_WrappedShape[2] + c] = m_StackedObservations[obsIndex][m_tensorShape.Index(0, h, w, c)];
}
}
}
}
numWritten = m_WrappedShape[0] * m_WrappedShape[1] * m_WrappedShape[2] * m_NumStackedObservations;
}
return numWritten;

{
Array.Clear(m_StackedObservations[i], 0, m_StackedObservations[i].Length);
}
if (m_WrappedSensor.GetCompressionType() != SensorCompressionType.None)
{
for (var i = 0; i < m_NumStackedObservations; i++)
{
m_StackedCompressedObservations[i] = m_EmptyCompressedObservation;
}
}
}
/// <inheritdoc/>

}
/// <inheritdoc/>
public virtual byte[] GetCompressedObservation()
public byte[] GetCompressedObservation()
return null;
var compressed = m_WrappedSensor.GetCompressedObservation();
m_StackedCompressedObservations[m_CurrentIndex] = compressed;
int bytesLength = 0;
foreach (byte[] compressedObs in m_StackedCompressedObservations)
{
bytesLength += compressedObs.Length;
}
byte[] outputBytes = new byte[bytesLength];
int offset = 0;
for (var i = 0; i < m_NumStackedObservations; i++)
{
var obsIndex = (m_CurrentIndex + 1 + i) % m_NumStackedObservations;
Buffer.BlockCopy(m_StackedCompressedObservations[obsIndex],
0, outputBytes, offset, m_StackedCompressedObservations[obsIndex].Length);
offset += m_StackedCompressedObservations[obsIndex].Length;
}
return outputBytes;
public virtual SensorCompressionType GetCompressionType()
public int[] GetCompressedChannelMapping()
return SensorCompressionType.None;
return m_CompressionMapping;
// TODO support stacked compressed observations (byte stream)
/// <inheritdoc/>
public SensorCompressionType GetCompressionType()
{
return m_WrappedSensor.GetCompressionType();
}
/// <summary>
/// Create Empty PNG for initializing the buffer for stacking.
/// </summary>
internal byte[] CreateEmptyPNG()
{
int height = m_WrappedSensor.GetObservationShape()[0];
int width = m_WrappedSensor.GetObservationShape()[1];
var texture2D = new Texture2D(width, height, TextureFormat.RGB24, false);
Color32[] resetColorArray = texture2D.GetPixels32();
Color32 black = new Color32(0, 0, 0, 0);
for (int i = 0; i < resetColorArray.Length; i++)
{
resetColorArray[i] = black;
}
texture2D.SetPixels32(resetColorArray);
texture2D.Apply();
return texture2D.EncodeToPNG();
}
/// <summary>
/// Construct stacked CompressedChannelMapping.
/// </summary>
internal int[] ConstructStackedCompressedChannelMapping(ISensor wrappedSenesor)
{
// Get CompressedChannelMapping of the wrapped sensor. If the
// wrapped sensor doesn't have one, use default mapping.
// Default mapping: {0, 0, 0} for grayscale, identity mapping {0, 1, ..., n-1} otherwise.
int[] wrappedMapping = null;
int wrappedNumChannel = wrappedSenesor.GetObservationShape()[2];
var sparseChannelSensor = m_WrappedSensor as ISparseChannelSensor;
if (sparseChannelSensor != null)
{
wrappedMapping = sparseChannelSensor.GetCompressedChannelMapping();
}
if (wrappedMapping == null)
{
if (wrappedNumChannel == 1)
{
wrappedMapping = new int[] { 0, 0, 0 };
}
else
{
wrappedMapping = Enumerable.Range(0, wrappedNumChannel).ToArray();
}
}
// Construct stacked mapping using the mapping of wrapped sensor.
// First pad the wrapped mapping to multiple of 3, then repeat
// and add offset to each copy to form the stacked mapping.
int paddedMapLength = (wrappedMapping.Length + 2) / 3 * 3;
var compressionMapping = new int[paddedMapLength * m_NumStackedObservations];
for (var i = 0; i < m_NumStackedObservations; i++)
{
var offset = wrappedNumChannel * i;
for (var j = 0; j < paddedMapLength; j++)
{
if (j < wrappedMapping.Length)
{
compressionMapping[j + paddedMapLength * i] = wrappedMapping[j] >= 0 ? wrappedMapping[j] + offset : -1;
}
else
{
compressionMapping[j + paddedMapLength * i] = -1;
}
}
}
return compressionMapping;
}
}
}

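To make the padding-and-offset arithmetic in `ConstructStackedCompressedChannelMapping` concrete, here is a worked sketch; the numbers mirror the `TestStackingMapping` case in the tests further down:

```csharp
// Wrapped sensor: 4 channels with mapping {0, 1, 2, 3}, stacked twice.
//   padded length   = ((4 + 2) / 3) * 3 = 6   (each PNG carries 3 channels)
//   per-copy offset = 4                        (the wrapped channel count)
// -1 marks padding channels that the trainer should drop.
int[] stackedMapping =
{
    0, 1, 2, 3, -1, -1,   // copy 0
    4, 5, 6, 7, -1, -1,   // copy 1, offset by 4
};
```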
47
com.unity.ml-agents/Runtime/Utilities.cs


{
internal static class Utilities
{
/// <summary>
/// Puts a Texture2D into a ObservationWriter.
/// </summary>
/// <param name="texture">
/// The texture to be put into the tensor.
/// </param>
/// <param name="obsWriter">
/// Writer to fill with Texture data.
/// </param>
/// <param name="grayScale">
/// If set to <c>true</c> the textures will be converted to grayscale before
/// being stored in the tensor.
/// </param>
/// <returns>The number of floats written</returns>
internal static int TextureToTensorProxy(
Texture2D texture,
ObservationWriter obsWriter,
bool grayScale)
{
var width = texture.width;
var height = texture.height;
var texturePixels = texture.GetPixels32();
// During training, we convert from Texture to PNG before sending to the trainer, which has the
// effect of flipping the image. We need another flip here at inference time to match this.
for (var h = height - 1; h >= 0; h--)
{
for (var w = 0; w < width; w++)
{
var currentPixel = texturePixels[(height - h - 1) * width + w];
if (grayScale)
{
obsWriter[h, w, 0] =
(currentPixel.r + currentPixel.g + currentPixel.b) / 3f / 255.0f;
}
else
{
// For Color32, the r, g and b values are between 0 and 255.
obsWriter[h, w, 0] = currentPixel.r / 255.0f;
obsWriter[h, w, 1] = currentPixel.g / 255.0f;
obsWriter[h, w, 2] = currentPixel.b / 255.0f;
}
}
}
return height * width * (grayScale ? 1 : 3);
}
/// <summary>
/// Calculates the cumulative sum of an integer array. The result array will be one element

31
com.unity.ml-agents/Tests/Editor/Communicator/GrpcExtensionsTests.cs


}
}
class DummySparseChannelSensor : DummySensor, ISparseChannelSensor
{
public int[] Mapping;
internal DummySparseChannelSensor()
{
}
public int[] GetCompressedChannelMapping()
{
return Mapping;
}
}
[Test]
public void TestGetObservationProtoCapabilities()
{

}
}
[Test]
public void TestIsTrivialMapping()
{
Assert.AreEqual(GrpcExtensions.IsTrivialMapping(new DummySensor()), true);
var sparseChannelSensor = new DummySparseChannelSensor();
sparseChannelSensor.Mapping = null;
Assert.AreEqual(GrpcExtensions.IsTrivialMapping(sparseChannelSensor), true);
sparseChannelSensor.Mapping = new int[] { 0, 0, 0 };
Assert.AreEqual(GrpcExtensions.IsTrivialMapping(sparseChannelSensor), true);
sparseChannelSensor.Mapping = new int[] { 0, 1, 2, 3, 4 };
Assert.AreEqual(GrpcExtensions.IsTrivialMapping(sparseChannelSensor), true);
sparseChannelSensor.Mapping = new int[] { 1, 2, 3, 4, -1, -1 };
Assert.AreEqual(GrpcExtensions.IsTrivialMapping(sparseChannelSensor), false);
sparseChannelSensor.Mapping = new int[] { 0, 0, 0, 1, 1, 1 };
Assert.AreEqual(GrpcExtensions.IsTrivialMapping(sparseChannelSensor), false);
}
}
}

156
com.unity.ml-agents/Tests/Editor/Sensor/StackingSensorTests.cs


using NUnit.Framework;
using System;
using System.Linq;
using UnityEngine;
using Unity.MLAgents.Sensors;
namespace Unity.MLAgents.Tests

}
[Test]
public void TestStacking()
public void TestVectorStacking()
{
VectorSensor wrapped = new VectorSensor(2);
ISensor sensor = new StackingSensor(wrapped, 3);

}
[Test]
public void TestStackingReset()
public void TestVectorStackingReset()
{
VectorSensor wrapped = new VectorSensor(2);
ISensor sensor = new StackingSensor(wrapped, 3);

sensor.Reset();
wrapped.AddObservation(new[] { 5f, 6f });
SensorTestHelper.CompareObservation(sensor, new[] { 0f, 0f, 0f, 0f, 5f, 6f });
}
class Dummy3DSensor : ISparseChannelSensor
{
public SensorCompressionType CompressionType = SensorCompressionType.PNG;
public int[] Mapping;
public int[] Shape;
public float[,,] CurrentObservation;
internal Dummy3DSensor()
{
}
public int[] GetObservationShape()
{
return Shape;
}
public int Write(ObservationWriter writer)
{
for (var h = 0; h < Shape[0]; h++)
{
for (var w = 0; w < Shape[1]; w++)
{
for (var c = 0; c < Shape[2]; c++)
{
writer[h, w, c] = CurrentObservation[h, w, c];
}
}
}
return Shape[0] * Shape[1] * Shape[2];
}
public byte[] GetCompressedObservation()
{
var writer = new ObservationWriter();
var flattenedObservation = new float[Shape[0] * Shape[1] * Shape[2]];
writer.SetTarget(flattenedObservation, Shape, 0);
Write(writer);
byte[] bytes = Array.ConvertAll(flattenedObservation, (z) => (byte)z);
return bytes;
}
public void Update() { }
public void Reset() { }
public SensorCompressionType GetCompressionType()
{
return CompressionType;
}
public string GetName()
{
return "Dummy";
}
public int[] GetCompressedChannelMapping()
{
return Mapping;
}
}
[Test]
public void TestStackingMapping()
{
// Test grayscale stacked mapping with CameraSensor
var cameraSensor = new CameraSensor(new Camera(), 64, 64,
true, "grayscaleCamera", SensorCompressionType.PNG);
var stackedCameraSensor = new StackingSensor(cameraSensor, 2);
Assert.AreEqual(stackedCameraSensor.GetCompressedChannelMapping(), new[] { 0, 0, 0, 1, 1, 1 });
// Test RGB stacked mapping with RenderTextureSensor
var renderTextureSensor = new RenderTextureSensor(new RenderTexture(24, 16, 0),
false, "renderTexture", SensorCompressionType.PNG);
var stackedRenderTextureSensor = new StackingSensor(renderTextureSensor, 2);
Assert.AreEqual(stackedRenderTextureSensor.GetCompressedChannelMapping(), new[] { 0, 1, 2, 3, 4, 5 });
// Test mapping with number of layers not being multiple of 3
var dummySensor = new Dummy3DSensor();
dummySensor.Shape = new int[] { 2, 2, 4 };
dummySensor.Mapping = new int[] { 0, 1, 2, 3 };
var stackedDummySensor = new StackingSensor(dummySensor, 2);
Assert.AreEqual(stackedDummySensor.GetCompressedChannelMapping(), new[] { 0, 1, 2, 3, -1, -1, 4, 5, 6, 7, -1, -1 });
// Test mapping with dummy layers that should be dropped
var paddedDummySensor = new Dummy3DSensor();
paddedDummySensor.Shape = new int[] { 2, 2, 4 };
paddedDummySensor.Mapping = new int[] { 0, 1, 2, 3, -1, -1 };
var stackedPaddedDummySensor = new StackingSensor(paddedDummySensor, 2);
Assert.AreEqual(stackedPaddedDummySensor.GetCompressedChannelMapping(), new[] { 0, 1, 2, 3, -1, -1, 4, 5, 6, 7, -1, -1 });
}
[Test]
public void Test3DStacking()
{
var wrapped = new Dummy3DSensor();
wrapped.Shape = new int[] { 2, 1, 2 };
var sensor = new StackingSensor(wrapped, 2);
// Check the stacking is on the last dimension
wrapped.CurrentObservation = new[, ,] { { { 1f, 2f } }, { { 3f, 4f } } };
SensorTestHelper.CompareObservation(sensor, new[, ,] { { { 0f, 0f, 1f, 2f } }, { { 0f, 0f, 3f, 4f } } });
sensor.Update();
wrapped.CurrentObservation = new[, ,] { { { 5f, 6f } }, { { 7f, 8f } } };
SensorTestHelper.CompareObservation(sensor, new[, ,] { { { 1f, 2f, 5f, 6f } }, { { 3f, 4f, 7f, 8f } } });
sensor.Update();
wrapped.CurrentObservation = new[, ,] { { { 9f, 10f } }, { { 11f, 12f } } };
SensorTestHelper.CompareObservation(sensor, new[, ,] { { { 5f, 6f, 9f, 10f } }, { { 7f, 8f, 11f, 12f } } });
// Check that if we don't call Update(), the same observations are produced
SensorTestHelper.CompareObservation(sensor, new[, ,] { { { 5f, 6f, 9f, 10f } }, { { 7f, 8f, 11f, 12f } } });
// Test reset
sensor.Reset();
wrapped.CurrentObservation = new[, ,] { { { 13f, 14f } }, { { 15f, 16f } } };
SensorTestHelper.CompareObservation(sensor, new[, ,] { { { 0f, 0f, 13f, 14f } }, { { 0f, 0f, 15f, 16f } } });
}
[Test]
public void TestStackedGetCompressedObservation()
{
var wrapped = new Dummy3DSensor();
wrapped.Shape = new int[] { 1, 1, 3 };
var sensor = new StackingSensor(wrapped, 2);
wrapped.CurrentObservation = new[, ,] { { { 1f, 2f, 3f } } };
var expected1 = sensor.CreateEmptyPNG();
expected1 = expected1.Concat(Array.ConvertAll(new[] { 1f, 2f, 3f }, (z) => (byte)z)).ToArray();
Assert.AreEqual(sensor.GetCompressedObservation(), expected1);
sensor.Update();
wrapped.CurrentObservation = new[, ,] { { { 4f, 5f, 6f } } };
var expected2 = Array.ConvertAll(new[] { 1f, 2f, 3f, 4f, 5f, 6f }, (z) => (byte)z);
Assert.AreEqual(sensor.GetCompressedObservation(), expected2);
sensor.Update();
wrapped.CurrentObservation = new[, ,] { { { 7f, 8f, 9f } } };
var expected3 = Array.ConvertAll(new[] { 4f, 5f, 6f, 7f, 8f, 9f }, (z) => (byte)z);
Assert.AreEqual(sensor.GetCompressedObservation(), expected3);
// Test reset
sensor.Reset();
wrapped.CurrentObservation = new[, ,] { { { 10f, 11f, 12f } } };
var expected4 = sensor.CreateEmptyPNG();
expected4 = expected4.Concat(Array.ConvertAll(new[] { 10f, 11f, 12f }, (z) => (byte)z)).ToArray();
Assert.AreEqual(sensor.GetCompressedObservation(), expected4);
}
}
}

10
com.unity.ml-agents/Tests/Editor/Sensor/VectorSensorTests.cs


namespace Unity.MLAgents.Tests
{
public static class SensorTestHelper
{
public static void CompareObservation(ISensor sensor, float[] expected)
{
string errorMessage;
bool isOK = SensorHelper.CompareObservation(sensor, expected, out errorMessage);
Assert.IsTrue(isOK, errorMessage);
}
}
public class VectorSensorTests
{
[Test]

4
com.unity.ml-agents/Tests/Runtime/RuntimeAPITest.cs


[SetUp]
public static void Setup()
{
if (Academy.IsInitialized)
{
Academy.Instance.Dispose();
}
Academy.Instance.AutomaticSteppingEnabled = false;
}

2
com.unity.ml-agents/package.json


{
"name": "com.unity.ml-agents",
"displayName": "ML Agents",
"version": "1.4.0-preview",
"version": "1.5.0-preview",
"unity": "2018.4",
"description": "Use state-of-the-art machine learning to create intelligent character behaviors in any Unity environment (games, robotics, film, etc.).",
"dependencies": {

4
docs/Installation-Anaconda-Windows.md


the ml-agents Conda environment by typing `activate ml-agents`)_:
```sh
git clone --branch release_7 https://github.com/Unity-Technologies/ml-agents.git
git clone --branch release_8 https://github.com/Unity-Technologies/ml-agents.git
The `--branch release_7` option will switch to the tag of the latest stable
The `--branch release_8` option will switch to the tag of the latest stable
release. Omitting that will get the `master` branch which is potentially
unstable.

6
docs/Installation.md


of our tutorials / guides assume you have access to our example environments).
```sh
git clone --branch release_7 https://github.com/Unity-Technologies/ml-agents.git
git clone --branch release_8 https://github.com/Unity-Technologies/ml-agents.git
The `--branch release_7` option will switch to the tag of the latest stable
The `--branch release_8` option will switch to the tag of the latest stable
release. Omitting that will get the `master` branch which is potentially
unstable.

ML-Agents Toolkit for your purposes. If you plan to contribute those changes
back, make sure to clone the `master` branch (by omitting `--branch release_7`
back, make sure to clone the `master` branch (by omitting `--branch release_8`
from the command above). See our
[Contributions Guidelines](../com.unity.ml-agents/CONTRIBUTING.md) for more
information on contributing to the ML-Agents Toolkit.

4
docs/Learning-Environment-Create-New.md


}
// Fell off platform
if (this.transform.localPosition.y < 0)
else if (this.transform.localPosition.y < 0)
{
EndEpisode();
}

1. Add the `Decision Requester` script with the Add Component button from the
RollerAgent Inspector.
1. Change **Decision Period** to `10`. For more information on decisions, see [the Agent documentation](Learning-Environment-Design-Agents.md#decisions)
1. Drag the Target GameObject from the Hierarchy window to the RollerAgent
Target field.
1. Add the `Behavior Parameters` script with the Add Component button from the
RollerAgent Inspector.
1. Modify the Behavior Parameters of the Agent :

2
docs/Learning-Environment-Design-Agents.md


AddReward(1.0f);
EndEpisode();
}
if (hitObjects.Where(col => col.gameObject.tag == "pit").ToArray().Length == 1)
else if (hitObjects.Where(col => col.gameObject.tag == "pit").ToArray().Length == 1)
{
AddReward(-1f);
EndEpisode();

10
docs/Learning-Environment-Examples.md


rotation of the agent cube and position of ball.
- Vector Action space: (Continuous) Size of 2, with one value corresponding to
X-rotation, and the other to Z-rotation.
- Visual Observations: None.
- Visual Observations: Third-person view from the upper-front of the agent. Use
`Visual3DBall` scene.
- Float Properties: Three
- scale: Specifies the scale of the ball in the 3 dimensions (equal across the
three dimensions)

- Side Motion (3 possible actions: Left, Right, No Action)
- Rotation (3 possible actions: Rotate Left, Rotate Right, No Action)
- Laser (2 possible actions: Laser, No Action)
- Visual Observations (Optional): First-person camera per-agent. Use
`VisualFoodCollector` scene. **The visual observation version of this
environment does not train with the provided default training parameters.**
- Visual Observations (Optional): First-person camera per-agent, plus one vector
flag representing the frozen state of the agent. This scene uses a combination
of vector and visual observations and the training will not succeed without
the frozen vector flag. Use `VisualFoodCollector` scene.
- Float Properties: Two
- laser_length: Length of the laser used by the agent
- Default: 1

24
docs/ML-Agents-Overview.md


- [A Quick Note on Reward Signals](#a-quick-note-on-reward-signals)
- [Deep Reinforcement Learning](#deep-reinforcement-learning)
- [Curiosity for Sparse-reward Environments](#curiosity-for-sparse-reward-environments)
- [RND for Sparse-reward Environments](#rnd-for-sparse-reward-environments)
- [Imitation Learning](#imitation-learning)
- [GAIL (Generative Adversarial Imitation Learning)](#gail-generative-adversarial-imitation-learning)
- [Behavioral Cloning (BC)](#behavioral-cloning-bc)

and intrinsic reward signals.
The ML-Agents Toolkit allows reward signals to be defined in a modular way, and
we provide three reward signals that can be mixed and matched to help shape
we provide four reward signals that can be mixed and matched to help shape
your agent's behavior:
- `extrinsic`: represents the rewards defined in your environment, and is

- `curiosity`: represents an intrinsic reward signal that encourages exploration
in sparse-reward environments that is defined by the Curiosity module (see
below).
- `rnd`: represents an intrinsic reward signal that encourages exploration
in sparse-reward environments that is defined by the RND module (see
below). (Not available for TensorFlow trainers)
### Deep Reinforcement Learning

For more information, see our dedicated
[blog post on the Curiosity module](https://blogs.unity3d.com/2018/06/26/solving-sparse-reward-tasks-with-curiosity/).
#### RND for Sparse-reward Environments
Similarly to Curiosity, Random Network Distillation (RND) is useful in sparse or rare
reward environments as it helps the Agent explore. The RND Module is implemented following
the paper [Exploration by Random Network Distillation](https://arxiv.org/abs/1810.12894).
RND uses two networks:
- The first is a network with fixed random weights that takes observations as inputs and
generates an encoding
- The second is a network with similar architecture that is trained to predict the
outputs of the first network and uses the observations the Agent collects as training data.
The loss (the squared difference between the predicted and actual encoded observations)
of the trained model is used as intrinsic reward. The more an Agent visits a state, the
more accurate the predictions and the lower the rewards, which encourages the Agent to
explore new states with higher prediction errors.
__Note:__ RND is not available for TensorFlow trainers (only PyTorch trainers)
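As a rough sketch of the idea (notation ours, not taken from the toolkit docs): with a fixed, randomly initialized encoder $f$ and a trained predictor $\hat{f}_\theta$, the intrinsic reward for an observation $o_t$ is the predictor's error,

$$ r^{\text{RND}}_t = \lVert \hat{f}_\theta(o_t) - f(o_t) \rVert^2 , $$

which shrinks for observations the Agent visits often and stays high for novel ones.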
### Imitation Learning

16
docs/Migrating.md


# Migrating
## Migrating from Release 3 to latest
## Migrating from Release 7 to latest
### Important changes
- Some trainer files were moved. If you were using the `TrainerFactory` class, it was moved to
the `trainers/trainer` folder.
- The `components` folder containing `bc` and `reward_signals` code was moved to the `trainers/tf`
folder
### Steps to Migrate
- Replace calls to `from mlagents.trainers.trainer_util import TrainerFactory` to `from mlagents.trainers.trainer import TrainerFactory`
- Replace calls to `from mlagents.trainers.trainer_util import handle_existing_directories` to `from mlagents.trainers.directory_utils import validate_existing_directories`
- Replace `mlagents.trainers.components` with `mlagents.trainers.tf.components` in your import statements.
## Migrating from Release 3 to Release 7
### Important changes
- The Parameter Randomization feature has been merged with the Curriculum feature. It is now possible to specify a sampler

13
docs/Training-Configuration-File.md


- [Extrinsic Rewards](#extrinsic-rewards)
- [Curiosity Intrinsic Reward](#curiosity-intrinsic-reward)
- [GAIL Intrinsic Reward](#gail-intrinsic-reward)
- [RND Intrinsic Reward](#rnd-intrinsic-reward)
- [Reward Signal Settings for SAC](#reward-signal-settings-for-sac)
- [Behavioral Cloning](#behavioral-cloning)
- [Memory-enhanced Agents using Recurrent Neural Networks](#memory-enhanced-agents-using-recurrent-neural-networks)

| `gail -> learning_rate` | (Optional, default = `3e-4`) Learning rate used to update the discriminator. This should typically be decreased if training is unstable, and the GAIL loss is unstable. <br><br>Typical range: `1e-5` - `1e-3` |
| `gail -> use_actions` | (default = `false`) Determines whether the discriminator should discriminate based on both observations and actions, or just observations. Set to True if you want the agent to mimic the actions from the demonstrations, and False if you'd rather have the agent visit the same states as in the demonstrations but with possibly different actions. Setting to False is more likely to be stable, especially with imperfect demonstrations, but may learn slower. |
| `gail -> use_vail` | (default = `false`) Enables a variational bottleneck within the GAIL discriminator. This forces the discriminator to learn a more general representation and reduces its tendency to be "too good" at discriminating, making learning more stable. However, it does increase training time. Enable this if you notice your imitation learning is unstable, or unable to learn the task at hand. |
### RND Intrinsic Reward
Random Network Distillation (RND) is only available for the PyTorch trainers.
To enable RND, provide these settings:
| **Setting** | **Description** |
| :--------------------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| `rnd -> strength` | (default = `1.0`) Magnitude of the reward generated by the intrinsic RND module. This should be scaled in order to ensure it is large enough to not be overwhelmed by extrinsic reward signals in the environment. Likewise it should not be too large to overwhelm the extrinsic reward signal. <br><br>Typical range: `0.001` - `0.01` |
| `rnd -> gamma` | (default = `0.99`) Discount factor for future rewards. <br><br>Typical range: `0.8` - `0.995` |
| `rnd -> encoding_size` | (default = `64`) Size of the encoding used by the intrinsic RND model. <br><br>Typical range: `64` - `256` |
| `rnd -> learning_rate` | (default = `3e-4`) Learning rate used to update the RND module. This should be large enough for the RND module to quickly learn the state representation, but small enough to allow for stable learning. <br><br>Typical range: `1e-5` - `1e-3` |
## Behavioral Cloning

2
docs/Training-on-Amazon-Web-Service.md


2. Clone the ML-Agents repo and install the required Python packages
```sh
git clone --branch release_7 https://github.com/Unity-Technologies/ml-agents.git
git clone --branch release_8 https://github.com/Unity-Technologies/ml-agents.git
cd ml-agents/ml-agents/
pip3 install -e .
```

4
docs/Unity-Inference-Engine.md


loading expects certain conventions for constants and tensor names. While it is
possible to construct a model that follows these conventions, we don't provide
any additional help for this. More details can be found in
[TensorNames.cs](https://github.com/Unity-Technologies/ml-agents/blob/release_7_docs/com.unity.ml-agents/Runtime/Inference/TensorNames.cs)
[TensorNames.cs](https://github.com/Unity-Technologies/ml-agents/blob/release_8_docs/com.unity.ml-agents/Runtime/Inference/TensorNames.cs)
[BarracudaModelParamLoader.cs](https://github.com/Unity-Technologies/ml-agents/blob/release_7_docs/com.unity.ml-agents/Runtime/Inference/BarracudaModelParamLoader.cs).
[BarracudaModelParamLoader.cs](https://github.com/Unity-Technologies/ml-agents/blob/release_8_docs/com.unity.ml-agents/Runtime/Inference/BarracudaModelParamLoader.cs).
If you wish to run inference on an externally trained model, you should use
Barracuda directly, instead of trying to run it through ML-Agents.

2
gym-unity/gym_unity/__init__.py


# Version of the library that will be used to upload to pypi
__version__ = "0.21.0.dev0"
__version__ = "0.22.0.dev0"
# Git tag that will be checked to determine whether to trigger upload to pypi
__release_tag__ = None

4
gym-unity/setup.py


class VerifyVersionCommand(install):
"""
Custom command to verify that the git tag is the expected one for the release.
Based on https://circleci.com/blog/continuously-deploying-python-packages-to-pypi-with-circleci/
Originally based on https://circleci.com/blog/continuously-deploying-python-packages-to-pypi-with-circleci/
This differs slightly because our tags and versions are different.
"""

tag = os.getenv("CIRCLE_TAG")
tag = os.getenv("GITHUB_REF", "NO GITHUB TAG!").replace("refs/tags/", "")
if tag != EXPECTED_TAG:
info = "Git tag: {} does not match the expected tag of this app: {}".format(

2
ml-agents-envs/mlagents_envs/__init__.py


# Version of the library that will be used to upload to pypi
__version__ = "0.21.0.dev0"
__version__ = "0.22.0.dev0"
# Git tag that will be checked to determine whether to trigger upload to pypi
__release_tag__ = None

11
ml-agents-envs/mlagents_envs/communicator_objects/capabilities_pb2.py


name='mlagents_envs/communicator_objects/capabilities.proto',
package='communicator_objects',
syntax='proto3',
serialized_pb=_b('\n5mlagents_envs/communicator_objects/capabilities.proto\x12\x14\x63ommunicator_objects\"[\n\x18UnityRLCapabilitiesProto\x12\x1a\n\x12\x62\x61seRLCapabilities\x18\x01 \x01(\x08\x12#\n\x1b\x63oncatenatedPngObservations\x18\x02 \x01(\x08\x42%\xaa\x02\"Unity.MLAgents.CommunicatorObjectsb\x06proto3')
serialized_pb=_b('\n5mlagents_envs/communicator_objects/capabilities.proto\x12\x14\x63ommunicator_objects\"}\n\x18UnityRLCapabilitiesProto\x12\x1a\n\x12\x62\x61seRLCapabilities\x18\x01 \x01(\x08\x12#\n\x1b\x63oncatenatedPngObservations\x18\x02 \x01(\x08\x12 \n\x18\x63ompressedChannelMapping\x18\x03 \x01(\x08\x42%\xaa\x02\"Unity.MLAgents.CommunicatorObjectsb\x06proto3')
)

message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='compressedChannelMapping', full_name='communicator_objects.UnityRLCapabilitiesProto.compressedChannelMapping', index=2,
number=3, type=8, cpp_type=7, label=1,
has_default_value=False, default_value=False,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
],
extensions=[
],

oneofs=[
],
serialized_start=79,
serialized_end=170,
serialized_end=204,
)
DESCRIPTOR.message_types_by_name['UnityRLCapabilitiesProto'] = _UNITYRLCAPABILITIESPROTO

6
ml-agents-envs/mlagents_envs/communicator_objects/capabilities_pb2.pyi


DESCRIPTOR: google___protobuf___descriptor___Descriptor = ...
baseRLCapabilities = ... # type: builtin___bool
concatenatedPngObservations = ... # type: builtin___bool
compressedChannelMapping = ... # type: builtin___bool
compressedChannelMapping : typing___Optional[builtin___bool] = None,
) -> None: ...
@classmethod
def FromString(cls, s: builtin___bytes) -> UnityRLCapabilitiesProto: ...

def ClearField(self, field_name: typing_extensions___Literal[u"baseRLCapabilities",u"concatenatedPngObservations"]) -> None: ...
def ClearField(self, field_name: typing_extensions___Literal[u"baseRLCapabilities",u"compressedChannelMapping",u"concatenatedPngObservations"]) -> None: ...
def ClearField(self, field_name: typing_extensions___Literal[u"baseRLCapabilities",b"baseRLCapabilities",u"concatenatedPngObservations",b"concatenatedPngObservations"]) -> None: ...
def ClearField(self, field_name: typing_extensions___Literal[u"baseRLCapabilities",b"baseRLCapabilities",u"compressedChannelMapping",b"compressedChannelMapping",u"concatenatedPngObservations",b"concatenatedPngObservations"]) -> None: ...

19
ml-agents-envs/mlagents_envs/communicator_objects/observation_pb2.py


name='mlagents_envs/communicator_objects/observation.proto',
package='communicator_objects',
syntax='proto3',
serialized_pb=_b('\n4mlagents_envs/communicator_objects/observation.proto\x12\x14\x63ommunicator_objects\"\xf9\x01\n\x10ObservationProto\x12\r\n\x05shape\x18\x01 \x03(\x05\x12\x44\n\x10\x63ompression_type\x18\x02 \x01(\x0e\x32*.communicator_objects.CompressionTypeProto\x12\x19\n\x0f\x63ompressed_data\x18\x03 \x01(\x0cH\x00\x12\x46\n\nfloat_data\x18\x04 \x01(\x0b\x32\x30.communicator_objects.ObservationProto.FloatDataH\x00\x1a\x19\n\tFloatData\x12\x0c\n\x04\x64\x61ta\x18\x01 \x03(\x02\x42\x12\n\x10observation_data*)\n\x14\x43ompressionTypeProto\x12\x08\n\x04NONE\x10\x00\x12\x07\n\x03PNG\x10\x01\x42%\xaa\x02\"Unity.MLAgents.CommunicatorObjectsb\x06proto3')
serialized_pb=_b('\n4mlagents_envs/communicator_objects/observation.proto\x12\x14\x63ommunicator_objects\"\x9d\x02\n\x10ObservationProto\x12\r\n\x05shape\x18\x01 \x03(\x05\x12\x44\n\x10\x63ompression_type\x18\x02 \x01(\x0e\x32*.communicator_objects.CompressionTypeProto\x12\x19\n\x0f\x63ompressed_data\x18\x03 \x01(\x0cH\x00\x12\x46\n\nfloat_data\x18\x04 \x01(\x0b\x32\x30.communicator_objects.ObservationProto.FloatDataH\x00\x12\"\n\x1a\x63ompressed_channel_mapping\x18\x05 \x03(\x05\x1a\x19\n\tFloatData\x12\x0c\n\x04\x64\x61ta\x18\x01 \x03(\x02\x42\x12\n\x10observation_data*)\n\x14\x43ompressionTypeProto\x12\x08\n\x04NONE\x10\x00\x12\x07\n\x03PNG\x10\x01\x42%\xaa\x02\"Unity.MLAgents.CommunicatorObjectsb\x06proto3')
)
_COMPRESSIONTYPEPROTO = _descriptor.EnumDescriptor(

],
containing_type=None,
options=None,
serialized_start=330,
serialized_end=371,
serialized_start=366,
serialized_end=407,
)
_sym_db.RegisterEnumDescriptor(_COMPRESSIONTYPEPROTO)

extension_ranges=[],
oneofs=[
],
serialized_start=283,
serialized_end=308,
serialized_start=319,
serialized_end=344,
)
_OBSERVATIONPROTO = _descriptor.Descriptor(

message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='compressed_channel_mapping', full_name='communicator_objects.ObservationProto.compressed_channel_mapping', index=4,
number=5, type=5, cpp_type=1, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
],
extensions=[
],

index=0, containing_type=None, fields=[]),
],
serialized_start=79,
serialized_end=328,
serialized_end=364,
)
_OBSERVATIONPROTO_FLOATDATA.containing_type = _OBSERVATIONPROTO

6
ml-agents-envs/mlagents_envs/communicator_objects/observation_pb2.pyi


shape = ... # type: google___protobuf___internal___containers___RepeatedScalarFieldContainer[builtin___int]
compression_type = ... # type: CompressionTypeProto
compressed_data = ... # type: builtin___bytes
compressed_channel_mapping = ... # type: google___protobuf___internal___containers___RepeatedScalarFieldContainer[builtin___int]
@property
def float_data(self) -> ObservationProto.FloatData: ...

compression_type : typing___Optional[CompressionTypeProto] = None,
compressed_data : typing___Optional[builtin___bytes] = None,
float_data : typing___Optional[ObservationProto.FloatData] = None,
compressed_channel_mapping : typing___Optional[typing___Iterable[builtin___int]] = None,
) -> None: ...
@classmethod
def FromString(cls, s: builtin___bytes) -> ObservationProto: ...

def HasField(self, field_name: typing_extensions___Literal[u"compressed_data",u"float_data",u"observation_data"]) -> builtin___bool: ...
def ClearField(self, field_name: typing_extensions___Literal[u"compressed_data",u"compression_type",u"float_data",u"observation_data",u"shape"]) -> None: ...
def ClearField(self, field_name: typing_extensions___Literal[u"compressed_channel_mapping",u"compressed_data",u"compression_type",u"float_data",u"observation_data",u"shape"]) -> None: ...
def ClearField(self, field_name: typing_extensions___Literal[u"compressed_data",b"compressed_data",u"compression_type",b"compression_type",u"float_data",b"float_data",u"observation_data",b"observation_data",u"shape",b"shape"]) -> None: ...
def ClearField(self, field_name: typing_extensions___Literal[u"compressed_channel_mapping",b"compressed_channel_mapping",u"compressed_data",b"compressed_data",u"compression_type",b"compression_type",u"float_data",b"float_data",u"observation_data",b"observation_data",u"shape",b"shape"]) -> None: ...
def WhichOneof(self, oneof_group: typing_extensions___Literal[u"observation_data",b"observation_data"]) -> typing_extensions___Literal["compressed_data","float_data"]: ...

4
ml-agents-envs/mlagents_envs/environment.py


# Revision history:
# * 1.0.0 - initial version
# * 1.1.0 - support concatenated PNGs for compressed observations.
API_VERSION = "1.1.0"
# * 1.2.0 - support compression mapping for stacked compressed observations.
API_VERSION = "1.2.0"
# Default port that the editor listens on. If an environment executable
# isn't specified, this port will be used.

capabilities = UnityRLCapabilitiesProto()
capabilities.baseRLCapabilities = True
capabilities.concatenatedPngObservations = True
capabilities.compressedChannelMapping = True
return capabilities
@staticmethod

81
ml-agents-envs/mlagents_envs/rpc_utils.py


@timed
def process_pixels(image_bytes: bytes, expected_channels: int) -> np.ndarray:
def process_pixels(
image_bytes: bytes, expected_channels: int, mappings: Optional[List[int]] = None
) -> np.ndarray:
"""
Converts a byte array observation image into a numpy array, re-sizes it,
and optionally converts it to grayscale. If a compressed channel mapping is provided, the decoded channels are re-grouped according to it.

"""
image_fp = OffsetBytesIO(image_bytes)
if expected_channels == 1:
# Convert to grayscale
with hierarchical_timer("image_decompress"):
image = Image.open(image_fp)
# Normally Image loads lazily, load() forces it to do loading in the timer scope.
image.load()
s = np.array(image, dtype=np.float32) / 255.0
s = np.mean(s, axis=2)
s = np.reshape(s, [s.shape[0], s.shape[1], 1])
return s
# Normally Image loads lazily, load() forces it to do loading in the timer scope.
image.load()
image_arrays.append(np.array(image, dtype=np.float32) / 255.0)

# Didn't find the header, so must be at the end.
break
img = np.concatenate(image_arrays, axis=2)
# Drop any extra channels: padding channels may have been added so that the
# total channel count is divisible by 3 (each PNG holds three channels).
actual_channels = list(img.shape)[2]
if actual_channels > expected_channels:
img = img[..., 0:expected_channels]
if mappings is not None and len(mappings) > 0:
return _process_images_mapping(image_arrays, mappings)
else:
return _process_images_num_channels(image_arrays, expected_channels)
def _process_images_mapping(image_arrays, mappings):
"""
Helper function for processing decompressed images with compressed channel mappings.
"""
image_arrays = np.concatenate(image_arrays, axis=2).transpose((2, 0, 1))
if len(mappings) != len(image_arrays):
raise UnityObservationException(
f"Compressed observation and its mapping had different number of channels - "
f"observation had {len(image_arrays)} channels but its mapping had {len(mappings)} channels"
)
if len({m for m in mappings if m > -1}) != max(mappings) + 1:
raise UnityObservationException(
f"Invalid Compressed Channel Mapping: the mapping {mappings} does not have the correct format."
)
if max(mappings) >= len(image_arrays):
raise UnityObservationException(
f"Invalid Compressed Channel Mapping: the mapping has index larger than the total "
f"number of channels in observation - mapping index {max(mappings)} is"
f"invalid for input observation with {len(image_arrays)} channels."
)
processed_image_arrays: List[np.array] = [[] for _ in range(max(mappings) + 1)]
for mapping_idx, img in zip(mappings, image_arrays):
if mapping_idx > -1:
processed_image_arrays[mapping_idx].append(img)
for i, img_array in enumerate(processed_image_arrays):
processed_image_arrays[i] = np.mean(img_array, axis=0)
img = np.stack(processed_image_arrays, axis=2)
return img
def _process_images_num_channels(image_arrays, expected_channels):
"""
Helper function for processing decompressed images given an expected number of channels.
This is for the old API where no mapping is provided. Use the first n channels, where n = expected_channels.
"""
if expected_channels == 1:
# Convert to grayscale
img = np.mean(image_arrays[0], axis=2)
img = np.reshape(img, [img.shape[0], img.shape[1], 1])
else:
img = np.concatenate(image_arrays, axis=2)
# Drop any extra channels: padding channels may have been added so that the
# total channel count is divisible by 3 (each PNG holds three channels).
actual_channels = list(img.shape)[2]
if actual_channels > expected_channels:
img = img[..., 0:expected_channels]
return img
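As a side note on _process_images_mapping above, here is a minimal sketch of how a compressed channel mapping is interpreted (shapes, values, and variable names are illustrative only, not the mlagents_envs API): each entry names the observation channel its decoded PNG channel belongs to, channels that share an index are averaged, and -1 marks padding channels that are dropped.

```python
import numpy as np

# Illustrative only: six decoded PNG channels (H, W) folded into four observation
# channels by the mapping [0, 1, 2, 3, -1, -1]; the last two channels are padding.
height, width = 2, 2
channels = [np.full((height, width), float(i)) for i in range(6)]
mapping = [0, 1, 2, 3, -1, -1]

grouped = [[] for _ in range(max(mapping) + 1)]
for target, channel in zip(mapping, channels):
    if target > -1:
        grouped[target].append(channel)

# Channels mapped to the same index are averaged; here every group holds one channel.
obs = np.stack([np.mean(group, axis=0) for group in grouped], axis=2)
print(obs.shape)  # (2, 2, 4)
```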

img = np.reshape(img, obs.shape)
return img
else:
img = process_pixels(obs.compressed_data, expected_channels)
img = process_pixels(
obs.compressed_data, expected_channels, list(obs.compressed_channel_mapping)
)
# Compare decompressed image size to observation shape and make sure they match
if list(obs.shape) != list(img.shape):
raise UnityObservationException(

76
ml-agents-envs/mlagents_envs/tests/test_rpc_utils.py


return bytes_out
def generate_compressed_proto_obs(in_array: np.ndarray) -> ObservationProto:
# test helper function for old C# API (no compressed channel mapping)
def generate_compressed_proto_obs(
in_array: np.ndarray, grayscale: bool = False
) -> ObservationProto:
obs_proto.shape.extend(in_array.shape)
if grayscale:
# grayscale flag is only used for old API without mapping
expected_shape = [in_array.shape[0], in_array.shape[1], 1]
obs_proto.shape.extend(expected_shape)
else:
obs_proto.shape.extend(in_array.shape)
return obs_proto
# test helper function for new C# API (with compressed channel mapping)
def generate_compressed_proto_obs_with_mapping(
in_array: np.ndarray, mapping: List[int]
) -> ObservationProto:
obs_proto = ObservationProto()
obs_proto.compressed_data = generate_compressed_data(in_array)
obs_proto.compression_type = PNG
if mapping is not None:
obs_proto.compressed_channel_mapping.extend(mapping)
expected_shape = [
in_array.shape[0],
in_array.shape[1],
len({m for m in mapping if m >= 0}),
]
obs_proto.shape.extend(expected_shape)
else:
obs_proto.shape.extend(in_array.shape)
return obs_proto

in_array_1 = np.random.rand(128, 64, 3)
proto_obs_1 = generate_compressed_proto_obs(in_array_1)
in_array_2 = np.random.rand(128, 64, 3)
proto_obs_2 = generate_uncompressed_proto_obs(in_array_2)
in_array_2_mapping = [0, 1, 2]
proto_obs_2 = generate_compressed_proto_obs_with_mapping(
in_array_2, in_array_2_mapping
)
ap1 = AgentInfoProto()
ap1.observations.extend([proto_obs_1])
ap2 = AgentInfoProto()

assert list(arr.shape) == [2, 128, 64, 3]
assert np.allclose(arr[0, :, :, :], in_array_1, atol=0.01)
assert np.allclose(arr[1, :, :, :], in_array_2, atol=0.01)
def test_process_visual_observation_grayscale():
in_array_1 = np.random.rand(128, 64, 3)
proto_obs_1 = generate_compressed_proto_obs(in_array_1, grayscale=True)
expected_out_array_1 = np.mean(in_array_1, axis=2, keepdims=True)
in_array_2 = np.random.rand(128, 64, 3)
in_array_2_mapping = [0, 0, 0]
proto_obs_2 = generate_compressed_proto_obs_with_mapping(
in_array_2, in_array_2_mapping
)
expected_out_array_2 = np.mean(in_array_2, axis=2, keepdims=True)
ap1 = AgentInfoProto()
ap1.observations.extend([proto_obs_1])
ap2 = AgentInfoProto()
ap2.observations.extend([proto_obs_2])
ap_list = [ap1, ap2]
arr = _process_visual_observation(0, (128, 64, 1), ap_list)
assert list(arr.shape) == [2, 128, 64, 1]
assert np.allclose(arr[0, :, :, :], expected_out_array_1, atol=0.01)
assert np.allclose(arr[1, :, :, :], expected_out_array_2, atol=0.01)
def test_process_visual_observation_padded_channels():
in_array_1 = np.random.rand(128, 64, 12)
in_array_1_mapping = [0, 1, 2, 3, -1, -1, 4, 5, 6, 7, -1, -1]
proto_obs_1 = generate_compressed_proto_obs_with_mapping(
in_array_1, in_array_1_mapping
)
expected_out_array_1 = np.take(in_array_1, [0, 1, 2, 3, 6, 7, 8, 9], axis=2)
ap1 = AgentInfoProto()
ap1.observations.extend([proto_obs_1])
ap_list = [ap1]
arr = _process_visual_observation(0, (128, 64, 8), ap_list)
assert list(arr.shape) == [1, 128, 64, 8]
assert np.allclose(arr[0, :, :, :], expected_out_array_1, atol=0.01)
def test_process_visual_observation_bad_shape():

4
ml-agents-envs/setup.py


class VerifyVersionCommand(install):
"""
Custom command to verify that the git tag is the expected one for the release.
Based on https://circleci.com/blog/continuously-deploying-python-packages-to-pypi-with-circleci/
Originally based on https://circleci.com/blog/continuously-deploying-python-packages-to-pypi-with-circleci/
This differs slightly because our tags and versions are different.
"""

tag = os.getenv("CIRCLE_TAG")
tag = os.getenv("GITHUB_REF", "NO GITHUB TAG!").replace("refs/tags/", "")
if tag != EXPECTED_TAG:
info = "Git tag: {} does not match the expected tag of this app: {}".format(

7
ml-agents/mlagents/torch_utils/cpu_utils.py


"""
period = _read_in_integer_file("/sys/fs/cgroup/cpu/cpu.cfs_period_us")
quota = _read_in_integer_file("/sys/fs/cgroup/cpu/cpu.cfs_quota_us")
share = _read_in_integer_file("/sys/fs/cgroup/cpu/cpu.shares")
is_kubernetes = os.getenv("KUBERNETES_SERVICE_HOST") is not None
elif period > 0 and share > 0 and is_kubernetes:
# In kubernetes, each requested CPU is 1024 CPU shares
# https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/#how-pods-with-resource-limits-are-run
return int(share // 1024)
else:
return os.cpu_count()
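For reference, a minimal sketch of the arithmetic used here (hypothetical helper name, assuming the usual cgroup-v1 semantics where cfs_quota_us / cfs_period_us gives the CPU limit and one Kubernetes CPU request equals 1024 cpu.shares):

```python
import os

# Sketch only, not the actual cpu_utils implementation.
def sketch_cpu_count(quota: int, period: int, share: int, is_kubernetes: bool) -> int:
    if quota > 0 and period > 0:
        return int(quota // period)      # e.g. quota=200_000, period=100_000 -> 2 CPUs
    if period > 0 and share > 0 and is_kubernetes:
        return int(share // 1024)        # e.g. share=2048 -> 2 requested CPUs
    return os.cpu_count() or 1           # fall back to the host CPU count

print(sketch_cpu_count(200_000, 100_000, 0, False))  # 2
print(sketch_cpu_count(-1, 100_000, 2048, True))     # 2
```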

2
ml-agents/mlagents/trainers/__init__.py


# Version of the library that will be used to upload to pypi
__version__ = "0.21.0.dev0"
__version__ = "0.22.0.dev0"
# Git tag that will be checked to determine whether to trigger upload to pypi
__release_tag__ = None

9
ml-agents/mlagents/trainers/buffer.py


import numpy as np
import h5py
from typing import List, BinaryIO
import itertools
from mlagents_envs.exception import UnityException

np.random.randint(num_sequences_in_buffer, size=num_seq_to_sample)
* sequence_length
) # Sample random sequence starts
for i in start_idxes:
for key in self:
mini_batch[key].extend(self[key][i : i + sequence_length])
for key in self:
mb_list = [self[key][i : i + sequence_length] for i in start_idxes]
# See comparison of ways to make a list from a list of lists here:
# https://stackoverflow.com/questions/952914/how-to-make-a-flat-list-out-of-list-of-lists
mini_batch[key].set(list(itertools.chain.from_iterable(mb_list)))
return mini_batch
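A small standalone illustration of the flattening trick referenced above, with plain lists standing in for the buffer fields:

```python
import itertools

sequence_length = 3
data = list(range(12))      # stand-in for self[key]
start_idxes = [0, 6, 9]     # sampled sequence starts
mb_list = [data[i : i + sequence_length] for i in start_idxes]
flat = list(itertools.chain.from_iterable(mb_list))
print(flat)                 # [0, 1, 2, 6, 7, 8, 9, 10, 11]
```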
def save_to_file(self, file_object: BinaryIO) -> None:

12
ml-agents/mlagents/trainers/environment_parameter_manager.py


lesson_num = GlobalTrainingStatus.get_parameter_state(
param_name, StatusType.LESSON_NUM
)
next_lesson_num = lesson_num + 1
and len(settings.curriculum) > lesson_num + 1
and len(settings.curriculum) > next_lesson_num
):
behavior_to_consider = lesson.completion_criteria.behavior
if behavior_to_consider in trainer_steps:

self._smoothed_values[param_name] = new_smoothing
if must_increment:
GlobalTrainingStatus.set_parameter_state(
param_name, StatusType.LESSON_NUM, lesson_num + 1
param_name, StatusType.LESSON_NUM, next_lesson_num
new_lesson_name = settings.curriculum[lesson_num + 1].name
new_lesson_name = settings.curriculum[next_lesson_num].name
new_lesson_value = settings.curriculum[next_lesson_num].value
f"Parameter '{param_name}' has changed. Now in lesson '{new_lesson_name}'"
f"Parameter '{param_name}' has been updated to {new_lesson_value}."
+ f" Now in lesson '{new_lesson_name}'"
)
updated = True
if lesson.completion_criteria.require_reset:

5
ml-agents/mlagents/trainers/learn.py


from mlagents import tf_utils
from mlagents.trainers.trainer_controller import TrainerController
from mlagents.trainers.environment_parameter_manager import EnvironmentParameterManager
from mlagents.trainers.trainer_util import TrainerFactory, handle_existing_directories
from mlagents.trainers.trainer import TrainerFactory
from mlagents.trainers.directory_utils import validate_existing_directories
from mlagents.trainers.stats import (
TensorboardWriter,
StatsReporter,

run_logs_dir = os.path.join(write_path, "run_logs")
port: Optional[int] = env_settings.base_port
# Check if directory exists
handle_existing_directories(
validate_existing_directories(
write_path,
checkpoint_settings.resume,
checkpoint_settings.force,

3
ml-agents/mlagents/trainers/model_saver/tf_model_saver.py


# only on worker-0 if there are multiple workers
if self.policy and self.policy.rank is not None and self.policy.rank != 0:
return
if self.graph is None:
logger.info("No model to export")
return
export_policy_model(
self.model_path, output_filepath, behavior_name, self.graph, self.sess
)

4
ml-agents/mlagents/trainers/optimizer/tf_optimizer.py


from mlagents.trainers.policy.tf_policy import TFPolicy
from mlagents.trainers.optimizer import Optimizer
from mlagents.trainers.trajectory import SplitObservations
from mlagents.trainers.components.reward_signals.reward_signal_factory import (
from mlagents.trainers.tf.components.reward_signals.reward_signal_factory import (
from mlagents.trainers.components.bc.module import BCModule
from mlagents.trainers.tf.components.bc.module import BCModule
class TFOptimizer(Optimizer): # pylint: disable=W0223

10
ml-agents/mlagents/trainers/policy/checkpoint_manager.py


@attr.s(auto_attribs=True)
class NNCheckpoint:
class ModelCheckpoint:
steps: int
file_path: str
reward: Optional[float]

class NNCheckpointManager:
class ModelCheckpointManager:
@staticmethod
def get_checkpoints(behavior_name: str) -> List[Dict[str, Any]]:
checkpoint_list = GlobalTrainingStatus.get_parameter_state(

while len(checkpoints) > keep_checkpoints:
if keep_checkpoints <= 0 or len(checkpoints) == 0:
break
NNCheckpointManager.remove_checkpoint(checkpoints.pop(0))
ModelCheckpointManager.remove_checkpoint(checkpoints.pop(0))
cls, behavior_name: str, new_checkpoint: NNCheckpoint, keep_checkpoints: int
cls, behavior_name: str, new_checkpoint: ModelCheckpoint, keep_checkpoints: int
) -> None:
"""
Make room for new checkpoint if needed and insert new checkpoint information.

@classmethod
def track_final_checkpoint(
cls, behavior_name: str, final_checkpoint: NNCheckpoint
cls, behavior_name: str, final_checkpoint: ModelCheckpoint
) -> None:
"""
Ensures number of checkpoints stored is within the max number of checkpoints

7
ml-agents/mlagents/trainers/policy/torch_policy.py


self.actor_critic = ac_class(
observation_shapes=self.behavior_spec.observation_shapes,
network_settings=trainer_settings.network_settings,
continuous_act_size=self.continuous_act_size,
discrete_act_size=self.discrete_act_size,
action_spec=self.behavior_spec.action_spec,
stream_names=reward_signal_names,
conditional_sigma=self.condition_sigma_on_obs,
tanh_squash=tanh_squash,

) -> Tuple[SplitObservations, np.ndarray]:
vec_vis_obs = SplitObservations.from_observations(decision_requests.obs)
mask = None
if len(self.discrete_act_size) > 0:
mask = torch.ones([len(decision_requests), np.sum(self.discrete_act_size)])
if self.discrete_act_size > 0:
mask = torch.ones([len(decision_requests), np.sum(self.discrete_act_branches)])
if decision_requests.action_mask is not None:
mask = torch.as_tensor(
1 - np.concatenate(decision_requests.action_mask, axis=1)

2
ml-agents/mlagents/trainers/ppo/trainer.py


from mlagents.trainers.trajectory import Trajectory
from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
from mlagents.trainers.settings import TrainerSettings, PPOSettings, FrameworkType
from mlagents.trainers.components.reward_signals import RewardSignal
from mlagents.trainers.tf.components.reward_signals import RewardSignal
from mlagents import torch_utils
if torch_utils.is_available():

151
ml-agents/mlagents/trainers/sac/optimizer_torch.py


from mlagents_envs.timers import timed
from mlagents.trainers.exception import UnityTrainerException
from mlagents.trainers.settings import TrainerSettings, SACSettings
from contextlib import ExitStack
EPSILON = 1e-6 # Small value to avoid divide by zero

actions: Optional[torch.Tensor] = None,
memories: Optional[torch.Tensor] = None,
sequence_length: int = 1,
q1_grad: bool = True,
q2_grad: bool = True,
q1_out, _ = self.q1_network(
vec_inputs,
vis_inputs,
actions=actions,
memories=memories,
sequence_length=sequence_length,
)
q2_out, _ = self.q2_network(
vec_inputs,
vis_inputs,
actions=actions,
memories=memories,
sequence_length=sequence_length,
)
"""
Performs a forward pass on the value network, which consists of a Q1 and Q2
network. Optionally does not evaluate gradients for either the Q1, Q2, or both.
:param vec_inputs: List of vector observation tensors.
:param vis_input: List of visual observation tensors.
:param actions: For a continuous Q function (has actions), tensor of actions.
Otherwise, None.
:param memories: Initial memories if using memory. Otherwise, None.
:param sequence_length: Sequence length if using memory.
:param q1_grad: Whether or not to compute gradients for the Q1 network.
:param q2_grad: Whether or not to compute gradients for the Q2 network.
:return: Tuple of two dictionaries, which both map {reward_signal: Q} for Q1 and Q2,
respectively.
"""
# ExitStack allows us to enter the torch.no_grad() context conditionally
with ExitStack() as stack:
if not q1_grad:
stack.enter_context(torch.no_grad())
q1_out, _ = self.q1_network(
vec_inputs,
vis_inputs,
actions=actions,
memories=memories,
sequence_length=sequence_length,
)
with ExitStack() as stack:
if not q2_grad:
stack.enter_context(torch.no_grad())
q2_out, _ = self.q2_network(
vec_inputs,
vis_inputs,
actions=actions,
memories=memories,
sequence_length=sequence_length,
)
return q1_out, q2_out
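A minimal standalone sketch of the ExitStack pattern used here, with a toy linear layer rather than the SAC Q-networks:

```python
from contextlib import ExitStack
import torch

layer = torch.nn.Linear(4, 1)
x = torch.randn(3, 4)

def forward(compute_grad: bool) -> torch.Tensor:
    # Enter torch.no_grad() only when gradients are not wanted.
    with ExitStack() as stack:
        if not compute_grad:
            stack.enter_context(torch.no_grad())
        return layer(x)

print(forward(True).requires_grad)   # True
print(forward(False).requires_grad)  # False
```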
def __init__(self, policy: TorchPolicy, trainer_params: TrainerSettings):

self.policy.behavior_spec.observation_shapes,
policy_network_settings,
)
self.soft_update(self.policy.actor_critic.critic, self.target_network, 1.0)
ModelUtils.soft_update(
self.policy.actor_critic.critic, self.target_network, 1.0
)
self._log_ent_coef = torch.nn.Parameter(
torch.log(torch.as_tensor([self.init_entcoef] * len(self.act_size))),

q2_loss = torch.mean(torch.stack(q2_losses))
return q1_loss, q2_loss
def soft_update(self, source: nn.Module, target: nn.Module, tau: float) -> None:
for source_param, target_param in zip(source.parameters(), target.parameters()):
target_param.data.copy_(
target_param.data * (1.0 - tau) + source_param.data * tau
)
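For intuition, one numeric step of this Polyak averaging with a small tau (toy scalars, not real parameters):

```python
# target <- target * (1 - tau) + source * tau
tau = 0.005
target_value, source_value = 1.0, 0.0
target_value = target_value * (1.0 - tau) + source_value * tau
print(target_value)  # 0.995
```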
def sac_value_loss(
self,
log_probs: torch.Tensor,

min_policy_qs = {}
with torch.no_grad():
_ent_coef = torch.exp(self._log_ent_coef)
for name in values.keys():
if not discrete:
min_policy_qs[name] = torch.min(q1p_out[name], q2p_out[name])
else:
action_probs = log_probs.exp()
_branched_q1p = ModelUtils.break_into_branches(
q1p_out[name] * action_probs, self.act_size
)
_branched_q2p = ModelUtils.break_into_branches(
q2p_out[name] * action_probs, self.act_size
)
_q1p_mean = torch.mean(
torch.stack(
[torch.sum(_br, dim=1, keepdim=True) for _br in _branched_q1p]
),
dim=0,
)
_q2p_mean = torch.mean(
torch.stack(
[torch.sum(_br, dim=1, keepdim=True) for _br in _branched_q2p]
),
dim=0,
)
for name in values.keys():
if not discrete:
min_policy_qs[name] = torch.min(q1p_out[name], q2p_out[name])
else:
action_probs = log_probs.exp()
_branched_q1p = ModelUtils.break_into_branches(
q1p_out[name] * action_probs, self.act_size
)
_branched_q2p = ModelUtils.break_into_branches(
q2p_out[name] * action_probs, self.act_size
)
_q1p_mean = torch.mean(
torch.stack(
[
torch.sum(_br, dim=1, keepdim=True)
for _br in _branched_q1p
]
),
dim=0,
)
_q2p_mean = torch.mean(
torch.stack(
[
torch.sum(_br, dim=1, keepdim=True)
for _br in _branched_q2p
]
),
dim=0,
)
min_policy_qs[name] = torch.min(_q1p_mean, _q2p_mean)
min_policy_qs[name] = torch.min(_q1p_mean, _q2p_mean)
value_losses = []
if not discrete:

self.target_network.network_body.copy_normalization(
self.policy.actor_critic.network_body
)
(
sampled_actions,
log_probs,
entropies,
sampled_values,
_,
) = self.policy.sample_actions(
(sampled_actions, log_probs, _, _) = self.policy.sample_actions(
vec_obs,
vis_obs,
masks=act_masks,

)
value_estimates, _ = self.policy.actor_critic.critic_pass(
vec_obs, vis_obs, memories, sequence_length=self.policy.sequence_length
)
# Only need grad for q1, as that is used for policy.
q1p_out, q2p_out = self.value_network(
vec_obs,
vis_obs,

q2_grad=False,
)
q1_out, q2_out = self.value_network(
vec_obs,

)
q1_stream, q2_stream = q1_out, q2_out
else:
with torch.no_grad():
q1p_out, q2p_out = self.value_network(
vec_obs,
vis_obs,
memories=q_memories,
sequence_length=self.policy.sequence_length,
)
# For discrete, you don't need to backprop through the Q for the policy
q1p_out, q2p_out = self.value_network(
vec_obs,
vis_obs,
memories=q_memories,
sequence_length=self.policy.sequence_length,
q1_grad=False,
q2_grad=False,
)
q1_out, q2_out = self.value_network(
vec_obs,
vis_obs,

q1_stream, q2_stream, target_values, dones, rewards, masks
)
value_loss = self.sac_value_loss(
log_probs, sampled_values, q1p_out, q2p_out, masks, use_discrete
log_probs, value_estimates, q1p_out, q2p_out, masks, use_discrete
)
policy_loss = self.sac_policy_loss(log_probs, q1p_out, masks, use_discrete)
entropy_loss = self.sac_entropy_loss(log_probs, masks, use_discrete)

self.entropy_optimizer.step()
# Update target network
self.soft_update(self.policy.actor_critic.critic, self.target_network, self.tau)
ModelUtils.soft_update(
self.policy.actor_critic.critic, self.target_network, self.tau
)
update_stats = {
"Losses/Policy Loss": policy_loss.item(),
"Losses/Value Loss": value_loss.item(),

6
ml-agents/mlagents/trainers/sac/trainer.py


import os
import numpy as np
from mlagents.trainers.policy.checkpoint_manager import NNCheckpoint
from mlagents.trainers.policy.checkpoint_manager import ModelCheckpoint
from mlagents_envs.logging_util import get_logger
from mlagents_envs.timers import timed

from mlagents.trainers.trajectory import Trajectory, SplitObservations
from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
from mlagents.trainers.settings import TrainerSettings, SACSettings, FrameworkType
from mlagents.trainers.components.reward_signals import RewardSignal
from mlagents.trainers.tf.components.reward_signals import RewardSignal
from mlagents import torch_utils
if torch_utils.is_available():

self.checkpoint_replay_buffer = self.hyperparameters.save_replay_buffer
def _checkpoint(self) -> NNCheckpoint:
def _checkpoint(self) -> ModelCheckpoint:
"""
Writes a checkpoint model to memory
Overrides the default to save the replay buffer.

38
ml-agents/mlagents/trainers/settings.py


EXTRINSIC: str = "extrinsic"
GAIL: str = "gail"
CURIOSITY: str = "curiosity"
RND: str = "rnd"
def to_settings(self) -> type:
_mapping = {

RewardSignalType.RND: RNDSettings,
}
return _mapping[self]

learning_rate: float = 3e-4
@attr.s(auto_attribs=True)
class RNDSettings(RewardSignalSettings):
encoding_size: int = 64
learning_rate: float = 1e-4
# SAMPLERS #############################################################################
class ParameterRandomizationType(Enum):
UNIFORM: str = "uniform"

class ParameterRandomizationSettings(abc.ABC):
seed: int = parser.get_default("seed")
def __str__(self) -> str:
"""
Helper method to output sampler stats to console.
"""
raise TrainerConfigError(f"__str__ not implemented for type {self.__class__}.")
@staticmethod
def structure(
d: Union[Mapping, float], t: type

class ConstantSettings(ParameterRandomizationSettings):
value: float = 0.0
def __str__(self) -> str:
"""
Helper method to output sampler stats to console.
"""
return f"Float: value={self.value}"
def apply(self, key: str, env_channel: EnvironmentParametersChannel) -> None:
"""
Helper method to send sampler settings over EnvironmentParametersChannel

class UniformSettings(ParameterRandomizationSettings):
min_value: float = attr.ib()
max_value: float = 1.0
def __str__(self) -> str:
"""
Helper method to output sampler stats to console.
"""
return f"Uniform sampler: min={self.min_value}, max={self.max_value}"
@min_value.default
def _min_value_default(self):

mean: float = 1.0
st_dev: float = 1.0
def __str__(self) -> str:
"""
Helper method to output sampler stats to console.
"""
return f"Gaussian sampler: mean={self.mean}, stddev={self.st_dev}"
def apply(self, key: str, env_channel: EnvironmentParametersChannel) -> None:
"""
Helper method to send sampler settings over EnvironmentParametersChannel

@attr.s(auto_attribs=True)
class MultiRangeUniformSettings(ParameterRandomizationSettings):
intervals: List[Tuple[float, float]] = attr.ib()
def __str__(self) -> str:
"""
Helper method to output sampler stats to console.
"""
return f"MultiRangeUniform sampler: intervals={self.intervals}"
@intervals.default
def _intervals_default(self):

2
ml-agents/mlagents/trainers/tests/test_learn.py


@patch("mlagents.trainers.learn.write_timing_tree")
@patch("mlagents.trainers.learn.write_run_options")
@patch("mlagents.trainers.learn.handle_existing_directories")
@patch("mlagents.trainers.learn.validate_existing_directories")
@patch("mlagents.trainers.learn.TrainerFactory")
@patch("mlagents.trainers.learn.SubprocessEnvManager")
@patch("mlagents.trainers.learn.create_environment_factory")

8
ml-agents/mlagents/trainers/tests/test_rl_trainer.py


from unittest import mock
import pytest
import mlagents.trainers.tests.mock_brain as mb
from mlagents.trainers.policy.checkpoint_manager import NNCheckpoint
from mlagents.trainers.policy.checkpoint_manager import ModelCheckpoint
from mlagents.trainers.trainer.rl_trainer import RLTrainer
from mlagents.trainers.tests.test_buffer import construct_fake_buffer
from mlagents.trainers.agent_processor import AgentManagerQueue

"framework", [FrameworkType.TENSORFLOW, FrameworkType.PYTORCH], ids=["tf", "torch"]
)
@mock.patch("mlagents.trainers.trainer.trainer.StatsReporter.write_stats")
@mock.patch("mlagents.trainers.trainer.rl_trainer.NNCheckpointManager.add_checkpoint")
@mock.patch(
"mlagents.trainers.trainer.rl_trainer.ModelCheckpointManager.add_checkpoint"
)
def test_summary_checkpoint(mock_add_checkpoint, mock_write_summary, framework):
trainer = create_rl_trainer(framework)
mock_policy = mock.Mock()

add_checkpoint_calls = [
mock.call(
trainer.brain_name,
NNCheckpoint(
ModelCheckpoint(
step,
f"{trainer.model_saver.model_path}/{trainer.brain_name}-{step}.{export_ext}",
None,

16
ml-agents/mlagents/trainers/tests/test_settings.py


assert isinstance(
env_param_settings["length"].curriculum[0].value, MultiRangeUniformSettings
)
# Check __str__ is correct
assert (
str(env_param_settings["mass"].curriculum[0].value)
== "Uniform sampler: min=1.0, max=2.0"
)
assert (
str(env_param_settings["scale"].curriculum[0].value)
== "Gaussian sampler: mean=1.0, stddev=2.0"
)
assert (
str(env_param_settings["length"].curriculum[0].value)
== "MultiRangeUniform sampler: intervals=[(1.0, 2.0), (3.0, 4.0)]"
)
assert str(env_param_settings["gravity"].curriculum[0].value) == "Float: value=1"
assert isinstance(
env_param_settings["wall_height"].curriculum[0].value, ConstantSettings
)

10
ml-agents/mlagents/trainers/tests/test_subprocess_env_manager.py


from mlagents.trainers.tests.simple_test_envs import SimpleEnvironment
from mlagents.trainers.stats import StatsReporter
from mlagents.trainers.agent_processor import AgentManagerQueue
from mlagents.trainers.tests.test_simple_rl import (
_check_environment_trains,
PPO_CONFIG,
from mlagents.trainers.tests.check_env_trains import (
check_environment_trains,
from mlagents.trainers.tests.dummy_config import ppo_dummy_config
def mock_env_factory(worker_id):

simple_env_factory, EngineConfig.default_config(), num_envs
)
# Run PPO using env_manager
_check_environment_trains(
check_environment_trains(
{"1D": PPO_CONFIG},
{"1D": ppo_dummy_config()},
env_manager=env_manager,
success_threshold=None,
)

1
ml-agents/mlagents/trainers/tests/test_trainer_controller.py


trainer_mock.write_tensorboard_text = MagicMock()
tc = basic_trainer_controller
tc.initialize_trainers = MagicMock()
tc.trainers = {"testbrain": trainer_mock}
tc.advance = MagicMock()
tc.trainers["testbrain"].get_step = 0

27
ml-agents/mlagents/trainers/tests/test_trainer_util.py


import os
from unittest.mock import patch
from mlagents.trainers import trainer_util
from mlagents.trainers.trainer import TrainerFactory
from mlagents.trainers.tests.test_simple_rl import PPO_CONFIG
from mlagents.trainers.tests.dummy_config import ppo_dummy_config
from mlagents.trainers.directory_utils import validate_existing_directories
return RunOptions(behaviors={"testbrain": PPO_CONFIG})
return RunOptions(behaviors={"testbrain": ppo_dummy_config()})
@patch("mlagents_envs.base_env.BehaviorSpec")

expected_reward_buff_cap = 1
base_config = dummy_config.behaviors
expected_config = PPO_CONFIG
expected_config = ppo_dummy_config()
def mock_constructor(
self,

assert artifact_path == os.path.join(output_path, brain_name)
with patch.object(PPOTrainer, "__init__", mock_constructor):
trainer_factory = trainer_util.TrainerFactory(
trainer_factory = TrainerFactory(
trainer_config=base_config,
output_path=output_path,
train_model=train_model,

brain_name = "testbrain"
no_default_config = RunOptions().behaviors
trainer_factory = trainer_util.TrainerFactory(
trainer_factory = TrainerFactory(
trainer_config=no_default_config,
output_path="output_path",
train_model=True,

def test_existing_directories(tmp_path):
output_path = os.path.join(tmp_path, "runid")
# Test fresh new unused path - should do nothing.
trainer_util.handle_existing_directories(output_path, False, False)
validate_existing_directories(output_path, False, False)
trainer_util.handle_existing_directories(output_path, True, False)
validate_existing_directories(output_path, True, False)
trainer_util.handle_existing_directories(output_path, False, False)
validate_existing_directories(output_path, False, False)
trainer_util.handle_existing_directories(output_path, True, False)
validate_existing_directories(output_path, True, False)
trainer_util.handle_existing_directories(output_path, False, True)
validate_existing_directories(output_path, False, True)
trainer_util.handle_existing_directories(output_path, False, True, init_path)
validate_existing_directories(output_path, False, True, init_path)
trainer_util.handle_existing_directories(output_path, False, True, init_path)
validate_existing_directories(output_path, False, True, init_path)

24
ml-agents/mlagents/trainers/tests/test_training_status.py


GlobalTrainingStatus,
)
from mlagents.trainers.policy.checkpoint_manager import (
NNCheckpointManager,
NNCheckpoint,
ModelCheckpointManager,
ModelCheckpoint,
)

brain_name, StatusType.CHECKPOINTS, test_checkpoint_list
)
new_checkpoint_4 = NNCheckpoint(
new_checkpoint_4 = ModelCheckpoint(
NNCheckpointManager.add_checkpoint(brain_name, new_checkpoint_4, 4)
assert len(NNCheckpointManager.get_checkpoints(brain_name)) == 4
ModelCheckpointManager.add_checkpoint(brain_name, new_checkpoint_4, 4)
assert len(ModelCheckpointManager.get_checkpoints(brain_name)) == 4
new_checkpoint_5 = NNCheckpoint(
new_checkpoint_5 = ModelCheckpoint(
NNCheckpointManager.add_checkpoint(brain_name, new_checkpoint_5, 4)
assert len(NNCheckpointManager.get_checkpoints(brain_name)) == 4
ModelCheckpointManager.add_checkpoint(brain_name, new_checkpoint_5, 4)
assert len(ModelCheckpointManager.get_checkpoints(brain_name)) == 4
final_model = NNCheckpoint(current_step, final_model_path, 3.294, final_model_time)
final_model = ModelCheckpoint(
current_step, final_model_path, 3.294, final_model_time
)
NNCheckpointManager.track_final_checkpoint(brain_name, final_model)
assert len(NNCheckpointManager.get_checkpoints(brain_name)) == 4
ModelCheckpointManager.track_final_checkpoint(brain_name, final_model)
assert len(ModelCheckpointManager.get_checkpoints(brain_name)) == 4
check_checkpoints = GlobalTrainingStatus.saved_state[brain_name][
StatusType.CHECKPOINTS.value

4
ml-agents/mlagents/trainers/tests/torch/saver/test_saver.py


).unsqueeze(0)
with torch.no_grad():
_, log_probs1, _, _, _ = policy1.sample_actions(
_, log_probs1, _, _ = policy1.sample_actions(
_, log_probs2, _, _, _ = policy2.sample_actions(
_, log_probs2, _, _ = policy2.sample_actions(
vec_obs, vis_obs, masks=masks, memories=memories, all_log_probs=True
)

20
ml-agents/mlagents/trainers/tests/torch/saver/test_saver_reward_providers.py


import pytest
import os
import numpy as np
from mlagents.trainers.ppo.optimizer_torch import TorchPPOOptimizer
from mlagents.trainers.sac.optimizer_torch import TorchSACOptimizer
from mlagents.trainers.model_saver.torch_model_saver import TorchModelSaver

CuriositySettings,
GAILSettings,
RNDSettings,
from mlagents.trainers.tests.torch.test_reward_providers.utils import (
create_agent_buffer,
)
DEMO_PATH = (
os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, os.pardir)

trainer_settings.reward_signals = {
RewardSignalType.CURIOSITY: CuriositySettings(),
RewardSignalType.GAIL: GAILSettings(demo_path=DEMO_PATH),
RewardSignalType.RND: RNDSettings(),
}
policy = create_policy_mock(trainer_settings, use_discrete=False)
optimizer = OptimizerClass(policy, trainer_settings)

module_dict_2 = optimizer2.get_modules()
assert "Module:GAIL" in module_dict_1
assert "Module:GAIL" in module_dict_2
assert "Module:Curiosity" in module_dict_1
assert "Module:Curiosity" in module_dict_2
assert "Module:RND-pred" in module_dict_1
assert "Module:RND-pred" in module_dict_2
assert "Module:RND-target" in module_dict_1
assert "Module:RND-target" in module_dict_2
for name, module1 in module_dict_1.items():
assert name in module_dict_2
module2 = module_dict_2[name]

# Run some rewards
data = create_agent_buffer(policy.behavior_spec, 1)
for reward_name in optimizer.reward_signals.keys():
rp_1 = optimizer.reward_signals[reward_name]
rp_2 = optimizer2.reward_signals[reward_name]
assert np.array_equal(rp_1.evaluate(data), rp_2.evaluate(data))

10
ml-agents/mlagents/trainers/tests/torch/test_policy.py


if len(memories) > 0:
memories = torch.stack(memories).unsqueeze(0)
(
sampled_actions,
log_probs,
entropies,
sampled_values,
memories,
) = policy.sample_actions(
(sampled_actions, log_probs, entropies, memories) = policy.sample_actions(
vec_obs,
vis_obs,
masks=act_masks,

else:
assert log_probs.shape == (64, policy.behavior_spec.action_shape)
assert entropies.shape == (64, policy.behavior_spec.action_size)
for val in sampled_values.values():
assert val.shape == (64,)
if rnn:
assert memories.shape == (1, 1, policy.m_size)

12
ml-agents/mlagents/trainers/tests/torch/test_ppo.py


import numpy as np
from mlagents.tf_utils import tf
import copy
import attr
from mlagents.trainers.ppo.optimizer_torch import TorchPPOOptimizer

from mlagents.trainers.settings import NetworkSettings
from mlagents.trainers.tests.test_simple_rl import PPO_CONFIG
from mlagents.trainers.tests.test_reward_signals import ( # noqa: F401; pylint: disable=unused-variable
from mlagents.trainers.settings import NetworkSettings, FrameworkType
from mlagents.trainers.tests.dummy_config import ( # noqa: F401; pylint: disable=unused-variable
ppo_dummy_config,
curiosity_dummy_config,
gail_dummy_config,
)

def dummy_config():
return copy.deepcopy(PPO_CONFIG)
return attr.evolve(ppo_dummy_config(), framework=FrameworkType.PYTORCH)
VECTOR_ACTION_SPACE = 2

def test_ppo_optimizer_update_gail(gail_dummy_config, dummy_config): # noqa: F811
# Test evaluate
dummy_config.reward_signals = gail_dummy_config
config = attr.evolve(ppo_dummy_config(), framework=FrameworkType.PYTORCH)
PPO_CONFIG, use_rnn=False, use_discrete=False, use_visual=False
config, use_rnn=False, use_discrete=False, use_visual=False
)
# Test update
update_buffer = mb.simulate_rollout(

10
ml-agents/mlagents/trainers/tests/torch/test_sac.py


import pytest
import copy
import attr
from mlagents.trainers.tests.torch.test_simple_rl import SAC_CONFIG
from mlagents.trainers.settings import NetworkSettings
from mlagents.trainers.tests.test_reward_signals import ( # noqa: F401; pylint: disable=unused-variable
from mlagents.trainers.settings import NetworkSettings, FrameworkType
from mlagents.trainers.tests.dummy_config import ( # noqa: F401; pylint: disable=unused-variable
sac_dummy_config,
curiosity_dummy_config,
)

return copy.deepcopy(SAC_CONFIG)
return attr.evolve(sac_dummy_config(), framework=FrameworkType.PYTORCH)
VECTOR_ACTION_SPACE = 2

253
ml-agents/mlagents/trainers/tests/torch/test_simple_rl.py


import math
import tempfile
import numpy as np
from typing import Dict
from mlagents.trainers.tests.simple_test_envs import (
SimpleEnvironment,

)
from mlagents.trainers.trainer_controller import TrainerController
from mlagents.trainers.trainer_util import TrainerFactory
from mlagents.trainers.simple_env_manager import SimpleEnvManager
from mlagents.trainers.stats import StatsReporter, StatsWriter, StatsSummary
TrainerSettings,
PPOSettings,
SACSettings,
TrainerType,
ScheduleType,
from mlagents.trainers.environment_parameter_manager import EnvironmentParameterManager
from mlagents_envs.side_channel.environment_parameters_channel import (
EnvironmentParametersChannel,
)
from mlagents_envs.communicator_objects.demonstration_meta_pb2 import (
DemonstrationMetaProto,
)

BRAIN_NAME = "1D"
PPO_CONFIG = TrainerSettings(
trainer_type=TrainerType.PPO,
hyperparameters=PPOSettings(
learning_rate=5.0e-3,
learning_rate_schedule=ScheduleType.CONSTANT,
batch_size=16,
buffer_size=64,
),
network_settings=NetworkSettings(num_layers=1, hidden_units=32),
summary_freq=500,
max_steps=3000,
threaded=False,
framework=FrameworkType.PYTORCH,
from mlagents.trainers.tests.dummy_config import ppo_dummy_config, sac_dummy_config
from mlagents.trainers.tests.check_env_trains import (
check_environment_trains,
default_reward_processor,
SAC_CONFIG = TrainerSettings(
trainer_type=TrainerType.SAC,
hyperparameters=SACSettings(
learning_rate=5.0e-3,
learning_rate_schedule=ScheduleType.CONSTANT,
batch_size=8,
buffer_init_steps=100,
buffer_size=5000,
tau=0.01,
init_entcoef=0.01,
),
network_settings=NetworkSettings(num_layers=1, hidden_units=16),
summary_freq=100,
max_steps=1000,
threaded=False,
framework=FrameworkType.PYTORCH,
)
BRAIN_NAME = "1D"
PPO_TORCH_CONFIG = attr.evolve(ppo_dummy_config(), framework=FrameworkType.PYTORCH)
SAC_TORCH_CONFIG = attr.evolve(sac_dummy_config(), framework=FrameworkType.PYTORCH)
# The reward processor is passed as an argument to _check_environment_trains.
# It is applied to the list of all final rewards for each brain individually.
# This is so that we can process all final rewards in different ways for different algorithms.
# Custom reward processors should be built within the test function and passed to _check_environment_trains.
# The default is the average over the last 5 final rewards.
def default_reward_processor(rewards, last_n_rewards=5):
rewards_to_use = rewards[-last_n_rewards:]
# For debugging tests
print(f"Last {last_n_rewards} rewards:", rewards_to_use)
return np.array(rewards[-last_n_rewards:], dtype=np.float32).mean()
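A quick standalone check of what that default does, using made-up reward values:

```python
import numpy as np

# Average of the last 5 final rewards, mirroring default_reward_processor.
rewards = [0.1, 0.2, 0.9, 1.0, 1.0, 1.0, 1.0]
print(np.array(rewards[-5:], dtype=np.float32).mean())  # ~0.98
```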
class DebugWriter(StatsWriter):
"""
Print to stdout so stats can be viewed in pytest
"""
def __init__(self):
self._last_reward_summary: Dict[str, float] = {}
def get_last_rewards(self):
return self._last_reward_summary
def write_stats(
self, category: str, values: Dict[str, StatsSummary], step: int
) -> None:
for val, stats_summary in values.items():
if val == "Environment/Cumulative Reward":
print(step, val, stats_summary.mean)
self._last_reward_summary[category] = stats_summary.mean
def _check_environment_trains(
env,
trainer_config,
reward_processor=default_reward_processor,
env_parameter_manager=None,
success_threshold=0.9,
env_manager=None,
):
if env_parameter_manager is None:
env_parameter_manager = EnvironmentParameterManager()
# Create controller and begin training.
with tempfile.TemporaryDirectory() as dir:
run_id = "id"
seed = 1337
StatsReporter.writers.clear() # Clear StatsReporters so we don't write to file
debug_writer = DebugWriter()
StatsReporter.add_writer(debug_writer)
if env_manager is None:
env_manager = SimpleEnvManager(env, EnvironmentParametersChannel())
trainer_factory = TrainerFactory(
trainer_config=trainer_config,
output_path=dir,
train_model=True,
load_model=False,
seed=seed,
param_manager=env_parameter_manager,
multi_gpu=False,
)
tc = TrainerController(
trainer_factory=trainer_factory,
output_path=dir,
run_id=run_id,
param_manager=env_parameter_manager,
train=True,
training_seed=seed,
)
# Begin training
tc.start_learning(env_manager)
if (
success_threshold is not None
): # For tests where we are just checking setup and not reward
processed_rewards = [
reward_processor(rewards) for rewards in env.final_rewards.values()
]
assert all(not math.isnan(reward) for reward in processed_rewards)
assert all(reward > success_threshold for reward in processed_rewards)
#@pytest.mark.parametrize("use_discrete", [True, False])
#def test_simple_ppo(use_discrete):
# @pytest.mark.parametrize("use_discrete", [True, False])
# def test_simple_ppo(use_discrete):
env = HybridEnvironment([BRAIN_NAME], continuous_action_size=1, discrete_action_size=1, step_size=0.8)
env = HybridEnvironment(
[BRAIN_NAME], continuous_action_size=1, discrete_action_size=1, step_size=0.8
)
new_hyperparams = attr.evolve(
PPO_CONFIG.hyperparameters, batch_size=32, buffer_size=1280
)

env = HybridEnvironment([BRAIN_NAME], continuous_action_size=1, discrete_action_size=0, step_size=0.8)
env = HybridEnvironment(
[BRAIN_NAME], continuous_action_size=1, discrete_action_size=0, step_size=0.8
)
env = HybridEnvironment([BRAIN_NAME], continuous_action_size=0, discrete_action_size=1, step_size=0.8)
env = HybridEnvironment(
[BRAIN_NAME], continuous_action_size=0, discrete_action_size=1, step_size=0.8
)
env = HybridEnvironment([BRAIN_NAME], continuous_action_size=2, discrete_action_size=1, step_size=0.8)
env = HybridEnvironment(
[BRAIN_NAME], continuous_action_size=2, discrete_action_size=1, step_size=0.8
)
PPO_CONFIG.hyperparameters, batch_size=128, buffer_size=1280, beta=.01
PPO_CONFIG.hyperparameters, batch_size=128, buffer_size=1280, beta=0.01
env = HybridEnvironment([BRAIN_NAME], continuous_action_size=1, discrete_action_size=2, step_size=0.8)
env = HybridEnvironment(
[BRAIN_NAME], continuous_action_size=1, discrete_action_size=2, step_size=0.8
)
PPO_CONFIG.hyperparameters, batch_size=128, buffer_size=1280, beta=.05
PPO_CONFIG.hyperparameters, batch_size=128, buffer_size=1280, beta=0.05
#@pytest.mark.parametrize("use_discrete", [True, False])
#def test_2d_ppo(use_discrete):
# @pytest.mark.parametrize("use_discrete", [True, False])
# def test_2d_ppo(use_discrete):
# env = SimpleEnvironment(
# [BRAIN_NAME], use_discrete=use_discrete, action_size=2, step_size=0.8
# )

# _check_environment_trains(env, {BRAIN_NAME: config})
#
#
#@pytest.mark.parametrize("use_discrete", [True, False])
#@pytest.mark.parametrize("num_visual", [1, 2])
#def test_visual_ppo(num_visual, use_discrete):
# @pytest.mark.parametrize("use_discrete", [True, False])
# @pytest.mark.parametrize("num_visual", [1, 2])
# def test_visual_ppo(num_visual, use_discrete):
# env = SimpleEnvironment(
# [BRAIN_NAME],
# use_discrete=use_discrete,

# _check_environment_trains(env, {BRAIN_NAME: config})
#
#
#@pytest.mark.parametrize("num_visual", [1, 2])
#@pytest.mark.parametrize("vis_encode_type", ["resnet", "nature_cnn", "match3"])
#def test_visual_advanced_ppo(vis_encode_type, num_visual):
# @pytest.mark.parametrize("num_visual", [1, 2])
# @pytest.mark.parametrize("vis_encode_type", ["resnet", "nature_cnn", "match3"])
# def test_visual_advanced_ppo(vis_encode_type, num_visual):
# env = SimpleEnvironment(
# [BRAIN_NAME],
# use_discrete=True,

# _check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.5)
#
#
#@pytest.mark.parametrize("use_discrete", [True, False])
#def test_recurrent_ppo(use_discrete):
# @pytest.mark.parametrize("use_discrete", [True, False])
# def test_recurrent_ppo(use_discrete):
# env = MemoryEnvironment([BRAIN_NAME], use_discrete=use_discrete)
# new_network_settings = attr.evolve(
# PPO_CONFIG.network_settings,

# _check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
#
#
#@pytest.mark.parametrize("use_discrete", [True, False])
#def test_simple_sac(use_discrete):
# @pytest.mark.parametrize("use_discrete", [True, False])
# def test_simple_sac(use_discrete):
#@pytest.mark.parametrize("use_discrete", [True, False])
#def test_2d_sac(use_discrete):
# @pytest.mark.parametrize("use_discrete", [True, False])
# def test_2d_sac(use_discrete):
# env = SimpleEnvironment(
# [BRAIN_NAME], use_discrete=use_discrete, action_size=2, step_size=0.8
# )

#
#
#@pytest.mark.parametrize("use_discrete", [True, False])
#@pytest.mark.parametrize("num_visual", [1, 2])
#def test_visual_sac(num_visual, use_discrete):
# @pytest.mark.parametrize("use_discrete", [True, False])
# @pytest.mark.parametrize("num_visual", [1, 2])
# def test_visual_sac(num_visual, use_discrete):
# env = SimpleEnvironment(
# [BRAIN_NAME],
# use_discrete=use_discrete,

# _check_environment_trains(env, {BRAIN_NAME: config})
#
#
#@pytest.mark.parametrize("num_visual", [1, 2])
#@pytest.mark.parametrize("vis_encode_type", ["resnet", "nature_cnn", "match3"])
#def test_visual_advanced_sac(vis_encode_type, num_visual):
# @pytest.mark.parametrize("num_visual", [1, 2])
# @pytest.mark.parametrize("vis_encode_type", ["resnet", "nature_cnn", "match3"])
# def test_visual_advanced_sac(vis_encode_type, num_visual):
# env = SimpleEnvironment(
# [BRAIN_NAME],
# use_discrete=True,

# _check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.5)
#
#
#@pytest.mark.parametrize("use_discrete", [True, False])
#def test_recurrent_sac(use_discrete):
# @pytest.mark.parametrize("use_discrete", [True, False])
# def test_recurrent_sac(use_discrete):
# step_size = 0.2 if use_discrete else 0.5
# env = MemoryEnvironment(
# [BRAIN_NAME], use_discrete=use_discrete, step_size=step_size

# _check_environment_trains(env, {BRAIN_NAME: config})
#
#
#@pytest.mark.parametrize("use_discrete", [True, False])
#def test_simple_ghost(use_discrete):
# @pytest.mark.parametrize("use_discrete", [True, False])
# def test_simple_ghost(use_discrete):
# env = SimpleEnvironment(
# [BRAIN_NAME + "?team=0", BRAIN_NAME + "?team=1"], use_discrete=use_discrete
# )

# _check_environment_trains(env, {BRAIN_NAME: config})
#
#
#@pytest.mark.parametrize("use_discrete", [True, False])
#def test_simple_ghost_fails(use_discrete):
# @pytest.mark.parametrize("use_discrete", [True, False])
# def test_simple_ghost_fails(use_discrete):
# env = SimpleEnvironment(
# [BRAIN_NAME + "?team=0", BRAIN_NAME + "?team=1"], use_discrete=use_discrete
# )

# )
#
#
#@pytest.mark.parametrize("use_discrete", [True, False])
#def test_simple_asymm_ghost(use_discrete):
# @pytest.mark.parametrize("use_discrete", [True, False])
# def test_simple_asymm_ghost(use_discrete):
# # Make opponent for asymmetric case
# brain_name_opp = BRAIN_NAME + "Opp"
# env = SimpleEnvironment(

# _check_environment_trains(env, {BRAIN_NAME: config, brain_name_opp: config})
#
#
#@pytest.mark.parametrize("use_discrete", [True, False])
#def test_simple_asymm_ghost_fails(use_discrete):
# @pytest.mark.parametrize("use_discrete", [True, False])
# def test_simple_asymm_ghost_fails(use_discrete):
# # Make opponent for asymmetric case
# brain_name_opp = BRAIN_NAME + "Opp"
# env = SimpleEnvironment(

# )
#
#
#@pytest.fixture(scope="session")
#def simple_record(tmpdir_factory):
# @pytest.fixture(scope="session")
# def simple_record(tmpdir_factory):
# def record_demo(use_discrete, num_visual=0, num_vector=1):
# env = RecordEnvironment(
# [BRAIN_NAME],

# return record_demo
#
#
#@pytest.mark.parametrize("use_discrete", [True, False])
#@pytest.mark.parametrize("trainer_config", [PPO_CONFIG, SAC_CONFIG])
#def test_gail(simple_record, use_discrete, trainer_config):
# @pytest.mark.parametrize("use_discrete", [True, False])
# @pytest.mark.parametrize("trainer_config", [PPO_CONFIG, SAC_CONFIG])
# def test_gail(simple_record, use_discrete, trainer_config):
# demo_path = simple_record(use_discrete)
# env = SimpleEnvironment([BRAIN_NAME], use_discrete=use_discrete, step_size=0.2)
# bc_settings = BehavioralCloningSettings(demo_path=demo_path, steps=1000)

# _check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
#
#
#@pytest.mark.parametrize("use_discrete", [True, False])
#def test_gail_visual_ppo(simple_record, use_discrete):
# @pytest.mark.parametrize("use_discrete", [True, False])
# def test_gail_visual_ppo(simple_record, use_discrete):
# demo_path = simple_record(use_discrete, num_visual=1, num_vector=0)
# env = SimpleEnvironment(
# [BRAIN_NAME],

# _check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
#
#
#@pytest.mark.parametrize("use_discrete", [True, False])
#def test_gail_visual_sac(simple_record, use_discrete):
# @pytest.mark.parametrize("use_discrete", [True, False])
# def test_gail_visual_sac(simple_record, use_discrete):
# demo_path = simple_record(use_discrete, num_visual=1, num_vector=0)
# env = SimpleEnvironment(
# [BRAIN_NAME],

17
ml-agents/mlagents/trainers/tests/torch/test_utils.py


masks = torch.tensor([False, False, True, True, True])
mean = ModelUtils.masked_mean(test_input, masks=masks)
assert mean == 4.0
def test_soft_update():
class TestModule(torch.nn.Module):
def __init__(self, vals):
super().__init__()
self.parameter = torch.nn.Parameter(torch.ones(5, 5, 5) * vals)
tm1 = TestModule(0)
tm2 = TestModule(1)
tm3 = TestModule(2)
ModelUtils.soft_update(tm1, tm3, tau=0.5)
assert torch.equal(tm3.parameter, torch.ones(5, 5, 5))
ModelUtils.soft_update(tm1, tm2, tau=1.0)
assert torch.equal(tm2.parameter, tm1.parameter)

2
ml-agents/mlagents/trainers/torch/components/bc/module.py


else:
vis_obs = []
selected_actions, all_log_probs, _, _, _ = self.policy.sample_actions(
selected_actions, all_log_probs, _, _ = self.policy.sample_actions(
vec_obs,
vis_obs,
masks=act_masks,

Some files were not shown because too many files changed in this diff
