Merge branch 'develop-agentprocessor-teammanager' into develop-coma2-trainer-mm

4 年前 · 08db7c2f
--- a/.github/workflows/pytest.yml
+++ b/.github/workflows/pytest.yml
      run: python -c "import sys; print(sys.version)"
    - name: Install dependencies
      run: |
-        # pin pip to workaround https://github.com/pypa/pip/issues/9180
-        python -m pip install pip==20.2
+        python -m pip install --upgrade pip
        python -m pip install --upgrade setuptools
        python -m pip install --progress-bar=off -e ./ml-agents-envs
        python -m pip install --progress-bar=off -e ./ml-agents
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
        args: [--py3-plus, --py36-plus]
        exclude: >
            (?x)^(
-                .*barracuda.py|
                .*_pb2.py|
                .*_pb2_grpc.py
            )$
        args: [--assume-in-merge]
    -   id: check-yaml
        # Won't handle the templating in yamato
-        exclude: \.yamato/*
+        exclude: \.yamato/.*

 -   repo: https://github.com/pre-commit/pygrep-hooks
    rev: v1.4.2
--- a/.yamato/com.unity.ml-agents-performance.yml
+++ b/.yamato/com.unity.ml-agents-performance.yml
 test_editors:
  - version: 2019.4
-  - version: 2020.1
  - version: 2020.2
 ---
 {% for editor in test_editors %}
--- a/.yamato/com.unity.ml-agents-test.yml
+++ b/.yamato/com.unity.ml-agents-test.yml
    enableCodeCoverage: !!bool true
    testProject: DevProject
    enableNoDefaultPackages: !!bool true
-  - version: 2020.1
-    enableCodeCoverage: !!bool true
-    testProject: DevProject
-    enableNoDefaultPackages: !!bool true
  - version: 2020.2
    enableCodeCoverage: !!bool true
    testProject: DevProject
    assembly: Unity.ML-Agents
    minCoveragePct: 72
  - name: com.unity.ml-agents.extensions
-    assembly: Unity.ML-Agents.Extensions
+    assembly: Unity.ML-Agents.Extensions*
    minCoveragePct: 75
 ---

--- a/.yamato/compressed-sensor-test.yml
+++ b/.yamato/compressed-sensor-test.yml
    - .yamato/standalone-build-test.yml#test_linux_standalone_{{ editor.version }}
  triggers:
    cancel_old_ci: true
+    {% if editor.extra_test == "sensor" %}
    expression: |
      (pull_request.target eq "master" OR
      pull_request.target match "release.+") AND
      pull_request.changes.any match "Project/**" OR
-      pull_request.changes.any match "ml-agents/**" OR
+      pull_request.changes.any match "ml-agents/tests/yamato/**" OR
+    {% endif %}
 {% endfor %}
--- a/.yamato/gym-interface-test.yml
+++ b/.yamato/gym-interface-test.yml
    - .yamato/standalone-build-test.yml#test_linux_standalone_{{ editor.version }}
  triggers:
    cancel_old_ci: true
+    {% if editor.extra_test == "gym" %}
    expression: |
      (pull_request.target eq "master" OR
      pull_request.target match "release.+") AND
-      pull_request.changes.any match "ml-agents/**" OR
+      pull_request.changes.any match "ml-agents/tests/yamato/**" OR
+    {% endif %}
 {% endfor %}
--- a/.yamato/python-ll-api-test.yml
+++ b/.yamato/python-ll-api-test.yml
    - .yamato/standalone-build-test.yml#test_linux_standalone_{{ editor.version }}
  triggers:
    cancel_old_ci: true
+    {% if editor.extra_test == "llapi" %}
    expression: |
      (pull_request.target eq "master" OR
      pull_request.target match "release.+") AND
-      pull_request.changes.any match "ml-agents/**" OR
+      pull_request.changes.any match "ml-agents/tests/yamato/**" OR
+    {% endif %}
 {% endfor %}
--- a/.yamato/test_versions.metafile
+++ b/.yamato/test_versions.metafile
 # List of editor versions for standalone-build-test and its dependencies.
-# csharp_backcompat_version is used in training-int-tests to determine the
-# older package version to run the backwards compat tests against.
+# We always run training-int-tests for all versions of the editor
+# For each "other" test, we only run it against a single version of the
+# editor to reduce the number of yamato jobs
-    csharp_backcompat_version: 1.0.0
+    extra_test: llapi
-    csharp_backcompat_version: 1.0.0
-  - version: 2020.1
-    csharp_backcompat_version: 1.0.0
+    extra_test: gym
-    # 2020.2 moved the AssetImporters namespace
-    # but we didn't handle this until 1.2.0
-    csharp_backcompat_version: 1.2.0
+    extra_test: sensor
--- a/DevProject/Packages/manifest.json
+++ b/DevProject/Packages/manifest.json
  "dependencies": {
    "com.unity.2d.sprite": "1.0.0",
    "com.unity.2d.tilemap": "1.0.0",
-    "com.unity.ads": "3.4.9",
+    "com.unity.ads": "3.6.1",
-    "com.unity.ide.vscode": "1.2.1",
+    "com.unity.ide.vscode": "1.2.3",
+    "com.unity.inputsystem": "1.1.0-preview.3",
-    "com.unity.multiplayer-hlapi": "1.0.6",
+    "com.unity.multiplayer-hlapi": "1.0.8",
-    "com.unity.purchasing": "2.1.0",
-    "com.unity.test-framework": "1.1.16",
+    "com.unity.purchasing": "2.2.1",
+    "com.unity.test-framework": "1.1.20",
-    "com.unity.xr.legacyinputhelpers": "2.1.4",
+    "com.unity.xr.legacyinputhelpers": "2.1.7",
    "com.unity.modules.ai": "1.0.0",
    "com.unity.modules.androidjni": "1.0.0",
    "com.unity.modules.animation": "1.0.0",
  "registry": "https://artifactory.prd.cds.internal.unity3d.com/artifactory/api/npm/upm-candidates",
  "testables": [
    "com.unity.ml-agents",
-    "com.unity.ml-agents.extensions"
+    "com.unity.ml-agents.extensions",
+    "com.unity.inputsystem"
  ]
 }
--- a/DevProject/ProjectSettings/ProjectSettings.asset
+++ b/DevProject/ProjectSettings/ProjectSettings.asset
  androidBlitType: 0
  defaultIsNativeResolution: 1
  macRetinaSupport: 1
-  runInBackground: 0
+  runInBackground: 1
  captureSingleScreen: 0
  muteOtherAudioSources: 0
  Prepare IOS For Recording: 0
  xboxOneMonoLoggingLevel: 0
  xboxOneLoggingLevel: 1
  xboxOneDisableEsram: 0
+  xboxOneEnableTypeOptimization: 0
  xboxOnePresentImmediateThreshold: 0
  switchQueueCommandMemory: 1048576
  switchQueueControlMemory: 16384
  switchNVNOtherPoolsGranularity: 16777216
+  switchNVNMaxPublicTextureIDCount: 0
+  switchNVNMaxPublicSamplerIDCount: 0
+  stadiaPresentMode: 0
+  stadiaTargetFramerate: 0
+  vulkanEnableLateAcquireNextImage: 0
  m_SupportedAspectRatios:
    4:3: 1
    5:4: 1
  useHDRDisplay: 0
  D3DHDRBitDepth: 0
  m_ColorGamuts: 00000000
-  targetPixelDensity: 0
+  targetPixelDensity: 30
  resolutionScalingMode: 0
  androidSupportedAspectRatio: 1
  androidMaxAspectRatio: 2.1
  StripUnusedMeshComponents: 0
  VertexChannelCompressionMask: 4054
  iPhoneSdkVersion: 988
-  iOSTargetOSVersionString: 
+  iOSTargetOSVersionString: 10.0
-  tvOSTargetOSVersionString: 
+  tvOSTargetOSVersionString: 10.0
  uIPrerenderedIcon: 0
  uIRequiresPersistentWiFi: 0
  uIRequiresFullScreen: 1
-  iPhoneSplashScreen: {fileID: 0}
-  iPhoneHighResSplashScreen: {fileID: 0}
-  iPhoneTallHighResSplashScreen: {fileID: 0}
-  iPhone47inSplashScreen: {fileID: 0}
-  iPhone55inPortraitSplashScreen: {fileID: 0}
-  iPhone55inLandscapeSplashScreen: {fileID: 0}
-  iPhone58inPortraitSplashScreen: {fileID: 0}
-  iPhone58inLandscapeSplashScreen: {fileID: 0}
-  iPadPortraitSplashScreen: {fileID: 0}
-  iPadHighResPortraitSplashScreen: {fileID: 0}
-  iPadLandscapeSplashScreen: {fileID: 0}
-  iPadHighResLandscapeSplashScreen: {fileID: 0}
-  iPhone65inPortraitSplashScreen: {fileID: 0}
-  iPhone65inLandscapeSplashScreen: {fileID: 0}
-  iPhone61inPortraitSplashScreen: {fileID: 0}
-  iPhone61inLandscapeSplashScreen: {fileID: 0}
  appleTVSplashScreen: {fileID: 0}
  appleTVSplashScreen2x: {fileID: 0}
  tvOSSmallIconLayers: []
  metalEditorSupport: 1
  metalAPIValidation: 1
  iOSRenderExtraFrameOnPause: 0
+  iosCopyPluginsCodeInsteadOfSymlink: 0
  appleDeveloperTeamID: 
  iOSManualSigningProvisioningProfileID: 
  tvOSManualSigningProvisioningProfileID: 
  ps4ShareFilePath: 
  ps4ShareOverlayImagePath: 
  ps4PrivacyGuardImagePath: 
+  ps4ExtraSceSysFile: 
  ps4NPtitleDatPath: 
  ps4RemotePlayKeyAssignment: -1
  ps4RemotePlayKeyMappingDir: 
  ps4UseResolutionFallback: 0
  ps4ReprojectionSupport: 0
  ps4UseAudio3dBackend: 0
+  ps4UseLowGarlicFragmentationMode: 1
  ps4SocialScreenEnabled: 0
  ps4ScriptOptimizationLevel: 2
  ps4Audio3dVirtualSpeakerCount: 14
  ps4disableAutoHideSplash: 0
  ps4videoRecordingFeaturesUsed: 0
  ps4contentSearchFeaturesUsed: 0
+  ps4CompatibilityPS5: 0
+  ps4GPU800MHz: 1
  ps4attribEyeToEyeDistanceSettingVR: 0
  ps4IncludedModules: []
  ps4attribVROutputEnabled: 0
  additionalIl2CppArgs: 
  scriptingRuntimeVersion: 1
  gcIncremental: 0
+  assemblyVersionValidation: 1
  gcWBarrierValidation: 0
  apiCompatibilityLevelPerPlatform: {}
  m_RenderingPath: 1
  XboxOneCapability: []
  XboxOneGameRating: {}
  XboxOneIsContentPackage: 0
+  XboxOneEnhancedXboxCompatibilityMode: 0
  XboxOneEnableGPUVariability: 1
  XboxOneSockets: {}
  XboxOneSplashScreen: {fileID: 0}
  XboxOneOverrideIdentityName: 
+  XboxOneOverrideIdentityPublisher: 
  vrEditorSettings:
    daydream:
      daydreamIconForeground: {fileID: 0}
  projectName: 
  organizationId: 
  cloudEnabled: 0
-  enableNativePlatformBackendsForNewInputSystem: 0
-  disableOldInputManagerSupport: 0
+  enableNativePlatformBackendsForNewInputSystem: 1
+  disableOldInputManagerSupport: 1
  legacyClampBlendShapeWeights: 0
--- a/DevProject/ProjectSettings/ProjectVersion.txt
+++ b/DevProject/ProjectSettings/ProjectVersion.txt
-m_EditorVersion: 2019.4.7f1
-m_EditorVersionWithRevision: 2019.4.7f1 (e992b1a16e65)
+m_EditorVersion: 2019.4.19f1
+m_EditorVersionWithRevision: 2019.4.19f1 (ca5b14067cec)
--- a/Project/Assets/ML-Agents/Editor/Tests/StandaloneBuildTest.cs
+++ b/Project/Assets/ML-Agents/Editor/Tests/StandaloneBuildTest.cs
                scenes,
                outputPath,
                buildTarget,
-                BuildOptions.None
+                BuildOptions.Development
            );
            var isOk = buildResult.summary.result == BuildResult.Succeeded;
            var error = "";
--- a/Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ModelOverrider.cs
+++ b/Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ModelOverrider.cs
        const string k_CommandLineModelOverrideDirectoryFlag = "--mlagents-override-model-directory";
        const string k_CommandLineModelOverrideExtensionFlag = "--mlagents-override-model-extension";
        const string k_CommandLineQuitAfterEpisodesFlag = "--mlagents-quit-after-episodes";
+        const string k_CommandLineQuitAfterSeconds = "--mlagents-quit-after-seconds";
        const string k_CommandLineQuitOnLoadFailure = "--mlagents-quit-on-load-failure";

        // The attached Agent
        // Max episodes to run. Only used if > 0
        // Will default to 1 if override models are specified, otherwise 0.
        int m_MaxEpisodes;
+
+        // Deadline - exit if the time exceeds this
+        DateTime m_Deadline = DateTime.MaxValue;

        int m_NumSteps;
        int m_PreviousNumSteps;
        void GetAssetPathFromCommandLine()
        {
            var maxEpisodes = 0;
+            var timeoutSeconds = 0;
+
            string[] commandLineArgsOverride = null;
            if (!string.IsNullOrEmpty(debugCommandLineOverride) && Application.isEditor)
            {
                {
                    Int32.TryParse(args[i + 1], out maxEpisodes);
                }
+                else if (args[i] == k_CommandLineQuitAfterSeconds && i < args.Length - 1)
+                {
+                    Int32.TryParse(args[i + 1], out timeoutSeconds);
+                }
                else if (args[i] == k_CommandLineQuitOnLoadFailure)
                {
                    m_QuitOnLoadFailure = true;
                m_MaxEpisodes = maxEpisodes > 0 ? maxEpisodes : 1;
                Debug.Log($"setting m_MaxEpisodes to {maxEpisodes}");
            }
+
+            if (timeoutSeconds > 0)
+            {
+                m_Deadline = DateTime.Now + TimeSpan.FromSeconds(timeoutSeconds);
+                Debug.Log($"setting deadline to {timeoutSeconds} from now.");
+
+            }
        }

        void OnEnable()
                    EditorApplication.isPlaying = false;
 #endif
                }
+                else if (DateTime.Now >= m_Deadline)
+                {
+                    Debug.Log(
+                        $"Deadline exceeded. " +
+                        $"{TotalCompletedEpisodes}/{m_MaxEpisodes} episodes and " +
+                        $"{TotalNumSteps}/{m_MaxEpisodes * m_Agent.MaxStep} steps completed. Exiting.");
+                    Application.Quit(0);
+#if UNITY_EDITOR
+                    EditorApplication.isPlaying = false;
+#endif
+                }
+
            m_NumSteps++;
        }

--- a/Project/Assets/ML-Agents/Examples/Soccer/Prefabs/SoccerFieldTwos.prefab
+++ b/Project/Assets/ML-Agents/Examples/Soccer/Prefabs/SoccerFieldTwos.prefab
--- a/Project/Assets/ML-Agents/Examples/Soccer/Prefabs/StrikersVsGoalieField.prefab
+++ b/Project/Assets/ML-Agents/Examples/Soccer/Prefabs/StrikersVsGoalieField.prefab
--- a/Project/Assets/ML-Agents/Examples/Soccer/Scenes/SoccerTwos.unity
+++ b/Project/Assets/ML-Agents/Examples/Soccer/Scenes/SoccerTwos.unity
  m_Modification:
    m_TransformParent: {fileID: 0}
    m_Modifications:
+    - target: {fileID: 1141134673700168, guid: 54f3340298537426e96a6cc530e2d5d8, type: 3}
+      propertyPath: m_Name
+      value: SoccerFieldTwos
+      objectReference: {fileID: 0}
    - target: {fileID: 4558743310993102, guid: 54f3340298537426e96a6cc530e2d5d8, type: 3}
      propertyPath: m_LocalPosition.x
      value: 0
    - target: {fileID: 4558743310993102, guid: 54f3340298537426e96a6cc530e2d5d8, type: 3}
      propertyPath: m_RootOrder
      value: 4
+      objectReference: {fileID: 0}
+    - target: {fileID: 4558743310993102, guid: 54f3340298537426e96a6cc530e2d5d8, type: 3}
+      propertyPath: m_LocalEulerAnglesHint.x
+      value: 0
+      objectReference: {fileID: 0}
+    - target: {fileID: 4558743310993102, guid: 54f3340298537426e96a6cc530e2d5d8, type: 3}
+      propertyPath: m_LocalEulerAnglesHint.y
+      value: 0
+      objectReference: {fileID: 0}
+    - target: {fileID: 4558743310993102, guid: 54f3340298537426e96a6cc530e2d5d8, type: 3}
+      propertyPath: m_LocalEulerAnglesHint.z
+      value: 0
      objectReference: {fileID: 0}
    m_RemovedComponents: []
  m_SourcePrefab: {fileID: 100100000, guid: 54f3340298537426e96a6cc530e2d5d8, type: 3}
  m_Name: 
  m_EditorClassIdentifier: 
  gravityMultiplier: 1
-  monitorVerticalOffset: 0
+  reuseCollisionCallbacks: 1
 --- !u!114 &1574236051
 MonoBehaviour:
  m_ObjectHideFlags: 0
  blueMaterial: {fileID: 2100000, guid: c9fa44c2c3f8ce74ca39a3355ea42631, type: 2}
  randomizePlayersTeamForTraining: 0
  agentRunSpeed: 2
-  strikerPunish: -0.1
-  strikerReward: 1
-  goaliePunish: -1
-  goalieReward: 0.1
 --- !u!1001 &1606160104
 PrefabInstance:
  m_ObjectHideFlags: 0
--- a/Project/Assets/ML-Agents/Examples/Soccer/Scenes/StrikersVsGoalie.unity
+++ b/Project/Assets/ML-Agents/Examples/Soccer/Scenes/StrikersVsGoalie.unity
  maximumDeltaTime: 0.33333334
  solverIterations: 6
  solverVelocityIterations: 1
+  reuseCollisionCallbacks: 1
 --- !u!114 &1574236051
 MonoBehaviour:
  m_ObjectHideFlags: 0
--- a/Project/Packages/manifest.json
+++ b/Project/Packages/manifest.json
    "com.unity.collab-proxy": "1.2.15",
    "com.unity.ml-agents": "file:../../com.unity.ml-agents",
    "com.unity.ml-agents.extensions": "file:../../com.unity.ml-agents.extensions",
-    "com.unity.package-manager-ui": "2.0.8",
-    "com.unity.purchasing": "2.0.3",
+    "com.unity.package-manager-ui": "2.0.13",
+    "com.unity.purchasing": "2.2.1",
    "com.unity.textmeshpro": "1.4.1",
    "com.unity.modules.ai": "1.0.0",
    "com.unity.modules.animation": "1.0.0",
--- a/Project/ProjectSettings/ProjectVersion.txt
+++ b/Project/ProjectSettings/ProjectVersion.txt
-m_EditorVersion: 2018.4.24f1
+m_EditorVersion: 2018.4.32f1
--- a/Project/ProjectSettings/TagManager.asset
+++ b/Project/ProjectSettings/TagManager.asset
  - symbol_O_Goal
  - purpleAgent
  - purpleGoal
+  - tile
  layers:
  - Default
  - TransparentFX
--- a/com.unity.ml-agents.extensions/Documentation~/Grid-Sensor.md
+++ b/com.unity.ml-agents.extensions/Documentation~/Grid-Sensor.md
 # Summary

-The Grid Sensor combines the generality of data extraction from Raycasts with the image processing power of Convolutional Neural Networks. The Grid Sensor can be used to collect data in the general form of a "Width x Height x Channel" matrix which can be used for training Reinforcement Learning agents or for data analysis.
+The Grid Sensor is an alternative method for collecting observations which combines the generality of data extraction from Raycasts with the image processing power of Convolutional Neural Networks. The Grid Sensor can be used to collect data in the general form of a "Width x Height x Channel" matrix which can be used for training agent policies or for data analysis.
-In MLAgents there are 2 main sensors for observing information that is "physically" around the agent.
+In ML-Agents there are two main sensors for observing information that is "physically" around the agent.
+
-This is simple to implement and provides enough information for most simple games. When few are used, they are computationally fast. However, there are multiple limiting factors:
-* The rays need to be at the same height as the things the agent should observe
-* Objects can remain hidden by line of sight and if the knowledge of those objects is crucial to the success of the agent, then this limitation must be compensated for by the agents networks capacity (i.e., need a bigger brain with memory)
+Raycasts are simple to implement and provide enough information for most simple games. When few are used, they are also computationally lightweight. However, there are multiple limiting factors:
+
+* The rays need to be at the same height as the things the agent should observe.
+* Objects can remain hidden by line of sight and if the knowledge of those objects is crucial to the success of the agent, then this limitation must be compensated for by the agent's network capacity (i.e., need a bigger brain with memory).
-* Typically the length of the raycasts is limited because the agent need not know about objects that are at the other side of the level. Combined with few raycasts for computational efficiency, this means that an agent may not observe objects that fall between these rays and the issue becomes worse as the objects reduce in size.
+* Typically, the length of the raycasts is limited because the agent need not know about objects that are at the other side of the level. Combined with few raycasts for computational efficiency, this means that an agent may not observe objects that fall between these rays and the issue becomes worse as the objects reduce in size.
-The Camera provides the agent with either a grayscale or an RGB image of the game environment. It goes without saying that there non-linear relationships between nearby pixels in an image. It is this intuition that helps form the basis of Convolutional Neural Networks (CNNs) and established the literature of designing networks that take advantage of these relationships between pixels. Following this established literature of CNNs on image based data, the MLAgent's Camera Sensor provides a means by which the agent can include high dimensional inputs (images) into its observation stream.
+The Camera provides the agent with either a grayscale or an RGB image of the game environment. In many cases, what we want to extract from a set of pixels is invariant to the location of those pixels in the image. It is this intuition that helps form the basis of Convolutional Neural Networks (CNNs) and established the literature of designing networks that take advantage of these relationships between pixels. Following this established literature of CNNs on image-based data, the ML-Agents Camera Sensor provides a means by which the agent can include high-dimensional inputs (images) in its observation stream.
-* It requires render the scene and thus is computationally slower than alternatives that do not use rendering
-* It has yet been shown that the Camera Sensor can be used on a headless machine which means it is not yet possible (if at all) to train an agent on a headless infrastructure.
+
+* It requires rendering the scene and thus is computationally slower than alternatives that do not use rendering.
-* The RGB of the camera only provides a maximum of 3 channels to the agent.
+* The RGB of the camera only provides a maximum of three channels to the agent.
-An image can be thought of as a matrix of a predefined width (W) and a height (H) and each pixel can be thought of as simply an array of length 3 (in the case of RGB), `[Red, Green, Blue]` holding the different channel information of the color (channel) intensities at that pixel location. Thus an image is just a 3 dimensional matrix of size WxHx3. A Grid Observation can be thought of as a generalization of this setup where in place of a pixel there is a "cell" which is an array of length N representing different channel intensities at that cell position. From a Convolutional Neural Network point of view, the introduction of multiple channels in an "image" isn't a new concept. One such example is using an RGB-Depth image which is used in several robotics applications. The distinction of Grid Observations is what the data within the channels represents. Instead of limiting the channels to color intensities, the channels within a cell of a Grid Observation generalize to any data that can be represented by a single number (float or int).
-
-Before jumping into the details of the Grid Sensor, an important thing to note is the agent performance and qualitatively different behavior over raycasts. Unity MLAgent's comes with a suite of example environments. One in particular, the [Food Collector](https://github.com/Unity-Technologies/ml-agents/tree/release_12_docs/docs/Learning-Environment-Examples.md#food-collector), has been the focus of the Grid Sensor development.
+## Overview
-The Food Collector environment can be described as:
-* Set-up: A multi-agent environment where agents compete to collect food.
-* Goal: The agents must learn to collect as many green food spheres as possible while avoiding red spheres.
-* Agents: The environment contains 5 agents with same Behavior Parameters.
+There are three main phases to the observation process of the Grid Sensor:
-When applying the Grid Sensor to this environment, in place of the Raycast Vector Sensor or the Camera Sensor, a Mean Reward of 40-50 is observed. This performance is on par with what is seen by agents trained with RayCasts but the side-by-side comparison of trained agents, shows a qualitative difference in behavior. A deeper study and interpretation of the qualitative differences between agents trained with Raycasts and Vector Sensors verses Grid Sensors is left to future studies.
-
-<img src="images/gridobs-vs-vectorobs.gif" align="middle" width="3000"/>
-
-## Overview
-
-There are 3 main phases to the Grid Sensor:
 1. **Collection** - data is extracted from observed objects
 2. **Encoding** - the extracted data is encoded into a grid observation
 3. **Communication** - the grid observation is sent to python or used by a trained model
 ## Collection

-A Grid Sensor is the Grid Observation analog of a Unity Camera but with some notable differences. The sensor is made up of a grid of identical Box Colliders which designate the "cells" of the grid. The Grid Sensor also has a list of "detectable objects" in the form of Unity gameobject tags. When an object that is tagged as a detectable object is present within a cell's Box Collider, that cell is "activated" and a method on the Grid Sensor extracts data from said object and associates that data with the position of the activated cell. Thus the Grid Sensor is always orthographic:
+A Grid Sensor is the Grid Observation analog of a Unity Camera but with some notable differences. The sensor is made up of a grid of identical Box Colliders which designate the "cells" of the grid. The Grid Sensor also has a list of "detectable objects" in the form of Unity GameObject tags. When an object that is tagged as a detectable object is present within a cell's Box Collider, that cell is "activated" and a method on the Grid Sensor extracts data from said object and associates that data with the position of the activated cell. Thus the Grid Sensor is always orthographic:

 <img src="images/persp_ortho_proj.png" width="500">
 <cite><a href="https://www.geofx.com/graphics/nehe-three-js/lessons17-24/lesson21/lesson21.html">geofx.com</a></cite>
-Just like the Raycasts mentioned earlier, the Grid Sensor can extract any kind of data from a detected object and just like the Camera, the Grid Sensor maintains the spacial relationship between nearby cells that allows one to take advantage of the CNN literature. Thus the Grid Sensor tries to take the best of both sensors and combines them to something that is more expressive.
+Just like the Raycasts mentioned earlier, the Grid Sensor can extract any kind of data from a detected object, and just like the Camera, the Grid Sensor maintains the spatial relationship between nearby cells that allows one to take advantage of the computational properties of CNNs. Thus the Grid Sensor tries to take the best of both sensors and combine them into something that is more expressive.
-Lets imagine a scenario where an agent is faced with 2 enemies and there are 2 "equipable" weapons somewhat behind the agent. Lets also keep in mind some important properties of the enemies and weapons that would be useful for the agent to know. For simplicity, lets assume enemies represent their health as a percentage (0-100%). Lets also assume that enemies and weapons are the only 2 kind of objects that the agent would see in the entire game.
+Let's imagine a scenario where an agent is faced with two enemies and there are two "equippable" weapons somewhat behind the agent. It would be helpful for the agent to know the location and properties of both the enemies as well as the equippable items. For simplicity, let's assume enemies represent their health as a percentage (0-100%). Also assume that enemies and weapons are the only two kinds of objects that the agent would see in the entire game.
-<img src="images/gridsensor-example.png" align="middle" width="3000"/>
+<img src="images/gridsensor-example.png" align="middle" width="512"/>
-If a raycast hits an object, not only could we get the distance (normalized by the maximum raycast distance) we would be able to extract its type (enemy vs weapon) and if its an enemy then we could get its health (e.g., .6).
+If a raycast hits an object, not only could we get the distance (normalized by the maximum raycast distance), but we would also be able to extract its type (enemy vs weapon) and any attribute associated with it (e.g. an enemy's health).

 There are many ways in which one could encode this information but one reasonable encoding is this:
 ```
 For example, if the raycast hit nothing then this would be represented by `[0, 0, 0, 1]`.
 If instead the raycast hit an enemy with 60% health that is 50% of the maximum raycast distance, the data would be represented by `[0, 1, .6, .5]`.

-The limitations of raycasts which were presented above are easy to visualize in the below image. The agent is unable to see where the weapons are and only sees one of the enemies. Typically in the MLAgents examples, this situation is mitigated by including previous frames of data so that the agent observes changes through time. However, in more complex games, it is not difficult to imagine scenarios where an agent would not be able to observe important information using only Raycasts.
+The limitations of raycasts which were presented above are easy to visualize in the below image. The agent is unable to see where the weapons are and only sees one of the enemies. Typically in the ML-Agents examples, this situation is mitigated by including previous frames of data so that the agent observes changes through time. However, in more complex games, it is not difficult to imagine scenarios where an agent might miss important information using only Raycasts.
-<img src="images/gridsensor-example-raycast.png" align="middle" width="3000"/>
+<img src="images/gridsensor-example-raycast.png" align="middle" width="512"/>
-<img src="images/gridsensor-example-camera.png" align="middle" width="3000"/>
+<img src="images/gridsensor-example-camera.png" align="middle" width="512"/>

 #### Grid Sensor


 Following the same data extraction method presented in the section on raycasts, if a Grid Sensor was used instead of Raycasts or a Camera, then not only would the agent be able to extract the health value of the enemies but it would also be able to encode the relative positions of those objects as is done with Camera. Additionally, as the texture of the objects is not used, this data can be collected without rendering the scene.

-<img src="images/gridsensor-example-gridsensor.png" align="middle" width="3000"/>
+In our example, we can collect data in the form of [objectType, health] by overriding `GetObjectData` as follows:
+```csharp
+    // Builds the per-channel values for one detected object: channel 0 holds the
+    // object's type index and, for objects tagged "enemy", channel 1 holds its health.
+    protected override float[] GetObjectData(GameObject currentColliderGo, float type_index, float normalized_distance)
+    {
+        float[] channelValues = new float[ChannelDepth.Length]; // ChannelDepth.Length = 2 in this example
+        channelValues[0] = type_index; // this is the observation collected in the default implementation
+        if (currentColliderGo.tag == "enemy")
+        {
+            var enemy = currentColliderGo.GetComponent<EnemyClass>();
+            channelValues[1] = enemy.health; // the value may have to be normalized, depending on the type of GridSensor encoding you use (see sections below)
+        }
+        return channelValues;
+    }
+```
-At the end of the Collection phase, each cell with an object inside of it has `GetObjectData` called and the returned values (named `channelValues`) is then processed in the Encoding phase which is described in the next section.
+<img src="images/gridsensor-example-gridsensor.png" align="middle" width="512"/>
+
+At the end of the Collection phase, each cell with an object inside of it has `GetObjectData` called and the returned values are then processed in the Encoding phase which is described in the next section.
-The CountingGridSensor builds on the GridSesnor to perform the specific job of counting the number of object types that are based on the different detectable object tags. The encoding and is meant to exploit a key feature of the Grid Sensor. In both the Channel and the Channel Hot DepthTypes, the closest detectable object, in relation to the agent, that lays within a cell is used for encoding the value for that cell. In the CountingGridSensor, the number of each type of object is recorded and then normalized according to a max count, stored in the ChannelDepth.
+The CountingGridSensor builds on the GridSensor to perform the specific job of counting the number of object types that are based on the different detectable object tags. The encoding is meant to exploit a key feature of the GridSensor. In the original GridSensor, only the closest detectable object, in relation to the agent, that lies within a cell is used for encoding the value for that cell. In the CountingGridSensor, the number of each type of object is recorded and then normalized according to a max count.

 An example of the CountingGridSensor can be found below.

 In order to support different ways of representing the data extracted from an object, multiple "depth types" were implemented. Each has pros and cons and, depending on the use-case of the Grid Sensor, one may be more beneficial than the others.

-The data stored that is extracted during the *Collection* phase, and stored in `channelValues`, may come from different sources. For instance, going back the Enemy/Weapon example in the previous section, an enemy's health is continuous whereas the object type (enemy or weapon) is categorical data. This distinction is important as categorical data requires a different encoding mechanism than continuous data.
+The stored data that is extracted during the *Collection* phase may come from different sources, and thus be of a different nature. For instance, going back to the Enemy/Weapon example in the previous section, an enemy's health is continuous whereas the object type (enemy or weapon) is categorical data. This distinction is important as categorical data requires a different encoding mechanism than continuous data.
-The Grid Sensor handles this distinction with 4 properties that define how this data is to be encoded:
-* DepthType - Enum signifying the encoding mode: Channel, ChannelHot
-* ObservationPerCell - the total number of values that are in each cell of the grid observation
-* ChannelDepth - int[] describing the range of each data within the `channelValues`
-* ChannelOffset - int[] describing the number of encoded values that come before each data within `channelValues`
+The GridSensor handles this distinction with two user-defined properties that define how this data is to be encoded:
-The ChannelDepth and the DepthType are user defined and gives control to the developer to how they can encode their data. The ChannelDepth and ChannelOffset are both initialized and used in different ways depending on the ChannelDepth and the DepthType.
+* DepthType - Enum signifying the encoding mode: Channel, ChannelHot
+* ChannelDepth - `int[]` describing the range of each piece of data; it is used differently depending on the DepthType
-How categorical and continuous data is treated is different between the different DepthTypes as will be explored in the sections below. The sections will use an on-going example similar to example mentioned earlier where, within a cell, the sensor observes: `an enemy with 60% health`. Thus the cell contains 2 kinds of data: categorical data (object type) and the continuous data (health). Additionally, the order of the observed tags is important as it allows one to encode the tag of the observed object by its index within list of observed tags. Note that in the example, the observed tags is defined as ["weapon", "enemy"].
+Categorical and continuous data are treated differently by the different DepthTypes, as will be explored in the sections below. The sections will use an on-going example similar to the example mentioned earlier where, within a cell, the sensor observes: `an enemy with 60% health`. Thus the cell contains two kinds of data: categorical data (object type) and the continuous data (health). Additionally, the order of the observed tags is important as it allows one to encode the tag of the observed object by its index within the list of observed tags. Note that in the example, the observed tags are defined as ["weapon", "enemy"].
-The Channel Based Grid Observations is perhaps the simplest in terms of usability and similarity with other machine learning applications. Each grid is of size WxHxC where C is the number of channels. To distinguish between categorical and continuous data, one would use the ChannelDepth array to signify the ranges that the values in the `channelValues` array could take. If one sets ChannelDepth[i] to be 1, it is assumed that the value of `channelValues[i]` is already normalized. Else ChannelDepth[i] represents the total number of possible values that `channelValues[i]` can take.
+The Channel Based Grid Observations represent observations in a normalized form, with values from 0 to 1. To distinguish between categorical and continuous data, one would use the ChannelDepth array to signify the ranges that the values in the `channelValues` array could take. If one sets ChannelDepth[i] to be 1, it is assumed that the value of `channelValues[i]` is already normalized. Else ChannelDepth[i] represents the total number of possible values that `channelValues[i]` can take and will be used for normalization.
-Using the example described earlier, if one was using Channel Based Grid Observations, they would have a ChannelDepth = {2, 1} to describe that there are two possible values for the first channel and the 1 represents that the second channel is already normalized.
-As the "enemy" is in the second position of the observed tags, its value can be normalized by:
+For continuous data, you should set `ChannelDepth[i]` to 1 and the collected data should be already normalized by its min/max range. For discrete data, you should set `ChannelDepth[i]` to be the total number of possible values, and the collected data should be an integer value within the range of `ChannelDepth[i]`.
+
+Using the example described earlier, if one was using Channel Based Grid Observations, they would have a ChannelDepth = {2, 1} to describe that there are two possible values for the first channel (ObjectType) and the 1 represents that the second channel (EnemyHealth) is continuous and should be already normalized.
+
+For ObjectType, "weapon", "enemy" will be represented respectively as:
-num = detectableObjects.IndexOfTag("enemy")/ChannelDepth[0] = 2/2 = 1;
+weapon = DetectableObjects.IndexOfTag("weapon")/ChannelDepth[0] = 1/2 = 0.5;
+enemy = DetectableObjects.IndexOfTag("enemy")/ChannelDepth[0] = 2/2 = 1;
-`[1, .6]`
+`[1, .6]`. If the health in the game is not represented in a normalized form, for example if the health is represented as an integer ranging from -100 to 100, you'll need to manually normalize it during collection. That is, if you get the value 50, you need to normalize it by `(50 - (-100))/(100 - (-100)) = 0.75` and collect 0.75 instead of 50.
-The Channel Hot DepthType generalizes the classic OneHot encoding to differentiate combinations of different data. Rather than normalizing the data like in the Channel Based section, each element of `channelValues` is represented by an encoding based on the ChannelDepth. If ChannelDepth[i] = 1, then this represents that `channelValues[i]` is already normalized (between 0-1) and will be used directly within the encoding. However if ChannelDepth[i] is an integer greater than 1, then the value in `channelValues[i]` will be converted into a OneHot encoding based on the following:
+The Channel Hot DepthType generalizes the classic OneHot encoding to differentiate combinations of different data. Rather than normalizing the data like in the Channel Based section, each element of `channelValues` is represented by an encoding based on the ChannelDepth. If ChannelDepth[i] = 1, then this represents that `channelValues[i]` is already normalized (between 0-1) and will be used directly within the encoding, which is the same as in the Channel Based case. However if ChannelDepth[i] is an integer greater than 1, then the value in `channelValues[i]` will be converted into a OneHot encoding based on the following:

 ```
 float[] arr = new float[ChannelDepth[i] + 1];
 The encoding of each channel is then concatenated together. Clearly using this setup allows the developer to be able to encode values using the classic OneHot encoding. Below are some different variations of the ChannelDepth which create different encodings of the example:

 ##### ChannelDepth = {3, 1}
-The first element, 3, signifies that there are 3 possibilities for the first channel and as the "enemy" is 2nd in the detected objects list, the "enemy" in the example is encoded as `[0, 0, 1]` where the first index represents "no object". The second element, 1, signifies that the health is already normalized and, following the table, is used directly. The resulting encoding is thus:
+The first element, 3, signifies that there are three possibilities for the first channel and as the "enemy" is 2nd in the detected objects list, the "enemy" in the example is encoded as `[0, 0, 1]` where the first index represents "no object". The second element, 1, signifies that the health is already normalized and, following the table, is used directly. The resulting encoding is thus:
 ```
 [0, 0, 1, 0.6]
 ```

 ### CountingGridSensor

-As introduced above, the CountingGridSensor inherits from the GridSensor for the sole purpose of counting the different objects that lay within a cell. In order to normalize the counts so that the grid can be properly encoded as PNG, the ChannelDepth is used to represent the "maximum count" of each type. For the working example, if the ChannelDepth is set as {50, 10}, which represents that the maximum count for objects with the "weapon" and "enemy" tag is 50 and 10, respectively, then the resulting data would be:
+As mentioned above, the CountingGridSensor inherits from the GridSensor for the sole purpose of counting the different objects that lie within a cell. In order to normalize the counts so that the grid can be properly encoded as PNG, the ChannelDepth is used to represent the "maximum count" of each type. For the working example, if the ChannelDepth is set as {50, 10}, which represents that the maximum count for objects with the "weapon" and "enemy" tag is 50 and 10, respectively, then the resulting data would be:
 ```
 encoding = [0 weapons/ 50 weapons, 1 enemy / 10 enemies] = [0, .1]
 ```
-At the end of the Encoding phase, all of the data for a Grid Observation is placed into a float[] referred to as the perception buffer. Now the data is ready to be sent to either the python side for training or to be used by a trained model within Unity. This is where the Grid Sensor takes advantage of 2D textures and the PNG encoding schema to reduce the number of bytes that are being sent.
+At the end of the Encoding phase, all the Grid Observations will be sent to either the python side for training or to be used by a trained model within Unity. Since the data format is similar to images collected by Camera Sensors, Grid Observations also have the CompressionType option to specify whether to send the data directly or send in PNG compressed form for better communication efficiency.
-The 2D texture is a Unity class that encodes the colors of an image. It is used for many ways through out Unity but it has 2 specific methods that the Grid Sensor takes advantage of:
-
-`SetPixels` takes a 2D array of Colors and assigns the color values to the texture.
-
-`EncodeToPNG` returns a byte[] containing the PNG encoding of the colors of the texture.
-
-Together these 2 functions allow one to "push" a WxHx3 normalized array to a PNG byte[]. And indeed, this is how the Camera Sensor in Unity MLAgents sends its data to python. However, the grid sensor can have N channels so there needs to be a more generic way to send the data.
-
-The core idea behind how a Grid Observation is encoded is the following:
-1. split the channels of a Grid Observation into groups of 3
-2. encode each of these groups as a PNG byte[]
-3. concatenate all byte[] and send the combined array to python
-4. reconstruct the Grid Observation by splitting up the array and decoding the sections
-
-Once the bytes are sent to python, they are then decoded and used as a tensor of the correct shape within the mlagents python codebase.
+Once the bytes are sent to Python, they are then decoded and provided as a tensor of the correct shape.
--- a/com.unity.ml-agents.extensions/Documentation~/com.unity.ml-agents.extensions.md
+++ b/com.unity.ml-agents.extensions/Documentation~/com.unity.ml-agents.extensions.md
 * [Match-3 sensor and actuator](Match3.md)
 * [Grid-based sensor](Grid-Sensor.md)
 * Physics-based sensors
+ * [Input System Package Integration](InputActuatorComponent.md)

 ## Installation
 The ML-Agents Extensions package is not currently available in the Package Manager. There are two
 ### Github via Package Manager
 In Unity 2019.4 or later, open the Package Manager, hit the "+" button, and select "Add package from git URL".

-![Package Manager git URL](https://github.com/Unity-Technologies/ml-agents/tree/release_12_docs/docs/images/unity_package_manager_git_url.png)
+![Package Manager git URL](https://github.com/Unity-Technologies/ml-agents/blob/release_12_docs/docs/images/unity_package_manager_git_url.png)
+
 In the dialog that appears, enter
 ```
 git+https://github.com/Unity-Technologies/ml-agents.git?path=com.unity.ml-agents.extensions
 This version of the Unity ML-Agents Extensions package is compatible with the
 following versions of the Unity Editor:

- 2018.4 and later
+- If using the `InputActuatorComponent`
+    - 2019.4 or later
+    - install the `com.unity.inputsystem` package version `1.1.0-preview.3` or later.
+- Else 2018.4 and later
-none
+- For the `InputActuatorComponent`
+    - Limited implementation of `InputControls`
+    - No way to customize the action space of the `InputActuatorComponent`

 ## Need Help?
 The main [README](https://github.com/Unity-Technologies/ml-agents/tree/release_12_docs/README.md) contains links for contacting the team or getting support.
--- a/com.unity.ml-agents.extensions/Runtime/Unity.ML-Agents.Extensions.asmdef
+++ b/com.unity.ml-agents.extensions/Runtime/Unity.ML-Agents.Extensions.asmdef
    "name": "Unity.ML-Agents.Extensions",
    "references": [
        "Unity.Barracuda",
-        "Unity.ML-Agents"
+        "Unity.ML-Agents",
+        "Unity.ML-Agents.Extensions.Input"
    ],
    "includePlatforms": [],
    "excludePlatforms": []
--- a/com.unity.ml-agents.extensions/package.json
+++ b/com.unity.ml-agents.extensions/package.json
  "unity": "2018.4",
  "description": "A source-only package for new features based on ML-Agents",
  "dependencies": {
-    "com.unity.ml-agents": "1.7.2-preview"
+    "com.unity.ml-agents": "1.8.0-preview"
  }
 }
--- a/com.unity.ml-agents/CHANGELOG.md
+++ b/com.unity.ml-agents/CHANGELOG.md
 and this project adheres to
 [Semantic Versioning](http://semver.org/spec/v2.0.0.html).

-
+
+### Minor Changes
+#### com.unity.ml-agents / com.unity.ml-agents.extensions (C#)
+#### ml-agents / ml-agents-envs / gym-unity (Python)
+- The `encoding_size` setting for RewardSignals has been deprecated. Please use `network_settings` instead. (#4982)
+### Bug Fixes
+#### com.unity.ml-agents (C#)
+#### ml-agents / ml-agents-envs / gym-unity (Python)
+- An issue that caused `GAIL` to fail for environments where agents can terminate episodes by self-sacrifice has been fixed. (#4971)
+
+## [1.8.0-preview] - 2021-02-17
+### Major Changes
+#### com.unity.ml-agents (C#)
+#### ml-agents / ml-agents-envs / gym-unity (Python)
 - TensorFlow trainers have been removed, please use the Torch trainers instead. (#4707)
 - A plugin system for `mlagents-learn` has been added. You can now define custom
  `StatsWriter` implementations and register them to be called during training.
 - `InferenceDevice.Burst` was added, indicating that Agent's model will be run using Barracuda's Burst backend.
  This is the default for new Agents, but existing ones that use `InferenceDevice.CPU` should update to
  `InferenceDevice.Burst`. (#4925)
+- Add an InputActuatorComponent to allow the generation of Agent action spaces from an InputActionAsset.
+  Projects wanting to use this feature will need to add the
+  [Input System Package](https://docs.unity3d.com/Packages/com.unity.inputsystem@1.1/manual/index.html)
+  at version 1.1.0-preview.3 or later. (#4881)

 #### ml-agents / ml-agents-envs / gym-unity (Python)
 - Tensorboard now logs the Environment Reward as both a scalar and a histogram. (#4878)
  reduced the amount of memory allocated by approximately 25%. (#4887)
 - Removed several memory allocations that happened during inference with discrete actions. (#4922)
 - Properly catch permission errors when writing timer files. (#4921)
+- Unexpected exceptions during training initialization and shutdown are now logged. If you see
+  "noisy" logs, please let us know! (#4930, #4935)

 #### ml-agents / ml-agents-envs / gym-unity (Python)
 - Fixed a bug that would cause an exception when `RunOptions` was deserialized via `pickle`. (#4842)
  while waiting for a connection, and raises a better error message if it crashes. (#4880)
 - Passing a `-logfile` option in the `--env-args` option to `mlagents-learn` is
  no longer overwritten. (#4880)
+- The `load_weights` function was being called unnecessarily often in the Ghost Trainer leading to training slowdowns. (#4934)


 ## [1.7.2-preview] - 2020-12-22
--- a/com.unity.ml-agents/Documentation~/com.unity.ml-agents.md
+++ b/com.unity.ml-agents/Documentation~/com.unity.ml-agents.md
 # About ML-Agents package (`com.unity.ml-agents`)

-The Unity ML-Agents package contains the C# SDK for the [Unity ML-Agents
+The _ML-Agents_ package contains the primary C# SDK for the [Unity ML-Agents
 Toolkit].

 The package allows you to convert any Unity scene into a learning environment
 instrumenting a Unity scene, setting it up for training, and then embedding the
 trained model back into your Unity scene. The machine learning algorithms that
 orchestrate training are part of the companion [Python package].
+
+Note that we also provide an _ML-Agents Extensions_ package
+(`com.unity.ml-agents.extensions`) that contains early/experimental features
+that you may find useful. This package is only available from the [ML-Agents
+GitHub repo].

 ## Package contents

 [execution order of event functions]: https://docs.unity3d.com/Manual/ExecutionOrder.html
 [connect with us]: https://github.com/Unity-Technologies/ml-agents#community-and-feedback
 [ml-agents forum]: https://forum.unity.com/forums/ml-agents.453/
+[ML-Agents GitHub repo]: https://github.com/Unity-Technologies/ml-agents/blob/release_12_docs/com.unity.ml-agents.extensions
--- a/com.unity.ml-agents/Editor/BehaviorParametersEditor.cs
+++ b/com.unity.ml-agents/Editor/BehaviorParametersEditor.cs

            // Grab the sensor components, since we need them to determine the observation sizes.
            // TODO make these methods of BehaviorParameters
-            SensorComponent[] sensorComponents;
-            if (behaviorParameters.UseChildSensors)
-            {
-                sensorComponents = behaviorParameters.GetComponentsInChildren<SensorComponent>();
-            }
-            else
-            {
-                sensorComponents = behaviorParameters.GetComponents<SensorComponent>();
-            }
+            var agent = behaviorParameters.gameObject.GetComponent<Agent>();
+            agent.sensors = new List<ISensor>();
+            agent.InitializeSensors();
+            var sensors = agent.sensors.ToArray();

            ActuatorComponent[] actuatorComponents;
            if (behaviorParameters.UseChildActuators)
            // Get the total size of the sensors generated by ObservableAttributes.
            // If there are any errors (e.g. unsupported type, write-only properties), display them too.
            int observableAttributeSensorTotalSize = 0;
-            var agent = behaviorParameters.GetComponent<Agent>();
            if (agent != null && behaviorParameters.ObservableAttributeHandling != ObservableAttributeOptions.Ignore)
            {
                List<string> observableErrors = new List<string>();
            if (brainParameters != null)
            {
                var failedChecks = Inference.BarracudaModelParamLoader.CheckModel(
-                    barracudaModel, brainParameters, sensorComponents, actuatorComponents,
+                    barracudaModel, brainParameters, sensors, actuatorComponents,
                    observableAttributeSensorTotalSize, behaviorParameters.BehaviorType
                );
                foreach (var check in failedChecks)
--- a/com.unity.ml-agents/Runtime/Academy.cs
+++ b/com.unity.ml-agents/Runtime/Academy.cs
        ///         <term>1.4.0</term>
        ///         <description>Support training analytics sent from python trainer to the editor.</description>
        ///     </item>
+        ///     <item>
+        ///         <term>1.5.0</term>
+        ///         <description>Support variable length observation training.</description>
+        ///     </item>
-        const string k_ApiVersion = "1.4.0";
+        const string k_ApiVersion = "1.5.0";
-        internal const string k_PackageVersion = "1.7.2-preview";
+        internal const string k_PackageVersion = "1.8.0-preview";

        const int k_EditorTrainingPort = 5004;

            var port = ReadPortFromArgs();
            if (port > 0)
            {
-                Communicator = new RpcCommunicator(
-                    new CommunicatorInitParameters
-                    {
-                        port = port
-                    }
-                );
+                Communicator = CommunicatorFactory.Create();
            }

            if (Communicator != null)
-                //environment must use Inference.
+                // environment must use Inference.
+                bool initSuccessful = false;
+                var communicatorInitParams = new CommunicatorInitParameters
+                {
+                    port = port,
+                    unityCommunicationVersion = k_ApiVersion,
+                    unityPackageVersion = k_PackageVersion,
+                    name = "AcademySingleton",
+                    CSharpCapabilities = new UnityRLCapabilities()
+                };
+
-                    var unityRlInitParameters = Communicator.Initialize(
-                        new CommunicatorInitParameters
-                        {
-                            unityCommunicationVersion = k_ApiVersion,
-                            unityPackageVersion = k_PackageVersion,
-                            name = "AcademySingleton",
-                            CSharpCapabilities = new UnityRLCapabilities()
-                        });
-                    UnityEngine.Random.InitState(unityRlInitParameters.seed);
-                    // We might have inference-only Agents, so set the seed for them too.
-                    m_InferenceSeed = unityRlInitParameters.seed;
-                    TrainerCapabilities = unityRlInitParameters.TrainerCapabilities;
-                    TrainerCapabilities.WarnOnPythonMissingBaseRLCapabilities();
+                    initSuccessful = Communicator.Initialize(
+                        communicatorInitParams,
+                        out var unityRlInitParameters
+                    );
+                    if (initSuccessful)
+                    {
+                        UnityEngine.Random.InitState(unityRlInitParameters.seed);
+                        // We might have inference-only Agents, so set the seed for them too.
+                        m_InferenceSeed = unityRlInitParameters.seed;
+                        TrainerCapabilities = unityRlInitParameters.TrainerCapabilities;
+                        TrainerCapabilities.WarnOnPythonMissingBaseRLCapabilities();
+                    }
+                    else
+                    {
+                        Debug.Log($"Couldn't connect to trainer on port {port} using API version {k_ApiVersion}. Will perform inference instead.");
+                        Communicator = null;
+                    }
-                catch
+                catch (Exception ex)
-                    Debug.Log($"" +
-                        $"Couldn't connect to trainer on port {port} using API version {k_ApiVersion}. " +
-                        "Will perform inference instead."
-                    );
+                    Debug.Log($"Unexpected exception when trying to initialize communication: {ex}\nWill perform inference instead.");
+
            if (Communicator != null)
            {
                Communicator.QuitCommandReceived += OnQuitCommandReceived;
--- a/com.unity.ml-agents/Runtime/Actuators/ActionSpec.cs
+++ b/com.unity.ml-agents/Runtime/Actuators/ActionSpec.cs
 using System;
+using System.Collections.Generic;
-using Unity.MLAgents.Policies;
 using UnityEngine;

 namespace Unity.MLAgents.Actuators
        public ActionSpec(int numContinuousActions = 0, int[] discreteBranchSizes = null)
        {
            m_NumContinuousActions = numContinuousActions;
-            BranchSizes = discreteBranchSizes;
+            BranchSizes = discreteBranchSizes ?? Array.Empty<int>();
        }

        /// <summary>
                    "ActionSpecs must be all continuous or all discrete."
                );
            }
+        }
+
+        /// <summary>
+        /// Merges several action specs into a single aggregate spec. A new branch-size array
+        /// is allocated only when at least one of the inputs has discrete actions.
+        /// </summary>
+        /// <param name="specs">The action specs to merge, in order.</param>
+        /// <returns>An ActionSpec whose continuous action count is the sum of the inputs' counts and
+        /// whose branch sizes are the inputs' branch sizes concatenated in order. When no input has
+        /// discrete actions, the result of MakeContinuous for the summed count is returned.</returns>
+        public static ActionSpec Combine(params ActionSpec[] specs)
+        {
+            // First pass: total up both kinds of actions so the output array can be sized once.
+            var continuousTotal = 0;
+            var discreteTotal = 0;
+            foreach (var actionSpec in specs)
+            {
+                continuousTotal += actionSpec.NumContinuousActions;
+                discreteTotal += actionSpec.NumDiscreteActions;
+            }
+
+            if (discreteTotal <= 0)
+            {
+                return MakeContinuous(continuousTotal);
+            }
+
+            // Second pass: append each spec's branch sizes at the next free position.
+            var combinedBranches = new int[discreteTotal];
+            var writeIndex = 0;
+            foreach (var actionSpec in specs)
+            {
+                var branches = actionSpec.BranchSizes;
+                var branchCount = branches.Length;
+                if (branchCount > 0)
+                {
+                    Array.Copy(branches, 0, combinedBranches, writeIndex, branchCount);
+                    writeIndex += branchCount;
+                }
+            }
+            return new ActionSpec(continuousTotal, combinedBranches);
         }
    }
 }
--- a/com.unity.ml-agents/Runtime/Actuators/ActuatorManager.cs
+++ b/com.unity.ml-agents/Runtime/Actuators/ActuatorManager.cs
 using System.Collections;
 using System.Collections.Generic;
 using UnityEngine;
+using UnityEngine.Profiling;

 namespace Unity.MLAgents.Actuators
 {
        /// actions for the IActuators in this list.</param>
        public void UpdateActions(ActionBuffers actions)
        {
+            Profiler.BeginSample("ActuatorManager.UpdateActions");
+            Profiler.EndSample();
        }

        static void UpdateActionArray<T>(ActionSegment<T> sourceActionBuffer, ActionSegment<T> destination)
        /// </summary>
        public void ApplyHeuristic(in ActionBuffers actionBuffersOut)
        {
+            Profiler.BeginSample("ActuatorManager.ApplyHeuristic");
            var continuousStart = 0;
            var discreteStart = 0;
            for (var i = 0; i < m_Actuators.Count; i++)
                continuousStart += numContinuousActions;
                discreteStart += numDiscreteActions;
            }
+            Profiler.EndSample();
        }

        /// <summary>
        /// </summary>
        public void ExecuteActions()
        {
+            Profiler.BeginSample("ActuatorManager.ExecuteActions");
            ReadyActuatorsForExecution();
            var continuousStart = 0;
            var discreteStart = 0;
                var numContinuousActions = actuator.ActionSpec.NumContinuousActions;
                var numDiscreteActions = actuator.ActionSpec.NumDiscreteActions;

+                if (numContinuousActions == 0 && numDiscreteActions == 0)
+                {
+                    continue;
+                }
+
                var continuousActions = ActionSegment<float>.Empty;
                if (numContinuousActions > 0)
                {
                continuousStart += numContinuousActions;
                discreteStart += numDiscreteActions;
            }
+            Profiler.EndSample();
        }

        /// <summary>
--- a/com.unity.ml-agents/Runtime/Actuators/VectorActuator.cs
+++ b/com.unity.ml-agents/Runtime/Actuators/VectorActuator.cs
+using UnityEngine.Profiling;
+
 namespace Unity.MLAgents.Actuators
 {
    /// <summary>
        /// <inheritdoc />
        public void OnActionReceived(ActionBuffers actionBuffers)
        {
-            ActionBuffers = actionBuffers;
-            m_ActionReceiver.OnActionReceived(ActionBuffers);
+            Profiler.BeginSample("VectorActuator.OnActionReceived");
+            m_ActionBuffers = actionBuffers;
+            m_ActionReceiver.OnActionReceived(m_ActionBuffers);
+            Profiler.EndSample();
+            Profiler.BeginSample("VectorActuator.Heuristic");
+            Profiler.EndSample();
        }

        /// <inheritdoc />
--- a/com.unity.ml-agents/Runtime/Agent.cs
+++ b/com.unity.ml-agents/Runtime/Agent.cs
        public float reward;

        /// <summary>
+        /// The current group reward received by the agent.
+        /// </summary>
+        public float groupReward;
+
+        /// <summary>
        /// Whether the agent is done or not.
        /// </summary>
        public bool done;
        /// to separate between different agents in the environment.
        /// </summary>
        public int episodeId;
+
+        /// <summary>
+        /// MultiAgentGroup identifier.
+        /// </summary>
+        public int groupId;

        public void ClearActions()
        {
        /// Additionally, the magnitude of the reward should not exceed 1.0
        float m_Reward;

+        /// Represents the group reward the agent accumulated during the current step.
+        float m_GroupReward;
+
        /// Keeps track of the cumulative reward in this episode.
        float m_CumulativeReward;

        /// </summary>
        float[] m_LegacyHeuristicCache;

+        /// Current MultiAgentGroup ID. Defaults to 0 (meaning no group).
+        int m_GroupId;
+
+        /// Delegate for the agent to unregister itself from the MultiAgentGroup without cyclic reference
+        /// between agent and the group
+        internal event Action<Agent> OnAgentDisabled;
+
        /// <summary>
        /// Called when the attached [GameObject] becomes enabled and active.
        /// [GameObject]: https://docs.unity3d.com/Manual/GameObjects.html
                new int[m_ActuatorManager.NumDiscreteActions]
            );

+            m_Info.groupId = m_GroupId;
+
            // The first time the Academy resets, all Agents in the scene will be
            // forced to reset through the <see cref="AgentForceReset"/> event.
            // To avoid the Agent resetting twice, the Agents will not begin their
                NotifyAgentDone(DoneReason.Disabled);
            }
            m_Brain?.Dispose();
+            OnAgentDisabled?.Invoke(this);
            m_Initialized = false;
        }

            }
            m_Info.episodeId = m_EpisodeId;
            m_Info.reward = m_Reward;
+            m_Info.groupReward = m_GroupReward;
+            m_Info.groupId = m_GroupId;
            if (collectObservationsSensor != null)
            {
                // Make sure the latest observations are being passed to training.
            }

            m_Reward = 0f;
+            m_GroupReward = 0f;
            m_CumulativeReward = 0f;
            m_RequestAction = false;
            m_RequestDecision = false;
            m_CumulativeReward += increment;
        }

+        /// <summary>
+        /// Overwrites the group reward stored for this agent with the given value.
+        /// In DEBUG builds the value is first passed to the NaN/infinity check.
+        /// </summary>
+        /// <param name="reward">The new group reward value.</param>
+        internal void SetGroupReward(float reward)
+        {
+#if DEBUG
+            Utilities.DebugCheckNanAndInfinity(reward, nameof(reward), nameof(SetGroupReward));
+#endif
+            m_GroupReward = reward;
+        }
+
+        /// <summary>
+        /// Adds the given amount to the group reward stored for this agent.
+        /// In DEBUG builds the amount is first passed to the NaN/infinity check.
+        /// </summary>
+        /// <param name="increment">The amount to add to the group reward.</param>
+        internal void AddGroupReward(float increment)
+        {
+#if DEBUG
+            Utilities.DebugCheckNanAndInfinity(increment, nameof(increment), nameof(AddGroupReward));
+#endif
+            m_GroupReward += increment;
+        }
+
        /// <summary>
        /// Retrieves the episode reward for the Agent.
        /// </summary>
        /// </summary>
        internal void InitializeSensors()
        {
+            if (m_PolicyFactory == null)
+            {
+                m_PolicyFactory = GetComponent<BehaviorParameters>();
+            }
            if (m_PolicyFactory.ObservableAttributeHandling != ObservableAttributeOptions.Ignore)
            {
                var excludeInherited =

            m_Info.discreteActionMasks = m_ActuatorManager.DiscreteActionMask?.GetMask();
            m_Info.reward = m_Reward;
+            m_Info.groupReward = m_GroupReward;
+            m_Info.groupId = m_GroupId;

            using (TimerStack.Instance.Scoped("RequestDecision"))
            {
            {
                SendInfoToBrain();
                m_Reward = 0f;
+                m_GroupReward = 0f;
                m_RequestDecision = false;
            }
        }
            var actions = m_Brain?.DecideAction() ?? new ActionBuffers();
            m_Info.CopyActions(actions);
            m_ActuatorManager.UpdateActions(actions);
+        }
+
+        internal void SetMultiAgentGroup(IMultiAgentGroup multiAgentGroup) // Records this Agent's group membership by storing the group's id in m_GroupId.
+        {
+            if (multiAgentGroup == null)
+            {
+                m_GroupId = 0; // A null group clears the membership; 0 is the "no group" value tested below.
+            }
+            else
+            {
+                var newGroupId = multiAgentGroup.GetId();
+                if (m_GroupId == 0 || m_GroupId == newGroupId) // Allowed when currently ungrouped, or when re-registering with the same group.
+                {
+                    m_GroupId = newGroupId;
+                }
+                else
+                {
+                    throw new UnityAgentsException("Agent is already registered with a group. Unregister it first."); // Moving directly from one group to a different one is rejected.
+                }
+            }
        }
    }
 }
--- a/com.unity.ml-agents/Runtime/Agent.deprecated.cs
+++ b/com.unity.ml-agents/Runtime/Agent.deprecated.cs
 using System;
 using UnityEngine;
+using UnityEngine.Profiling;

 namespace Unity.MLAgents
 {
        [Obsolete("GetAction has been deprecated, please use GetStoredActionBuffers instead.")]
        public float[] GetAction()
        {
+            Profiler.BeginSample("Agent.GetAction.Deprecated");
            var actionSpec = m_PolicyFactory.BrainParameters.ActionSpec;
            // For continuous and discrete actions together, this shouldn't be called because we can only return one.
            if (actionSpec.NumContinuousActions > 0 && actionSpec.NumDiscreteActions > 0)
            {
                return storedAction.ContinuousActions.Array;
            }
-            else
-            {
-                return Array.ConvertAll(storedAction.DiscreteActions.Array, x => (float)x);
-            }
+            Profiler.EndSample();
+            return Array.ConvertAll(storedAction.DiscreteActions.Array, x => (float)x);
        }
    }
 }
--- a/com.unity.ml-agents/Runtime/Analytics/Events.cs
+++ b/com.unity.ml-agents/Runtime/Analytics/Events.cs
        public static EventObservationSpec FromSensor(ISensor sensor)
        {
            var shape = sensor.GetObservationShape();
+            var dimProps = (sensor as IDimensionPropertiesSensor)?.GetDimensionProperties();
-                // TODO copy flags when we have them
+                dimInfos[i].Flags = dimProps != null ? (int)dimProps[i] : 0;
            }

            var builtInSensorType =
--- a/com.unity.ml-agents/Runtime/AssemblyInfo.cs
+++ b/com.unity.ml-agents/Runtime/AssemblyInfo.cs
 [assembly: InternalsVisibleTo("Unity.ML-Agents.Editor.Tests")]
 [assembly: InternalsVisibleTo("Unity.ML-Agents.Editor")]
 [assembly: InternalsVisibleTo("Unity.ML-Agents.Extensions")]
+[assembly: InternalsVisibleTo("Unity.ML-Agents.Extensions.Input")]
--- a/com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs
+++ b/com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs
            var agentInfoProto = new AgentInfoProto
            {
                Reward = ai.reward,
+                GroupReward = ai.groupReward,
+                GroupId = ai.groupId,
            };

            if (ai.discreteActionMasks != null)
                {
                    observationProto.DimensionProperties.Add((int)dimensionProperties[i]);
                }
+                // Checking trainer compatibility with variable length observations
+                if (dimensionProperties.Length == 2)
+                {
+                    if (dimensionProperties[0] == DimensionProperty.VariableSize &&
+                    dimensionProperties[1] == DimensionProperty.None)
+                    {
+                        var trainerCanHandleVarLenObs = Academy.Instance.TrainerCapabilities == null || Academy.Instance.TrainerCapabilities.VariableLengthObservation;
+                        if (!trainerCanHandleVarLenObs)
+                        {
+                            throw new UnityAgentsException("Variable Length Observations are not supported by the trainer");
+                        }
+                    }
+                }
            }
            observationProto.Shape.AddRange(shape);

                CompressedChannelMapping = proto.CompressedChannelMapping,
                HybridActions = proto.HybridActions,
                TrainingAnalytics = proto.TrainingAnalytics,
+                VariableLengthObservation = proto.VariableLengthObservation,
            };
        }

                CompressedChannelMapping = rlCaps.CompressedChannelMapping,
                HybridActions = rlCaps.HybridActions,
                TrainingAnalytics = rlCaps.TrainingAnalytics,
+                VariableLengthObservation = rlCaps.VariableLengthObservation,
            };
        }

--- a/com.unity.ml-agents/Runtime/Communicator/ICommunicator.cs
+++ b/com.unity.ml-agents/Runtime/Communicator/ICommunicator.cs
        /// Sends the academy parameters through the Communicator.
        /// Is used by the academy to send the AcademyParameters to the communicator.
        /// </summary>
-        /// <returns>The External Initialization Parameters received.</returns>
+        /// <returns>Whether the connection was successful.</returns>
-        UnityRLInitParameters Initialize(CommunicatorInitParameters initParameters);
+        /// <param name="initParametersOut">The External Initialization Parameters received.</param>
+        bool Initialize(CommunicatorInitParameters initParameters, out UnityRLInitParameters initParametersOut);

        /// <summary>
        /// Registers a new Brain to the Communicator.
--- a/com.unity.ml-agents/Runtime/Communicator/RpcCommunicator.cs
+++ b/com.unity.ml-agents/Runtime/Communicator/RpcCommunicator.cs
-# if UNITY_EDITOR || UNITY_STANDALONE_WIN || UNITY_STANDALONE_OSX || UNITY_STANDALONE_LINUX
+#if UNITY_EDITOR || UNITY_STANDALONE_WIN || UNITY_STANDALONE_OSX || UNITY_STANDALONE_LINUX
-#endif
 #if UNITY_EDITOR
 using UnityEditor;
 #endif
        Dictionary<string, ActionSpec> m_UnsentBrainKeys = new Dictionary<string, ActionSpec>();


-#if UNITY_EDITOR || UNITY_STANDALONE_WIN || UNITY_STANDALONE_OSX || UNITY_STANDALONE_LINUX
-#endif
-        /// The communicator parameters sent at construction
-        CommunicatorInitParameters m_CommunicatorInitParameters;
-        /// <param name="communicatorInitParameters">Communicator parameters.</param>
-        public RpcCommunicator(CommunicatorInitParameters communicatorInitParameters)
+        public RpcCommunicator()
-            m_CommunicatorInitParameters = communicatorInitParameters;
-        #region Initialization
+#region Initialization
-            string pythonApiVersion,
-            string pythonLibraryVersion)
+            string pythonApiVersion
+            )
        {
            var unityVersion = new Version(unityCommunicationVersion);
            var pythonVersion = new Version(pythonApiVersion);
        /// Sends the initialization parameters through the Communicator.
        /// Is used by the academy to send initialization parameters to the communicator.
        /// </summary>
-        /// <returns>The External Initialization Parameters received.</returns>
+        /// <returns>Whether the connection was successful.</returns>
-        public UnityRLInitParameters Initialize(CommunicatorInitParameters initParameters)
+        /// <param name="initParametersOut">The External Initialization Parameters received.</param>
+        public bool Initialize(CommunicatorInitParameters initParameters, out UnityRLInitParameters initParametersOut)
        {
            var academyParameters = new UnityRLInitializationOutputProto
            {
            try
            {
                initializationInput = Initialize(
+                    initParameters.port,
-                    out input);
-
-                var pythonPackageVersion = initializationInput.RlInitializationInput.PackageVersion;
-                var pythonCommunicationVersion = initializationInput.RlInitializationInput.CommunicationVersion;
-                var unityCommunicationVersion = initParameters.unityCommunicationVersion;
-
-                TrainingAnalytics.SetTrainerInformation(pythonPackageVersion, pythonCommunicationVersion);
-
-                var communicationIsCompatible = CheckCommunicationVersionsAreCompatible(unityCommunicationVersion,
-                    pythonCommunicationVersion,
-                    pythonPackageVersion);
-
-                // Initialization succeeded part-way. The most likely cause is a mismatch between the communicator
-                // API strings, so log an explicit warning if that's the case.
-                if (initializationInput != null && input == null)
+                    out input
+                );
+            }
+            catch (Exception ex)
+            {
+                if (ex is RpcException rpcException)
-                    if (!communicationIsCompatible)
-                    {
-                        Debug.LogWarningFormat(
-                            "Communication protocol between python ({0}) and Unity ({1}) have different " +
-                            "versions which make them incompatible. Python library version: {2}.",
-                            pythonCommunicationVersion, initParameters.unityCommunicationVersion,
-                            pythonPackageVersion
-                        );
-                    }
-                    else
+
+                    switch (rpcException.Status.StatusCode)
-                        Debug.LogWarningFormat(
-                            "Unknown communication error between Python. Python communication protocol: {0}, " +
-                            "Python library version: {1}.",
-                            pythonCommunicationVersion,
-                            pythonPackageVersion
-                        );
+                        case StatusCode.Unavailable:
+                            // This is the common case where there's no trainer to connect to.
+                            break;
+                        case StatusCode.DeadlineExceeded:
+                            // We don't currently set a deadline for connection, but likely will in the future.
+                            break;
+                        default:
+                            Debug.Log($"Unexpected gRPC exception when trying to initialize communication: {rpcException}");
+                            break;
-
-                    throw new UnityAgentsException("ICommunicator.Initialize() failed.");
+                else
+                {
+                    Debug.Log($"Unexpected exception when trying to initialize communication: {ex}");
+                }
+                initParametersOut = new UnityRLInitParameters();
+                return false;
-            catch
-            {
-                var exceptionMessage = "The Communicator was unable to connect. Please make sure the External " +
-                    "process is ready to accept communication with Unity.";
-                // Check for common error condition and add details to the exception message.
-                var httpProxy = Environment.GetEnvironmentVariable("HTTP_PROXY");
-                var httpsProxy = Environment.GetEnvironmentVariable("HTTPS_PROXY");
-                if (httpProxy != null || httpsProxy != null)
+            var pythonPackageVersion = initializationInput.RlInitializationInput.PackageVersion;
+            var pythonCommunicationVersion = initializationInput.RlInitializationInput.CommunicationVersion;
+
+            TrainingAnalytics.SetTrainerInformation(pythonPackageVersion, pythonCommunicationVersion);
+
+            var communicationIsCompatible = CheckCommunicationVersionsAreCompatible(
+                initParameters.unityCommunicationVersion,
+                pythonCommunicationVersion
+            );
+
+            // Initialization succeeded part-way. The most likely cause is a mismatch between the communicator
+            // API strings, so log an explicit warning if that's the case.
+            if (initializationInput != null && input == null)
+            {
+                if (!communicationIsCompatible)
-                    exceptionMessage += " Try removing HTTP_PROXY and HTTPS_PROXY from the" +
-                        "environment variables and try again.";
+                    Debug.LogWarningFormat(
+                        "Communication protocol between python ({0}) and Unity ({1}) have different " +
+                        "versions which make them incompatible. Python library version: {2}.",
+                        pythonCommunicationVersion, initParameters.unityCommunicationVersion,
+                        pythonPackageVersion
+                    );
-                throw new UnityAgentsException(exceptionMessage);
+                else
+                {
+                    Debug.LogWarningFormat(
+                        "Unknown communication error between Python. Python communication protocol: {0}, " +
+                        "Python library version: {1}.",
+                        pythonCommunicationVersion,
+                        pythonPackageVersion
+                    );
+                }
+
+                initParametersOut = new UnityRLInitParameters();
+                return false;
-            return initializationInput.RlInitializationInput.ToUnityRLInitParameters();
+            initParametersOut = initializationInput.RlInitializationInput.ToUnityRLInitParameters();
+            return true;
        }

        /// <summary>
            SendCommandEvent(rlInput.Command);
        }

-        UnityInputProto Initialize(UnityOutputProto unityOutput,
-            out UnityInputProto unityInput)
+        UnityInputProto Initialize(int port, UnityOutputProto unityOutput, out UnityInputProto unityInput)
-#if UNITY_EDITOR || UNITY_STANDALONE_WIN || UNITY_STANDALONE_OSX || UNITY_STANDALONE_LINUX
-            var channel = new Channel(
-                "localhost:" + m_CommunicatorInitParameters.port,
-                ChannelCredentials.Insecure);
+            var channel = new Channel($"localhost:{port}", ChannelCredentials.Insecure);

            m_Client = new UnityToExternalProto.UnityToExternalProtoClient(channel);
            var result = m_Client.Exchange(WrapMessage(unityOutput, 200));
                QuitCommandReceived?.Invoke();
            }
            return result.UnityInput;
-#else
-            throw new UnityAgentsException(
-                "You cannot perform training on this platform.");
-#endif
-        #endregion
+#endregion
-        #region Destruction
+#region Destruction

        /// <summary>
        /// Close the communicator gracefully on both sides of the communication.
-#if UNITY_EDITOR || UNITY_STANDALONE_WIN || UNITY_STANDALONE_OSX || UNITY_STANDALONE_LINUX
            if (!m_IsOpen)
            {
                return;
            {
                // ignored
            }
-#else
-            throw new UnityAgentsException(
-                "You cannot perform training on this platform.");
-#endif
-        #endregion
+#endregion
-        #region Sending Events
+#region Sending Events

        void SendCommandEvent(CommandProto command)
        {
            }
        }

-        #endregion
+#endregion
-        #region Sending and retreiving data
+#region Sending and retreiving data

        public void DecideBatch()
        {
        /// <param name="unityOutput">The UnityOutput to be sent.</param>
        UnityInputProto Exchange(UnityOutputProto unityOutput)
        {
-#if UNITY_EDITOR || UNITY_STANDALONE_WIN || UNITY_STANDALONE_OSX || UNITY_STANDALONE_LINUX
+
            try
            {
                var message = m_Client.Exchange(WrapMessage(unityOutput, 200));
                QuitCommandReceived?.Invoke();
                return message.UnityInput;
            }
-            catch
+            catch (Exception ex)
+                if (ex is RpcException rpcException)
+                {
+                    // Log more verbose errors if they're something the user can possibly do something about.
+                    switch (rpcException.Status.StatusCode)
+                    {
+                        case StatusCode.Unavailable:
+                            // This can happen when python disconnects. Ignore it to avoid noisy logs.
+                            break;
+                        case StatusCode.ResourceExhausted:
+                            // This happens if the message body is too large. There's no way to
+                            // gracefully handle this, but at least we can show the message and the
+                            // user can try to reduce the number of agents or observation sizes.
+                            Debug.LogError($"GRPC Exception: {rpcException.Message}. Disconnecting from trainer.");
+                            break;
+                        default:
+                            // Other unknown errors. Log at INFO level.
+                            Debug.Log($"GRPC Exception: {rpcException.Message}. Disconnecting from trainer.");
+                            break;
+                    }
+                }
+                else
+                {
+                    // Fall-through for other error types
+                    Debug.LogError($"Communication Exception: {ex.Message}. Disconnecting from trainer.");
+                }
+
-#else
-            throw new UnityAgentsException(
-                "You cannot perform training on this platform.");
-#endif
        }

        /// <summary>
            }
        }

-        #endregion
+#endregion

 #if UNITY_EDITOR
        /// <summary>
 #endif
    }
 }
+#endif // UNITY_EDITOR || UNITY_STANDALONE_WIN || UNITY_STANDALONE_OSX || UNITY_STANDALONE_LINUX
--- a/com.unity.ml-agents/Runtime/Communicator/UnityRLCapabilities.cs
+++ b/com.unity.ml-agents/Runtime/Communicator/UnityRLCapabilities.cs
        public bool CompressedChannelMapping;
        public bool HybridActions;
        public bool TrainingAnalytics;
+        public bool VariableLengthObservation;

        /// <summary>
        /// A class holding the capabilities flags for Reinforcement Learning across C# and the Trainer codebase.  This
            bool concatenatedPngObservations = true,
            bool compressedChannelMapping = true,
            bool hybridActions = true,
-            bool trainingAnalytics = true)
+            bool trainingAnalytics = true,
+            bool variableLengthObservation = true)
        {
            BaseRLCapabilities = baseRlCapabilities;
            ConcatenatedPngObservations = concatenatedPngObservations;
+            VariableLengthObservation = variableLengthObservation;
        }

        /// <summary>
--- a/com.unity.ml-agents/Runtime/Grpc/CommunicatorObjects/AgentInfo.cs
+++ b/com.unity.ml-agents/Runtime/Grpc/CommunicatorObjects/AgentInfo.cs
          string.Concat(
            "CjNtbGFnZW50c19lbnZzL2NvbW11bmljYXRvcl9vYmplY3RzL2FnZW50X2lu",
            "Zm8ucHJvdG8SFGNvbW11bmljYXRvcl9vYmplY3RzGjRtbGFnZW50c19lbnZz",
-            "L2NvbW11bmljYXRvcl9vYmplY3RzL29ic2VydmF0aW9uLnByb3RvItEBCg5B",
+            "L2NvbW11bmljYXRvcl9vYmplY3RzL29ic2VydmF0aW9uLnByb3RvIvkBCg5B",
-            "YXRvcl9vYmplY3RzLk9ic2VydmF0aW9uUHJvdG9KBAgBEAJKBAgCEANKBAgD",
-            "EARKBAgEEAVKBAgFEAZKBAgGEAdKBAgMEA1CJaoCIlVuaXR5Lk1MQWdlbnRz",
-            "LkNvbW11bmljYXRvck9iamVjdHNiBnByb3RvMw=="));
+            "YXRvcl9vYmplY3RzLk9ic2VydmF0aW9uUHJvdG8SEAoIZ3JvdXBfaWQYDiAB",
+            "KAUSFAoMZ3JvdXBfcmV3YXJkGA8gASgCSgQIARACSgQIAhADSgQIAxAESgQI",
+            "BBAFSgQIBRAGSgQIBhAHSgQIDBANQiWqAiJVbml0eS5NTEFnZW50cy5Db21t",
+            "dW5pY2F0b3JPYmplY3RzYgZwcm90bzM="));
-            new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.AgentInfoProto), global::Unity.MLAgents.CommunicatorObjects.AgentInfoProto.Parser, new[]{ "Reward", "Done", "MaxStepReached", "Id", "ActionMask", "Observations" }, null, null, null)
+            new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.AgentInfoProto), global::Unity.MLAgents.CommunicatorObjects.AgentInfoProto.Parser, new[]{ "Reward", "Done", "MaxStepReached", "Id", "ActionMask", "Observations", "GroupId", "GroupReward" }, null, null, null)
          }));
    }
    #endregion
      id_ = other.id_;
      actionMask_ = other.actionMask_.Clone();
      observations_ = other.observations_.Clone();
+      groupId_ = other.groupId_;
+      groupReward_ = other.groupReward_;
      _unknownFields = pb::UnknownFieldSet.Clone(other._unknownFields);
    }

      get { return observations_; }
    }

+    /// <summary>Field number for the "group_id" field.</summary>
+    public const int GroupIdFieldNumber = 14;
+    private int groupId_;
+    [global::System.Diagnostics.DebuggerNonUserCodeAttribute]
+    public int GroupId {
+      get { return groupId_; }
+      set {
+        groupId_ = value;
+      }
+    }
+
+    /// <summary>Field number for the "group_reward" field.</summary>
+    public const int GroupRewardFieldNumber = 15;
+    private float groupReward_;
+    [global::System.Diagnostics.DebuggerNonUserCodeAttribute]
+    public float GroupReward {
+      get { return groupReward_; }
+      set {
+        groupReward_ = value;
+      }
+    }
+
    [global::System.Diagnostics.DebuggerNonUserCodeAttribute]
    public override bool Equals(object other) {
      return Equals(other as AgentInfoProto);
      if (Id != other.Id) return false;
      if(!actionMask_.Equals(other.actionMask_)) return false;
      if(!observations_.Equals(other.observations_)) return false;
+      if (GroupId != other.GroupId) return false;
+      if (!pbc::ProtobufEqualityComparers.BitwiseSingleEqualityComparer.Equals(GroupReward, other.GroupReward)) return false;
      return Equals(_unknownFields, other._unknownFields);
    }

      if (Id != 0) hash ^= Id.GetHashCode();
      hash ^= actionMask_.GetHashCode();
      hash ^= observations_.GetHashCode();
+      if (GroupId != 0) hash ^= GroupId.GetHashCode();
+      if (GroupReward != 0F) hash ^= pbc::ProtobufEqualityComparers.BitwiseSingleEqualityComparer.GetHashCode(GroupReward);
      if (_unknownFields != null) {
        hash ^= _unknownFields.GetHashCode();
      }
      }
      actionMask_.WriteTo(output, _repeated_actionMask_codec);
      observations_.WriteTo(output, _repeated_observations_codec);
+      if (GroupId != 0) {
+        output.WriteRawTag(112);
+        output.WriteInt32(GroupId);
+      }
+      if (GroupReward != 0F) {
+        output.WriteRawTag(125);
+        output.WriteFloat(GroupReward);
+      }
      if (_unknownFields != null) {
        _unknownFields.WriteTo(output);
      }
      }
      size += actionMask_.CalculateSize(_repeated_actionMask_codec);
      size += observations_.CalculateSize(_repeated_observations_codec);
+      if (GroupId != 0) {
+        size += 1 + pb::CodedOutputStream.ComputeInt32Size(GroupId);
+      }
+      if (GroupReward != 0F) {
+        size += 1 + 4;
+      }
      if (_unknownFields != null) {
        size += _unknownFields.CalculateSize();
      }
      }
      actionMask_.Add(other.actionMask_);
      observations_.Add(other.observations_);
+      if (other.GroupId != 0) {
+        GroupId = other.GroupId;
+      }
+      if (other.GroupReward != 0F) {
+        GroupReward = other.GroupReward;
+      }
      _unknownFields = pb::UnknownFieldSet.MergeFrom(_unknownFields, other._unknownFields);
    }

          }
          case 106: {
            observations_.AddEntriesFrom(input, _repeated_observations_codec);
+            break;
+          }
+          case 112: {
+            GroupId = input.ReadInt32();
+            break;
+          }
+          case 125: {
+            GroupReward = input.ReadFloat();
            break;
          }
        }
--- a/com.unity.ml-agents/Runtime/Grpc/CommunicatorObjects/Capabilities.cs
+++ b/com.unity.ml-agents/Runtime/Grpc/CommunicatorObjects/Capabilities.cs
      byte[] descriptorData = global::System.Convert.FromBase64String(
          string.Concat(
            "CjVtbGFnZW50c19lbnZzL2NvbW11bmljYXRvcl9vYmplY3RzL2NhcGFiaWxp",
-            "dGllcy5wcm90bxIUY29tbXVuaWNhdG9yX29iamVjdHMirwEKGFVuaXR5UkxD",
+            "dGllcy5wcm90bxIUY29tbXVuaWNhdG9yX29iamVjdHMi0gEKGFVuaXR5UkxD",
-            "ASgIEhkKEXRyYWluaW5nQW5hbHl0aWNzGAUgASgIQiWqAiJVbml0eS5NTEFn",
-            "ZW50cy5Db21tdW5pY2F0b3JPYmplY3RzYgZwcm90bzM="));
+            "ASgIEhkKEXRyYWluaW5nQW5hbHl0aWNzGAUgASgIEiEKGXZhcmlhYmxlTGVu",
+            "Z3RoT2JzZXJ2YXRpb24YBiABKAhCJaoCIlVuaXR5Lk1MQWdlbnRzLkNvbW11",
+            "bmljYXRvck9iamVjdHNiBnByb3RvMw=="));
-            new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.UnityRLCapabilitiesProto), global::Unity.MLAgents.CommunicatorObjects.UnityRLCapabilitiesProto.Parser, new[]{ "BaseRLCapabilities", "ConcatenatedPngObservations", "CompressedChannelMapping", "HybridActions", "TrainingAnalytics" }, null, null, null)
+            new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.UnityRLCapabilitiesProto), global::Unity.MLAgents.CommunicatorObjects.UnityRLCapabilitiesProto.Parser, new[]{ "BaseRLCapabilities", "ConcatenatedPngObservations", "CompressedChannelMapping", "HybridActions", "TrainingAnalytics", "VariableLengthObservation" }, null, null, null)
          }));
    }
    #endregion
      compressedChannelMapping_ = other.compressedChannelMapping_;
      hybridActions_ = other.hybridActions_;
      trainingAnalytics_ = other.trainingAnalytics_;
+      variableLengthObservation_ = other.variableLengthObservation_;
      _unknownFields = pb::UnknownFieldSet.Clone(other._unknownFields);
    }

      }
    }

+    /// <summary>Field number for the "variableLengthObservation" field.</summary>
+    public const int VariableLengthObservationFieldNumber = 6;
+    private bool variableLengthObservation_;
+    /// <summary>
+    /// Support for variable length observations of rank 2
+    /// </summary>
+    [global::System.Diagnostics.DebuggerNonUserCodeAttribute]
+    public bool VariableLengthObservation {
+      get { return variableLengthObservation_; }
+      set {
+        variableLengthObservation_ = value;
+      }
+    }
+
    [global::System.Diagnostics.DebuggerNonUserCodeAttribute]
    public override bool Equals(object other) {
      return Equals(other as UnityRLCapabilitiesProto);
      if (CompressedChannelMapping != other.CompressedChannelMapping) return false;
      if (HybridActions != other.HybridActions) return false;
      if (TrainingAnalytics != other.TrainingAnalytics) return false;
+      if (VariableLengthObservation != other.VariableLengthObservation) return false;
      return Equals(_unknownFields, other._unknownFields);
    }

      if (CompressedChannelMapping != false) hash ^= CompressedChannelMapping.GetHashCode();
      if (HybridActions != false) hash ^= HybridActions.GetHashCode();
      if (TrainingAnalytics != false) hash ^= TrainingAnalytics.GetHashCode();
+      if (VariableLengthObservation != false) hash ^= VariableLengthObservation.GetHashCode();
      if (_unknownFields != null) {
        hash ^= _unknownFields.GetHashCode();
      }
      if (TrainingAnalytics != false) {
        output.WriteRawTag(40);
        output.WriteBool(TrainingAnalytics);
+      }
+      if (VariableLengthObservation != false) {
+        output.WriteRawTag(48);
+        output.WriteBool(VariableLengthObservation);
      }
      if (_unknownFields != null) {
        _unknownFields.WriteTo(output);
      if (TrainingAnalytics != false) {
        size += 1 + 1;
      }
+      if (VariableLengthObservation != false) {
+        size += 1 + 1;
+      }
      if (_unknownFields != null) {
        size += _unknownFields.CalculateSize();
      }
      if (other.TrainingAnalytics != false) {
        TrainingAnalytics = other.TrainingAnalytics;
      }
+      if (other.VariableLengthObservation != false) {
+        VariableLengthObservation = other.VariableLengthObservation;
+      }
      _unknownFields = pb::UnknownFieldSet.MergeFrom(_unknownFields, other._unknownFields);
    }

          }
          case 40: {
            TrainingAnalytics = input.ReadBool();
+            break;
+          }
+          case 48: {
+            VariableLengthObservation = input.ReadBool();
            break;
          }
        }
--- a/com.unity.ml-agents/Runtime/Inference/BarracudaModelParamLoader.cs
+++ b/com.unity.ml-agents/Runtime/Inference/BarracudaModelParamLoader.cs
        /// <param name="brainParameters">
        /// The BrainParameters that are used verify the compatibility with the InferenceEngine
        /// </param>
-        /// <param name="sensorComponents">Attached sensor components</param>
+        /// <param name="sensors">Attached sensors</param>
-            SensorComponent[] sensorComponents, ActuatorComponent[] actuatorComponents,
+            ISensor[] sensors, ActuatorComponent[] actuatorComponents,
            int observableAttributeTotalSize = 0,
            BehaviorType behaviorType = BehaviorType.Default)
        {
            }

            failedModelChecks.AddRange(
-                CheckInputTensorPresence(model, brainParameters, memorySize, sensorComponents)
+                CheckInputTensorPresence(model, brainParameters, memorySize, sensors)
-                CheckInputTensorShape(model, brainParameters, sensorComponents, observableAttributeTotalSize)
+                CheckInputTensorShape(model, brainParameters, sensors, observableAttributeTotalSize)
            );
            failedModelChecks.AddRange(
                CheckOutputTensorShape(model, brainParameters, actuatorComponents)
        /// <param name="memory">
        /// The memory size that the model is expecting.
        /// </param>
-        /// <param name="sensorComponents">Array of attached sensor components</param>
+        /// <param name="sensors">Array of attached sensors</param>
        /// <returns>
        /// A IEnumerable of string corresponding to the failed input presence checks.
        /// </returns>
            int memory,
-            SensorComponent[] sensorComponents
+            ISensor[] sensors
        )
        {
            var failedModelChecks = new List<string>();
            // If there are not enough Visual Observation Input compared to what the
            // sensors expect.
            var visObsIndex = 0;
-            for (var sensorIndex = 0; sensorIndex < sensorComponents.Length; sensorIndex++)
+            for (var sensorIndex = 0; sensorIndex < sensors.Length; sensorIndex++)
-                var sensor = sensorComponents[sensorIndex];
+                var sensor = sensors[sensorIndex];
                if (sensor.GetObservationShape().Length == 3)
                {
                    if (!tensorsNames.Contains(
        /// Checks that the shape of the visual observation input placeholder is the same as the corresponding sensor.
        /// </summary>
        /// <param name="tensorProxy">The tensor that is expected by the model</param>
-        /// <param name="sensorComponent">The sensor that produces the visual observation.</param>
+        /// <param name="sensor">The sensor that produces the visual observation.</param>
-            TensorProxy tensorProxy, SensorComponent sensorComponent)
+            TensorProxy tensorProxy, ISensor sensor)
-            var shape = sensorComponent.GetObservationShape();
+            var shape = sensor.GetObservationShape();
            var heightBp = shape[0];
            var widthBp = shape[1];
            var pixelBp = shape[2];
        /// Checks that the shape of the rank 2 observation input placeholder is the same as the corresponding sensor.
        /// </summary>
        /// <param name="tensorProxy">The tensor that is expected by the model</param>
-        /// <param name="sensorComponent">The sensor that produces the visual observation.</param>
+        /// <param name="sensor">The sensor that produces the visual observation.</param>
-            TensorProxy tensorProxy, SensorComponent sensorComponent)
+            TensorProxy tensorProxy, ISensor sensor)
-            var shape = sensorComponent.GetObservationShape();
+            var shape = sensor.GetObservationShape();
            var dim1Bp = shape[0];
            var dim2Bp = shape[1];
            var dim1T = tensorProxy.Channels;
        /// <param name="brainParameters">
        /// The BrainParameters that are used to verify the compatibility with the InferenceEngine
        /// </param>
-        /// <param name="sensorComponents">Attached sensors</param>
+        /// <param name="sensors">Attached sensors</param>
-            Model model, BrainParameters brainParameters, SensorComponent[] sensorComponents,
+            Model model, BrainParameters brainParameters, ISensor[] sensors,
-                new Dictionary<string, Func<BrainParameters, TensorProxy, SensorComponent[], int, string>>()
+                new Dictionary<string, Func<BrainParameters, TensorProxy, ISensor[], int, string>>()
            {
                {TensorNames.VectorObservationPlaceholder, CheckVectorObsShape},
                {TensorNames.PreviousActionPlaceholder, CheckPreviousActionShape},
            }

            var visObsIndex = 0;
-            for (var sensorIndex = 0; sensorIndex < sensorComponents.Length; sensorIndex++)
+            for (var sensorIndex = 0; sensorIndex < sensors.Length; sensorIndex++)
-                var sensorComponent = sensorComponents[sensorIndex];
-                if (sensorComponent.GetObservationShape().Length == 3)
+                var sens = sensors[sensorIndex];
+                if (sens.GetObservationShape().Length == 3)
-                        (bp, tensor, scs, i) => CheckVisualObsShape(tensor, sensorComponent);
+                        (bp, tensor, scs, i) => CheckVisualObsShape(tensor, sens);
-                if (sensorComponent.GetObservationShape().Length == 2)
+                if (sens.GetObservationShape().Length == 2)
-                        (bp, tensor, scs, i) => CheckRankTwoObsShape(tensor, sensorComponent);
+                        (bp, tensor, scs, i) => CheckRankTwoObsShape(tensor, sens);
                }
            }

                else
                {
                    var tester = tensorTester[tensor.name];
-                    var error = tester.Invoke(brainParameters, tensor, sensorComponents, observableAttributeTotalSize);
+                    var error = tester.Invoke(brainParameters, tensor, sensors, observableAttributeTotalSize);
                    if (error != null)
                    {
                        failedModelChecks.Add(error);
        /// The BrainParameters that are used to verify the compatibility with the InferenceEngine
        /// </param>
        /// <param name="tensorProxy">The tensor that is expected by the model</param>
-        /// <param name="sensorComponents">Array of attached sensor components</param>
+        /// <param name="sensors">Array of attached sensor components</param>
        /// <param name="observableAttributeTotalSize">Sum of the sizes of all ObservableAttributes.</param>
        /// <returns>
        /// If the Check failed, returns a string containing information about why the
-            BrainParameters brainParameters, TensorProxy tensorProxy, SensorComponent[] sensorComponents,
+            BrainParameters brainParameters, TensorProxy tensorProxy, ISensor[] sensors,
            int observableAttributeTotalSize)
        {
            var vecObsSizeBp = brainParameters.VectorObservationSize;
            var totalVectorSensorSize = 0;
-            foreach (var sensorComp in sensorComponents)
+            foreach (var sens in sensors)
-                if (sensorComp.GetObservationShape().Length == 1)
+                if ((sens.GetObservationShape().Length == 1))
-                    totalVectorSensorSize += sensorComp.GetObservationShape()[0];
+                    totalVectorSensorSize += sens.GetObservationShape()[0];
-            totalVectorSensorSize += observableAttributeTotalSize;
-
-            if (vecObsSizeBp * numStackedVector + totalVectorSensorSize != totalVecObsSizeT)
+            if (totalVectorSensorSize != totalVecObsSizeT)
-                foreach (var sensorComp in sensorComponents)
+                foreach (var sensorComp in sensors)
                {
                    if (sensorComp.GetObservationShape().Length == 1)
                    {
                    $"but received: \n" +
                    $"Vector observations: {vecObsSizeBp} x {numStackedVector}\n" +
                    $"Total [Observable] attributes: {observableAttributeTotalSize}\n" +
-                    $"SensorComponent sizes: {sensorSizes}.";
+                    $"Sensor sizes: {sensorSizes}.";
            }
            return null;
        }
        /// The BrainParameters that are used to verify the compatibility with the InferenceEngine
        /// </param>
        /// <param name="tensorProxy"> The tensor that is expected by the model</param>
-        /// <param name="sensorComponents">Array of attached sensor components (unused).</param>
+        /// <param name="sensors">Array of attached sensor components (unused).</param>
-            SensorComponent[] sensorComponents, int observableAttributeTotalSize)
+            ISensor[] sensors, int observableAttributeTotalSize)
        {
            var numberActionsBp = brainParameters.ActionSpec.NumDiscreteActions;
            var numberActionsT = tensorProxy.shape[tensorProxy.shape.Length - 1];
--- a/com.unity.ml-agents/Runtime/Inference/ModelRunner.cs
+++ b/com.unity.ml-agents/Runtime/Inference/ModelRunner.cs

        SensorShapeValidator m_SensorShapeValidator = new SensorShapeValidator();

-        bool m_VisualObservationsInitialized;
+        bool m_ObservationsInitialized;

        /// <summary>
        /// Initializes the Brain with the Model that it will use when selecting actions for
            {
                return;
            }
-            if (!m_VisualObservationsInitialized)
+            if (!m_ObservationsInitialized)
-                m_VisualObservationsInitialized = true;
+                m_ObservationsInitialized = true;
            }

            Profiler.BeginSample("ModelRunner.DecideAction");
--- a/com.unity.ml-agents/Runtime/Policies/BehaviorParameters.cs
+++ b/com.unity.ml-agents/Runtime/Policies/BehaviorParameters.cs
        [HideInInspector, SerializeField]
        BrainParameters m_BrainParameters = new BrainParameters();

+        public delegate void PolicyUpdated(bool isInHeuristicMode);
+
+        internal event PolicyUpdated OnPolicyUpdated;
+
        /// <summary>
        /// The associated <see cref="Policies.BrainParameters"/> for this behavior.
        /// </summary>
            get { return m_BehaviorName + "?team=" + TeamId; }
        }

+        void Awake()
+        {
+            // Attach an empty listener to the policy-updated event; the argument is ignored.
+            OnPolicyUpdated += _ => { };
+        }
+
        internal IPolicy GeneratePolicy(ActionSpec actionSpec, ActuatorManager actuatorManager)
        {
            switch (m_BehaviorType)
            }
        }

+        /// <summary>
+        /// Reports whether the Agent configured by these behavior parameters is running
+        /// in Heuristic Mode.
+        /// </summary>
+        /// <returns>
+        /// true when the behavior type is HeuristicOnly, or when it is Default with no
+        /// Model assigned and the Academy is either not initialized or has its
+        /// communicator off; false otherwise.
+        /// </returns>
+        public bool IsInHeuristicMode()
+        {
+            if (BehaviorType == BehaviorType.HeuristicOnly)
+            {
+                return true;
+            }
+
+            // Any other non-Default type, or a Default behavior that has a model, is not heuristic.
+            if (BehaviorType != BehaviorType.Default || !ReferenceEquals(Model, null))
+            {
+                return false;
+            }
+
+            // Academy.Instance is only touched once IsInitialized is known to be true.
+            return !Academy.IsInitialized || !Academy.Instance.IsCommunicatorOn;
+        }
+
        internal void UpdateAgentPolicy()
        {
            var agent = GetComponent<Agent>();
            }
            agent.ReloadPolicy();
+            OnPolicyUpdated?.Invoke(IsInHeuristicMode());
        }
    }
 }
--- a/com.unity.ml-agents/Runtime/Sensors/BufferSensor.cs
+++ b/com.unity.ml-agents/Runtime/Sensors/BufferSensor.cs

 namespace Unity.MLAgents.Sensors
 {
-    internal class BufferSensor : ISensor, IDimensionPropertiesSensor, IBuiltInSensor
+    /// <summary>
+    /// A Sensor that allows an Agent to observe a variable number of entities.
+    /// </summary>
+    public class BufferSensor : ISensor, IDimensionPropertiesSensor, IBuiltInSensor
+        static DimensionProperty[] s_DimensionProperties = new DimensionProperty[]{
+                DimensionProperty.VariableSize,
+                DimensionProperty.None
+            };
        public BufferSensor(int maxNumberObs, int obsSize)
        {
            m_MaxNumObs = maxNumberObs;
        /// <inheritdoc/>
        public DimensionProperty[] GetDimensionProperties()
        {
-            return new DimensionProperty[]{
-                DimensionProperty.VariableSize,
-                DimensionProperty.None
-            };
+            return s_DimensionProperties;
        }

        /// <summary>
        /// <param name="obs"> The float array observation</param>
        public void AppendObservation(float[] obs)
        {
+            if (obs.Length != m_ObsSize)
+            {
+                throw new UnityAgentsException(
+                    "The BufferSensor was expecting an observation of size " +
+                    $"{m_ObsSize} but received {obs.Length} observations instead."
+                );
+            }
            if (m_CurrentNumObservables >= m_MaxNumObs)
            {
                return;
--- a/com.unity.ml-agents/Runtime/Sensors/BufferSensorComponent.cs
+++ b/com.unity.ml-agents/Runtime/Sensors/BufferSensorComponent.cs
 {

    /// <summary>
-    /// A component for BufferSensor.
+    /// A SensorComponent that creates a <see cref="BufferSensor"/>.
-    internal class BufferSensorComponent : SensorComponent
+    public class BufferSensorComponent : SensorComponent
+        /// <summary>
+        /// This is how many floats each entity will be represented with. This number
+        /// is fixed and all entities must have the same representation.
+        /// </summary>
+
+        /// <summary>
+        /// This is the maximum number of entities the `BufferSensor` will be able to
+        /// collect.
+        /// </summary>
+
        private BufferSensor m_Sensor;

        /// <inheritdoc/>
--- a/com.unity.ml-agents/Runtime/Sensors/CameraSensor.cs
+++ b/com.unity.ml-agents/Runtime/Sensors/CameraSensor.cs
    /// <summary>
    /// A sensor that wraps a Camera object to generate visual observations for an agent.
    /// </summary>
-    public class CameraSensor : ISensor, IBuiltInSensor
+    public class CameraSensor : ISensor, IBuiltInSensor, IDimensionPropertiesSensor
    {
        Camera m_Camera;
        int m_Width;
        int[] m_Shape;
        SensorCompressionType m_CompressionType;
+        static DimensionProperty[] s_DimensionProperties = new DimensionProperty[] {
+            DimensionProperty.TranslationalEquivariance,
+            DimensionProperty.TranslationalEquivariance,
+            DimensionProperty.None };

        /// <summary>
        /// The Camera used for rendering the sensor observations.
        public int[] GetObservationShape()
        {
            return m_Shape;
+        }
+
+        /// <summary>
+        /// Accessor for the dimension properties of a camera sensor. A camera sensor
+        /// has translational equivariance along width and height and no property along
+        /// the channels dimension.
+        /// </summary>
+        /// <returns></returns>
+        public DimensionProperty[] GetDimensionProperties()
+        {
+            return s_DimensionProperties;
        }

        /// <summary>
--- a/com.unity.ml-agents/Runtime/Sensors/IDimensionPropertiesSensor.cs
+++ b/com.unity.ml-agents/Runtime/Sensors/IDimensionPropertiesSensor.cs
    /// The Dimension property flags of the observations
    /// </summary>
    [System.Flags]
-    internal enum DimensionProperty
+    public enum DimensionProperty
    {
        /// <summary>
        /// No properties specified.
--- a/com.unity.ml-agents/Runtime/SideChannels/SideChannel.cs
+++ b/com.unity.ml-agents/Runtime/SideChannels/SideChannel.cs
 using System.Collections.Generic;
 using System;
+using UnityEngine;

 namespace Unity.MLAgents.SideChannels
 {

        internal void ProcessMessage(byte[] msg)
        {
-            using (var incomingMsg = new IncomingMessage(msg))
+            try
+            {
+                using (var incomingMsg = new IncomingMessage(msg))
+                {
+                    OnMessageReceived(incomingMsg);
+                }
+            }
+            catch (Exception ex)
-                OnMessageReceived(incomingMsg);
+                // Catch all errors in the sidechannel processing, so that a single
+                // bad SideChannel implementation doesn't take everything down with it.
+                Debug.LogError($"Error processing SideChannel message: {ex}.\nThe message will be skipped.");
            }
        }

--- a/com.unity.ml-agents/Tests/Editor/Analytics/InferenceAnalyticsTests.cs
+++ b/com.unity.ml-agents/Tests/Editor/Analytics/InferenceAnalyticsTests.cs
            Assert.AreEqual(2, continuousEvent.ObservationSpecs.Count);
            Assert.AreEqual(3, continuousEvent.ObservationSpecs[0].DimensionInfos.Length);
            Assert.AreEqual(20, continuousEvent.ObservationSpecs[0].DimensionInfos[0].Size);
+            Assert.AreEqual((int)DimensionProperty.TranslationalEquivariance, continuousEvent.ObservationSpecs[0].DimensionInfos[0].Flags);
+            Assert.AreEqual((int)DimensionProperty.None, continuousEvent.ObservationSpecs[0].DimensionInfos[2].Flags);
            Assert.AreEqual("None", continuousEvent.ObservationSpecs[0].CompressionType);
            Assert.AreEqual(Test3DSensor.k_BuiltInSensorType, continuousEvent.ObservationSpecs[0].BuiltInSensorType);
            Assert.AreNotEqual(null, continuousEvent.ModelHash);
--- a/com.unity.ml-agents/Tests/Editor/BehaviorParameterTests.cs
+++ b/com.unity.ml-agents/Tests/Editor/BehaviorParameterTests.cs
 using NUnit.Framework;
+using Unity.Barracuda;
+using UnityEditor;
+using UnityEngine.TestTools;

 namespace Unity.MLAgents.Tests
 {
+        const string k_continuousONNXPath = "Packages/com.unity.ml-agents/Tests/Editor/TestModels/continuous2vis8vec2action.onnx";
        public void Heuristic(in ActionBuffers actionsOut)
        {
            // No-op
            {
                bp.GeneratePolicy(actionSpec, new ActuatorManager());
            });
+        }
+
+        [Test]
+        public void TestIsInHeuristicMode()
+        {
+            // Arrange: behavior parameters with no model, plus an Agent on the same object.
+            var go = new GameObject();
+            var behaviorParams = go.AddComponent<BehaviorParameters>();
+            behaviorParams.Model = null;
+            go.AddComponent<Agent>();
+
+            // HeuristicOnly must report heuristic mode.
+            behaviorParams.BehaviorType = BehaviorType.HeuristicOnly;
+            Assert.IsTrue(behaviorParams.IsInHeuristicMode());
+
+            // Default with no model assigned is expected to report heuristic mode as well.
+            behaviorParams.BehaviorType = BehaviorType.Default;
+            Assert.IsTrue(behaviorParams.IsInHeuristicMode());
+
+            // Once a model is assigned, Default must no longer report heuristic mode.
+            behaviorParams.Model = ScriptableObject.CreateInstance<NNModel>();
+            Assert.IsFalse(behaviorParams.IsInHeuristicMode());
+        }
+
+        // Checks that changing the behavior type or the model on BehaviorParameters
+        // raises OnPolicyUpdated with the expected heuristic-mode flag.
+        [Test]
+        public void TestPolicyUpdateEventFired()
+        {
+            var gameObj = new GameObject();
+            var bp = gameObj.AddComponent<BehaviorParameters>();
+            gameObj.AddComponent<Agent>().LazyInitialize();
+            // Turn every OnPolicyUpdated callback into a log line so LogAssert can observe it.
+            bp.OnPolicyUpdated += delegate (bool isInHeuristicMode) { Debug.Log($"OnPolicyChanged:{isInHeuristicMode}"); };
+            bp.BehaviorType = BehaviorType.HeuristicOnly;
+            LogAssert.Expect(LogType.Log, $"OnPolicyChanged:{true}");
+
+            // Default with no model assigned in this test is still expected to report heuristic mode.
+            bp.BehaviorType = BehaviorType.Default;
+            LogAssert.Expect(LogType.Log, $"OnPolicyChanged:{true}");
+
+            // No model has been assigned yet, so switching to InferenceOnly is expected to throw.
+            Assert.Throws<UnityAgentsException>(() =>
+            {
+                bp.BehaviorType = BehaviorType.InferenceOnly;
+            });
+
+            // Assigning a model is expected to fire the event with a false flag.
+            bp.Model = AssetDatabase.LoadAssetAtPath<NNModel>(k_continuousONNXPath);
+            LogAssert.Expect(LogType.Log, $"OnPolicyChanged:{false}");
+
+            // Going back to HeuristicOnly is expected to report heuristic mode even with a model set.
+            bp.BehaviorType = BehaviorType.HeuristicOnly;
+            LogAssert.Expect(LogType.Log, $"OnPolicyChanged:{true}");
        }
    }
 }
--- a/com.unity.ml-agents/Tests/Editor/Communicator/RpcCommunicatorTests.cs
+++ b/com.unity.ml-agents/Tests/Editor/Communicator/RpcCommunicatorTests.cs
        {
            var unityVerStr = "1.0.0";
            var pythonVerStr = "1.0.0";
-            var pythonPackageVerStr = "0.16.0";
-                pythonVerStr,
-                pythonPackageVerStr));
+                pythonVerStr));
-                pythonVerStr,
-                pythonPackageVerStr));
+                pythonVerStr));
-                pythonVerStr,
-                pythonPackageVerStr));
+                pythonVerStr));
-                pythonVerStr,
-                pythonPackageVerStr));
+                pythonVerStr));
-                pythonVerStr,
-                pythonPackageVerStr));
+                pythonVerStr));
-                pythonVerStr,
-                pythonPackageVerStr));
+                pythonVerStr));

        }
    }
--- a/com.unity.ml-agents/Tests/Editor/ParameterLoaderTest.cs
+++ b/com.unity.ml-agents/Tests/Editor/ParameterLoaderTest.cs
            return Sensor.GetObservationShape();
        }
    }
-    public class Test3DSensor : ISensor, IBuiltInSensor
+    public class Test3DSensor : ISensor, IBuiltInSensor, IDimensionPropertiesSensor
    {
        int m_Width;
        int m_Height;
        public BuiltInSensorType GetBuiltInSensorType()
        {
            return (BuiltInSensorType)k_BuiltInSensorType;
+        }
+
+        public DimensionProperty[] GetDimensionProperties()
+        {
+            return new[]
+            {
+                DimensionProperty.TranslationalEquivariance,
+                DimensionProperty.TranslationalEquivariance,
+                DimensionProperty.None
+            };
        }
    }


            var errors = BarracudaModelParamLoader.CheckModel(
                model, validBrainParameters,
-                new SensorComponent[] { sensor_21_20_3, sensor_20_22_3 }, new ActuatorComponent[0]
+                new ISensor[] { new VectorSensor(8), sensor_21_20_3.CreateSensor(), sensor_20_22_3.CreateSensor() }, new ActuatorComponent[0]
            );
            Assert.AreEqual(0, errors.Count()); // There should not be any errors
        }

            var errors = BarracudaModelParamLoader.CheckModel(
                model, validBrainParameters,
-                new SensorComponent[] { sensor_21_20_3 }, new ActuatorComponent[0]
+                new ISensor[] { sensor_21_20_3.CreateSensor() }, new ActuatorComponent[0]
            );
            Assert.AreEqual(0, errors.Count()); // There should not be any errors
        }

            var errors = BarracudaModelParamLoader.CheckModel(
                model, validBrainParameters,
-                new SensorComponent[] { }, new ActuatorComponent[0]
+                new ISensor[] { new VectorSensor(validBrainParameters.VectorObservationSize) }, new ActuatorComponent[0]
            );
            Assert.AreEqual(0, errors.Count()); // There should not be any errors
        }
            brainParameters.VectorObservationSize = 9; // Invalid observation
            var errors = BarracudaModelParamLoader.CheckModel(
                model, brainParameters,
-                new SensorComponent[] { sensor_21_20_3, sensor_20_22_3 }, new ActuatorComponent[0]
+                new ISensor[] { sensor_21_20_3.CreateSensor(), sensor_20_22_3.CreateSensor() }, new ActuatorComponent[0]
            );
            Assert.Greater(errors.Count(), 0);

                model, brainParameters,
-                new SensorComponent[] { sensor_21_20_3, sensor_20_22_3 }, new ActuatorComponent[0]
+                new ISensor[] { sensor_21_20_3.CreateSensor(), sensor_20_22_3.CreateSensor() }, new ActuatorComponent[0]
            );
            Assert.Greater(errors.Count(), 0);
        }

            var brainParameters = GetDiscrete1vis0vec_2_3action_recurrModelBrainParameters();
            brainParameters.VectorObservationSize = 1; // Invalid observation
-            var errors = BarracudaModelParamLoader.CheckModel(model, brainParameters, new SensorComponent[] { sensor_21_20_3 }, new ActuatorComponent[0]);
+            var errors = BarracudaModelParamLoader.CheckModel(model, brainParameters, new ISensor[] { sensor_21_20_3.CreateSensor() }, new ActuatorComponent[0]);
            Assert.Greater(errors.Count(), 0);
        }

            brainParameters.VectorObservationSize = 9; // Invalid observation
            var errors = BarracudaModelParamLoader.CheckModel(
                model, brainParameters,
-                new SensorComponent[] { }, new ActuatorComponent[0]
+                new ISensor[] { }, new ActuatorComponent[0]
            );
            Assert.Greater(errors.Count(), 0);

                model, brainParameters,
-                new SensorComponent[] { }, new ActuatorComponent[0]
+                new ISensor[] { }, new ActuatorComponent[0]
            );
            Assert.Greater(errors.Count(), 0);
        }

            var brainParameters = GetContinuous2vis8vec2actionBrainParameters();
            brainParameters.ActionSpec = ActionSpec.MakeContinuous(3); // Invalid action
-            var errors = BarracudaModelParamLoader.CheckModel(model, brainParameters, new SensorComponent[] { sensor_21_20_3, sensor_20_22_3 }, new ActuatorComponent[0]);
+            var errors = BarracudaModelParamLoader.CheckModel(model, brainParameters, new ISensor[] { sensor_21_20_3.CreateSensor(), sensor_20_22_3.CreateSensor() }, new ActuatorComponent[0]);
-            errors = BarracudaModelParamLoader.CheckModel(model, brainParameters, new SensorComponent[] { sensor_21_20_3, sensor_20_22_3 }, new ActuatorComponent[0]);
+            errors = BarracudaModelParamLoader.CheckModel(model, brainParameters, new ISensor[] { sensor_21_20_3.CreateSensor(), sensor_20_22_3.CreateSensor() }, new ActuatorComponent[0]);
            Assert.Greater(errors.Count(), 0);
        }


            var brainParameters = GetDiscrete1vis0vec_2_3action_recurrModelBrainParameters();
            brainParameters.ActionSpec = ActionSpec.MakeDiscrete(3, 3); // Invalid action
-            var errors = BarracudaModelParamLoader.CheckModel(model, brainParameters, new SensorComponent[] { sensor_21_20_3 }, new ActuatorComponent[0]);
+            var errors = BarracudaModelParamLoader.CheckModel(model, brainParameters, new ISensor[] { sensor_21_20_3.CreateSensor() }, new ActuatorComponent[0]);
-            errors = BarracudaModelParamLoader.CheckModel(model, brainParameters, new SensorComponent[] { sensor_21_20_3 }, new ActuatorComponent[0]);
+            errors = BarracudaModelParamLoader.CheckModel(model, brainParameters, new ISensor[] { sensor_21_20_3.CreateSensor() }, new ActuatorComponent[0]);
            Assert.Greater(errors.Count(), 0);
        }


            var brainParameters = GetHybridBrainParameters();
            brainParameters.ActionSpec = new ActionSpec(3, new[] { 3 }); // Invalid discrete action size
-            var errors = BarracudaModelParamLoader.CheckModel(model, brainParameters, new SensorComponent[] { sensor_21_20_3, sensor_20_22_3 }, new ActuatorComponent[0]);
+            var errors = BarracudaModelParamLoader.CheckModel(model, brainParameters, new ISensor[] { sensor_21_20_3.CreateSensor(), sensor_20_22_3.CreateSensor() }, new ActuatorComponent[0]);
-            errors = BarracudaModelParamLoader.CheckModel(model, brainParameters, new SensorComponent[] { sensor_21_20_3, sensor_20_22_3 }, new ActuatorComponent[0]);
+            errors = BarracudaModelParamLoader.CheckModel(model, brainParameters, new ISensor[] { sensor_21_20_3.CreateSensor(), sensor_20_22_3.CreateSensor() }, new ActuatorComponent[0]);
            Assert.Greater(errors.Count(), 0);
        }

            var brainParameters = GetContinuous2vis8vec2actionBrainParameters();
-            var errors = BarracudaModelParamLoader.CheckModel(null, brainParameters, new SensorComponent[] { sensor_21_20_3, sensor_20_22_3 }, new ActuatorComponent[0]);
+            var errors = BarracudaModelParamLoader.CheckModel(null, brainParameters, new ISensor[] { sensor_21_20_3.CreateSensor(), sensor_20_22_3.CreateSensor() }, new ActuatorComponent[0]);
            Assert.Greater(errors.Count(), 0);
        }
    }
--- a/com.unity.ml-agents/package.json
+++ b/com.unity.ml-agents/package.json
 {
  "name": "com.unity.ml-agents",
  "displayName": "ML Agents",
-  "version": "1.7.2-preview",
+  "version": "1.8.0-preview",
-    "com.unity.barracuda": "1.3.0-preview",
+    "com.unity.barracuda": "1.3.1-preview",
    "com.unity.modules.imageconversion": "1.0.0",
    "com.unity.modules.jsonserialize": "1.0.0",
    "com.unity.modules.physics": "1.0.0",
--- a/config/imitation/CrawlerStatic.yaml
+++ b/config/imitation/CrawlerStatic.yaml
      gail:
        gamma: 0.99
        strength: 1.0
-        encoding_size: 128
+        network_settings:
+          normalize: true
+          hidden_units: 128
+          num_layers: 2
+          vis_encode_type: simple
        learning_rate: 0.0003
        use_actions: false
        use_vail: false
--- a/config/imitation/FoodCollector.yaml
+++ b/config/imitation/FoodCollector.yaml
      gail:
        gamma: 0.99
        strength: 0.1
-        encoding_size: 128
+        network_settings:
+          normalize: false
+          hidden_units: 128
+          num_layers: 2
+          vis_encode_type: simple
        learning_rate: 0.0003
        use_actions: false
        use_vail: false
--- a/config/imitation/Hallway.yaml
+++ b/config/imitation/Hallway.yaml
        strength: 1.0
      gail:
        gamma: 0.99
-        strength: 0.1
-        encoding_size: 128
+        strength: 0.01
        learning_rate: 0.0003
        use_actions: false
        use_vail: false
--- a/config/imitation/PushBlock.yaml
+++ b/config/imitation/PushBlock.yaml
      num_layers: 2
      vis_encode_type: simple
    reward_signals:
+      extrinsic:
+        gamma: 0.99
+        strength: 1.0
-        strength: 1.0
-        encoding_size: 128
+        strength: 0.01
+        network_settings:
+          normalize: false
+          hidden_units: 128
+          num_layers: 2
+          vis_encode_type: simple
-    max_steps: 15000000
+    max_steps: 1000000
+    behavioral_cloning:
+      demo_path: Project/Assets/ML-Agents/Examples/PushBlock/Demos/ExpertPush.demo
+      steps: 50000
+      strength: 1.0
+      samples_per_update: 0
--- a/config/imitation/Pyramids.yaml
+++ b/config/imitation/Pyramids.yaml
      curiosity:
        strength: 0.02
        gamma: 0.99
-        encoding_size: 256
+        network_settings:
+          hidden_units: 256
-        encoding_size: 128
        demo_path: Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
    behavioral_cloning:
      demo_path: Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
--- a/config/ppo/Pyramids.yaml
+++ b/config/ppo/Pyramids.yaml
      curiosity:
        gamma: 0.99
        strength: 0.02
-        encoding_size: 256
+        network_settings:
+          hidden_units: 256
        learning_rate: 0.0003
    keep_checkpoints: 5
    max_steps: 10000000
--- a/config/ppo/PyramidsRND.yaml
+++ b/config/ppo/PyramidsRND.yaml
      rnd:
        gamma: 0.99
        strength: 0.01
-        encoding_size: 64
+        network_settings:
+          hidden_units: 64
-    framework: pytorch
    threaded: true
--- a/config/ppo/VisualPyramids.yaml
+++ b/config/ppo/VisualPyramids.yaml
      curiosity:
        gamma: 0.99
        strength: 0.01
-        encoding_size: 256
+        network_settings:
+          hidden_units: 256
        learning_rate: 0.0003
    keep_checkpoints: 5
    max_steps: 10000000
--- a/config/sac/Pyramids.yaml
+++ b/config/sac/Pyramids.yaml
      gail:
        gamma: 0.99
        strength: 0.01
-        encoding_size: 128
        learning_rate: 0.0003
        use_actions: true
        use_vail: false
--- a/config/sac/VisualPyramids.yaml
+++ b/config/sac/VisualPyramids.yaml
      gail:
        gamma: 0.99
        strength: 0.02
-        encoding_size: 128
        learning_rate: 0.0003
        use_actions: true
        use_vail: false
--- a/docs/Installation.md
+++ b/docs/Installation.md

 - Unity package ([`com.unity.ml-agents`](../com.unity.ml-agents/)) contains the
  Unity C# SDK that will be integrated into your Unity scene.
+- Unity package
+  ([`com.unity.ml-agents.extensions`](../com.unity.ml-agents.extensions/))
+  contains experimental C#/Unity components that are not yet ready to be part
+  of the base `com.unity.ml-agents` package. `com.unity.ml-agents.extensions`
+  has a direct dependency on `com.unity.ml-agents`.
 - Three Python packages:
  - [`mlagents`](../ml-agents/) contains the machine learning algorithms that
    enables you to train behaviors in your Unity scene. Most users of ML-Agents
 - Install Python (3.6.1 or higher)
 - Clone this repository (Optional)
  - __Note:__ If you do not clone the repository, then you will not be
-  able to access the example environments and training configurations.
-  Additionally, the [Getting Started Guide](Getting-Started.md) assumes that
-  you have cloned the repository.
+  able to access the example environments and training configurations or the
+  `com.unity.ml-agents.extensions` package. Additionally, the
+  [Getting Started Guide](Getting-Started.md) assumes that you have cloned the
+  repository.
+- Install the `com.unity.ml-agents.extensions` Unity package (Optional)
 - Install the `mlagents` Python package

 ### Install **Unity 2018.4** or Later
 `com.unity.ml-agents` package
 [directly from the Package Manager registry](https://docs.unity3d.com/Manual/upm-ui-install.html).
 Please make sure you enable 'Preview Packages' in the 'Advanced' dropdown in
-order to find it.
+order to find the latest Preview release of the package.

 **NOTE:** If you do not see the ML-Agents package listed in the Package Manager
 please follow the [advanced installation instructions](#advanced-local-installation-for-development) below.
 If you are going to follow the examples from our documentation, you can open the
 `Project` folder in Unity and start tinkering immediately.

+### Install the `com.unity.ml-agents.extensions` Unity package (Optional)
+
+To install the `com.unity.ml-agents.extensions` package, you need to first
+clone the repo and then complete a local installation similar to what was
+outlined in the previous
+[Advanced: Local Installation for Development](#advanced-local-installation-for-development)
+section. Complete installation steps can be found in the
+[package documentation](../com.unity.ml-agents.extensions/Documentation~/com.unity.ml-agents.extensions.md#installation).
+
 ### Install the `mlagents` Python package

 Installing the `mlagents` Python package involves installing other Python
 installing ML-Agents. Activate your virtual environment and run from the command line:

 ```sh
-pip3 install torch==1.7.0 -f https://download.pytorch.org/whl/torch_stable.html
+pip3 install torch~=1.7.1 -f https://download.pytorch.org/whl/torch_stable.html
 ```

 Note that on Windows, you may also need Microsoft's
--- a/docs/Learning-Environment-Design-Agents.md
+++ b/docs/Learning-Environment-Design-Agents.md
    - [Visual Observation Summary & Best Practices](#visual-observation-summary--best-practices)
  - [Raycast Observations](#raycast-observations)
    - [RayCast Observation Summary & Best Practices](#raycast-observation-summary--best-practices)
+  - [Variable Length Observations](#variable-length-observations)
+    - [Variable Length Observation Summary & Best Practices](#variable-length-observation-summary--best-practices)
 - [Actions and Actuators](#actions-and-actuators)
  - [Continuous Actions](#continuous-actions)
  - [Discrete Actions](#discrete-actions)
 - Use as few rays and tags as necessary to solve the problem in order to improve
  learning stability and agent performance.

+### Variable Length Observations
+
+It is possible for agents to collect observations from a varying number of
+GameObjects by using a `BufferSensor`.
+You can add a `BufferSensor` to your Agent by adding a `BufferSensorComponent` to
+its GameObject.
+The `BufferSensor` can be useful in situations in which the Agent must pay
+attention to a varying number of entities (for example, a varying number of
+enemies or projectiles).
+On the trainer side, the `BufferSensor`
+is processed using an attention module. More information about attention
+mechanisms can be found [here](https://arxiv.org/abs/1706.03762). Training or
+doing inference with variable length observations can be slower than using
+a flat vector observation. However, attention mechanisms enable solving
+problems that require comparative reasoning between entities in a scene
+such as our [Sorter environment](Learning-Environment-Examples.md#sorter).
+Note that even though the `BufferSensor` can process a variable number of
+entities, you still need to define a maximum number of entities. This is
+because our network architecture needs to know what the shape of the
+observations will be. If fewer entities are observed than the maximum, the
+observation will be padded with zeros and the trainer will ignore
+the padded observations. Note that attention layers are invariant to
+the order of the entities, so there is no need to properly "order" the
+entities before feeding them into the `BufferSensor`.
+
+The `BufferSensorComponent` Editor inspector has two arguments:
+ - `Observation Size` : This is how many floats each entity will be
+ represented with. This number is fixed and all entities must
+ have the same representation. For example, if the relevant information
+ for each entity you put into the `BufferSensor` is its position and
+ speed, then the `Observation Size` should be 6 floats.
+ - `Maximum Number of Entities` : This is the maximum number of entities
+ the `BufferSensor` will be able to collect.
+
+To add an entity's observations to a `BufferSensorComponent`, you need
+to call `BufferSensorComponent.AppendObservation()`
+with a float array of size `Observation Size` as argument.
+
+__Note__: Currently, the observations put into the `BufferSensor` are
+not normalized; you will need to normalize your observations manually
+between -1 and 1.
+
+#### Variable Length Observation Summary & Best Practices
+ - Attach `BufferSensorComponent` to use.
+ - Call `BufferSensorComponent.AppendObservation()` to add the observations
+ of an entity to the `BufferSensor`.
+ - Normalize the entities' observations before feeding them into the `BufferSensor`.
+
+
 ## Actions and Actuators

 An action is an instruction from the Policy that the agent carries out. The
 ```csharp
 public override void WriteDiscreteActionMask(IDiscreteActionMask actionMask)
 {
-    actionMasker.WriteMask(branch, actionIndices)
+    actionMask.WriteMask(branch, actionIndices);
 }
 ```

 are masked)

 ```csharp
-WriteMask(0, new int[2]{1,2})
+WriteMask(0, new int[2]{1,2});
 ```

 Notes:
--- a/docs/Learning-Environment-Examples.md
+++ b/docs/Learning-Environment-Examples.md
    - Recommended minimum: 6
    - Recommended maximum: 20

-# Strikers Vs. Goalie
+## Strikers Vs. Goalie

 ![StrikersVsGoalie](images/strikersvsgoalie.png)

  - 37.6 for vector observations
  - 34.2 for simple heuristic (pick a random valid move)
  - 37.0 for greedy heuristic (pick the highest-scoring valid move)
+
+## Sorter
+![Sorter](images/sorter.png)
+
+ - Set-up: The Agent is in a circular room with numbered tiles. The values of the
+ tiles are random between 1 and 20. The tiles present in the room are randomized
+ at each episode. When the Agent visits a tile, it turns green.
+ - Goal: Visit all the tiles in ascending order.
+ - Agents: The environment contains a single Agent
+ - Agent Reward Function:
+  - -.0002 Existential penalty.
+  - +1 For visiting the right tile
+  - -1 For visiting the wrong tile
+ - BehaviorParameters:
+  - Vector Observations : 4 : 2 floats for Position and 2 floats for orientation
+  - Variable Length Observations : Between 1 and 20 entities (one for each tile)
+  each with 23 observations: the first 20 are a one-hot encoding of the value of the tile,
+  the 21st and 22nd represent the position of the tile relative to the Agent, and the 23rd
+  is `1` if the tile was visited and `0` otherwise.
+  - Actions: 3 discrete branched actions corresponding to forward, backward,
+  sideways movement, as well as rotation.
+  - Float Properties: One
+    - num_tiles: The maximum number of tiles to sample.
+      - Default: 2
+      - Recommended Minimum: 1
+      - Recommended Maximum: 20
+  - Benchmark Mean Reward: Depends on the number of tiles.
--- a/docs/ML-Agents-Overview.md
+++ b/docs/ML-Agents-Overview.md
 - If you want to help your agents learn (especially with environments that have
  sparse rewards) using pre-recorded demonstrations, you can generally enable
  both GAIL and Behavioral Cloning at low strengths in addition to having an
-  extrinsic reward. An example of this is provided for the Pyramids example
-  environment under `PyramidsLearning` in `config/gail_config.yaml`.
- If you want to train purely from demonstrations, GAIL and BC _without_ an
-  extrinsic reward signal is the preferred approach. An example of this is
-  provided for the Crawler example environment under `CrawlerStaticLearning` in
-  `config/gail_config.yaml`.
+  extrinsic reward. An example of this is provided for the PushBlock example
+  environment in `config/imitation/PushBlock.yaml`.
+- If you want to train purely from demonstrations with GAIL and BC _without_ an
+  extrinsic reward signal, please see the CrawlerStatic example environment
+  in `config/imitation/CrawlerStatic.yaml`.
+
+***Note:*** GAIL introduces a [_survivor bias_](https://arxiv.org/pdf/1809.02925.pdf)
+to the learning process. That is, by giving positive rewards based on similarity
+to the expert, the agent is incentivized to remain alive for as long as possible.
+This can directly conflict with goal-oriented tasks like our PushBlock or Pyramids
+example environments where an agent must reach a goal state thus ending the
+episode as quickly as possible. In these cases, we strongly recommend that you
+use a low strength GAIL reward signal and a sparse extrinsic signal when
+the agent achieves the task. This way, the GAIL reward signal will guide the
+agent until it discovers the extrinsic signal and will not overpower it. If the
+agent appears to be ignoring the extrinsic reward signal, you should reduce
+the strength of GAIL.

 #### GAIL (Generative Adversarial Imitation Learning)

--- a/docs/Training-Configuration-File.md
+++ b/docs/Training-Configuration-File.md
 | :--------------------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
 | `curiosity -> strength`      | (default = `1.0`) Magnitude of the curiosity reward generated by the intrinsic curiosity module. This should be scaled in order to ensure it is large enough to not be overwhelmed by extrinsic reward signals in the environment. Likewise it should not be too large to overwhelm the extrinsic reward signal. <br><br>Typical range: `0.001` - `0.1` |
 | `curiosity -> gamma`         | (default = `0.99`) Discount factor for future rewards. <br><br>Typical range: `0.8` - `0.995`                                                                                                                                                                                                                                                            |
-| `curiosity -> encoding_size` | (default = `64`) Size of the encoding used by the intrinsic curiosity model. This value should be small enough to encourage the ICM to compress the original observation, but also not too small to prevent it from learning to differentiate between expected and actual observations. <br><br>Typical range: `64` - `256` |
+| `curiosity -> network_settings` | Please see the documentation for `network_settings` under [Common Trainer Configurations](#common-trainer-configurations). The network specs used by the intrinsic curiosity model. The value of `hidden_units` should be small enough to encourage the ICM to compress the original observation, but also not too small to prevent it from learning to differentiate between expected and actual observations. <br><br>Typical range: `64` - `256` |
 | `curiosity -> learning_rate` | (default = `3e-4`) Learning rate used to update the intrinsic curiosity module. This should typically be decreased if training is unstable, and the curiosity loss is unstable. <br><br>Typical range: `1e-5` - `1e-3`                                                                                                      |

 ### GAIL Intrinsic Reward
 | `gail -> strength`      | (default = `1.0`) Factor by which to multiply the raw reward. Note that when using GAIL with an Extrinsic Signal, this value should be set lower if your demonstrations are suboptimal (e.g. from a human), so that a trained agent will focus on receiving extrinsic rewards instead of exactly copying the demonstrations. Keep the strength below about 0.1 in those cases. <br><br>Typical range: `0.01` - `1.0`                                                                              |
 | `gail -> gamma`         | (default = `0.99`) Discount factor for future rewards. <br><br>Typical range: `0.8` - `0.9`                                                                                                                                                                                                                                                                                                                                                                                                        |
 | `gail -> demo_path`     | (Required, no default) The path to your .demo file or directory of .demo files.                                                                                                                                                                                                                                                                                                                                                                                                                        |
-| `gail -> encoding_size` | (default = `64`) Size of the hidden layer used by the discriminator. This value should be small enough to encourage the discriminator to compress the original observation, but also not too small to prevent it from learning to differentiate between demonstrated and actual behavior. Dramatically increasing this size will also negatively affect training times. <br><br>Typical range: `64` - `256`                                                           |
+| `gail -> network_settings` | Please see the documentation for `network_settings` under [Common Trainer Configurations](#common-trainer-configurations). The network specs for the GAIL discriminator. The value of `hidden_units` should be small enough to encourage the discriminator to compress the original observation, but also not too small to prevent it from learning to differentiate between demonstrated and actual behavior. Dramatically increasing this size will also negatively affect training times. <br><br>Typical range: `64` - `256`                                                           |
 | `gail -> learning_rate` | (Optional, default = `3e-4`) Learning rate used to update the discriminator. This should typically be decreased if training is unstable, and the GAIL loss is unstable. <br><br>Typical range: `1e-5` - `1e-3`                                                                                                                                                                                                                                                                  |
 | `gail -> use_actions`   | (default = `false`) Determines whether the discriminator should discriminate based on both observations and actions, or just observations. Set to True if you want the agent to mimic the actions from the demonstrations, and False if you'd rather have the agent visit the same states as in the demonstrations but with possibly different actions. Setting to False is more likely to be stable, especially with imperfect demonstrations, but may learn slower. |
 | `gail -> use_vail`      | (default = `false`) Enables a variational bottleneck within the GAIL discriminator. This forces the discriminator to learn a more general representation and reduces its tendency to be "too good" at discriminating, making learning more stable. However, it does increase training time. Enable this if you notice your imitation learning is unstable, or unable to learn the task at hand.                                                                       |
 | :--------------------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
 | `rnd -> strength`      | (default = `1.0`) Magnitude of the curiosity reward generated by the intrinsic rnd module. This should be scaled in order to ensure it is large enough to not be overwhelmed by extrinsic reward signals in the environment. Likewise it should not be too large to overwhelm the extrinsic reward signal. <br><br>Typical range: `0.001` - `0.01` |
 | `rnd -> gamma`         | (default = `0.99`) Discount factor for future rewards. <br><br>Typical range: `0.8` - `0.995`                                                                                                                                                                                                                                                            |
-| `rnd -> encoding_size` | (default = `64`) Size of the encoding used by the intrinsic RND model. <br><br>Typical range: `64` - `256` |
+| `rnd -> network_settings` | Please see the documentation for `network_settings` under [Common Trainer Configurations](#common-trainer-configurations). The network specs for the RND model. |
 | `rnd -> learning_rate` | (default = `3e-4`) Learning rate used to update the RND module. This should be large enough for the RND module to quickly learn the state representation, but small enough to allow for stable learning. <br><br>Typical range: `1e-5` - `1e-3` |



 - LSTM does not work well with continuous actions. Please use
  discrete actions for better results.
- Since the memories must be sent back and forth between Python and Unity, using
-  too large `memory_size` will slow down training.
 - Adding a recurrent layer increases the complexity of the neural network, it is
  recommended to decrease `num_layers` when using recurrent.
 - It is required that `memory_size` be divisible by 2.
--- a/gym-unity/README.md
+++ b/gym-unity/README.md
 def main():
    unity_env = UnityEnvironment("./envs/GridWorld")
    env = UnityToGymWrapper(unity_env, 0, uint8_visual=True)
-    logger.configure('./logs') # Çhange to log in a different directory
+    logger.configure('./logs') # Change to log in a different directory
    act = deepq.learn(
        env,
        "cnn", # conv_only is also a good choice for GridWorld
--- a/gym-unity/gym_unity/init.py
+++ b/gym-unity/gym_unity/init.py
 # Version of the library that will be used to upload to pypi
-__version__ = "0.24.0.dev0"
+__version__ = "0.25.0.dev0"

 # Git tag that will be checked to determine whether to trigger upload to pypi
 __release_tag__ = None
--- a/gym-unity/gym_unity/envs/init.py
+++ b/gym-unity/gym_unity/envs/init.py
        return -float("inf"), float("inf")

    @property
-    def spec(self):
-        return None
-
-    @property
    def action_space(self):
        return self._action_space

--- a/gym-unity/gym_unity/tests/test_gym.py
+++ b/gym-unity/gym_unity/tests/test_gym.py
        ] * number_visual_observations
    rewards = np.array(num_agents * [1.0])
    agents = np.array(range(0, num_agents))
-    return DecisionSteps(obs, rewards, agents, None), TerminalSteps.empty(specs)
+    group_id = np.array(num_agents * [0])
+    group_rewards = np.array(num_agents * [0.0])
+    return (
+        DecisionSteps(obs, rewards, agents, None, group_id, group_rewards),
+        TerminalSteps.empty(specs),
+    )


 def setup_mock_unityenvironment(mock_env, mock_spec, mock_decision, mock_termination):
--- a/ml-agents-envs/mlagents_envs/init.py
+++ b/ml-agents-envs/mlagents_envs/init.py
 # Version of the library that will be used to upload to pypi
-__version__ = "0.24.0.dev0"
+__version__ = "0.25.0.dev0"

 # Git tag that will be checked to determine whether to trigger upload to pypi
 __release_tag__ = None
--- a/ml-agents-envs/mlagents_envs/base_env.py
+++ b/ml-agents-envs/mlagents_envs/base_env.py
    reward: float
    agent_id: AgentId
    action_mask: Optional[List[np.ndarray]]
+    group_id: int
+    group_reward: float


 class DecisionSteps(Mapping):
     this simulation step.
    """

-    def __init__(self, obs, reward, agent_id, action_mask):
+    def __init__(self, obs, reward, agent_id, action_mask, group_id, group_reward):
+        self.group_id: np.ndarray = group_id
+        self.group_reward: np.ndarray = group_reward
        self._agent_id_to_index: Optional[Dict[AgentId, int]] = None

    @property
            agent_mask = []
            for mask in self.action_mask:
                agent_mask.append(mask[agent_index])
+        group_id = self.group_id[agent_index]
+            group_id=group_id,
+            group_reward=self.group_reward[agent_index],
        )

    def __iter__(self) -> Iterator[Any]:
            reward=np.zeros(0, dtype=np.float32),
            agent_id=np.zeros(0, dtype=np.int32),
            action_mask=None,
+            group_id=np.zeros(0, dtype=np.int32),
+            group_reward=np.zeros(0, dtype=np.float32),
        )


    reward: float
    interrupted: bool
    agent_id: AgentId
+    group_id: int
+    group_reward: float


 class TerminalSteps(Mapping):
     across simulation steps.
    """

-    def __init__(self, obs, reward, interrupted, agent_id):
+    def __init__(self, obs, reward, interrupted, agent_id, group_id, group_reward):
+        self.group_id: np.ndarray = group_id
+        self.group_reward: np.ndarray = group_reward
        self._agent_id_to_index: Optional[Dict[AgentId, int]] = None

    @property
        agent_obs = []
        for batched_obs in self.obs:
            agent_obs.append(batched_obs[agent_index])
+        group_id = self.group_id[agent_index]
+            group_id=group_id,
+            group_reward=self.group_reward[agent_index],
        )

    def __iter__(self) -> Iterator[Any]:
            reward=np.zeros(0, dtype=np.float32),
            interrupted=np.zeros(0, dtype=np.bool),
            agent_id=np.zeros(0, dtype=np.int32),
+            group_id=np.zeros(0, dtype=np.int32),
+            group_reward=np.zeros(0, dtype=np.float32),
        )


--- a/ml-agents-envs/mlagents_envs/communicator_objects/agent_info_pb2.py
+++ b/ml-agents-envs/mlagents_envs/communicator_objects/agent_info_pb2.py
  name='mlagents_envs/communicator_objects/agent_info.proto',
  package='communicator_objects',
  syntax='proto3',
-  serialized_pb=_b('\n3mlagents_envs/communicator_objects/agent_info.proto\x12\x14\x63ommunicator_objects\x1a\x34mlagents_envs/communicator_objects/observation.proto\"\xd1\x01\n\x0e\x41gentInfoProto\x12\x0e\n\x06reward\x18\x07 \x01(\x02\x12\x0c\n\x04\x64one\x18\x08 \x01(\x08\x12\x18\n\x10max_step_reached\x18\t \x01(\x08\x12\n\n\x02id\x18\n \x01(\x05\x12\x13\n\x0b\x61\x63tion_mask\x18\x0b \x03(\x08\x12<\n\x0cobservations\x18\r \x03(\x0b\x32&.communicator_objects.ObservationProtoJ\x04\x08\x01\x10\x02J\x04\x08\x02\x10\x03J\x04\x08\x03\x10\x04J\x04\x08\x04\x10\x05J\x04\x08\x05\x10\x06J\x04\x08\x06\x10\x07J\x04\x08\x0c\x10\rB%\xaa\x02\"Unity.MLAgents.CommunicatorObjectsb\x06proto3')
+  serialized_pb=_b('\n3mlagents_envs/communicator_objects/agent_info.proto\x12\x14\x63ommunicator_objects\x1a\x34mlagents_envs/communicator_objects/observation.proto\"\xf9\x01\n\x0e\x41gentInfoProto\x12\x0e\n\x06reward\x18\x07 \x01(\x02\x12\x0c\n\x04\x64one\x18\x08 \x01(\x08\x12\x18\n\x10max_step_reached\x18\t \x01(\x08\x12\n\n\x02id\x18\n \x01(\x05\x12\x13\n\x0b\x61\x63tion_mask\x18\x0b \x03(\x08\x12<\n\x0cobservations\x18\r \x03(\x0b\x32&.communicator_objects.ObservationProto\x12\x10\n\x08group_id\x18\x0e \x01(\x05\x12\x14\n\x0cgroup_reward\x18\x0f \x01(\x02J\x04\x08\x01\x10\x02J\x04\x08\x02\x10\x03J\x04\x08\x03\x10\x04J\x04\x08\x04\x10\x05J\x04\x08\x05\x10\x06J\x04\x08\x06\x10\x07J\x04\x08\x0c\x10\rB%\xaa\x02\"Unity.MLAgents.CommunicatorObjectsb\x06proto3')
  ,
  dependencies=[mlagents__envs_dot_communicator__objects_dot_observation__pb2.DESCRIPTOR,])

      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      options=None, file=DESCRIPTOR),
+    _descriptor.FieldDescriptor(
+      name='group_id', full_name='communicator_objects.AgentInfoProto.group_id', index=6,
+      number=14, type=5, cpp_type=1, label=1,
+      has_default_value=False, default_value=0,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None, file=DESCRIPTOR),
+    _descriptor.FieldDescriptor(
+      name='group_reward', full_name='communicator_objects.AgentInfoProto.group_reward', index=7,
+      number=15, type=2, cpp_type=6, label=1,
+      has_default_value=False, default_value=float(0),
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  oneofs=[
  ],
  serialized_start=132,
-  serialized_end=341,
+  serialized_end=381,
 )

 _AGENTINFOPROTO.fields_by_name['observations'].message_type = mlagents__envs_dot_communicator__objects_dot_observation__pb2._OBSERVATIONPROTO
--- a/ml-agents-envs/mlagents_envs/communicator_objects/agent_info_pb2.pyi
+++ b/ml-agents-envs/mlagents_envs/communicator_objects/agent_info_pb2.pyi
    max_step_reached = ... # type: builtin___bool
    id = ... # type: builtin___int
    action_mask = ... # type: google___protobuf___internal___containers___RepeatedScalarFieldContainer[builtin___bool]
+    group_id = ... # type: builtin___int
+    group_reward = ... # type: builtin___float

    @property
    def observations(self) -> google___protobuf___internal___containers___RepeatedCompositeFieldContainer[mlagents_envs___communicator_objects___observation_pb2___ObservationProto]: ...
        id : typing___Optional[builtin___int] = None,
        action_mask : typing___Optional[typing___Iterable[builtin___bool]] = None,
        observations : typing___Optional[typing___Iterable[mlagents_envs___communicator_objects___observation_pb2___ObservationProto]] = None,
+        group_id : typing___Optional[builtin___int] = None,
+        group_reward : typing___Optional[builtin___float] = None,
        ) -> None: ...
    @classmethod
    def FromString(cls, s: builtin___bytes) -> AgentInfoProto: ...
-        def ClearField(self, field_name: typing_extensions___Literal[u"action_mask",u"done",u"id",u"max_step_reached",u"observations",u"reward"]) -> None: ...
+        def ClearField(self, field_name: typing_extensions___Literal[u"action_mask",u"done",u"group_id",u"group_reward",u"id",u"max_step_reached",u"observations",u"reward"]) -> None: ...
-        def ClearField(self, field_name: typing_extensions___Literal[u"action_mask",b"action_mask",u"done",b"done",u"id",b"id",u"max_step_reached",b"max_step_reached",u"observations",b"observations",u"reward",b"reward"]) -> None: ...
+        def ClearField(self, field_name: typing_extensions___Literal[u"action_mask",b"action_mask",u"done",b"done",u"group_id",b"group_id",u"group_reward",b"group_reward",u"id",b"id",u"max_step_reached",b"max_step_reached",u"observations",b"observations",u"reward",b"reward"]) -> None: ...
--- a/ml-agents-envs/mlagents_envs/communicator_objects/capabilities_pb2.py
+++ b/ml-agents-envs/mlagents_envs/communicator_objects/capabilities_pb2.py
  name='mlagents_envs/communicator_objects/capabilities.proto',
  package='communicator_objects',
  syntax='proto3',
-  serialized_pb=_b('\n5mlagents_envs/communicator_objects/capabilities.proto\x12\x14\x63ommunicator_objects\"\xaf\x01\n\x18UnityRLCapabilitiesProto\x12\x1a\n\x12\x62\x61seRLCapabilities\x18\x01 \x01(\x08\x12#\n\x1b\x63oncatenatedPngObservations\x18\x02 \x01(\x08\x12 \n\x18\x63ompressedChannelMapping\x18\x03 \x01(\x08\x12\x15\n\rhybridActions\x18\x04 \x01(\x08\x12\x19\n\x11trainingAnalytics\x18\x05 \x01(\x08\x42%\xaa\x02\"Unity.MLAgents.CommunicatorObjectsb\x06proto3')
+  serialized_pb=_b('\n5mlagents_envs/communicator_objects/capabilities.proto\x12\x14\x63ommunicator_objects\"\xd2\x01\n\x18UnityRLCapabilitiesProto\x12\x1a\n\x12\x62\x61seRLCapabilities\x18\x01 \x01(\x08\x12#\n\x1b\x63oncatenatedPngObservations\x18\x02 \x01(\x08\x12 \n\x18\x63ompressedChannelMapping\x18\x03 \x01(\x08\x12\x15\n\rhybridActions\x18\x04 \x01(\x08\x12\x19\n\x11trainingAnalytics\x18\x05 \x01(\x08\x12!\n\x19variableLengthObservation\x18\x06 \x01(\x08\x42%\xaa\x02\"Unity.MLAgents.CommunicatorObjectsb\x06proto3')
 )


      message_type=None, enum_type=None, containing_type=None,
      is_extension=False, extension_scope=None,
      options=None, file=DESCRIPTOR),
+    _descriptor.FieldDescriptor(
+      name='variableLengthObservation', full_name='communicator_objects.UnityRLCapabilitiesProto.variableLengthObservation', index=5,
+      number=6, type=8, cpp_type=7, label=1,
+      has_default_value=False, default_value=False,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None, file=DESCRIPTOR),
  ],
  extensions=[
  ],
  oneofs=[
  ],
  serialized_start=80,
-  serialized_end=255,
+  serialized_end=290,
 )

 DESCRIPTOR.message_types_by_name['UnityRLCapabilitiesProto'] = _UNITYRLCAPABILITIESPROTO
--- a/ml-agents-envs/mlagents_envs/communicator_objects/capabilities_pb2.pyi
+++ b/ml-agents-envs/mlagents_envs/communicator_objects/capabilities_pb2.pyi
    compressedChannelMapping = ... # type: builtin___bool
    hybridActions = ... # type: builtin___bool
    trainingAnalytics = ... # type: builtin___bool
+    variableLengthObservation = ... # type: builtin___bool

    def __init__(self,
        *,
        hybridActions : typing___Optional[builtin___bool] = None,
        trainingAnalytics : typing___Optional[builtin___bool] = None,
+        variableLengthObservation : typing___Optional[builtin___bool] = None,
        ) -> None: ...
    @classmethod
    def FromString(cls, s: builtin___bytes) -> UnityRLCapabilitiesProto: ...
-        def ClearField(self, field_name: typing_extensions___Literal[u"baseRLCapabilities",u"compressedChannelMapping",u"concatenatedPngObservations",u"hybridActions",u"trainingAnalytics"]) -> None: ...
+        def ClearField(self, field_name: typing_extensions___Literal[u"baseRLCapabilities",u"compressedChannelMapping",u"concatenatedPngObservations",u"hybridActions",u"trainingAnalytics",u"variableLengthObservation"]) -> None: ...
-        def ClearField(self, field_name: typing_extensions___Literal[u"baseRLCapabilities",b"baseRLCapabilities",u"compressedChannelMapping",b"compressedChannelMapping",u"concatenatedPngObservations",b"concatenatedPngObservations",u"hybridActions",b"hybridActions",u"trainingAnalytics",b"trainingAnalytics"]) -> None: ...
+        def ClearField(self, field_name: typing_extensions___Literal[u"baseRLCapabilities",b"baseRLCapabilities",u"compressedChannelMapping",b"compressedChannelMapping",u"concatenatedPngObservations",b"concatenatedPngObservations",u"hybridActions",b"hybridActions",u"trainingAnalytics",b"trainingAnalytics",u"variableLengthObservation",b"variableLengthObservation"]) -> None: ...
--- a/ml-agents-envs/mlagents_envs/environment.py
+++ b/ml-agents-envs/mlagents_envs/environment.py
    #  * 1.2.0 - support compression mapping for stacked compressed observations.
    #  * 1.3.0 - support action spaces with both continuous and discrete actions.
    #  * 1.4.0 - support training analytics sent from python trainer to the editor.
-    API_VERSION = "1.4.0"
+    #  * 1.5.0 - support variable length observation training.
+    API_VERSION = "1.5.0"

    # Default port that the editor listens on. If an environment executable
    # isn't specified, this port will be used.
        capabilities.compressedChannelMapping = True
        capabilities.hybridActions = True
        capabilities.trainingAnalytics = True
+        capabilities.variableLengthObservation = True
        return capabilities

    @staticmethod
--- a/ml-agents-envs/mlagents_envs/rpc_utils.py
+++ b/ml-agents-envs/mlagents_envs/rpc_utils.py


@timed
-def observation_to_np_array(
+def _observation_to_np_array(
    obs: ObservationProto, expected_shape: Optional[Iterable[int]] = None
 ) -> np.ndarray:
    """


@timed
-def _process_visual_observation(
+def _process_maybe_compressed_observation(
    obs_index: int,
    shape: Tuple[int, int, int],
    agent_info_list: Collection[AgentInfoProto],

    batched_visual = [
-        observation_to_np_array(agent_obs.observations[obs_index], shape)
+        _observation_to_np_array(agent_obs.observations[obs_index], shape)
        for agent_obs in agent_info_list
    ]
    return np.array(batched_visual, dtype=np.float32)


@timed
-def _process_vector_observation(
+def _process_rank_one_or_two_observation(
    obs_index: int, shape: Tuple[int, ...], agent_info_list: Collection[AgentInfoProto]
 ) -> np.ndarray:
    if len(agent_info_list) == 0:
        if is_visual:
            obs_shape = cast(Tuple[int, int, int], observation_specs.shape)
            decision_obs_list.append(
-                _process_visual_observation(
+                _process_maybe_compressed_observation(
-                _process_visual_observation(
+                _process_maybe_compressed_observation(
-                _process_vector_observation(
+                _process_rank_one_or_two_observation(
-                _process_vector_observation(
+                _process_rank_one_or_two_observation(
                    obs_index, observation_specs.shape, terminal_agent_info_list
                )
            )
        [agent_info.reward for agent_info in terminal_agent_info_list], dtype=np.float32
    )

+    decision_group_rewards = np.array(
+        [agent_info.group_reward for agent_info in decision_agent_info_list],
+        dtype=np.float32,
+    )
+    terminal_group_rewards = np.array(
+        [agent_info.group_reward for agent_info in terminal_agent_info_list],
+        dtype=np.float32,
+    )
+
+    _raise_on_nan_and_inf(decision_group_rewards, "group_rewards")
+    _raise_on_nan_and_inf(terminal_group_rewards, "group_rewards")
+
+    decision_group_id = [agent_info.group_id for agent_info in decision_agent_info_list]
+    terminal_group_id = [agent_info.group_id for agent_info in terminal_agent_info_list]

    max_step = np.array(
        [agent_info.max_step_reached for agent_info in terminal_agent_info_list],
            action_mask = np.split(action_mask, indices, axis=1)
    return (
        DecisionSteps(
-            decision_obs_list, decision_rewards, decision_agent_id, action_mask
+            decision_obs_list,
+            decision_rewards,
+            decision_agent_id,
+            action_mask,
+            decision_group_id,
+            decision_group_rewards,
-        TerminalSteps(terminal_obs_list, terminal_rewards, max_step, terminal_agent_id),
+        TerminalSteps(
+            terminal_obs_list,
+            terminal_rewards,
+            max_step,
+            terminal_agent_id,
+            terminal_group_id,
+            terminal_group_rewards,
+        ),
    )


--- a/ml-agents-envs/mlagents_envs/tests/test_rpc_utils.py
+++ b/ml-agents-envs/mlagents_envs/tests/test_rpc_utils.py
 from mlagents_envs.rpc_utils import (
    behavior_spec_from_proto,
    process_pixels,
-    _process_visual_observation,
-    _process_vector_observation,
+    _process_maybe_compressed_observation,
+    _process_rank_one_or_two_observation,
    steps_from_proto,
 )
 from PIL import Image
    shapes = [(3,), (4,)]
    list_proto = generate_list_agent_proto(n_agents, shapes)
    for obs_index, shape in enumerate(shapes):
-        arr = _process_vector_observation(obs_index, shape, list_proto)
+        arr = _process_rank_one_or_two_observation(obs_index, shape, list_proto)
        assert list(arr.shape) == ([n_agents] + list(shape))
        assert np.allclose(arr, 0.1, atol=0.01)

    ap2 = AgentInfoProto()
    ap2.observations.extend([proto_obs_2])
    ap_list = [ap1, ap2]
-    arr = _process_visual_observation(0, (128, 64, 3), ap_list)
+    arr = _process_maybe_compressed_observation(0, (128, 64, 3), ap_list)
    assert list(arr.shape) == [2, 128, 64, 3]
    assert np.allclose(arr[0, :, :, :], in_array_1, atol=0.01)
    assert np.allclose(arr[1, :, :, :], in_array_2, atol=0.01)
    ap2 = AgentInfoProto()
    ap2.observations.extend([proto_obs_2])
    ap_list = [ap1, ap2]
-    arr = _process_visual_observation(0, (128, 64, 1), ap_list)
+    arr = _process_maybe_compressed_observation(0, (128, 64, 1), ap_list)
    assert list(arr.shape) == [2, 128, 64, 1]
    assert np.allclose(arr[0, :, :, :], expected_out_array_1, atol=0.01)
    assert np.allclose(arr[1, :, :, :], expected_out_array_2, atol=0.01)
    ap1 = AgentInfoProto()
    ap1.observations.extend([proto_obs_1])
    ap_list = [ap1]
-    arr = _process_visual_observation(0, (128, 64, 8), ap_list)
+    arr = _process_maybe_compressed_observation(0, (128, 64, 8), ap_list)
    assert list(arr.shape) == [1, 128, 64, 8]
    assert np.allclose(arr[0, :, :, :], expected_out_array_1, atol=0.01)

    ap1.observations.extend([proto_obs_1])
    ap_list = [ap1]
    with pytest.raises(UnityObservationException):
-        _process_visual_observation(0, (128, 42, 3), ap_list)
+        _process_maybe_compressed_observation(0, (128, 42, 3), ap_list)


 def test_batched_step_result_from_proto():
--- a/ml-agents-envs/mlagents_envs/tests/test_steps.py
+++ b/ml-agents-envs/mlagents_envs/tests/test_steps.py
        reward=np.array(range(3), dtype=np.float32),
        agent_id=np.array(range(10, 13), dtype=np.int32),
        action_mask=[np.zeros((3, 4), dtype=np.bool)],
+        group_id=np.array(range(3), dtype=np.int32),
+        group_reward=np.array(range(3), dtype=np.float32),
    )

    assert ds.agent_id_to_index[10] == 0
        reward=np.array(range(3), dtype=np.float32),
        agent_id=np.array(range(10, 13), dtype=np.int32),
        interrupted=np.array([1, 0, 1], dtype=np.bool),
+        group_id=np.array(range(3), dtype=np.int32),
+        group_reward=np.array(range(3), dtype=np.float32),
    )

    assert ts.agent_id_to_index[10] == 0
--- a/ml-agents/mlagents/plugins/stats_writer.py
+++ b/ml-agents/mlagents/plugins/stats_writer.py
    and evaluates them, and returns the list of all the StatsWriter implementations.
    """
    all_stats_writers: List[StatsWriter] = []
+    if ML_AGENTS_STATS_WRITER not in importlib_metadata.entry_points():
+        logger.warning(
+            f"Unable to find any entry points for {ML_AGENTS_STATS_WRITER}, even the default ones. "
+            "Uninstalling and reinstalling ml-agents via pip should resolve. "
+            "Using default plugins for now."
+        )
+        return get_default_stats_writers(run_options)
+
    entry_points = importlib_metadata.entry_points()[ML_AGENTS_STATS_WRITER]

    for entry_point in entry_points:
--- a/ml-agents/mlagents/trainers/__init__.py
+++ b/ml-agents/mlagents/trainers/__init__.py
 # Version of the library that will be used to upload to pypi
-__version__ = "0.24.0.dev0"
+__version__ = "0.25.0.dev0"

 # Git tag that will be checked to determine whether to trigger upload to pypi
 __release_tag__ = None
--- a/ml-agents/mlagents/trainers/agent_processor.py
+++ b/ml-agents/mlagents/trainers/agent_processor.py
 import sys
+import numpy as np
 from typing import List, Dict, TypeVar, Generic, Tuple, Any, Union
 from collections import defaultdict, Counter
 import queue
    StatsAggregationMethod,
    EnvironmentStats,
 )
-from mlagents.trainers.trajectory import Trajectory, AgentExperience
+from mlagents.trainers.trajectory import GroupmateStatus, Trajectory, AgentExperience
-from mlagents.trainers.behavior_id_utils import get_global_agent_id
+from mlagents.trainers.behavior_id_utils import get_global_agent_id, get_global_group_id

 T = TypeVar("T")

        """
        self.experience_buffers: Dict[str, List[AgentExperience]] = defaultdict(list)
        self.last_step_result: Dict[str, Tuple[DecisionStep, int]] = {}
+        # current_group_obs is used to collect the last seen obs of all the agents in the same group,
+        # and assemble the group obs.
+        self.current_group_obs: Dict[str, Dict[str, List[np.ndarray]]] = defaultdict(
+            lambda: defaultdict(list)
+        )
+        # group_status is used to collect the most recent GroupmateStatus (obs, reward,
+        # action, done) of all the agents in the same group, keyed by group id and agent id.
+        self.group_status: Dict[str, Dict[str, GroupmateStatus]] = defaultdict(
+            lambda: defaultdict(None)
+        )
        # last_take_action_outputs stores the action a_t taken before the current observation s_(t+1), while
        # grabbing previous_action from the policy grabs the action PRIOR to that, a_(t-1).
        self.last_take_action_outputs: Dict[str, ActionInfoOutputs] = {}
            if global_id in self.last_step_result:  # Don't store if agent just reset
                self.last_take_action_outputs[global_id] = take_action_outputs

-        # Iterate over all the terminal steps
+        # Iterate over all the terminal steps, first gather all the teammate obs
+        # and then create the AgentExperiences/Trajectories
+        for terminal_step in terminal_steps.values():
+            self._gather_group_obs(terminal_step, worker_id)
-                terminal_step, global_id, terminal_steps.agent_id_to_index[local_id]
+                terminal_step, worker_id, terminal_steps.agent_id_to_index[local_id]
-        # Iterate over all the decision steps
+            # Clear the last seen group obs when agents die.
+            self._clear_group_obs(global_id)
+
+        # Clean the last experience dictionary for terminal steps
+        for terminal_step in terminal_steps.values():
+            local_id = terminal_step.agent_id
+            global_id = get_global_agent_id(worker_id, local_id)
+
+        # Iterate over all the decision steps, first gather all the teammate obs
+        # and then create the trajectories
+        for ongoing_step in decision_steps.values():
+            self._gather_group_obs(ongoing_step, worker_id)
-            global_id = get_global_agent_id(worker_id, local_id)
-                ongoing_step, global_id, decision_steps.agent_id_to_index[local_id]
+                ongoing_step, worker_id, decision_steps.agent_id_to_index[local_id]
            )

        for _gid in action_global_agent_ids:
                        [_gid], take_action_outputs["action"]
                    )

+    def _gather_group_obs(
+        self, step: Union[TerminalStep, DecisionStep], worker_id: int
+    ) -> None:
+        """
+        Records this agent's latest status and observation under its group, so that
+        groupmates can read them when their own experiences are assembled.
+        Nothing is stored unless a previous step result and previous action outputs
+        exist for the agent and step.group_id is greater than 0.
+        :param step: The TerminalStep or DecisionStep just received for one agent.
+        :param worker_id: Id of the environment worker the step came from; it is
+            combined with the agent and group ids to form the global ids.
+        """
+        global_agent_id = get_global_agent_id(worker_id, step.agent_id)
+        stored_decision_step, idx = self.last_step_result.get(
+            global_agent_id, (None, None)
+        )
+        stored_take_action_outputs = self.last_take_action_outputs.get(
+            global_agent_id, None
+        )
+        if stored_decision_step is not None and stored_take_action_outputs is not None:
+            # Only steps with a positive group_id are tracked as group members.
+            if step.group_id > 0:
+                global_group_id = get_global_group_id(worker_id, step.group_id)
+                stored_actions = stored_take_action_outputs["action"]
+                # idx is the agent's row in the batch that produced the stored action.
+                action_tuple = ActionTuple(
+                    continuous=stored_actions.continuous[idx],
+                    discrete=stored_actions.discrete[idx],
+                )
+                # Pairs the obs stored from the previous step with the reward and
+                # done flag of the step that was just received.
+                group_status = GroupmateStatus(
+                    obs=stored_decision_step.obs,
+                    reward=step.reward,
+                    action=action_tuple,
+                    done=isinstance(step, TerminalStep),
+                )
+                self.group_status[global_group_id][global_agent_id] = group_status
+                # The newly received obs is kept separately from the status above.
+                self.current_group_obs[global_group_id][global_agent_id] = step.obs
+
+    def _clear_group_obs(self, global_id: str) -> None:
+        """
+        Removes the entries stored under global_id from both current_group_obs
+        and group_status, across every group.
+        :param global_id: The global agent id whose group data should be dropped.
+        """
+        self._delete_in_nested_dict(self.current_group_obs, global_id)
+        self._delete_in_nested_dict(self.group_status, global_id)
+
+    def _delete_in_nested_dict(self, nested_dict: Dict[str, Any], key: str) -> None:
+        """
+        Deletes key from every inner dict of nested_dict, and drops any outer entry
+        whose inner dict is left empty.
+        :param nested_dict: A dict of dicts; it is modified in place.
+        :param key: The inner key to remove.
+        """
+        # Iterate over a copy of the keys because outer entries may be removed below.
+        for _manager_id in list(nested_dict.keys()):
+            _team_group = nested_dict[_manager_id]
+            # NOTE(review): _safe_delete presumably ignores a missing key - confirm.
+            self._safe_delete(_team_group, key)
+            if not _team_group:  # if dict is empty
+                self._safe_delete(nested_dict, _manager_id)
+
-        self, step: Union[TerminalStep, DecisionStep], global_id: str, index: int
+        self, step: Union[TerminalStep, DecisionStep], worker_id: int, index: int
-        stored_decision_step, idx = self.last_step_result.get(global_id, (None, None))
-        stored_take_action_outputs = self.last_take_action_outputs.get(global_id, None)
+        global_agent_id = get_global_agent_id(worker_id, step.agent_id)
+        global_group_id = get_global_group_id(worker_id, step.group_id)
+        stored_decision_step, idx = self.last_step_result.get(
+            global_agent_id, (None, None)
+        )
+        stored_take_action_outputs = self.last_take_action_outputs.get(
+            global_agent_id, None
+        )
-            self.last_step_result[global_id] = (step, index)
+            self.last_step_result[global_agent_id] = (step, index)
-                memory = self.policy.retrieve_memories([global_id])[0, :]
+                memory = self.policy.retrieve_memories([global_agent_id])[0, :]
            else:
                memory = None
            done = terminated  # Since this is an ongoing step
                discrete=stored_action_probs.discrete[idx],
            )
            action_mask = stored_decision_step.action_mask
-            prev_action = self.policy.retrieve_previous_action([global_id])[0, :]
+            prev_action = self.policy.retrieve_previous_action([global_agent_id])[0, :]
+
+            # Assemble teammate_obs. If none saved, then it will be an empty list.
+            group_statuses = []
+            for _id, _obs in self.group_status[global_group_id].items():
+                if _id != global_agent_id:
+                    group_statuses.append(_obs)
+
            experience = AgentExperience(
                obs=obs,
                reward=step.reward,
                prev_action=prev_action,
                interrupted=interrupted,
                memory=memory,
+                group_status=group_statuses,
+                group_reward=step.group_reward,
-            self.experience_buffers[global_id].append(experience)
-            self.episode_rewards[global_id] += step.reward
+            self.experience_buffers[global_agent_id].append(experience)
+            self.episode_rewards[global_agent_id] += step.reward
-                self.episode_steps[global_id] += 1
+                self.episode_steps[global_agent_id] += 1
-                len(self.experience_buffers[global_id]) >= self.max_trajectory_length
+                len(self.experience_buffers[global_agent_id])
+                >= self.max_trajectory_length
-                # Make next AgentExperience
+                next_group_obs = []
+                for _id, _exp in self.current_group_obs[global_group_id].items():
+                    if _id != global_agent_id:
+                        next_group_obs.append(_exp)
+
-                    steps=self.experience_buffers[global_id],
-                    agent_id=global_id,
+                    steps=self.experience_buffers[global_agent_id],
+                    agent_id=global_agent_id,
+                    next_group_obs=next_group_obs,
-                self.experience_buffers[global_id] = []
+                self.experience_buffers[global_agent_id] = []
-                    "Environment/Episode Length", self.episode_steps.get(global_id, 0)
+                    "Environment/Episode Length",
+                    self.episode_steps.get(global_agent_id, 0),
-                self._clean_agent_data(global_id)
+                self._clean_agent_data(global_agent_id)

    def _clean_agent_data(self, global_id: str) -> None:
        """
--- a/ml-agents/mlagents/trainers/behavior_id_utils.py
+++ b/ml-agents/mlagents/trainers/behavior_id_utils.py
    """
    Create an agent id that is unique across environment workers using the worker_id.
    """
-    return f"${worker_id}-{agent_id}"
+    return f"agent_{worker_id}-{agent_id}"
+
+
+def get_global_group_id(worker_id: int, group_id: int) -> str:
+    """
+    Create a group id that is unique across environment workers by combining it
+    with the worker_id.
+    :param worker_id: Id of the environment worker the group belongs to.
+    :param group_id: Id of the group within that worker.
+    :return: A string of the form "group_<worker_id>-<group_id>".
+    """
+    return f"group_{worker_id}-{group_id}"
--- a/ml-agents/mlagents/trainers/buffer.py
+++ b/ml-agents/mlagents/trainers/buffer.py

 from mlagents_envs.exception import UnityException

+# Elements in the buffer can be np.ndarray, or in the case of teammate obs, actions, rewards,
+# a List of np.ndarray. This is done so that we don't have duplicated np.ndarrays, only references.
+BufferEntry = Union[np.ndarray, List[np.ndarray]]
+

 class BufferException(UnityException):
    """
 class BufferKey(enum.Enum):
    ACTION_MASK = "action_mask"
    CONTINUOUS_ACTION = "continuous_action"
+    NEXT_CONT_ACTION = "next_continuous_action"
+    NEXT_DISC_ACTION = "next_discrete_action"
    DISCRETE_LOG_PROBS = "discrete_log_probs"
    DONE = "done"
    ENVIRONMENT_REWARDS = "environment_rewards"
    ADVANTAGES = "advantages"
    DISCOUNTED_RETURNS = "discounted_returns"

+    GROUP_DONES = "group_dones"
+    GROUPMATE_REWARDS = "groupmate_reward"
+    GROUP_REWARD = "group_reward"
+    GROUP_CONTINUOUS_ACTION = "group_continuous_action"
+    GROUP_DISCRETE_ACTION = "group_discrete_aaction"
+    GROUP_NEXT_CONT_ACTION = "group_next_cont_action"
+    GROUP_NEXT_DISC_ACTION = "group_next_disc_action"
+
+
+    GROUP_OBSERVATION = "group_obs"
+    NEXT_GROUP_OBSERVATION = "next_group_obs"


 class RewardSignalKeyPrefix(enum.Enum):
        super().append(element)
        self.padding_value = padding_value

-    def extend(self, data: np.ndarray) -> None:
-        """
-        Adds a list of np.arrays to the end of the list of np.arrays.
-        :param data: The np.array list to append.
-        """
-        self += list(np.array(data, dtype=np.float32))
-
-        # Make sure we convert incoming data to float32 if it's a float
-        dtype = None
-        if data is not None and len(data) and isinstance(data[0], float):
-            dtype = np.float32
-        self[:] = list(np.array(data, dtype=dtype))
+        self[:] = data

    def get_batch(
        self,
--- a/ml-agents/mlagents/trainers/ghost/trainer.py
+++ b/ml-agents/mlagents/trainers/ghost/trainer.py

        next_learning_team = self.controller.get_learning_team

-        # CASE 1: Current learning team is managed by this GhostTrainer.
-        # If the learning team changes, the following loop over queues will push the
-        # new policy into the policy queue for the new learning agent if
-        # that policy is managed by this GhostTrainer. Otherwise, it will save the current snapshot.
-        # CASE 2: Current learning team is managed by a different GhostTrainer.
-        # If the learning team changes to a team managed by this GhostTrainer, this loop
-        # will push the current_snapshot into the correct queue.  Otherwise,
-        # it will continue skipping and swap_snapshot will continue to handle
-        # pushing fixed snapshots
-        # Case 3: No team change. The if statement just continues to push the policy
+        # CASE 1: No team change. The if statement just continues to push the policy
        # into the correct queue (or not if not learning team).
        for brain_name in self._internal_policy_queues:
            internal_policy_queue = self._internal_policy_queues[brain_name]
            except AgentManagerQueue.Empty:
-                pass
-            if next_learning_team in self._team_to_name_to_policy_queue:
+                continue
+            if (
+                self._learning_team == next_learning_team
+                and next_learning_team in self._team_to_name_to_policy_queue
+            ):
                name_to_policy_queue = self._team_to_name_to_policy_queue[
                    next_learning_team
                ]
                    policy = self.get_policy(behavior_id)
                    policy.load_weights(self.current_policy_snapshot[brain_name])
                    name_to_policy_queue[brain_name].put(policy)
+
+        # CASE 2: Current learning team is managed by this GhostTrainer.
+        # If the learning team changes, the following loop over queues will push the
+        # new policy into the policy queue for the new learning agent if
+        # that policy is managed by this GhostTrainer. Otherwise, it will save the current snapshot.
+        # CASE 3: Current learning team is managed by a different GhostTrainer.
+        # If the learning team changes to a team managed by this GhostTrainer, this loop
+        # will push the current_snapshot into the correct queue.  Otherwise,
+        # it will continue skipping and swap_snapshot will continue to handle
+        # pushing fixed snapshots
+        if (
+            self._learning_team != next_learning_team
+            and next_learning_team in self._team_to_name_to_policy_queue
+        ):
+            name_to_policy_queue = self._team_to_name_to_policy_queue[
+                next_learning_team
+            ]
+            for brain_name in name_to_policy_queue:
+                behavior_id = create_name_behavior_id(brain_name, next_learning_team)
+                policy = self.get_policy(behavior_id)
+                policy.load_weights(self.current_policy_snapshot[brain_name])
+                name_to_policy_queue[brain_name].put(policy)

        # Note save and swap should be on different step counters.
        # We don't want to save unless the policy is learning.
--- a/ml-agents/mlagents/trainers/settings.py
+++ b/ml-agents/mlagents/trainers/settings.py
 class RewardSignalSettings:
    gamma: float = 0.99
    strength: float = 1.0
+    network_settings: NetworkSettings = attr.ib(factory=NetworkSettings)

    @staticmethod
    def structure(d: Mapping, t: type) -> Any:
            enum_key = RewardSignalType(key)
            t = enum_key.to_settings()
            d_final[enum_key] = strict_to_cls(val, t)
+            # Checks to see if the user is specifying the deprecated encoding_size for RewardSignals.
+            # If network_settings is not specified, this updates the default hidden_units
+            # to the value of encoding size. If specified, this ignores encoding size and
+            # uses network_settings values.
+            if "encoding_size" in val:
+                logger.warning(
+                    "'encoding_size' was deprecated for RewardSignals. Please use network_settings."
+                )
+                # If network settings was not specified, use the encoding size. Otherwise, use hidden_units
+                if "network_settings" not in val:
+                    d_final[enum_key].network_settings.hidden_units = val[
+                        "encoding_size"
+                    ]
-    encoding_size: int = 64
+    encoding_size: Optional[int] = None
    use_actions: bool = False
    use_vail: bool = False
    demo_path: str = attr.ib(kw_only=True)
 class CuriositySettings(RewardSignalSettings):
-    encoding_size: int = 64
+    encoding_size: Optional[int] = None
-    encoding_size: int = 64
+    encoding_size: Optional[int] = None


 # SAMPLERS #############################################################################
--- a/ml-agents/mlagents/trainers/tests/mock_brain.py
+++ b/ml-agents/mlagents/trainers/tests/mock_brain.py

 from mlagents.trainers.buffer import AgentBuffer, AgentBufferKey
 from mlagents.trainers.torch.action_log_probs import LogProbsTuple
-from mlagents.trainers.trajectory import Trajectory, AgentExperience
+from mlagents.trainers.trajectory import GroupmateStatus, Trajectory, AgentExperience
 from mlagents_envs.base_env import (
    DecisionSteps,
    TerminalSteps,
    observation_specs: List[ObservationSpec],
    action_spec: ActionSpec,
    done: bool = False,
+    grouped: bool = False,
 ) -> Tuple[DecisionSteps, TerminalSteps]:
    """
    Creates a mock Tuple[DecisionSteps, TerminalSteps] with observations.
    reward = np.array(num_agents * [1.0], dtype=np.float32)
    interrupted = np.array(num_agents * [False], dtype=np.bool)
    agent_id = np.arange(num_agents, dtype=np.int32)
+    _gid = 1 if grouped else 0
+    group_id = np.array(num_agents * [_gid], dtype=np.int32)
+    group_reward = np.array(num_agents * [0.0], dtype=np.float32)
-            TerminalSteps(obs_list, reward, interrupted, agent_id),
+            TerminalSteps(
+                obs_list, reward, interrupted, agent_id, group_id, group_reward
+            ),
-            DecisionSteps(obs_list, reward, agent_id, action_mask),
+            DecisionSteps(
+                obs_list, reward, agent_id, action_mask, group_id, group_reward
+            ),
            TerminalSteps.empty(behavior_spec),
        )

    action_spec: ActionSpec,
    max_step_complete: bool = False,
    memory_size: int = 10,
+    num_other_agents_in_group: int = 0,
 ) -> Trajectory:
    """
    Makes a fake trajectory of length length. If max_step_complete,
        memory = np.ones(memory_size, dtype=np.float32)
        agent_id = "test_agent"
        behavior_id = "test_brain"
+        group_status = []
+        for _ in range(num_other_agents_in_group):
+            group_status.append(GroupmateStatus(obs, reward, action, done))
        experience = AgentExperience(
            obs=obs,
            reward=reward,
            prev_action=prev_action,
            interrupted=max_step,
            memory=memory,
+            group_status=group_status,
+            group_reward=0,
        )
        steps_list.append(experience)
    obs = []
        prev_action=prev_action,
        interrupted=max_step_complete,
        memory=memory,
+        group_status=group_status,
+        group_reward=0,
-        steps=steps_list, agent_id=agent_id, behavior_id=behavior_id, next_obs=obs
+        steps=steps_list,
+        agent_id=agent_id,
+        behavior_id=behavior_id,
+        next_obs=obs,
+        next_group_obs=[obs] * num_other_agents_in_group,
    )


--- a/ml-agents/mlagents/trainers/tests/simple_test_envs.py
+++ b/ml-agents/mlagents/trainers/tests/simple_test_envs.py
        self.agent_id[name] = self.agent_id[name] + 1

    def _make_batched_step(
-        self, name: str, done: bool, reward: float
+        self, name: str, done: bool, reward: float, group_reward: float
+        m_group_id = np.array([0], dtype=np.int32)
+        m_group_reward = np.array([group_reward], dtype=np.float32)
-        decision_step = DecisionSteps(m_vector_obs, m_reward, m_agent_id, action_mask)
+        decision_step = DecisionSteps(
+            m_vector_obs, m_reward, m_agent_id, action_mask, m_group_id, m_group_reward
+        )
        terminal_step = TerminalSteps.empty(self.behavior_spec)
        if done:
            self.final_rewards[name].append(self.rewards[name])
                new_done,
                new_agent_id,
                new_action_mask,
+                new_group_id,
+                new_group_reward,
-                new_vector_obs, new_reward, new_agent_id, new_action_mask
+                new_vector_obs,
+                new_reward,
+                new_agent_id,
+                new_action_mask,
+                new_group_id,
+                new_group_reward,
-                m_vector_obs, m_reward, np.array([False], dtype=np.bool), m_agent_id
+                m_vector_obs,
+                m_reward,
+                np.array([False], dtype=np.bool),
+                m_agent_id,
+                m_group_id,
+                m_group_reward,
-    ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
+    ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
-        return new_reward, new_done, new_agent_id, new_action_mask
+        new_group_id = np.array([0], dtype=np.int32)
+        new_group_reward = np.array([0.0], dtype=np.float32)
+        return (
+            new_reward,
+            new_done,
+            new_agent_id,
+            new_action_mask,
+            new_group_id,
+            new_group_reward,
+        )

    def step(self) -> None:
        assert all(action is not None for action in self.action.values())
            reward = self._compute_reward(name, done)
            self.rewards[name] += reward
-            self.step_result[name] = self._make_batched_step(name, done, reward)
+            self.step_result[name] = self._make_batched_step(name, done, reward, 0.0)
-            self.step_result[name] = self._make_batched_step(name, False, 0.0)
+            self.step_result[name] = self._make_batched_step(name, False, 0.0, 0.0)

    @property
    def reset_parameters(self) -> Dict[str, str]:
        self.num_show_steps = 2

    def _make_batched_step(
-        self, name: str, done: bool, reward: float
+        self, name: str, done: bool, reward: float, group_reward: float
    ) -> Tuple[DecisionSteps, TerminalSteps]:
        recurrent_obs_val = (
            self.goal[name] if self.step_count[name] <= self.num_show_steps else 0
        m_agent_id = np.array([self.agent_id[name]], dtype=np.int32)
+        m_group_id = np.array([0], dtype=np.int32)
+        m_group_reward = np.array([group_reward], dtype=np.float32)
-        decision_step = DecisionSteps(m_vector_obs, m_reward, m_agent_id, action_mask)
+        decision_step = DecisionSteps(
+            m_vector_obs, m_reward, m_agent_id, action_mask, m_group_id, m_group_reward
+        )
        terminal_step = TerminalSteps.empty(self.behavior_spec)
        if done:
            self.final_rewards[name].append(self.rewards[name])
                new_done,
                new_agent_id,
                new_action_mask,
+                new_group_id,
+                new_group_reward,
-                new_vector_obs, new_reward, new_agent_id, new_action_mask
+                new_vector_obs,
+                new_reward,
+                new_agent_id,
+                new_action_mask,
+                new_group_id,
+                new_group_reward,
-                m_vector_obs, m_reward, np.array([False], dtype=np.bool), m_agent_id
+                m_vector_obs,
+                m_reward,
+                np.array([False], dtype=np.bool),
+                m_agent_id,
+                m_group_id,
+                m_group_reward,
            )
        return (decision_step, terminal_step)

--- a/ml-agents/mlagents/trainers/tests/test_agent_processor.py
+++ b/ml-agents/mlagents/trainers/tests/test_agent_processor.py
 from unittest import mock
 import pytest
+from typing import List
 import mlagents.trainers.tests.mock_brain as mb
 import numpy as np
 from mlagents.trainers.agent_processor import (
    return mock_policy


+def _create_action_info(num_agents: int, agent_ids: List[str]) -> ActionInfo:
+    """
+    Builds a fake ActionInfo for tests, with one continuous action of value 0.1
+    and one continuous log prob of value 0.1 per agent.
+    :param num_agents: Number of rows in the generated action and log prob arrays.
+    :param agent_ids: Agent ids stored on the returned ActionInfo.
+    :return: An ActionInfo whose action, env_action and outputs use the fake values.
+    """
+    fake_action_outputs = {
+        "action": ActionTuple(
+            continuous=np.array([[0.1]] * num_agents, dtype=np.float32)
+        ),
+        # Entropy is a single-element array regardless of num_agents.
+        "entropy": np.array([1.0], dtype=np.float32),
+        "learning_rate": 1.0,
+        "log_probs": LogProbsTuple(
+            continuous=np.array([[0.1]] * num_agents, dtype=np.float32)
+        ),
+    }
+    fake_action_info = ActionInfo(
+        action=ActionTuple(continuous=np.array([[0.1]] * num_agents, dtype=np.float32)),
+        env_action=ActionTuple(
+            continuous=np.array([[0.1]] * num_agents, dtype=np.float32)
+        ),
+        outputs=fake_action_outputs,
+        agent_ids=agent_ids,
+    )
+    return fake_action_info
+
+
@pytest.mark.parametrize("num_vis_obs", [0, 1, 2], ids=["vec", "1 viz", "2 viz"])
 def test_agentprocessor(num_vis_obs):
    policy = create_mock_policy()
        stats_reporter=StatsReporter("testcat"),
    )

-    fake_action_outputs = {
-        "action": ActionTuple(continuous=np.array([[0.1], [0.1]])),
-        "entropy": np.array([1.0], dtype=np.float32),
-        "learning_rate": 1.0,
-        "log_probs": LogProbsTuple(continuous=np.array([[0.1], [0.1]])),
-    }
    mock_decision_steps, mock_terminal_steps = mb.create_mock_steps(
        num_agents=2,
        observation_specs=create_observation_specs_with_shapes(
    )
-    fake_action_info = ActionInfo(
-        action=ActionTuple(continuous=np.array([[0.1], [0.1]])),
-        env_action=ActionTuple(continuous=np.array([[0.1], [0.1]])),
-        outputs=fake_action_outputs,
-        agent_ids=mock_decision_steps.agent_id,
-    )
+    fake_action_info = _create_action_info(2, mock_decision_steps.agent_id)
    processor.publish_trajectory_queue(tqueue)
    # This is like the initial state after the env reset
    processor.add_experiences(
    # Assert that the trajectory is of length 5
    trajectory = tqueue.put.call_args_list[0][0][0]
    assert len(trajectory.steps) == 5
+    # Make sure ungrouped agents don't have team obs
+    for step in trajectory.steps:
+        assert len(step.group_status) == 0

    # Assert that the AgentProcessor is empty
    assert len(processor.experience_buffers[0]) == 0
    assert len(processor.experience_buffers[0]) == 0


+def test_group_statuses():
+    """
+    Feed grouped mock steps through an AgentProcessor and check the length of
+    ``group_status`` on the steps of a trajectory pushed to the (mocked)
+    trajectory queue, before and after a terminal step is reported for part
+    of the group.
+    """
+    policy = create_mock_policy()
+    # The queue is a Mock so every put() call can be inspected afterwards.
+    tqueue = mock.Mock()
+    name_behavior_id = "test_brain_name"
+    processor = AgentProcessor(
+        policy,
+        name_behavior_id,
+        max_trajectory_length=5,
+        stats_reporter=StatsReporter("testcat"),
+    )
+
+    # Mock steps for four agents, created with grouped=True.
+    mock_decision_steps, mock_terminal_steps = mb.create_mock_steps(
+        num_agents=4,
+        observation_specs=create_observation_specs_with_shapes([(8,)]),
+        action_spec=ActionSpec.create_continuous(2),
+        grouped=True,
+    )
+    fake_action_info = _create_action_info(4, mock_decision_steps.agent_id)
+    processor.publish_trajectory_queue(tqueue)
+    # This is like the initial state after the env reset
+    processor.add_experiences(
+        mock_decision_steps, mock_terminal_steps, 0, ActionInfo.empty()
+    )
+    # Two regular steps with all four agents.
+    for _ in range(2):
+        processor.add_experiences(
+            mock_decision_steps, mock_terminal_steps, 0, fake_action_info
+        )
+
+    # Make terminal steps for some dead agents
+    mock_decision_steps_2, mock_terminal_steps_2 = mb.create_mock_steps(
+        num_agents=2,
+        observation_specs=create_observation_specs_with_shapes([(8,)]),
+        action_spec=ActionSpec.create_continuous(2),
+        done=True,
+        grouped=True,
+    )
+
+    processor.add_experiences(
+        mock_decision_steps_2, mock_terminal_steps_2, 0, fake_action_info
+    )
+    # Rebuild the action info and continue with the original four-agent steps.
+    fake_action_info = _create_action_info(4, mock_decision_steps.agent_id)
+    for _ in range(3):
+        processor.add_experiences(
+            mock_decision_steps, mock_terminal_steps, 0, fake_action_info
+        )
+
+    # Assert that four trajectories have been added to the Trainer
+    assert len(tqueue.put.call_args_list) == 4
+    # Last trajectory should be the longest
+    # NOTE(review): call_args_list[0][0][-1] is the last positional argument of
+    # the FIRST put() call, not the argument of the last call (that would be
+    # call_args_list[-1][0][0]) -- confirm which trajectory is intended.
+    trajectory = tqueue.put.call_args_list[0][0][-1]
+
+    # Make sure trajectory has the right Groupmate Experiences
+    for step in trajectory.steps[0:3]:
+        assert len(step.group_status) == 3
+    # After 2 agents have died
+    # NOTE(review): if the selected trajectory has 3 or fewer steps this loop
+    # body never runs and the check passes vacuously -- TODO confirm length.
+    for step in trajectory.steps[3:]:
+        assert len(step.group_status) == 1
+
+
 def test_agent_deletion():
    policy = create_mock_policy()
    tqueue = mock.Mock()
        stats_reporter=StatsReporter("testcat"),
    )
    fake_action_outputs = {
-        "action": ActionTuple(continuous=np.array([[0.1]])),
+        "action": ActionTuple(continuous=np.array([[0.1]], dtype=np.float32)),
-        "log_probs": LogProbsTuple(continuous=np.array([[0.1]])),
+        "log_probs": LogProbsTuple(continuous=np.array([[0.1]], dtype=np.float32)),
    }

    mock_decision_step, mock_terminal_step = mb.create_mock_steps(
        done=True,
    )
    fake_action_info = ActionInfo(
-        action=ActionTuple(continuous=np.array([[0.1]])),
-        env_action=ActionTuple(continuous=np.array([[0.1]])),
+        action=ActionTuple(continuous=np.array([[0.1]], dtype=np.float32)),
+        env_action=ActionTuple(continuous=np.array([[0.1]], dtype=np.float32)),
+        value=[0.1],
        outputs=fake_action_outputs,
        agent_ids=mock_decision_step.agent_id,
    )
        stats_reporter=StatsReporter("testcat"),
    )
    fake_action_outputs = {
-        "action": ActionTuple(continuous=np.array([[0.1]])),
+        "action": ActionTuple(continuous=np.array([[0.1]], dtype=np.float32)),
-        "log_probs": LogProbsTuple(continuous=np.array([[0.1]])),
+        "log_probs": LogProbsTuple(continuous=np.array([[0.1]], dtype=np.float32)),
    }

    mock_decision_step, mock_terminal_step = mb.create_mock_steps(
    )
    fake_action_info = ActionInfo(
-        action=ActionTuple(continuous=np.array([[0.1]])),
-        env_action=ActionTuple(continuous=np.array([[0.1]])),
+        action=ActionTuple(continuous=np.array([[0.1]], dtype=np.float32)),
+        env_action=ActionTuple(continuous=np.array([[0.1]], dtype=np.float32)),
+        value=[0.1],
        outputs=fake_action_outputs,
        agent_ids=mock_decision_step.agent_id,
    )
--- a/ml-agents/mlagents/trainers/tests/test_buffer.py
+++ b/ml-agents/mlagents/trainers/tests/test_buffer.py
    b = AgentBuffer()
    for step in range(9):
        b[ObsUtil.get_name_at(0)].append(
-            [
-                100 * fake_agent_id + 10 * step + 1,
-                100 * fake_agent_id + 10 * step + 2,
-                100 * fake_agent_id + 10 * step + 3,
-            ]
+            np.array(
+                [
+                    100 * fake_agent_id + 10 * step + 1,
+                    100 * fake_agent_id + 10 * step + 2,
+                    100 * fake_agent_id + 10 * step + 3,
+                ],
+                dtype=np.float32,
+            )
-            [100 * fake_agent_id + 10 * step + 4, 100 * fake_agent_id + 10 * step + 5]
+            np.array(
+                [
+                    100 * fake_agent_id + 10 * step + 4,
+                    100 * fake_agent_id + 10 * step + 5,
+                ],
+                dtype=np.float32,
+            )
        )
    return b

    a = agent_1_buffer[ObsUtil.get_name_at(0)].get_batch(
        batch_size=2, training_length=1, sequential=True
    )
-    assert_array(np.array(a), np.array([[171, 172, 173], [181, 182, 183]]))
+    assert_array(
+        np.array(a), np.array([[171, 172, 173], [181, 182, 183]], dtype=np.float32)
+    )
    a = agent_2_buffer[ObsUtil.get_name_at(0)].get_batch(
        batch_size=2, training_length=3, sequential=True
    )
                [261, 262, 263],
                [271, 272, 273],
                [281, 282, 283],
-            ]
+            ],
+            dtype=np.float32,
        ),
    )
    a = agent_2_buffer[ObsUtil.get_name_at(0)].get_batch(
--- a/ml-agents/mlagents/trainers/tests/test_trajectory.py
+++ b/ml-agents/mlagents/trainers/tests/test_trajectory.py

 def test_trajectory_to_agentbuffer():
    length = 15
+    # These keys should be of type np.ndarray
    wanted_keys = [
        (ObservationKeyPrefix.OBSERVATION, 0),
        (ObservationKeyPrefix.OBSERVATION, 1),
        BufferKey.ACTION_MASK,
        BufferKey.PREV_ACTION,
        BufferKey.ENVIRONMENT_REWARDS,
+        BufferKey.GROUP_REWARD,
-    wanted_keys = set(wanted_keys)
+    # These keys should be of type List
+    wanted_group_keys = [
+        BufferKey.GROUPMATE_REWARDS,
+        BufferKey.GROUP_CONTINUOUS_ACTION,
+        BufferKey.GROUP_DISCRETE_ACTION,
+        BufferKey.GROUP_DONES,
+        BufferKey.GROUP_NEXT_CONT_ACTION,
+        BufferKey.GROUP_NEXT_DISC_ACTION,
+    ]
+    wanted_keys = set(wanted_keys + wanted_group_keys)
    trajectory = make_fake_trajectory(
        length=length,
        observation_specs=create_observation_specs_with_shapes(
+        num_other_agents_in_group=4,
    )
    agentbuffer = trajectory.to_agentbuffer()
    seen_keys = set()

-    assert seen_keys == wanted_keys
+    assert seen_keys.issuperset(wanted_keys)
+
+    for _key in wanted_group_keys:
+        for step in agentbuffer[_key]:
+            assert len(step) == 4
--- a/ml-agents/mlagents/trainers/tests/torch/test_ghost.py
+++ b/ml-agents/mlagents/trainers/tests/torch/test_ghost.py
 VECTOR_ACTION_SPACE = 1
 VECTOR_OBS_SPACE = 8
 DISCRETE_ACTION_SPACE = [3, 3, 3, 2]
-BUFFER_INIT_SAMPLES = 513
+BUFFER_INIT_SAMPLES = 10241
 NUM_AGENTS = 12


    assert policy_queue0.empty() and not policy_queue1.empty()
    # clear
    policy_queue1.get_nowait()
-
-    mock_specs = mb.setup_test_behavior_specs(
-        False,
-        False,
-        vector_action_space=VECTOR_ACTION_SPACE,
-        vector_obs_space=VECTOR_OBS_SPACE,
-    )

    buffer = mb.simulate_rollout(BUFFER_INIT_SAMPLES, mock_specs)
    # Mock out reward signal eval
--- a/ml-agents/mlagents/trainers/torch/attention.py
+++ b/ml-agents/mlagents/trainers/torch/attention.py
 from mlagents.torch_utils import torch
+import warnings
 from typing import Tuple, Optional, List
 from mlagents.trainers.torch.layers import (
    LinearEncoder,
 from mlagents.trainers.exception import UnityTrainerException


-def get_zero_entities_mask(observations: List[torch.Tensor]) -> List[torch.Tensor]:
+def get_zero_entities_mask(entities: List[torch.Tensor]) -> List[torch.Tensor]:
    """
    Takes a List of Tensors and returns a List of mask Tensor with 1 if the input was
    all zeros (on dimension 2) and 0 otherwise. This is used in the Attention
+
+        if exporting_to_onnx.is_exporting():
+            with warnings.catch_warnings():
+                # We ignore a TracerWarning from PyTorch that warns that doing
+                # shape[n].item() will cause the trace to be incorrect (the trace might
+                # not generalize to other inputs)
+                # We ignore this warning because we know the model will always be
+                # run with inputs of the same shape
+                warnings.simplefilter("ignore")
+                # When exporting to ONNX, we want to transpose the entities. This is
+                # because ONNX only supports input in NCHW (channel first) format.
+                # Barracuda also expects to get data in NCHW.
+                entities = [
+                    torch.transpose(obs, 2, 1).reshape(
+                        -1, obs.shape[1].item(), obs.shape[2].item()
+                    )
+                    for obs in entities
+                ]
+
-            (torch.sum(ent ** 2, axis=2) < 0.01).float() for ent in observations
+            (torch.sum(ent ** 2, axis=2) < 0.01).float() for ent in entities
        ]
    return key_masks

        )

    def forward(self, x_self: torch.Tensor, entities: torch.Tensor) -> torch.Tensor:
+        num_entities = self.entity_num_max_elements
+        if num_entities < 0:
+            if exporting_to_onnx.is_exporting():
+                raise UnityTrainerException(
+                    "Trying to export an attention mechanism that doesn't have a set max \
+                    number of elements."
+                )
+            num_entities = entities.shape[1]
+
+        if exporting_to_onnx.is_exporting():
+            # When exporting to ONNX, we want to transpose the entities. This is
+            # because ONNX only supports input in NCHW (channel first) format.
+            # Barracuda also expects to get data in NCHW.
+            entities = torch.transpose(entities, 2, 1).reshape(
+                -1, num_entities, self.entity_size
+            )
+
-            num_entities = self.entity_num_max_elements
-            if num_entities < 0:
-                if exporting_to_onnx.is_exporting():
-                    raise UnityTrainerException(
-                        "Trying to export an attention mechanism that doesn't have a set max \
-                        number of elements."
-                    )
-                num_entities = entities.shape[1]
            expanded_self = x_self.reshape(-1, 1, self.self_size)
            expanded_self = torch.cat([expanded_self] * num_entities, dim=1)
            # Concatenate all observations with self
--- a/ml-agents/mlagents/trainers/torch/components/reward_providers/curiosity_reward_provider.py
+++ b/ml-agents/mlagents/trainers/torch/components/reward_providers/curiosity_reward_provider.py
 from mlagents.trainers.settings import CuriositySettings

 from mlagents_envs.base_env import BehaviorSpec
+from mlagents_envs import logging_util
-from mlagents.trainers.settings import NetworkSettings, EncoderType
+
+logger = logging_util.get_logger(__name__)


 class ActionPredictionTuple(NamedTuple):
    def __init__(self, specs: BehaviorSpec, settings: CuriositySettings) -> None:
        super().__init__()
        self._action_spec = specs.action_spec
-        state_encoder_settings = NetworkSettings(
-            normalize=False,
-            hidden_units=settings.encoding_size,
-            num_layers=2,
-            vis_encode_type=EncoderType.SIMPLE,
-            memory=None,
-        )
+
+        state_encoder_settings = settings.network_settings
+        if state_encoder_settings.memory is not None:
+            state_encoder_settings.memory = None
+            logger.warning(
+                "memory was specified in network_settings but is not supported by Curiosity. It is being ignored."
+            )
+
        self._state_encoder = NetworkBody(
            specs.observation_specs, state_encoder_settings
        )
        self.inverse_model_action_encoding = torch.nn.Sequential(
-            LinearEncoder(2 * settings.encoding_size, 1, 256)
+            LinearEncoder(2 * state_encoder_settings.hidden_units, 1, 256)
        )

        if self._action_spec.continuous_size > 0:

        self.forward_model_next_state_prediction = torch.nn.Sequential(
            LinearEncoder(
-                settings.encoding_size + self._action_flattener.flattened_size, 1, 256
+                state_encoder_settings.hidden_units
+                + self._action_flattener.flattened_size,
+                1,
+                256,
-            linear_layer(256, settings.encoding_size),
+            linear_layer(256, state_encoder_settings.hidden_units),
        )

    def get_current_state(self, mini_batch: AgentBuffer) -> torch.Tensor:
--- a/ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py
+++ b/ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py
 )
 from mlagents.trainers.settings import GAILSettings
 from mlagents_envs.base_env import BehaviorSpec
+from mlagents_envs import logging_util
-from mlagents.trainers.settings import NetworkSettings, EncoderType
+logger = logging_util.get_logger(__name__)
+
-        self._ignore_done = True
+        self._ignore_done = False
        self._discriminator_network = DiscriminatorNetwork(specs, settings)
        self._discriminator_network.to(default_device())
        _, self._demo_buffer = demo_to_buffer(
            )

    def update(self, mini_batch: AgentBuffer) -> Dict[str, np.ndarray]:
+
+        self._discriminator_network.encoder.update_normalization(expert_batch)
+
        loss, stats_dict = self._discriminator_network.compute_loss(
            mini_batch, expert_batch
        )
        self._use_vail = settings.use_vail
        self._settings = settings

-        encoder_settings = NetworkSettings(
-            normalize=False,
-            hidden_units=settings.encoding_size,
-            num_layers=2,
-            vis_encode_type=EncoderType.SIMPLE,
-            memory=None,
-        )
+        encoder_settings = settings.network_settings
+        if encoder_settings.memory is not None:
+            encoder_settings.memory = None
+            logger.warning(
+                "memory was specified in network_settings but is not supported by GAIL. It is being ignored."
+            )
+
        self._action_flattener = ActionFlattener(specs.action_spec)
        unencoded_size = (
            self._action_flattener.flattened_size + 1 if settings.use_actions else 0
        )

-        estimator_input_size = settings.encoding_size
+        estimator_input_size = encoder_settings.hidden_units
        if settings.use_vail:
            estimator_input_size = self.z_size
            self._z_sigma = torch.nn.Parameter(
-                settings.encoding_size,
+                encoder_settings.hidden_units,
                self.z_size,
                kernel_init=Initialization.KaimingHeNormal,
                kernel_gain=0.1,
--- a/ml-agents/mlagents/trainers/torch/components/reward_providers/rnd_reward_provider.py
+++ b/ml-agents/mlagents/trainers/torch/components/reward_providers/rnd_reward_provider.py
 from mlagents.trainers.settings import RNDSettings

 from mlagents_envs.base_env import BehaviorSpec
+from mlagents_envs import logging_util
-from mlagents.trainers.settings import NetworkSettings, EncoderType
+
+logger = logging_util.get_logger(__name__)


 class RNDRewardProvider(BaseRewardProvider):

    def __init__(self, specs: BehaviorSpec, settings: RNDSettings) -> None:
        super().__init__()
-        state_encoder_settings = NetworkSettings(
-            normalize=True,
-            hidden_units=settings.encoding_size,
-            num_layers=3,
-            vis_encode_type=EncoderType.SIMPLE,
-            memory=None,
-        )
+        state_encoder_settings = settings.network_settings
+        if state_encoder_settings.memory is not None:
+            state_encoder_settings.memory = None
+            logger.warning(
+                "memory was specified in network_settings but is not supported by RND. It is being ignored."
+            )
+
        self._encoder = NetworkBody(specs.observation_specs, state_encoder_settings)

    def forward(self, mini_batch: AgentBuffer) -> torch.Tensor: