
Merge branch 'develop-base-teammanager' into develop-agentprocessor-teammanager

/develop/coma2/samenet
Ervin Teng, 3 years ago
Current commit
d4438878
172 files changed in total, with 1,463 insertions and 929 deletions
Changed files (the first 100 of the 172 are listed here; the number in parentheses is the change count shown for each file):

1. .github/workflows/pytest.yml (3)
2. .yamato/com.unity.ml-agents-performance.yml (1)
3. .yamato/com.unity.ml-agents-test.yml (4)
4. .yamato/compressed-sensor-test.yml (4)
5. .yamato/gym-interface-test.yml (4)
6. .yamato/python-ll-api-test.yml (4)
7. .yamato/test_versions.metafile (15)
8. DevProject/Packages/manifest.json (2)
9. Project/Assets/ML-Agents/Editor/Tests/StandaloneBuildTest.cs (2)
10. Project/Assets/ML-Agents/Examples/3DBall/Prefabs/3DBall.prefab (6)
11. Project/Assets/ML-Agents/Examples/3DBall/Prefabs/3DBallHardNew.prefab (24)
12. Project/Assets/ML-Agents/Examples/3DBall/Prefabs/Visual3DBall.prefab (6)
13. Project/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DHardAgent.cs (20)
14. Project/Assets/ML-Agents/Examples/Basic/Prefabs/Basic.prefab (6)
15. Project/Assets/ML-Agents/Examples/Basic/Scripts/BasicActuatorComponent.cs (2)
16. Project/Assets/ML-Agents/Examples/Bouncer/Prefabs/Environment.prefab (21)
17. Project/Assets/ML-Agents/Examples/Crawler/Prefabs/CrawlerBase.prefab (6)
18. Project/Assets/ML-Agents/Examples/FoodCollector/Prefabs/FoodCollectorArea.prefab (10)
19. Project/Assets/ML-Agents/Examples/FoodCollector/Prefabs/GridFoodCollectorArea.prefab (10)
20. Project/Assets/ML-Agents/Examples/FoodCollector/Prefabs/VisualFoodCollectorArea.prefab (8)
21. Project/Assets/ML-Agents/Examples/GridWorld/Prefabs/Area.prefab (23)
22. Project/Assets/ML-Agents/Examples/GridWorld/Scenes/GridWorld.unity (8)
23. Project/Assets/ML-Agents/Examples/Hallway/Prefabs/SymbolFinderArea.prefab (6)
24. Project/Assets/ML-Agents/Examples/Hallway/Prefabs/VisualSymbolFinderArea.prefab (43)
25. Project/Assets/ML-Agents/Examples/Match3/Prefabs/Match3Heuristic.prefab (6)
26. Project/Assets/ML-Agents/Examples/Match3/Prefabs/Match3VectorObs.prefab (6)
27. Project/Assets/ML-Agents/Examples/Match3/Prefabs/Match3VisualObs.prefab (6)
28. Project/Assets/ML-Agents/Examples/Match3/Scripts/Match3ExampleActuatorComponent.cs (2)
29. Project/Assets/ML-Agents/Examples/PushBlock/Prefabs/PushBlockArea.prefab (22)
30. Project/Assets/ML-Agents/Examples/PushBlock/Prefabs/PushBlockVisualArea.prefab (8)
31. Project/Assets/ML-Agents/Examples/Pyramids/Prefabs/AreaPB.prefab (22)
32. Project/Assets/ML-Agents/Examples/Pyramids/Prefabs/VisualAreaPyramids.prefab (43)
33. Project/Assets/ML-Agents/Examples/Reacher/Prefabs/Agent.prefab (22)
34. Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ModelOverrider.cs (29)
35. Project/Assets/ML-Agents/Examples/Soccer/Prefabs/SoccerFieldTwos.prefab (28)
36. Project/Assets/ML-Agents/Examples/Soccer/Prefabs/StrikersVsGoalieField.prefab (21)
37. Project/Assets/ML-Agents/Examples/Tennis/Prefabs/TennisArea.prefab (18)
38. Project/Assets/ML-Agents/Examples/Walker/Prefabs/Ragdoll/WalkerRagdollBase.prefab (12)
39. Project/Assets/ML-Agents/Examples/Walker/Prefabs/Ragdoll/WalkerRagdollDySingleSpeedVariant.prefab (5)
40. Project/Assets/ML-Agents/Examples/WallJump/Prefabs/WallJumpArea.prefab (7)
41. Project/Assets/ML-Agents/Examples/Worm/Prefabs/WormBasePrefab.prefab (6)
42. Project/ProjectSettings/TagManager.asset (1)
43. README.md (1)
44. com.unity.ml-agents.extensions/Runtime/Match3/Match3ActuatorComponent.cs (2)
45. com.unity.ml-agents.extensions/Runtime/MultiAgent/BaseMultiAgentGroup.cs (135)
46. com.unity.ml-agents/CHANGELOG.md (33)
47. com.unity.ml-agents/Editor/BehaviorParametersEditor.cs (16)
48. com.unity.ml-agents/Runtime/Academy.cs (58)
49. com.unity.ml-agents/Runtime/Actuators/ActuatorComponent.cs (14)
50. com.unity.ml-agents/Runtime/Actuators/ActuatorManager.cs (14)
51. com.unity.ml-agents/Runtime/Agent.cs (8)
52. com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs (15)
53. com.unity.ml-agents/Runtime/Communicator/ICommunicator.cs (5)
54. com.unity.ml-agents/Runtime/Communicator/RpcCommunicator.cs (147)
55. com.unity.ml-agents/Runtime/Communicator/UnityRLCapabilities.cs (5)
56. com.unity.ml-agents/Runtime/Grpc/CommunicatorObjects/Capabilities.cs (40)
57. com.unity.ml-agents/Runtime/IMultiAgentGroup.cs (2)
58. com.unity.ml-agents/Runtime/Inference/ApplierImpl.cs (129)
59. com.unity.ml-agents/Runtime/Inference/BarracudaModelParamLoader.cs (70)
60. com.unity.ml-agents/Runtime/Inference/ModelRunner.cs (28)
61. com.unity.ml-agents/Runtime/Inference/Utils/Multinomial.cs (17)
62. com.unity.ml-agents/Runtime/Policies/BarracudaPolicy.cs (12)
63. com.unity.ml-agents/Runtime/Policies/BehaviorParameters.cs (2)
64. com.unity.ml-agents/Runtime/Sensors/BufferSensor.cs (21)
65. com.unity.ml-agents/Runtime/Sensors/BufferSensorComponent.cs (14)
66. com.unity.ml-agents/Runtime/Sensors/CameraSensor.cs (17)
67. com.unity.ml-agents/Runtime/Sensors/IDimensionPropertiesSensor.cs (2)
68. com.unity.ml-agents/Runtime/SideChannels/SideChannel.cs (14)
69. com.unity.ml-agents/Runtime/StatsRecorder.cs (7)
70. com.unity.ml-agents/Runtime/Timer.cs (4)
71. com.unity.ml-agents/Tests/Editor/Communicator/RpcCommunicatorTests.cs (19)
72. com.unity.ml-agents/Tests/Editor/DiscreteActionOutputApplierTest.cs (198)
73. com.unity.ml-agents/Tests/Editor/ModelRunnerTest.cs (13)
74. com.unity.ml-agents/Tests/Editor/ParameterLoaderTest.cs (30)
75. com.unity.ml-agents/package.json (2)
76. docs/Installation.md (2)
77. docs/Learning-Environment-Design-Agents.md (64)
78. docs/Learning-Environment-Examples.md (27)
79. docs/Migrating.md (9)
80. docs/Training-Configuration-File.md (2)
81. ml-agents-envs/mlagents_envs/base_env.py (2)
82. ml-agents-envs/mlagents_envs/communicator_objects/capabilities_pb2.py (11)
83. ml-agents-envs/mlagents_envs/communicator_objects/capabilities_pb2.pyi (6)
84. ml-agents-envs/mlagents_envs/environment.py (4)
85. ml-agents-envs/mlagents_envs/rpc_utils.py (21)
86. ml-agents-envs/mlagents_envs/side_channel/stats_side_channel.py (3)
87. ml-agents-envs/mlagents_envs/tests/test_rpc_utils.py (14)
88. ml-agents/mlagents/trainers/buffer.py (380)
89. ml-agents/mlagents/trainers/cli_utils.py (3)
90. ml-agents/mlagents/trainers/demo_loader.py (16)
91. ml-agents/mlagents/trainers/ghost/trainer.py (40)
92. ml-agents/mlagents/trainers/learn.py (41)
93. ml-agents/mlagents/trainers/ppo/optimizer_torch.py (22)
94. ml-agents/mlagents/trainers/ppo/trainer.py (33)
95. ml-agents/mlagents/trainers/sac/optimizer_torch.py (24)
96. ml-agents/mlagents/trainers/sac/trainer.py (7)
97. ml-agents/mlagents/trainers/settings.py (18)
98. ml-agents/mlagents/trainers/stats.py (39)
99. ml-agents/mlagents/trainers/tests/__init__.py (6)
100. ml-agents/mlagents/trainers/tests/dummy_config.py (2)

3
.github/workflows/pytest.yml


jobs:
pytest:
runs-on: ubuntu-latest
env:
TEST_ENFORCE_BUFFER_KEY_TYPES: 1
strategy:
matrix:
python-version: [3.6.x, 3.7.x, 3.8.x]

python -m pip install --progress-bar=off -e ./ml-agents
python -m pip install --progress-bar=off -r test_requirements.txt
python -m pip install --progress-bar=off -e ./gym-unity
python -m pip install --progress-bar=off -e ./ml-agents-plugin-examples
- name: Save python dependencies
run: |
pip freeze > pip_versions-${{ matrix.python-version }}.txt

1
.yamato/com.unity.ml-agents-performance.yml


test_editors:
- version: 2019.4
- version: 2020.1
- version: 2020.2
---
{% for editor in test_editors %}

4
.yamato/com.unity.ml-agents-test.yml


enableCodeCoverage: !!bool true
testProject: DevProject
enableNoDefaultPackages: !!bool true
- version: 2020.1
enableCodeCoverage: !!bool true
testProject: DevProject
enableNoDefaultPackages: !!bool true
- version: 2020.2
enableCodeCoverage: !!bool true
testProject: DevProject

4
.yamato/compressed-sensor-test.yml


- .yamato/standalone-build-test.yml#test_linux_standalone_{{ editor.version }}
triggers:
cancel_old_ci: true
{% if editor.extra_test == "sensor" %}
expression: |
(pull_request.target eq "master" OR
pull_request.target match "release.+") AND

pull_request.changes.any match "Project/**" OR
pull_request.changes.any match "ml-agents/**" OR
pull_request.changes.any match "ml-agents/tests/yamato/**" OR
{% endif %}
{% endfor %}

4
.yamato/gym-interface-test.yml


- .yamato/standalone-build-test.yml#test_linux_standalone_{{ editor.version }}
triggers:
cancel_old_ci: true
{% if editor.extra_test == "gym" %}
expression: |
(pull_request.target eq "master" OR
pull_request.target match "release.+") AND

pull_request.changes.any match "ml-agents/**" OR
pull_request.changes.any match "ml-agents/tests/yamato/**" OR
{% endif %}
{% endfor %}

4
.yamato/python-ll-api-test.yml


- .yamato/standalone-build-test.yml#test_linux_standalone_{{ editor.version }}
triggers:
cancel_old_ci: true
{% if editor.extra_test == "llapi" %}
expression: |
(pull_request.target eq "master" OR
pull_request.target match "release.+") AND

pull_request.changes.any match "ml-agents/**" OR
pull_request.changes.any match "ml-agents/tests/yamato/**" OR
{% endif %}
{% endfor %}

15
.yamato/test_versions.metafile


# List of editor versions for standalone-build-test and its dependencies.
# csharp_backcompat_version is used in training-int-tests to determine the
# older package version to run the backwards compat tests against.
# We always run training-int-tests for all versions of the editor
# For each "other" test, we only run it against a single version of the
# editor to reduce the number of yamato jobs
csharp_backcompat_version: 1.0.0
extra_test: llapi
csharp_backcompat_version: 1.0.0
- version: 2020.1
csharp_backcompat_version: 1.0.0
extra_test: gym
# 2020.2 moved the AssetImporters namespace
# but we didn't handle this until 1.2.0
csharp_backcompat_version: 1.2.0
extra_test: sensor

2
DevProject/Packages/manifest.json


"com.unity.purchasing": "2.1.0",
"com.unity.test-framework": "1.1.16",
"com.unity.test-framework.performance": "2.2.0-preview",
"com.unity.testtools.codecoverage": "0.2.2-preview",
"com.unity.testtools.codecoverage": "1.0.0-pre.3",
"com.unity.textmeshpro": "2.0.1",
"com.unity.timeline": "1.2.12",
"com.unity.ugui": "1.0.0",

2
Project/Assets/ML-Agents/Editor/Tests/StandaloneBuildTest.cs


scenes,
outputPath,
buildTarget,
BuildOptions.None
BuildOptions.Development
);
var isOk = buildResult.summary.result == BuildResult.Succeeded;
var error = "";

6
Project/Assets/ML-Agents/Examples/3DBall/Prefabs/3DBall.prefab


m_BrainParameters:
VectorObservationSize: 8
NumStackedVectorObservations: 1
m_ActionSpec:
m_NumContinuousActions: 2
BranchSizes:
hasUpgradedBrainParametersWithActionSpec: 1
m_InferenceDevice: 0
m_InferenceDevice: 2
m_BehaviorType: 0
m_BehaviorName: 3DBall
TeamId: 0

24
Project/Assets/ML-Agents/Examples/3DBall/Prefabs/3DBallHardNew.prefab


m_Name:
m_EditorClassIdentifier:
m_BrainParameters:
vectorObservationSize: 5
numStackedVectorObservations: 9
vectorActionSize: 02000000
vectorActionDescriptions: []
vectorActionSpaceType: 1
m_Model: {fileID: 11400000, guid: 27d49984757ed46b181090a532ef48e5, type: 3}
m_InferenceDevice: 0
VectorObservationSize: 0
NumStackedVectorObservations: 1
m_ActionSpec:
m_NumContinuousActions: 2
BranchSizes:
VectorActionSize: 02000000
VectorActionDescriptions: []
VectorActionSpaceType: 1
hasUpgradedBrainParametersWithActionSpec: 1
m_Model: {fileID: 11400000, guid: d179c44c147aa4ffbbb725f009eca3b8, type: 3}
m_InferenceDevice: 2
m_UseChildActuators: 1
m_ObservableAttributeHandling: 1
--- !u!114 &114466000339026140
MonoBehaviour:
m_ObjectHideFlags: 0

agentParameters:
maxStep: 0
hasUpgradedFromAgentParameters: 1
maxStep: 5000
MaxStep: 5000
ball: {fileID: 1142513601053358}
--- !u!114 &8193279139064749781
MonoBehaviour:

m_EditorClassIdentifier:
DecisionPeriod: 5
TakeActionsBetweenDecisions: 1
offsetStep: 0
--- !u!114 &7923264721978289873
MonoBehaviour:
m_ObjectHideFlags: 0

m_Script: {fileID: 11500000, guid: 3a6da8f78a394c6ab027688eab81e04d, type: 3}
m_Name:
m_EditorClassIdentifier:
debugCommandLineOverride:
--- !u!1 &1978072206102878
GameObject:
m_ObjectHideFlags: 0

6
Project/Assets/ML-Agents/Examples/3DBall/Prefabs/Visual3DBall.prefab


m_BrainParameters:
VectorObservationSize: 0
NumStackedVectorObservations: 1
m_ActionSpec:
m_NumContinuousActions: 2
BranchSizes:
hasUpgradedBrainParametersWithActionSpec: 1
m_InferenceDevice: 0
m_InferenceDevice: 2
m_BehaviorType: 0
m_BehaviorName: Visual3DBall
TeamId: 0

20
Project/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DHardAgent.cs


using Unity.MLAgents;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Sensors;
using Unity.MLAgents.Sensors.Reflection;
public class Ball3DHardAgent : Agent
{

SetResetParameters();
}
public override void CollectObservations(VectorSensor sensor)
[Observable(numStackedObservations: 9)]
Vector2 Rotation
sensor.AddObservation(gameObject.transform.rotation.z);
sensor.AddObservation(gameObject.transform.rotation.x);
sensor.AddObservation((ball.transform.position - gameObject.transform.position));
get
{
return new Vector2(gameObject.transform.rotation.z, gameObject.transform.rotation.x);
}
}
[Observable(numStackedObservations: 9)]
Vector3 PositionDelta
{
get
{
return ball.transform.position - gameObject.transform.position;
}
}
public override void OnActionReceived(ActionBuffers actionBuffers)
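For context, the Ball3DHardAgent hunk above replaces the hand-written `CollectObservations(VectorSensor)` body with properties marked `[Observable]` (from `Unity.MLAgents.Sensors.Reflection`), and the matching 3DBallHardNew prefab change moves the stacking depth from `numStackedVectorObservations: 9` onto the attributes while switching `m_ObservableAttributeHandling` on. A minimal sketch of the resulting pattern, assuming a hypothetical `TiltAgent` class with a `ball` reference assigned in the Inspector:

```csharp
using UnityEngine;
using Unity.MLAgents;
using Unity.MLAgents.Sensors.Reflection;

// Hypothetical agent illustrating ObservableAttribute-based observations.
public class TiltAgent : Agent
{
    public GameObject ball;  // assumed to be wired up in the Inspector

    // Read automatically every step and stacked 9 deep, matching the
    // numStackedObservations: 9 arguments in the hunk above.
    [Observable(numStackedObservations: 9)]
    Vector2 Rotation
    {
        get { return new Vector2(transform.rotation.z, transform.rotation.x); }
    }

    [Observable(numStackedObservations: 9)]
    Vector3 PositionDelta
    {
        get { return ball.transform.position - transform.position; }
    }
}
```

With attribute-based observations the serialized `VectorObservationSize` can drop to 0, which is what the 3DBallHardNew prefab diff shows.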

6
Project/Assets/ML-Agents/Examples/Basic/Prefabs/Basic.prefab


m_BrainParameters:
VectorObservationSize: 0
NumStackedVectorObservations: 1
m_ActionSpec:
m_NumContinuousActions: 0
BranchSizes:
hasUpgradedBrainParametersWithActionSpec: 1
m_InferenceDevice: 0
m_InferenceDevice: 2
m_BehaviorType: 0
m_BehaviorName: Basic
TeamId: 0

2
Project/Assets/ML-Agents/Examples/Basic/Scripts/BasicActuatorComponent.cs


/// Creates a BasicActuator.
/// </summary>
/// <returns></returns>
#pragma warning disable 672
#pragma warning restore 672
{
return new BasicActuator(basicController);
}

21
Project/Assets/ML-Agents/Examples/Bouncer/Prefabs/Environment.prefab


m_Name:
m_EditorClassIdentifier:
m_BrainParameters:
vectorObservationSize: 6
numStackedVectorObservations: 3
vectorActionSize: 03000000
vectorActionDescriptions: []
vectorActionSpaceType: 1
VectorObservationSize: 6
NumStackedVectorObservations: 3
m_ActionSpec:
m_NumContinuousActions: 3
BranchSizes:
VectorActionSize: 03000000
VectorActionDescriptions: []
VectorActionSpaceType: 1
hasUpgradedBrainParametersWithActionSpec: 1
m_InferenceDevice: 0
m_InferenceDevice: 2
m_UseChildActuators: 1
m_ObservableAttributeHandling: 0
--- !u!114 &114878620968301562
MonoBehaviour:
m_ObjectHideFlags: 0

agentParameters:
maxStep: 0
hasUpgradedFromAgentParameters: 1
maxStep: 0
MaxStep: 0
target: {fileID: 1160631129428284}
bodyObject: {fileID: 1680588139522898}
strength: 500

m_Script: {fileID: 11500000, guid: 3a6da8f78a394c6ab027688eab81e04d, type: 3}
m_Name:
m_EditorClassIdentifier:
debugCommandLineOverride:
--- !u!1 &1680588139522898
GameObject:
m_ObjectHideFlags: 0

6
Project/Assets/ML-Agents/Examples/Crawler/Prefabs/CrawlerBase.prefab


m_BrainParameters:
VectorObservationSize: 32
NumStackedVectorObservations: 1
m_ActionSpec:
m_NumContinuousActions: 20
BranchSizes:
hasUpgradedBrainParametersWithActionSpec: 1
m_InferenceDevice: 0
m_InferenceDevice: 2
m_BehaviorType: 0
m_BehaviorName:
TeamId: 0

10
Project/Assets/ML-Agents/Examples/FoodCollector/Prefabs/FoodCollectorArea.prefab


VectorActionSpaceType: 1
hasUpgradedBrainParametersWithActionSpec: 1
m_Model: {fileID: 11400000, guid: 3210b528a2bc44a86bd6bd1d571070f8, type: 3}
m_InferenceDevice: 0
m_InferenceDevice: 2
m_BehaviorType: 0
m_BehaviorName: FoodCollector
TeamId: 0

VectorActionSpaceType: 1
hasUpgradedBrainParametersWithActionSpec: 1
m_Model: {fileID: 11400000, guid: 3210b528a2bc44a86bd6bd1d571070f8, type: 3}
m_InferenceDevice: 0
m_InferenceDevice: 2
m_BehaviorType: 0
m_BehaviorName: FoodCollector
TeamId: 0

VectorActionSpaceType: 1
hasUpgradedBrainParametersWithActionSpec: 1
m_Model: {fileID: 11400000, guid: 3210b528a2bc44a86bd6bd1d571070f8, type: 3}
m_InferenceDevice: 0
m_InferenceDevice: 2
m_BehaviorType: 0
m_BehaviorName: FoodCollector
TeamId: 0

VectorActionSpaceType: 1
hasUpgradedBrainParametersWithActionSpec: 1
m_Model: {fileID: 11400000, guid: 3210b528a2bc44a86bd6bd1d571070f8, type: 3}
m_InferenceDevice: 0
m_InferenceDevice: 2
m_BehaviorType: 0
m_BehaviorName: FoodCollector
TeamId: 0

VectorActionSpaceType: 1
hasUpgradedBrainParametersWithActionSpec: 1
m_Model: {fileID: 11400000, guid: 3210b528a2bc44a86bd6bd1d571070f8, type: 3}
m_InferenceDevice: 0
m_InferenceDevice: 2
m_BehaviorType: 0
m_BehaviorName: FoodCollector
TeamId: 0

10
Project/Assets/ML-Agents/Examples/FoodCollector/Prefabs/GridFoodCollectorArea.prefab


VectorActionSpaceType: 0
hasUpgradedBrainParametersWithActionSpec: 1
m_Model: {fileID: 11400000, guid: 75910f45f20be49b18e2b95879a217b2, type: 3}
m_InferenceDevice: 0
m_InferenceDevice: 2
m_BehaviorType: 0
m_BehaviorName: GridFoodCollector
TeamId: 0

VectorActionSpaceType: 0
hasUpgradedBrainParametersWithActionSpec: 1
m_Model: {fileID: 11400000, guid: 75910f45f20be49b18e2b95879a217b2, type: 3}
m_InferenceDevice: 0
m_InferenceDevice: 2
m_BehaviorType: 0
m_BehaviorName: GridFoodCollector
TeamId: 0

VectorActionSpaceType: 0
hasUpgradedBrainParametersWithActionSpec: 1
m_Model: {fileID: 11400000, guid: 75910f45f20be49b18e2b95879a217b2, type: 3}
m_InferenceDevice: 0
m_InferenceDevice: 2
m_BehaviorType: 0
m_BehaviorName: GridFoodCollector
TeamId: 0

VectorActionSpaceType: 0
hasUpgradedBrainParametersWithActionSpec: 1
m_Model: {fileID: 11400000, guid: 75910f45f20be49b18e2b95879a217b2, type: 3}
m_InferenceDevice: 0
m_InferenceDevice: 2
m_BehaviorType: 0
m_BehaviorName: GridFoodCollector
TeamId: 0

VectorActionSpaceType: 0
hasUpgradedBrainParametersWithActionSpec: 1
m_Model: {fileID: 11400000, guid: 75910f45f20be49b18e2b95879a217b2, type: 3}
m_InferenceDevice: 0
m_InferenceDevice: 2
m_BehaviorType: 0
m_BehaviorName: GridFoodCollector
TeamId: 0

8
Project/Assets/ML-Agents/Examples/FoodCollector/Prefabs/VisualFoodCollectorArea.prefab


VectorActionSpaceType: 0
hasUpgradedBrainParametersWithActionSpec: 1
m_Model: {fileID: 11400000, guid: ec4b31b5d66ca4e51ae3ac41945facb2, type: 3}
m_InferenceDevice: 0
m_InferenceDevice: 2
m_BehaviorType: 0
m_BehaviorName: VisualFoodCollector
TeamId: 0

VectorActionSpaceType: 0
hasUpgradedBrainParametersWithActionSpec: 1
m_Model: {fileID: 11400000, guid: ec4b31b5d66ca4e51ae3ac41945facb2, type: 3}
m_InferenceDevice: 0
m_InferenceDevice: 2
m_BehaviorType: 0
m_BehaviorName: VisualFoodCollector
TeamId: 0

VectorActionSpaceType: 0
hasUpgradedBrainParametersWithActionSpec: 1
m_Model: {fileID: 11400000, guid: ec4b31b5d66ca4e51ae3ac41945facb2, type: 3}
m_InferenceDevice: 0
m_InferenceDevice: 2
m_BehaviorType: 0
m_BehaviorName: VisualFoodCollector
TeamId: 0

VectorActionSpaceType: 0
hasUpgradedBrainParametersWithActionSpec: 1
m_Model: {fileID: 11400000, guid: ec4b31b5d66ca4e51ae3ac41945facb2, type: 3}
m_InferenceDevice: 0
m_InferenceDevice: 2
m_BehaviorType: 0
m_BehaviorName: VisualFoodCollector
TeamId: 0

23
Project/Assets/ML-Agents/Examples/GridWorld/Prefabs/Area.prefab


m_Name:
m_EditorClassIdentifier:
m_BrainParameters:
vectorObservationSize: 0
numStackedVectorObservations: 1
vectorActionSize: 05000000
vectorActionDescriptions: []
vectorActionSpaceType: 0
VectorObservationSize: 0
NumStackedVectorObservations: 1
m_ActionSpec:
m_NumContinuousActions: 0
BranchSizes: 05000000
VectorActionSize: 05000000
VectorActionDescriptions: []
VectorActionSpaceType: 0
hasUpgradedBrainParametersWithActionSpec: 1
m_InferenceDevice: 0
m_InferenceDevice: 2
m_UseChildActuators: 1
m_ObservableAttributeHandling: 0
--- !u!114 &114650561397225712
MonoBehaviour:
m_ObjectHideFlags: 0

agentParameters:
maxStep: 0
hasUpgradedFromAgentParameters: 1
maxStep: 100
MaxStep: 100
area: {fileID: 114704252266302846}
timeBetweenDecisionsAtInference: 0.15
renderCamera: {fileID: 0}

m_Width: 84
m_Height: 64
m_Grayscale: 0
m_ObservationStacks: 1
m_Compression: 1
--- !u!114 &7980686505185502968
MonoBehaviour:

m_Script: {fileID: 11500000, guid: 3a6da8f78a394c6ab027688eab81e04d, type: 3}
m_Name:
m_EditorClassIdentifier:
debugCommandLineOverride:
--- !u!1 &1625008366184734
GameObject:
m_ObjectHideFlags: 0

trueAgent: {fileID: 1488387672112076}
goalPref: {fileID: 1508142483324970, guid: 1ec4e4e96e7514d45b7ebc3ba5a9a481, type: 3}
pitPref: {fileID: 1811317785436014, guid: d13ee2db77b3a4dcc8664d2fe2a0f219, type: 3}
numberOfObstacles: 1
--- !u!1 &1656910849934022
GameObject:
m_ObjectHideFlags: 0

8
Project/Assets/ML-Agents/Examples/GridWorld/Scenes/GridWorld.unity


m_ReflectionIntensity: 1
m_CustomReflection: {fileID: 0}
m_Sun: {fileID: 0}
m_IndirectSpecularColor: {r: 0.44971228, g: 0.49977815, b: 0.57563734, a: 1}
m_IndirectSpecularColor: {r: 0.4497121, g: 0.49977785, b: 0.57563704, a: 1}
m_UseRadianceAmbientProbe: 0
--- !u!157 &3
LightmapSettings:

m_BrainParameters:
VectorObservationSize: 0
NumStackedVectorObservations: 1
m_ActionSpec:
m_NumContinuousActions: 0
BranchSizes: 05000000
hasUpgradedBrainParametersWithActionSpec: 1
m_InferenceDevice: 0
m_InferenceDevice: 2
m_BehaviorType: 0
m_BehaviorName: GridWorld
TeamId: 0

6
Project/Assets/ML-Agents/Examples/Hallway/Prefabs/SymbolFinderArea.prefab


m_BrainParameters:
VectorObservationSize: 1
NumStackedVectorObservations: 3
m_ActionSpec:
m_NumContinuousActions: 0
BranchSizes: 05000000
hasUpgradedBrainParametersWithActionSpec: 1
m_InferenceDevice: 0
m_InferenceDevice: 2
m_BehaviorType: 0
m_BehaviorName: Hallway
TeamId: 0

43
Project/Assets/ML-Agents/Examples/Hallway/Prefabs/VisualSymbolFinderArea.prefab


m_Name:
m_EditorClassIdentifier:
m_BrainParameters:
vectorObservationSize: 0
numStackedVectorObservations: 1
vectorActionSize: 05000000
vectorActionDescriptions: []
vectorActionSpaceType: 0
VectorObservationSize: 0
NumStackedVectorObservations: 1
m_ActionSpec:
m_NumContinuousActions: 0
BranchSizes: 05000000
VectorActionSize: 05000000
VectorActionDescriptions: []
VectorActionSpaceType: 0
hasUpgradedBrainParametersWithActionSpec: 1
m_InferenceDevice: 0
m_InferenceDevice: 2
m_TeamID: 0
m_useChildSensors: 1
TeamId: 0
m_UseChildSensors: 1
m_UseChildActuators: 1
m_ObservableAttributeHandling: 0
--- !u!114 &114451776683649118
MonoBehaviour:
m_ObjectHideFlags: 0

m_Script: {fileID: 11500000, guid: b446afae240924105b36d07e8d17a608, type: 3}
m_Name:
m_EditorClassIdentifier:
maxStep: 3000
agentParameters:
maxStep: 0
hasUpgradedFromAgentParameters: 1
MaxStep: 3000
ground: {fileID: 1625056884785366}
area: {fileID: 1689874756253538}
symbolOGoal: {fileID: 1800868804754718}

m_Script: {fileID: 11500000, guid: 282f342c2ab144bf38be65d4d0c4e07d, type: 3}
m_Name:
m_EditorClassIdentifier:
camera: {fileID: 20961984019151212}
sensorName: CameraSensor
width: 84
height: 84
grayscale: 0
compression: 1
m_Camera: {fileID: 20961984019151212}
m_SensorName: CameraSensor
m_Width: 84
m_Height: 84
m_Grayscale: 0
m_ObservationStacks: 1
m_Compression: 1
--- !u!114 &640264344416331590
MonoBehaviour:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
DecisionPeriod: 6
RepeatAction: 1
offsetStep: 0
TakeActionsBetweenDecisions: 1
--- !u!1 &1377584197416466
GameObject:
m_ObjectHideFlags: 0

6
Project/Assets/ML-Agents/Examples/Match3/Prefabs/Match3Heuristic.prefab


VectorActionSpaceType: 0
hasUpgradedBrainParametersWithActionSpec: 1
m_Model: {fileID: 11400000, guid: c34da50737a3c4a50918002b20b2b927, type: 3}
m_InferenceDevice: 0
m_InferenceDevice: 2
m_BehaviorType: 0
m_BehaviorName: Match3SmartHeuristic
TeamId: 0

Columns: 8
NumCellTypes: 6
NumSpecialTypes: 2
RandomSeed: -1
RandomSeed: -1
--- !u!114 &3508723250470608014
MonoBehaviour:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
ActuatorName: Match3 Actuator
RandomSeed: -1
HeuristicQuality: 0
--- !u!1 &3508723250774301855
GameObject:
m_ObjectHideFlags: 0

6
Project/Assets/ML-Agents/Examples/Match3/Prefabs/Match3VectorObs.prefab


VectorActionSpaceType: 0
hasUpgradedBrainParametersWithActionSpec: 1
m_Model: {fileID: 11400000, guid: 9e89b8e81974148d3b7213530d00589d, type: 3}
m_InferenceDevice: 0
m_InferenceDevice: 2
m_BehaviorType: 0
m_BehaviorName: Match3VectorObs
TeamId: 0

Columns: 8
NumCellTypes: 6
NumSpecialTypes: 2
RandomSeed: -1
RandomSeed: -1
--- !u!114 &2118285884327540680
MonoBehaviour:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
ActuatorName: Match3 Actuator
RandomSeed: -1
HeuristicQuality: 0

6
Project/Assets/ML-Agents/Examples/Match3/Prefabs/Match3VisualObs.prefab


VectorActionSpaceType: 0
hasUpgradedBrainParametersWithActionSpec: 1
m_Model: {fileID: 11400000, guid: 48d14da88fea74d0693c691c6e3f2e34, type: 3}
m_InferenceDevice: 0
m_InferenceDevice: 2
m_BehaviorType: 0
m_BehaviorName: Match3VisualObs
TeamId: 0

Columns: 8
NumCellTypes: 6
NumSpecialTypes: 2
RandomSeed: -1
RandomSeed: -1
--- !u!114 &3019509692332007783
MonoBehaviour:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
ActuatorName: Match3 Actuator
RandomSeed: -1
HeuristicQuality: 0

2
Project/Assets/ML-Agents/Examples/Match3/Scripts/Match3ExampleActuatorComponent.cs


public class Match3ExampleActuatorComponent : Match3ActuatorComponent
{
/// <inheritdoc/>
#pragma warning disable 672
#pragma warning restore 672
{
var board = GetComponent<Match3Board>();
var agent = GetComponentInParent<Agent>();

22
Project/Assets/ML-Agents/Examples/PushBlock/Prefabs/PushBlockArea.prefab


m_Name:
m_EditorClassIdentifier:
m_BrainParameters:
vectorObservationSize: 0
numStackedVectorObservations: 2
vectorActionSize: 07000000
vectorActionDescriptions: []
vectorActionSpaceType: 0
VectorObservationSize: 0
NumStackedVectorObservations: 2
m_ActionSpec:
m_NumContinuousActions: 0
BranchSizes: 07000000
VectorActionSize: 07000000
VectorActionDescriptions: []
VectorActionSpaceType: 0
hasUpgradedBrainParametersWithActionSpec: 1
m_InferenceDevice: 0
m_InferenceDevice: 2
m_UseChildActuators: 1
m_ObservableAttributeHandling: 0
--- !u!114 &114505490781873732
MonoBehaviour:
m_ObjectHideFlags: 0

agentParameters:
maxStep: 0
hasUpgradedFromAgentParameters: 1
maxStep: 5000
MaxStep: 5000
ground: {fileID: 1500989011945850}
area: {fileID: 1125452240183160}
areaBounds:

m_EditorClassIdentifier:
DecisionPeriod: 5
TakeActionsBetweenDecisions: 1
offsetStep: 0
--- !u!114 &4081319787948195948
MonoBehaviour:
m_ObjectHideFlags: 0

m_Script: {fileID: 11500000, guid: 3a6da8f78a394c6ab027688eab81e04d, type: 3}
m_Name:
m_EditorClassIdentifier:
debugCommandLineOverride:
--- !u!1 &1500989011945850
GameObject:
m_ObjectHideFlags: 0

8
Project/Assets/ML-Agents/Examples/PushBlock/Prefabs/PushBlockVisualArea.prefab


m_BrainParameters:
VectorObservationSize: 0
NumStackedVectorObservations: 1
m_ActionSpec:
m_NumContinuousActions: 0
BranchSizes: 07000000
hasUpgradedBrainParametersWithActionSpec: 1
m_InferenceDevice: 0
m_InferenceDevice: 2
m_UseChildActuators: 1
m_ObservableAttributeHandling: 0
--- !u!114 &114812843792483960
MonoBehaviour:

m_Width: 84
m_Height: 84
m_Grayscale: 0
m_ObservationStacks: 1
m_Compression: 1
--- !u!114 &9049837659352187721
MonoBehaviour:

22
Project/Assets/ML-Agents/Examples/Pyramids/Prefabs/AreaPB.prefab


m_Name:
m_EditorClassIdentifier:
m_BrainParameters:
vectorObservationSize: 4
numStackedVectorObservations: 1
vectorActionSize: 05000000
vectorActionDescriptions: []
vectorActionSpaceType: 0
VectorObservationSize: 4
NumStackedVectorObservations: 1
m_ActionSpec:
m_NumContinuousActions: 0
BranchSizes: 05000000
VectorActionSize: 05000000
VectorActionDescriptions: []
VectorActionSpaceType: 0
hasUpgradedBrainParametersWithActionSpec: 1
m_InferenceDevice: 0
m_InferenceDevice: 2
m_UseChildActuators: 1
m_ObservableAttributeHandling: 0
--- !u!114 &114937736047215868
MonoBehaviour:
m_ObjectHideFlags: 0

agentParameters:
maxStep: 0
hasUpgradedFromAgentParameters: 1
maxStep: 5000
MaxStep: 5000
area: {fileID: 1464170487903594}
areaSwitch: {fileID: 1432086782037750}
useVectorObs: 1

m_EditorClassIdentifier:
DecisionPeriod: 5
TakeActionsBetweenDecisions: 1
offsetStep: 0
--- !u!114 &5712624269609438939
MonoBehaviour:
m_ObjectHideFlags: 0

m_Script: {fileID: 11500000, guid: 3a6da8f78a394c6ab027688eab81e04d, type: 3}
m_Name:
m_EditorClassIdentifier:
debugCommandLineOverride:
--- !u!1 &1148882946833254
GameObject:
m_ObjectHideFlags: 0

43
Project/Assets/ML-Agents/Examples/Pyramids/Prefabs/VisualAreaPyramids.prefab


m_Name:
m_EditorClassIdentifier:
m_BrainParameters:
vectorObservationSize: 0
numStackedVectorObservations: 1
vectorActionSize: 05000000
vectorActionDescriptions: []
vectorActionSpaceType: 0
VectorObservationSize: 0
NumStackedVectorObservations: 1
m_ActionSpec:
m_NumContinuousActions: 0
BranchSizes: 05000000
VectorActionSize: 05000000
VectorActionDescriptions: []
VectorActionSpaceType: 0
hasUpgradedBrainParametersWithActionSpec: 1
m_InferenceDevice: 0
m_InferenceDevice: 2
m_TeamID: 0
m_useChildSensors: 1
TeamId: 0
m_UseChildSensors: 1
m_UseChildActuators: 1
m_ObservableAttributeHandling: 0
--- !u!114 &114741503533626942
MonoBehaviour:
m_ObjectHideFlags: 0

m_Script: {fileID: 11500000, guid: b8db44472779248d3be46895c4d562d5, type: 3}
m_Name:
m_EditorClassIdentifier:
maxStep: 5000
agentParameters:
maxStep: 0
hasUpgradedFromAgentParameters: 1
MaxStep: 5000
area: {fileID: 1055559745433172}
areaSwitch: {fileID: 1212218760704844}
useVectorObs: 0

m_Script: {fileID: 11500000, guid: 282f342c2ab144bf38be65d4d0c4e07d, type: 3}
m_Name:
m_EditorClassIdentifier:
camera: {fileID: 20712684238256298}
sensorName: CameraSensor
width: 84
height: 84
grayscale: 0
compression: 1
m_Camera: {fileID: 20712684238256298}
m_SensorName: CameraSensor
m_Width: 84
m_Height: 84
m_Grayscale: 0
m_ObservationStacks: 1
m_Compression: 1
--- !u!114 &9216598927300453297
MonoBehaviour:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
DecisionPeriod: 5
RepeatAction: 1
offsetStep: 0
TakeActionsBetweenDecisions: 1
--- !u!1 &1747856067778386
GameObject:
m_ObjectHideFlags: 0

22
Project/Assets/ML-Agents/Examples/Reacher/Prefabs/Agent.prefab


m_Name:
m_EditorClassIdentifier:
m_BrainParameters:
vectorObservationSize: 33
numStackedVectorObservations: 1
vectorActionSize: 04000000
vectorActionDescriptions: []
vectorActionSpaceType: 1
VectorObservationSize: 33
NumStackedVectorObservations: 1
m_ActionSpec:
m_NumContinuousActions: 4
BranchSizes:
VectorActionSize: 04000000
VectorActionDescriptions: []
VectorActionSpaceType: 1
hasUpgradedBrainParametersWithActionSpec: 1
m_InferenceDevice: 0
m_InferenceDevice: 2
m_UseChildActuators: 1
m_ObservableAttributeHandling: 0
--- !u!114 &114955921823023820
MonoBehaviour:
m_ObjectHideFlags: 0

agentParameters:
maxStep: 0
hasUpgradedFromAgentParameters: 1
maxStep: 4000
MaxStep: 4000
pendulumA: {fileID: 1644872085946016}
pendulumB: {fileID: 1053261483945176}
hand: {fileID: 1654288206095398}

m_EditorClassIdentifier:
DecisionPeriod: 4
TakeActionsBetweenDecisions: 1
offsetStep: 0
--- !u!114 &7840105453417110232
MonoBehaviour:
m_ObjectHideFlags: 0

m_Script: {fileID: 11500000, guid: 3a6da8f78a394c6ab027688eab81e04d, type: 3}
m_Name:
m_EditorClassIdentifier:
debugCommandLineOverride:
--- !u!1 &1644872085946016
GameObject:
m_ObjectHideFlags: 0

29
Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ModelOverrider.cs


const string k_CommandLineModelOverrideDirectoryFlag = "--mlagents-override-model-directory";
const string k_CommandLineModelOverrideExtensionFlag = "--mlagents-override-model-extension";
const string k_CommandLineQuitAfterEpisodesFlag = "--mlagents-quit-after-episodes";
const string k_CommandLineQuitAfterSeconds = "--mlagents-quit-after-seconds";
const string k_CommandLineQuitOnLoadFailure = "--mlagents-quit-on-load-failure";
// The attached Agent

// Max episodes to run. Only used if > 0
// Will default to 1 if override models are specified, otherwise 0.
int m_MaxEpisodes;
// Deadline - exit if the time exceeds this
DateTime m_Deadline = DateTime.MaxValue;
int m_NumSteps;
int m_PreviousNumSteps;

void GetAssetPathFromCommandLine()
{
var maxEpisodes = 0;
var timeoutSeconds = 0;
string[] commandLineArgsOverride = null;
if (!string.IsNullOrEmpty(debugCommandLineOverride) && Application.isEditor)
{

{
Int32.TryParse(args[i + 1], out maxEpisodes);
}
else if (args[i] == k_CommandLineQuitAfterSeconds && i < args.Length - 1)
{
Int32.TryParse(args[i + 1], out timeoutSeconds);
}
else if (args[i] == k_CommandLineQuitOnLoadFailure)
{
m_QuitOnLoadFailure = true;

m_MaxEpisodes = maxEpisodes > 0 ? maxEpisodes : 1;
Debug.Log($"setting m_MaxEpisodes to {maxEpisodes}");
}
if (timeoutSeconds > 0)
{
m_Deadline = DateTime.Now + TimeSpan.FromSeconds(timeoutSeconds);
Debug.Log($"setting deadline to {timeoutSeconds} from now.");
}
}
void OnEnable()

EditorApplication.isPlaying = false;
#endif
}
else if (DateTime.Now >= m_Deadline)
{
Debug.Log(
$"Deadline exceeded. " +
$"{TotalCompletedEpisodes}/{m_MaxEpisodes} episodes and " +
$"{TotalNumSteps}/{m_MaxEpisodes * m_Agent.MaxStep} steps completed. Exiting.");
Application.Quit(0);
#if UNITY_EDITOR
EditorApplication.isPlaying = false;
#endif
}
m_NumSteps++;
}

28
Project/Assets/ML-Agents/Examples/Soccer/Prefabs/SoccerFieldTwos.prefab


m_BrainParameters:
VectorObservationSize: 0
NumStackedVectorObservations: 1
m_ActionSpec:
m_NumContinuousActions: 0
BranchSizes: 030000000300000003000000
hasUpgradedBrainParametersWithActionSpec: 1
m_InferenceDevice: 0
m_InferenceDevice: 2
m_UseChildActuators: 1
m_ObservableAttributeHandling: 0
--- !u!114 &114492261207303438
MonoBehaviour:

m_BrainParameters:
VectorObservationSize: 0
NumStackedVectorObservations: 1
m_ActionSpec:
m_NumContinuousActions: 0
BranchSizes: 030000000300000003000000
hasUpgradedBrainParametersWithActionSpec: 1
m_InferenceDevice: 0
m_InferenceDevice: 2
m_UseChildActuators: 1
m_ObservableAttributeHandling: 0
--- !u!114 &114850431417842684
MonoBehaviour:

m_BrainParameters:
VectorObservationSize: 0
NumStackedVectorObservations: 1
m_ActionSpec:
m_NumContinuousActions: 0
BranchSizes: 030000000300000003000000
hasUpgradedBrainParametersWithActionSpec: 1
m_InferenceDevice: 0
m_InferenceDevice: 2
m_UseChildActuators: 1
m_ObservableAttributeHandling: 0
--- !u!114 &5320024511406682322
MonoBehaviour:

m_BrainParameters:
VectorObservationSize: 0
NumStackedVectorObservations: 1
m_ActionSpec:
m_NumContinuousActions: 0
BranchSizes: 030000000300000003000000
hasUpgradedBrainParametersWithActionSpec: 1
m_InferenceDevice: 0
m_InferenceDevice: 2
m_UseChildActuators: 1
m_ObservableAttributeHandling: 0
--- !u!114 &5379409612883756837
MonoBehaviour:

21
Project/Assets/ML-Agents/Examples/Soccer/Prefabs/StrikersVsGoalieField.prefab


m_BrainParameters:
VectorObservationSize: 0
NumStackedVectorObservations: 1
m_ActionSpec:
m_NumContinuousActions: 0
BranchSizes: 030000000300000003000000
hasUpgradedBrainParametersWithActionSpec: 1
m_InferenceDevice: 0
m_InferenceDevice: 2
m_UseChildActuators: 1
m_ObservableAttributeHandling: 0
--- !u!114 &114492261207303438
MonoBehaviour:

m_BrainParameters:
VectorObservationSize: 0
NumStackedVectorObservations: 1
m_ActionSpec:
m_NumContinuousActions: 0
BranchSizes: 030000000300000003000000
hasUpgradedBrainParametersWithActionSpec: 1
m_InferenceDevice: 0
m_InferenceDevice: 2
m_UseChildActuators: 1
m_ObservableAttributeHandling: 0
--- !u!114 &114850431417842684
MonoBehaviour:

m_BrainParameters:
VectorObservationSize: 0
NumStackedVectorObservations: 1
m_ActionSpec:
m_NumContinuousActions: 0
BranchSizes: 030000000300000003000000
hasUpgradedBrainParametersWithActionSpec: 1
m_InferenceDevice: 0
m_InferenceDevice: 2
m_UseChildActuators: 1
m_ObservableAttributeHandling: 0
--- !u!114 &5379409612883756837
MonoBehaviour:

18
Project/Assets/ML-Agents/Examples/Tennis/Prefabs/TennisArea.prefab


m_BrainParameters:
VectorObservationSize: 9
NumStackedVectorObservations: 3
m_ActionSpec:
m_NumContinuousActions: 3
BranchSizes:
hasUpgradedBrainParametersWithActionSpec: 1
m_InferenceDevice: 0
m_InferenceDevice: 2
m_UseChildActuators: 1
m_ObservableAttributeHandling: 0
--- !u!114 &114915946461826994
MonoBehaviour:
m_ObjectHideFlags: 0

m_Script: {fileID: 11500000, guid: 3a6da8f78a394c6ab027688eab81e04d, type: 3}
m_Name:
m_EditorClassIdentifier:
debugCommandLineOverride:
--- !u!1 &1194790474478638
GameObject:
m_ObjectHideFlags: 0

m_BrainParameters:
VectorObservationSize: 9
NumStackedVectorObservations: 3
m_ActionSpec:
m_NumContinuousActions: 3
BranchSizes:
hasUpgradedBrainParametersWithActionSpec: 1
m_InferenceDevice: 0
m_InferenceDevice: 2
m_UseChildActuators: 1
m_ObservableAttributeHandling: 0
--- !u!114 &114800310164848628
MonoBehaviour:
m_ObjectHideFlags: 0

m_Script: {fileID: 11500000, guid: 3a6da8f78a394c6ab027688eab81e04d, type: 3}
m_Name:
m_EditorClassIdentifier:
debugCommandLineOverride:
--- !u!1 &1969551055586186
GameObject:
m_ObjectHideFlags: 0

12
Project/Assets/ML-Agents/Examples/Walker/Prefabs/Ragdoll/WalkerRagdollBase.prefab


m_BrainParameters:
VectorObservationSize: 243
NumStackedVectorObservations: 1
m_ActionSpec:
m_NumContinuousActions: 39
BranchSizes:
hasUpgradedBrainParametersWithActionSpec: 1
m_InferenceDevice: 0
m_InferenceDevice: 2
m_UseChildActuators: 1
m_ObservableAttributeHandling: 0
--- !u!114 &7408209125961349353
MonoBehaviour:

maxStep: 0
hasUpgradedFromAgentParameters: 1
MaxStep: 5000
targetWalkingSpeed: 10
m_TargetWalkingSpeed: 10
walkDirectionMethod: 0
worldDirToWalk: {x: 1, y: 0, z: 0}
worldPosToWalkTo: {x: 0, y: 0, z: 0}
target: {fileID: 0}
hips: {fileID: 895268871264836332}
chest: {fileID: 7933235354845945071}

5
Project/Assets/ML-Agents/Examples/Walker/Prefabs/Ragdoll/WalkerRagdollDySingleSpeedVariant.prefab


value:
objectReference: {fileID: 11400000, guid: 47e7c480450ec4dcd9e4a04124e14ed4,
type: 3}
- target: {fileID: 895268871377934297, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_InferenceDevice
value: 2
objectReference: {fileID: 0}
- target: {fileID: 895268871377934298, guid: 765582efd9dda46ed98564603316353f,
type: 3}
propertyPath: m_LocalPosition.x

7
Project/Assets/ML-Agents/Examples/WallJump/Prefabs/WallJumpArea.prefab


m_BrainParameters:
VectorObservationSize: 4
NumStackedVectorObservations: 6
m_ActionSpec:
m_NumContinuousActions: 0
BranchSizes: 03000000030000000300000002000000
hasUpgradedBrainParametersWithActionSpec: 1
m_InferenceDevice: 0
m_InferenceDevice: 2
m_UseChildActuators: 1
m_ObservableAttributeHandling: 0
--- !u!114 &114925928594762506
MonoBehaviour:

6
Project/Assets/ML-Agents/Examples/Worm/Prefabs/WormBasePrefab.prefab


m_BrainParameters:
VectorObservationSize: 64
NumStackedVectorObservations: 1
m_ActionSpec:
m_NumContinuousActions: 9
BranchSizes:
hasUpgradedBrainParametersWithActionSpec: 1
m_InferenceDevice: 0
m_InferenceDevice: 2
m_BehaviorType: 0
m_BehaviorName: WormDynamic
TeamId: 0

1
Project/ProjectSettings/TagManager.asset


- symbol_O_Goal
- purpleAgent
- purpleGoal
- tile
layers:
- Default
- TransparentFX

1
README.md


| **Release 8** | October 14, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/release_8) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/release_8_docs/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/release_8.zip) | [0.21.0](https://pypi.org/project/mlagents/0.21.0/) | [1.5.0](https://docs.unity3d.com/Packages/com.unity.ml-agents@1.5/manual/index.html) |
| **Verified Package 1.0.5** | September 23, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/com.unity.ml-agents_1.0.5) | [docs](https://github.com/Unity-Technologies/ml-agents/blob/release_2_verified_docs/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/com.unity.ml-agents_1.0.5.zip) | [0.16.1](https://pypi.org/project/mlagents/0.16.1/) | [1.0.5](https://docs.unity3d.com/Packages/com.unity.ml-agents@1.0/manual/index.html) |
| **Release 7** | September 16, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/release_7) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/release_7_docs/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/release_7.zip) | [0.20.0](https://pypi.org/project/mlagents/0.20.0/) | [1.4.0](https://docs.unity3d.com/Packages/com.unity.ml-agents@1.4/manual/index.html) |
If you are a researcher interested in a discussion of Unity as an AI platform,
see a pre-print of our
[reference paper on Unity and the ML-Agents Toolkit](https://arxiv.org/abs/1809.02627).

2
com.unity.ml-agents.extensions/Runtime/Match3/Match3ActuatorComponent.cs


public bool ForceHeuristic;
/// <inheritdoc/>
#pragma warning disable 672
#pragma warning restore 672
{
var board = GetComponent<AbstractBoard>();
var agent = GetComponentInParent<Agent>();

135
com.unity.ml-agents.extensions/Runtime/MultiAgent/BaseMultiAgentGroup.cs


using System;
using System.Linq;
using System.Collections.Generic;
namespace Unity.MLAgents.Extensions.MultiAgent

/// </summary>
public class BaseMultiAgentGroup : IMultiAgentGroup, IDisposable
{
int m_StepCount;
int m_GroupMaxStep;
List<Agent> m_Agents = new List<Agent> { };
HashSet<Agent> m_Agents = new HashSet<Agent>();
public BaseMultiAgentGroup()
{
Academy.Instance.PostAgentAct += _GroupStep;
}
Academy.Instance.PostAgentAct -= _GroupStep;
UnregisterAgent(m_Agents[0]);
}
}
void _GroupStep()
{
m_StepCount += 1;
if ((m_StepCount >= m_GroupMaxStep) && (m_GroupMaxStep > 0))
{
foreach (var agent in m_Agents)
{
if (agent.enabled)
{
agent.EpisodeInterrupted();
}
}
Reset();
UnregisterAgent(m_Agents.First());
/// <summary>
/// Register the agent to the MultiAgentGroup.
/// Registered agents will be able to receive group rewards from the MultiAgentGroup
/// and share observations during training.
/// </summary>
/// <inheritdoc />
public virtual void RegisterAgent(Agent agent)
{
if (!m_Agents.Contains(agent))

}
}
/// <summary>
/// Remove the agent from the MultiAgentGroup.
/// </summary>
/// <inheritdoc />
public virtual void UnregisterAgent(Agent agent)
{
if (m_Agents.Contains(agent))

}
}
/// <inheritdoc />
public int GetId()
{
return m_Id;

/// Get list of all agents registered to this MultiAgentGroup.
/// Get list of all agents currently registered to this MultiAgentGroup.
/// List of agents belongs to the MultiAgentGroup.
/// List of agents registered to the MultiAgentGroup.
public List<Agent> GetRegisteredAgents()
public HashSet<Agent> GetRegisteredAgents()
/// Add group reward for all agents under this MultiAgentGroup.
/// Disabled agent will not receive this reward.
/// Increments the group rewards for all agents in this MultiAgentGroup.
/// <remarks>
/// This function increases or decreases the group rewards by a given amount for all agents
/// in the group. Use <see cref="SetGroupReward(float)"/> to set the group reward assigned
/// to the current step with a specific value rather than increasing or decreasing it.
///
/// A positive group reward indicates the whole group's accomplishments or desired behaviors.
/// Every agent in the group will receive the same group reward no matter whether the
/// agent's act directly leads to the reward. Group rewards are meant to reinforce agents
/// to act in the group's best interest instead of individual ones.
/// Group rewards are treated differently than individual agent rewards during training, so
/// calling AddGroupReward() is not equivalent to calling agent.AddReward() on each agent in the group.
/// </remarks>
/// <param name="reward">Incremental group reward value.</param>
if (agent.enabled)
{
agent.AddGroupReward(reward);
}
agent.AddGroupReward(reward);
/// Set group reward for all agents under this MultiAgentGroup.
/// Disabled agent will not receive this reward.
/// Set the group rewards for all agents in this MultiAgentGroup.
/// <remarks>
/// This function replaces any group rewards given during the current step for all agents in the group.
/// Use <see cref="AddGroupReward(float)"/> to incrementally change the group reward rather than
/// overriding it.
///
/// A positive group reward indicates the whole group's accomplishments or desired behaviors.
/// Every agent in the group will receive the same group reward no matter whether the
/// agent's act directly leads to the reward. Group rewards are meant to reinforce agents
/// to act in the group's best interest instead of individual ones.
/// Group rewards are treated differently than individual agent rewards during training, so
/// calling SetGroupReward() is not equivalent to calling agent.SetReward() on each agent in the group.
/// </remarks>
/// <param name="reward">The new value of the group reward.</param>
if (agent.enabled)
{
agent.SetGroupReward(reward);
}
agent.SetGroupReward(reward);
/// Returns the current step counter (within the current episode).
/// End episodes for all agents in this MultiAgentGroup.
/// <returns>
/// Current step count.
/// </returns>
public int StepCount
/// <remarks>
/// This should be used when the episode can no longer continue, such as when the group
/// reaches the goal or fails at the task.
/// </remarks>
public void EndGroupEpisode()
get { return m_StepCount; }
}
public int GroupMaxStep
{
get { return m_GroupMaxStep; }
}
public void SetGroupMaxStep(int maxStep)
{
m_GroupMaxStep = maxStep;
foreach (var agent in m_Agents)
{
agent.EndEpisode();
}
/// End Episode for all agents under this MultiAgentGroup.
/// Indicate that the episode is over but not due to the "fault" of the group.
/// This has the same end result as calling <see cref="EndGroupEpisode"/>, but has a
/// slightly different effect on training.
public void EndGroupEpisode()
/// <remarks>
/// This should be used when the episode could continue, but has gone on for
/// a sufficient number of steps, such as if the environment hits some maximum number of steps.
/// </remarks>
public void GroupEpisodeInterrupted()
if (agent.enabled)
{
agent.EndEpisode();
}
agent.EpisodeInterrupted();
Reset();
}
void Reset()
{
m_StepCount = 0;
}
}
}
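For context, the BaseMultiAgentGroup hunk above swaps the internal `List<Agent>` for a `HashSet<Agent>`, steps the group from the new `Academy.Instance.PostAgentAct` event, and splits episode termination into `EndGroupEpisode()` and `GroupEpisodeInterrupted()`. A minimal usage sketch against that API; `CooperativePushEnv` and its callbacks are illustrative names, with the `teammates` array assumed to be assigned in the Inspector:

```csharp
using UnityEngine;
using Unity.MLAgents;
using Unity.MLAgents.Extensions.MultiAgent;

// Hypothetical environment controller exercising the group API shown above.
public class CooperativePushEnv : MonoBehaviour
{
    public Agent[] teammates;        // assumed to be assigned in the Inspector
    BaseMultiAgentGroup m_Group;

    void Start()
    {
        m_Group = new BaseMultiAgentGroup();
        foreach (var agent in teammates)
        {
            m_Group.RegisterAgent(agent);   // registered agents share group rewards
        }
    }

    void OnGoalScored()
    {
        m_Group.AddGroupReward(1.0f);       // every registered (enabled) agent gets the same reward
        m_Group.EndGroupEpisode();          // the task succeeded, so end the episode for the whole group
    }

    void OnTimeLimitReached()
    {
        m_Group.GroupEpisodeInterrupted();  // episode cut short, not a failure of the group
    }

    void OnDestroy()
    {
        m_Group?.Dispose();                 // the group implements IDisposable
    }
}
```

As the remarks in the hunk stress, group rewards are handled differently from per-agent rewards during training, so `AddGroupReward()` is not interchangeable with calling `agent.AddReward()` on each member.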

33
com.unity.ml-agents/CHANGELOG.md


and this project adheres to
[Semantic Versioning](http://semver.org/spec/v2.0.0.html).
## [Unreleased]
### Major Changes
#### com.unity.ml-agents (C#)
#### ml-agents / ml-agents-envs / gym-unity (Python)
## [Unreleased]
### Minor Changes
#### com.unity.ml-agents / com.unity.ml-agents.extensions (C#)
#### ml-agents / ml-agents-envs / gym-unity (Python)
### Bug Fixes
#### com.unity.ml-agents (C#)
#### ml-agents / ml-agents-envs / gym-unity (Python)
## [1.8.0-preview] - 2021-02-17
- A plugin system for `mlagents-learn` has been added. You can now define custom
`StatsWriter` implementations and register them to be called during training.
More types of plugins will be added in the future. (#4788)
### Minor Changes
#### com.unity.ml-agents / com.unity.ml-agents.extensions (C#)

will result in the values being summed (instead of averaged) when written to
TensorBoard. Thanks to @brccabral for the contribution! (#4816)
- The upper limit for the time scale (by setting the `--time-scale` parameter in mlagents-learn) was
removed when training with a player. The Editor still requires it to be clamped to 100. (#4867)
- Added the IHeuristicProvider interface to allow IActuators as well as Agent implement the Heuristic function to generate actions.
Updated the Basic example and the Match3 Example to use Actuators.
Changed the namespace and file names of classes in com.unity.ml-agents.extensions. (#4849)

- Added `ObservationWriter.AddList()` and deprecated `ObservationWriter.AddRange()`.
`AddList()` is recommended, as it does not generate any additional memory allocations. (#4887)
- The Barracuda dependency was upgraded to 1.3.0. (#4898)
- Added `ActuatorComponent.CreateActuators`, and deprecate `ActuatorComponent.CreateActuator`. The
default implementation will wrap `ActuatorComponent.CreateActuator` in an array and return that. (#4899)
- `InferenceDevice.Burst` was added, indicating that Agent's model will be run using Barracuda's Burst backend.
This is the default for new Agents, but existing ones that use `InferenceDevice.CPU` should update to
`InferenceDevice.Burst`. (#4925)
- Tensorboard now logs the Environment Reward as both a scalar and a histogram. (#4878)
- The `mlagents_env` API has changed, `BehaviorSpec` now has a `observation_specs` property containing a list of `ObservationSpec`. For more information on `ObservationSpec` see [here](https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Python-API.md#behaviorspec). (#4763, #4825)
### Bug Fixes
#### com.unity.ml-agents (C#)

- Removed unnecessary memory allocations in `SideChannelManager.GetSideChannelMessage()` (#4886)
- Removed several memory allocations that happened during inference. On a test scene, this
reduced the amount of memory allocated by approximately 25%. (#4887)
- Removed several memory allocations that happened during inference with discrete actions. (#4922)
- Properly catch permission errors when writing timer files. (#4921)
- Unexpected exceptions during training initialization and shutdown are now logged. If you see
"noisy" logs, please let us know! (#4930, #4935)
#### ml-agents / ml-agents-envs / gym-unity (Python)
- Fixed a bug that would cause an exception when `RunOptions` was deserialized via `pickle`. (#4842)

while waiting for a connection, and raises a better error message if it crashes. (#4880)
- Passing a `-logfile` option in the `--env-args` option to `mlagents-learn` is
no longer overwritten. (#4880)
- The `load_weights` function was being called unnecessarily often in the Ghost Trainer leading to training slowdowns. (#4934)
## [1.7.2-preview] - 2020-12-22
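The `InferenceDevice.Burst` changelog entry above explains the otherwise cryptic `m_InferenceDevice: 0` to `m_InferenceDevice: 2` edits repeated through the example prefabs in this commit. The mapping below is an assumption inferred from those serialized values and the entry's wording, not something stated in the diff:

```csharp
// Assumed serialization mapping: 0 and 1 correspond to the pre-existing
// CPU/GPU options, and the new Burst backend serializes as 2, which is why
// the prefabs change m_InferenceDevice from 0 to 2.
public enum InferenceDevice
{
    CPU = 0,
    GPU = 1,
    Burst = 2
}
```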

16
com.unity.ml-agents/Editor/BehaviorParametersEditor.cs


// Grab the sensor components, since we need them to determine the observation sizes.
// TODO make these methods of BehaviorParameters
SensorComponent[] sensorComponents;
if (behaviorParameters.UseChildSensors)
{
sensorComponents = behaviorParameters.GetComponentsInChildren<SensorComponent>();
}
else
{
sensorComponents = behaviorParameters.GetComponents<SensorComponent>();
}
var agent = behaviorParameters.gameObject.GetComponent<Agent>();
agent.sensors = new List<ISensor>();
agent.InitializeSensors();
var sensors = agent.sensors.ToArray();
ActuatorComponent[] actuatorComponents;
if (behaviorParameters.UseChildActuators)

// Get the total size of the sensors generated by ObservableAttributes.
// If there are any errors (e.g. unsupported type, write-only properties), display them too.
int observableAttributeSensorTotalSize = 0;
var agent = behaviorParameters.GetComponent<Agent>();
if (agent != null && behaviorParameters.ObservableAttributeHandling != ObservableAttributeOptions.Ignore)
{
List<string> observableErrors = new List<string>();

if (brainParameters != null)
{
var failedChecks = Inference.BarracudaModelParamLoader.CheckModel(
barracudaModel, brainParameters, sensorComponents, actuatorComponents,
barracudaModel, brainParameters, sensors, actuatorComponents,
observableAttributeSensorTotalSize, behaviorParameters.BehaviorType
);
foreach (var check in failedChecks)

58
com.unity.ml-agents/Runtime/Academy.cs


/// <term>1.4.0</term>
/// <description>Support training analytics sent from python trainer to the editor.</description>
/// </item>
/// <item>
/// <term>1.5.0</term>
/// <description>Support variable length observation training.</description>
/// </item>
const string k_ApiVersion = "1.4.0";
const string k_ApiVersion = "1.5.0";
/// <summary>
/// Unity package version of com.unity.ml-agents.

// This will mark the Agent as Done if it has reached its maxSteps.
internal event Action AgentIncrementStep;
internal event Action PostAgentAct;
/// <summary>
/// Signals to all of the <see cref="Agent"/>s that their step is about to begin.

{
// We try to exchange the first message with Python. If this fails, it means
// no Python Process is ready to train the environment. In this case, the
//environment must use Inference.
// environment must use Inference.
bool initSuccessful = false;
var communicatorInitParams = new CommunicatorInitParameters
{
unityCommunicationVersion = k_ApiVersion,
unityPackageVersion = k_PackageVersion,
name = "AcademySingleton",
CSharpCapabilities = new UnityRLCapabilities()
};
var unityRlInitParameters = Communicator.Initialize(
new CommunicatorInitParameters
{
unityCommunicationVersion = k_ApiVersion,
unityPackageVersion = k_PackageVersion,
name = "AcademySingleton",
CSharpCapabilities = new UnityRLCapabilities()
});
UnityEngine.Random.InitState(unityRlInitParameters.seed);
// We might have inference-only Agents, so set the seed for them too.
m_InferenceSeed = unityRlInitParameters.seed;
TrainerCapabilities = unityRlInitParameters.TrainerCapabilities;
TrainerCapabilities.WarnOnPythonMissingBaseRLCapabilities();
initSuccessful = Communicator.Initialize(
communicatorInitParams,
out var unityRlInitParameters
);
if (initSuccessful)
{
UnityEngine.Random.InitState(unityRlInitParameters.seed);
// We might have inference-only Agents, so set the seed for them too.
m_InferenceSeed = unityRlInitParameters.seed;
TrainerCapabilities = unityRlInitParameters.TrainerCapabilities;
TrainerCapabilities.WarnOnPythonMissingBaseRLCapabilities();
}
else
{
Debug.Log($"Couldn't connect to trainer on port {port} using API version {k_ApiVersion}. Will perform inference instead.");
Communicator = null;
}
catch
catch (Exception ex)
Debug.Log($"" +
$"Couldn't connect to trainer on port {port} using API version {k_ApiVersion}. " +
"Will perform inference instead."
);
Debug.Log($"Unexpected exception when trying to initialize communication: {ex}\nWill perform inference instead.");
if (Communicator != null)
{
Communicator.QuitCommandReceived += OnQuitCommandReceived;

{
AgentAct?.Invoke();
}
PostAgentAct?.Invoke();
}
}

14
com.unity.ml-agents/Runtime/Actuators/ActuatorComponent.cs


using System;
using UnityEngine;
namespace Unity.MLAgents.Actuators

/// Create the IActuator. This is called by the Agent when it is initialized.
/// </summary>
/// <returns>Created IActuator object.</returns>
[Obsolete("Use CreateActuators instead.")]
/// <summary>
/// Create a collection of <see cref="IActuator"/>s. This is called by the <see cref="Agent"/> during
/// initialization.
/// </summary>
/// <returns>A collection of <see cref="IActuator"/>s</returns>
public virtual IActuator[] CreateActuators()
{
#pragma warning disable 618
return new[] { CreateActuator() };
#pragma warning restore 618
}
/// <summary>
/// The specification of the possible actions for this ActuatorComponent.
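The hunk above marks `CreateActuator()` obsolete and adds a virtual `CreateActuators()` whose default body wraps the single actuator in an array; the Agent.cs hunk further down now consumes it via `AddActuators(actuatorComponent.CreateActuators())`. A minimal sketch of a component written against the updated API, assuming `CreateActuator()` is still required by the base class (the Basic and Match3 example components in this commit still override it inside pragma 672 guards); `MyActuatorComponent` and `MySingleActuator` are illustrative names, not part of the package:

```csharp
using Unity.MLAgents.Actuators;

// Hypothetical component targeting the updated ActuatorComponent API.
public class MyActuatorComponent : ActuatorComponent
{
    // CreateActuator() is assumed to remain required by the base class but is now
    // [Obsolete], hence the pragma guards mirroring the examples in this commit.
#pragma warning disable 672
    public override IActuator CreateActuator()
    {
        return new MySingleActuator();      // MySingleActuator: user-defined IActuator (stand-in)
    }
#pragma warning restore 672

    // New in this change: a component may return several actuators at once.
    // The base-class default simply wraps CreateActuator() in a one-element array.
    public override IActuator[] CreateActuators()
    {
        return new IActuator[] { new MySingleActuator() };
    }

    // Combined action spec for everything this component creates.
    public override ActionSpec ActionSpec
    {
        get { return ActionSpec.MakeContinuous(1); }
    }
}
```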

14
com.unity.ml-agents/Runtime/Actuators/ActuatorManager.cs


NumContinuousActions = NumDiscreteActions = SumOfDiscreteBranchSizes = 0;
}
/// <summary>
/// Add an array of <see cref="IActuator"/>s at once.
/// </summary>
/// <param name="actuators">The array of <see cref="IActuator"/>s to add.</param>
public void AddActuators(IActuator[] actuators)
{
for (var i = 0; i < actuators.Length; i++)
{
Add(actuators[i]);
}
}
/*********************************************************************************
* IList implementation that delegates to m_Actuators List. *
*********************************************************************************/

public int Count => m_Actuators.Count;
/// <inheritdoc/>
public bool IsReadOnly => m_Actuators.IsReadOnly;
public bool IsReadOnly => false;
/// <inheritdoc/>
public int IndexOf(IActuator item)

8
com.unity.ml-agents/Runtime/Agent.cs


/// </summary>
internal void InitializeSensors()
{
if (m_PolicyFactory == null)
{
m_PolicyFactory = GetComponent<BehaviorParameters>();
}
if (m_PolicyFactory.ObservableAttributeHandling != ObservableAttributeOptions.Ignore)
{
var excludeInherited =

foreach (var actuatorComponent in attachedActuators)
{
m_ActuatorManager.Add(actuatorComponent.CreateActuator());
m_ActuatorManager.AddActuators(actuatorComponent.CreateActuators());
}
}

internal void SetMultiAgentGroup(IMultiAgentGroup multiAgentGroup)
{
// unregister from current group if this agent has been assigned one before
// Unregister from current group if this agent has been assigned one before
UnregisterFromGroup?.Invoke(this);
m_GroupId = multiAgentGroup.GetId();

15
com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs


{
observationProto.DimensionProperties.Add((int)dimensionProperties[i]);
}
// Checking trainer compatibility with variable length observations
if (dimensionProperties.Length == 2)
{
if (dimensionProperties[0] == DimensionProperty.VariableSize &&
dimensionProperties[1] == DimensionProperty.None)
{
var trainerCanHandleVarLenObs = Academy.Instance.TrainerCapabilities == null || Academy.Instance.TrainerCapabilities.VariableLengthObservation;
if (!trainerCanHandleVarLenObs)
{
throw new UnityAgentsException("Variable Length Observations are not supported by the trainer");
}
}
}
}
observationProto.Shape.AddRange(shape);

CompressedChannelMapping = proto.CompressedChannelMapping,
HybridActions = proto.HybridActions,
TrainingAnalytics = proto.TrainingAnalytics,
VariableLengthObservation = proto.VariableLengthObservation,
};
}

CompressedChannelMapping = rlCaps.CompressedChannelMapping,
HybridActions = rlCaps.HybridActions,
TrainingAnalytics = rlCaps.TrainingAnalytics,
VariableLengthObservation = rlCaps.VariableLengthObservation,
};
}

5
com.unity.ml-agents/Runtime/Communicator/ICommunicator.cs


/// Sends the academy parameters through the Communicator.
/// Is used by the academy to send the AcademyParameters to the communicator.
/// </summary>
/// <returns>The External Initialization Parameters received.</returns>
/// <returns>Whether the connection was successful.</returns>
UnityRLInitParameters Initialize(CommunicatorInitParameters initParameters);
/// <param name="initParametersOut">The External Initialization Parameters received</param>
bool Initialize(CommunicatorInitParameters initParameters, out UnityRLInitParameters initParametersOut);
/// <summary>
/// Registers a new Brain to the Communicator.

147
com.unity.ml-agents/Runtime/Communicator/RpcCommunicator.cs


internal static bool CheckCommunicationVersionsAreCompatible(
string unityCommunicationVersion,
string pythonApiVersion,
string pythonLibraryVersion)
string pythonApiVersion
)
{
var unityVersion = new Version(unityCommunicationVersion);
var pythonVersion = new Version(pythonApiVersion);

/// Sends the initialization parameters through the Communicator.
/// Is used by the academy to send initialization parameters to the communicator.
/// </summary>
/// <returns>The External Initialization Parameters received.</returns>
/// <returns>Whether the connection was successful.</returns>
public UnityRLInitParameters Initialize(CommunicatorInitParameters initParameters)
/// <param name="initParametersOut">The External Initialization Parameters received.</param>
public bool Initialize(CommunicatorInitParameters initParameters, out UnityRLInitParameters initParametersOut)
{
var academyParameters = new UnityRLInitializationOutputProto
{

{
RlInitializationOutput = academyParameters
},
out input);
var pythonPackageVersion = initializationInput.RlInitializationInput.PackageVersion;
var pythonCommunicationVersion = initializationInput.RlInitializationInput.CommunicationVersion;
var unityCommunicationVersion = initParameters.unityCommunicationVersion;
TrainingAnalytics.SetTrainerInformation(pythonPackageVersion, pythonCommunicationVersion);
out input
);
}
catch (Exception ex)
{
if (ex is RpcException rpcException)
{
var communicationIsCompatible = CheckCommunicationVersionsAreCompatible(unityCommunicationVersion,
pythonCommunicationVersion,
pythonPackageVersion);
// Initialization succeeded part-way. The most likely cause is a mismatch between the communicator
// API strings, so log an explicit warning if that's the case.
if (initializationInput != null && input == null)
{
if (!communicationIsCompatible)
switch (rpcException.Status.StatusCode)
Debug.LogWarningFormat(
"Communication protocol between python ({0}) and Unity ({1}) have different " +
"versions which make them incompatible. Python library version: {2}.",
pythonCommunicationVersion, initParameters.unityCommunicationVersion,
pythonPackageVersion
);
case StatusCode.Unavailable:
// This is the common case where there's no trainer to connect to.
break;
case StatusCode.DeadlineExceeded:
// We don't currently set a deadline for connection, but likely will in the future.
break;
default:
Debug.Log($"Unexpected gRPC exception when trying to initialize communication: {rpcException}");
break;
else
{
Debug.LogWarningFormat(
"Unknown communication error between Python. Python communication protocol: {0}, " +
"Python library version: {1}.",
pythonCommunicationVersion,
pythonPackageVersion
);
}
throw new UnityAgentsException("ICommunicator.Initialize() failed.");
else
{
Debug.Log($"Unexpected exception when trying to initialize communication: {ex}");
}
initParametersOut = new UnityRLInitParameters();
return false;
catch
var pythonPackageVersion = initializationInput.RlInitializationInput.PackageVersion;
var pythonCommunicationVersion = initializationInput.RlInitializationInput.CommunicationVersion;
TrainingAnalytics.SetTrainerInformation(pythonPackageVersion, pythonCommunicationVersion);
var communicationIsCompatible = CheckCommunicationVersionsAreCompatible(
initParameters.unityCommunicationVersion,
pythonCommunicationVersion
);
// Initialization succeeded part-way. The most likely cause is a mismatch between the communicator
// API strings, so log an explicit warning if that's the case.
if (initializationInput != null && input == null)
var exceptionMessage = "The Communicator was unable to connect. Please make sure the External " +
"process is ready to accept communication with Unity.";
// Check for common error condition and add details to the exception message.
var httpProxy = Environment.GetEnvironmentVariable("HTTP_PROXY");
var httpsProxy = Environment.GetEnvironmentVariable("HTTPS_PROXY");
if (httpProxy != null || httpsProxy != null)
if (!communicationIsCompatible)
{
Debug.LogWarningFormat(
"Communication protocol between python ({0}) and Unity ({1}) have different " +
"versions which make them incompatible. Python library version: {2}.",
pythonCommunicationVersion, initParameters.unityCommunicationVersion,
pythonPackageVersion
);
}
else
exceptionMessage += " Try removing HTTP_PROXY and HTTPS_PROXY from the" +
"environment variables and try again.";
Debug.LogWarningFormat(
"Unknown communication error between Python. Python communication protocol: {0}, " +
"Python library version: {1}.",
pythonCommunicationVersion,
pythonPackageVersion
);
throw new UnityAgentsException(exceptionMessage);
initParametersOut = new UnityRLInitParameters();
return false;
return initializationInput.RlInitializationInput.ToUnityRLInitParameters();
initParametersOut = initializationInput.RlInitializationInput.ToUnityRLInitParameters();
return true;
}
/// <summary>

SendCommandEvent(rlInput.Command);
}
UnityInputProto Initialize(UnityOutputProto unityOutput,
out UnityInputProto unityInput)
UnityInputProto Initialize(UnityOutputProto unityOutput, out UnityInputProto unityInput)
{
#if UNITY_EDITOR || UNITY_STANDALONE_WIN || UNITY_STANDALONE_OSX || UNITY_STANDALONE_LINUX
m_IsOpen = true;

}
return result.UnityInput;
#else
throw new UnityAgentsException(
"You cannot perform training on this platform.");
throw new UnityAgentsException("You cannot perform training on this platform.");
#endif
}

{
return null;
}
try
{
var message = m_Client.Exchange(WrapMessage(unityOutput, 200));

QuitCommandReceived?.Invoke();
return message.UnityInput;
}
catch
catch (Exception ex)
if (ex is RpcException rpcException)
{
// Log more verbose errors if they're something the user can possibly do something about.
switch (rpcException.Status.StatusCode)
{
case StatusCode.Unavailable:
// This can happen when python disconnects. Ignore it to avoid noisy logs.
break;
case StatusCode.ResourceExhausted:
// This happens if the message body is too large. There's no way to
// gracefully handle this, but at least we can show the message and the
// user can try to reduce the number of agents or observation sizes.
Debug.LogError($"GRPC Exception: {rpcException.Message}. Disconnecting from trainer.");
break;
default:
// Other unknown errors. Log at INFO level.
Debug.Log($"GRPC Exception: {rpcException.Message}. Disconnecting from trainer.");
break;
}
}
else
{
// Fall-through for other error types
Debug.LogError($"Communication Exception: {ex.Message}. Disconnecting from trainer.");
}
m_IsOpen = false;
QuitCommandReceived?.Invoke();
return null;

5
com.unity.ml-agents/Runtime/Communicator/UnityRLCapabilities.cs


public bool CompressedChannelMapping;
public bool HybridActions;
public bool TrainingAnalytics;
public bool VariableLengthObservation;
/// <summary>
/// A class holding the capabilities flags for Reinforcement Learning across C# and the Trainer codebase. This

bool concatenatedPngObservations = true,
bool compressedChannelMapping = true,
bool hybridActions = true,
bool trainingAnalytics = true)
bool trainingAnalytics = true,
bool variableLengthObservation = true)
{
BaseRLCapabilities = baseRlCapabilities;
ConcatenatedPngObservations = concatenatedPngObservations;

VariableLengthObservation = variableLengthObservation;
}
/// <summary>

40
com.unity.ml-agents/Runtime/Grpc/CommunicatorObjects/Capabilities.cs


byte[] descriptorData = global::System.Convert.FromBase64String(
string.Concat(
"CjVtbGFnZW50c19lbnZzL2NvbW11bmljYXRvcl9vYmplY3RzL2NhcGFiaWxp",
"dGllcy5wcm90bxIUY29tbXVuaWNhdG9yX29iamVjdHMirwEKGFVuaXR5UkxD",
"dGllcy5wcm90bxIUY29tbXVuaWNhdG9yX29iamVjdHMi0gEKGFVuaXR5UkxD",
"ASgIEhkKEXRyYWluaW5nQW5hbHl0aWNzGAUgASgIQiWqAiJVbml0eS5NTEFn",
"ZW50cy5Db21tdW5pY2F0b3JPYmplY3RzYgZwcm90bzM="));
"ASgIEhkKEXRyYWluaW5nQW5hbHl0aWNzGAUgASgIEiEKGXZhcmlhYmxlTGVu",
"Z3RoT2JzZXJ2YXRpb24YBiABKAhCJaoCIlVuaXR5Lk1MQWdlbnRzLkNvbW11",
"bmljYXRvck9iamVjdHNiBnByb3RvMw=="));
new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.UnityRLCapabilitiesProto), global::Unity.MLAgents.CommunicatorObjects.UnityRLCapabilitiesProto.Parser, new[]{ "BaseRLCapabilities", "ConcatenatedPngObservations", "CompressedChannelMapping", "HybridActions", "TrainingAnalytics" }, null, null, null)
new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.UnityRLCapabilitiesProto), global::Unity.MLAgents.CommunicatorObjects.UnityRLCapabilitiesProto.Parser, new[]{ "BaseRLCapabilities", "ConcatenatedPngObservations", "CompressedChannelMapping", "HybridActions", "TrainingAnalytics", "VariableLengthObservation" }, null, null, null)
}));
}
#endregion

compressedChannelMapping_ = other.compressedChannelMapping_;
hybridActions_ = other.hybridActions_;
trainingAnalytics_ = other.trainingAnalytics_;
variableLengthObservation_ = other.variableLengthObservation_;
_unknownFields = pb::UnknownFieldSet.Clone(other._unknownFields);
}

}
}
/// <summary>Field number for the "variableLengthObservation" field.</summary>
public const int VariableLengthObservationFieldNumber = 6;
private bool variableLengthObservation_;
/// <summary>
/// Support for variable length observations of rank 2
/// </summary>
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public bool VariableLengthObservation {
get { return variableLengthObservation_; }
set {
variableLengthObservation_ = value;
}
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public override bool Equals(object other) {
return Equals(other as UnityRLCapabilitiesProto);

if (CompressedChannelMapping != other.CompressedChannelMapping) return false;
if (HybridActions != other.HybridActions) return false;
if (TrainingAnalytics != other.TrainingAnalytics) return false;
if (VariableLengthObservation != other.VariableLengthObservation) return false;
return Equals(_unknownFields, other._unknownFields);
}

if (CompressedChannelMapping != false) hash ^= CompressedChannelMapping.GetHashCode();
if (HybridActions != false) hash ^= HybridActions.GetHashCode();
if (TrainingAnalytics != false) hash ^= TrainingAnalytics.GetHashCode();
if (VariableLengthObservation != false) hash ^= VariableLengthObservation.GetHashCode();
if (_unknownFields != null) {
hash ^= _unknownFields.GetHashCode();
}

if (TrainingAnalytics != false) {
output.WriteRawTag(40);
output.WriteBool(TrainingAnalytics);
}
if (VariableLengthObservation != false) {
output.WriteRawTag(48);
output.WriteBool(VariableLengthObservation);
}
if (_unknownFields != null) {
_unknownFields.WriteTo(output);

if (TrainingAnalytics != false) {
size += 1 + 1;
}
if (VariableLengthObservation != false) {
size += 1 + 1;
}
if (_unknownFields != null) {
size += _unknownFields.CalculateSize();
}

if (other.TrainingAnalytics != false) {
TrainingAnalytics = other.TrainingAnalytics;
}
if (other.VariableLengthObservation != false) {
VariableLengthObservation = other.VariableLengthObservation;
}
_unknownFields = pb::UnknownFieldSet.MergeFrom(_unknownFields, other._unknownFields);
}

}
case 40: {
TrainingAnalytics = input.ReadBool();
break;
}
case 48: {
VariableLengthObservation = input.ReadBool();
break;
}
}

2
com.unity.ml-agents/Runtime/IMultiAgentGroup.cs


void RegisterAgent(Agent agent);
/// <summary>
/// UnRegister agent from the MultiAgentGroup.
/// Unregister agent from the MultiAgentGroup.
/// </summary>
void UnregisterAgent(Agent agent);
}

129
com.unity.ml-agents/Runtime/Inference/ApplierImpl.cs


using System;
using System.Collections.Generic;
using System.Linq;
using Unity.MLAgents.Inference.Utils;

{
readonly int[] m_ActionSize;
readonly Multinomial m_Multinomial;
readonly ITensorAllocator m_Allocator;
readonly int[] m_StartActionIndices;
readonly float[] m_CdfBuffer;
m_Allocator = allocator;
m_StartActionIndices = Utilities.CumSum(m_ActionSize);
// Scratch space for computing the cumulative distribution function.
// In order to reuse it, make it the size of the largest branch.
var largestBranch = Mathf.Max(m_ActionSize);
m_CdfBuffer = new float[largestBranch];
//var tensorDataProbabilities = tensorProxy.Data as float[,];
var idActionPairList = actionIds as List<int> ?? actionIds.ToList();
var batchSize = idActionPairList.Count;
var actionValues = new float[batchSize, m_ActionSize.Length];
var startActionIndices = Utilities.CumSum(m_ActionSize);
for (var actionIndex = 0; actionIndex < m_ActionSize.Length; actionIndex++)
{
var nBranchAction = m_ActionSize[actionIndex];
var actionProbs = new TensorProxy()
{
valueType = TensorProxy.TensorType.FloatingPoint,
shape = new long[] { batchSize, nBranchAction },
data = m_Allocator.Alloc(new TensorShape(batchSize, nBranchAction))
};
for (var batchIndex = 0; batchIndex < batchSize; batchIndex++)
{
for (var branchActionIndex = 0;
branchActionIndex < nBranchAction;
branchActionIndex++)
{
actionProbs.data[batchIndex, branchActionIndex] =
tensorProxy.data[batchIndex, startActionIndices[actionIndex] + branchActionIndex];
}
}
var outputTensor = new TensorProxy()
{
valueType = TensorProxy.TensorType.FloatingPoint,
shape = new long[] { batchSize, 1 },
data = m_Allocator.Alloc(new TensorShape(batchSize, 1))
};
Eval(actionProbs, outputTensor, m_Multinomial);
for (var ii = 0; ii < batchSize; ii++)
{
actionValues[ii, actionIndex] = outputTensor.data[ii, 0];
}
actionProbs.data.Dispose();
outputTensor.data.Dispose();
}
var agentIndex = 0;
for (var i = 0; i < actionIds.Count; i++)
{

var discreteBuffer = actionBuffer.DiscreteActions;
for (var j = 0; j < m_ActionSize.Length; j++)
{
discreteBuffer[j] = (int)actionValues[agentIndex, j];
ComputeCdf(tensorProxy, agentIndex, m_StartActionIndices[j], m_ActionSize[j]);
discreteBuffer[j] = m_Multinomial.Sample(m_CdfBuffer, m_ActionSize[j]);
}
}
agentIndex++;

/// <summary>
/// Draw samples from a multinomial distribution based on log-probabilities specified
/// in tensor src. The samples will be saved in the dst tensor.
/// Compute the cumulative distribution function for a given agent's action
/// given the log-probabilities.
/// The results are stored in m_CdfBuffer, which is sized to the largest branch.
/// <param name="src">2-D tensor with shape batch_size x num_classes</param>
/// <param name="dst">Allocated tensor with size batch_size x num_samples</param>
/// <param name="multinomial">Multinomial object used to sample values</param>
/// <exception cref="NotImplementedException">
/// Multinomial doesn't support integer tensors
/// </exception>
/// <exception cref="ArgumentException">Issue with tensor shape or type</exception>
/// <exception cref="ArgumentNullException">
/// At least one of the tensors is not allocated
/// </exception>
public static void Eval(TensorProxy src, TensorProxy dst, Multinomial multinomial)
/// <param name="logProbs"></param>
/// <param name="batch">Index of the agent being considered</param>
/// <param name="channelOffset">Offset into the tensor's channel.</param>
/// <param name="branchSize"></param>
internal void ComputeCdf(TensorProxy logProbs, int batch, int channelOffset, int branchSize)
if (src.DataType != typeof(float))
{
throw new NotImplementedException("Only float tensors are currently supported");
}
if (src.valueType != dst.valueType)
{
throw new ArgumentException(
"Source and destination tensors have different types!");
}
if (src.data == null || dst.data == null)
{
throw new ArgumentNullException();
}
if (src.data.batch != dst.data.batch)
// Find the class maximum
var maxProb = float.NegativeInfinity;
for (var cls = 0; cls < branchSize; ++cls)
throw new ArgumentException("Batch size for input and output data is different!");
maxProb = Mathf.Max(logProbs.data[batch, cls + channelOffset], maxProb);
var cdf = new float[src.data.channels];
for (var batch = 0; batch < src.data.batch; ++batch)
// Sum the log probabilities and compute CDF
var sumProb = 0.0f;
for (var cls = 0; cls < branchSize; ++cls)
// Find the class maximum
var maxProb = float.NegativeInfinity;
for (var cls = 0; cls < src.data.channels; ++cls)
{
maxProb = Mathf.Max(src.data[batch, cls], maxProb);
}
// Sum the log probabilities and compute CDF
var sumProb = 0.0f;
for (var cls = 0; cls < src.data.channels; ++cls)
{
sumProb += Mathf.Exp(src.data[batch, cls] - maxProb);
cdf[cls] = sumProb;
}
// Generate the samples
for (var sample = 0; sample < dst.data.channels; ++sample)
{
dst.data[batch, sample] = multinomial.Sample(cdf);
}
sumProb += Mathf.Exp(logProbs.data[batch, cls + channelOffset] - maxProb);
m_CdfBuffer[cls] = sumProb;
}
}
}
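For reference, the sampling scheme above can be illustrated with a small standalone sketch (plain C#, not the package code): build the cumulative distribution function for one discrete branch from its log-probabilities using the max-subtraction trick, then sample an index from it. `System.Random` stands in here for the package's internal `Multinomial`.
```csharp
using System;

static class CdfSamplingSketch
{
    // Build the (unnormalized) CDF for one branch from its log-probabilities.
    // Subtracting the maximum first keeps Math.Exp from overflowing.
    static float[] ComputeCdf(float[] logProbs)
    {
        var maxLogProb = float.NegativeInfinity;
        for (var i = 0; i < logProbs.Length; i++)
        {
            maxLogProb = Math.Max(logProbs[i], maxLogProb);
        }

        var cdf = new float[logProbs.Length];
        var sum = 0f;
        for (var i = 0; i < logProbs.Length; i++)
        {
            sum += (float)Math.Exp(logProbs[i] - maxLogProb);
            cdf[i] = sum;
        }
        return cdf;
    }

    // Sample an index in [0, cdf.Length) proportionally to the probabilities.
    static int Sample(float[] cdf, Random rng)
    {
        var p = (float)rng.NextDouble() * cdf[cdf.Length - 1];
        var cls = 0;
        while (cdf[cls] < p)
        {
            cls++;
        }
        return cls;
    }

    static void Main()
    {
        var rng = new Random(2018);
        // Log-probabilities for a 3-way branch (roughly 10%, 20%, 70%).
        var cdf = ComputeCdf(new[] { -2.30f, -1.61f, -0.36f });
        // Samples an index, weighted toward 2 (the ~70% class).
        Console.WriteLine(Sample(cdf, rng));
    }
}
```
Subtracting the maximum only rescales the unnormalized CDF, so sampling against its last entry yields the same distribution while avoiding overflow in `Math.Exp`.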

70
com.unity.ml-agents/Runtime/Inference/BarracudaModelParamLoader.cs


/// <param name="brainParameters">
/// The BrainParameters that are used to verify the compatibility with the InferenceEngine
/// </param>
/// <param name="sensorComponents">Attached sensor components</param>
/// <param name="sensors">Attached sensor components</param>
SensorComponent[] sensorComponents, ActuatorComponent[] actuatorComponents,
ISensor[] sensors, ActuatorComponent[] actuatorComponents,
int observableAttributeTotalSize = 0,
BehaviorType behaviorType = BehaviorType.Default)
{

}
failedModelChecks.AddRange(
CheckInputTensorPresence(model, brainParameters, memorySize, sensorComponents)
CheckInputTensorPresence(model, brainParameters, memorySize, sensors)
CheckInputTensorShape(model, brainParameters, sensorComponents, observableAttributeTotalSize)
CheckInputTensorShape(model, brainParameters, sensors, observableAttributeTotalSize)
);
failedModelChecks.AddRange(
CheckOutputTensorShape(model, brainParameters, actuatorComponents)

/// <param name="memory">
/// The memory size that the model is expecting.
/// </param>
/// <param name="sensorComponents">Array of attached sensor components</param>
/// <param name="sensors">Array of attached sensor components</param>
/// <returns>
/// An IEnumerable of strings corresponding to the failed input presence checks.
/// </returns>

int memory,
SensorComponent[] sensorComponents
ISensor[] sensors
)
{
var failedModelChecks = new List<string>();

// If there are not enough visual observation inputs compared to what the
// sensors expect.
var visObsIndex = 0;
for (var sensorIndex = 0; sensorIndex < sensorComponents.Length; sensorIndex++)
for (var sensorIndex = 0; sensorIndex < sensors.Length; sensorIndex++)
var sensor = sensorComponents[sensorIndex];
var sensor = sensors[sensorIndex];
if (sensor.GetObservationShape().Length == 3)
{
if (!tensorsNames.Contains(

/// Checks that the shape of the visual observation input placeholder is the same as the corresponding sensor.
/// </summary>
/// <param name="tensorProxy">The tensor that is expected by the model</param>
/// <param name="sensorComponent">The sensor that produces the visual observation.</param>
/// <param name="sensor">The sensor that produces the visual observation.</param>
TensorProxy tensorProxy, SensorComponent sensorComponent)
TensorProxy tensorProxy, ISensor sensor)
var shape = sensorComponent.GetObservationShape();
var shape = sensor.GetObservationShape();
var heightBp = shape[0];
var widthBp = shape[1];
var pixelBp = shape[2];

/// Checks that the shape of the rank 2 observation input placeholder is the same as the corresponding sensor.
/// </summary>
/// <param name="tensorProxy">The tensor that is expected by the model</param>
/// <param name="sensorComponent">The sensor that produces the visual observation.</param>
/// <param name="sensor">The sensor that produces the visual observation.</param>
TensorProxy tensorProxy, SensorComponent sensorComponent)
TensorProxy tensorProxy, ISensor sensor)
var shape = sensorComponent.GetObservationShape();
var shape = sensor.GetObservationShape();
var dim1Bp = shape[0];
var dim2Bp = shape[1];
var dim1T = tensorProxy.Channels;

/// <param name="brainParameters">
/// The BrainParameters that are used to verify the compatibility with the InferenceEngine
/// </param>
/// <param name="sensorComponents">Attached sensors</param>
/// <param name="sensors">Attached sensors</param>
Model model, BrainParameters brainParameters, SensorComponent[] sensorComponents,
Model model, BrainParameters brainParameters, ISensor[] sensors,
new Dictionary<string, Func<BrainParameters, TensorProxy, SensorComponent[], int, string>>()
new Dictionary<string, Func<BrainParameters, TensorProxy, ISensor[], int, string>>()
{
{TensorNames.VectorObservationPlaceholder, CheckVectorObsShape},
{TensorNames.PreviousActionPlaceholder, CheckPreviousActionShape},

}
var visObsIndex = 0;
for (var sensorIndex = 0; sensorIndex < sensorComponents.Length; sensorIndex++)
for (var sensorIndex = 0; sensorIndex < sensors.Length; sensorIndex++)
var sensorComponent = sensorComponents[sensorIndex];
if (sensorComponent.GetObservationShape().Length == 3)
var sens = sensors[sensorIndex];
if (sens.GetObservationShape().Length == 3)
(bp, tensor, scs, i) => CheckVisualObsShape(tensor, sensorComponent);
(bp, tensor, scs, i) => CheckVisualObsShape(tensor, sens);
if (sensorComponent.GetObservationShape().Length == 2)
if (sens.GetObservationShape().Length == 2)
(bp, tensor, scs, i) => CheckRankTwoObsShape(tensor, sensorComponent);
(bp, tensor, scs, i) => CheckRankTwoObsShape(tensor, sens);
}
}

else
{
var tester = tensorTester[tensor.name];
var error = tester.Invoke(brainParameters, tensor, sensorComponents, observableAttributeTotalSize);
var error = tester.Invoke(brainParameters, tensor, sensors, observableAttributeTotalSize);
if (error != null)
{
failedModelChecks.Add(error);

/// The BrainParameters that are used to verify the compatibility with the InferenceEngine
/// </param>
/// <param name="tensorProxy">The tensor that is expected by the model</param>
/// <param name="sensorComponents">Array of attached sensor components</param>
/// <param name="sensors">Array of attached sensor components</param>
/// <param name="observableAttributeTotalSize">Sum of the sizes of all ObservableAttributes.</param>
/// <returns>
/// If the Check failed, returns a string containing information about why the

BrainParameters brainParameters, TensorProxy tensorProxy, SensorComponent[] sensorComponents,
BrainParameters brainParameters, TensorProxy tensorProxy, ISensor[] sensors,
int observableAttributeTotalSize)
{
var vecObsSizeBp = brainParameters.VectorObservationSize;

var totalVectorSensorSize = 0;
foreach (var sensorComp in sensorComponents)
foreach (var sens in sensors)
if (sensorComp.GetObservationShape().Length == 1)
if ((sens.GetObservationShape().Length == 1))
totalVectorSensorSize += sensorComp.GetObservationShape()[0];
totalVectorSensorSize += sens.GetObservationShape()[0];
totalVectorSensorSize += observableAttributeTotalSize;
if (vecObsSizeBp * numStackedVector + totalVectorSensorSize != totalVecObsSizeT)
if (totalVectorSensorSize != totalVecObsSizeT)
foreach (var sensorComp in sensorComponents)
foreach (var sensorComp in sensors)
{
if (sensorComp.GetObservationShape().Length == 1)
{

$"but received: \n" +
$"Vector observations: {vecObsSizeBp} x {numStackedVector}\n" +
$"Total [Observable] attributes: {observableAttributeTotalSize}\n" +
$"SensorComponent sizes: {sensorSizes}.";
$"Sensor sizes: {sensorSizes}.";
}
return null;
}

/// The BrainParameters that are used to verify the compatibility with the InferenceEngine
/// </param>
/// <param name="tensorProxy"> The tensor that is expected by the model</param>
/// <param name="sensorComponents">Array of attached sensor components (unused).</param>
/// <param name="sensors">Array of attached sensor components (unused).</param>
SensorComponent[] sensorComponents, int observableAttributeTotalSize)
ISensor[] sensors, int observableAttributeTotalSize)
{
var numberActionsBp = brainParameters.ActionSpec.NumDiscreteActions;
var numberActionsT = tensorProxy.shape[tensorProxy.shape.Length - 1];

28
com.unity.ml-agents/Runtime/Inference/ModelRunner.cs


using System;
using System.Collections.Generic;
using Unity.Barracuda;
using UnityEngine.Profiling;

SensorShapeValidator m_SensorShapeValidator = new SensorShapeValidator();
bool m_VisualObservationsInitialized;
bool m_ObservationsInitialized;
/// <summary>
/// Initializes the Brain with the Model that it will use when selecting actions for

public ModelRunner(
NNModel model,
ActionSpec actionSpec,
InferenceDevice inferenceDevice = InferenceDevice.CPU,
InferenceDevice inferenceDevice,
int seed = 0)
{
Model barracudaModel;

D.logEnabled = m_Verbose;
barracudaModel = ModelLoader.Load(model);
var executionDevice = inferenceDevice == InferenceDevice.GPU
? WorkerFactory.Type.ComputePrecompiled
: WorkerFactory.Type.CSharp;
WorkerFactory.Type executionDevice;
switch (inferenceDevice)
{
case InferenceDevice.CPU:
executionDevice = WorkerFactory.Type.CSharp;
break;
case InferenceDevice.GPU:
executionDevice = WorkerFactory.Type.ComputePrecompiled;
break;
case InferenceDevice.Burst:
executionDevice = WorkerFactory.Type.CSharpBurst;
break;
default:
executionDevice = WorkerFactory.Type.CSharpBurst;
break;
}
m_Engine = WorkerFactory.CreateWorker(executionDevice, barracudaModel, m_Verbose);
}
else

{
return;
}
if (!m_VisualObservationsInitialized)
if (!m_ObservationsInitialized)
m_VisualObservationsInitialized = true;
m_ObservationsInitialized = true;
}
Profiler.BeginSample("ModelRunner.DecideAction");

17
com.unity.ml-agents/Runtime/Inference/Utils/Multinomial.cs


/// to be monotonic (always increasing). If the CMF is scaled, then the last entry in
/// the array will be 1.0.
/// </param>
/// <returns>A sampled index from the CMF ranging from 0 to cmf.Length-1.</returns>
public int Sample(float[] cmf)
/// <param name="branchSize">The number of possible branches, i.e. the effective size of the cmf array.</param>
/// <returns>A sampled index from the CMF ranging from 0 to branchSize-1.</returns>
public int Sample(float[] cmf, int branchSize)
var p = (float)m_Random.NextDouble() * cmf[cmf.Length - 1];
var p = (float)m_Random.NextDouble() * cmf[branchSize - 1];
var cls = 0;
while (cmf[cls] < p)
{

return cls;
}
/// <summary>
/// Samples from the Multinomial distribution defined by the provided cumulative
/// mass function.
/// </summary>
/// <returns>A sampled index from the CMF ranging from 0 to cmf.Length-1.</returns>
public int Sample(float[] cmf)
{
return Sample(cmf, cmf.Length);
}
}
}

12
com.unity.ml-agents/Runtime/Policies/BarracudaPolicy.cs


public enum InferenceDevice
{
/// <summary>
/// CPU inference
/// CPU inference. Corresponds to WorkerFactory.Type.CSharp in Barracuda.
/// Burst is recommended instead; this is kept for legacy compatibility.
/// GPU inference
/// GPU inference. Corresponds to WorkerFactory.Type.ComputePrecompiled in Barracuda.
GPU = 1
GPU = 1,
/// <summary>
/// CPU inference using Burst. Corresponds to WorkerFactory.Type.CSharpBurst in Barracuda.
/// </summary>
Burst = 2,
}
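As an aside, a minimal sketch of opting back into GPU inference from code now that `InferenceDevice.Burst` is the default on new `BehaviorParameters` (this assumes `BehaviorParameters.InferenceDevice` is exposed as a settable property, as the summary in the BehaviorParameters.cs hunk below suggests):
```csharp
using UnityEngine;
using Unity.MLAgents.Policies;

public class InferenceDeviceSelector : MonoBehaviour
{
    void Awake()
    {
        // Assumption: BehaviorParameters exposes InferenceDevice as a
        // public get/set property; new agents default to InferenceDevice.Burst.
        var behaviorParameters = GetComponent<BehaviorParameters>();
        if (SystemInfo.supportsComputeShaders)
        {
            behaviorParameters.InferenceDevice = InferenceDevice.GPU;
        }
    }
}
```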
/// <summary>

2
com.unity.ml-agents/Runtime/Policies/BehaviorParameters.cs


}
[HideInInspector, SerializeField]
InferenceDevice m_InferenceDevice;
InferenceDevice m_InferenceDevice = InferenceDevice.Burst;
/// <summary>
/// How inference is performed for this Agent's model.

21
com.unity.ml-agents/Runtime/Sensors/BufferSensor.cs


namespace Unity.MLAgents.Sensors
{
internal class BufferSensor : ISensor, IDimensionPropertiesSensor, IBuiltInSensor
/// <summary>
/// A sensor that allows observing a variable number of entities.
/// </summary>
public class BufferSensor : ISensor, IDimensionPropertiesSensor, IBuiltInSensor
static DimensionProperty[] s_DimensionProperties = new DimensionProperty[]{
DimensionProperty.VariableSize,
DimensionProperty.None
};
public BufferSensor(int maxNumberObs, int obsSize)
{
m_MaxNumObs = maxNumberObs;

/// <inheritdoc/>
public DimensionProperty[] GetDimensionProperties()
{
return new DimensionProperty[]{
DimensionProperty.VariableSize,
DimensionProperty.None
};
return s_DimensionProperties;
}
/// <summary>

/// <param name="obs"> The float array observation</param>
public void AppendObservation(float[] obs)
{
if (obs.Length != m_ObsSize)
{
throw new UnityAgentsException(
"The BufferSensor was expecting an observation of size " +
$"{m_ObsSize} but received {obs.Length} observations instead."
);
}
if (m_CurrentNumObservables >= m_MaxNumObs)
{
return;

14
com.unity.ml-agents/Runtime/Sensors/BufferSensorComponent.cs


{
/// <summary>
/// A component for BufferSensor.
/// A SensorComponent that creates a <see cref="BufferSensor"/>.
internal class BufferSensorComponent : SensorComponent
public class BufferSensorComponent : SensorComponent
/// <summary>
/// This is how many floats each entity will be represented with. This number
/// is fixed and all entities must have the same representation.
/// </summary>
/// <summary>
/// This is the maximum number of entities the `BufferSensor` will be able to
/// collect.
/// </summary>
private BufferSensor m_Sensor;
/// <inheritdoc/>

17
com.unity.ml-agents/Runtime/Sensors/CameraSensor.cs


/// <summary>
/// A sensor that wraps a Camera object to generate visual observations for an agent.
/// </summary>
public class CameraSensor : ISensor, IBuiltInSensor
public class CameraSensor : ISensor, IBuiltInSensor, IDimensionPropertiesSensor
{
Camera m_Camera;
int m_Width;

int[] m_Shape;
SensorCompressionType m_CompressionType;
static DimensionProperty[] s_DimensionProperties = new DimensionProperty[] {
DimensionProperty.TranslationalEquivariance,
DimensionProperty.TranslationalEquivariance,
DimensionProperty.None };
/// <summary>
/// The Camera used for rendering the sensor observations.

public int[] GetObservationShape()
{
return m_Shape;
}
/// <summary>
/// Accessor for the dimension properties of a camera sensor. A camera sensor
/// has translational equivariance along width and height and no property along
/// the channels dimension.
/// </summary>
/// <returns></returns>
public DimensionProperty[] GetDimensionProperties()
{
return s_DimensionProperties;
}
/// <summary>

2
com.unity.ml-agents/Runtime/Sensors/IDimensionPropertiesSensor.cs


/// The Dimension property flags of the observations
/// </summary>
[System.Flags]
internal enum DimensionProperty
public enum DimensionProperty
{
/// <summary>
/// No properties specified.

14
com.unity.ml-agents/Runtime/SideChannels/SideChannel.cs


using System.Collections.Generic;
using System;
using UnityEngine;
namespace Unity.MLAgents.SideChannels
{

internal void ProcessMessage(byte[] msg)
{
using (var incomingMsg = new IncomingMessage(msg))
try
{
using (var incomingMsg = new IncomingMessage(msg))
{
OnMessageReceived(incomingMsg);
}
}
catch (Exception ex)
OnMessageReceived(incomingMsg);
// Catch all errors in the sidechannel processing, so that a single
// bad SideChannel implementation doesn't take everything down with it.
Debug.LogError($"Error processing SideChannel message: {ex}.\nThe message will be skipped.");
}
}
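For context, a minimal sketch of a custom side channel whose `OnMessageReceived` runs inside the try/catch above; the channel ID and logging behavior are illustrative only:
```csharp
using System;
using UnityEngine;
using Unity.MLAgents.SideChannels;

// A faulty OnMessageReceived is now caught and logged by ProcessMessage
// instead of breaking the rest of the communication step.
public class StringLogSideChannel : SideChannel
{
    public StringLogSideChannel()
    {
        // Illustrative, fixed ID; the Python side must register the same one.
        ChannelId = new Guid("621f0a70-4f87-11ea-a6bf-784f4387d1f7");
    }

    protected override void OnMessageReceived(IncomingMessage msg)
    {
        Debug.Log("Message received from Python: " + msg.ReadString());
    }
}
```
Such a channel would typically be registered during startup with `SideChannelManager.RegisterSideChannel()`.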

7
com.unity.ml-agents/Runtime/StatsRecorder.cs


/// <summary>
/// Values within the summary period are summed up before reporting.
/// </summary>
Sum = 2
Sum = 2,
/// <summary>
/// Values within the summary period are reported as a histogram.
/// </summary>
Histogram = 3
}
/// <summary>

4
com.unity.ml-agents/Runtime/Timer.cs


SaveJsonTimers(fs);
fs.Close();
}
catch (IOException)
catch (SystemException)
// It's possible we don't have write access to the directory.
// We may not have write access to the directory.
Debug.LogWarning($"Unable to save timers to file {filename}");
}
#endif

19
com.unity.ml-agents/Tests/Editor/Communicator/RpcCommunicatorTests.cs


{
var unityVerStr = "1.0.0";
var pythonVerStr = "1.0.0";
var pythonPackageVerStr = "0.16.0";
pythonVerStr,
pythonPackageVerStr));
pythonVerStr));
pythonVerStr,
pythonPackageVerStr));
pythonVerStr));
pythonVerStr,
pythonPackageVerStr));
pythonVerStr));
pythonVerStr,
pythonPackageVerStr));
pythonVerStr));
pythonVerStr,
pythonPackageVerStr));
pythonVerStr));
pythonVerStr,
pythonPackageVerStr));
pythonVerStr));
}
}

198
com.unity.ml-agents/Tests/Editor/DiscreteActionOutputApplierTest.cs


using System;
using System.Collections.Generic;
using UnityEngine;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Inference.Utils;
namespace Unity.MLAgents.Tests
{

public void TestEvalP()
{
var m = new Multinomial(2018);
var src = new TensorProxy
{
data = new Tensor(1, 3, new[] { 0.1f, 0.2f, 0.7f }),
valueType = TensorProxy.TensorType.FloatingPoint
};
var dst = new TensorProxy
{
data = new Tensor(1, 3),
valueType = TensorProxy.TensorType.FloatingPoint
};
DiscreteActionOutputApplier.Eval(src, dst, m);
float[] reference = { 2, 2, 1 };
for (var i = 0; i < dst.data.length; i++)
{
Assert.AreEqual(reference[i], dst.data[i]);
++i;
}
}
[Test]
public void TestEvalLogits()
public void TestDiscreteApply()
var m = new Multinomial(2018);
var actionSpec = ActionSpec.MakeDiscrete(3, 2);
const float smallLogProb = -1000.0f;
const float largeLogProb = -1.0f;
var src = new TensorProxy
var logProbs = new TensorProxy
1,
3,
new[] { Mathf.Log(0.1f) - 50, Mathf.Log(0.2f) - 50, Mathf.Log(0.7f) - 50 }),
valueType = TensorProxy.TensorType.FloatingPoint
};
var dst = new TensorProxy
{
data = new Tensor(1, 3),
valueType = TensorProxy.TensorType.FloatingPoint
};
DiscreteActionOutputApplier.Eval(src, dst, m);
float[] reference = { 2, 2, 2 };
for (var i = 0; i < dst.data.length; i++)
{
Assert.AreEqual(reference[i], dst.data[i]);
++i;
}
}
[Test]
public void TestEvalBatching()
{
var m = new Multinomial(2018);
var src = new TensorProxy
{
data = new Tensor(2, 3, new[]
{
Mathf.Log(0.1f) - 50, Mathf.Log(0.2f) - 50, Mathf.Log(0.7f) - 50,
Mathf.Log(0.3f) - 25, Mathf.Log(0.4f) - 25, Mathf.Log(0.3f) - 25
}),
valueType = TensorProxy.TensorType.FloatingPoint
};
var dst = new TensorProxy
{
data = new Tensor(2, 3),
valueType = TensorProxy.TensorType.FloatingPoint
};
DiscreteActionOutputApplier.Eval(src, dst, m);
float[] reference = { 2, 2, 2, 0, 1, 0 };
for (var i = 0; i < dst.data.length; i++)
{
Assert.AreEqual(reference[i], dst.data[i]);
++i;
}
}
[Test]
public void TestSrcInt()
{
var m = new Multinomial(2018);
var src = new TensorProxy
{
valueType = TensorProxy.TensorType.Integer
};
Assert.Throws<NotImplementedException>(
() => DiscreteActionOutputApplier.Eval(src, null, m));
}
[Test]
public void TestDstInt()
{
var m = new Multinomial(2018);
var src = new TensorProxy
{
2,
5,
new[]
{
smallLogProb, smallLogProb, largeLogProb, // Agent 0, branch 0
smallLogProb, largeLogProb, // Agent 0, branch 1
largeLogProb, smallLogProb, smallLogProb, // Agent 1, branch 0
largeLogProb, smallLogProb, // Agent 1, branch 1
}),
var dst = new TensorProxy
{
valueType = TensorProxy.TensorType.Integer
};
var applier = new DiscreteActionOutputApplier(actionSpec, 2020, null);
var agentIds = new List<int> { 42, 1337 };
var actionBuffers = new Dictionary<int, ActionBuffers>();
actionBuffers[42] = new ActionBuffers(actionSpec);
actionBuffers[1337] = new ActionBuffers(actionSpec);
Assert.Throws<ArgumentException>(
() => DiscreteActionOutputApplier.Eval(src, dst, m));
}
[Test]
public void TestSrcDataNull()
{
var m = new Multinomial(2018);
applier.Apply(logProbs, agentIds, actionBuffers);
Assert.AreEqual(2, actionBuffers[42].DiscreteActions[0]);
Assert.AreEqual(1, actionBuffers[42].DiscreteActions[1]);
var src = new TensorProxy
{
valueType = TensorProxy.TensorType.FloatingPoint
};
var dst = new TensorProxy
{
valueType = TensorProxy.TensorType.FloatingPoint
};
Assert.Throws<ArgumentNullException>(
() => DiscreteActionOutputApplier.Eval(src, dst, m));
}
[Test]
public void TestDstDataNull()
{
var m = new Multinomial(2018);
var src = new TensorProxy
{
valueType = TensorProxy.TensorType.FloatingPoint,
data = new Tensor(0, 1)
};
var dst = new TensorProxy
{
valueType = TensorProxy.TensorType.FloatingPoint
};
Assert.Throws<ArgumentNullException>(
() => DiscreteActionOutputApplier.Eval(src, dst, m));
}
[Test]
public void TestUnequalBatchSize()
{
var m = new Multinomial(2018);
var src = new TensorProxy
{
valueType = TensorProxy.TensorType.FloatingPoint,
data = new Tensor(1, 1)
};
var dst = new TensorProxy
{
valueType = TensorProxy.TensorType.FloatingPoint,
data = new Tensor(2, 1)
};
Assert.Throws<ArgumentException>(
() => DiscreteActionOutputApplier.Eval(src, dst, m));
Assert.AreEqual(0, actionBuffers[1337].DiscreteActions[0]);
Assert.AreEqual(0, actionBuffers[1337].DiscreteActions[1]);
}
}
}

13
com.unity.ml-agents/Tests/Editor/ModelRunnerTest.cs


[Test]
public void TestCreation()
{
var modelRunner = new ModelRunner(continuousONNXModel, GetContinuous2vis8vec2actionActionSpec());
var inferenceDevice = InferenceDevice.Burst;
var modelRunner = new ModelRunner(continuousONNXModel, GetContinuous2vis8vec2actionActionSpec(), inferenceDevice);
modelRunner = new ModelRunner(discreteONNXModel, GetDiscrete1vis0vec_2_3action_recurrModelActionSpec());
modelRunner = new ModelRunner(discreteONNXModel, GetDiscrete1vis0vec_2_3action_recurrModelActionSpec(), inferenceDevice);
modelRunner = new ModelRunner(hybridONNXModel, GetHybrid0vis53vec_3c_2dActionSpec());
modelRunner = new ModelRunner(hybridONNXModel, GetHybrid0vis53vec_3c_2dActionSpec(), inferenceDevice);
modelRunner = new ModelRunner(continuousNNModel, GetContinuous2vis8vec2actionActionSpec());
modelRunner = new ModelRunner(continuousNNModel, GetContinuous2vis8vec2actionActionSpec(), inferenceDevice);
modelRunner = new ModelRunner(discreteNNModel, GetDiscrete1vis0vec_2_3action_recurrModelActionSpec());
modelRunner = new ModelRunner(discreteNNModel, GetDiscrete1vis0vec_2_3action_recurrModelActionSpec(), inferenceDevice);
modelRunner.Dispose();
}

public void TestRunModel()
{
var actionSpec = GetDiscrete1vis0vec_2_3action_recurrModelActionSpec();
var modelRunner = new ModelRunner(discreteONNXModel, actionSpec);
var modelRunner = new ModelRunner(discreteONNXModel, actionSpec, InferenceDevice.Burst);
var info1 = new AgentInfo();
info1.episodeId = 1;
modelRunner.PutObservations(info1, new[] { sensor_21_20_3.CreateSensor() }.ToList());

30
com.unity.ml-agents/Tests/Editor/ParameterLoaderTest.cs


var errors = BarracudaModelParamLoader.CheckModel(
model, validBrainParameters,
new SensorComponent[] { sensor_21_20_3, sensor_20_22_3 }, new ActuatorComponent[0]
new ISensor[] { new VectorSensor(8), sensor_21_20_3.CreateSensor(), sensor_20_22_3.CreateSensor() }, new ActuatorComponent[0]
);
Assert.AreEqual(0, errors.Count()); // There should not be any errors
}

var errors = BarracudaModelParamLoader.CheckModel(
model, validBrainParameters,
new SensorComponent[] { sensor_21_20_3 }, new ActuatorComponent[0]
new ISensor[] { sensor_21_20_3.CreateSensor() }, new ActuatorComponent[0]
);
Assert.AreEqual(0, errors.Count()); // There should not be any errors
}

var errors = BarracudaModelParamLoader.CheckModel(
model, validBrainParameters,
new SensorComponent[] { }, new ActuatorComponent[0]
new ISensor[] { new VectorSensor(validBrainParameters.VectorObservationSize) }, new ActuatorComponent[0]
);
Assert.AreEqual(0, errors.Count()); // There should not be any errors
}

brainParameters.VectorObservationSize = 9; // Invalid observation
var errors = BarracudaModelParamLoader.CheckModel(
model, brainParameters,
new SensorComponent[] { sensor_21_20_3, sensor_20_22_3 }, new ActuatorComponent[0]
new ISensor[] { sensor_21_20_3.CreateSensor(), sensor_20_22_3.CreateSensor() }, new ActuatorComponent[0]
);
Assert.Greater(errors.Count(), 0);

model, brainParameters,
new SensorComponent[] { sensor_21_20_3, sensor_20_22_3 }, new ActuatorComponent[0]
new ISensor[] { sensor_21_20_3.CreateSensor(), sensor_20_22_3.CreateSensor() }, new ActuatorComponent[0]
);
Assert.Greater(errors.Count(), 0);
}

var brainParameters = GetDiscrete1vis0vec_2_3action_recurrModelBrainParameters();
brainParameters.VectorObservationSize = 1; // Invalid observation
var errors = BarracudaModelParamLoader.CheckModel(model, brainParameters, new SensorComponent[] { sensor_21_20_3 }, new ActuatorComponent[0]);
var errors = BarracudaModelParamLoader.CheckModel(model, brainParameters, new ISensor[] { sensor_21_20_3.CreateSensor() }, new ActuatorComponent[0]);
Assert.Greater(errors.Count(), 0);
}

brainParameters.VectorObservationSize = 9; // Invalid observation
var errors = BarracudaModelParamLoader.CheckModel(
model, brainParameters,
new SensorComponent[] { }, new ActuatorComponent[0]
new ISensor[] { }, new ActuatorComponent[0]
);
Assert.Greater(errors.Count(), 0);

model, brainParameters,
new SensorComponent[] { }, new ActuatorComponent[0]
new ISensor[] { }, new ActuatorComponent[0]
);
Assert.Greater(errors.Count(), 0);
}

var brainParameters = GetContinuous2vis8vec2actionBrainParameters();
brainParameters.ActionSpec = ActionSpec.MakeContinuous(3); // Invalid action
var errors = BarracudaModelParamLoader.CheckModel(model, brainParameters, new SensorComponent[] { sensor_21_20_3, sensor_20_22_3 }, new ActuatorComponent[0]);
var errors = BarracudaModelParamLoader.CheckModel(model, brainParameters, new ISensor[] { sensor_21_20_3.CreateSensor(), sensor_20_22_3.CreateSensor() }, new ActuatorComponent[0]);
errors = BarracudaModelParamLoader.CheckModel(model, brainParameters, new SensorComponent[] { sensor_21_20_3, sensor_20_22_3 }, new ActuatorComponent[0]);
errors = BarracudaModelParamLoader.CheckModel(model, brainParameters, new ISensor[] { sensor_21_20_3.CreateSensor(), sensor_20_22_3.CreateSensor() }, new ActuatorComponent[0]);
Assert.Greater(errors.Count(), 0);
}

var brainParameters = GetDiscrete1vis0vec_2_3action_recurrModelBrainParameters();
brainParameters.ActionSpec = ActionSpec.MakeDiscrete(3, 3); // Invalid action
var errors = BarracudaModelParamLoader.CheckModel(model, brainParameters, new SensorComponent[] { sensor_21_20_3 }, new ActuatorComponent[0]);
var errors = BarracudaModelParamLoader.CheckModel(model, brainParameters, new ISensor[] { sensor_21_20_3.CreateSensor() }, new ActuatorComponent[0]);
errors = BarracudaModelParamLoader.CheckModel(model, brainParameters, new SensorComponent[] { sensor_21_20_3 }, new ActuatorComponent[0]);
errors = BarracudaModelParamLoader.CheckModel(model, brainParameters, new ISensor[] { sensor_21_20_3.CreateSensor() }, new ActuatorComponent[0]);
Assert.Greater(errors.Count(), 0);
}

var brainParameters = GetHybridBrainParameters();
brainParameters.ActionSpec = new ActionSpec(3, new[] { 3 }); // Invalid discrete action size
var errors = BarracudaModelParamLoader.CheckModel(model, brainParameters, new SensorComponent[] { sensor_21_20_3, sensor_20_22_3 }, new ActuatorComponent[0]);
var errors = BarracudaModelParamLoader.CheckModel(model, brainParameters, new ISensor[] { sensor_21_20_3.CreateSensor(), sensor_20_22_3.CreateSensor() }, new ActuatorComponent[0]);
errors = BarracudaModelParamLoader.CheckModel(model, brainParameters, new SensorComponent[] { sensor_21_20_3, sensor_20_22_3 }, new ActuatorComponent[0]);
errors = BarracudaModelParamLoader.CheckModel(model, brainParameters, new ISensor[] { sensor_21_20_3.CreateSensor(), sensor_20_22_3.CreateSensor() }, new ActuatorComponent[0]);
Assert.Greater(errors.Count(), 0);
}

var brainParameters = GetContinuous2vis8vec2actionBrainParameters();
var errors = BarracudaModelParamLoader.CheckModel(null, brainParameters, new SensorComponent[] { sensor_21_20_3, sensor_20_22_3 }, new ActuatorComponent[0]);
var errors = BarracudaModelParamLoader.CheckModel(null, brainParameters, new ISensor[] { sensor_21_20_3.CreateSensor(), sensor_20_22_3.CreateSensor() }, new ActuatorComponent[0]);
Assert.Greater(errors.Count(), 0);
}
}

2
com.unity.ml-agents/package.json


"unity": "2018.4",
"description": "Use state-of-the-art machine learning to create intelligent character behaviors in any Unity environment (games, robotics, film, etc.).",
"dependencies": {
"com.unity.barracuda": "1.2.1-preview",
"com.unity.barracuda": "1.3.0-preview",
"com.unity.modules.imageconversion": "1.0.0",
"com.unity.modules.jsonserialize": "1.0.0",
"com.unity.modules.physics": "1.0.0",

2
docs/Installation.md


installing ML-Agents. Activate your virtual environment and run from the command line:
```sh
pip3 install torch==1.7.0 -f https://download.pytorch.org/whl/torch_stable.html
pip3 install torch~=1.7.1 -f https://download.pytorch.org/whl/torch_stable.html
```
Note that on Windows, you may also need Microsoft's

64
docs/Learning-Environment-Design-Agents.md


- [Visual Observation Summary & Best Practices](#visual-observation-summary--best-practices)
- [Raycast Observations](#raycast-observations)
- [RayCast Observation Summary & Best Practices](#raycast-observation-summary--best-practices)
- [Variable Length Observations](#variable-length-observations)
- [Variable Length Observation Summary & Best Practices](#variable-length-observation-summary--best-practices)
- [Actions and Actuators](#actions-and-actuators)
- [Continuous Actions](#continuous-actions)
- [Discrete Actions](#discrete-actions)

#### Observable Fields and Properties
Another approach is to define the relevant observations as fields or properties
on your Agent class, and annotate them with an `ObservableAttribute`. For
example, in the 3DBall example above, the rigid body velocity could be observed
example, in the Ball3DHardAgent, the difference between positions could be observed
public class Ball3DAgent : Agent {
public class Ball3DHardAgent : Agent {
[Observable]
public Vector3 RigidBodyVelocity
[Observable(numStackedObservations: 9)]
Vector3 PositionDelta
get { return m_BallRb.velocity; }
get
{
return ball.transform.position - gameObject.transform.position;
}
}
}
```

for the agent that doesn't require a fully rendered image to convey.
- Use as few rays and tags as necessary to solve the problem in order to improve
learning stability and agent performance.
### Variable Length Observations
It is possible for agents to collect observations from a varying number of
GameObjects by using a `BufferSensor`.
You can add a `BufferSensor` to your Agent by adding a `BufferSensorComponent` to
its GameObject.
The `BufferSensor` can be useful in situations in which the Agent must pay
attention to a varying number of entities (for example, a varying number of
enemies or projectiles).
On the trainer side, the `BufferSensor`
is processed using an attention module. More information about attention
mechanisms can be found [here](https://arxiv.org/abs/1706.03762). Training or
doing inference with variable length observations can be slower than using
a flat vector observation. However, attention mechanisms enable solving
problems that require comparative reasoning between entities in a scene
such as our [Sorter environment](Learning-Environment-Examples.md#sorter).
Note that even though the `BufferSensor` can process a variable number of
entities, you still need to define a maximum number of entities. This is
because our network architecture requires knowing what the shape of the
observations will be. If fewer entities are observed than the maximum, the
observation will be padded with zeros and the trainer will ignore
the padded observations. Note that attention layers are invariant to
the order of the entities, so there is no need to properly "order" the
entities before feeding them into the `BufferSensor`.
The `BufferSensorComponent` Editor inspector has two arguments:
- `Observation Size` : This is how many floats each entity will be
represented with. This number is fixed and all entities must
have the same representation. For example, if the relevant information for
each entity you want to put into the `BufferSensor` is its position and
speed, then the `Observation Size` should be 6 floats.
- `Maximum Number of Entities` : This is the maximum number of entities
the `BufferSensor` will be able to collect.
To add an entity's observations to a `BufferSensorComponent`, you need
to call `BufferSensorComponent.AppendObservation()`
with a float array of size `Observation Size` as an argument, as shown in
the sketch below.
__Note__: Currently, the observations put into the `BufferSensor` are
not normalized; you will need to normalize your observations manually
to values between -1 and 1.
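For illustration, a minimal sketch of an Agent feeding entity data to a `BufferSensorComponent` each step; the `m_Entities` array and the scaling constants are hypothetical placeholders for your own game logic:
```csharp
using UnityEngine;
using Unity.MLAgents;
using Unity.MLAgents.Sensors;

// Assumes a BufferSensorComponent with Observation Size = 6 is attached
// to the same GameObject, and that the entities to observe are assigned
// to the (hypothetical) m_Entities array in the Inspector.
public class EntityObserverAgent : Agent
{
    [SerializeField]
    Rigidbody[] m_Entities;
    BufferSensorComponent m_BufferSensor;

    public override void Initialize()
    {
        m_BufferSensor = GetComponent<BufferSensorComponent>();
    }

    public override void CollectObservations(VectorSensor sensor)
    {
        foreach (var entity in m_Entities)
        {
            // 3 floats for relative position + 3 floats for velocity,
            // scaled into roughly [-1, 1] before being appended.
            var relativePos = (entity.position - transform.position) / 20f;
            var velocity = entity.velocity / 10f;
            m_BufferSensor.AppendObservation(new float[]
            {
                relativePos.x, relativePos.y, relativePos.z,
                velocity.x, velocity.y, velocity.z
            });
        }
    }
}
```
Each appended array must contain exactly `Observation Size` floats; entities appended beyond `Maximum Number of Entities` are ignored.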
#### Variable Length Observation Summary & Best Practices
- Attach a `BufferSensorComponent` to your Agent to use a `BufferSensor`.
- Call `BufferSensorComponent.AppendObservation()` to add the observations
of an entity to the `BufferSensor`.
- Normalize each entity's observations before feeding them into the `BufferSensor`.
## Actions and Actuators

27
docs/Learning-Environment-Examples.md


- 37.6 for vector observations
- 34.2 for simple heuristic (pick a random valid move)
- 37.0 for greedy heuristic (pick the highest-scoring valid move)
## Sorter
![Sorter](images/sorter.png)
- Set-up: The Agent is in a circular room with numbered tiles. The values of the
tiles are random between 1 and 20. The tiles present in the room are randomized
at each episode. When the Agent visits a tile, it turns green.
- Goal: Visit all the tiles in ascending order.
- Agents: The environment contains a single Agent
- Agent Reward Function:
- -.0002 Existential penalty.
- +1 For visiting the right tile
- -1 For visiting the wrong tile
- BehaviorParameters:
- Vector Observations : 4 : 2 floats for Position and 2 floats for orientation
- Variable Length Observations : Between 1 and 20 entities (one for each tile),
each with 23 observations: the first 20 are a one-hot encoding of the value of the tile,
the 21st and 22nd represent the position of the tile relative to the Agent, and the 23rd
is `1` if the tile was visited and `0` otherwise.
- Actions: 3 discrete branched actions corresponding to forward, backward,
sideways movement, as well as rotation.
- Float Properties: One
- num_tiles: The maximum number of tiles to sample.
- Default: 2
- Recommended Minimum: 1
- Recommended Maximum: 20
- Benchmark Mean Reward: Depends on the number of tiles.

9
docs/Migrating.md


- `VectorSensor.AddObservation(IEnumerable<float>)` is deprecated. Use `VectorSensor.AddObservation(IList<float>)`
instead.
- `ObservationWriter.AddRange()` is deprecated. Use `ObservationWriter.AddList()` instead.
- `ActuatorComponent.CreateActuator()` is deprecated. Please override `ActuatorComponent.CreateActuators()`
instead (see the sketch below). Since `ActuatorComponent.CreateActuator()` is abstract, you will still need to
override it in your class until it is removed. It is only ever called if you don't override `ActuatorComponent.CreateActuators()`.
You can suppress the warnings by surrounding the method with the following pragma:
```c#
#pragma warning disable 672
public override IActuator CreateActuator() { ... }
#pragma warning restore 672
```
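For reference, a minimal sketch of a component migrated to the new API; `MyActuator` is a hypothetical `IActuator` implementation standing in for your own:
```c#
using Unity.MLAgents.Actuators;

public class MyActuatorComponent : ActuatorComponent
{
    // New API: return every actuator this component provides.
    public override IActuator[] CreateActuators()
    {
        return new IActuator[] { new MyActuator() };
    }

    // Old API: still abstract, so it must be overridden until it is removed.
    // It is only called if CreateActuators() is not overridden.
#pragma warning disable 672
    public override IActuator CreateActuator()
    {
        return new MyActuator();
    }
#pragma warning restore 672

    public override ActionSpec ActionSpec
    {
        get { return ActionSpec.MakeDiscrete(3); }
    }
}
```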
# Migrating

2
docs/Training-Configuration-File.md


- LSTM does not work well with continuous actions. Please use
discrete actions for better results.
- Since the memories must be sent back and forth between Python and Unity, using
too large `memory_size` will slow down training.
- Adding a recurrent layer increases the complexity of the neural network; it is
recommended to decrease `num_layers` when using recurrent layers.
- It is required that `memory_size` be divisible by 2.

2
ml-agents-envs/mlagents_envs/base_env.py


spaces for a group of Agents under the same behavior.
- observation_specs is a List of ObservationSpec NamedTuple containing
information about the Agent's observations, such as their shapes.
The order of the SensorSpec is the same as the order of the observations of an
The order of the ObservationSpec is the same as the order of the observations of an
agent.
- action_spec is an ActionSpec NamedTuple.
"""

11
ml-agents-envs/mlagents_envs/communicator_objects/capabilities_pb2.py


name='mlagents_envs/communicator_objects/capabilities.proto',
package='communicator_objects',
syntax='proto3',
serialized_pb=_b('\n5mlagents_envs/communicator_objects/capabilities.proto\x12\x14\x63ommunicator_objects\"\xaf\x01\n\x18UnityRLCapabilitiesProto\x12\x1a\n\x12\x62\x61seRLCapabilities\x18\x01 \x01(\x08\x12#\n\x1b\x63oncatenatedPngObservations\x18\x02 \x01(\x08\x12 \n\x18\x63ompressedChannelMapping\x18\x03 \x01(\x08\x12\x15\n\rhybridActions\x18\x04 \x01(\x08\x12\x19\n\x11trainingAnalytics\x18\x05 \x01(\x08\x42%\xaa\x02\"Unity.MLAgents.CommunicatorObjectsb\x06proto3')
serialized_pb=_b('\n5mlagents_envs/communicator_objects/capabilities.proto\x12\x14\x63ommunicator_objects\"\xd2\x01\n\x18UnityRLCapabilitiesProto\x12\x1a\n\x12\x62\x61seRLCapabilities\x18\x01 \x01(\x08\x12#\n\x1b\x63oncatenatedPngObservations\x18\x02 \x01(\x08\x12 \n\x18\x63ompressedChannelMapping\x18\x03 \x01(\x08\x12\x15\n\rhybridActions\x18\x04 \x01(\x08\x12\x19\n\x11trainingAnalytics\x18\x05 \x01(\x08\x12!\n\x19variableLengthObservation\x18\x06 \x01(\x08\x42%\xaa\x02\"Unity.MLAgents.CommunicatorObjectsb\x06proto3')
)

message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='variableLengthObservation', full_name='communicator_objects.UnityRLCapabilitiesProto.variableLengthObservation', index=5,
number=6, type=8, cpp_type=7, label=1,
has_default_value=False, default_value=False,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
],
extensions=[
],

oneofs=[
],
serialized_start=80,
serialized_end=255,
serialized_end=290,
)
DESCRIPTOR.message_types_by_name['UnityRLCapabilitiesProto'] = _UNITYRLCAPABILITIESPROTO

6
ml-agents-envs/mlagents_envs/communicator_objects/capabilities_pb2.pyi


compressedChannelMapping = ... # type: builtin___bool
hybridActions = ... # type: builtin___bool
trainingAnalytics = ... # type: builtin___bool
variableLengthObservation = ... # type: builtin___bool
def __init__(self,
*,

hybridActions : typing___Optional[builtin___bool] = None,
trainingAnalytics : typing___Optional[builtin___bool] = None,
variableLengthObservation : typing___Optional[builtin___bool] = None,
) -> None: ...
@classmethod
def FromString(cls, s: builtin___bytes) -> UnityRLCapabilitiesProto: ...

def ClearField(self, field_name: typing_extensions___Literal[u"baseRLCapabilities",u"compressedChannelMapping",u"concatenatedPngObservations",u"hybridActions",u"trainingAnalytics"]) -> None: ...
def ClearField(self, field_name: typing_extensions___Literal[u"baseRLCapabilities",u"compressedChannelMapping",u"concatenatedPngObservations",u"hybridActions",u"trainingAnalytics",u"variableLengthObservation"]) -> None: ...
def ClearField(self, field_name: typing_extensions___Literal[u"baseRLCapabilities",b"baseRLCapabilities",u"compressedChannelMapping",b"compressedChannelMapping",u"concatenatedPngObservations",b"concatenatedPngObservations",u"hybridActions",b"hybridActions",u"trainingAnalytics",b"trainingAnalytics"]) -> None: ...
def ClearField(self, field_name: typing_extensions___Literal[u"baseRLCapabilities",b"baseRLCapabilities",u"compressedChannelMapping",b"compressedChannelMapping",u"concatenatedPngObservations",b"concatenatedPngObservations",u"hybridActions",b"hybridActions",u"trainingAnalytics",b"trainingAnalytics",u"variableLengthObservation",b"variableLengthObservation"]) -> None: ...

4
ml-agents-envs/mlagents_envs/environment.py


# * 1.2.0 - support compression mapping for stacked compressed observations.
# * 1.3.0 - support action spaces with both continuous and discrete actions.
# * 1.4.0 - support training analytics sent from python trainer to the editor.
API_VERSION = "1.4.0"
# * 1.5.0 - support variable length observation training.
API_VERSION = "1.5.0"
# Default port that the editor listens on. If an environment executable
# isn't specified, this port will be used.

capabilities.compressedChannelMapping = True
capabilities.hybridActions = True
capabilities.trainingAnalytics = True
capabilities.variableLengthObservation = True
return capabilities
@staticmethod

21
ml-agents-envs/mlagents_envs/rpc_utils.py


observation_specs.append(
ObservationSpec(
tuple(obs.shape),
tuple(DimensionProperty(dim) for dim in obs.dimension_properties),
tuple(DimensionProperty(dim) for dim in obs.dimension_properties)
if len(obs.dimension_properties) > 0
else (DimensionProperty.UNSPECIFIED,) * len(obs.shape),
# proto from communicator < v1.3 does not set action spec, use deprecated fields instead
if (
brain_param_proto.action_spec.num_continuous_actions == 0

@timed
def observation_to_np_array(
def _observation_to_np_array(
obs: ObservationProto, expected_shape: Optional[Iterable[int]] = None
) -> np.ndarray:
"""

@timed
def _process_visual_observation(
def _process_maybe_compressed_observation(
obs_index: int,
shape: Tuple[int, int, int],
agent_info_list: Collection[AgentInfoProto],

batched_visual = [
observation_to_np_array(agent_obs.observations[obs_index], shape)
_observation_to_np_array(agent_obs.observations[obs_index], shape)
for agent_obs in agent_info_list
]
return np.array(batched_visual, dtype=np.float32)

@timed
def _process_vector_observation(
def _process_rank_one_or_two_observation(
obs_index: int, shape: Tuple[int, ...], agent_info_list: Collection[AgentInfoProto]
) -> np.ndarray:
if len(agent_info_list) == 0:

if is_visual:
obs_shape = cast(Tuple[int, int, int], observation_specs.shape)
decision_obs_list.append(
_process_visual_observation(
_process_maybe_compressed_observation(
_process_visual_observation(
_process_maybe_compressed_observation(
_process_vector_observation(
_process_rank_one_or_two_observation(
_process_vector_observation(
_process_rank_one_or_two_observation(
obs_index, observation_specs.shape, terminal_agent_info_list
)
)
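The rpc_utils changes rename the observation processors (_process_visual_observation becomes _process_maybe_compressed_observation, _process_vector_observation becomes _process_rank_one_or_two_observation) and fill in dimension properties when the proto does not supply them. A standalone sketch of that fallback, assuming only the DimensionProperty enum from mlagents_envs.base_env (the helper name is hypothetical):

    from typing import Sequence, Tuple
    from mlagents_envs.base_env import DimensionProperty

    def dimension_properties_or_default(
        shape: Sequence[int], dim_props: Sequence[int]
    ) -> Tuple[DimensionProperty, ...]:
        # matches the inline expression in behavior_spec_from_proto: use the proto's
        # dimension properties when present, otherwise mark every axis UNSPECIFIED
        if len(dim_props) > 0:
            return tuple(DimensionProperty(d) for d in dim_props)
        return (DimensionProperty.UNSPECIFIED,) * len(shape)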

3
ml-agents-envs/mlagents_envs/side_channel/stats_side_channel.py


# Values within the summary period are summed up before reporting.
SUM = 2
# All values within a summary period are reported as a histogram.
HISTOGRAM = 3
StatList = List[Tuple[float, StatsAggregationMethod]]
EnvironmentStats = Mapping[str, StatList]
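A small sketch of the EnvironmentStats shape these aliases describe, using hypothetical stat names and the aggregation methods defined above:

    stats: EnvironmentStats = {
        "Environment/Episode Length": [(103.0, StatsAggregationMethod.AVERAGE)],
        "Goals Scored": [(2.0, StatsAggregationMethod.SUM)],
        "Reward Distribution": [(0.1, StatsAggregationMethod.HISTOGRAM)],
    }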

14
ml-agents-envs/mlagents_envs/tests/test_rpc_utils.py


from mlagents_envs.rpc_utils import (
behavior_spec_from_proto,
process_pixels,
_process_visual_observation,
_process_vector_observation,
_process_maybe_compressed_observation,
_process_rank_one_or_two_observation,
steps_from_proto,
)
from PIL import Image

shapes = [(3,), (4,)]
list_proto = generate_list_agent_proto(n_agents, shapes)
for obs_index, shape in enumerate(shapes):
arr = _process_vector_observation(obs_index, shape, list_proto)
arr = _process_rank_one_or_two_observation(obs_index, shape, list_proto)
assert list(arr.shape) == ([n_agents] + list(shape))
assert np.allclose(arr, 0.1, atol=0.01)

ap2 = AgentInfoProto()
ap2.observations.extend([proto_obs_2])
ap_list = [ap1, ap2]
arr = _process_visual_observation(0, (128, 64, 3), ap_list)
arr = _process_maybe_compressed_observation(0, (128, 64, 3), ap_list)
assert list(arr.shape) == [2, 128, 64, 3]
assert np.allclose(arr[0, :, :, :], in_array_1, atol=0.01)
assert np.allclose(arr[1, :, :, :], in_array_2, atol=0.01)

ap2 = AgentInfoProto()
ap2.observations.extend([proto_obs_2])
ap_list = [ap1, ap2]
arr = _process_visual_observation(0, (128, 64, 1), ap_list)
arr = _process_maybe_compressed_observation(0, (128, 64, 1), ap_list)
assert list(arr.shape) == [2, 128, 64, 1]
assert np.allclose(arr[0, :, :, :], expected_out_array_1, atol=0.01)
assert np.allclose(arr[1, :, :, :], expected_out_array_2, atol=0.01)

ap1 = AgentInfoProto()
ap1.observations.extend([proto_obs_1])
ap_list = [ap1]
arr = _process_visual_observation(0, (128, 64, 8), ap_list)
arr = _process_maybe_compressed_observation(0, (128, 64, 8), ap_list)
assert list(arr.shape) == [1, 128, 64, 8]
assert np.allclose(arr[0, :, :, :], expected_out_array_1, atol=0.01)

ap1.observations.extend([proto_obs_1])
ap_list = [ap1]
with pytest.raises(UnityObservationException):
_process_visual_observation(0, (128, 42, 3), ap_list)
_process_maybe_compressed_observation(0, (128, 42, 3), ap_list)
def test_batched_step_result_from_proto():

380
ml-agents/mlagents/trainers/buffer.py


from collections import defaultdict
from collections.abc import MutableMapping
import enum
import itertools
from typing import BinaryIO, DefaultDict, List, Tuple, Union, Optional
from typing import List, BinaryIO, Any, Union
import itertools
from mlagents_envs.exception import UnityException

pass
class AgentBuffer(dict):
class BufferKey(enum.Enum):
ACTION_MASK = "action_mask"
CONTINUOUS_ACTION = "continuous_action"
NEXT_CONT_ACTION = "next_continuous_action"
CONTINUOUS_LOG_PROBS = "continuous_log_probs"
DISCRETE_ACTION = "discrete_action"
NEXT_DISC_ACTION = "next_discrete_action"
DISCRETE_LOG_PROBS = "discrete_log_probs"
DONE = "done"
ENVIRONMENT_REWARDS = "environment_rewards"
MASKS = "masks"
MEMORY = "memory"
PREV_ACTION = "prev_action"
ADVANTAGES = "advantages"
DISCOUNTED_RETURNS = "discounted_returns"
GROUP_DONES = "group_dones"
GROUPMATE_REWARDS = "groupmate_reward"
GROUP_REWARD = "group_reward"
GROUP_CONTINUOUS_ACTION = "group_continuous_action"
GROUP_DISCRETE_ACTION = "group_discrete_action"
GROUP_NEXT_CONT_ACTION = "group_next_cont_action"
GROUP_NEXT_DISC_ACTION = "group_next_disc_action"
class ObservationKeyPrefix(enum.Enum):
OBSERVATION = "obs"
NEXT_OBSERVATION = "next_obs"
GROUP_OBSERVATION = "group_obs"
NEXT_GROUP_OBSERVATION = "next_group_obs"
class RewardSignalKeyPrefix(enum.Enum):
# Reward signals
REWARDS = "rewards"
VALUE_ESTIMATES = "value_estimates"
RETURNS = "returns"
ADVANTAGE = "advantage"
AgentBufferKey = Union[
BufferKey, Tuple[ObservationKeyPrefix, int], Tuple[RewardSignalKeyPrefix, str]
]
class RewardSignalUtil:
@staticmethod
def rewards_key(name: str) -> AgentBufferKey:
return RewardSignalKeyPrefix.REWARDS, name
@staticmethod
def value_estimates_key(name: str) -> AgentBufferKey:
return RewardSignalKeyPrefix.VALUE_ESTIMATES, name
@staticmethod
def returns_key(name: str) -> AgentBufferKey:
return RewardSignalKeyPrefix.RETURNS, name
@staticmethod
def advantage_key(name: str) -> AgentBufferKey:
return RewardSignalKeyPrefix.ADVANTAGE, name
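A minimal usage sketch of the three key forms that AgentBufferKey admits, assuming the AgentBuffer class defined further down in this file (values are hypothetical):

    import numpy as np

    buffer = AgentBuffer()
    # plain enum key
    buffer[BufferKey.ENVIRONMENT_REWARDS].append(1.0)
    # (ObservationKeyPrefix, index) key for the first observation
    buffer[(ObservationKeyPrefix.OBSERVATION, 0)].append(np.zeros(3, dtype=np.float32))
    # (RewardSignalKeyPrefix, name) key built through RewardSignalUtil
    buffer[RewardSignalUtil.rewards_key("extrinsic")].append(0.5)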
class AgentBufferField(list):
AgentBuffer contains a dictionary of AgentBufferFields. Each agent has its own AgentBuffer.
The keys correspond to the name of the field. Example: state, action
AgentBufferField is a list of numpy arrays. When an agent collects a field, you can add it to its
AgentBufferField with the append method.
class AgentBufferField(list):
def __init__(self):
self.padding_value = 0
super().__init__()
def __str__(self):
return str(np.array(self).shape)
def append(self, element: np.ndarray, padding_value: float = 0.0) -> None:
AgentBufferField is a list of data, usually numpy arrays. When an agent collects a field,
you can add it to its AgentBufferField with the append method.
Adds an element to this list. Also lets you change the padding
type, so that it can be set on append (e.g. action_masks should
be padded with 1.)
:param element: The element to append to the list.
:param padding_value: The value used to pad when get_batch is called.
super().append(element)
self.padding_value = padding_value
def __init__(self):
self.padding_value = 0
super().__init__()
def set(self, data):
"""
Sets the list of np.array to the input data
:param data: The np.array list to be set.
"""
self[:] = []
self[:] = data
def __str__(self):
return str(np.array(self).shape)
def get_batch(
self,
batch_size: int = None,
training_length: Optional[int] = 1,
sequential: bool = True,
) -> np.ndarray:
"""
Retrieve the last batch_size elements of length training_length
from the list of np.array
:param batch_size: The number of elements to retrieve. If None:
All elements will be retrieved.
:param training_length: The length of the sequence to be retrieved. If
None: only takes one element.
:param sequential: If true and training_length is not None: the elements
will not repeat in the sequence. [a,b,c,d,e] with training_length = 2 and
sequential=True gives [[0,a],[b,c],[d,e]]. If sequential=False gives
[[a,b],[b,c],[c,d],[d,e]]
"""
if training_length is None:
training_length = 1
if sequential:
# The sequences will not have overlapping elements (this involves padding)
leftover = len(self) % training_length
# leftover is the number of elements in the first sequence (this sequence might need 0 padding)
if batch_size is None:
# retrieve the maximum number of elements
batch_size = len(self) // training_length + 1 * (leftover != 0)
# The maximum number of sequences taken from a list of length len(self) without overlapping
# with padding is equal to batch_size
if batch_size > (len(self) // training_length + 1 * (leftover != 0)):
raise BufferException(
"The batch size and training length requested for get_batch where"
" too large given the current number of data points."
)
if batch_size * training_length > len(self):
padding = np.array(self[-1], dtype=np.float32) * self.padding_value
return np.array(
[padding] * (training_length - leftover) + self[:], dtype=np.float32
)
else:
return np.array(
self[len(self) - batch_size * training_length :], dtype=np.float32
)
else:
# The sequences will have overlapping elements
if batch_size is None:
# retrieve the maximum number of elements
batch_size = len(self) - training_length + 1
# The number of sequences of length training_length taken from a list of len(self) elements
# with overlapping is equal to batch_size
if (len(self) - training_length + 1) < batch_size:
raise BufferException(
"The batch size and training length requested for get_batch where"
" too large given the current number of data points."
)
tmp_list: List[np.ndarray] = []
for end in range(len(self) - batch_size + 1, len(self) + 1):
tmp_list += self[end - training_length : end]
return np.array(tmp_list, dtype=np.float32)
def append(self, element: BufferEntry, padding_value: Any = 0.0) -> None:
"""
Adds an element to this AgentBuffer. Also lets you change the padding
type, so that it can be set on append (e.g. action_masks should
be padded with 1.)
:param element: The element to append to the list.
:param padding_value: The value used to pad when get_batch is called.
"""
super().append(element)
self.padding_value = padding_value
def reset_field(self) -> None:
"""
Resets the AgentBufferField
"""
self[:] = []
def set(self, data: List[BufferEntry]) -> None:
"""
Sets the AgentBufferField to the provided list
:param data: The list to be set.
"""
# Make sure we convert incoming data to float32 if it's a float
self[:] = []
self[:] = data
def get_batch(
self,
batch_size: int = None,
training_length: int = 1,
sequential: bool = True,
) -> List[BufferEntry]:
"""
Retrieve the last batch_size elements of length training_length
from the AgentBuffer.
:param batch_size: The number of elements to retrieve. If None:
All elements will be retrieved.
:param training_length: The length of the sequence to be retrieved. If
None: only takes one element.
:param sequential: If true and training_length is not None: the elements
will not repeat in the sequence. [a,b,c,d,e] with training_length = 2 and
sequential=True gives [[0,a],[b,c],[d,e]]. If sequential=False gives
[[a,b],[b,c],[c,d],[d,e]]
"""
if sequential:
# The sequences will not have overlapping elements (this involves padding)
leftover = len(self) % training_length
# leftover is the number of elements in the first sequence (this sequence might need 0 padding)
if batch_size is None:
# retrieve the maximum number of elements
batch_size = len(self) // training_length + 1 * (leftover != 0)
# The maximum number of sequences taken from a list of length len(self) without overlapping
# with padding is equal to batch_size
if batch_size > (len(self) // training_length + 1 * (leftover != 0)):
raise BufferException(
"The batch size and training length requested for get_batch where"
" too large given the current number of data points."
)
if batch_size * training_length > len(self):
padding = np.array(self[-1], dtype=np.float32) * self.padding_value
return [padding] * (training_length - leftover) + self[:]
else:
return self[len(self) - batch_size * training_length :]
else:
# The sequences will have overlapping elements
if batch_size is None:
# retrieve the maximum number of elements
batch_size = len(self) - training_length + 1
# The number of sequences of length training_length taken from a list of len(self) elements
# with overlapping is equal to batch_size
if (len(self) - training_length + 1) < batch_size:
raise BufferException(
"The batch size and training length requested for get_batch where"
" too large given the current number of data points."
)
tmp_list: List[np.ndarray] = []
for end in range(len(self) - batch_size + 1, len(self) + 1):
tmp_list += self[end - training_length : end]
return tmp_list
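A worked example of the two get_batch modes on the toy list from the docstring, with scalar stand-ins for a through e:

    field = AgentBufferField()
    field.set([1.0, 2.0, 3.0, 4.0, 5.0])
    # non-overlapping sequences of length 2; the first sequence is left-padded
    field.get_batch(training_length=2, sequential=True)
    # -> [0.0, 1.0, 2.0, 3.0, 4.0, 5.0], read as [[pad, a], [b, c], [d, e]]
    # overlapping sequences of length 2
    field.get_batch(training_length=2, sequential=False)
    # -> [1.0, 2.0, 2.0, 3.0, 3.0, 4.0, 4.0, 5.0], read as [[a, b], [b, c], [c, d], [d, e]]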
class AgentBuffer(MutableMapping):
"""
AgentBuffer contains a dictionary of AgentBufferFields. Each agent has its own AgentBuffer.
The keys correspond to the name of the field. Example: state, action
"""
def reset_field(self) -> None:
"""
Resets the AgentBufferField
"""
self[:] = []
# Whether or not to validate the types of keys at runtime
# This should be off for training, but enabled for testing
CHECK_KEY_TYPES_AT_RUNTIME = False
super().__init__()
self._fields: DefaultDict[AgentBufferKey, AgentBufferField] = defaultdict(
AgentBufferField
)
return ", ".join(["'{}' : {}".format(k, str(self[k])) for k in self.keys()])
return ", ".join(
["'{}' : {}".format(k, str(self[k])) for k in self._fields.keys()]
)
for k in self.keys():
self[k].reset_field()
for f in self._fields.values():
f.reset_field()
def __getitem__(self, key):
if key not in self.keys():
self[key] = self.AgentBufferField()
return super().__getitem__(key)
@staticmethod
def _check_key(key):
if isinstance(key, BufferKey):
return
if isinstance(key, tuple):
key0, key1 = key
if isinstance(key0, ObservationKeyPrefix):
if isinstance(key1, int):
return
raise KeyError(f"{key} has type ({type(key0)}, {type(key1)})")
if isinstance(key0, RewardSignalKeyPrefix):
if isinstance(key1, str):
return
raise KeyError(f"{key} has type ({type(key0)}, {type(key1)})")
raise KeyError(f"{key} is a {type(key)}")
def check_length(self, key_list: List[str]) -> bool:
@staticmethod
def _encode_key(key: AgentBufferKey) -> str:
"""
Convert the key to a string representation so that it can be used for serialization.
"""
if isinstance(key, BufferKey):
return key.value
prefix, suffix = key
return f"{prefix.value}:{suffix}"
@staticmethod
def _decode_key(encoded_key: str) -> AgentBufferKey:
"""
Convert the string representation back to a key after serialization.
"""
# Simple case: convert the string directly to a BufferKey
try:
return BufferKey(encoded_key)
except ValueError:
pass
# Not a simple key, so split into two parts
prefix_str, _, suffix_str = encoded_key.partition(":")
# See if it's an ObservationKeyPrefix first
try:
return ObservationKeyPrefix(prefix_str), int(suffix_str)
except ValueError:
pass
# If not, it had better be a RewardSignalKeyPrefix
try:
return RewardSignalKeyPrefix(prefix_str), suffix_str
except ValueError:
raise ValueError(f"Unable to convert {encoded_key} to an AgentBufferKey")
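A round-trip sketch of the serialization helpers above, using hypothetical keys:

    key = RewardSignalUtil.rewards_key("curiosity")
    encoded = AgentBuffer._encode_key(key)               # "rewards:curiosity"
    assert AgentBuffer._decode_key(encoded) == key

    obs_key = (ObservationKeyPrefix.NEXT_OBSERVATION, 2)
    assert AgentBuffer._decode_key(AgentBuffer._encode_key(obs_key)) == obs_key  # "next_obs:2"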
def __getitem__(self, key: AgentBufferKey) -> AgentBufferField:
if self.CHECK_KEY_TYPES_AT_RUNTIME:
self._check_key(key)
return self._fields[key]
def __setitem__(self, key: AgentBufferKey, value: AgentBufferField) -> None:
if self.CHECK_KEY_TYPES_AT_RUNTIME:
self._check_key(key)
self._fields[key] = value
def __delitem__(self, key: AgentBufferKey) -> None:
if self.CHECK_KEY_TYPES_AT_RUNTIME:
self._check_key(key)
self._fields.__delitem__(key)
def __iter__(self):
return self._fields.__iter__()
def __len__(self) -> int:
return self._fields.__len__()
def __contains__(self, key):
if self.CHECK_KEY_TYPES_AT_RUNTIME:
self._check_key(key)
return self._fields.__contains__(key)
def check_length(self, key_list: List[AgentBufferKey]) -> bool:
"""
Some methods will require that some fields have the same length.
check_length will return true if the fields in key_list

if self.CHECK_KEY_TYPES_AT_RUNTIME:
for k in key_list:
self._check_key(k)
if key not in self.keys():
if key not in self._fields:
return False
if (length is not None) and (length != len(self[key])):
return False

def shuffle(self, sequence_length: int, key_list: List[str] = None) -> None:
def shuffle(
self, sequence_length: int, key_list: List[AgentBufferKey] = None
) -> None:
"""
Shuffles the fields in key_list in a consistent way: The reordering will
be the same across fields.

key_list = list(self.keys())
key_list = list(self._fields.keys())
if not self.check_length(key_list):
raise BufferException(
"Unable to shuffle if the fields are not of same length"

:return: Dict of mini batch.
"""
mini_batch = AgentBuffer()
for key in self:
mini_batch[key] = self[key][start:end]
for key, field in self._fields.items():
# slicing AgentBufferField returns a List[Any]
mini_batch[key] = field[start:end] # type: ignore
return mini_batch
def sample_mini_batch(

"""
with h5py.File(file_object, "w") as write_file:
for key, data in self.items():
write_file.create_dataset(key, data=data, dtype="f", compression="gzip")
write_file.create_dataset(
self._encode_key(key), data=data, dtype="f", compression="gzip"
)
def load_from_file(self, file_object: BinaryIO) -> None:
"""

for key in list(read_file.keys()):
self[key] = AgentBuffer.AgentBufferField()
decoded_key = self._decode_key(key)
self[decoded_key] = AgentBufferField()
self[key].extend(read_file[key][()])
self[decoded_key].extend(read_file[key][()])
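A save/load round trip using the encoded keys, as a sketch; it assumes the enclosing write method is AgentBuffer.save_to_file, whose signature is not shown in this hunk:

    import io

    buffer = AgentBuffer()
    buffer[BufferKey.DONE].extend([0.0, 0.0, 1.0])

    f = io.BytesIO()
    buffer.save_to_file(f)        # each field stored under its encoded string key, e.g. "done"
    f.seek(0)

    restored = AgentBuffer()
    restored.load_from_file(f)
    assert list(restored[BufferKey.DONE]) == [0.0, 0.0, 1.0]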
def truncate(self, max_length: int, sequence_length: int = 1) -> None:
"""

def resequence_and_append(
self,
target_buffer: "AgentBuffer",
key_list: List[str] = None,
key_list: List[AgentBufferKey] = None,
batch_size: int = None,
training_length: int = None,
) -> None:

3
ml-agents/mlagents/trainers/cli_utils.py


action=RaiseRemovedWarning,
help="(Removed) Use the TensorFlow framework.",
)
argparser.add_argument(
"--results-dir", default="results", help="Results base directory"
)
eng_conf = argparser.add_argument_group(title="Engine Configuration")
eng_conf.add_argument(

16
ml-agents/mlagents/trainers/demo_loader.py


import os
from typing import List, Tuple
import numpy as np
from mlagents.trainers.buffer import AgentBuffer
from mlagents.trainers.buffer import AgentBuffer, BufferKey
from mlagents_envs.communicator_objects.agent_info_action_pair_pb2 import (
AgentInfoActionPairProto,
)

else:
current_obs = list(current_decision_step.values())[0].obs
demo_raw_buffer["done"].append(next_done)
demo_raw_buffer["rewards"].append(next_reward)
demo_raw_buffer[BufferKey.DONE].append(next_done)
demo_raw_buffer[BufferKey.ENVIRONMENT_REWARDS].append(next_reward)
for i, obs in enumerate(current_obs):
demo_raw_buffer[ObsUtil.get_name_at(i)].append(obs)
if (

if behavior_spec.action_spec.continuous_size > 0:
demo_raw_buffer["continuous_action"].append(
demo_raw_buffer[BufferKey.CONTINUOUS_ACTION].append(
demo_raw_buffer["discrete_action"].append(
demo_raw_buffer[BufferKey.DISCRETE_ACTION].append(
demo_raw_buffer["continuous_action"].append(
demo_raw_buffer[BufferKey.CONTINUOUS_ACTION].append(
demo_raw_buffer["discrete_action"].append(
demo_raw_buffer[BufferKey.DISCRETE_ACTION].append(
demo_raw_buffer["prev_action"].append(previous_action)
demo_raw_buffer[BufferKey.PREV_ACTION].append(previous_action)
if next_done:
demo_raw_buffer.resequence_and_append(
demo_processed_buffer, batch_size=None, training_length=sequence_length

40
ml-agents/mlagents/trainers/ghost/trainer.py


next_learning_team = self.controller.get_learning_team
# CASE 1: Current learning team is managed by this GhostTrainer.
# If the learning team changes, the following loop over queues will push the
# new policy into the policy queue for the new learning agent if
# that policy is managed by this GhostTrainer. Otherwise, it will save the current snapshot.
# CASE 2: Current learning team is managed by a different GhostTrainer.
# If the learning team changes to a team managed by this GhostTrainer, this loop
# will push the current_snapshot into the correct queue. Otherwise,
# it will continue skipping and swap_snapshot will continue to handle
# pushing fixed snapshots
# Case 3: No team change. The if statement just continues to push the policy
# Case 1: No team change. The if statement just continues to push the policy
# into the correct queue (or not if not learning team).
for brain_name in self._internal_policy_queues:
internal_policy_queue = self._internal_policy_queues[brain_name]

except AgentManagerQueue.Empty:
pass
if next_learning_team in self._team_to_name_to_policy_queue:
continue
if (
self._learning_team == next_learning_team
and next_learning_team in self._team_to_name_to_policy_queue
):
name_to_policy_queue = self._team_to_name_to_policy_queue[
next_learning_team
]

policy = self.get_policy(behavior_id)
policy.load_weights(self.current_policy_snapshot[brain_name])
name_to_policy_queue[brain_name].put(policy)
# CASE 2: Current learning team is managed by this GhostTrainer.
# If the learning team changes, the following loop over queues will push the
# new policy into the policy queue for the new learning agent if
# that policy is managed by this GhostTrainer. Otherwise, it will save the current snapshot.
# CASE 3: Current learning team is managed by a different GhostTrainer.
# If the learning team changes to a team managed by this GhostTrainer, this loop
# will push the current_snapshot into the correct queue. Otherwise,
# it will continue skipping and swap_snapshot will continue to handle
# pushing fixed snapshots
if (
self._learning_team != next_learning_team
and next_learning_team in self._team_to_name_to_policy_queue
):
name_to_policy_queue = self._team_to_name_to_policy_queue[
next_learning_team
]
for brain_name in name_to_policy_queue:
behavior_id = create_name_behavior_id(brain_name, next_learning_team)
policy = self.get_policy(behavior_id)
policy.load_weights(self.current_policy_snapshot[brain_name])
name_to_policy_queue[brain_name].put(policy)
# Note save and swap should be on different step counters.
# We don't want to save unless the policy is learning.

41
ml-agents/mlagents/trainers/learn.py


from mlagents.trainers.environment_parameter_manager import EnvironmentParameterManager
from mlagents.trainers.trainer import TrainerFactory
from mlagents.trainers.directory_utils import validate_existing_directories
from mlagents.trainers.stats import (
TensorboardWriter,
StatsReporter,
GaugeWriter,
ConsoleWriter,
)
from mlagents.trainers.stats import StatsReporter
from mlagents.trainers.cli_utils import parser
from mlagents_envs.environment import UnityEnvironment
from mlagents.trainers.settings import RunOptions

add_metadata as add_timer_metadata,
)
from mlagents_envs import logging_util
from mlagents.plugins.stats_writer import register_stats_writer_plugins
logger = logging_util.get_logger(__name__)

checkpoint_settings = options.checkpoint_settings
env_settings = options.env_settings
engine_settings = options.engine_settings
base_path = "results"
write_path = os.path.join(base_path, checkpoint_settings.run_id)
maybe_init_path = (
os.path.join(base_path, checkpoint_settings.initialize_from)
if checkpoint_settings.initialize_from is not None
else None
)
run_logs_dir = os.path.join(write_path, "run_logs")
run_logs_dir = checkpoint_settings.run_logs_dir
write_path,
checkpoint_settings.write_path,
maybe_init_path,
checkpoint_settings.maybe_init_path,
)
# Make run logs directory
os.makedirs(run_logs_dir, exist_ok=True)

)
# Configure Tensorboard Writers and StatsReporter
tb_writer = TensorboardWriter(
write_path, clear_past_data=not checkpoint_settings.resume
)
gauge_write = GaugeWriter()
console_writer = ConsoleWriter()
StatsReporter.add_writer(tb_writer)
StatsReporter.add_writer(gauge_write)
StatsReporter.add_writer(console_writer)
stats_writers = register_stats_writer_plugins(options)
for sw in stats_writers:
StatsReporter.add_writer(sw)
if env_settings.env_path is None:
port = None

trainer_factory = TrainerFactory(
trainer_config=options.behaviors,
output_path=write_path,
output_path=checkpoint_settings.write_path,
init_path=maybe_init_path,
init_path=checkpoint_settings.maybe_init_path,
write_path,
checkpoint_settings.write_path,
checkpoint_settings.run_id,
env_parameter_manager,
not checkpoint_settings.inference,

tc.start_learning(env_manager)
finally:
env_manager.close()
write_run_options(write_path, options)
write_run_options(checkpoint_settings.write_path, options)
write_timing_tree(run_logs_dir)
write_training_status(run_logs_dir)

22
ml-agents/mlagents/trainers/ppo/optimizer_torch.py


from typing import Dict, cast
from mlagents.torch_utils import torch
from mlagents.trainers.buffer import AgentBuffer
from mlagents.trainers.buffer import AgentBuffer, BufferKey, RewardSignalUtil
from mlagents_envs.timers import timed
from mlagents.trainers.policy.torch_policy import TorchPolicy

old_values = {}
for name in self.reward_signals:
old_values[name] = ModelUtils.list_to_tensor(
batch[f"{name}_value_estimates"]
batch[RewardSignalUtil.value_estimates_key(name)]
returns[name] = ModelUtils.list_to_tensor(batch[f"{name}_returns"])
returns[name] = ModelUtils.list_to_tensor(
batch[RewardSignalUtil.returns_key(name)]
)
n_obs = len(self.policy.behavior_spec.observation_specs)
current_obs = ObsUtil.from_buffer(batch, n_obs)

act_masks = ModelUtils.list_to_tensor(batch["action_mask"])
actions = AgentAction.from_dict(batch)
act_masks = ModelUtils.list_to_tensor(batch[BufferKey.ACTION_MASK])
actions = AgentAction.from_buffer(batch)
ModelUtils.list_to_tensor(batch["memory"][i])
for i in range(0, len(batch["memory"]), self.policy.sequence_length)
ModelUtils.list_to_tensor(batch[BufferKey.MEMORY][i])
for i in range(0, len(batch[BufferKey.MEMORY]), self.policy.sequence_length)
]
if len(memories) > 0:
memories = torch.stack(memories).unsqueeze(0)

memories=memories,
seq_len=self.policy.sequence_length,
)
old_log_probs = ActionLogProbs.from_dict(batch).flatten()
old_log_probs = ActionLogProbs.from_buffer(batch).flatten()
loss_masks = ModelUtils.list_to_tensor(batch["masks"], dtype=torch.bool)
loss_masks = ModelUtils.list_to_tensor(batch[BufferKey.MASKS], dtype=torch.bool)
ModelUtils.list_to_tensor(batch["advantages"]),
ModelUtils.list_to_tensor(batch[BufferKey.ADVANTAGES]),
log_probs,
old_log_probs,
loss_masks,

33
ml-agents/mlagents/trainers/ppo/trainer.py


from mlagents_envs.logging_util import get_logger
from mlagents_envs.base_env import BehaviorSpec
from mlagents.trainers.buffer import BufferKey, RewardSignalUtil
from mlagents.trainers.trainer.rl_trainer import RLTrainer
from mlagents.trainers.policy import Policy
from mlagents.trainers.policy.torch_policy import TorchPolicy

)
for name, v in value_estimates.items():
agent_buffer_trajectory[f"{name}_value_estimates"].extend(v)
agent_buffer_trajectory[RewardSignalUtil.value_estimates_key(name)].extend(
v
)
self._stats_reporter.add_stat(
f"Policy/{self.optimizer.reward_signals[name].name.capitalize()} Value Estimate",
np.mean(v),

self.collected_rewards["environment"][agent_id] += np.sum(
agent_buffer_trajectory["environment_rewards"]
agent_buffer_trajectory[BufferKey.ENVIRONMENT_REWARDS]
agent_buffer_trajectory[f"{name}_rewards"].extend(evaluate_result)
agent_buffer_trajectory[RewardSignalUtil.rewards_key(name)].extend(
evaluate_result
)
# Report the reward signals
self.collected_rewards[name][agent_id] += np.sum(evaluate_result)

for name in self.optimizer.reward_signals:
bootstrap_value = value_next[name]
local_rewards = agent_buffer_trajectory[f"{name}_rewards"].get_batch()
local_rewards = agent_buffer_trajectory[
RewardSignalUtil.rewards_key(name)
].get_batch()
f"{name}_value_estimates"
RewardSignalUtil.value_estimates_key(name)
].get_batch()
local_advantage = get_gae(

)
local_return = local_advantage + local_value_estimates
# This is later used as the target for the different value estimates
agent_buffer_trajectory[f"{name}_returns"].set(local_return)
agent_buffer_trajectory[f"{name}_advantage"].set(local_advantage)
agent_buffer_trajectory[RewardSignalUtil.returns_key(name)].set(
local_return
)
agent_buffer_trajectory[RewardSignalUtil.advantage_key(name)].set(
local_advantage
)
tmp_advantages.append(local_advantage)
tmp_returns.append(local_return)

)
global_returns = list(np.mean(np.array(tmp_returns, dtype=np.float32), axis=0))
agent_buffer_trajectory["advantages"].set(global_advantages)
agent_buffer_trajectory["discounted_returns"].set(global_returns)
agent_buffer_trajectory[BufferKey.ADVANTAGES].set(global_advantages)
agent_buffer_trajectory[BufferKey.DISCOUNTED_RETURNS].set(global_returns)
# Append to update buffer
agent_buffer_trajectory.resequence_and_append(
self.update_buffer, training_length=self.policy.sequence_length

int(self.hyperparameters.batch_size / self.policy.sequence_length), 1
)
advantages = np.array(self.update_buffer["advantages"].get_batch())
self.update_buffer["advantages"].set(
advantages = self.update_buffer[BufferKey.ADVANTAGES].get_batch()
self.update_buffer[BufferKey.ADVANTAGES].set(
(advantages - advantages.mean()) / (advantages.std() + 1e-10)
)
num_epoch = self.hyperparameters.num_epoch

24
ml-agents/mlagents/trainers/sac/optimizer_torch.py


from mlagents.trainers.torch.agent_action import AgentAction
from mlagents.trainers.torch.action_log_probs import ActionLogProbs
from mlagents.trainers.torch.utils import ModelUtils
from mlagents.trainers.buffer import AgentBuffer
from mlagents.trainers.buffer import AgentBuffer, BufferKey, RewardSignalUtil
from mlagents_envs.timers import timed
from mlagents_envs.base_env import ActionSpec, ObservationSpec
from mlagents.trainers.exception import UnityTrainerException

"""
rewards = {}
for name in self.reward_signals:
rewards[name] = ModelUtils.list_to_tensor(batch[f"{name}_rewards"])
rewards[name] = ModelUtils.list_to_tensor(
batch[RewardSignalUtil.rewards_key(name)]
)
n_obs = len(self.policy.behavior_spec.observation_specs)
current_obs = ObsUtil.from_buffer(batch, n_obs)

# Convert to tensors
next_obs = [ModelUtils.list_to_tensor(obs) for obs in next_obs]
act_masks = ModelUtils.list_to_tensor(batch["action_mask"])
actions = AgentAction.from_dict(batch)
act_masks = ModelUtils.list_to_tensor(batch[BufferKey.ACTION_MASK])
actions = AgentAction.from_buffer(batch)
ModelUtils.list_to_tensor(batch["memory"][i])
for i in range(0, len(batch["memory"]), self.policy.sequence_length)
ModelUtils.list_to_tensor(batch[BufferKey.MEMORY][i])
for i in range(0, len(batch[BufferKey.MEMORY]), self.policy.sequence_length)
batch["memory"][i][self.policy.m_size // 2 :]
batch[BufferKey.MEMORY][i][self.policy.m_size // 2 :]
for i in range(offset, len(batch["memory"]), self.policy.sequence_length)
for i in range(
offset, len(batch[BufferKey.MEMORY]), self.policy.sequence_length
)
]
if len(memories_list) > 0:

memories=next_memories,
sequence_length=self.policy.sequence_length,
)
masks = ModelUtils.list_to_tensor(batch["masks"], dtype=torch.bool)
dones = ModelUtils.list_to_tensor(batch["done"])
masks = ModelUtils.list_to_tensor(batch[BufferKey.MASKS], dtype=torch.bool)
dones = ModelUtils.list_to_tensor(batch[BufferKey.DONE])
q1_loss, q2_loss = self.sac_q_loss(
q1_stream, q2_stream, target_values, dones, rewards, masks

7
ml-agents/mlagents/trainers/sac/trainer.py


from mlagents_envs.logging_util import get_logger
from mlagents_envs.timers import timed
from mlagents_envs.base_env import BehaviorSpec
from mlagents.trainers.buffer import BufferKey, RewardSignalUtil
from mlagents.trainers.policy import Policy
from mlagents.trainers.trainer.rl_trainer import RLTrainer
from mlagents.trainers.policy.torch_policy import TorchPolicy

# Evaluate all reward functions for reporting purposes
self.collected_rewards["environment"][agent_id] += np.sum(
agent_buffer_trajectory["environment_rewards"]
agent_buffer_trajectory[BufferKey.ENVIRONMENT_REWARDS]
)
for name, reward_signal in self.optimizer.reward_signals.items():
evaluate_result = (

last_step_obs = last_step.obs
for i, obs in enumerate(last_step_obs):
agent_buffer_trajectory[ObsUtil.get_name_at_next(i)][-1] = obs
agent_buffer_trajectory["done"][-1] = False
agent_buffer_trajectory[BufferKey.DONE][-1] = False
# Append to update buffer
agent_buffer_trajectory.resequence_and_append(

)
# Get rewards for each reward
for name, signal in self.optimizer.reward_signals.items():
sampled_minibatch[f"{name}_rewards"] = (
sampled_minibatch[RewardSignalUtil.rewards_key(name)] = (
signal.evaluate(sampled_minibatch) * signal.strength
)

18
ml-agents/mlagents/trainers/settings.py


import os.path
import warnings
import attr

force: bool = parser.get_default("force")
train_model: bool = parser.get_default("train_model")
inference: bool = parser.get_default("inference")
results_dir: str = parser.get_default("results_dir")
@property
def write_path(self) -> str:
return os.path.join(self.results_dir, self.run_id)
@property
def maybe_init_path(self) -> Optional[str]:
return (
os.path.join(self.results_dir, self.initialize_from)
if self.initialize_from is not None
else None
)
@property
def run_logs_dir(self) -> str:
return os.path.join(self.write_path, "run_logs")
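A sketch of how the new path properties compose, assuming this block belongs to the CheckpointSettings attrs class and using hypothetical values (POSIX paths shown):

    opts = CheckpointSettings(run_id="3DBall-run", results_dir="my_results")
    opts.write_path        # "my_results/3DBall-run"
    opts.run_logs_dir      # "my_results/3DBall-run/run_logs"
    opts.maybe_init_path   # None unless initialize_from is set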
@attr.s(auto_attribs=True)

39
ml-agents/mlagents/trainers/stats.py


class StatsSummary(NamedTuple):
mean: float
std: float
num: int
sum: float
full_dist: List[float]
return StatsSummary(0.0, 0.0, 0, 0.0, StatsAggregationMethod.AVERAGE)
return StatsSummary([], StatsAggregationMethod.AVERAGE)
@property
def aggregated_value(self):

return self.mean
@property
def mean(self):
return np.mean(self.full_dist)
@property
def std(self):
return np.std(self.full_dist)
@property
def num(self):
return len(self.full_dist)
@property
def sum(self):
return np.sum(self.full_dist)
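A minimal sketch of the reworked StatsSummary: it now stores only the raw values plus the aggregation method, and derives the scalar statistics on demand:

    summary = StatsSummary(full_dist=[1.0, 2.0, 3.0],
                           aggregation_method=StatsAggregationMethod.AVERAGE)
    summary.mean   # 2.0
    summary.std    # ~0.816
    summary.num    # 3
    summary.sum    # 6.0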
class StatsPropertyType(Enum):
HYPERPARAMETERS = "hyperparameters"

def write_stats(
self, category: str, values: Dict[str, StatsSummary], step: int
) -> None:
"""
Callback to record training information
:param category: Category of the statistics. Usually this is the behavior name.
:param values: Dictionary of statistics.
:param step: The current training step.
:return:
"""
pass
def add_property(

self.summary_writers[category].add_scalar(
f"{key}", value.aggregated_value, step
)
if value.aggregation_method == StatsAggregationMethod.HISTOGRAM:
self.summary_writers[category].add_histogram(
f"{key}_hist", np.array(value.full_dist), step
)
self.summary_writers[category].flush()
def _maybe_create_summary_writer(self, category: str) -> None:

return StatsSummary.empty()
return StatsSummary(
mean=np.mean(stat_values),
std=np.std(stat_values),
num=len(stat_values),
sum=np.sum(stat_values),
full_dist=stat_values,
aggregation_method=StatsReporter.stats_aggregation[self.category][key],
)

6
ml-agents/mlagents/trainers/tests/__init__.py


np.array = np_array_no_float64
np.zeros = np_zeros_no_float64
np.ones = np_ones_no_float64
if os.getenv("TEST_ENFORCE_BUFFER_KEY_TYPES"):
from mlagents.trainers.buffer import AgentBuffer
AgentBuffer.CHECK_KEY_TYPES_AT_RUNTIME = True
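A sketch of what the runtime check enforces once enabled, as the hook above does when TEST_ENFORCE_BUFFER_KEY_TYPES is set:

    from mlagents.trainers.buffer import AgentBuffer, BufferKey

    AgentBuffer.CHECK_KEY_TYPES_AT_RUNTIME = True
    buf = AgentBuffer()
    buf[BufferKey.DONE].append(1.0)     # typed keys pass the check
    try:
        buf["done"]                     # plain strings are rejected
    except KeyError:
        pass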

2
ml-agents/mlagents/trainers/tests/dummy_config.py


obs_specs: List[ObservationSpec] = []
for shape in shapes:
dim_prop = (DimensionProperty.UNSPECIFIED,) * len(shape)
if len(shape) == 2:
dim_prop = (DimensionProperty.VARIABLE_SIZE, DimensionProperty.NONE)
spec = ObservationSpec(shape, dim_prop, ObservationType.DEFAULT)
obs_specs.append(spec)
return obs_specs

Some files were not shown because too many files changed in this diff
