
Merge branch 'master' into develop-sac-apex

/develop/sac-apex
Ervin Teng, 4 years ago
Current commit 5e980ec1
131 files changed, with 6,269 insertions and 5,295 deletions
  1. .yamato/gym-interface-test.yml (2)
  2. .yamato/protobuf-generation-test.yml (4)
  3. .yamato/python-ll-api-test.yml (5)
  4. .yamato/standalone-build-test.yml (8)
  5. .yamato/training-int-tests.yml (11)
  6. Project/Assets/ML-Agents/Examples/3DBall/Prefabs/3DBallHardNew.prefab (22)
  7. Project/Assets/ML-Agents/Examples/Basic/Prefabs/Basic.prefab (936)
  8. Project/Assets/ML-Agents/Examples/Bouncer/Prefabs/Environment.prefab (928)
  9. Project/Assets/ML-Agents/Examples/Crawler/Prefabs/DynamicPlatform.prefab (22)
  10. Project/Assets/ML-Agents/Examples/Crawler/Prefabs/FixedPlatform.prefab (13)
  11. Project/Assets/ML-Agents/Examples/FoodCollector/Prefabs/FoodCollectorArea.prefab (153)
  12. Project/Assets/ML-Agents/Examples/GridWorld/Prefabs/Area.prefab (32)
  13. Project/Assets/ML-Agents/Examples/Hallway/Prefabs/SymbolFinderArea.prefab (43)
  14. Project/Assets/ML-Agents/Examples/PushBlock/Prefabs/PushBlockArea.prefab (64)
  15. Project/Assets/ML-Agents/Examples/Pyramids/Prefabs/AreaPB.prefab (85)
  16. Project/Assets/ML-Agents/Examples/Reacher/Prefabs/Agent.prefab (22)
  17. Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ModelOverrider.cs (6)
  18. Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ProjectSettingsOverrides.cs (4)
  19. Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/SensorBase.cs (3)
  20. Project/Assets/ML-Agents/Examples/Soccer/Prefabs/SoccerFieldTwos.prefab (256)
  21. Project/Assets/ML-Agents/Examples/Tennis/Prefabs/TennisArea.prefab (44)
  22. Project/Assets/ML-Agents/Examples/Tennis/Scripts/TennisAgent.cs (7)
  23. Project/Assets/ML-Agents/Examples/Walker/Prefabs/WalkerPair.prefab (22)
  24. Project/Assets/ML-Agents/Examples/WallJump/Prefabs/WallJumpArea.prefab (64)
  25. Project/ProjectSettings/GraphicsSettings.asset (3)
  26. Project/ProjectSettings/UnityConnectSettings.asset (2)
  27. README.md (2)
  28. com.unity.ml-agents/CHANGELOG.md (7)
  29. com.unity.ml-agents/Runtime/Academy.cs (24)
  30. com.unity.ml-agents/Runtime/Agent.cs (35)
  31. com.unity.ml-agents/Runtime/Communicator/RpcCommunicator.cs (5)
  32. com.unity.ml-agents/Runtime/DecisionRequester.cs (34)
  33. com.unity.ml-agents/Runtime/Sensors/CameraSensor.cs (3)
  34. com.unity.ml-agents/Runtime/Sensors/ISensor.cs (6)
  35. com.unity.ml-agents/Runtime/Sensors/RayPerceptionSensor.cs (3)
  36. com.unity.ml-agents/Runtime/Sensors/RenderTextureSensor.cs (3)
  37. com.unity.ml-agents/Runtime/Sensors/StackingSensor.cs (13)
  38. com.unity.ml-agents/Runtime/Sensors/VectorSensor.cs (6)
  39. com.unity.ml-agents/Runtime/SideChannels/IncomingMessage.cs (44)
  40. com.unity.ml-agents/Runtime/Timer.cs (69)
  41. com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs (59)
  42. com.unity.ml-agents/Tests/Editor/ParameterLoaderTest.cs (1)
  43. com.unity.ml-agents/Tests/Editor/PublicAPI/PublicApiValidation.cs (102)
  44. com.unity.ml-agents/Tests/Editor/Sensor/FloatVisualSensorTests.cs (1)
  45. com.unity.ml-agents/Tests/Editor/Sensor/SensorShapeValidatorTests.cs (1)
  46. com.unity.ml-agents/Tests/Editor/Sensor/StackingSensorTests.cs (18)
  47. com.unity.ml-agents/Tests/Editor/SideChannelTests.cs (26)
  48. com.unity.ml-agents/Tests/Editor/TimerTest.cs (2)
  49. com.unity.ml-agents/package.json (16)
  50. docs/Getting-Started.md (3)
  51. docs/ML-Agents-Overview.md (10)
  52. docs/Migrating.md (4)
  53. docs/Python-API.md (141)
  54. docs/Training-ML-Agents.md (10)
  55. docs/Training-PPO.md (11)
  56. docs/Training-SAC.md (11)
  57. docs/images/3dball_big.png (999)
  58. docs/images/3dball_small.png (852)
  59. docs/images/curriculum.png (974)
  60. docs/images/demo_component.png (150)
  61. docs/images/gridworld.png (980)
  62. docs/images/ml-agents-LSTM.png (999)
  63. docs/images/monitor.png (181)
  64. docs/images/platform_prefab.png (219)
  65. docs/images/visual-observation-rawimage.png (349)
  66. docs/images/visual-observation-rendertexture.png (95)
  67. docs/images/visual-observation.png (107)
  68. gym-unity/README.md (15)
  69. gym-unity/gym_unity/envs/__init__.py (342)
  70. gym-unity/gym_unity/tests/test_gym.py (198)
  71. ml-agents-envs/mlagents_envs/base_env.py (249)
  72. ml-agents-envs/mlagents_envs/environment.py (104)
  73. ml-agents-envs/mlagents_envs/rpc_utils.py (99)
  74. ml-agents-envs/mlagents_envs/side_channel/incoming_message.py (38)
  75. ml-agents-envs/mlagents_envs/tests/test_envs.py (43)
  76. ml-agents-envs/mlagents_envs/tests/test_rpc_utils.py (170)
  77. ml-agents-envs/mlagents_envs/tests/test_side_channel.py (20)
  78. ml-agents-envs/mlagents_envs/tests/test_timers.py (7)
  79. ml-agents-envs/mlagents_envs/timers.py (39)
  80. ml-agents/mlagents/trainers/agent_processor.py (180)
  81. ml-agents/mlagents/trainers/brain_conversion_utils.py (16)
  82. ml-agents/mlagents/trainers/demo_loader.py (73)
  83. ml-agents/mlagents/trainers/env_manager.py (37)
  84. ml-agents/mlagents/trainers/ghost/trainer.py (55)
  85. ml-agents/mlagents/trainers/learn.py (32)
  86. ml-agents/mlagents/trainers/policy/nn_policy.py (10)
  87. ml-agents/mlagents/trainers/policy/policy.py (4)
  88. ml-agents/mlagents/trainers/policy/tf_policy.py (81)
  89. ml-agents/mlagents/trainers/ppo/trainer.py (4)
  90. ml-agents/mlagents/trainers/sac/trainer.py (4)
  91. ml-agents/mlagents/trainers/simple_env_manager.py (26)
  92. ml-agents/mlagents/trainers/subprocess_env_manager.py (24)
  93. ml-agents/mlagents/trainers/tests/mock_brain.py (42)
  94. ml-agents/mlagents/trainers/tests/simple_test_envs.py (137)
  95. ml-agents/mlagents/trainers/tests/test_agent_processor.py (54)
  96. ml-agents/mlagents/trainers/tests/test_demo_loader.py (32)
  97. ml-agents/mlagents/trainers/tests/test_ghost.py (12)
  98. ml-agents/mlagents/trainers/tests/test_learn.py (2)
  99. ml-agents/mlagents/trainers/tests/test_nn_policy.py (55)
  100. ml-agents/mlagents/trainers/tests/test_policy.py (24)

.yamato/gym-interface-test.yml (2)


commands:
- pip install pyyaml
- python -u -m ml-agents.tests.yamato.setup_venv
- ./venv/bin/python ml-agents/tests/yamato/scripts/run_gym.py
- ./venv/bin/python ml-agents/tests/yamato/scripts/run_gym.py --env=artifacts/testPlayer-Basic
dependencies:
- .yamato/standalone-build-test.yml#test_mac_standalone_{{ editor.version }}
triggers:

.yamato/protobuf-generation-test.yml (4)


- "protobuf-definitions/*.md"
- "protobuf-definitions/**/*.md"
artifacts:
dist:
patch:
- "artifacts/*"
- "artifacts/*.*"

.yamato/python-ll-api-test.yml (5)


commands:
- pip install pyyaml
- python -u -m ml-agents.tests.yamato.setup_venv
- ./venv/bin/python ml-agents/tests/yamato/scripts/run_llapi.py
- ./venv/bin/python ml-agents/tests/yamato/scripts/run_llapi.py
- ./venv/bin/python ml-agents/tests/yamato/scripts/run_llapi.py --env=artifacts/testPlayer-Basic
- ./venv/bin/python ml-agents/tests/yamato/scripts/run_llapi.py --env=artifacts/testPlayer-WallJump
- ./venv/bin/python ml-agents/tests/yamato/scripts/run_llapi.py --env=artifacts/testPlayer-Bouncer
dependencies:
- .yamato/standalone-build-test.yml#test_mac_standalone_{{ editor.version }}
triggers:

.yamato/standalone-build-test.yml (8)


commands:
- pip install pyyaml
- python -u -m ml-agents.tests.yamato.standalone_build_tests
- python -u -m ml-agents.tests.yamato.standalone_build_tests --scene=Assets/ML-Agents/Examples/Basic/Scenes/Basic.unity
- python -u -m ml-agents.tests.yamato.standalone_build_tests --scene=Assets/ML-Agents/Examples/Bouncer/Scenes/Bouncer.unity
- python -u -m ml-agents.tests.yamato.standalone_build_tests --scene=Assets/ML-Agents/Examples/WallJump/Scenes/WallJump.unity
triggers:
cancel_old_ci: true
changes:

- "com.unity.ml-agents/*.md"
- "com.unity.ml-agents/**/*.md"
artifacts:
logs:
paths:
- "artifacts/standalone_build.txt"
- "Project/testPlayer*/**"
- "artifacts/testPlayer*/**"
{% endfor %}

.yamato/training-int-tests.yml (11)


# Backwards-compatibility tests.
# If we make a breaking change to the communication protocol, these will need
# to be disabled until the next release.
- python -u -m ml-agents.tests.yamato.training_int_tests --python=0.15.0
- python -u -m ml-agents.tests.yamato.training_int_tests --csharp=0.15.0
# - python -u -m ml-agents.tests.yamato.training_int_tests --python=0.15.0
# - python -u -m ml-agents.tests.yamato.training_int_tests --csharp=0.15.0
dependencies:
- .yamato/standalone-build-test.yml#test_mac_standalone_{{ editor.version }}
triggers:

- "com.unity.ml-agents/*.md"
- "com.unity.ml-agents/**/*.md"
artifacts:
unit:
logs:
paths:
- "artifacts/standalone_build.txt"
standalonebuild:
- "artifacts/**"
- "artifacts/testplayer*/**"
{% endfor %}

Project/Assets/ML-Agents/Examples/3DBall/Prefabs/3DBallHardNew.prefab (22)


- component: {fileID: 114284317994838100}
- component: {fileID: 114466000339026140}
- component: {fileID: 8193279139064749781}
- component: {fileID: 7923264721978289873}
m_Layer: 0
m_Name: Agent
m_TagString: Untagged

m_InferenceDevice: 0
m_BehaviorType: 0
m_BehaviorName: 3DBallHard
m_TeamID: 0
m_useChildSensors: 1
TeamId: 0
m_UseChildSensors: 1
--- !u!114 &114466000339026140
MonoBehaviour:
m_ObjectHideFlags: 0

m_Script: {fileID: 11500000, guid: edf26e11cf4ed42eaa3ffb7b91bb4676, type: 3}
m_Name:
m_EditorClassIdentifier:
agentParameters:
maxStep: 0
hasUpgradedFromAgentParameters: 1
maxStep: 5000
ball: {fileID: 1142513601053358}
--- !u!114 &8193279139064749781

m_Name:
m_EditorClassIdentifier:
DecisionPeriod: 5
RepeatAction: 1
TakeActionsBetweenDecisions: 1
--- !u!114 &7923264721978289873
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 1829721031899636}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 3a6da8f78a394c6ab027688eab81e04d, type: 3}
m_Name:
m_EditorClassIdentifier:
--- !u!1 &1978072206102878
GameObject:
m_ObjectHideFlags: 0

Project/Assets/ML-Agents/Examples/Basic/Prefabs/Basic.prefab (936)
File diff suppressed because it is too large

Project/Assets/ML-Agents/Examples/Bouncer/Prefabs/Environment.prefab (928)
File diff suppressed because it is too large

Project/Assets/ML-Agents/Examples/Crawler/Prefabs/DynamicPlatform.prefab (22)


- component: {fileID: 114590693924030052}
- component: {fileID: 114423363226357902}
- component: {fileID: 8520694362683208207}
- component: {fileID: 1267665179144855710}
m_Layer: 0
m_Name: Crawler
m_TagString: Untagged

m_InferenceDevice: 0
m_BehaviorType: 0
m_BehaviorName: CrawlerDynamic
m_TeamID: 0
m_useChildSensors: 1
TeamId: 0
m_UseChildSensors: 1
--- !u!114 &114590693924030052
MonoBehaviour:
m_ObjectHideFlags: 0

m_Script: {fileID: 11500000, guid: 2f37c30a5e8d04117947188818902ef3, type: 3}
m_Name:
m_EditorClassIdentifier:
agentParameters:
maxStep: 0
hasUpgradedFromAgentParameters: 1
maxStep: 5000
target: {fileID: 4490950947783742}
ground: {fileID: 4684408634944056}

m_Name:
m_EditorClassIdentifier:
DecisionPeriod: 5
RepeatAction: 0
TakeActionsBetweenDecisions: 0
--- !u!114 &1267665179144855710
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 1515093357607024}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 3a6da8f78a394c6ab027688eab81e04d, type: 3}
m_Name:
m_EditorClassIdentifier:
--- !u!1 &1520563409393552
GameObject:
m_ObjectHideFlags: 0

Project/Assets/ML-Agents/Examples/Crawler/Prefabs/FixedPlatform.prefab (13)


- component: {fileID: 114230237520033992}
- component: {fileID: 114375802757824636}
- component: {fileID: 8847231916954260663}
- component: {fileID: 6335439310911778343}
m_Layer: 0
m_Name: Crawler
m_TagString: Untagged

DecisionPeriod: 5
TakeActionsBetweenDecisions: 0
offsetStep: 0
--- !u!114 &6335439310911778343
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 1492298671135358}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 3a6da8f78a394c6ab027688eab81e04d, type: 3}
m_Name:
m_EditorClassIdentifier:
--- !u!1 &1492926997393242
GameObject:
m_ObjectHideFlags: 0

Project/Assets/ML-Agents/Examples/FoodCollector/Prefabs/FoodCollectorArea.prefab (153)


- component: {fileID: 114176228333253036}
- component: {fileID: 114725457980523372}
- component: {fileID: 8297075921230369060}
- component: {fileID: 1222199865870203693}
m_Layer: 0
m_Name: Agent
m_TagString: agent

m_InferenceDevice: 0
m_BehaviorType: 0
m_BehaviorName: FoodCollector
m_TeamID: 0
TeamId: 0
m_UseChildSensors: 1
--- !u!114 &114176228333253036
MonoBehaviour:

m_Script: {fileID: 11500000, guid: c66e6845309d241c78a6d77ee2567928, type: 3}
m_Name:
m_EditorClassIdentifier:
agentParameters:
maxStep: 0
hasUpgradedFromAgentParameters: 1
maxStep: 5000
area: {fileID: 1819751139121548}
turnSpeed: 300

m_Script: {fileID: 11500000, guid: 6bb6b867a41448888c1cd4f99643ad71, type: 3}
m_Name:
m_EditorClassIdentifier:
sensorName: RayPerceptionSensor
detectableTags:
m_SensorName: RayPerceptionSensor
m_DetectableTags:
raysPerDirection: 3
maxRayDegrees: 70
sphereCastRadius: 0.5
rayLength: 50
rayLayerMask:
m_RaysPerDirection: 3
m_MaxRayDegrees: 70
m_SphereCastRadius: 0.5
m_RayLength: 50
m_RayLayerMask:
observationStacks: 1
m_ObservationStacks: 1
useWorldPositions: 1
startVerticalOffset: 0
endVerticalOffset: 0
m_StartVerticalOffset: 0
m_EndVerticalOffset: 0
--- !u!114 &8297075921230369060
MonoBehaviour:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
DecisionPeriod: 5
RepeatAction: 1
TakeActionsBetweenDecisions: 1
--- !u!114 &1222199865870203693
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 1464820575638702}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 3a6da8f78a394c6ab027688eab81e04d, type: 3}
m_Name:
m_EditorClassIdentifier:
--- !u!1 &1482701732800114
GameObject:
m_ObjectHideFlags: 0

m_InferenceDevice: 0
m_BehaviorType: 0
m_BehaviorName: FoodCollector
m_TeamID: 0
TeamId: 0
m_UseChildSensors: 1
--- !u!114 &114711827726849508
MonoBehaviour:

m_Script: {fileID: 11500000, guid: c66e6845309d241c78a6d77ee2567928, type: 3}
m_Name:
m_EditorClassIdentifier:
agentParameters:
maxStep: 0
hasUpgradedFromAgentParameters: 1
maxStep: 5000
area: {fileID: 1819751139121548}
turnSpeed: 300

m_Script: {fileID: 11500000, guid: 6bb6b867a41448888c1cd4f99643ad71, type: 3}
m_Name:
m_EditorClassIdentifier:
sensorName: RayPerceptionSensor
detectableTags:
m_SensorName: RayPerceptionSensor
m_DetectableTags:
raysPerDirection: 3
maxRayDegrees: 70
sphereCastRadius: 0.5
rayLength: 50
rayLayerMask:
m_RaysPerDirection: 3
m_MaxRayDegrees: 70
m_SphereCastRadius: 0.5
m_RayLength: 50
m_RayLayerMask:
observationStacks: 1
m_ObservationStacks: 1
useWorldPositions: 1
startVerticalOffset: 0
endVerticalOffset: 0
m_StartVerticalOffset: 0
m_EndVerticalOffset: 0
--- !u!114 &259154752087955944
MonoBehaviour:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
DecisionPeriod: 5
RepeatAction: 1
TakeActionsBetweenDecisions: 1
offsetStep: 0
--- !u!1 &1528397385587768
GameObject:

m_InferenceDevice: 0
m_BehaviorType: 0
m_BehaviorName: FoodCollector
m_TeamID: 0
TeamId: 0
m_UseChildSensors: 1
--- !u!114 &114542632553128056
MonoBehaviour:

m_Script: {fileID: 11500000, guid: c66e6845309d241c78a6d77ee2567928, type: 3}
m_Name:
m_EditorClassIdentifier:
agentParameters:
maxStep: 0
hasUpgradedFromAgentParameters: 1
maxStep: 5000
area: {fileID: 1819751139121548}
turnSpeed: 300

m_Script: {fileID: 11500000, guid: 6bb6b867a41448888c1cd4f99643ad71, type: 3}
m_Name:
m_EditorClassIdentifier:
sensorName: RayPerceptionSensor
detectableTags:
m_SensorName: RayPerceptionSensor
m_DetectableTags:
raysPerDirection: 3
maxRayDegrees: 70
sphereCastRadius: 0.5
rayLength: 50
rayLayerMask:
m_RaysPerDirection: 3
m_MaxRayDegrees: 70
m_SphereCastRadius: 0.5
m_RayLength: 50
m_RayLayerMask:
observationStacks: 1
m_ObservationStacks: 1
useWorldPositions: 1
startVerticalOffset: 0
endVerticalOffset: 0
m_StartVerticalOffset: 0
m_EndVerticalOffset: 0
--- !u!114 &5519119940433428255
MonoBehaviour:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
DecisionPeriod: 5
RepeatAction: 1
TakeActionsBetweenDecisions: 1
offsetStep: 0
--- !u!1 &1617924810425504
GameObject:

m_InferenceDevice: 0
m_BehaviorType: 0
m_BehaviorName: FoodCollector
m_TeamID: 0
TeamId: 0
m_UseChildSensors: 1
--- !u!114 &114189751434580810
MonoBehaviour:

m_Script: {fileID: 11500000, guid: c66e6845309d241c78a6d77ee2567928, type: 3}
m_Name:
m_EditorClassIdentifier:
agentParameters:
maxStep: 0
hasUpgradedFromAgentParameters: 1
maxStep: 5000
area: {fileID: 1819751139121548}
turnSpeed: 300

m_Script: {fileID: 11500000, guid: 6bb6b867a41448888c1cd4f99643ad71, type: 3}
m_Name:
m_EditorClassIdentifier:
sensorName: RayPerceptionSensor
detectableTags:
m_SensorName: RayPerceptionSensor
m_DetectableTags:
raysPerDirection: 3
maxRayDegrees: 70
sphereCastRadius: 0.5
rayLength: 50
rayLayerMask:
m_RaysPerDirection: 3
m_MaxRayDegrees: 70
m_SphereCastRadius: 0.5
m_RayLength: 50
m_RayLayerMask:
observationStacks: 1
m_ObservationStacks: 1
useWorldPositions: 1
startVerticalOffset: 0
endVerticalOffset: 0
m_StartVerticalOffset: 0
m_EndVerticalOffset: 0
--- !u!114 &5884750436653390196
MonoBehaviour:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
DecisionPeriod: 5
RepeatAction: 1
TakeActionsBetweenDecisions: 1
offsetStep: 0
--- !u!1 &1688105343773098
GameObject:

m_InferenceDevice: 0
m_BehaviorType: 0
m_BehaviorName: FoodCollector
m_TeamID: 0
TeamId: 0
m_UseChildSensors: 1
--- !u!114 &114235147148547996
MonoBehaviour:

m_Script: {fileID: 11500000, guid: c66e6845309d241c78a6d77ee2567928, type: 3}
m_Name:
m_EditorClassIdentifier:
agentParameters:
maxStep: 0
hasUpgradedFromAgentParameters: 1
maxStep: 5000
area: {fileID: 1819751139121548}
turnSpeed: 300

m_Script: {fileID: 11500000, guid: 6bb6b867a41448888c1cd4f99643ad71, type: 3}
m_Name:
m_EditorClassIdentifier:
sensorName: RayPerceptionSensor
detectableTags:
m_SensorName: RayPerceptionSensor
m_DetectableTags:
raysPerDirection: 3
maxRayDegrees: 70
sphereCastRadius: 0.5
rayLength: 50
rayLayerMask:
m_RaysPerDirection: 3
m_MaxRayDegrees: 70
m_SphereCastRadius: 0.5
m_RayLength: 50
m_RayLayerMask:
observationStacks: 1
m_ObservationStacks: 1
useWorldPositions: 1
startVerticalOffset: 0
endVerticalOffset: 0
m_StartVerticalOffset: 0
m_EndVerticalOffset: 0
--- !u!114 &4768752321433982785
MonoBehaviour:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
DecisionPeriod: 5
RepeatAction: 1
TakeActionsBetweenDecisions: 1
offsetStep: 0
--- !u!1 &1729825611722018
GameObject:

Project/Assets/ML-Agents/Examples/GridWorld/Prefabs/Area.prefab (32)


- component: {fileID: 114935253044749092}
- component: {fileID: 114650561397225712}
- component: {fileID: 114889700908650620}
- component: {fileID: 7980686505185502968}
m_Layer: 8
m_Name: Agent
m_TagString: agent

m_InferenceDevice: 0
m_BehaviorType: 0
m_BehaviorName: GridWorld
m_TeamID: 0
m_useChildSensors: 1
TeamId: 0
m_UseChildSensors: 1
--- !u!114 &114650561397225712
MonoBehaviour:
m_ObjectHideFlags: 0

m_Script: {fileID: 11500000, guid: 857707f3f352541d5b858efca4479b95, type: 3}
m_Name:
m_EditorClassIdentifier:
agentParameters:
maxStep: 0
hasUpgradedFromAgentParameters: 1
maxStep: 100
area: {fileID: 114704252266302846}
timeBetweenDecisionsAtInference: 0.15

m_Script: {fileID: 11500000, guid: 282f342c2ab144bf38be65d4d0c4e07d, type: 3}
m_Name:
m_EditorClassIdentifier:
camera: {fileID: 20743940359151984}
sensorName: CameraSensor
width: 84
height: 64
grayscale: 0
compression: 1
m_Camera: {fileID: 20743940359151984}
m_SensorName: CameraSensor
m_Width: 84
m_Height: 64
m_Grayscale: 0
m_Compression: 1
--- !u!114 &7980686505185502968
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 1488387672112076}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 3a6da8f78a394c6ab027688eab81e04d, type: 3}
m_Name:
m_EditorClassIdentifier:
--- !u!1 &1625008366184734
GameObject:
m_ObjectHideFlags: 0

Project/Assets/ML-Agents/Examples/Hallway/Prefabs/SymbolFinderArea.prefab (43)


- component: {fileID: 114286701363010626}
- component: {fileID: 114388598785529460}
- component: {fileID: 1360037369662378601}
- component: {fileID: 3959905707628515947}
m_Layer: 0
m_Name: Agent
m_TagString: agent

m_InferenceDevice: 0
m_BehaviorType: 0
m_BehaviorName: Hallway
m_TeamID: 0
m_useChildSensors: 1
TeamId: 0
m_UseChildSensors: 1
--- !u!114 &114286701363010626
MonoBehaviour:
m_ObjectHideFlags: 0

m_Script: {fileID: 11500000, guid: b446afae240924105b36d07e8d17a608, type: 3}
m_Name:
m_EditorClassIdentifier:
agentParameters:
maxStep: 0
hasUpgradedFromAgentParameters: 1
maxStep: 3000
ground: {fileID: 1510027348950282}
area: {fileID: 1745841960385024}

m_Script: {fileID: 11500000, guid: 6bb6b867a41448888c1cd4f99643ad71, type: 3}
m_Name:
m_EditorClassIdentifier:
sensorName: RayPerceptionSensor
detectableTags:
m_SensorName: RayPerceptionSensor
m_DetectableTags:
raysPerDirection: 2
maxRayDegrees: 70
sphereCastRadius: 0.5
rayLength: 12
rayLayerMask:
m_RaysPerDirection: 2
m_MaxRayDegrees: 70
m_SphereCastRadius: 0.5
m_RayLength: 12
m_RayLayerMask:
observationStacks: 3
m_ObservationStacks: 3
useWorldPositions: 1
startVerticalOffset: 0
endVerticalOffset: 0
m_StartVerticalOffset: 0
m_EndVerticalOffset: 0
--- !u!114 &1360037369662378601
MonoBehaviour:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
DecisionPeriod: 6
RepeatAction: 1
TakeActionsBetweenDecisions: 1
--- !u!114 &3959905707628515947
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 1471560210313468}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 3a6da8f78a394c6ab027688eab81e04d, type: 3}
m_Name:
m_EditorClassIdentifier:
--- !u!1 &1510027348950282
GameObject:
m_ObjectHideFlags: 0

Project/Assets/ML-Agents/Examples/PushBlock/Prefabs/PushBlockArea.prefab (64)


- component: {fileID: 114807072692257076}
- component: {fileID: 114451319691753174}
- component: {fileID: 8964598783836598940}
- component: {fileID: 4081319787948195948}
m_Layer: 0
m_Name: Agent
m_TagString: agent

m_InferenceDevice: 0
m_BehaviorType: 0
m_BehaviorName: PushBlock
m_TeamID: 0
m_useChildSensors: 1
TeamId: 0
m_UseChildSensors: 1
--- !u!114 &114505490781873732
MonoBehaviour:
m_ObjectHideFlags: 0

m_Script: {fileID: 11500000, guid: dea8c4f2604b947e6b7b97750dde87ca, type: 3}
m_Name:
m_EditorClassIdentifier:
agentParameters:
maxStep: 0
hasUpgradedFromAgentParameters: 1
maxStep: 5000
ground: {fileID: 1500989011945850}
area: {fileID: 1125452240183160}

m_Script: {fileID: 11500000, guid: 6bb6b867a41448888c1cd4f99643ad71, type: 3}
m_Name:
m_EditorClassIdentifier:
sensorName: RayPerceptionSensor
detectableTags:
m_SensorName: RayPerceptionSensor
m_DetectableTags:
raysPerDirection: 3
maxRayDegrees: 90
sphereCastRadius: 0.5
rayLength: 12
rayLayerMask:
m_RaysPerDirection: 3
m_MaxRayDegrees: 90
m_SphereCastRadius: 0.5
m_RayLength: 12
m_RayLayerMask:
observationStacks: 3
m_ObservationStacks: 3
useWorldPositions: 1
startVerticalOffset: 0
endVerticalOffset: 0
m_StartVerticalOffset: 0
m_EndVerticalOffset: 0
--- !u!114 &114451319691753174
MonoBehaviour:
m_ObjectHideFlags: 0

m_Script: {fileID: 11500000, guid: 6bb6b867a41448888c1cd4f99643ad71, type: 3}
m_Name:
m_EditorClassIdentifier:
sensorName: OffsetRayPerceptionSensor
detectableTags:
m_SensorName: OffsetRayPerceptionSensor
m_DetectableTags:
raysPerDirection: 3
maxRayDegrees: 90
sphereCastRadius: 0.5
rayLength: 12
rayLayerMask:
m_RaysPerDirection: 3
m_MaxRayDegrees: 90
m_SphereCastRadius: 0.5
m_RayLength: 12
m_RayLayerMask:
observationStacks: 3
m_ObservationStacks: 3
useWorldPositions: 1
startVerticalOffset: 1.5
endVerticalOffset: 1.5
m_StartVerticalOffset: 1.5
m_EndVerticalOffset: 1.5
--- !u!114 &8964598783836598940
MonoBehaviour:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
DecisionPeriod: 5
RepeatAction: 1
TakeActionsBetweenDecisions: 1
--- !u!114 &4081319787948195948
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 1489716781518988}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 3a6da8f78a394c6ab027688eab81e04d, type: 3}
m_Name:
m_EditorClassIdentifier:
--- !u!1 &1500989011945850
GameObject:
m_ObjectHideFlags: 0

Project/Assets/ML-Agents/Examples/Pyramids/Prefabs/AreaPB.prefab (85)


- component: {fileID: 5712624269609438939}
- component: {fileID: 5767481171805996936}
- component: {fileID: 4725417187860315718}
- component: {fileID: 6474351450651730614}
m_Layer: 0
m_Name: Agent
m_TagString: agent

m_InferenceDevice: 0
m_BehaviorType: 0
m_BehaviorName: Pyramids
m_TeamID: 0
m_useChildSensors: 1
TeamId: 0
m_UseChildSensors: 1
--- !u!114 &114937736047215868
MonoBehaviour:
m_ObjectHideFlags: 0

m_Script: {fileID: 11500000, guid: b8db44472779248d3be46895c4d562d5, type: 3}
m_Name:
m_EditorClassIdentifier:
agentParameters:
maxStep: 0
hasUpgradedFromAgentParameters: 1
maxStep: 5000
area: {fileID: 1464170487903594}
areaSwitch: {fileID: 1432086782037750}

m_Name:
m_EditorClassIdentifier:
DecisionPeriod: 5
RepeatAction: 1
TakeActionsBetweenDecisions: 1
offsetStep: 0
--- !u!114 &5712624269609438939
MonoBehaviour:

m_Script: {fileID: 11500000, guid: 6bb6b867a41448888c1cd4f99643ad71, type: 3}
m_Name:
m_EditorClassIdentifier:
sensorName: RayPerceptionSensor
detectableTags:
m_SensorName: RayPerceptionSensor
m_DetectableTags:
- block
- wall
- goal

raysPerDirection: 3
maxRayDegrees: 70
sphereCastRadius: 0.5
rayLength: 35
rayLayerMask:
m_RaysPerDirection: 3
m_MaxRayDegrees: 70
m_SphereCastRadius: 0.5
m_RayLength: 35
m_RayLayerMask:
observationStacks: 1
m_ObservationStacks: 1
useWorldPositions: 1
startVerticalOffset: 0
endVerticalOffset: 0
m_StartVerticalOffset: 0
m_EndVerticalOffset: 0
--- !u!114 &5767481171805996936
MonoBehaviour:
m_ObjectHideFlags: 0

m_Script: {fileID: 11500000, guid: 6bb6b867a41448888c1cd4f99643ad71, type: 3}
m_Name:
m_EditorClassIdentifier:
sensorName: RayPerceptionSensor1
detectableTags:
m_SensorName: RayPerceptionSensor1
m_DetectableTags:
- block
- wall
- goal

raysPerDirection: 3
maxRayDegrees: 65
sphereCastRadius: 0.5
rayLength: 35
rayLayerMask:
m_RaysPerDirection: 3
m_MaxRayDegrees: 65
m_SphereCastRadius: 0.5
m_RayLength: 35
m_RayLayerMask:
observationStacks: 1
m_ObservationStacks: 1
useWorldPositions: 1
startVerticalOffset: 0
endVerticalOffset: 5
m_StartVerticalOffset: 0
m_EndVerticalOffset: 5
--- !u!114 &4725417187860315718
MonoBehaviour:
m_ObjectHideFlags: 0

m_Script: {fileID: 11500000, guid: 6bb6b867a41448888c1cd4f99643ad71, type: 3}
m_Name:
m_EditorClassIdentifier:
sensorName: RayPerceptionSensor2
detectableTags:
m_SensorName: RayPerceptionSensor2
m_DetectableTags:
- block
- wall
- goal

raysPerDirection: 3
maxRayDegrees: 75
sphereCastRadius: 0.5
rayLength: 35
rayLayerMask:
m_RaysPerDirection: 3
m_MaxRayDegrees: 75
m_SphereCastRadius: 0.5
m_RayLength: 35
m_RayLayerMask:
observationStacks: 1
m_ObservationStacks: 1
useWorldPositions: 1
startVerticalOffset: 0
endVerticalOffset: 10
m_StartVerticalOffset: 0
m_EndVerticalOffset: 10
--- !u!114 &6474351450651730614
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 1131043459059966}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 3a6da8f78a394c6ab027688eab81e04d, type: 3}
m_Name:
m_EditorClassIdentifier:
--- !u!1 &1148882946833254
GameObject:
m_ObjectHideFlags: 0

Project/Assets/ML-Agents/Examples/Reacher/Prefabs/Agent.prefab (22)


- component: {fileID: 114731167133171590}
- component: {fileID: 114955921823023820}
- component: {fileID: 6226801880261327134}
- component: {fileID: 7840105453417110232}
m_Layer: 0
m_Name: Agent
m_TagString: Untagged

m_InferenceDevice: 0
m_BehaviorType: 0
m_BehaviorName: Reacher
m_TeamID: 0
m_useChildSensors: 1
TeamId: 0
m_UseChildSensors: 1
--- !u!114 &114955921823023820
MonoBehaviour:
m_ObjectHideFlags: 0

m_Script: {fileID: 11500000, guid: 220b156e3b142406c8b76d4db981d044, type: 3}
m_Name:
m_EditorClassIdentifier:
agentParameters:
maxStep: 0
hasUpgradedFromAgentParameters: 1
maxStep: 4000
pendulumA: {fileID: 1644872085946016}
pendulumB: {fileID: 1053261483945176}

m_Name:
m_EditorClassIdentifier:
DecisionPeriod: 4
RepeatAction: 1
TakeActionsBetweenDecisions: 1
--- !u!114 &7840105453417110232
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 1395682910799436}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 3a6da8f78a394c6ab027688eab81e04d, type: 3}
m_Name:
m_EditorClassIdentifier:
--- !u!1 &1644872085946016
GameObject:
m_ObjectHideFlags: 0

Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ModelOverrider.cs (6)


{
if (m_MaxEpisodes > 0)
{
if (m_NumSteps > m_MaxEpisodes * m_Agent.maxStep)
// For Agents without maxSteps, exit as soon as we've hit the target number of episodes.
// For Agents that specify maxStep, also make sure we've gone at least that many steps.
// Since we exit as soon as *any* Agent hits its target, the maxSteps condition keeps us running
// a bit longer in case there's an early failure.
if (m_Agent.CompletedEpisodes >= m_MaxEpisodes && m_NumSteps > m_MaxEpisodes * m_Agent.maxStep)
{
Application.Quit(0);
}

Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ProjectSettingsOverrides.cs (4)


Physics.defaultSolverIterations = solverIterations;
Physics.defaultSolverVelocityIterations = solverVelocityIterations;
// Make sure the Academy singleton is initialized first, since it will create the SideChannels.
var academy = Academy.Instance;
SideChannelUtils.GetSideChannel<FloatPropertiesChannel>().RegisterCallback("gravity", f => { Physics.gravity = new Vector3(0, -f, 0); });
}

Physics.defaultSolverVelocityIterations = m_OriginalSolverVelocityIterations;
}
}
}
}

Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/SensorBase.cs (3)


public void Update() {}
/// <inheritdoc/>
public void Reset() { }
/// <inheritdoc/>
public virtual byte[] GetCompressedObservation()
{
return null;

Project/Assets/ML-Agents/Examples/Soccer/Prefabs/SoccerFieldTwos.prefab (256)


- component: {fileID: 114492261207303438}
- component: {fileID: 114320493772006642}
- component: {fileID: 9152743230243588598}
- component: {fileID: 5530675298926254831}
m_Layer: 0
m_Name: PurpleStriker
m_TagString: purpleAgent

m_InferenceDevice: 0
m_BehaviorType: 0
m_BehaviorName: Soccer
m_TeamID: 1
m_useChildSensors: 1
TeamId: 1
m_UseChildSensors: 1
--- !u!114 &114492261207303438
MonoBehaviour:
m_ObjectHideFlags: 0

m_Script: {fileID: 11500000, guid: 2a2688ef4a36349f9aa010020c32d198, type: 3}
m_Name:
m_EditorClassIdentifier:
agentParameters:
maxStep: 0
hasUpgradedFromAgentParameters: 1
maxStep: 3000
team: 0
area: {fileID: 114559182131992928}

m_Script: {fileID: 11500000, guid: 6bb6b867a41448888c1cd4f99643ad71, type: 3}
m_Name:
m_EditorClassIdentifier:
sensorName: PurpleRayPerceptionSensor
detectableTags:
m_SensorName: PurpleRayPerceptionSensor
m_DetectableTags:
- ball
- purpleGoal
- blueGoal

raysPerDirection: 5
maxRayDegrees: 60
sphereCastRadius: 0.5
rayLength: 20
rayLayerMask:
m_RaysPerDirection: 5
m_MaxRayDegrees: 60
m_SphereCastRadius: 0.5
m_RayLength: 20
m_RayLayerMask:
observationStacks: 3
m_ObservationStacks: 3
useWorldPositions: 1
startVerticalOffset: 0.5
endVerticalOffset: 0.5
m_StartVerticalOffset: 0.5
m_EndVerticalOffset: 0.5
--- !u!114 &9152743230243588598
MonoBehaviour:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
DecisionPeriod: 5
RepeatAction: 1
TakeActionsBetweenDecisions: 1
--- !u!114 &5530675298926254831
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 1095606497496374}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 3a6da8f78a394c6ab027688eab81e04d, type: 3}
m_Name:
m_EditorClassIdentifier:
--- !u!1 &1100217258374548
GameObject:
m_ObjectHideFlags: 0

- component: {fileID: 114850431417842684}
- component: {fileID: 114516244030127556}
- component: {fileID: 404683423509059512}
- component: {fileID: 2668741801881409108}
m_Layer: 0
m_Name: BlueStriker
m_TagString: blueAgent

m_InferenceDevice: 0
m_BehaviorType: 0
m_BehaviorName: Soccer
m_TeamID: 0
m_useChildSensors: 1
TeamId: 0
m_UseChildSensors: 1
--- !u!114 &114850431417842684
MonoBehaviour:
m_ObjectHideFlags: 0

m_Script: {fileID: 11500000, guid: 2a2688ef4a36349f9aa010020c32d198, type: 3}
m_Name:
m_EditorClassIdentifier:
agentParameters:
maxStep: 0
hasUpgradedFromAgentParameters: 1
maxStep: 3000
team: 1
area: {fileID: 114559182131992928}

m_Script: {fileID: 11500000, guid: 6bb6b867a41448888c1cd4f99643ad71, type: 3}
m_Name:
m_EditorClassIdentifier:
sensorName: BlueRayPerceptionSensor
detectableTags:
m_SensorName: BlueRayPerceptionSensor
m_DetectableTags:
- ball
- blueGoal
- purpleGoal

raysPerDirection: 5
maxRayDegrees: 60
sphereCastRadius: 0.5
rayLength: 20
rayLayerMask:
m_RaysPerDirection: 5
m_MaxRayDegrees: 60
m_SphereCastRadius: 0.5
m_RayLength: 20
m_RayLayerMask:
observationStacks: 3
m_ObservationStacks: 3
useWorldPositions: 1
startVerticalOffset: 0.5
endVerticalOffset: 0.5
m_StartVerticalOffset: 0.5
m_EndVerticalOffset: 0.5
--- !u!114 &404683423509059512
MonoBehaviour:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
DecisionPeriod: 5
RepeatAction: 1
TakeActionsBetweenDecisions: 1
--- !u!114 &2668741801881409108
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 1131626411948014}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 3a6da8f78a394c6ab027688eab81e04d, type: 3}
m_Name:
m_EditorClassIdentifier:
--- !u!1 &1141134673700168
GameObject:
m_ObjectHideFlags: 0

m_Script: {fileID: 11500000, guid: 6bb6b867a41448888c1cd4f99643ad71, type: 3}
m_Name:
m_EditorClassIdentifier:
sensorName: PurpleRayPerceptionSensorReverse
detectableTags:
m_SensorName: PurpleRayPerceptionSensorReverse
m_DetectableTags:
- ball
- purpleGoal
- blueGoal

raysPerDirection: 1
maxRayDegrees: 45
sphereCastRadius: 0.5
rayLength: 20
rayLayerMask:
m_RaysPerDirection: 1
m_MaxRayDegrees: 45
m_SphereCastRadius: 0.5
m_RayLength: 20
m_RayLayerMask:
observationStacks: 3
m_ObservationStacks: 3
useWorldPositions: 1
startVerticalOffset: 0.5
endVerticalOffset: 0.5
m_StartVerticalOffset: 0.5
m_EndVerticalOffset: 0.5
--- !u!1 &742736642297762088
GameObject:
m_ObjectHideFlags: 0

m_Script: {fileID: 11500000, guid: 6bb6b867a41448888c1cd4f99643ad71, type: 3}
m_Name:
m_EditorClassIdentifier:
sensorName: BlueRayPerceptionSensorReverse
detectableTags:
m_SensorName: BlueRayPerceptionSensorReverse
m_DetectableTags:
- ball
- blueGoal
- purpleGoal

raysPerDirection: 1
maxRayDegrees: 45
sphereCastRadius: 0.5
rayLength: 20
rayLayerMask:
m_RaysPerDirection: 1
m_MaxRayDegrees: 45
m_SphereCastRadius: 0.5
m_RayLength: 20
m_RayLayerMask:
observationStacks: 3
m_ObservationStacks: 3
useWorldPositions: 1
startVerticalOffset: 0.5
endVerticalOffset: 0.5
m_StartVerticalOffset: 0.5
m_EndVerticalOffset: 0.5
--- !u!1 &2016057044266316337
GameObject:
m_ObjectHideFlags: 0

m_Script: {fileID: 11500000, guid: 6bb6b867a41448888c1cd4f99643ad71, type: 3}
m_Name:
m_EditorClassIdentifier:
sensorName: BlueRayPerceptionSensorReverse
detectableTags:
m_SensorName: BlueRayPerceptionSensorReverse
m_DetectableTags:
- ball
- blueGoal
- purpleGoal

raysPerDirection: 1
maxRayDegrees: 45
sphereCastRadius: 0.5
rayLength: 20
rayLayerMask:
m_RaysPerDirection: 1
m_MaxRayDegrees: 45
m_SphereCastRadius: 0.5
m_RayLength: 20
m_RayLayerMask:
observationStacks: 3
m_ObservationStacks: 3
useWorldPositions: 1
startVerticalOffset: 0.5
endVerticalOffset: 0.5
m_StartVerticalOffset: 0.5
m_EndVerticalOffset: 0.5
--- !u!1 &4599713170205044794
GameObject:
m_ObjectHideFlags: 0

- component: {fileID: 5320024511406682322}
- component: {fileID: 1023485123796557062}
- component: {fileID: 8734522883866558980}
- component: {fileID: 2436210718391481760}
m_Layer: 0
m_Name: PurpleStriker (1)
m_TagString: purpleAgent

m_InferenceDevice: 0
m_BehaviorType: 0
m_BehaviorName: Soccer
m_TeamID: 1
m_useChildSensors: 1
TeamId: 1
m_UseChildSensors: 1
--- !u!114 &5320024511406682322
MonoBehaviour:
m_ObjectHideFlags: 0

m_Script: {fileID: 11500000, guid: 2a2688ef4a36349f9aa010020c32d198, type: 3}
m_Name:
m_EditorClassIdentifier:
agentParameters:
maxStep: 0
hasUpgradedFromAgentParameters: 1
maxStep: 3000
team: 0
area: {fileID: 114559182131992928}

m_Script: {fileID: 11500000, guid: 6bb6b867a41448888c1cd4f99643ad71, type: 3}
m_Name:
m_EditorClassIdentifier:
sensorName: PurpleRayPerceptionSensor
detectableTags:
m_SensorName: PurpleRayPerceptionSensor
m_DetectableTags:
- ball
- purpleGoal
- blueGoal

raysPerDirection: 5
maxRayDegrees: 60
sphereCastRadius: 0.5
rayLength: 20
rayLayerMask:
m_RaysPerDirection: 5
m_MaxRayDegrees: 60
m_SphereCastRadius: 0.5
m_RayLength: 20
m_RayLayerMask:
observationStacks: 3
m_ObservationStacks: 3
useWorldPositions: 1
startVerticalOffset: 0.5
endVerticalOffset: 0.5
m_StartVerticalOffset: 0.5
m_EndVerticalOffset: 0.5
--- !u!114 &8734522883866558980
MonoBehaviour:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
DecisionPeriod: 5
RepeatAction: 1
TakeActionsBetweenDecisions: 1
--- !u!114 &2436210718391481760
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6257467487437560250}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 3a6da8f78a394c6ab027688eab81e04d, type: 3}
m_Name:
m_EditorClassIdentifier:
--- !u!1 &6442519122303792292
GameObject:
m_ObjectHideFlags: 0

- component: {fileID: 5379409612883756837}
- component: {fileID: 2562571719799803906}
- component: {fileID: 1018414316889932458}
- component: {fileID: 5288255359135781773}
m_Layer: 0
m_Name: BlueStriker (1)
m_TagString: blueAgent

m_InferenceDevice: 0
m_BehaviorType: 0
m_BehaviorName: Soccer
m_TeamID: 0
m_useChildSensors: 1
TeamId: 0
m_UseChildSensors: 1
--- !u!114 &5379409612883756837
MonoBehaviour:
m_ObjectHideFlags: 0

m_Script: {fileID: 11500000, guid: 2a2688ef4a36349f9aa010020c32d198, type: 3}
m_Name:
m_EditorClassIdentifier:
agentParameters:
maxStep: 0
hasUpgradedFromAgentParameters: 1
maxStep: 3000
team: 1
area: {fileID: 114559182131992928}

m_Script: {fileID: 11500000, guid: 6bb6b867a41448888c1cd4f99643ad71, type: 3}
m_Name:
m_EditorClassIdentifier:
sensorName: BlueRayPerceptionSensor
detectableTags:
m_SensorName: BlueRayPerceptionSensor
m_DetectableTags:
- ball
- blueGoal
- purpleGoal

raysPerDirection: 5
maxRayDegrees: 60
sphereCastRadius: 0.5
rayLength: 20
rayLayerMask:
m_RaysPerDirection: 5
m_MaxRayDegrees: 60
m_SphereCastRadius: 0.5
m_RayLength: 20
m_RayLayerMask:
observationStacks: 3
m_ObservationStacks: 3
useWorldPositions: 1
startVerticalOffset: 0.5
endVerticalOffset: 0.5
m_StartVerticalOffset: 0.5
m_EndVerticalOffset: 0.5
--- !u!114 &1018414316889932458
MonoBehaviour:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
DecisionPeriod: 5
RepeatAction: 1
TakeActionsBetweenDecisions: 1
--- !u!114 &5288255359135781773
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 8360301818957399454}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 3a6da8f78a394c6ab027688eab81e04d, type: 3}
m_Name:
m_EditorClassIdentifier:
--- !u!1 &8673569163220857793
GameObject:
m_ObjectHideFlags: 0

m_Script: {fileID: 11500000, guid: 6bb6b867a41448888c1cd4f99643ad71, type: 3}
m_Name:
m_EditorClassIdentifier:
sensorName: PurpleRayPerceptionSensorReverse
detectableTags:
m_SensorName: PurpleRayPerceptionSensorReverse
m_DetectableTags:
- ball
- purpleGoal
- blueGoal

raysPerDirection: 1
maxRayDegrees: 45
sphereCastRadius: 0.5
rayLength: 20
rayLayerMask:
m_RaysPerDirection: 1
m_MaxRayDegrees: 45
m_SphereCastRadius: 0.5
m_RayLength: 20
m_RayLayerMask:
observationStacks: 3
m_ObservationStacks: 3
useWorldPositions: 1
startVerticalOffset: 0.5
endVerticalOffset: 0.5
m_StartVerticalOffset: 0.5
m_EndVerticalOffset: 0.5

Project/Assets/ML-Agents/Examples/Tennis/Prefabs/TennisArea.prefab (44)


- component: {fileID: 114176423636690854}
- component: {fileID: 114915946461826994}
- component: {fileID: 2449890524009497851}
- component: {fileID: 6467897465973556822}
m_Layer: 0
m_Name: AgentA
m_TagString: agent

m_InferenceDevice: 0
m_BehaviorType: 0
m_BehaviorName: Tennis
m_TeamID: 0
m_useChildSensors: 1
TeamId: 0
m_UseChildSensors: 1
--- !u!114 &114915946461826994
MonoBehaviour:
m_ObjectHideFlags: 0

m_Script: {fileID: 11500000, guid: e51a3fb0b3186433ea84fc1e0549cc91, type: 3}
m_Name:
m_EditorClassIdentifier:
agentParameters:
maxStep: 0
hasUpgradedFromAgentParameters: 1
maxStep: 0
ball: {fileID: 1273406647218856}
invertX: 0

m_Name:
m_EditorClassIdentifier:
DecisionPeriod: 5
RepeatAction: 1
TakeActionsBetweenDecisions: 1
--- !u!114 &6467897465973556822
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 1170495812642400}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 3a6da8f78a394c6ab027688eab81e04d, type: 3}
m_Name:
m_EditorClassIdentifier:
--- !u!1 &1194790474478638
GameObject:
m_ObjectHideFlags: 0

- component: {fileID: 114399072728845634}
- component: {fileID: 114800310164848628}
- component: {fileID: 6598495797138489682}
- component: {fileID: 1420140102966759323}
m_Layer: 0
m_Name: AgentB
m_TagString: agent

m_InferenceDevice: 0
m_BehaviorType: 0
m_BehaviorName: Tennis
m_TeamID: 1
m_useChildSensors: 1
TeamId: 1
m_UseChildSensors: 1
--- !u!114 &114800310164848628
MonoBehaviour:
m_ObjectHideFlags: 0

m_Script: {fileID: 11500000, guid: e51a3fb0b3186433ea84fc1e0549cc91, type: 3}
m_Name:
m_EditorClassIdentifier:
agentParameters:
maxStep: 0
hasUpgradedFromAgentParameters: 1
maxStep: 0
ball: {fileID: 1273406647218856}
invertX: 1

m_Name:
m_EditorClassIdentifier:
DecisionPeriod: 5
RepeatAction: 1
TakeActionsBetweenDecisions: 1
--- !u!114 &1420140102966759323
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 1882383181950958}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 3a6da8f78a394c6ab027688eab81e04d, type: 3}
m_Name:
m_EditorClassIdentifier:
--- !u!1 &1969551055586186
GameObject:
m_ObjectHideFlags: 0

Project/Assets/ML-Agents/Examples/Tennis/Scripts/TennisAgent.cs (7)


public override float[] Heuristic()
{
var action = new float[2];
var action = new float[3];
action[0] = Input.GetAxis("Horizontal");
action[1] = Input.GetKey(KeyCode.Space) ? 1f : 0f;
action[0] = Input.GetAxis("Horizontal"); // Racket Movement
action[1] = Input.GetKey(KeyCode.Space) ? 1f : 0f; // Racket Jumping
action[2] = Input.GetAxis("Vertical"); // Racket Rotation
return action;
}

Project/Assets/ML-Agents/Examples/Walker/Prefabs/WalkerPair.prefab (22)


- component: {fileID: 114363722412740164}
- component: {fileID: 114614375190687060}
- component: {fileID: 7095046440131842424}
- component: {fileID: 526281586680617836}
m_Layer: 0
m_Name: WalkerAgent
m_TagString: Untagged

m_InferenceDevice: 0
m_BehaviorType: 0
m_BehaviorName: Walker
m_TeamID: 0
m_useChildSensors: 1
TeamId: 0
m_UseChildSensors: 1
--- !u!114 &114363722412740164
MonoBehaviour:
m_ObjectHideFlags: 0

m_Script: {fileID: 11500000, guid: ccb0f85f0009540d7ad997952e2aed7b, type: 3}
m_Name:
m_EditorClassIdentifier:
agentParameters:
maxStep: 0
hasUpgradedFromAgentParameters: 1
maxStep: 5000
target: {fileID: 4085853164035250}
hips: {fileID: 4333477265252406}

m_Name:
m_EditorClassIdentifier:
DecisionPeriod: 5
RepeatAction: 0
TakeActionsBetweenDecisions: 0
--- !u!114 &526281586680617836
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 1800913799254612}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 3a6da8f78a394c6ab027688eab81e04d, type: 3}
m_Name:
m_EditorClassIdentifier:
--- !u!1 &1907539933197724
GameObject:
m_ObjectHideFlags: 0

Project/Assets/ML-Agents/Examples/WallJump/Prefabs/WallJumpArea.prefab (64)


- component: {fileID: 114458838850320084}
- component: {fileID: 114227939525648256}
- component: {fileID: 4778045978646539396}
- component: {fileID: 7445449404652947848}
m_Layer: 0
m_Name: Agent
m_TagString: agent

m_InferenceDevice: 0
m_BehaviorType: 0
m_BehaviorName: SmallWallJump
m_TeamID: 0
m_useChildSensors: 1
TeamId: 0
m_UseChildSensors: 1
--- !u!114 &114925928594762506
MonoBehaviour:
m_ObjectHideFlags: 0

m_Script: {fileID: 11500000, guid: 676fca959b8ee45539773905ca71afa1, type: 3}
m_Name:
m_EditorClassIdentifier:
agentParameters:
maxStep: 0
hasUpgradedFromAgentParameters: 1
maxStep: 2000
noWallBrain: {fileID: 11400000, guid: fb2ce36eb40b6480e94ea0b5d7573e47, type: 3}
smallWallBrain: {fileID: 11400000, guid: fb2ce36eb40b6480e94ea0b5d7573e47, type: 3}

m_Script: {fileID: 11500000, guid: 6bb6b867a41448888c1cd4f99643ad71, type: 3}
m_Name:
m_EditorClassIdentifier:
sensorName: RayPerceptionSensor
detectableTags:
m_SensorName: RayPerceptionSensor
m_DetectableTags:
raysPerDirection: 3
maxRayDegrees: 90
sphereCastRadius: 0.5
rayLength: 20
rayLayerMask:
m_RaysPerDirection: 3
m_MaxRayDegrees: 90
m_SphereCastRadius: 0.5
m_RayLength: 20
m_RayLayerMask:
observationStacks: 6
m_ObservationStacks: 6
useWorldPositions: 1
startVerticalOffset: 0
endVerticalOffset: 0
m_StartVerticalOffset: 0
m_EndVerticalOffset: 0
--- !u!114 &114227939525648256
MonoBehaviour:
m_ObjectHideFlags: 0

m_Script: {fileID: 11500000, guid: 6bb6b867a41448888c1cd4f99643ad71, type: 3}
m_Name:
m_EditorClassIdentifier:
sensorName: OffsetRayPerceptionSensor
detectableTags:
m_SensorName: OffsetRayPerceptionSensor
m_DetectableTags:
raysPerDirection: 3
maxRayDegrees: 90
sphereCastRadius: 0.5
rayLength: 20
rayLayerMask:
m_RaysPerDirection: 3
m_MaxRayDegrees: 90
m_SphereCastRadius: 0.5
m_RayLength: 20
m_RayLayerMask:
observationStacks: 6
m_ObservationStacks: 6
useWorldPositions: 1
startVerticalOffset: 2.5
endVerticalOffset: 5
m_StartVerticalOffset: 2.5
m_EndVerticalOffset: 5
--- !u!114 &4778045978646539396
MonoBehaviour:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
DecisionPeriod: 5
RepeatAction: 1
TakeActionsBetweenDecisions: 1
--- !u!114 &7445449404652947848
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 1195095783991828}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 3a6da8f78a394c6ab027688eab81e04d, type: 3}
m_Name:
m_EditorClassIdentifier:
--- !u!1 &1264699583886832
GameObject:
m_ObjectHideFlags: 0

Project/ProjectSettings/GraphicsSettings.asset (3)


- {fileID: 10753, guid: 0000000000000000f000000000000000, type: 0}
- {fileID: 10770, guid: 0000000000000000f000000000000000, type: 0}
- {fileID: 10783, guid: 0000000000000000f000000000000000, type: 0}
- {fileID: 16000, guid: 0000000000000000f000000000000000, type: 0}
- {fileID: 16001, guid: 0000000000000000f000000000000000, type: 0}
- {fileID: 17000, guid: 0000000000000000f000000000000000, type: 0}
m_PreloadedShaders: []
m_SpritesDefaultMaterial: {fileID: 10754, guid: 0000000000000000f000000000000000,
type: 0}

Project/ProjectSettings/UnityConnectSettings.asset (2)


UnityConnectSettings:
m_ObjectHideFlags: 0
serializedVersion: 1
m_Enabled: 0
m_Enabled: 1
m_TestMode: 0
m_EventOldUrl: https://api.uca.cloud.unity3d.com/v1/events
m_EventUrl: https://cdp.cloud.unity3d.com/v1/events

README.md (2)


|:-------:|:------:|:-------------:|:-------:|:------------:|
| **master (unstable)** | -- | [source](https://github.com/Unity-Technologies/ml-agents/tree/master) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/master/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/master.zip) |
| **0.15.1** | **March 30, 2020** | **[source](https://github.com/Unity-Technologies/ml-agents/tree/0.15.1)** | **[docs](https://github.com/Unity-Technologies/ml-agents/tree/0.15.1/docs/Readme.md)** | **[download](https://github.com/Unity-Technologies/ml-agents/archive/0.15.1.zip)** |
| **0.15.0** | **March 18, 2020** | **[source](https://github.com/Unity-Technologies/ml-agents/tree/0.15.0)** | **[docs](https://github.com/Unity-Technologies/ml-agents/tree/0.15.0/docs/Readme.md)** | **[download](https://github.com/Unity-Technologies/ml-agents/archive/0.15.0.zip)** |
| **0.15.0** | March 18, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/0.15.0) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/0.15.0/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/0.15.0.zip) |
| **0.14.1** | February 26, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/0.14.1) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/0.14.1/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/0.14.1.zip) |
| **0.14.0** | February 13, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/0.14.0) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/0.14.0/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/0.14.0.zip) |
| **0.13.1** | January 21, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/0.13.1) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/0.13.1/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/0.13.1.zip) |

com.unity.ml-agents/CHANGELOG.md (7)


- The Jupyter notebooks have been removed from the repository.
- Introduced the `SideChannelUtils` to register, unregister and access side channels.
- `Academy.FloatProperties` was removed, please use `SideChannelUtils.GetSideChannel<FloatPropertiesChannel>()` instead.
- Removed the multi-agent gym option from the gym wrapper. For multi-agent scenarios, use the [Low Level Python API](Python-API.md).
- The low level Python API has changed. You can look at the document [Low Level Python API documentation](Python-API.md) for more information. If you use `mlagents-learn` for training, this should be a transparent change.
- Added ability to start training (initialize model weights) from a previous run ID. (#3710)
- The internal event `Academy.AgentSetStatus` was renamed to `Academy.AgentPreStep` and made public.
- The offset logic was removed from DecisionRequester.
- Timer files now contain a dictionary of metadata, including things like the package version numbers.
- SideChannel IncomingMessages methods now take an optional default argument, which is used when trying to read more data than the message contains.
- The way that UnityEnvironment decides the port was changed. If no port is specified, the behavior will depend on the `file_name` parameter. If it is `None`, 5004 (the editor port) will be used; otherwise 5005 (the base environment port) will be used.
- Fixed an issue where exceptions from environments provided a returncode of 0. (#3680)
- Running `mlagents-learn` with the same `--run-id` twice will no longer overwrite the existing files. (#3705)
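
The SideChannelUtils entries above replace Academy.FloatProperties. Below is a minimal sketch of the new access pattern, mirroring the RegisterCallback call in the ProjectSettingsOverrides.cs hunk earlier in this diff; the using directives and the component name are assumptions for illustration and are not part of this commit.

using MLAgents;
using UnityEngine;

public class GravityFromPython : MonoBehaviour
{
    void Awake()
    {
        // Touch Academy.Instance first so the default side channels get created.
        var academy = Academy.Instance;

        // Academy.FloatProperties is gone; the channel is now reached through SideChannelUtils.
        var floatProperties = SideChannelUtils.GetSideChannel<FloatPropertiesChannel>();

        // React whenever the Python side sends a new "gravity" value.
        floatProperties.RegisterCallback("gravity", value =>
        {
            Physics.gravity = new Vector3(0f, -value, 0f);
        });
    }
}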

com.unity.ml-agents/Runtime/Academy.cs (24)


/// on each side, although we may allow some flexibility in the future.
/// This should be incremented whenever a change is made to the communication protocol.
/// </summary>
const string k_ApiVersion = "0.15.0";
const string k_ApiVersion = "0.16.0";
internal const string k_PackageVersion = "0.15.0-preview";
internal const string k_PackageVersion = "0.15.1-preview";
const int k_EditorTrainingPort = 5004;

// This will mark the Agent as Done if it has reached its maxSteps.
internal event Action AgentIncrementStep;
// Signals to all the agents at each environment step along with the
// Academy's maxStepReached, done and stepCount values. The agents rely
// on this event to update their own values of max step reached and done
// in addition to aligning on the step count of the global episode.
internal event Action<int> AgentSetStatus;
/// <summary>
/// Signals to all of the <see cref="Agent"/>s that their step is about to begin.
/// This is a good time for an <see cref="Agent"/> to decide if it would like to
/// call <see cref="Agent.RequestDecision"/> or <see cref="Agent.RequestAction"/>
/// for this step. Any other pre-step setup could be done during this even as well.
/// </summary>
public event Action<int> AgentPreStep;
// Signals to all the agents at each environment step so they can send
// their state to their Policy if they have requested a decision.

/// </summary>
void InitializeEnvironment()
{
TimerStack.Instance.AddMetadata("communication_protocol_version", k_ApiVersion);
TimerStack.Instance.AddMetadata("com.unity.ml-agents_version", k_PackageVersion);
EnableAutomaticStepping();
SideChannelUtils.RegisterSideChannel(new EngineConfigurationChannel());

{
DecideAction = () => {};
DestroyAction = () => {};
AgentSetStatus = i => {};
AgentPreStep = i => {};
AgentSendState = () => {};
AgentAct = () => {};
AgentForceReset = () => {};

ForcedFullReset();
}
AgentSetStatus?.Invoke(m_StepCount);
AgentPreStep?.Invoke(m_StepCount);
m_StepCount += 1;
m_TotalStepCount += 1;
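
Because AgentSetStatus becomes the public AgentPreStep event here, user code can now hook the pre-step signal directly. A minimal sketch of a subscriber follows (a hypothetical component, not part of this commit; it assumes the MLAgents namespace used elsewhere in this diff and that Academy.IsInitialized is available, as other components use it).

using MLAgents;
using UnityEngine;

public class PreStepLogger : MonoBehaviour
{
    void Awake()
    {
        // AgentPreStep passes the Academy's current step count to every subscriber.
        Academy.Instance.AgentPreStep += OnAgentPreStep;
    }

    void OnAgentPreStep(int academyStepCount)
    {
        // This is the point where an Agent would decide to call
        // RequestDecision() or RequestAction() for the coming step.
        Debug.Log($"Academy step {academyStepCount} is about to run.");
    }

    void OnDestroy()
    {
        // Only unsubscribe if the Academy still exists, as DecisionRequester does below.
        if (Academy.IsInitialized)
        {
            Academy.Instance.AgentPreStep -= OnAgentPreStep;
        }
    }
}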

com.unity.ml-agents/Runtime/Agent.cs (35)


/// their own experience.
int m_StepCount;
/// Number of times the Agent has completed an episode.
int m_CompletedEpisodes;
/// Episode identifier each agent receives. It is used
/// to separate between different agents in the environment.
/// This Id will be changed every time the Agent resets.

m_Info.reward = m_Reward;
m_Info.done = true;
m_Info.maxStepReached = doneReason == DoneReason.MaxStepReached;
if (collectObservationsSensor != null)
{
// Make sure the latest observations are being passed to training.
collectObservationsSensor.Reset();
CollectObservations(collectObservationsSensor);
}
ResetSensors();
// We also have to write any to any DemonstationStores so that they get the "done" flag.
foreach (var demoWriter in DemonstrationWriters)

if (doneReason != DoneReason.Disabled)
{
// We don't want to udpate the reward stats when the Agent is disabled, because this will make
// We don't want to update the reward stats when the Agent is disabled, because this will make
m_CompletedEpisodes++;
// The Agent is done, so we give it a new episode Id
m_EpisodeId = EpisodeIdCounter.GetEpisodeId();
m_Reward = 0f;
m_CumulativeReward = 0f;
m_RequestAction = false;

}
/// <summary>
/// Returns the number of episodes that the Agent has completed (either <see cref="Agent.EndEpisode()"/>
/// was called, or maxSteps was reached).
/// </summary>
/// <returns>
/// Current episode count.
/// </returns>
public int CompletedEpisodes
{
get { return m_CompletedEpisodes; }
}
/// <summary>
/// Overrides the current step reward of the agent and updates the episode
/// reward accordingly.
/// </summary>

foreach (var sensor in sensors)
{
sensor.Update();
}
}
void ResetSensors()
{
foreach (var sensor in sensors)
{
sensor.Reset();
}
}
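
The public CompletedEpisodes property added above is what ModelOverrider's exit check earlier in this diff builds on. A sketch of a similar watcher, with hypothetical names, that stops a standalone run after a fixed episode budget:

using MLAgents;
using UnityEngine;

public class EpisodeBudget : MonoBehaviour
{
    public Agent agent;           // assign in the Inspector
    public int maxEpisodes = 10;  // stop after this many completed episodes

    void FixedUpdate()
    {
        // CompletedEpisodes counts both EndEpisode() calls and maxStep terminations.
        if (agent != null && agent.CompletedEpisodes >= maxEpisodes)
        {
            Application.Quit(0);
        }
    }
}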

com.unity.ml-agents/Runtime/Communicator/RpcCommunicator.cs (5)


{
m_OrderedAgentsRequestingDecisions[behaviorName] = new List<int>();
}
m_OrderedAgentsRequestingDecisions[behaviorName].Add(info.episodeId);
if (!info.done)
{
m_OrderedAgentsRequestingDecisions[behaviorName].Add(info.episodeId);
}
if (!m_LastActionsReceived.ContainsKey(behaviorName))
{
m_LastActionsReceived[behaviorName] = new Dictionary<int, float[]>();

com.unity.ml-agents/Runtime/DecisionRequester.cs (34)


using System;
using UnityEngine;
using UnityEngine.Serialization;

/// at regular intervals.
/// </summary>
[AddComponentMenu("ML Agents/Decision Requester", (int)MenuGroup.Default)]
internal class DecisionRequester : MonoBehaviour
[RequireComponent(typeof(Agent))]
public class DecisionRequester : MonoBehaviour
/// that the Agent will request a decision every 5 Academy steps.
/// </summary>
/// that the Agent will request a decision every 5 Academy steps. /// </summary>
[Range(1, 20)]
[Tooltip("The frequency with which the agent requests a decision. A DecisionPeriod " +
"of 5 means that the Agent will request a decision every 5 Academy steps.")]

[FormerlySerializedAs("RepeatAction")]
public bool TakeActionsBetweenDecisions = true;
/// <summary>
/// Whether or not the Agent decisions should start at an offset (different for each agent).
/// This does not affect <see cref="DecisionPeriod"/>. Turning this on will distribute
/// the decision-making computations for all the agents across multiple Academy steps.
/// This can be valuable in scenarios where you have many agents in the scene, particularly
/// during the inference phase.
/// </summary>
[Tooltip("Whether or not Agent decisions should start at an offset.")]
public bool offsetStep;
[NonSerialized]
int m_Offset;
m_Offset = offsetStep ? gameObject.GetInstanceID() : 0;
Academy.Instance.AgentSetStatus += MakeRequests;
Debug.Assert(m_Agent != null, "Agent component was not found on this gameObject and is required.");
Academy.Instance.AgentPreStep += MakeRequests;
}
void OnDestroy()

Academy.Instance.AgentSetStatus -= MakeRequests;
Academy.Instance.AgentPreStep -= MakeRequests;
void MakeRequests(int count)
/// <summary>
/// Method that hooks into the Academy in order to inform the Agent on whether or not it should request a
/// decision, and whether or not it should take actions between decisions.
/// </summary>
/// <param name="academyStepCount">The current step count of the academy.</param>
void MakeRequests(int academyStepCount)
if ((count + m_Offset) % DecisionPeriod == 0)
if (academyStepCount % DecisionPeriod == 0)
{
m_Agent?.RequestDecision();
}

3
com.unity.ml-agents/Runtime/Sensors/CameraSensor.cs


public void Update() {}
/// <inheritdoc/>
public void Reset() { }
/// <inheritdoc/>
public SensorCompressionType GetCompressionType()
{
return m_CompressionType;

6
com.unity.ml-agents/Runtime/Sensors/ISensor.cs


void Update();
/// <summary>
/// Resets the internal states of the sensor. This is called at the end of an Agent's episode.
/// Most implementations can leave this empty.
/// </summary>
void Reset();
/// <summary>
/// Return the compression type being used. If no compression is used, return
/// <see cref="SensorCompressionType.None"/>.
/// </summary>

3
com.unity.ml-agents/Runtime/Sensors/RayPerceptionSensor.cs


}
/// <inheritdoc/>
public void Reset() { }
/// <inheritdoc/>
public int[] GetObservationShape()
{
return m_Shape;

3
com.unity.ml-agents/Runtime/Sensors/RenderTextureSensor.cs


public void Update() {}
/// <inheritdoc/>
public void Reset() { }
/// <inheritdoc/>
public SensorCompressionType GetCompressionType()
{
return m_CompressionType;

13
com.unity.ml-agents/Runtime/Sensors/StackingSensor.cs


using System;
namespace MLAgents.Sensors
{
/// <summary>

{
m_WrappedSensor.Update();
m_CurrentIndex = (m_CurrentIndex + 1) % m_NumStackedObservations;
}
/// <inheritdoc/>
public void Reset()
{
m_WrappedSensor.Reset();
// Zero out the buffer.
for (var i = 0; i < m_NumStackedObservations; i++)
{
Array.Clear(m_StackedObservations[i], 0, m_StackedObservations[i].Length);
}
}
/// <inheritdoc/>

6
com.unity.ml-agents/Runtime/Sensors/VectorSensor.cs


}
/// <inheritdoc/>
public void Reset()
{
Clear();
}
/// <inheritdoc/>
public int[] GetObservationShape()
{
return m_Shape;

44
com.unity.ml-agents/Runtime/SideChannels/IncomingMessage.cs


using System.Collections.Generic;
using System.Runtime.CompilerServices;
using System;
using System.IO;
using System.Text;

}
/// <summary>
/// Read a boolan value from the message.
/// Read a boolean value from the message.
/// <param name="defaultValue">Default value to use if the end of the message is reached.</param>
public bool ReadBoolean()
public bool ReadBoolean(bool defaultValue = false)
return m_Reader.ReadBoolean();
return CanReadMore() ? m_Reader.ReadBoolean() : defaultValue;
/// <param name="defaultValue">Default value to use if the end of the message is reached.</param>
public int ReadInt32()
public int ReadInt32(int defaultValue = 0)
return m_Reader.ReadInt32();
return CanReadMore() ? m_Reader.ReadInt32() : defaultValue;
/// <param name="defaultValue">Default value to use if the end of the message is reached.</param>
public float ReadFloat32()
public float ReadFloat32(float defaultValue = 0.0f)
return m_Reader.ReadSingle();
return CanReadMore() ? m_Reader.ReadSingle() : defaultValue;
/// <param name="defaultValue">Default value to use if the end of the message is reached.</param>
public string ReadString()
public string ReadString(string defaultValue = default)
if (!CanReadMore())
{
return defaultValue;
}
var strLength = ReadInt32();
var str = Encoding.ASCII.GetString(m_Reader.ReadBytes(strLength));
return str;

/// Reads a list of floats from the message. The length of the list is stored in the message.
/// </summary>
/// <param name="defaultValue">Default value to use if the end of the message is reached.</param>
public IList<float> ReadFloatList()
public IList<float> ReadFloatList(IList<float> defaultValue = default)
if (!CanReadMore())
{
return defaultValue;
}
var len = ReadInt32();
var output = new float[len];
for (var i = 0; i < len; i++)

{
m_Reader?.Dispose();
m_Stream?.Dispose();
}
/// <summary>
/// Whether or not there is more data left in the stream that can be read.
/// </summary>
/// <returns></returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
bool CanReadMore()
{
return m_Stream.Position < m_Stream.Length;
}
}
}

69
com.unity.ml-agents/Runtime/Timer.cs


Dictionary<string, TimerNode> m_Children;
/// <summary>
/// Gauge Nodes to measure arbitrary values.
/// </summary>
[DataMember(Name = "gauges", EmitDefaultValue = false)]
Dictionary<string, GaugeNode> m_Gauges;
/// <summary>
/// Custom sampler used to add timings to the profiler.
/// </summary>
CustomSampler m_Sampler;

set {} // Serialization needs this, but unused.
}
public Dictionary<string, GaugeNode> Gauges
{
get { return m_Gauges; }
}
/// <summary>
/// Total seconds spent in this block, excluding its children.
/// </summary>

// The root node doesn't have a sampler since that could interfere with the profiler.
m_NumCalls = 1;
m_TickStart = DateTime.Now.Ticks;
m_Gauges = new Dictionary<string, GaugeNode>();
}
else
{

}
}
[DataContract]
internal class RootNode : TimerNode
{
// Timer output format version
internal const string k_timerFormatVersion = "0.1.0";
[DataMember(Name = "metadata", Order = 0)]
Dictionary<string, string> m_Metadata = new Dictionary<string, string>();
/// <summary>
/// Gauge Nodes to measure arbitrary values.
/// </summary>
[DataMember(Name = "gauges", EmitDefaultValue = false)]
Dictionary<string, GaugeNode> m_Gauges = new Dictionary<string, GaugeNode>();
public RootNode(string name="root") : base(name, true)
{
m_Metadata.Add("timer_format_version", k_timerFormatVersion);
m_Metadata.Add("start_time_seconds", $"{DateTimeOffset.Now.ToUnixTimeSeconds()}");
m_Metadata.Add("unity_version", Application.unityVersion);
m_Metadata.Add("command_line_arguments", String.Join(" ", Environment.GetCommandLineArgs()));
}
public void AddMetadata(string key, string value)
{
m_Metadata[key] = value;
}
public Dictionary<string, GaugeNode> Gauges
{
get { return m_Gauges; }
}
public Dictionary<string, string> Metadata
{
get { return m_Metadata; }
}
}
/// <summary>
/// Tracks the most recent value of a metric. This is analogous to gauges in statsd.
/// </summary>

static readonly TimerStack k_Instance = new TimerStack();
Stack<TimerNode> m_Stack;
TimerNode m_RootNode;
RootNode m_RootNode;
Dictionary<string, string> m_Metadata;
// Explicit static constructor to tell C# compiler
// not to mark type as beforefieldinit

public void Reset(string name = "root")
{
m_Stack = new Stack<TimerNode>();
m_RootNode = new TimerNode(name, true);
m_RootNode = new RootNode(name);
m_Stack.Push(m_RootNode);
}

get { return k_Instance; }
}
internal TimerNode RootNode
internal RootNode RootNode
{
get { return m_RootNode; }
}

m_RootNode.Gauges[name] = new GaugeNode(value);
}
}
}
public void AddMetadata(string key, string value)
{
m_RootNode.AddMetadata(key, value);
}
void Push(string name)

/// <param name="stream"></param>
public void SaveJsonTimers(Stream stream)
{
// Add some final metadata info
AddMetadata("scene_name", SceneManager.GetActiveScene().name);
AddMetadata("end_time_seconds", $"{DateTimeOffset.Now.ToUnixTimeSeconds()}");
var ser = new DataContractJsonSerializer(typeof(TimerNode), jsonSettings);
var ser = new DataContractJsonSerializer(typeof(RootNode), jsonSettings);
ser.WriteObject(stream, m_RootNode);
}
}

59
com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs


using System.CodeDom;
using System;
using UnityEngine;
using NUnit.Framework;
using System.Reflection;

{
internal class TestPolicy : IPolicy
{
public void RequestDecision(AgentInfo info, List<ISensor> sensors) {}
public Action OnRequestDecision;
private WriteAdapter m_Adapter = new WriteAdapter();
public void RequestDecision(AgentInfo info, List<ISensor> sensors) {
foreach(var sensor in sensors){
sensor.GetObservationProto(m_Adapter);
}
OnRequestDecision?.Invoke();
}
public float[] DecideAction() { return new float[0]; }

{
collectObservationsCalls += 1;
collectObservationsCallsForEpisode += 1;
sensor.AddObservation(0f);
sensor.AddObservation(collectObservationsCallsForEpisode);
}
public override void OnActionReceived(float[] vectorAction)

public string sensorName;
public int numWriteCalls;
public int numCompressedCalls;
public int numResetCalls;
public SensorCompressionType compressionType = SensorCompressionType.None;
public TestSensor(string n)

}
public void Update() {}
public void Reset()
{
numResetCalls++;
}
}
[TestFixture]

aca.EnvironmentStep();
}
}
[Test]
public void AssertStackingReset()
{
var agentGo1 = new GameObject("TestAgent");
agentGo1.AddComponent<TestAgent>();
var behaviorParameters = agentGo1.GetComponent<BehaviorParameters>();
behaviorParameters.brainParameters.numStackedVectorObservations = 3;
var agent1 = agentGo1.GetComponent<TestAgent>();
var aca = Academy.Instance;
agent1.LazyInitialize();
var policy = new TestPolicy();
agent1.SetPolicy(policy);
StackingSensor sensor = null;
foreach(ISensor s in agent1.sensors){
if (s is StackingSensor){
sensor = s as StackingSensor;
}
}
Assert.NotNull(sensor);
for (int i = 0; i < 20; i++)
{
agent1.RequestDecision();
aca.EnvironmentStep();
}
policy.OnRequestDecision = () => SensorTestHelper.CompareObservation(sensor, new[] {18f, 19f, 21f});
agent1.EndEpisode();
SensorTestHelper.CompareObservation(sensor, new[] {0f, 0f, 0f});
}
}
[TestFixture]

var expectedAgentActionForEpisode = 0;
var expectedCollectObsCalls = 0;
var expectedCollectObsCallsForEpisode = 0;
var expectedCompletedEpisodes = 0;
var expectedSensorResetCalls = 0;
for (var i = 0; i < 15; i++)
{

expectedAgentActionForEpisode = 0;
expectedCollectObsCallsForEpisode = 0;
expectedAgentStepCount = 0;
expectedCompletedEpisodes++;
expectedSensorResetCalls++;
expectedCollectObsCalls += 1;
}
aca.EnvironmentStep();

Assert.AreEqual(expectedAgentActionForEpisode, agent1.agentActionCallsForEpisode);
Assert.AreEqual(expectedCollectObsCalls, agent1.collectObservationsCalls);
Assert.AreEqual(expectedCollectObsCallsForEpisode, agent1.collectObservationsCallsForEpisode);
Assert.AreEqual(expectedCompletedEpisodes, agent1.CompletedEpisodes);
Assert.AreEqual(expectedSensorResetCalls, agent1.sensor1.numResetCalls);
}
}

1
com.unity.ml-agents/Tests/Editor/ParameterLoaderTest.cs


}
public void Update() {}
public void Reset() { }
public SensorCompressionType GetCompressionType()
{

102
com.unity.ml-agents/Tests/Editor/PublicAPI/PublicApiValidation.cs


using MLAgents.Sensors;
using NUnit.Framework;
using UnityEngine;
using UnityEngine.TestTools;
namespace MLAgentsExamples
{

sensorComponent.observationStacks = 2;
sensorComponent.CreateSensor();
}
class PublicApiAgent : Agent
{
public int numHeuristicCalls;
public override float[] Heuristic()
{
numHeuristicCalls++;
return base.Heuristic();
}
}
// Simple SensorComponent that sets up a StackingSensor
class StackingComponent : SensorComponent
{
public SensorComponent wrappedComponent;
public int numStacks;
public override ISensor CreateSensor()
{
var wrappedSensor = wrappedComponent.CreateSensor();
return new StackingSensor(wrappedSensor, numStacks);
}
public override int[] GetObservationShape()
{
int[] shape = (int[]) wrappedComponent.GetObservationShape().Clone();
for (var i = 0; i < shape.Length; i++)
{
shape[i] *= numStacks;
}
return shape;
}
}
[Test]
public void CheckSetupAgent()
{
var gameObject = new GameObject();
var behaviorParams = gameObject.AddComponent<BehaviorParameters>();
behaviorParams.brainParameters.vectorObservationSize = 3;
behaviorParams.brainParameters.numStackedVectorObservations = 2;
behaviorParams.brainParameters.vectorActionDescriptions = new[] { "TestActionA", "TestActionB" };
behaviorParams.brainParameters.vectorActionSize = new[] { 2, 2 };
behaviorParams.brainParameters.vectorActionSpaceType = SpaceType.Discrete;
behaviorParams.behaviorName = "TestBehavior";
behaviorParams.TeamId = 42;
behaviorParams.useChildSensors = true;
var agent = gameObject.AddComponent<PublicApiAgent>();
// Make sure we can set the behavior type correctly after the agent is added
behaviorParams.behaviorType = BehaviorType.InferenceOnly;
// Can't actually create an Agent with InferenceOnly and no model, so change back
behaviorParams.behaviorType = BehaviorType.Default;
// TODO - not internal yet
// var decisionRequester = gameObject.AddComponent<DecisionRequester>();
// decisionRequester.DecisionPeriod = 2;
var sensorComponent = gameObject.AddComponent<RayPerceptionSensorComponent3D>();
sensorComponent.sensorName = "ray3d";
sensorComponent.detectableTags = new List<string> { "Player", "Respawn" };
sensorComponent.raysPerDirection = 3;
// Make a StackingSensor that wraps the RayPerceptionSensorComponent3D
// This isn't necessarily practical, just to ensure that it can be done
var wrappingSensorComponent = gameObject.AddComponent<StackingComponent>();
wrappingSensorComponent.wrappedComponent = sensorComponent;
wrappingSensorComponent.numStacks = 3;
// ISensor isn't set up yet.
Assert.IsNull(sensorComponent.raySensor);
agent.LazyInitialize();
// Make sure we can set the behavior type correctly after the agent is initialized
// (this creates a new policy).
behaviorParams.behaviorType = BehaviorType.HeuristicOnly;
// Initialization should set up the sensors
Assert.IsNotNull(sensorComponent.raySensor);
// Let's change the inference device
var otherDevice = behaviorParams.inferenceDevice == InferenceDevice.CPU ? InferenceDevice.GPU : InferenceDevice.CPU;
agent.SetModel(behaviorParams.behaviorName, behaviorParams.model, otherDevice);
agent.AddReward(1.0f);
agent.RequestAction();
agent.RequestDecision();
Academy.Instance.AutomaticSteppingEnabled = false;
Academy.Instance.EnvironmentStep();
var actions = agent.GetAction();
// default Heuristic implementation should return zero actions.
Assert.AreEqual(new[] {0.0f, 0.0f}, actions);
Assert.AreEqual(1, agent.numHeuristicCalls);
}
}
}

1
com.unity.ml-agents/Tests/Editor/Sensor/FloatVisualSensorTests.cs


}
public void Update() {}
public void Reset() { }
public SensorCompressionType GetCompressionType()
{

1
com.unity.ml-agents/Tests/Editor/Sensor/SensorShapeValidatorTests.cs


}
public void Update() { }
public void Reset() { }
public SensorCompressionType GetCompressionType()
{

18
com.unity.ml-agents/Tests/Editor/Sensor/StackingSensorTests.cs


// Check that if we don't call Update(), the same observations are produced
SensorTestHelper.CompareObservation(sensor, new[] {5f, 6f, 7f, 8f, 9f, 10f});
}
[Test]
public void TestStackingReset()
{
VectorSensor wrapped = new VectorSensor(2);
ISensor sensor = new StackingSensor(wrapped, 3);
wrapped.AddObservation(new[] {1f, 2f});
SensorTestHelper.CompareObservation(sensor, new[] {0f, 0f, 0f, 0f, 1f, 2f});
sensor.Update();
wrapped.AddObservation(new[] {3f, 4f});
SensorTestHelper.CompareObservation(sensor, new[] {0f, 0f, 1f, 2f, 3f, 4f});
sensor.Reset();
wrapped.AddObservation(new[] {5f, 6f});
SensorTestHelper.CompareObservation(sensor, new[] {0f, 0f, 0f, 0f, 5f, 6f});
}
}
}

26
com.unity.ml-agents/Tests/Editor/SideChannelTests.cs


Assert.AreEqual(stringVal, incomingMsg.ReadString());
Assert.AreEqual(floatListVal, incomingMsg.ReadFloatList());
}
[Test]
public void TestMessageReadDefaults()
{
// Make sure reading past the end of a message will apply defaults.
IncomingMessage incomingMsg;
using (var outgoingMsg = new OutgoingMessage())
{
incomingMsg = new IncomingMessage(outgoingMsg.ToByteArray());
}
Assert.AreEqual(false, incomingMsg.ReadBoolean());
Assert.AreEqual(true, incomingMsg.ReadBoolean(defaultValue: true));
Assert.AreEqual(0, incomingMsg.ReadInt32());
Assert.AreEqual(42, incomingMsg.ReadInt32(defaultValue: 42));
Assert.AreEqual(0.0f, incomingMsg.ReadFloat32());
Assert.AreEqual(1337.0f, incomingMsg.ReadFloat32(defaultValue: 1337.0f));
Assert.AreEqual(default(string), incomingMsg.ReadString());
Assert.AreEqual("foo", incomingMsg.ReadString(defaultValue: "foo"));
Assert.AreEqual(default(float[]), incomingMsg.ReadFloatList());
Assert.AreEqual(new float[] { 1001, 1002 }, incomingMsg.ReadFloatList(new float[] { 1001, 1002 }));
}
}
}

2
com.unity.ml-agents/Tests/Editor/TimerTest.cs


using (myTimer.Scoped("bar"))
{
myTimer.SetGauge("my_gauge", i);
myTimer.AddMetadata("i", $"{i}");
}
}
}

Assert.AreEqual(0, gauge.minValue);
Assert.AreEqual(4, gauge.maxValue);
Assert.AreEqual(4, gauge.value);
Assert.AreEqual("4", myTimer.RootNode.Metadata["i"]);
var fooChildren = rootChildren["foo"].Children;
Assert.That(fooChildren, Contains.Key("bar"));

16
com.unity.ml-agents/package.json


{
"name": "com.unity.ml-agents",
"displayName":"ML Agents",
"version": "0.15.0-preview",
"unity": "2018.4",
"description": "Add interactivity to your game with Machine Learning Agents trained using Deep Reinforcement Learning.",
"dependencies": {
"com.unity.barracuda": "0.6.1-preview"
}
"name": "com.unity.ml-agents",
"displayName": "ML Agents",
"version": "0.15.1-preview",
"unity": "2018.4",
"description": "Add interactivity to your game with Machine Learning Agents trained using Deep Reinforcement Learning.",
"dependencies": {
"com.unity.barracuda": "0.6.1-preview"
}
}

3
docs/Getting-Started.md


- For a "Hello World" introduction to creating your own Learning Environment,
check out the [Making a New Learning
Environment](Learning-Environment-Create-New.md) page.
- For a series of YouTube video tutorials, check out the
[Machine Learning Agents PlayList](https://www.youtube.com/playlist?list=PLX2vGYjWbI0R08eWQkO7nQkGiicHAX7IX)
page.

10
docs/ML-Agents-Overview.md


[Training With Environment Parameter Randomization](Training-Environment-Parameter-Randomization.md)
to learn more about this feature.
- **Cloud Training on AWS** - To facilitate using the ML-Agents toolkit on
Amazon Web Services (AWS) machines, we provide a
[guide](Training-on-Amazon-Web-Service.md) on how to set-up EC2 instances in
addition to a public pre-configured Amazon Machine Image (AMI).
- **Cloud Training on Microsoft Azure** - To facilitate using the ML-Agents
toolkit on Azure machines, we provide a
[guide](Training-on-Microsoft-Azure.md) on how to set-up virtual machine
instances in addition to a pre-configured data science image.
## Summary and Next Steps
To briefly summarize: The ML-Agents toolkit enables games and simulations built

4
docs/Migrating.md


### Important changes
* The `--load` and `--train` command-line flags have been deprecated and replaced with `--resume` and `--inference`.
* Running with the same `--run-id` twice will now throw an error.
* Removed the multi-agent gym option from the gym wrapper. For multi-agent scenarios, use the [Low Level Python API](Python-API.md).
* The low level Python API has changed. See the [Low Level Python API documentation](Python-API.md) for more information. If you use `mlagents-learn` for training, this should be a transparent change.
### Steps to Migrate
* Replace the `--load` flag with `--resume` when calling `mlagents-learn`, and don't use the `--train` flag as training

* Replace `Academy.UnregisterSideChannel` with `SideChannelUtils.UnregisterSideChannel`.
* `steps_per_update` should be roughly equal to the number of agents in your environment, times `num_updates`
and divided by `train_interval`.
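For reference, a rough sketch of what the Python API rename looks like in practice (this assumes an environment is already running and reachable via `UnityEnvironment()`; the pre-0.15 method names are shown in comments for comparison):

```python
from mlagents_envs.environment import UnityEnvironment

env = UnityEnvironment()  # connects to a running Editor or build
env.reset()

# previously: env.get_agent_groups()
behavior_name = env.get_behavior_names()[0]
# previously: env.get_agent_group_spec(behavior_name)
spec = env.get_behavior_spec(behavior_name)
# previously: env.get_step_result(behavior_name) returned a single BatchedStepResult;
# get_steps now returns a (DecisionSteps, TerminalSteps) pair.
decision_steps, terminal_steps = env.get_steps(behavior_name)

env.close()
```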
## Migrating from 0.14 to 0.15

141
docs/Python-API.md


- **UnityEnvironment** — the main interface between the Unity application and
your code. Use UnityEnvironment to start and control a simulation or training
session.
- **BatchedStepResult** — contains the data from Agents belonging to the same
"AgentGroup" in the simulation, such as observations and rewards.
- **AgentGroupSpec** — describes the shape of the data inside a BatchedStepResult.
For example, provides the dimensions of the observations of a group.
- **BehaviorName** - is a string that identifies a behavior in the simulation.
- **AgentId** - is an `int` that serves as unique identifier for Agents in the
simulation.
- **DecisionSteps** — contains the data from Agents belonging to the same
"Behavior" in the simulation, such as observations and rewards. Only Agents
that requested a decision since the last call to `env.step()` are in the
DecisionSteps object.
- **TerminalSteps** — contains the data from Agents belonging to the same
"Behavior" in the simulation, such as observations and rewards. Only Agents
whose episode ended since the last call to `env.step()` are in the
TerminalSteps object.
- **BehaviorSpec** — describes the shape of the observation data inside
DecisionSteps and TerminalSteps as well as the expected action shapes.
An Agent Group is a group of Agents identified by a string name that share the same
observations and action types. You can think about Agent Group as a group of agents
that will share the same policy or behavior. All Agents in a group have the same goal
and reward signals.
An Agent "Behavior" is a group of Agents identified by a `BehaviorName` that share the same
observations and action types (described in their `BehaviorSpec`). You can think about Agent
Behavior as a group of agents that will share the same policy. All Agents with the same
behavior have the same goal and reward signals.
__Note__: The `Behavior Name` corresponds to the Agent Group name on Python.
_Notice: Currently communication between Unity and Python takes place over an
open socket without authentication. As such, please make sure that the network

move forward until an Agent in the simulation needs an input from Python to act.
- **Close : `env.close()`** Sends a shutdown signal to the environment and terminates
the communication.
- **Get Agent Group Names : `env.get_agent_groups()`** Returns a list of agent group ids.
- **Get Behavior Names : `env.get_behavior_names()`** Returns a list of `BehaviorName`.
agent groups are created in the simulation.
- **Get Agent Group Spec : `env.get_agent_group_spec(agent_group: str)`** Returns
the `AgentGroupSpec` corresponding to the agent_group given as input. An
`AgentGroupSpec` contains information such as the observation shapes, the action
type (multi-discrete or continuous) and the action shape. Note that the `AgentGroupSpec`
Agent behaviors are created in the simulation.
- **Get Behavior Spec : `env.get_behavior_spec(behavior_name: str)`** Returns
the `BehaviorSpec` corresponding to the behavior_name given as input. A
`BehaviorSpec` contains information such as the observation shapes, the action
type (multi-discrete or continuous) and the action shape. Note that the `BehaviorSpec`
- **Get Batched Step Result for Agent Group : `env.get_step_result(agent_group: str)`**
Returns a `BatchedStepResult` corresponding to the agent_group given as input.
A `BatchedStepResult` contains information about the state of the agents in a group
such as the observations, the rewards, the done flags and the agent identifiers. The
data is in `np.array` of which the first dimension is always the number of agents which
requested a decision in the simulation since the last call to `env.step()` note that the
number of agents is not guaranteed to remain constant during the simulation.
- **Set Actions for Agent Group :`env.set_actions(agent_group: str, action: np.array)`**
- **Get Steps : `env.get_steps(behavior_name: str)`**
Returns a tuple `DecisionSteps, TerminalSteps` corresponding to the behavior_name
given as input.
The `DecisionSteps` contains information about the state of the agents
**that need an action this step** and have the behavior behavior_name.
The `TerminalSteps` contains information about the state of the agents
**whose episode ended** and have the behavior behavior_name.
Both `DecisionSteps` and `TerminalSteps` contain information such as
the observations, the rewards and the agent identifiers.
`DecisionSteps` also contains action masks for the next action while `TerminalSteps`
contains the reason for termination (did the Agent reach its maximum step and was
interrupted). The data is in `np.array` of which the first dimension is always the
number of agents. Note that the number of agents is not guaranteed to remain constant
during the simulation, and it is not unusual for either `DecisionSteps` or `TerminalSteps`
to contain no Agents at all.
- **Set Actions :`env.set_actions(behavior_name: str, action: np.array)`**
Sets the actions for a whole agent group. `action` is a 2D `np.array` of `dtype=np.int32`
in the discrete action case and `dtype=np.float32` in the continuous action case.
The first dimension of `action` is the number of agents that requested a decision

__Note:__ If no action is provided for an agent group between two calls to `env.step()` then
the default action will be all zeros (in either discrete or continuous action space)
#### BathedStepResult and StepResult
#### DecisionSteps and DecisionStep
`DecisionSteps` (with `s`) contains information about a whole batch of Agents while
`DecisionStep` (no `s`) only contains information about a single Agent.
A `BatchedStepResult` has the following fields :
A `DecisionSteps` has the following fields :
- `obs` is a list of numpy array observations collected by the group of
agents. The first dimension of the array corresponds to the batch size of

rewards collected by each agent since the last simulation step.
- `done` is an array of booleans of length batch size. Is true if the
associated Agent was terminated during the last simulation step.
- `max_step` is an array of booleans of length batch size. Is true if the
associated Agent reached its maximum number of steps during the last
simulation step.
- `agent_id` is an int vector of length batch size containing unique
identifier for the corresponding Agent. This is used to track Agents
across simulation steps.

It also has the two following methods:
- `n_agents()` Returns the number of agents requesting a decision since
the last call to `env.step()`
- `get_agent_step_result(agent_id: int)` Returns a `StepResult`
- `len(DecisionSteps)` Returns the number of agents requesting a decision since
the last call to `env.step()`.
- `DecisionSteps[agent_id]` Returns a `DecisionStep`
A `StepResult` has the following fields:
A `DecisionStep` has the following fields:
- `obs` is a list of numpy arrays observations collected by the group of
agent. (Each array has one less dimension than the arrays in `BatchedStepResult`)
- `obs` is a list of numpy arrays observations collected by the agent.
(Each array has one less dimension than the arrays in `DecisionSteps`)
- `max_step` is a bool. Is true if the Agent reached its maximum number of
steps during the last simulation step.
- `agent_id` is an int and a unique identifier for the corresponding Agent.
- `action_mask` is an optional list of one dimensional array of booleans.
Only available in multi-discrete action space type.
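Putting these pieces together, a minimal decision/action loop might look like the sketch below (it assumes a single behavior with continuous actions and an environment reachable via `UnityEnvironment()`; here every agent is simply given a zero action):

```python
import numpy as np
from mlagents_envs.environment import UnityEnvironment

env = UnityEnvironment()  # connects to a running Editor or build
env.reset()
behavior_name = env.get_behavior_names()[0]
spec = env.get_behavior_spec(behavior_name)

for _ in range(100):
    decision_steps, terminal_steps = env.get_steps(behavior_name)
    # One action per agent that requested a decision this step.
    actions = np.zeros((len(decision_steps), spec.action_size), dtype=np.float32)
    env.set_actions(behavior_name, actions)
    env.step()

env.close()
```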

#### AgentGroupSpec
#### TerminalSteps and TerminalStep
Similarly to `DecisionSteps` and `DecisionStep`,
`TerminalSteps` (with `s`) contains information about a whole batch of Agents while
`TerminalStep` (no `s`) only contains information about a single Agent.
An Agent group can either have discrete or continuous actions. To check which type
A `TerminalSteps` has the following fields :
- `obs` is a list of numpy array observations collected by the group of
agents. The first dimension of the array corresponds to the batch size of
the group (number of agents whose episode ended since the last call to
`env.step()`).
- `reward` is a float vector of length batch size. Corresponds to the
rewards collected by each agent since the last simulation step.
- `done` is an array of booleans of length batch size. Is true if the
associated Agent was terminated during the last simulation step.
- `agent_id` is an int vector of length batch size containing unique
identifier for the corresponding Agent. This is used to track Agents
across simulation steps.
- `max_step` is an array of booleans of length batch size. Is true if the
associated Agent reached its maximum number of steps during the last
simulation step.
It also has the two following methods:
- `len(TerminalSteps)` Returns the number of agents whose episode ended since
the last call to `env.step()`.
- `TerminalSteps[agent_id]` Returns a `TerminalStep`
for the Agent with the `agent_id` unique identifier.
A `TerminalStep` has the following fields:
- `obs` is a list of numpy arrays observations collected by the agent.
(Each array has one less dimension than the arrays in `TerminalSteps`)
- `reward` is a float. Corresponds to the rewards collected by the agent
since the last simulation step.
- `done` is a bool. Is true if the Agent was terminated during the last
simulation step.
- `agent_id` is an int and a unique identifier for the corresponding Agent.
- `max_step` is a bool. Is true if the Agent reached its maximum number of
steps during the last simulation step.
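Individual agents can be looked up by `agent_id` in both batches. A short, self-contained sketch under the same assumptions as the loop above:

```python
from mlagents_envs.environment import UnityEnvironment

env = UnityEnvironment()  # connects to a running Editor or build
env.reset()
behavior_name = env.get_behavior_names()[0]

decision_steps, terminal_steps = env.get_steps(behavior_name)
for agent_id in terminal_steps.agent_id:
    terminal_step = terminal_steps[agent_id]  # a single TerminalStep
    print(agent_id, terminal_step.reward, terminal_step.max_step)
for agent_id in decision_steps.agent_id:
    decision_step = decision_steps[agent_id]  # a single DecisionStep
    print(agent_id, decision_step.reward, decision_step.obs[0].shape)

env.close()
```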
#### BehaviorSpec
An Agent behavior can either have discrete or continuous actions. To check which type
An `AgentGroupSpec` has the following fields :
A `BehaviorSpec` has the following fields :
BatchedStepResult and StepResult.
DecisionSteps, DecisionStep, TerminalSteps and TerminalStep.
- `action_type` is the data type of the action. It can be discrete or
continuous. If discrete, the action tensors are expected to be `np.int32`. If
continuous, the actions are expected to be `np.float32`.

### Communicating additional information with the Environment
In addition to the means of communicating between Unity and python described above,
we also provide methods for sharing agent-agnostic information. These
additional methods are referred to as side channels. ML-Agents includes two ready-made

10
docs/Training-ML-Agents.md


specified, you will not be able to continue with training. Use `--force` to force ML-Agents to
overwrite the existing data.
Alternatively, you might want to start a new training run but _initialize_ it using an already-trained
model. You may want to do this, for instance, if your environment changed and you want
a new model, but the old behavior is still better than random. You can do this by specifying `--initialize-from=<run-identifier>`, where `<run-identifier>` is the old run ID.
### Command Line Training Options
In addition to passing the path of the Unity executable containing your training

as the current agents in your scene.
* `--force`: Attempting to train a model with a run-id that has been used before will
throw an error. Use `--force` to force-overwrite this run-id's summary and model data.
* `--initialize-from=<run-identifier>`: Specify an old run-id here to initialize your model from
a previously trained model. Note that the previously saved models _must_ have the same behavior
parameters as your current environment.
* `--no-graphics`: Specify this option to run the Unity executable in
`-batchmode` without initializing the graphics driver. Use this only if your
training doesn't involve visual observations (reading from pixels). See

* `--cpu`: Forces training using CPU only.
* Engine Configuration :
* `--width' : The width of the executable window of the environment(s) in pixels
* `--width` : The width of the executable window of the environment(s) in pixels
(ignored for editor training) (Default 84)
* `--height` : The height of the executable window of the environment(s) in pixels
(ignored for editor training). (Default 84)

| train_interval | How often to update the agent. | SAC |
| steps_per_update | Ratio of agent steps per mini-batch update. | SAC |
| use_recurrent | Train using a recurrent neural network. See [Using Recurrent Neural Networks](Feature-Memory.md). | PPO, SAC |
| init_path | Initialize trainer from a previously saved model. | PPO, SAC |
\*PPO = Proximal Policy Optimization, SAC = Soft Actor-Critic, BC = Behavioral Cloning (Imitation), GAIL = Generative Adversarial Imitation Learning

11
docs/Training-PPO.md


Typical Range: Approximately equal to PPO's `buffer_size`
### (Optional) Advanced: Initialize Model Path
`init_path` can be specified to initialize your model from a previous run before starting.
Note that the prior run should have used the same trainer configurations as the current run,
and have been saved with the same version of ML-Agents. You should provide the full path
to the folder where the checkpoints were saved, e.g. `./models/{run-id}/{behavior_name}`.
This option is provided in case you want to initialize different behaviors from different runs;
in most cases, it is sufficient to use the `--initialize-from` CLI parameter to initialize
all models from the same run.
## Training Statistics
To view training statistics, use TensorBoard. For information on launching and

11
docs/Training-SAC.md


Typical Range (Discrete): `32` - `512`
### (Optional) Advanced: Initialize Model Path
`init_path` can be specified to initialize your model from a previous run before starting.
Note that the prior run should have used the same trainer configurations as the current run,
and have been saved with the same version of ML-Agents. You should provide the full path
to the folder where the checkpoints were saved, e.g. `./models/{run-id}/{behavior_name}`.
This option is provided in case you want to initialize different behaviors from different runs;
in most cases, it is sufficient to use the `--initialize-from` CLI parameter to initialize
all models from the same run.
## Training Statistics
To view training statistics, use TensorBoard. For information on launching and

999
docs/images/3dball_big.png
File diff is too large to display

852
docs/images/3dball_small.png
Width: 906 | Height: 759 | Size: 165 KiB

974
docs/images/curriculum.png
Width: 2066 | Height: 342 | Size: 152 KiB

150
docs/images/demo_component.png
Width: 505 | Height: 94 | Size: 15 KiB

980
docs/images/gridworld.png
Width: 1064 | Height: 725 | Size: 84 KiB

999
docs/images/ml-agents-LSTM.png
File diff is too large to display

181
docs/images/monitor.png
Width: 961 | Height: 745 | Size: 33 KiB

219
docs/images/platform_prefab.png
Width: 285 | Height: 121 | Size: 8.8 KiB

349
docs/images/visual-observation-rawimage.png
Width: 531 | Height: 762 | Size: 59 KiB

95
docs/images/visual-observation-rendertexture.png
Width: 409 | Height: 95 | Size: 14 KiB

107
docs/images/visual-observation.png
Width: 506 | Height: 130 | Size: 18 KiB

15
gym-unity/README.md


information on the gym interface, see [here](https://github.com/openai/gym).
We provide a gym wrapper and instructions for using it with existing machine
learning algorithms which utilize gyms. Both wrappers provide interfaces on top
learning algorithms which utilize gym. Our wrapper provides interfaces on top
of our `UnityEnvironment` class, which is the default way of interfacing with a
Unity environment via Python.

or by running the following from the `/gym-unity` directory of the repository:
```sh
pip install .
pip install -e .
```
## Using the Gym Wrapper

```python
from gym_unity.envs import UnityEnv
env = UnityEnv(environment_filename, worker_id, use_visual, uint8_visual, multiagent)
env = UnityEnv(environment_filename, worker_id, use_visual, uint8_visual)
```
* `environment_filename` refers to the path to the Unity environment.

(0-255). Many common Gym environments (e.g. Atari) do this. By default they
will be floats (0.0-1.0). Defaults to `False`.
* `multiagent` refers to whether you intent to launch an environment which
contains more than one agent. Defaults to `False`.
* `flatten_branched` will flatten a branched discrete action space into a Gym Discrete.
Otherwise, it will be converted into a MultiDiscrete. Defaults to `False`.
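For reference, a typical single-agent rollout with the wrapper might look like this (a sketch only; `"UnityBuild"` is a placeholder path to your own environment executable):

```python
from gym_unity.envs import UnityEnv

# "UnityBuild" is a placeholder; point this at your environment executable.
env = UnityEnv("UnityBuild", worker_id=0, use_visual=False, uint8_visual=False)

obs = env.reset()
for _ in range(200):
    action = env.action_space.sample()
    obs, reward, done, info = env.step(action)
    if done:
        obs = env.reset()
env.close()
```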

## Limitations
* It is only possible to use an environment with a single Agent.
* It is only possible to use an environment with a **single** Agent.
* The `BatchedStepResult` output from the environment can still be accessed from the
`info` provided by `env.step(action)`.
* The `TerminalSteps` or `DecisionSteps` output from the environment can still be
accessed from the `info` provided by `env.step(action)`.
* Stacked vector observations are not supported.
* Environment registration for use with `gym.make()` is currently not supported.

342
gym-unity/gym_unity/envs/__init__.py


from gym import error, spaces
from mlagents_envs.environment import UnityEnvironment
from mlagents_envs.base_env import BatchedStepResult
from mlagents_envs.base_env import DecisionSteps, TerminalSteps
from mlagents_envs import logging_util

logger = logging_util.get_logger(__name__)
logging_util.set_log_level(logging_util.INFO)
GymSingleStepResult = Tuple[np.ndarray, float, bool, Dict]
GymMultiStepResult = Tuple[List[np.ndarray], List[float], List[bool], Dict]
GymStepResult = Union[GymSingleStepResult, GymMultiStepResult]
GymStepResult = Tuple[np.ndarray, float, bool, Dict]
Multi-agent environments use lists for object types, as done here:
https://github.com/openai/multiagent-particle-envs
"""
def __init__(

use_visual: bool = False,
uint8_visual: bool = False,
multiagent: bool = False,
flatten_branched: bool = False,
no_graphics: bool = False,
allow_multiple_visual_obs: bool = False,

:param worker_id: Worker number for environment.
:param use_visual: Whether to use visual observation or vector observation.
:param uint8_visual: Return visual observations as uint8 (0-255) matrices instead of float (0.0-1.0).
:param multiagent: Whether to run in multi-agent mode (lists of obs, reward, done).
:param flatten_branched: If True, turn branched discrete action spaces into a Discrete space rather than
MultiDiscrete.
:param no_graphics: Whether to run the Unity simulator in no-graphics mode

)
# Take a single step so that the brain information will be sent over
if not self._env.get_agent_groups():
if not self._env.get_behavior_names():
self.agent_mapper = AgentIdIndexMapper()
self._previous_step_result: BatchedStepResult = None
self._multiagent = multiagent
self._previous_decision_step: DecisionSteps = None
self._flattener = None
# Hidden flag used by Atari environments to determine if the game is over
self.game_over = False

if len(self._env.get_agent_groups()) != 1:
if len(self._env.get_behavior_names()) != 1:
"There can only be one brain in a UnityEnvironment "
"There can only be one behavior in a UnityEnvironment "
self.brain_name = self._env.get_agent_groups()[0]
self.name = self.brain_name
self.group_spec = self._env.get_agent_group_spec(self.brain_name)
self.name = self._env.get_behavior_names()[0]
self.group_spec = self._env.get_behavior_spec(self.name)
if use_visual and self._get_n_vis_obs() == 0:
raise UnityGymException(

# Check for number of agents in scene.
self._env.reset()
step_result = self._env.get_step_result(self.brain_name)
self._check_agents(step_result.n_agents())
self._previous_step_result = step_result
self.agent_mapper.set_initial_agents(list(self._previous_step_result.agent_id))
decision_steps, _ = self._env.get_steps(self.name)
self._check_agents(len(decision_steps))
self._previous_decision_step = decision_steps
# Set observation and action spaces
if self.group_spec.is_action_discrete():

def reset(self) -> Union[List[np.ndarray], np.ndarray]:
"""Resets the state of the environment and returns an initial observation.
In the case of multi-agent environments, this is a list.
step_result = self._step(True)
n_agents = step_result.n_agents()
self._env.reset()
decision_step, _ = self._env.get_steps(self.name)
n_agents = len(decision_step)
if not self._multiagent:
res: GymStepResult = self._single_step(step_result)
else:
res = self._multi_step(step_result)
res: GymStepResult = self._single_step(decision_step)
return res[0]
def step(self, action: List[Any]) -> GymStepResult:

Accepts an action and returns a tuple (observation, reward, done, info).
In the case of multi-agent environments, these are lists.
Args:
action (object/list): an action provided by the environment
Returns:

info (dict): contains auxiliary diagnostic information, including BatchedStepResult.
info (dict): contains auxiliary diagnostic information.
# Use random actions for all other agents in environment.
if self._multiagent:
if not isinstance(action, list):
raise UnityGymException(
"The environment was expecting `action` to be a list."
)
if len(action) != self._n_agents:
raise UnityGymException(
"The environment was expecting a list of {} actions.".format(
self._n_agents
)
)
else:
if self._flattener is not None:
# Action space is discrete and flattened - we expect a list of scalars
action = [self._flattener.lookup_action(_act) for _act in action]
action = np.array(action)
else:
if self._flattener is not None:
# Translate action into list
action = self._flattener.lookup_action(action)
if self._flattener is not None:
# Translate action into list
action = self._flattener.lookup_action(action)
action = np.array(action).reshape((self._n_agents, spec.action_size))
action = self._sanitize_action(action)
self._env.set_actions(self.brain_name, action)
action = np.array(action).reshape((1, spec.action_size))
self._env.set_actions(self.name, action)
step_result = self._step()
n_agents = step_result.n_agents()
self._check_agents(n_agents)
if not self._multiagent:
single_res = self._single_step(step_result)
self.game_over = single_res[2]
return single_res
self._env.step()
decision_step, terminal_step = self._env.get_steps(self.name)
if len(terminal_step) != 0:
# The agent is done
self.game_over = True
return self._single_step(terminal_step)
multi_res = self._multi_step(step_result)
self.game_over = all(multi_res[2])
return multi_res
return self._single_step(decision_step)
def _single_step(self, info: BatchedStepResult) -> GymSingleStepResult:
def _single_step(self, info: Union[DecisionSteps, TerminalSteps]) -> GymStepResult:
if self.use_visual:
visual_obs = self._get_vis_obs_list(info)

"The Agent does not have vector observations and the environment was not setup "
+ "to use visual observations."
)
done = isinstance(info, TerminalSteps)
return (
default_observation,
info.reward[0],
info.done[0],
{"batched_step_result": info},
)
return (default_observation, info.reward[0], done, {"step": info})
def _preprocess_single(self, single_visual_obs: np.ndarray) -> np.ndarray:
if self.uint8_visual:

def _multi_step(self, info: BatchedStepResult) -> GymMultiStepResult:
if self.use_visual:
self.visual_obs = self._preprocess_multi(self._get_vis_obs_list(info))
default_observation = self.visual_obs
else:
default_observation = self._get_vector_obs(info)
return (
list(default_observation),
list(info.reward),
list(info.done),
{"batched_step_result": info},
)
def _get_n_vis_obs(self) -> int:
result = 0
for shape in self.group_spec.observation_shapes:

return shape
return None
def _get_vis_obs_list(self, step_result: BatchedStepResult) -> List[np.ndarray]:
def _get_vis_obs_list(
self, step_result: Union[DecisionSteps, TerminalSteps]
) -> List[np.ndarray]:
result: List[np.ndarray] = []
for obs in step_result.obs:
if len(obs.shape) == 4:

def _get_vector_obs(self, step_result: BatchedStepResult) -> np.ndarray:
def _get_vector_obs(
self, step_result: Union[DecisionSteps, TerminalSteps]
) -> np.ndarray:
result: List[np.ndarray] = []
for obs in step_result.obs:
if len(obs.shape) == 2:

result += shape[0]
return result
def _preprocess_multi(
self, multiple_visual_obs: List[np.ndarray]
) -> List[np.ndarray]:
if self.uint8_visual:
return [
(255.0 * _visual_obs).astype(np.uint8)
for _visual_obs in multiple_visual_obs
]
else:
return multiple_visual_obs
def render(self, mode="rgb_array"):
return self.visual_obs

return
def _check_agents(self, n_agents: int) -> None:
if not self._multiagent and n_agents > 1:
raise UnityGymException(
"The environment was launched as a single-agent environment, however "
"there is more than one agent in the scene."
)
elif self._multiagent and n_agents <= 1:
raise UnityGymException(
"The environment was launched as a mutli-agent environment, however "
"there is only one agent in the scene."
)
if self._n_agents == -1:
self._n_agents = n_agents
logger.info("{} agents within environment.".format(n_agents))
elif self._n_agents != n_agents:
if self._n_agents > 1:
"The number of agents in the environment has changed since "
"initialization. This is not supported."
"There can only be one Agent in the environment but {n_agents} were detected."
def _sanitize_info(self, step_result: BatchedStepResult) -> BatchedStepResult:
n_extra_agents = step_result.n_agents() - self._n_agents
if n_extra_agents < 0:
# In this case, some Agents did not request a decision when expected
raise UnityGymException(
"The number of agents in the scene does not match the expected number."
)
if step_result.n_agents() - sum(step_result.done) != self._n_agents:
raise UnityGymException(
"The number of agents in the scene does not match the expected number."
)
for index, agent_id in enumerate(step_result.agent_id):
if step_result.done[index]:
self.agent_mapper.mark_agent_done(agent_id, step_result.reward[index])
# Set the new AgentDone flags to True
# Note that the corresponding agent_id that gets marked done will be different
# than the original agent that was done, but this is OK since the gym interface
# only cares about the ordering.
for index, agent_id in enumerate(step_result.agent_id):
if not self._previous_step_result.contains_agent(agent_id):
if step_result.done[index]:
# If the Agent is already done (e.g. it ended its epsiode twice in one step)
# Don't try to register it here.
continue
# Register this agent, and get the reward of the previous agent that
# was in its index, so that we can return it to the gym.
last_reward = self.agent_mapper.register_new_agent_id(agent_id)
step_result.done[index] = True
step_result.reward[index] = last_reward
self._previous_step_result = step_result # store the new original
# Get a permutation of the agent IDs so that a given ID stays in the same
# index as where it was first seen.
new_id_order = self.agent_mapper.get_id_permutation(list(step_result.agent_id))
_mask: Optional[List[np.array]] = None
if step_result.action_mask is not None:
_mask = []
for mask_index in range(len(step_result.action_mask)):
_mask.append(step_result.action_mask[mask_index][new_id_order])
new_obs: List[np.array] = []
for obs_index in range(len(step_result.obs)):
new_obs.append(step_result.obs[obs_index][new_id_order])
return BatchedStepResult(
obs=new_obs,
reward=step_result.reward[new_id_order],
done=step_result.done[new_id_order],
max_step=step_result.max_step[new_id_order],
agent_id=step_result.agent_id[new_id_order],
action_mask=_mask,
)
def _sanitize_action(self, action: np.array) -> np.array:
sanitized_action = np.zeros(
(self._previous_step_result.n_agents(), self.group_spec.action_size)
)
for index, agent_id in enumerate(self._previous_step_result.agent_id):
if not self._previous_step_result.done[index]:
array_index = self.agent_mapper.get_gym_index(agent_id)
sanitized_action[index, :] = action[array_index, :]
return sanitized_action
def _step(self, needs_reset: bool = False) -> BatchedStepResult:
if needs_reset:
self._env.reset()
else:
self._env.step()
info = self._env.get_step_result(self.brain_name)
# Two possible cases here:
# 1) all agents requested decisions (some of which might be done)
# 2) some Agents were marked Done in between steps.
# In case 2, we re-request decisions until all agents request a real decision.
while info.n_agents() - sum(info.done) < self._n_agents:
if not info.done.all():
raise UnityGymException(
"The environment does not have the expected amount of agents. "
+ "Some agents did not request decisions at the same time."
)
for agent_id, reward in zip(info.agent_id, info.reward):
self.agent_mapper.mark_agent_done(agent_id, reward)
self._env.step()
info = self._env.get_step_result(self.brain_name)
return self._sanitize_info(info)
@property
def metadata(self):
return {"render.modes": ["rgb_array"]}

:return: The List containing the branched actions.
"""
return self.action_lookup[action]
class AgentIdIndexMapper:
def __init__(self) -> None:
self._agent_id_to_gym_index: Dict[int, int] = {}
self._done_agents_index_to_last_reward: Dict[int, float] = {}
def set_initial_agents(self, agent_ids: List[int]) -> None:
"""
Provide the initial list of agent ids for the mapper
"""
for idx, agent_id in enumerate(agent_ids):
self._agent_id_to_gym_index[agent_id] = idx
def mark_agent_done(self, agent_id: int, reward: float) -> None:
"""
Declare the agent done with the corresponding final reward.
"""
if agent_id in self._agent_id_to_gym_index:
gym_index = self._agent_id_to_gym_index.pop(agent_id)
self._done_agents_index_to_last_reward[gym_index] = reward
else:
# Agent was never registered in the first place (e.g. EndEpisode called multiple times)
pass
def register_new_agent_id(self, agent_id: int) -> float:
"""
Adds the new agent ID and returns the reward to use for the previous agent in this index
"""
# Any free index is OK here.
free_index, last_reward = self._done_agents_index_to_last_reward.popitem()
self._agent_id_to_gym_index[agent_id] = free_index
return last_reward
def get_id_permutation(self, agent_ids: List[int]) -> List[int]:
"""
Get the permutation from new agent ids to the order that preserves the positions of previous agents.
The result is a list with each integer from 0 to len(_agent_id_to_gym_index)-1
appearing exactly once.
"""
# Map the new agent ids to the their index
new_agent_ids_to_index = {
agent_id: idx for idx, agent_id in enumerate(agent_ids)
}
# Make the output list. We don't write to it sequentially, so start with dummy values.
new_permutation = [-1] * len(self._agent_id_to_gym_index)
# For each agent ID, find the new index of the agent, and write it in the original index.
for agent_id, original_index in self._agent_id_to_gym_index.items():
new_permutation[original_index] = new_agent_ids_to_index[agent_id]
return new_permutation
def get_gym_index(self, agent_id: int) -> int:
"""
Get the gym index for the current agent.
"""
return self._agent_id_to_gym_index[agent_id]
class AgentIdIndexMapperSlow:
"""
Reference implementation of AgentIdIndexMapper.
The operations are O(N^2) so it shouldn't be used for large numbers of agents.
See AgentIdIndexMapper for method descriptions
"""
def __init__(self) -> None:
self._gym_id_order: List[int] = []
self._done_agents_index_to_last_reward: Dict[int, float] = {}
def set_initial_agents(self, agent_ids: List[int]) -> None:
self._gym_id_order = list(agent_ids)
def mark_agent_done(self, agent_id: int, reward: float) -> None:
try:
gym_index = self._gym_id_order.index(agent_id)
self._done_agents_index_to_last_reward[gym_index] = reward
self._gym_id_order[gym_index] = -1
except ValueError:
# Agent was never registered in the first place (e.g. EndEpisode called multiple times)
pass
def register_new_agent_id(self, agent_id: int) -> float:
original_index = self._gym_id_order.index(-1)
self._gym_id_order[original_index] = agent_id
reward = self._done_agents_index_to_last_reward.pop(original_index)
return reward
def get_id_permutation(self, agent_ids):
new_id_order = []
for agent_id in self._gym_id_order:
new_id_order.append(agent_ids.index(agent_id))
return new_id_order
def get_gym_index(self, agent_id: int) -> int:
return self._gym_id_order.index(agent_id)
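For context, the `AgentIdIndexMapper` above (removed here along with the multi-agent mode) kept gym indices stable while agent ids were recycled. A small standalone sketch of how it behaved, using arbitrary example ids (runnable only against the pre-change wrapper):

```python
from gym_unity.envs import AgentIdIndexMapper  # exported before this change

mapper = AgentIdIndexMapper()
mapper.set_initial_agents([1001, 1002, 1003])

# Agent 1002 ends its episode with a final reward of 0.5, freeing gym index 1.
mapper.mark_agent_done(1002, 0.5)

# A new agent id takes over the freed index; the stored reward is returned so it
# can still be reported to gym for the slot's previous occupant.
last_reward = mapper.register_new_agent_id(2002)
assert last_reward == 0.5
assert mapper.get_gym_index(2002) == 1

# Permutation that keeps each surviving id at the index where it was first seen.
assert mapper.get_id_permutation([1001, 2002, 1003]) == [0, 1, 2]
```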

198
gym-unity/gym_unity/tests/test_gym.py


import numpy as np
from gym import spaces
from gym_unity.envs import (
UnityEnv,
UnityGymException,
AgentIdIndexMapper,
AgentIdIndexMapperSlow,
from gym_unity.envs import UnityEnv
from mlagents_envs.base_env import (
BehaviorSpec,
ActionType,
DecisionSteps,
TerminalSteps,
from mlagents_envs.base_env import AgentGroupSpec, ActionType, BatchedStepResult
mock_step = create_mock_vector_step_result()
setup_mock_unityenvironment(mock_env, mock_spec, mock_step)
mock_decision_step, mock_terminal_step = create_mock_vector_steps(mock_spec)
setup_mock_unityenvironment(
mock_env, mock_spec, mock_decision_step, mock_terminal_step
)
env = UnityEnv(" ", use_visual=False, multiagent=False)
env = UnityEnv(" ", use_visual=False)
assert isinstance(env, UnityEnv)
assert isinstance(env.reset(), np.ndarray)
actions = env.action_space.sample()

@mock.patch("gym_unity.envs.UnityEnvironment")
def test_multi_agent(mock_env):
mock_spec = create_mock_group_spec()
mock_step = create_mock_vector_step_result(num_agents=2)
setup_mock_unityenvironment(mock_env, mock_spec, mock_step)
with pytest.raises(UnityGymException):
UnityEnv(" ", multiagent=False)
env = UnityEnv(" ", use_visual=False, multiagent=True)
assert isinstance(env.reset(), list)
actions = [env.action_space.sample() for i in range(env.number_agents)]
obs, rew, done, info = env.step(actions)
assert isinstance(obs, list)
assert isinstance(rew, list)
assert isinstance(done, list)
assert isinstance(info, dict)
@mock.patch("gym_unity.envs.UnityEnvironment")
mock_step = create_mock_vector_step_result(num_agents=1)
setup_mock_unityenvironment(mock_env, mock_spec, mock_step)
mock_decision_step, mock_terminal_step = create_mock_vector_steps(
mock_spec, num_agents=1
)
setup_mock_unityenvironment(
mock_env, mock_spec, mock_decision_step, mock_terminal_step
)
env = UnityEnv(" ", use_visual=False, multiagent=False, flatten_branched=True)
env = UnityEnv(" ", use_visual=False, flatten_branched=True)
assert isinstance(env.action_space, spaces.Discrete)
assert env.action_space.n == 12
assert env._flattener.lookup_action(0) == [0, 0, 0]

env = UnityEnv(" ", use_visual=False, multiagent=False, flatten_branched=False)
env = UnityEnv(" ", use_visual=False, flatten_branched=False)
assert isinstance(env.action_space, spaces.MultiDiscrete)

mock_spec = create_mock_group_spec(number_visual_observations=1)
mock_step = create_mock_vector_step_result(number_visual_observations=1)
setup_mock_unityenvironment(mock_env, mock_spec, mock_step)
mock_decision_step, mock_terminal_step = create_mock_vector_steps(
mock_spec, number_visual_observations=1
)
setup_mock_unityenvironment(
mock_env, mock_spec, mock_decision_step, mock_terminal_step
)
env = UnityEnv(" ", use_visual=True, multiagent=False, uint8_visual=use_uint8)
env = UnityEnv(" ", use_visual=True, uint8_visual=use_uint8)
assert isinstance(env, UnityEnv)
assert isinstance(env.reset(), np.ndarray)
actions = env.action_space.sample()

assert isinstance(info, dict)
@mock.patch("gym_unity.envs.UnityEnvironment")
def test_sanitize_action_shuffled_id(mock_env):
mock_spec = create_mock_group_spec(
vector_action_space_type="discrete", vector_action_space_size=[2, 2, 3]
)
mock_step = create_mock_vector_step_result(num_agents=5)
mock_step.agent_id = np.array(range(5))
setup_mock_unityenvironment(mock_env, mock_spec, mock_step)
env = UnityEnv(" ", use_visual=False, multiagent=True)
shuffled_step_result = create_mock_vector_step_result(num_agents=5)
shuffled_order = [4, 2, 3, 1, 0]
shuffled_step_result.reward = np.array(shuffled_order)
shuffled_step_result.agent_id = np.array(shuffled_order)
sanitized_result = env._sanitize_info(shuffled_step_result)
for expected_reward, reward in zip(range(5), sanitized_result.reward):
assert expected_reward == reward
for expected_agent_id, agent_id in zip(range(5), sanitized_result.agent_id):
assert expected_agent_id == agent_id
@mock.patch("gym_unity.envs.UnityEnvironment")
def test_sanitize_action_one_agent_done(mock_env):
mock_spec = create_mock_group_spec(
vector_action_space_type="discrete", vector_action_space_size=[2, 2, 3]
)
mock_step = create_mock_vector_step_result(num_agents=5)
mock_step.agent_id = np.array(range(5))
setup_mock_unityenvironment(mock_env, mock_spec, mock_step)
env = UnityEnv(" ", use_visual=False, multiagent=True)
received_step_result = create_mock_vector_step_result(num_agents=6)
received_step_result.agent_id = np.array(range(6))
# agent #3 (id = 2) is Done
received_step_result.done = np.array([False] * 2 + [True] + [False] * 3)
sanitized_result = env._sanitize_info(received_step_result)
for expected_agent_id, agent_id in zip([0, 1, 5, 3, 4], sanitized_result.agent_id):
assert expected_agent_id == agent_id
@mock.patch("gym_unity.envs.UnityEnvironment")
def test_sanitize_action_new_agent_done(mock_env):
mock_spec = create_mock_group_spec(
vector_action_space_type="discrete", vector_action_space_size=[2, 2, 3]
)
mock_step = create_mock_vector_step_result(num_agents=3)
mock_step.agent_id = np.array(range(5))
setup_mock_unityenvironment(mock_env, mock_spec, mock_step)
env = UnityEnv(" ", use_visual=False, multiagent=True)
received_step_result = create_mock_vector_step_result(num_agents=7)
received_step_result.agent_id = np.array(range(7))
# agent #3 (id = 2) is Done
# so is the "new" agent (id = 5)
done = [False] * 7
done[2] = True
done[5] = True
received_step_result.done = np.array(done)
sanitized_result = env._sanitize_info(received_step_result)
for expected_agent_id, agent_id in zip([0, 1, 6, 3, 4], sanitized_result.agent_id):
assert expected_agent_id == agent_id
@mock.patch("gym_unity.envs.UnityEnvironment")
def test_sanitize_action_single_agent_multiple_done(mock_env):
mock_spec = create_mock_group_spec(
vector_action_space_type="discrete", vector_action_space_size=[2, 2, 3]
)
mock_step = create_mock_vector_step_result(num_agents=1)
mock_step.agent_id = np.array(range(1))
setup_mock_unityenvironment(mock_env, mock_spec, mock_step)
env = UnityEnv(" ", use_visual=False, multiagent=False)
received_step_result = create_mock_vector_step_result(num_agents=3)
received_step_result.agent_id = np.array(range(3))
# original agent (id = 0) is Done
# so is the "new" agent (id = 1)
done = [True, True, False]
received_step_result.done = np.array(done)
sanitized_result = env._sanitize_info(received_step_result)
for expected_agent_id, agent_id in zip([2], sanitized_result.agent_id):
assert expected_agent_id == agent_id
# Helper methods

obs_shapes = [(vector_observation_space_size,)]
for _ in range(number_visual_observations):
obs_shapes += [(8, 8, 3)]
return AgentGroupSpec(obs_shapes, act_type, vector_action_space_size)
return BehaviorSpec(obs_shapes, act_type, vector_action_space_size)
def create_mock_vector_step_result(num_agents=1, number_visual_observations=0):
def create_mock_vector_steps(specs, num_agents=1, number_visual_observations=0):
:BehaviorSpec specs: The BehaviorSpec for this mock
:int num_agents: Number of "agents" to imitate in your DecisionSteps values.
"""
obs = [np.array([num_agents * [1, 2, 3]]).reshape(num_agents, 3)]

done = np.array(num_agents * [False])
return BatchedStepResult(obs, rewards, done, done, agents, None)
return DecisionSteps(obs, rewards, agents, None), TerminalSteps.empty(specs)
def setup_mock_unityenvironment(mock_env, mock_spec, mock_result):
def setup_mock_unityenvironment(mock_env, mock_spec, mock_decision, mock_termination):
"""
Takes a mock UnityEnvironment and adds the appropriate properties, defined by the mock
GroupSpec and BatchedStepResult.

:Mock mock_result: A BatchedStepResult object that will be returned at each step and reset.
:Mock mock_decision: A DecisionSteps object that will be returned at each step and reset.
:Mock mock_termination: A TerminationSteps object that will be returned at each step and reset.
mock_env.return_value.get_agent_groups.return_value = ["MockBrain"]
mock_env.return_value.get_agent_group_spec.return_value = mock_spec
mock_env.return_value.get_step_result.return_value = mock_result
@pytest.mark.parametrize("mapper_cls", [AgentIdIndexMapper, AgentIdIndexMapperSlow])
def test_agent_id_index_mapper(mapper_cls):
mapper = mapper_cls()
initial_agent_ids = [1001, 1002, 1003, 1004]
mapper.set_initial_agents(initial_agent_ids)
# Mark some agents as done with their last rewards.
mapper.mark_agent_done(1001, 42.0)
mapper.mark_agent_done(1004, 1337.0)
# Make sure we can handle an unknown agent id being marked done.
# This can happen when an agent ends an episode on the same step it starts.
mapper.mark_agent_done(9999, -1.0)
# Now add new agents, and get the rewards of the agent they replaced.
old_reward1 = mapper.register_new_agent_id(2001)
old_reward2 = mapper.register_new_agent_id(2002)
# Order of the rewards doesn't matter
assert {old_reward1, old_reward2} == {42.0, 1337.0}
new_agent_ids = [1002, 1003, 2001, 2002]
permutation = mapper.get_id_permutation(new_agent_ids)
# Make sure it's actually a permutation - needs to contain 0..N-1 with no repeats.
assert set(permutation) == set(range(0, 4))
# For initial agents that were in the initial group, they need to be in the same slot.
# Agents that were added later can appear in any free slot.
permuted_ids = [new_agent_ids[i] for i in permutation]
for idx, agent_id in enumerate(initial_agent_ids):
if agent_id in permuted_ids:
assert permuted_ids[idx] == agent_id
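The permutation returned by get_id_permutation is meant to be applied to any per-agent array that arrives in new_agent_ids order, so each surviving agent stays in its original slot. A small illustration of that reindexing; the reward values and the concrete permutation here are made up (the actual values depend on which slots the new agents were assigned):

import numpy as np

new_agent_ids = [1002, 1003, 2001, 2002]
rewards_in_arrival_order = np.array([0.1, 0.2, 0.3, 0.4])
permutation = [2, 0, 1, 3]  # example output of get_id_permutation

# Index i of the result is the i-th original slot, as checked in the test above.
ids_in_slot_order = [new_agent_ids[i] for i in permutation]
rewards_in_slot_order = rewards_in_arrival_order[permutation]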
mock_env.return_value.get_behavior_names.return_value = ["MockBrain"]
mock_env.return_value.get_behavior_spec.return_value = mock_spec
mock_env.return_value.get_steps.return_value = (mock_decision, mock_termination)

249
ml-agents-envs/mlagents_envs/base_env.py


"""
Python Environment API for the ML-Agents toolkit
The aim of this API is to expose groups of similar Agents evolving in Unity
The aim of this API is to expose Agents evolving in a simulation
There can be multiple groups of similar Agents (same observations and actions
spaces) in the simulation. These groups are identified by a agent_group that
corresponds to a single group of Agents in the simulation.
This API supports multi-agent scenarios and groups similar Agents (same
observations, action spaces and behavior) together. These groups of Agents are
identified by their BehaviorName.
batched manner. When retrieving the state of a group of Agents, said state
contains the data for the whole group. Agents in these groups are identified
by a unique int identifier that allows tracking of Agents across simulation
steps. Note that there is no guarantee that the number or order of the Agents
in the state will be consistent across simulation steps.
batched manner. Agents are identified by a unique AgentId identifier that
allows tracking of Agents across simulation steps. Note that there is no
guarantee that the number or order of the Agents in the state will be
consistent across simulation steps.
A simulation step corresponds to moving the simulation forward until at least
one agent in the simulation sends its observations to Python again. Since
Agents can request decisions at different frequencies, a simulation step does

from abc import ABC, abstractmethod
from typing import List, NamedTuple, Tuple, Optional, Union, Dict
from collections.abc import Mapping
from typing import List, NamedTuple, Tuple, Optional, Union, Dict, Iterator, Any
AgentGroup = str
BehaviorName = str
class StepResult(NamedTuple):
class DecisionStep(NamedTuple):
- obs is a list of numpy arrays observations collected by the group of
agent.
- obs is a list of numpy arrays observations collected by the agent.
- done is a bool. Is true if the Agent was terminated during the last
simulation step.
- max_step is a bool. Is true if the Agent reached its maximum number of
steps during the last simulation step.
- agent_id is an int and a unique identifier for the corresponding Agent.
- action_mask is an optional list of one dimensional array of booleans.
Only available in multi-discrete action space type.

obs: List[np.ndarray]
reward: float
done: bool
max_step: bool
class BatchedStepResult:
class DecisionSteps(Mapping):
Contains the data a group of similar Agents collected since the last
Contains the data a batch of similar Agents collected since the last
agents and the batch size of the BatchedStepResult are not fixed across
agents and the batch size of the DecisionSteps are not fixed across
- obs is a list of numpy arrays observations collected by the group of
agent. Each obs has one extra dimension compared to StepResult: the first
dimension of the array corresponds to the batch size of
the group.
- obs is a list of numpy array observations collected by the batch of
agents. Each obs has one extra dimension compared to DecisionStep: the
first dimension of the array corresponds to the batch size.
- done is an array of booleans of length batch size. Is true if the
associated Agent was terminated during the last simulation step.
- max_step is an array of booleans of length batch size. Is true if the
associated Agent reached its maximum number of steps during the last
simulation step.
- agent_id is an int vector of length batch size containing unique
identifier for the corresponding Agent. This is used to track Agents
across simulation steps.

this simulation step.
"""
def __init__(self, obs, reward, done, max_step, agent_id, action_mask):
def __init__(self, obs, reward, agent_id, action_mask):
self.done: np.ndarray = done
self.max_step: np.ndarray = max_step
self.agent_id: np.ndarray = agent_id
self.action_mask: Optional[List[np.ndarray]] = action_mask
self._agent_id_to_index: Optional[Dict[AgentId, int]] = None

"""
:returns: A Dict that maps agent_id to the index of those agents in
this BatchedStepResult.
this DecisionSteps.
"""
if self._agent_id_to_index is None:
self._agent_id_to_index = {}

def contains_agent(self, agent_id: AgentId) -> bool:
return agent_id in self.agent_id_to_index
def __len__(self) -> int:
return len(self.agent_id)
def get_agent_step_result(self, agent_id: AgentId) -> StepResult:
def __getitem__(self, agent_id: AgentId) -> DecisionStep:
returns the step result for a specific agent.
returns the DecisionStep for a specific agent.
:returns: obs, reward, done, agent_id and optional action mask for a
specific agent
:returns: The DecisionStep
if not self.contains_agent(agent_id):
raise IndexError(
"get_agent_step_result failed. agent_id {} is not present in the BatchedStepResult".format(
agent_id
)
if agent_id not in self.agent_id_to_index:
raise KeyError(
"agent_id {} is not present in the DecisionSteps".format(agent_id)
)
agent_index = self._agent_id_to_index[agent_id] # type: ignore
agent_obs = []

agent_mask = []
for mask in self.action_mask:
agent_mask.append(mask[agent_index])
return StepResult(
return DecisionStep(
done=self.done[agent_index],
max_step=self.max_step[agent_index],
def __iter__(self) -> Iterator[Any]:
yield from self.agent_id
def empty(spec: "AgentGroupSpec") -> "BatchedStepResult":
def empty(spec: "BehaviorSpec") -> "DecisionSteps":
Returns an empty BatchedStepResult.
:param spec: The AgentGroupSpec for the BatchedStepResult
Returns an empty DecisionSteps.
:param spec: The BehaviorSpec for the DecisionSteps
return BatchedStepResult(
return DecisionSteps(
done=np.zeros(0, dtype=np.bool),
max_step=np.zeros(0, dtype=np.bool),
def n_agents(self) -> int:
class TerminalStep(NamedTuple):
"""
Contains the data a single Agent collected when its episode ended.
- obs is a list of numpy arrays observations collected by the agent.
- reward is a float. Corresponds to the rewards collected by the agent
since the last simulation step.
- max_step is a bool. Is true if the Agent reached its maximum number of
steps during the last simulation step.
- agent_id is an int and a unique identifier for the corresponding Agent.
"""
obs: List[np.ndarray]
reward: float
max_step: bool
agent_id: AgentId
class TerminalSteps(Mapping):
"""
Contains the data a batch of Agents collected when their episode
terminated. All Agents present in the TerminalSteps have ended their
episode.
- obs is a list of numpy array observations collected by the batch of
agents. Each obs has one extra dimension compared to DecisionStep: the
first dimension of the array corresponds to the batch size.
- reward is a float vector of length batch size. Corresponds to the
rewards collected by each agent since the last simulation step.
- max_step is an array of booleans of length batch size. Is true if the
associated Agent reached its maximum number of steps during the last
simulation step.
- agent_id is an int vector of length batch size containing unique
identifier for the corresponding Agent. This is used to track Agents
across simulation steps.
"""
def __init__(self, obs, reward, max_step, agent_id):
self.obs: List[np.ndarray] = obs
self.reward: np.ndarray = reward
self.max_step: np.ndarray = max_step
self.agent_id: np.ndarray = agent_id
self._agent_id_to_index: Optional[Dict[AgentId, int]] = None
@property
def agent_id_to_index(self) -> Dict[AgentId, int]:
"""
:returns: A Dict that maps agent_id to the index of those agents in
this TerminalSteps.
"""
if self._agent_id_to_index is None:
self._agent_id_to_index = {}
for a_idx, a_id in enumerate(self.agent_id):
self._agent_id_to_index[a_id] = a_idx
return self._agent_id_to_index
def __len__(self) -> int:
def __getitem__(self, agent_id: AgentId) -> TerminalStep:
"""
returns the TerminalStep for a specific agent.
:param agent_id: The id of the agent
:returns: The TerminalStep (obs, reward, max_step and agent_id) for the
specified agent
"""
if agent_id not in self.agent_id_to_index:
raise KeyError(
"agent_id {} is not present in the TerminalSteps".format(agent_id)
)
agent_index = self._agent_id_to_index[agent_id] # type: ignore
agent_obs = []
for batched_obs in self.obs:
agent_obs.append(batched_obs[agent_index])
return TerminalStep(
obs=agent_obs,
reward=self.reward[agent_index],
max_step=self.max_step[agent_index],
agent_id=agent_id,
)
def __iter__(self) -> Iterator[Any]:
yield from self.agent_id
@staticmethod
def empty(spec: "BehaviorSpec") -> "TerminalSteps":
"""
Returns an empty TerminalSteps.
:param spec: The BehaviorSpec for the TerminalSteps
"""
obs: List[np.ndarray] = []
for shape in spec.observation_shapes:
obs += [np.zeros((0,) + shape, dtype=np.float32)]
return TerminalSteps(
obs=obs,
reward=np.zeros(0, dtype=np.float32),
max_step=np.zeros(0, dtype=np.bool),
agent_id=np.zeros(0, dtype=np.int32),
)
class ActionType(Enum):
DISCRETE = 0

class AgentGroupSpec(NamedTuple):
class BehaviorSpec(NamedTuple):
spaces for a group of Agents.
spaces for a group of Agents under the same behavior.
the ordering of the BatchedStepResult and StepResult.
the ordering of the DecisionSteps and TerminalSteps.
- action_type is the type of data of the action. It can be discrete or
continuous. If discrete, the action tensors are expected to be int32. If
continuous, the actions are expected to be float32.

def is_action_discrete(self) -> bool:
"""
Returns true if the Agent group uses discrete actions
Returns true if this Behavior uses discrete actions
Returns true if the Agent group uses continuous actions
Returns true if this Behavior uses continuous actions
"""
return self.action_type == ActionType.CONTINUOUS

pass
@abstractmethod
def get_agent_groups(self) -> List[AgentGroup]:
def get_behavior_names(self) -> List[BehaviorName]:
Returns the list of the agent group names present in the environment.
Agents grouped under the same group name have the same action and
observation specs, and are expected to behave similarly in the environment.
Returns the list of the behavior names present in the environment.
Agents grouped under the same behavior name have the same action and
observation specs, and are expected to behave similarly in the
environment.
:return: the list of agent group names.
:return: the list of BehaviorNames in the environment.
def set_actions(self, agent_group: AgentGroup, action: np.ndarray) -> None:
def set_actions(self, behavior_name: BehaviorName, action: np.ndarray) -> None:
the step result.
:param agent_group: The name of the group the agents are part of
the DecisionSteps.
:param behavior_name: The name of the behavior the agents are part of
:param action: A two dimensional np.ndarray corresponding to the action
(either int or float)
"""

def set_action_for_agent(
self, agent_group: AgentGroup, agent_id: AgentId, action: np.ndarray
self, behavior_name: BehaviorName, agent_id: AgentId, action: np.ndarray
:param agent_group: The name of the group the agent is part of
:param behavior_name: The name of the behavior the agent is part of
:param action: A two dimensional np.ndarray corresponding to the action
:param action: A one dimensional np.ndarray corresponding to the action
def get_step_result(self, agent_group: AgentGroup) -> BatchedStepResult:
def get_steps(
self, behavior_name: BehaviorName
) -> Tuple[DecisionSteps, TerminalSteps]:
Retrieves the observations of the agents that requested a step in the
Retrieves the steps of the agents that requested a step in the
:param agent_group: The name of the group the agents are part of
:return: A BatchedStepResult NamedTuple containing the observations,
the rewards and the done flags for this group of agents.
:param behavior_name: The name of the behavior the agents are part of
:return: A tuple containing :
- A DecisionSteps NamedTuple containing the observations,
the rewards, the agent ids and the action masks for the Agents
of the specified behavior. These Agents need an action this step.
- A TerminalSteps NamedTuple containing the observations,
rewards, agent ids and max_step flags of the agents that had their
episode terminated last step.
def get_agent_group_spec(self, agent_group: AgentGroup) -> AgentGroupSpec:
def get_behavior_spec(self, behavior_name: BehaviorName) -> BehaviorSpec:
Get the AgentGroupSpec corresponding to the agent group name
:param agent_group: The name of the group the agents are part of
:return: A AgentGroupSpec corresponding to that agent group name
Get the BehaviorSpec corresponding to the behavior name
:param behavior_name: The name of the behavior the agents are part of
:return: A BehaviorSpec corresponding to that behavior
"""
pass
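Taken together, the renamed API above is driven per behavior name; a minimal usage sketch, assuming a running Unity Editor or executable to connect to and using UnityEnvironment (the concrete implementation changed in the next file) with an arbitrary behavior:

import numpy as np
from mlagents_envs.environment import UnityEnvironment

env = UnityEnvironment(file_name=None)  # None connects to the Editor
env.reset()

behavior_name = env.get_behavior_names()[0]
spec = env.get_behavior_spec(behavior_name)

decision_steps, terminal_steps = env.get_steps(behavior_name)
if spec.is_action_continuous():
    # One row of actions per agent that requested a decision this step.
    actions = np.zeros((len(decision_steps), spec.action_size), dtype=np.float32)
    env.set_actions(behavior_name, actions)
env.step()
env.close()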

104
ml-agents-envs/mlagents_envs/environment.py


import numpy as np
import os
import subprocess
from typing import Dict, List, Optional, Any
from typing import Dict, List, Optional, Any, Tuple
import mlagents_envs

from mlagents_envs.base_env import (
BaseEnv,
BatchedStepResult,
AgentGroupSpec,
AgentGroup,
DecisionSteps,
TerminalSteps,
BehaviorSpec,
BehaviorName,
AgentId,
)
from mlagents_envs.timers import timed, hierarchical_timer

)
from mlagents_envs.communicator_objects.command_pb2 import STEP, RESET
from mlagents_envs.rpc_utils import (
agent_group_spec_from_proto,
batched_step_result_from_proto,
)
from mlagents_envs.rpc_utils import behavior_spec_from_proto, steps_from_proto
from mlagents_envs.communicator_objects.unity_rl_input_pb2 import UnityRLInputProto
from mlagents_envs.communicator_objects.unity_rl_output_pb2 import UnityRLOutputProto

# Currently we require strict equality between the communication protocol
# on each side, although we may allow some flexibility in the future.
# This should be incremented whenever a change is made to the communication protocol.
API_VERSION = "0.15.0"
API_VERSION = "0.16.0"
# Default port that the editor listens on. If an environment executable
# isn't specified, this port will be used.

f"Connected to Unity environment with package version {aca_params.package_version} "
f"and communication version {aca_params.communication_version}"
)
self._env_state: Dict[str, BatchedStepResult] = {}
self._env_specs: Dict[str, AgentGroupSpec] = {}
self._env_state: Dict[str, Tuple[DecisionSteps, TerminalSteps]] = {}
self._env_specs: Dict[str, BehaviorSpec] = {}
self._update_group_specs(aca_output)
self._update_behavior_specs(aca_output)
@staticmethod
def get_communicator(worker_id, base_port, timeout_wait):

f'"chmod -R 755 {launch_string}"'
) from perm
def _update_group_specs(self, output: UnityOutputProto) -> None:
def _update_behavior_specs(self, output: UnityOutputProto) -> None:
init_output = output.rl_initialization_output
for brain_param in init_output.brain_parameters:
# Each BrainParameter in the rl_initialization_output should have at least one AgentInfo

agent = agent_infos.value[0]
new_spec = agent_group_spec_from_proto(brain_param, agent)
new_spec = behavior_spec_from_proto(brain_param, agent)
self._env_specs[brain_param.brain_name] = new_spec
logger.info(f"Connected new brain:\n{brain_param.brain_name}")

for brain_name in self._env_specs.keys():
if brain_name in output.agentInfos:
agent_info_list = output.agentInfos[brain_name].value
self._env_state[brain_name] = batched_step_result_from_proto(
self._env_state[brain_name] = steps_from_proto(
self._env_state[brain_name] = BatchedStepResult.empty(
self._env_specs[brain_name]
self._env_state[brain_name] = (
DecisionSteps.empty(self._env_specs[brain_name]),
TerminalSteps.empty(self._env_specs[brain_name]),
)
self._parse_side_channel_message(self.side_channels, output.side_channel)

if outputs is None:
raise UnityCommunicationException("Communicator has stopped.")
self._update_group_specs(outputs)
self._update_behavior_specs(outputs)
rl_output = outputs.rl_output
self._update_state(rl_output)
self._is_first_message = False

if group_name not in self._env_actions:
n_agents = 0
if group_name in self._env_state:
n_agents = self._env_state[group_name].n_agents()
n_agents = len(self._env_state[group_name][0])
self._env_actions[group_name] = self._env_specs[
group_name
].create_empty_action(n_agents)

if outputs is None:
raise UnityCommunicationException("Communicator has stopped.")
self._update_group_specs(outputs)
self._update_behavior_specs(outputs)
def get_agent_groups(self) -> List[AgentGroup]:
def get_behavior_names(self):
def _assert_group_exists(self, agent_group: str) -> None:
if agent_group not in self._env_specs:
def _assert_behavior_exists(self, behavior_name: str) -> None:
if behavior_name not in self._env_specs:
"in the environment".format(agent_group)
"in the environment".format(behavior_name)
def set_actions(self, agent_group: AgentGroup, action: np.ndarray) -> None:
self._assert_group_exists(agent_group)
if agent_group not in self._env_state:
def set_actions(self, behavior_name: BehaviorName, action: np.ndarray) -> None:
self._assert_behavior_exists(behavior_name)
if behavior_name not in self._env_state:
spec = self._env_specs[agent_group]
spec = self._env_specs[behavior_name]
expected_shape = (self._env_state[agent_group].n_agents(), spec.action_size)
expected_shape = (len(self._env_state[behavior_name][0]), spec.action_size)
"The group {0} needs an input of dimension {1} but received input of dimension {2}".format(
agent_group, expected_shape, action.shape
"The behavior {0} needs an input of dimension {1} but received input of dimension {2}".format(
behavior_name, expected_shape, action.shape
self._env_actions[agent_group] = action
self._env_actions[behavior_name] = action
self, agent_group: AgentGroup, agent_id: AgentId, action: np.ndarray
self, behavior_name: BehaviorName, agent_id: AgentId, action: np.ndarray
self._assert_group_exists(agent_group)
if agent_group not in self._env_state:
self._assert_behavior_exists(behavior_name)
if behavior_name not in self._env_state:
spec = self._env_specs[agent_group]
spec = self._env_specs[behavior_name]
"The Agent {0} in group {1} needs an input of dimension {2} but received input of dimension {3}".format(
agent_id, agent_group, expected_shape, action.shape
f"The Agent {0} with BehaviorName {1} needs an input of dimension "
f"{2} but received input of dimension {3}".format(
agent_id, behavior_name, expected_shape, action.shape
)
)
expected_type = np.float32 if spec.is_action_continuous() else np.int32

if agent_group not in self._env_actions:
self._env_actions[agent_group] = spec.create_empty_action(
self._env_state[agent_group].n_agents()
if behavior_name not in self._env_actions:
self._env_actions[behavior_name] = spec.create_empty_action(
len(self._env_state[behavior_name][0])
index = np.where(self._env_state[agent_group].agent_id == agent_id)[0][0]
index = np.where(self._env_state[behavior_name][0].agent_id == agent_id)[0][
0
]
except IndexError as ie:
raise IndexError(
"agent_id {} is did not request a decision at the previous step".format(

self._env_actions[agent_group][index] = action
self._env_actions[behavior_name][index] = action
def get_step_result(self, agent_group: AgentGroup) -> BatchedStepResult:
self._assert_group_exists(agent_group)
return self._env_state[agent_group]
def get_steps(
self, behavior_name: BehaviorName
) -> Tuple[DecisionSteps, TerminalSteps]:
self._assert_behavior_exists(behavior_name)
return self._env_state[behavior_name]
def get_agent_group_spec(self, agent_group: AgentGroup) -> AgentGroupSpec:
self._assert_group_exists(agent_group)
return self._env_specs[agent_group]
def get_behavior_spec(self, behavior_name: BehaviorName) -> BehaviorSpec:
self._assert_behavior_exists(behavior_name)
return self._env_specs[behavior_name]
def close(self):
"""

) -> UnityInputProto:
rl_in = UnityRLInputProto()
for b in vector_action:
n_agents = self._env_state[b].n_agents()
n_agents = len(self._env_state[b][0])
if n_agents == 0:
continue
for i in range(n_agents):

99
ml-agents-envs/mlagents_envs/rpc_utils.py


from mlagents_envs.base_env import AgentGroupSpec, ActionType, BatchedStepResult
from mlagents_envs.base_env import (
BehaviorSpec,
ActionType,
DecisionSteps,
TerminalSteps,
)
from mlagents_envs.exception import UnityObservationException
from mlagents_envs.timers import hierarchical_timer, timed
from mlagents_envs.communicator_objects.agent_info_pb2 import AgentInfoProto

from PIL import Image
def agent_group_spec_from_proto(
def behavior_spec_from_proto(
) -> AgentGroupSpec:
) -> BehaviorSpec:
Converts brain parameter and agent info proto to AgentGroupSpec object.
Converts brain parameter and agent info proto to BehaviorSpec object.
:return: AgentGroupSpec object.
:return: BehaviorSpec object.
"""
observation_shape = [tuple(obs.shape) for obs in agent_info.observations]
action_type = (

] = brain_param_proto.vector_action_size[0]
else:
action_shape = tuple(brain_param_proto.vector_action_size)
return AgentGroupSpec(observation_shape, action_type, action_shape)
return BehaviorSpec(observation_shape, action_type, action_shape)
@timed

@timed
def batched_step_result_from_proto(
def steps_from_proto(
group_spec: AgentGroupSpec,
) -> BatchedStepResult:
obs_list: List[np.ndarray] = []
for obs_index, obs_shape in enumerate(group_spec.observation_shapes):
behavior_spec: BehaviorSpec,
) -> Tuple[DecisionSteps, TerminalSteps]:
decision_agent_info_list = [
agent_info for agent_info in agent_info_list if not agent_info.done
]
terminal_agent_info_list = [
agent_info for agent_info in agent_info_list if agent_info.done
]
decision_obs_list: List[np.ndarray] = []
terminal_obs_list: List[np.ndarray] = []
for obs_index, obs_shape in enumerate(behavior_spec.observation_shapes):
obs_list.append(
_process_visual_observation(obs_index, obs_shape, agent_info_list)
decision_obs_list.append(
_process_visual_observation(
obs_index, obs_shape, decision_agent_info_list
)
)
terminal_obs_list.append(
_process_visual_observation(
obs_index, obs_shape, terminal_agent_info_list
)
obs_list.append(
_process_vector_observation(obs_index, obs_shape, agent_info_list)
decision_obs_list.append(
_process_vector_observation(
obs_index, obs_shape, decision_agent_info_list
)
)
terminal_obs_list.append(
_process_vector_observation(
obs_index, obs_shape, terminal_agent_info_list
)
rewards = np.array(
[agent_info.reward for agent_info in agent_info_list], dtype=np.float32
decision_rewards = np.array(
[agent_info.reward for agent_info in decision_agent_info_list], dtype=np.float32
)
terminal_rewards = np.array(
[agent_info.reward for agent_info in terminal_agent_info_list], dtype=np.float32
_raise_on_nan_and_inf(rewards, "rewards")
_raise_on_nan_and_inf(decision_rewards, "rewards")
_raise_on_nan_and_inf(terminal_rewards, "rewards")
done = np.array([agent_info.done for agent_info in agent_info_list], dtype=np.bool)
[agent_info.max_step_reached for agent_info in agent_info_list], dtype=np.bool
[agent_info.max_step_reached for agent_info in terminal_agent_info_list],
dtype=np.bool,
agent_id = np.array(
[agent_info.id for agent_info in agent_info_list], dtype=np.int32
decision_agent_id = np.array(
[agent_info.id for agent_info in decision_agent_info_list], dtype=np.int32
)
terminal_agent_id = np.array(
[agent_info.id for agent_info in terminal_agent_info_list], dtype=np.int32
if group_spec.is_action_discrete():
if any([agent_info.action_mask is not None] for agent_info in agent_info_list):
n_agents = len(agent_info_list)
a_size = np.sum(group_spec.discrete_action_branches)
if behavior_spec.is_action_discrete():
if any(
agent_info.action_mask is not None
for agent_info in decision_agent_info_list
):
n_agents = len(decision_agent_info_list)
a_size = np.sum(behavior_spec.discrete_action_branches)
for agent_index, agent_info in enumerate(agent_info_list):
for agent_index, agent_info in enumerate(decision_agent_info_list):
if agent_info.action_mask is not None:
if len(agent_info.action_mask) == a_size:
mask_matrix[agent_index, :] = [

action_mask = (1 - mask_matrix).astype(np.bool)
indices = _generate_split_indices(group_spec.discrete_action_branches)
indices = _generate_split_indices(behavior_spec.discrete_action_branches)
return BatchedStepResult(obs_list, rewards, done, max_step, agent_id, action_mask)
return (
DecisionSteps(
decision_obs_list, decision_rewards, decision_agent_id, action_mask
),
TerminalSteps(terminal_obs_list, terminal_rewards, max_step, terminal_agent_id),
)
def _generate_split_indices(dims):

38
ml-agents-envs/mlagents_envs/side_channel/incoming_message.py


self.buffer = buffer
self.offset = offset
def read_bool(self) -> bool:
def read_bool(self, default_value: bool = False) -> bool:
:param default_value: Default value to use if the end of the message is reached.
:return: The value read from the message, or the default value if the end was reached.
if self._at_end_of_buffer():
return default_value
def read_int32(self) -> int:
def read_int32(self, default_value: int = 0) -> int:
:param default_value: Default value to use if the end of the message is reached.
:return: The value read from the message, or the default value if the end was reached.
if self._at_end_of_buffer():
return default_value
def read_float32(self) -> float:
def read_float32(self, default_value: float = 0.0) -> float:
:param default_value: Default value to use if the end of the message is reached.
:return: The value read from the message, or the default value if the end was reached.
if self._at_end_of_buffer():
return default_value
def read_float32_list(self) -> List[float]:
def read_float32_list(self, default_value: List[float] = None) -> List[float]:
:param default_value: Default value to use if the end of the message is reached.
:return: The value read from the message, or the default value if the end was reached.
if self._at_end_of_buffer():
return [] if default_value is None else default_value
list_len = self.read_int32()
output = []
for _ in range(list_len):

def read_string(self) -> str:
def read_string(self, default_value: str = "") -> str:
:param default_value: Default value to use if the end of the message is reached.
:return: The value read from the message, or the default value if the end was reached.
if self._at_end_of_buffer():
return default_value
encoded_str_len = self.read_int32()
val = self.buffer[self.offset : self.offset + encoded_str_len].decode("ascii")
self.offset += encoded_str_len

Get a copy of the internal bytes used by the message.
"""
return bytearray(self.buffer)
def _at_end_of_buffer(self) -> bool:
return self.offset >= len(self.buffer)
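The default_value parameters above make over-reading a side channel message safe: once the offset passes the end of the buffer, each read_* call returns its default instead of raising. A short sketch, assuming IncomingMessage is constructed directly from a bytes buffer as in the side channel tests further down:

from mlagents_envs.side_channel.incoming_message import IncomingMessage

msg = IncomingMessage(b"")  # empty buffer: every read is past the end
assert msg.read_bool() is False
assert msg.read_int32(default_value=7) == 7
assert msg.read_float32_list() == []
assert msg.read_string(default_value="fallback") == "fallback"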

43
ml-agents-envs/mlagents_envs/tests/test_envs.py


import numpy as np
from mlagents_envs.environment import UnityEnvironment
from mlagents_envs.base_env import BatchedStepResult
from mlagents_envs.base_env import DecisionSteps, TerminalSteps
from mlagents_envs.exception import UnityEnvironmentException, UnityActionException
from mlagents_envs.mock_communicator import MockCommunicator

discrete_action=False, visual_inputs=0
)
env = UnityEnvironment(" ")
assert env.get_agent_groups() == ["RealFakeBrain"]
assert env.get_behavior_names() == ["RealFakeBrain"]
env.close()

discrete_action=False, visual_inputs=0
)
env = UnityEnvironment(" ")
spec = env.get_agent_group_spec("RealFakeBrain")
spec = env.get_behavior_spec("RealFakeBrain")
batched_step_result = env.get_step_result("RealFakeBrain")
decision_steps, terminal_steps = env.get_steps("RealFakeBrain")
assert isinstance(batched_step_result, BatchedStepResult)
assert len(spec.observation_shapes) == len(batched_step_result.obs)
n_agents = batched_step_result.n_agents()
for shape, obs in zip(spec.observation_shapes, batched_step_result.obs):
assert isinstance(decision_steps, DecisionSteps)
assert isinstance(terminal_steps, TerminalSteps)
assert len(spec.observation_shapes) == len(decision_steps.obs)
assert len(spec.observation_shapes) == len(terminal_steps.obs)
n_agents = len(decision_steps)
for shape, obs in zip(spec.observation_shapes, decision_steps.obs):
assert (n_agents,) + shape == obs.shape
n_agents = len(terminal_steps)
for shape, obs in zip(spec.observation_shapes, terminal_steps.obs):
assert (n_agents,) + shape == obs.shape

discrete_action=False, visual_inputs=0
)
env = UnityEnvironment(" ")
spec = env.get_agent_group_spec("RealFakeBrain")
spec = env.get_behavior_spec("RealFakeBrain")
batched_step_result = env.get_step_result("RealFakeBrain")
n_agents = batched_step_result.n_agents()
decision_steps, terminal_steps = env.get_steps("RealFakeBrain")
n_agents = len(decision_steps)
env.set_actions(
"RealFakeBrain", np.zeros((n_agents, spec.action_size), dtype=np.float32)
)

"RealFakeBrain",
np.zeros((n_agents - 1, spec.action_size), dtype=np.float32),
)
batched_step_result = env.get_step_result("RealFakeBrain")
n_agents = batched_step_result.n_agents()
decision_steps, terminal_steps = env.get_steps("RealFakeBrain")
n_agents = len(decision_steps)
env.set_actions(
"RealFakeBrain", -1 * np.ones((n_agents, spec.action_size), dtype=np.float32)
)

assert isinstance(batched_step_result, BatchedStepResult)
assert len(spec.observation_shapes) == len(batched_step_result.obs)
for shape, obs in zip(spec.observation_shapes, batched_step_result.obs):
assert isinstance(decision_steps, DecisionSteps)
assert isinstance(terminal_steps, TerminalSteps)
assert len(spec.observation_shapes) == len(decision_steps.obs)
assert len(spec.observation_shapes) == len(terminal_steps.obs)
for shape, obs in zip(spec.observation_shapes, decision_steps.obs):
assert not batched_step_result.done[0]
assert batched_step_result.done[2]
assert 0 in decision_steps
assert 2 in terminal_steps
@mock.patch("mlagents_envs.environment.UnityEnvironment.executable_launcher")

170
ml-agents-envs/mlagents_envs/tests/test_rpc_utils.py


AgentInfoActionPairProto,
)
from mlagents_envs.communicator_objects.agent_action_pb2 import AgentActionProto
from mlagents_envs.base_env import AgentGroupSpec, ActionType, BatchedStepResult
from mlagents_envs.base_env import (
BehaviorSpec,
ActionType,
DecisionSteps,
TerminalSteps,
)
agent_group_spec_from_proto,
behavior_spec_from_proto,
batched_step_result_from_proto,
steps_from_proto,
)
from PIL import Image

ap = AgentInfoProto()
ap.reward = float("inf") if infinite_rewards else agent_index
ap.done = agent_index % 2 == 0
ap.max_step_reached = agent_index % 2 == 1
ap.max_step_reached = agent_index % 4 == 0
ap.id = agent_index
ap.action_mask.extend([True, False] * 5)
obs_proto_list = []

return obs_proto
def proto_from_batched_step_result(
batched_step_result: BatchedStepResult
def proto_from_steps(
decision_steps: DecisionSteps, terminal_steps: TerminalSteps
for agent_id in batched_step_result.agent_id:
agent_id_index = batched_step_result.agent_id_to_index[agent_id]
reward = batched_step_result.reward[agent_id_index]
done = batched_step_result.done[agent_id_index]
max_step_reached = batched_step_result.max_step[agent_id_index]
# Take care of the DecisionSteps first
for agent_id in decision_steps.agent_id:
agent_id_index = decision_steps.agent_id_to_index[agent_id]
reward = decision_steps.reward[agent_id_index]
done = False
max_step_reached = False
if batched_step_result.action_mask is not None:
if decision_steps.action_mask is not None:
for _branch in batched_step_result.action_mask:
for _branch in decision_steps.action_mask:
for all_observations_of_type in batched_step_result.obs:
for all_observations_of_type in decision_steps.obs:
observation = all_observations_of_type[agent_id_index]
if len(observation.shape) == 3:
observations.append(generate_uncompressed_proto_obs(observation))

compression_type=NONE,
)
)
agent_info_proto = AgentInfoProto(
reward=reward,
done=done,

observations=observations,
)
agent_info_protos.append(agent_info_proto)
# Take care of the TerminalSteps second
for agent_id in terminal_steps.agent_id:
agent_id_index = terminal_steps.agent_id_to_index[agent_id]
reward = terminal_steps.reward[agent_id_index]
done = True
max_step_reached = terminal_steps.max_step[agent_id_index]
final_observations: List[ObservationProto] = []
for all_observations_of_type in terminal_steps.obs:
observation = all_observations_of_type[agent_id_index]
if len(observation.shape) == 3:
final_observations.append(generate_uncompressed_proto_obs(observation))
else:
final_observations.append(
ObservationProto(
float_data=ObservationProto.FloatData(data=observation),
shape=[len(observation)],
compression_type=NONE,
)
)
agent_info_proto = AgentInfoProto(
reward=reward,
done=done,
id=agent_id,
max_step_reached=max_step_reached,
action_mask=None,
observations=final_observations,
)
agent_info_protos.append(agent_info_proto)
# The arguments here are the BatchedStepResult and actions for a single agent name
def proto_from_batched_step_result_and_action(
batched_step_result: BatchedStepResult, actions: np.ndarray
# The arguments here are the DecisionSteps, TerminalSteps and actions for a single agent name
def proto_from_steps_and_action(
decision_steps: DecisionSteps, terminal_steps: TerminalSteps, actions: np.ndarray
agent_info_protos = proto_from_batched_step_result(batched_step_result)
agent_info_protos = proto_from_steps(decision_steps, terminal_steps)
agent_action_protos = [
AgentActionProto(vector_actions=action) for action in actions
]

def test_batched_step_result_from_proto():
n_agents = 10
shapes = [(3,), (4,)]
group_spec = AgentGroupSpec(shapes, ActionType.CONTINUOUS, 3)
spec = BehaviorSpec(shapes, ActionType.CONTINUOUS, 3)
result = batched_step_result_from_proto(ap_list, group_spec)
assert list(result.reward) == list(range(n_agents))
assert list(result.agent_id) == list(range(n_agents))
for index in range(n_agents):
assert result.done[index] == (index % 2 == 0)
assert result.max_step[index] == (index % 2 == 1)
assert list(result.obs[0].shape) == [n_agents] + list(shapes[0])
assert list(result.obs[1].shape) == [n_agents] + list(shapes[1])
decision_steps, terminal_steps = steps_from_proto(ap_list, spec)
for agent_id in range(n_agents):
if agent_id in decision_steps:
# we set the reward equal to the agent id in generate_list_agent_proto
assert decision_steps[agent_id].reward == agent_id
elif agent_id in terminal_steps:
assert terminal_steps[agent_id].reward == agent_id
else:
raise Exception("Missing agent from the steps")
# We sort the AgentId since they are split between DecisionSteps and TerminalSteps
combined_agent_id = list(decision_steps.agent_id) + list(terminal_steps.agent_id)
combined_agent_id.sort()
assert combined_agent_id == list(range(n_agents))
for agent_id in range(n_agents):
assert (agent_id in terminal_steps) == (agent_id % 2 == 0)
if agent_id in terminal_steps:
assert terminal_steps[agent_id].max_step == (agent_id % 4 == 0)
assert decision_steps.obs[0].shape[1] == shapes[0][0]
assert decision_steps.obs[1].shape[1] == shapes[1][0]
assert terminal_steps.obs[0].shape[1] == shapes[0][0]
assert terminal_steps.obs[1].shape[1] == shapes[1][0]
group_spec = AgentGroupSpec(shapes, ActionType.DISCRETE, (7, 3))
behavior_spec = BehaviorSpec(shapes, ActionType.DISCRETE, (7, 3))
result = batched_step_result_from_proto(ap_list, group_spec)
masks = result.action_mask
decision_steps, terminal_steps = steps_from_proto(ap_list, behavior_spec)
masks = decision_steps.action_mask
assert masks[0].shape == (n_agents, 7)
assert masks[1].shape == (n_agents, 3)
assert masks[0].shape == (n_agents / 2, 7) # half agents are done
assert masks[1].shape == (n_agents / 2, 3) # half agents are done
assert masks[0][0, 0]
assert not masks[1][0, 0]
assert masks[1][0, 1]

n_agents = 10
shapes = [(3,), (4,)]
group_spec = AgentGroupSpec(shapes, ActionType.DISCRETE, (10,))
behavior_spec = BehaviorSpec(shapes, ActionType.DISCRETE, (10,))
result = batched_step_result_from_proto(ap_list, group_spec)
masks = result.action_mask
decision_steps, terminal_steps = steps_from_proto(ap_list, behavior_spec)
masks = decision_steps.action_mask
assert masks[0].shape == (n_agents, 10)
assert masks[0].shape == (n_agents / 2, 10)
assert masks[0][0, 0]

group_spec = AgentGroupSpec(shapes, ActionType.DISCRETE, (2, 2, 6))
behavior_spec = BehaviorSpec(shapes, ActionType.DISCRETE, (2, 2, 6))
result = batched_step_result_from_proto(ap_list, group_spec)
masks = result.action_mask
decision_steps, terminal_steps = steps_from_proto(ap_list, behavior_spec)
masks = decision_steps.action_mask
assert masks[0].shape == (n_agents, 2)
assert masks[1].shape == (n_agents, 2)
assert masks[2].shape == (n_agents, 6)
assert masks[0].shape == (n_agents / 2, 2)
assert masks[1].shape == (n_agents / 2, 2)
assert masks[2].shape == (n_agents / 2, 6)
assert masks[0][0, 0]

group_spec = AgentGroupSpec(shapes, ActionType.CONTINUOUS, 10)
behavior_spec = BehaviorSpec(shapes, ActionType.CONTINUOUS, 10)
result = batched_step_result_from_proto(ap_list, group_spec)
masks = result.action_mask
decision_steps, terminal_steps = steps_from_proto(ap_list, behavior_spec)
masks = decision_steps.action_mask
def test_agent_group_spec_from_proto():
def test_agent_behavior_spec_from_proto():
group_spec = agent_group_spec_from_proto(bp, agent_proto)
assert group_spec.is_action_discrete()
assert not group_spec.is_action_continuous()
assert group_spec.observation_shapes == [(3,), (4,)]
assert group_spec.discrete_action_branches == (5, 4)
assert group_spec.action_size == 2
behavior_spec = behavior_spec_from_proto(bp, agent_proto)
assert behavior_spec.is_action_discrete()
assert not behavior_spec.is_action_continuous()
assert behavior_spec.observation_shapes == [(3,), (4,)]
assert behavior_spec.discrete_action_branches == (5, 4)
assert behavior_spec.action_size == 2
group_spec = agent_group_spec_from_proto(bp, agent_proto)
assert not group_spec.is_action_discrete()
assert group_spec.is_action_continuous()
assert group_spec.action_size == 6
behavior_spec = behavior_spec_from_proto(bp, agent_proto)
assert not behavior_spec.is_action_discrete()
assert behavior_spec.is_action_continuous()
assert behavior_spec.action_size == 6
group_spec = AgentGroupSpec(shapes, ActionType.CONTINUOUS, 3)
behavior_spec = BehaviorSpec(shapes, ActionType.CONTINUOUS, 3)
batched_step_result_from_proto(ap_list, group_spec)
steps_from_proto(ap_list, behavior_spec)
group_spec = AgentGroupSpec(shapes, ActionType.CONTINUOUS, 3)
behavior_spec = BehaviorSpec(shapes, ActionType.CONTINUOUS, 3)
batched_step_result_from_proto(ap_list, group_spec)
steps_from_proto(ap_list, behavior_spec)

20
ml-agents-envs/mlagents_envs/tests/test_side_channel.py


read_vals.append(msg_in.read_bool())
assert vals == read_vals
# Test reading with defaults
assert msg_in.read_bool() is False
assert msg_in.read_bool(default_value=True) is True
def test_message_int32():
val = 1337

read_val = msg_in.read_int32()
assert val == read_val
# Test reading with defaults
assert 0 == msg_in.read_int32()
assert val == msg_in.read_int32(default_value=val)
def test_message_float32():
val = 42.0

# These won't be exactly equal in general, since python floats are 64-bit.
assert val == read_val
# Test reading with defaults
assert 0.0 == msg_in.read_float32()
assert val == msg_in.read_float32(default_value=val)
def test_message_string():
val = "mlagents!"

read_val = msg_in.read_string()
assert val == read_val
# Test reading with defaults
assert "" == msg_in.read_string()
assert val == msg_in.read_string(default_value=val)
def test_message_float_list():
val = [1.0, 3.0, 9.0]

read_val = msg_in.read_float32_list()
# These won't be exactly equal in general, since python floats are 64-bit.
assert val == read_val
# Test reading with defaults
assert [] == msg_in.read_float32_list()
assert val == msg_in.read_float32_list(default_value=val)

7
ml-agents-envs/mlagents_envs/tests/test_timers.py


}
},
"gauges": {"my_gauge": {"value": 4.0, "max": 4.0, "min": 0.0, "count": 3}},
"metadata": {
"timer_format_version": timers.TIMER_FORMAT_VERSION,
"start_time_seconds": mock.ANY,
"end_time_seconds": mock.ANY,
"python_version": mock.ANY,
"command_line_arguments": mock.ANY,
},
}
assert timer_tree == expected_tree

39
ml-agents-envs/mlagents_envs/timers.py


"""
import math
from time import perf_counter
import sys
import time
TIMER_FORMAT_VERSION = "0.1.0"
class TimerNode:

sure that pushes and pops are already matched.
"""
__slots__ = ["root", "stack", "start_time", "gauges"]
__slots__ = ["root", "stack", "start_time", "gauges", "metadata"]
self.start_time = perf_counter()
self.start_time = time.perf_counter()
self.metadata: Dict[str, str] = {}
self._add_default_metadata()
self.start_time = perf_counter()
self.start_time = time.perf_counter()
self.metadata: Dict[str, str] = {}
self._add_default_metadata()
def push(self, name: str) -> TimerNode:
"""

Update the total time and count of the root name, and return it.
"""
root = self.root
root.total = perf_counter() - self.start_time
root.total = time.perf_counter() - self.start_time
root.count = 1
return root

if self.gauges:
res["gauges"] = self._get_gauges()
if self.metadata:
self.metadata["end_time_seconds"] = str(int(time.time()))
res["metadata"] = self.metadata
res["total"] = node.total
res["count"] = node.count

else:
self.gauges[name] = GaugeNode(value)
def add_metadata(self, key: str, value: str) -> None:
self.metadata[key] = value
def _add_default_metadata(self):
self.metadata["timer_format_version"] = TIMER_FORMAT_VERSION
self.metadata["start_time_seconds"] = str(int(time.time()))
self.metadata["python_version"] = sys.version
self.metadata["command_line_arguments"] = " ".join(sys.argv)
# Global instance of a TimerStack. This is generally all that we need for profiling, but you can potentially

"""
timer_stack = timer_stack or _global_timer_stack
timer_node = timer_stack.push(name)
start_time = perf_counter()
start_time = time.perf_counter()
try:
# The wrapped code block will run here.

# We'll accumulate the time, and the exception (if any) gets raised automatically.
elapsed = perf_counter() - start_time
elapsed = time.perf_counter() - start_time
timer_node.add_time(elapsed)
timer_stack.pop()

"""
timer_stack = timer_stack or _global_timer_stack
timer_stack.set_gauge(name, value)
def add_metadata(key: str, value: str, timer_stack: TimerStack = None) -> None:
timer_stack = timer_stack or _global_timer_stack
timer_stack.add_metadata(key, value)
def get_timer_tree(timer_stack: TimerStack = None) -> Dict[str, Any]:
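The metadata support is additive: defaults such as timer_format_version and start_time_seconds are filled in automatically, and callers can attach their own keys, which then appear under "metadata" in the serialized timer tree. A minimal sketch on the global timer stack, as in the tests above; the key and value are arbitrary examples:

from mlagents_envs.timers import add_metadata, get_timer_tree, hierarchical_timer

with hierarchical_timer("environment_step"):
    pass  # timed work would go here

add_metadata("run_id", "sac-apex-smoke-test")
tree = get_timer_tree()
assert tree["metadata"]["run_id"] == "sac-apex-smoke-test"
assert "timer_format_version" in tree["metadata"]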

180
ml-agents/mlagents/trainers/agent_processor.py


import sys
from typing import List, Dict, TypeVar, Generic, Tuple, Any
from typing import List, Dict, TypeVar, Generic, Tuple, Any, Union
from mlagents_envs.base_env import BatchedStepResult, StepResult
from mlagents_envs.base_env import (
DecisionSteps,
DecisionStep,
TerminalSteps,
TerminalStep,
)
from mlagents_envs.side_channel.stats_side_channel import StatsAggregationMethod
from mlagents.trainers.trajectory import Trajectory, AgentExperience
from mlagents.trainers.policy.tf_policy import TFPolicy

:param stats_category: The category under which to write the stats. Usually, this comes from the Trainer.
"""
self.experience_buffers: Dict[str, List[AgentExperience]] = defaultdict(list)
self.last_step_result: Dict[str, Tuple[StepResult, int]] = {}
self.last_step_result: Dict[str, Tuple[DecisionStep, int]] = {}
# last_take_action_outputs stores the action a_t taken before the current observation s_(t+1), while
# grabbing previous_action from the policy grabs the action PRIOR to that, a_(t-1).
self.last_take_action_outputs: Dict[str, ActionInfoOutputs] = {}

def add_experiences(
self,
batched_step_result: BatchedStepResult,
decision_steps: DecisionSteps,
terminal_steps: TerminalSteps,
:param batched_step_result: current BatchedStepResult.
:param decision_steps: current DecisionSteps.
:param terminal_steps: current TerminalSteps.
:param previous_action: The outputs of the Policy's get_action method.
"""
take_action_outputs = previous_action.outputs

if global_id in self.last_step_result: # Don't store if agent just reset
self.last_take_action_outputs[global_id] = take_action_outputs
for _id in batched_step_result.agent_id: # Assume agent_id is 1-D
local_id = int(
_id
) # Needed for mypy to pass since ndarray has no content type
curr_agent_step = batched_step_result.get_agent_step_result(local_id)
# Iterate over all the terminal steps
for terminal_step in terminal_steps.values():
local_id = terminal_step.agent_id
stored_agent_step, idx = self.last_step_result.get(global_id, (None, None))
stored_take_action_outputs = self.last_take_action_outputs.get(
global_id, None
self._process_step(
terminal_step, global_id, terminal_steps.agent_id_to_index[local_id]
if stored_agent_step is not None and stored_take_action_outputs is not None:
# We know the step is from the same worker, so use the local agent id.
obs = stored_agent_step.obs
if not stored_agent_step.done:
if self.policy.use_recurrent:
memory = self.policy.retrieve_memories([global_id])[0, :]
else:
memory = None
done = curr_agent_step.done
max_step = curr_agent_step.max_step
# Add the outputs of the last eval
action = stored_take_action_outputs["action"][idx]
if self.policy.use_continuous_act:
action_pre = stored_take_action_outputs["pre_action"][idx]
else:
action_pre = None
action_probs = stored_take_action_outputs["log_probs"][idx]
action_mask = stored_agent_step.action_mask
prev_action = self.policy.retrieve_previous_action([global_id])[
0, :
]
experience = AgentExperience(
obs=obs,
reward=curr_agent_step.reward,
done=done,
action=action,
action_probs=action_probs,
action_pre=action_pre,
action_mask=action_mask,
prev_action=prev_action,
max_step=max_step,
memory=memory,
)
# Add the value outputs if needed
self.experience_buffers[global_id].append(experience)
self.episode_rewards[global_id] += curr_agent_step.reward
if (
curr_agent_step.done
or (
len(self.experience_buffers[global_id])
>= self.max_trajectory_length
)
) and len(self.experience_buffers[global_id]) > 0:
# Make next AgentExperience
next_obs = curr_agent_step.obs
trajectory = Trajectory(
steps=self.experience_buffers[global_id],
agent_id=global_id,
next_obs=next_obs,
behavior_id=self.behavior_id,
)
for traj_queue in self.trajectory_queues:
traj_queue.put(trajectory)
self.experience_buffers[global_id] = []
if curr_agent_step.done:
# Record episode length for agents which have had at least
# 1 step. Done after reset ignored.
self.stats_reporter.add_stat(
"Environment/Episode Length",
self.episode_steps.get(global_id, 0),
)
elif not curr_agent_step.done:
self.episode_steps[global_id] += 1
# Index is needed to grab from last_take_action_outputs
self.last_step_result[global_id] = (
curr_agent_step,
batched_step_result.agent_id_to_index[_id],
# Iterate over all the decision steps
for ongoing_step in decision_steps.values():
local_id = ongoing_step.agent_id
global_id = get_global_agent_id(worker_id, local_id)
self._process_step(
ongoing_step, global_id, decision_steps.agent_id_to_index[local_id]
# Delete all done agents, regardless of if they had a 0-length episode.
if curr_agent_step.done:
self._clean_agent_data(global_id)
for _gid in action_global_agent_ids:
# If the ID doesn't have a last step result, the agent just reset,

self.policy.save_previous_action(
[_gid], take_action_outputs["action"]
)
def _process_step(
self, step: Union[TerminalStep, DecisionStep], global_id: str, index: int
) -> None:
terminated = isinstance(step, TerminalStep)
stored_decision_step, idx = self.last_step_result.get(global_id, (None, None))
stored_take_action_outputs = self.last_take_action_outputs.get(global_id, None)
if not terminated:
# Index is needed to grab from last_take_action_outputs
self.last_step_result[global_id] = (step, index)
# This state is the consequence of a past action
if stored_decision_step is not None and stored_take_action_outputs is not None:
obs = stored_decision_step.obs
if self.policy.use_recurrent:
memory = self.policy.retrieve_memories([global_id])[0, :]
else:
memory = None
done = terminated # Since this is an ongoing step
max_step = step.max_step if terminated else False
# Add the outputs of the last eval
action = stored_take_action_outputs["action"][idx]
if self.policy.use_continuous_act:
action_pre = stored_take_action_outputs["pre_action"][idx]
else:
action_pre = None
action_probs = stored_take_action_outputs["log_probs"][idx]
action_mask = stored_decision_step.action_mask
prev_action = self.policy.retrieve_previous_action([global_id])[0, :]
experience = AgentExperience(
obs=obs,
reward=step.reward,
done=done,
action=action,
action_probs=action_probs,
action_pre=action_pre,
action_mask=action_mask,
prev_action=prev_action,
max_step=max_step,
memory=memory,
)
# Add the value outputs if needed
self.experience_buffers[global_id].append(experience)
self.episode_rewards[global_id] += step.reward
if not terminated:
self.episode_steps[global_id] += 1
# if the trajectory is too long, we truncate it
if (
len(self.experience_buffers[global_id]) >= self.max_trajectory_length
or terminated
):
# Make next AgentExperience
next_obs = step.obs
trajectory = Trajectory(
steps=self.experience_buffers[global_id],
agent_id=global_id,
next_obs=next_obs,
behavior_id=self.behavior_id,
)
for traj_queue in self.trajectory_queues:
traj_queue.put(trajectory)
self.experience_buffers[global_id] = []
if terminated:
# Record episode length.
self.stats_reporter.add_stat(
"Environment/Episode Length", self.episode_steps.get(global_id, 0)
)
self._clean_agent_data(global_id)
def _clean_agent_data(self, global_id: str) -> None:
"""

16
ml-agents/mlagents/trainers/brain_conversion_utils.py


from mlagents.trainers.brain import BrainParameters, CameraResolution
from mlagents_envs.base_env import AgentGroupSpec
from mlagents_envs.base_env import BehaviorSpec
def group_spec_to_brain_parameters(
name: str, group_spec: AgentGroupSpec
def behavior_spec_to_brain_parameters(
name: str, behavior_spec: BehaviorSpec
[shape[0] for shape in group_spec.observation_shapes if len(shape) == 1]
[shape[0] for shape in behavior_spec.observation_shapes if len(shape) == 1]
vis_sizes = [shape for shape in group_spec.observation_shapes if len(shape) == 3]
vis_sizes = [shape for shape in behavior_spec.observation_shapes if len(shape) == 3]
if group_spec.is_action_discrete():
a_size += list(group_spec.discrete_action_branches)
if behavior_spec.is_action_discrete():
a_size += list(behavior_spec.discrete_action_branches)
a_size += [group_spec.action_size]
a_size += [behavior_spec.action_size]
vector_action_space_type = 1
return BrainParameters(
name, int(vec_size), cam_res, a_size, [], vector_action_space_type
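For callers that still consume BrainParameters, the conversion above can be exercised directly; a short sketch with a made-up BehaviorSpec (one 8-dimensional vector observation, no cameras, two continuous actions):

from mlagents_envs.base_env import BehaviorSpec, ActionType
from mlagents.trainers.brain_conversion_utils import behavior_spec_to_brain_parameters

spec = BehaviorSpec([(8,)], ActionType.CONTINUOUS, 2)
brain_params = behavior_spec_to_brain_parameters("MyBehavior", spec)
# The resulting BrainParameters describes an 8-dim vector observation,
# no camera resolutions, and a single continuous action branch of size 2.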

73
ml-agents/mlagents/trainers/demo_loader.py


import numpy as np
from mlagents.trainers.buffer import AgentBuffer
from mlagents.trainers.brain import BrainParameters
from mlagents.trainers.brain_conversion_utils import group_spec_to_brain_parameters
from mlagents.trainers.brain_conversion_utils import behavior_spec_to_brain_parameters
from mlagents_envs.rpc_utils import (
agent_group_spec_from_proto,
batched_step_result_from_proto,
)
from mlagents_envs.base_env import AgentGroupSpec
from mlagents_envs.rpc_utils import behavior_spec_from_proto, steps_from_proto
from mlagents_envs.base_env import BehaviorSpec
from mlagents_envs.communicator_objects.brain_parameters_pb2 import BrainParametersProto
from mlagents_envs.communicator_objects.demonstration_meta_pb2 import (
DemonstrationMetaProto,

from google.protobuf.internal.encoder import _EncodeVarint # type: ignore
INITIAL_POS = 33
SUPPORTED_DEMONSTRATION_VERSIONS = frozenset([0, 1])
group_spec: AgentGroupSpec,
behavior_spec: BehaviorSpec,
sequence_length: int,
) -> AgentBuffer:
# Create and populate buffer using experiences

if idx > len(pair_infos) - 2:
break
next_pair_info = pair_infos[idx + 1]
current_step_info = batched_step_result_from_proto(
[current_pair_info.agent_info], group_spec
current_decision_step, current_terminal_step = steps_from_proto(
[current_pair_info.agent_info], behavior_spec
next_step_info = batched_step_result_from_proto(
[next_pair_info.agent_info], group_spec
next_decision_step, next_terminal_step = steps_from_proto(
[next_pair_info.agent_info], behavior_spec
)
previous_action = (
np.array(pair_infos[idx].action_info.vector_actions, dtype=np.float32) * 0

pair_infos[idx - 1].action_info.vector_actions, dtype=np.float32
)
curr_agent_id = current_step_info.agent_id[0]
current_agent_step_info = current_step_info.get_agent_step_result(curr_agent_id)
next_agent_id = next_step_info.agent_id[0]
next_agent_step_info = next_step_info.get_agent_step_result(next_agent_id)
next_done = len(next_terminal_step) == 1
next_reward = 0
if len(next_terminal_step) == 1:
next_reward = next_terminal_step.reward[0]
else:
next_reward = next_decision_step.reward[0]
current_obs = None
if len(current_terminal_step) == 1:
current_obs = list(current_terminal_step.values())[0].obs
else:
current_obs = list(current_decision_step.values())[0].obs
demo_raw_buffer["done"].append(next_agent_step_info.done)
demo_raw_buffer["rewards"].append(next_agent_step_info.reward)
split_obs = SplitObservations.from_observations(current_agent_step_info.obs)
demo_raw_buffer["done"].append(next_done)
demo_raw_buffer["rewards"].append(next_reward)
split_obs = SplitObservations.from_observations(current_obs)
if next_step_info.done:
if next_done:
demo_raw_buffer.resequence_and_append(
demo_processed_buffer, batch_size=None, training_length=sequence_length
)

:param sequence_length: Length of trajectories to fill buffer.
:return:
"""
group_spec, info_action_pair, _ = load_demonstration(file_path)
demo_buffer = make_demo_buffer(info_action_pair, group_spec, sequence_length)
brain_params = group_spec_to_brain_parameters("DemoBrain", group_spec)
behavior_spec, info_action_pair, _ = load_demonstration(file_path)
demo_buffer = make_demo_buffer(info_action_pair, behavior_spec, sequence_length)
brain_params = behavior_spec_to_brain_parameters("DemoBrain", behavior_spec)
return brain_params, demo_buffer

)
INITIAL_POS = 33
@timed
def load_demonstration(
file_path: str

# First 32 bytes of file dedicated to meta-data.
file_paths = get_demo_files(file_path)
group_spec = None
behavior_spec = None
brain_param_proto = None
info_action_pairs = []
total_expected = 0

if obs_decoded == 0:
meta_data_proto = DemonstrationMetaProto()
meta_data_proto.ParseFromString(data[pos : pos + next_pos])
if (
meta_data_proto.api_version
not in SUPPORTED_DEMONSTRATION_VERSIONS
):
raise RuntimeError(
f"Can't load Demonstration data from an unsupported version ({meta_data_proto.api_version})"
)
total_expected += meta_data_proto.number_steps
pos = INITIAL_POS
if obs_decoded == 1:

if obs_decoded > 1:
agent_info_action = AgentInfoActionPairProto()
agent_info_action.ParseFromString(data[pos : pos + next_pos])
if group_spec is None:
group_spec = agent_group_spec_from_proto(
if behavior_spec is None:
behavior_spec = behavior_spec_from_proto(
brain_param_proto, agent_info_action.agent_info
)
info_action_pairs.append(agent_info_action)

obs_decoded += 1
if not group_spec:
if not behavior_spec:
return group_spec, info_action_pairs, total_expected
return behavior_spec, info_action_pairs, total_expected
def write_delimited(f, message):

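Taken together, these renames leave the demo-loading entry points intact apart from the spec type they hand back. A short usage sketch (the .demo path is a placeholder):

from mlagents.trainers.demo_loader import demo_to_buffer, load_demonstration

# load_demonstration() now returns a BehaviorSpec instead of an AgentGroupSpec.
behavior_spec, info_action_pairs, total_expected = load_demonstration("demos/Expert.demo")
print(behavior_spec.observation_shapes, len(info_action_pairs), total_expected)

# demo_to_buffer() still returns (BrainParameters, AgentBuffer) for the components that consume demonstrations.
brain_params, demo_buffer = demo_to_buffer("demos/Expert.demo", sequence_length=1)
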
37
ml-agents/mlagents/trainers/env_manager.py


from abc import ABC, abstractmethod
from typing import List, Dict, NamedTuple, Iterable, Tuple
from mlagents_envs.base_env import BatchedStepResult, AgentGroupSpec, AgentGroup
from mlagents_envs.base_env import (
DecisionSteps,
TerminalSteps,
BehaviorSpec,
BehaviorName,
)
from mlagents.trainers.brain import BrainParameters
from mlagents.trainers.policy.tf_policy import TFPolicy
from mlagents.trainers.agent_processor import AgentManager, AgentManagerQueue

AllStepResult = Dict[AgentGroup, BatchedStepResult]
AllGroupSpec = Dict[AgentGroup, AgentGroupSpec]
AllStepResult = Dict[BehaviorName, Tuple[DecisionSteps, TerminalSteps]]
AllGroupSpec = Dict[BehaviorName, BehaviorSpec]
logger = get_logger(__name__)

worker_id: int
brain_name_to_action_info: Dict[AgentGroup, ActionInfo]
brain_name_to_action_info: Dict[BehaviorName, ActionInfo]
def name_behavior_ids(self) -> Iterable[AgentGroup]:
def name_behavior_ids(self) -> Iterable[BehaviorName]:
return self.current_all_step_result.keys()
@staticmethod

class EnvManager(ABC):
def __init__(self):
self.policies: Dict[AgentGroup, TFPolicy] = {}
self.agent_managers: Dict[AgentGroup, AgentManager] = {}
self.policies: Dict[BehaviorName, TFPolicy] = {}
self.agent_managers: Dict[BehaviorName, AgentManager] = {}
def set_policy(self, brain_name: AgentGroup, policy: TFPolicy) -> None:
def set_policy(self, brain_name: BehaviorName, policy: TFPolicy) -> None:
def set_agent_manager(self, brain_name: AgentGroup, manager: AgentManager) -> None:
def set_agent_manager(
self, brain_name: BehaviorName, manager: AgentManager
) -> None:
self.agent_managers[brain_name] = manager
@abstractmethod

@property
@abstractmethod
def external_brains(self) -> Dict[AgentGroup, BrainParameters]:
def external_brains(self) -> Dict[BehaviorName, BrainParameters]:
def get_properties(self) -> Dict[AgentGroup, float]:
def get_properties(self) -> Dict[BehaviorName, float]:
pass
@abstractmethod

)
)
continue
decision_steps, terminal_steps = step_info.current_all_step_result[
name_behavior_id
]
step_info.current_all_step_result[name_behavior_id],
decision_steps,
terminal_steps,
step_info.worker_id,
step_info.brain_name_to_action_info.get(
name_behavior_id, ActionInfo.empty()

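With AllStepResult now mapping each BehaviorName to a (DecisionSteps, TerminalSteps) pair rather than a single BatchedStepResult, downstream loops unpack the tuple. A minimal sketch of the new consumption pattern (the function is illustrative, not part of this diff):

from typing import Dict, Tuple
from mlagents_envs.base_env import DecisionSteps, TerminalSteps

AllStepResult = Dict[str, Tuple[DecisionSteps, TerminalSteps]]

def summarize(all_step_result: AllStepResult) -> None:
    for name, (decision_steps, terminal_steps) in all_step_result.items():
        # len() replaces the old BatchedStepResult.n_agents() accessor.
        print(name, len(decision_steps), "decision requests,", len(terminal_steps), "terminations")
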
55
ml-agents/mlagents/trainers/ghost/trainer.py


"""

Forwarding call to the wrapped trainer's save_model.
"""
self.trainer.save_model(name_behavior_id)
parsed_behavior_id = self._name_to_parsed_behavior_id[name_behavior_id]
brain_name = parsed_behavior_id.brain_name
self.trainer.save_model(brain_name)
First loads the current snapshot.
policy = self.trainer.get_policy(brain_name)
policy.load_weights(self.current_policy_snapshot[brain_name])
def create_policy(self, brain_parameters: BrainParameters) -> TFPolicy:
def create_policy(
self, parsed_behavior_id: BehaviorIdentifiers, brain_parameters: BrainParameters
) -> TFPolicy:
"""
return self.trainer.create_policy(brain_parameters)
def add_policy(
self, parsed_behavior_id: BehaviorIdentifiers, policy: TFPolicy
) -> None:
"""
Adds policy to trainer. The first policy encountered sets the wrapped
The first policy encountered sets the wrapped
:param name_behavior_id: Behavior ID that the policy should belong to.
:param policy: Policy to associate with name_behavior_id.
name_behavior_id = parsed_behavior_id.behavior_id
policy = self.trainer.create_policy(parsed_behavior_id, brain_parameters)
policy.create_tf_graph()
policy.init_load_weights()
self.policies[name_behavior_id] = policy
policy.create_tf_graph()
self._name_to_parsed_behavior_id[name_behavior_id] = parsed_behavior_id
# for saving/swapping snapshots
policy.init_load_weights()
internal_trainer_policy = self.trainer.create_policy(
parsed_behavior_id, brain_parameters
)
internal_trainer_policy.create_tf_graph()
internal_trainer_policy.init_load_weights()
] = policy.get_weights()
] = internal_trainer_policy.get_weights()
policy.load_weights(internal_trainer_policy.get_weights())
self.trainer.add_policy(parsed_behavior_id, policy)
self.trainer.add_policy(parsed_behavior_id, internal_trainer_policy)
return policy
def add_policy(
self, parsed_behavior_id: BehaviorIdentifiers, policy: TFPolicy
) -> None:
"""
Adds policy to GhostTrainer.
:param parsed_behavior_id: Behavior ID that the policy should belong to.
:param policy: Policy to associate with name_behavior_id.
"""
name_behavior_id = parsed_behavior_id.behavior_id
self._name_to_parsed_behavior_id[name_behavior_id] = parsed_behavior_id
self.policies[name_behavior_id] = policy
def get_policy(self, name_behavior_id: str) -> TFPolicy:
"""

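The reshuffle above separates two jobs that used to be fused: create_policy() now builds the wrapped trainer's internal policy, snapshots its weights, and copies them into the ghost policy it returns, while add_policy() only registers an already-built policy. A compact stand-in (stub objects, not the real trainers or TFPolicy) of that weight hand-off:

class StubPolicy:                                  # stands in for TFPolicy
    def __init__(self) -> None:
        self.weights = {"w": 0.0}
    def get_weights(self) -> dict:
        return dict(self.weights)
    def load_weights(self, weights: dict) -> None:
        self.weights = dict(weights)

internal_trainer_policy = StubPolicy()             # built by the wrapped PPO/SAC trainer
ghost_policy = StubPolicy()                        # policy the GhostTrainer hands back

current_policy_snapshot = {"BrainA": internal_trainer_policy.get_weights()}
ghost_policy.load_weights(current_policy_snapshot["BrainA"])   # ghost starts from the snapshot
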
32
ml-agents/mlagents/trainers/learn.py


from mlagents_envs.side_channel.side_channel import SideChannel
from mlagents_envs.side_channel.engine_configuration_channel import EngineConfig
from mlagents_envs.exception import UnityEnvironmentException
from mlagents_envs.timers import hierarchical_timer, get_timer_tree
from mlagents_envs.timers import (
hierarchical_timer,
get_timer_tree,
add_metadata as add_timer_metadata,
)
from mlagents_envs import logging_util
logger = logging_util.get_logger(__name__)

default=False,
dest="force",
action="store_true",
help="Force-overwrite existing models and summaries for a run-id that has been used "
help="Force-overwrite existing models and summaries for a run ID that has been used "
help="The directory name for model and summary statistics",
help="The run identifier for model and summary statistics.",
)
argparser.add_argument(
"--initialize-from",
metavar="RUN_ID",
default=None,
help="Specify a previously saved run ID from which to initialize the model from. "
"This can be used, for instance, to fine-tune an existing model on a new environment. ",
)
argparser.add_argument(
"--save-freq", default=50000, type=int, help="Frequency at which to save model"

dest="inference",
action="store_true",
help="Run in Python inference mode (don't train). Use with --resume to load a model trained with an "
"existing run-id.",
"existing run ID.",
)
argparser.add_argument(
"--base-port",

seed: int = parser.get_default("seed")
env_path: Optional[str] = parser.get_default("env_path")
run_id: str = parser.get_default("run_id")
initialize_from: str = parser.get_default("initialize_from")
load_model: bool = parser.get_default("load_model")
resume: bool = parser.get_default("resume")
force: bool = parser.get_default("force")

"""
with hierarchical_timer("run_training.setup"):
model_path = f"./models/{options.run_id}"
maybe_init_path = (
f"./models/{options.initialize_from}" if options.initialize_from else None
)
summaries_dir = "./summaries"
port = options.base_port

],
)
handle_existing_directories(
model_path, summaries_dir, options.resume, options.force
model_path, summaries_dir, options.resume, options.force, maybe_init_path
)
tb_writer = TensorboardWriter(summaries_dir, clear_past_data=not options.resume)
gauge_write = GaugeWriter()

not options.inference,
options.resume,
run_seed,
maybe_init_path,
maybe_meta_curriculum,
options.multi_gpu,
)

run_seed = options.seed
if options.cpu:
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
# Add some timer metadata
add_timer_metadata("mlagents_version", mlagents.trainers.__version__)
add_timer_metadata("mlagents_envs_version", mlagents_envs.__version__)
add_timer_metadata("communication_protocol_version", UnityEnvironment.API_VERSION)
add_timer_metadata("tensorflow_version", tf_utils.tf.__version__)
if options.seed == -1:
run_seed = np.random.randint(0, 10000)
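
A small sketch of how the new --initialize-from option and the seed fallback resolve into paths and a run seed (the run IDs below are examples, not values from this diff):

import numpy as np

run_id, initialize_from, seed = "ppo_run", "pretrained_run", -1

model_path = f"./models/{run_id}"
maybe_init_path = f"./models/{initialize_from}" if initialize_from else None
run_seed = seed if seed != -1 else np.random.randint(0, 10000)

print(model_path, maybe_init_path, run_seed)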

10
ml-agents/mlagents/trainers/policy/nn_policy.py


from typing import Any, Dict, Optional, List
from mlagents.tf_utils import tf
from mlagents_envs.timers import timed
from mlagents_envs.base_env import BatchedStepResult
from mlagents_envs.base_env import DecisionSteps
from mlagents.trainers.brain import BrainParameters
from mlagents.trainers.models import EncoderType
from mlagents.trainers.models import ModelUtils

@timed
def evaluate(
self, batched_step_result: BatchedStepResult, global_agent_ids: List[str]
self, decision_requests: DecisionSteps, global_agent_ids: List[str]
:param batched_step_result: BatchedStepResult object containing inputs.
:param decision_requests: DecisionSteps object containing inputs.
self.batch_size_ph: batched_step_result.n_agents(),
self.batch_size_ph: len(decision_requests),
self.sequence_length_ph: 1,
}
if self.use_recurrent:

)
feed_dict[self.memory_in] = self.retrieve_memories(global_agent_ids)
feed_dict = self.fill_eval_dict(feed_dict, batched_step_result)
feed_dict = self.fill_eval_dict(feed_dict, decision_requests)
run_out = self._execute_model(feed_dict, self.inference_dict)
return run_out
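
The main behavioral detail in this rename is the batch size: it is now simply len(decision_requests). A quick sketch constructing a two-agent DecisionSteps by hand, using the positional argument order shown elsewhere in this diff:

import numpy as np
from mlagents_envs.base_env import DecisionSteps

decision_requests = DecisionSteps(
    [np.zeros((2, 3), dtype=np.float32)],     # one 3-float vector observation for 2 agents
    np.zeros(2, dtype=np.float32),            # rewards
    np.array([0, 1], dtype=np.int32),         # agent ids
    None,                                     # no action mask
)
batch_size = len(decision_requests)           # replaces batched_step_result.n_agents(); -> 2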

4
ml-agents/mlagents/trainers/policy/policy.py


from abc import ABC, abstractmethod
from mlagents_envs.base_env import BatchedStepResult
from mlagents_envs.base_env import DecisionSteps
from mlagents.trainers.action_info import ActionInfo

self, batched_step_result: BatchedStepResult, worker_id: int = 0
self, decision_requests: DecisionSteps, worker_id: int = 0
) -> ActionInfo:
pass

81
ml-agents/mlagents/trainers/policy/tf_policy.py


from mlagents.trainers.action_info import ActionInfo
from mlagents.trainers.trajectory import SplitObservations
from mlagents.trainers.brain_conversion_utils import get_global_agent_id
from mlagents_envs.base_env import BatchedStepResult
from mlagents_envs.base_env import DecisionSteps
from mlagents.trainers.models import ModelUtils

if self.use_continuous_act:
self.num_branches = self.brain.vector_action_space_size[0]
self.model_path = trainer_parameters["model_path"]
self.initialize_path = trainer_parameters.get("init_path", None)
self.keep_checkpoints = trainer_parameters.get("keep_checkpoints", 5)
self.graph = tf.Graph()
self.sess = tf.Session(

init = tf.global_variables_initializer()
self.sess.run(init)
def _load_graph(self):
def _load_graph(self, model_path: str, reset_global_steps: bool = False) -> None:
logger.info("Loading Model for brain {}".format(self.brain.brain_name))
ckpt = tf.train.get_checkpoint_state(self.model_path)
logger.info(
"Loading model for brain {} from {}.".format(
self.brain.brain_name, model_path
)
)
ckpt = tf.train.get_checkpoint_state(model_path)
"--run-id. and that the previous run you are resuming from had the same "
"behavior names.".format(self.model_path)
"--run-id and that the previous run you are loading from had the same "
"behavior names.".format(model_path)
)
try:
self.saver.restore(self.sess, ckpt.model_checkpoint_path)
except tf.errors.NotFoundError:
raise UnityPolicyException(
"The model {0} was found but could not be loaded. Make "
"sure the model is from the same version of ML-Agents, has the same behavior parameters, "
"and is using the same trainer configuration as the current run.".format(
model_path
)
self.saver.restore(self.sess, ckpt.model_checkpoint_path)
if reset_global_steps:
logger.info(
"Starting training from step 0 and saving to {}.".format(
self.model_path
)
)
else:
logger.info(
"Resuming training from step {}.".format(self.get_current_step())
)
if self.load:
self._load_graph()
# If there is an initialize path, load from that. Else, load from the set model path.
# If load is set to True, don't reset steps to 0. Else, do. This allows a user to,
# e.g., resume from an initialize path.
reset_steps = not self.load
if self.initialize_path is not None:
self._load_graph(self.initialize_path, reset_global_steps=reset_steps)
elif self.load:
self._load_graph(self.model_path, reset_global_steps=reset_steps)
else:
self._initialize_graph()

self.sess.run(self.assign_ops, feed_dict=feed_dict)
def evaluate(
self, batched_step_result: BatchedStepResult, global_agent_ids: List[str]
self, decision_requests: DecisionSteps, global_agent_ids: List[str]
:param batched_step_result: BatchedStepResult input to network.
:param decision_requests: DecisionSteps input to network.
self, batched_step_result: BatchedStepResult, worker_id: int = 0
self, decision_requests: DecisionSteps, worker_id: int = 0
:param batched_step_result: A dictionary of brain names and BatchedStepResult from environment.
:param decision_requests: A dictionary of brain names and DecisionSteps from environment.
the BatchedStepResult came from. Used to construct a globally unique id for each agent.
the DecisionSteps came from. Used to construct a globally unique id for each agent.
if batched_step_result.n_agents() == 0:
if len(decision_requests) == 0:
for agent_id in batched_step_result.agent_id
for agent_id in decision_requests.agent_id
batched_step_result, global_agent_ids
decision_requests, global_agent_ids
)
self.save_memories(global_agent_ids, run_out.get("memory_out"))

outputs=run_out,
agent_ids=batched_step_result.agent_id,
agent_ids=decision_requests.agent_id,
)
def update(self, mini_batch, num_sequences):

feed_dict[self.vector_in] = vec_vis_obs.vector_observations
if not self.use_continuous_act:
mask = np.ones(
(
batched_step_result.n_agents(),
np.sum(self.brain.vector_action_space_size),
),
(len(batched_step_result), np.sum(self.brain.vector_action_space_size)),
dtype=np.float32,
)
if batched_step_result.action_mask is not None:

"""
step = self.sess.run(self.global_step)
return step
def _set_step(self, step: int) -> int:
"""
Sets current model step to step without creating additional ops.
:param step: Step to set the current model step to.
:return: The step the model was set to.
"""
current_step = self.get_current_step()
# Increment a positive or negative number of steps.
return self.increment_step(step - current_step)
def increment_step(self, n_steps):
"""

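The load-path selection above is easy to misread in diff form. Restated as a free function (a stand-in, not TFPolicy itself): an initialize path takes precedence over the model path, and global steps are reset unless the run is an explicit resume (load=True):

from typing import Optional

def choose_load_action(load: bool, initialize_path: Optional[str], model_path: str) -> str:
    reset_steps = not load                       # resuming keeps the step counter
    if initialize_path is not None:
        return f"load {initialize_path} (reset_global_steps={reset_steps})"
    if load:
        return f"load {model_path} (reset_global_steps={reset_steps})"
    return "initialize a fresh graph"

assert choose_load_action(False, "./models/pretrained", "./models/run").startswith("load ./models/pretrained")
assert choose_load_action(True, None, "./models/run") == "load ./models/run (reset_global_steps=False)"
assert choose_load_action(False, None, "./models/run") == "initialize a fresh graph"
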
4
ml-agents/mlagents/trainers/ppo/trainer.py


self._clear_update_buffer()
return True
def create_policy(self, brain_parameters: BrainParameters) -> TFPolicy:
def create_policy(
self, parsed_behavior_id: BehaviorIdentifiers, brain_parameters: BrainParameters
) -> TFPolicy:
"""
Creates a PPO policy and adds it to the trainer's list of policies.
:param brain_parameters: specifications for policy construction

4
ml-agents/mlagents/trainers/sac/trainer.py


self._update_reward_signals()
return policy_was_updated
def create_policy(self, brain_parameters: BrainParameters) -> TFPolicy:
def create_policy(
self, parsed_behavior_id: BehaviorIdentifiers, brain_parameters: BrainParameters
) -> TFPolicy:
policy = NNPolicy(
self.seed,
brain_parameters,

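Both PPO and SAC pick up the same widened signature: create_policy() and add_policy() now take the parsed behavior ID as their first argument. A minimal stand-in (stub classes, not the real trainers or BehaviorIdentifiers) showing just the call shape:

from typing import Dict, NamedTuple

class FakeBehaviorIdentifiers(NamedTuple):        # stands in for BehaviorIdentifiers
    behavior_id: str
    brain_name: str

class FakeTrainer:
    def __init__(self) -> None:
        self.policies: Dict[str, dict] = {}
    def create_policy(self, parsed_behavior_id: FakeBehaviorIdentifiers, brain_parameters: dict) -> dict:
        return {"behavior": parsed_behavior_id.behavior_id, "brain": brain_parameters}
    def add_policy(self, parsed_behavior_id: FakeBehaviorIdentifiers, policy: dict) -> None:
        self.policies[parsed_behavior_id.behavior_id] = policy

trainer = FakeTrainer()
pid = FakeBehaviorIdentifiers(behavior_id="MyBehavior?team=0", brain_name="MyBehavior")
trainer.add_policy(pid, trainer.create_policy(pid, brain_parameters={"vec_obs_size": 8}))
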
26
ml-agents/mlagents/trainers/simple_env_manager.py


from typing import Dict, List
from mlagents_envs.base_env import BaseEnv, AgentGroup
from mlagents_envs.base_env import BaseEnv, BehaviorName
from mlagents.trainers.brain_conversion_utils import group_spec_to_brain_parameters
from mlagents.trainers.brain_conversion_utils import behavior_spec_to_brain_parameters
class SimpleEnvManager(EnvManager):

return [step_info]
def _reset_env(
self, config: Dict[AgentGroup, float] = None
self, config: Dict[BehaviorName, float] = None
) -> List[EnvironmentStep]: # type: ignore
if config is not None:
for k, v in config.items():

return [self.previous_step]
@property
def external_brains(self) -> Dict[AgentGroup, BrainParameters]:
def external_brains(self) -> Dict[BehaviorName, BrainParameters]:
for brain_name in self.env.get_agent_groups():
result[brain_name] = group_spec_to_brain_parameters(
brain_name, self.env.get_agent_group_spec(brain_name)
for brain_name in self.env.get_behavior_names():
result[brain_name] = behavior_spec_to_brain_parameters(
brain_name, self.env.get_behavior_spec(brain_name)
def get_properties(self) -> Dict[AgentGroup, float]:
def get_properties(self) -> Dict[BehaviorName, float]:
return self.shared_float_properties.get_property_dict_copy()
def close(self):

def _take_step(self, last_step: EnvironmentStep) -> Dict[AgentGroup, ActionInfo]:
def _take_step(self, last_step: EnvironmentStep) -> Dict[BehaviorName, ActionInfo]:
for brain_name, step_info in last_step.current_all_step_result.items():
for brain_name, step_tuple in last_step.current_all_step_result.items():
step_info,
step_tuple[0],
0, # As there is only one worker, we assign the worker_id to 0.
)
return all_action_info

for brain_name in self.env.get_agent_groups():
all_step_result[brain_name] = self.env.get_step_result(brain_name)
for brain_name in self.env.get_behavior_names():
all_step_result[brain_name] = self.env.get_steps(brain_name)
return all_step_result
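
The environment-side renames behind these changes read most clearly as one loop over the low-level API. A usage sketch, assuming a continuous-action behavior and attaching to the Unity Editor (file_name=None):

import numpy as np
from mlagents_envs.environment import UnityEnvironment

env = UnityEnvironment(file_name=None)                       # None -> connect to the Unity Editor
env.reset()
for name in env.get_behavior_names():                        # was get_agent_groups()
    spec = env.get_behavior_spec(name)                       # was get_agent_group_spec()
    decision_steps, terminal_steps = env.get_steps(name)     # was get_step_result()
    # Assumes a continuous action space; discrete specs expect integer branch choices instead.
    actions = np.zeros((len(decision_steps), spec.action_size), dtype=np.float32)
    env.set_actions(name, actions)
env.step()
env.close()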

24
ml-agents/mlagents/trainers/subprocess_env_manager.py


from multiprocessing import Process, Pipe, Queue
from multiprocessing.connection import Connection
from queue import Empty as EmptyQueueException
from mlagents_envs.base_env import BaseEnv, AgentGroup
from mlagents_envs.base_env import BaseEnv, BehaviorName
from mlagents_envs.logging_util import get_logger
from mlagents.trainers.env_manager import EnvManager, EnvironmentStep, AllStepResult
from mlagents_envs.timers import (

StatsAggregationMethod,
)
from mlagents_envs.side_channel.side_channel import SideChannel
from mlagents.trainers.brain_conversion_utils import group_spec_to_brain_parameters
from mlagents.trainers.brain_conversion_utils import behavior_spec_to_brain_parameters
logger = get_logger(__name__)

def _generate_all_results() -> AllStepResult:
all_step_result: AllStepResult = {}
for brain_name in env.get_agent_groups():
all_step_result[brain_name] = env.get_step_result(brain_name)
for brain_name in env.get_behavior_names():
all_step_result[brain_name] = env.get_steps(brain_name)
for brain_name in env.get_agent_groups():
result[brain_name] = group_spec_to_brain_parameters(
brain_name, env.get_agent_group_spec(brain_name)
for brain_name in env.get_behavior_names():
result[brain_name] = behavior_spec_to_brain_parameters(
brain_name, env.get_behavior_spec(brain_name)
)
return result

return list(map(lambda ew: ew.previous_step, self.env_workers))
@property
def external_brains(self) -> Dict[AgentGroup, BrainParameters]:
def external_brains(self) -> Dict[BehaviorName, BrainParameters]:
def get_properties(self) -> Dict[AgentGroup, float]:
def get_properties(self) -> Dict[BehaviorName, float]:
self.env_workers[0].send(EnvironmentCommand.GET_PROPERTIES)
return self.env_workers[0].recv().payload

return step_infos
@timed
def _take_step(self, last_step: EnvironmentStep) -> Dict[AgentGroup, ActionInfo]:
def _take_step(self, last_step: EnvironmentStep) -> Dict[BehaviorName, ActionInfo]:
for brain_name, batch_step_result in last_step.current_all_step_result.items():
for brain_name, step_tuple in last_step.current_all_step_result.items():
batch_step_result, last_step.worker_id
step_tuple[0], last_step.worker_id
)
return all_action_info

42
ml-agents/mlagents/trainers/tests/mock_brain.py


from unittest import mock
from typing import List
from typing import List, Tuple
from mlagents_envs.base_env import BatchedStepResult
from mlagents_envs.base_env import (
DecisionSteps,
TerminalSteps,
BehaviorSpec,
ActionType,
)
def create_mock_brainparams(

return mock_brain()
def create_mock_batchedstep(
def create_mock_steps(
num_agents: int = 1,
num_vector_observations: int = 0,
num_vis_observations: int = 0,

) -> BatchedStepResult:
) -> Tuple[DecisionSteps, TerminalSteps]:
Creates a mock BatchedStepResult with observations. Imitates constant
vector/visual observations, rewards, dones, and agents.
Creates a mock Tuple[DecisionSteps, TerminalSteps] with observations.
Imitates constant vector/visual observations, rewards, dones, and agents.
:int num_agents: Number of "agents" to imitate.
:int num_vector_observations: Number of "observations" in your observation space

:bool done: Whether all the agents in the batch are done
"""
if action_shape is None:
action_shape = [2]

]
reward = np.array(num_agents * [1.0], dtype=np.float32)
done = np.array(num_agents * [done], dtype=np.bool)
return BatchedStepResult(obs_list, reward, done, max_step, agent_id, action_mask)
behavior_spec = BehaviorSpec(
[(84, 84, 3)] * num_vis_observations + [(num_vector_observations, 0, 0)],
ActionType.DISCRETE if discrete else ActionType.CONTINUOUS,
action_shape if discrete else action_shape[0],
)
if done:
return (
DecisionSteps.empty(behavior_spec),
TerminalSteps(obs_list, reward, max_step, agent_id),
)
else:
return (
DecisionSteps(obs_list, reward, agent_id, action_mask),
TerminalSteps.empty(behavior_spec),
)
def create_batchedstep_from_brainparams(
def create_steps_from_brainparams(
) -> BatchedStepResult:
return create_mock_batchedstep(
) -> Tuple[DecisionSteps, TerminalSteps]:
return create_mock_steps(
num_agents=num_agents,
num_vector_observations=brain_params.vector_observation_space_size,
num_vis_observations=brain_params.number_visual_observations,

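The rewritten helper mirrors how the API now represents a step: agents that are done live in TerminalSteps, everyone else in DecisionSteps, and the unused half of the pair is built with .empty(spec). A sketch of that empty construction (the spec values are made up):

from mlagents_envs.base_env import ActionType, BehaviorSpec, DecisionSteps, TerminalSteps

spec = BehaviorSpec([(8,)], ActionType.CONTINUOUS, 2)   # 8-float vector obs, 2 continuous actions
decision_steps = DecisionSteps.empty(spec)
terminal_steps = TerminalSteps.empty(spec)
assert len(decision_steps) == 0 and len(terminal_steps) == 0
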
137
ml-agents/mlagents/trainers/tests/simple_test_envs.py


from mlagents_envs.base_env import (
BaseEnv,
AgentGroupSpec,
BatchedStepResult,
BehaviorSpec,
DecisionSteps,
TerminalSteps,
from mlagents_envs.tests.test_rpc_utils import proto_from_batched_step_result_and_action
from mlagents_envs.tests.test_rpc_utils import proto_from_steps_and_action
from mlagents_envs.communicator_objects.agent_info_action_pair_pb2 import (
AgentInfoActionPairProto,
)

self.vis_obs_size = vis_obs_size
self.vec_obs_size = vec_obs_size
action_type = ActionType.DISCRETE if use_discrete else ActionType.CONTINUOUS
self.group_spec = AgentGroupSpec(
self.behavior_spec = BehaviorSpec(
self._make_obs_spec(),
action_type,
tuple(2 for _ in range(action_size)) if use_discrete else action_size,

self.positions: Dict[str, List[float]] = {}
self.step_count: Dict[str, float] = {}
self.random = random.Random(str(self.group_spec))
self.random = random.Random(str(self.behavior_spec))
self.step_result: Dict[str, BatchedStepResult] = {}
self.step_result: Dict[str, Tuple[DecisionSteps, TerminalSteps]] = {}
self.agent_id: Dict[str, int] = {}
self.step_size = step_size # defines the difficulty of the test

obs.append(np.ones((1,) + self.vis_obs_size, dtype=np.float32) * value)
return obs
def get_agent_groups(self):
def get_behavior_names(self):
def get_agent_group_spec(self, name):
return self.group_spec
def get_behavior_spec(self, behavior_name):
return self.behavior_spec
def set_action_for_agent(self, name, id, data):
def set_action_for_agent(self, behavior_name, agent_id, action):
def set_actions(self, name, data):
self.action[name] = data
def set_actions(self, behavior_name, action):
self.action[behavior_name] = action
def get_step_result(self, name):
return self.step_result[name]
def get_steps(self, behavior_name):
return self.step_result[behavior_name]
def _take_action(self, name: str) -> bool:
deltas = []

def _make_batched_step(
self, name: str, done: bool, reward: float
) -> BatchedStepResult:
) -> Tuple[DecisionSteps, TerminalSteps]:
m_done = np.array([done], dtype=np.bool)
decision_step = DecisionSteps(m_vector_obs, m_reward, m_agent_id, action_mask)
terminal_step = TerminalSteps.empty(self.behavior_spec)
m_vector_obs,
m_reward,
m_done,
m_agent_id,
action_mask,
) = self._construct_reset_step(
m_vector_obs,
new_vector_obs,
m_reward,
m_done,
m_agent_id,
action_mask,
name,
new_reward,
new_done,
new_agent_id,
new_action_mask,
) = self._construct_reset_step(name)
decision_step = DecisionSteps(
new_vector_obs, new_reward, new_agent_id, new_action_mask
return BatchedStepResult(
m_vector_obs,
m_reward,
m_done,
np.zeros(m_done.shape, dtype=bool),
m_agent_id,
action_mask,
)
terminal_step = TerminalSteps(
m_vector_obs, m_reward, np.array([False], dtype=np.bool), m_agent_id
)
return (decision_step, terminal_step)
self,
vector_obs: List[np.ndarray],
new_vector_obs: List[np.ndarray],
reward: np.ndarray,
done: np.ndarray,
agent_id: np.ndarray,
action_mask: List[np.ndarray],
name: str,
) -> Tuple[List[np.ndarray], np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
self, name: str
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
m_vector_obs = [
np.concatenate((old, new), axis=0)
for old, new in zip(vector_obs, new_vector_obs)
]
m_reward = np.concatenate((reward, new_reward), axis=0)
m_done = np.concatenate((done, new_done), axis=0)
m_agent_id = np.concatenate((agent_id, new_agent_id), axis=0)
if action_mask is not None:
action_mask = [
np.concatenate((old, new), axis=0)
for old, new in zip(action_mask, new_action_mask)
]
return m_vector_obs, m_reward, m_done, m_agent_id, action_mask
return new_reward, new_done, new_agent_id, new_action_mask
def step(self) -> None:
assert all(action is not None for action in self.action.values())

def _make_batched_step(
self, name: str, done: bool, reward: float
) -> BatchedStepResult:
) -> Tuple[DecisionSteps, TerminalSteps]:
m_done = np.array([done], dtype=np.bool)
decision_step = DecisionSteps(m_vector_obs, m_reward, m_agent_id, action_mask)
terminal_step = TerminalSteps.empty(self.behavior_spec)
if done:
self._reset_agent(name)
recurrent_obs_val = (

(
m_vector_obs,
m_reward,
m_done,
m_agent_id,
action_mask,
) = self._construct_reset_step(
m_vector_obs,
new_vector_obs,
m_reward,
m_done,
m_agent_id,
action_mask,
name,
new_reward,
new_done,
new_agent_id,
new_action_mask,
) = self._construct_reset_step(name)
decision_step = DecisionSteps(
new_vector_obs, new_reward, new_agent_id, new_action_mask
return BatchedStepResult(
m_vector_obs,
m_reward,
m_done,
np.zeros(m_done.shape, dtype=bool),
m_agent_id,
action_mask,
)
terminal_step = TerminalSteps(
m_vector_obs, m_reward, np.array([False], dtype=np.bool), m_agent_id
)
return (decision_step, terminal_step)
class RecordEnvironment(SimpleEnvironment):

def step(self) -> None:
super().step()
for name in self.names:
self.demonstration_protos[
name
] += proto_from_batched_step_result_and_action(
self.step_result[name], self.action[name]
self.demonstration_protos[name] += proto_from_steps_and_action(
self.step_result[name][0], self.step_result[name][1], self.action[name]
)
self.demonstration_protos[name] = self.demonstration_protos[name][
-self.n_demos :

54
ml-agents/mlagents/trainers/tests/test_agent_processor.py


"pre_action": [0.1, 0.1],
"log_probs": [0.1, 0.1],
}
mock_step = mb.create_mock_batchedstep(
mock_decision_steps, mock_terminal_steps = mb.create_mock_steps(
num_agents=2,
num_vector_observations=8,
action_shape=[2],

action=[0.1, 0.1],
value=[0.1, 0.1],
outputs=fake_action_outputs,
agent_ids=mock_step.agent_id,
agent_ids=mock_decision_steps.agent_id,
processor.add_experiences(mock_step, 0, ActionInfo.empty())
processor.add_experiences(
mock_decision_steps, mock_terminal_steps, 0, ActionInfo.empty()
)
processor.add_experiences(mock_step, 0, fake_action_info)
processor.add_experiences(
mock_decision_steps, mock_terminal_steps, 0, fake_action_info
)
# Assert that two trajectories have been added to the Trainer
assert len(tqueue.put.call_args_list) == 2

# Assert that the AgentProcessor is empty
assert len(processor.experience_buffers[0]) == 0
# Test empty BatchedStepResult
mock_step = mb.create_mock_batchedstep(
# Test empty steps
mock_decision_steps, mock_terminal_steps = mb.create_mock_steps(
processor.add_experiences(mock_step, 0, ActionInfo([], [], {}, []))
processor.add_experiences(
mock_decision_steps, mock_terminal_steps, 0, ActionInfo([], [], {}, [])
)
# Assert that the AgentProcessor is still empty
assert len(processor.experience_buffers[0]) == 0

"pre_action": [0.1],
"log_probs": [0.1],
}
mock_step = mb.create_mock_batchedstep(
mock_decision_step, mock_terminal_step = mb.create_mock_steps(
mock_done_step = mb.create_mock_batchedstep(
mock_done_decision_step, mock_done_terminal_step = mb.create_mock_steps(
num_agents=1,
num_vector_observations=8,
action_shape=[2],

action=[0.1],
value=[0.1],
outputs=fake_action_outputs,
agent_ids=mock_step.agent_id,
agent_ids=mock_decision_step.agent_id,
processor.add_experiences(mock_step, 0, ActionInfo.empty())
processor.add_experiences(
mock_decision_step, mock_terminal_step, 0, ActionInfo.empty()
)
# Run 3 trajectories, with different workers (to simulate different agents)
add_calls = []

processor.add_experiences(mock_step, _ep, fake_action_info)
processor.add_experiences(
mock_decision_step, mock_terminal_step, _ep, fake_action_info
)
processor.add_experiences(mock_done_step, _ep, fake_action_info)
processor.add_experiences(
mock_done_decision_step, mock_done_terminal_step, _ep, fake_action_info
)
# Make sure we don't add experiences from the prior agents after the done
remove_calls.append(mock.call([get_global_agent_id(_ep, 0)]))

assert len(processor.last_step_result.keys()) == 0
# check that steps with immediate dones don't add to dicts
processor.add_experiences(mock_done_step, 0, ActionInfo.empty())
processor.add_experiences(
mock_done_decision_step, mock_done_terminal_step, 0, ActionInfo.empty()
)
assert len(processor.experience_buffers.keys()) == 0
assert len(processor.last_take_action_outputs.keys()) == 0
assert len(processor.episode_steps.keys()) == 0

"pre_action": [0.1],
"log_probs": [0.1],
}
mock_step = mb.create_mock_batchedstep(
mock_decision_step, mock_terminal_step = mb.create_mock_steps(
num_agents=1,
num_vector_observations=8,
action_shape=[2],

action=[0.1],
value=[0.1],
outputs=fake_action_outputs,
agent_ids=mock_step.agent_id,
agent_ids=mock_decision_step.agent_id,
processor.add_experiences(mock_step, 0, ActionInfo.empty())
processor.add_experiences(
mock_decision_step, mock_terminal_step, 0, ActionInfo.empty()
)
processor.add_experiences(mock_step, _ep, fake_action_info)
processor.add_experiences(
mock_decision_step, mock_terminal_step, _ep, fake_action_info
)
# Make sure we don't add experiences from the prior agents after the done
# Call end episode

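The tests above all follow the same updated call shape: add_experiences() receives the decision and terminal steps as separate arguments. Restated with a stub processor (not the real AgentProcessor) so the shape is explicit:

import numpy as np
from mlagents_envs.base_env import ActionType, BehaviorSpec, DecisionSteps, TerminalSteps

class StubProcessor:
    def add_experiences(self, decision_steps, terminal_steps, worker_id, previous_action_info):
        return len(decision_steps), len(terminal_steps)

spec = BehaviorSpec([(8,)], ActionType.DISCRETE, (2,))          # one 2-way discrete branch
decision_steps = DecisionSteps(
    [np.zeros((1, 8), dtype=np.float32)], np.zeros(1, dtype=np.float32), np.array([0]), None
)
terminal_steps = TerminalSteps.empty(spec)
print(StubProcessor().add_experiences(decision_steps, terminal_steps, 0, None))   # (1, 0)
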
32
ml-agents/mlagents/trainers/tests/test_demo_loader.py


import io
from unittest import mock
from mlagents_envs.communicator_objects.demonstration_meta_pb2 import (
DemonstrationMetaProto,
)
write_delimited,
group_spec, pair_infos, total_expected = load_demonstration(
behavior_spec, pair_infos, total_expected = load_demonstration(
assert np.sum(group_spec.observation_shapes[0]) == 8
assert np.sum(behavior_spec.observation_shapes[0]) == 8
assert len(pair_infos) == total_expected
_, demo_buffer = demo_to_buffer(path_prefix + "/test.demo", 1)

def test_load_demo_dir():
path_prefix = os.path.dirname(os.path.abspath(__file__))
group_spec, pair_infos, total_expected = load_demonstration(
behavior_spec, pair_infos, total_expected = load_demonstration(
assert np.sum(group_spec.observation_shapes[0]) == 8
assert np.sum(behavior_spec.observation_shapes[0]) == 8
assert len(pair_infos) == total_expected
_, demo_buffer = demo_to_buffer(path_prefix + "/test_demo_dir", 1)

assert get_demo_files(valid_fname) == [valid_fname]
# valid directory
assert get_demo_files(tmpdirname) == [valid_fname]
@mock.patch("mlagents.trainers.demo_loader.get_demo_files", return_value=["foo.demo"])
def test_unsupported_version_raises_error(mock_get_demo_files):
# Create a metadata proto with an unsupported version
bad_metadata = DemonstrationMetaProto()
bad_metadata.api_version = 1337
# Write the metadata to a temporary buffer, which will get returned by open()
buffer = io.BytesIO()
write_delimited(buffer, bad_metadata)
m = mock.mock_open(read_data=buffer.getvalue())
# Make sure that we get a RuntimeError when trying to load this.
with mock.patch("builtins.open", m):
with pytest.raises(RuntimeError):
load_demonstration("foo")

12
ml-agents/mlagents/trainers/tests/test_ghost.py


trainer_params = dummy_config
trainer = PPOTrainer(mock_brain.brain_name, 0, trainer_params, True, False, 0, "0")
trainer.seed = 1
policy = trainer.create_policy(mock_brain)
policy = trainer.create_policy(mock_brain.brain_name, mock_brain)
to_load_policy = trainer.create_policy(mock_brain)
to_load_policy = trainer.create_policy(mock_brain.brain_name, mock_brain)
to_load_policy.create_tf_graph()
to_load_policy.init_load_weights()

)
# first policy encountered becomes policy trained by wrapped PPO
policy = trainer.create_policy(brain_params_team0)
policy = trainer.create_policy(parsed_behavior_id0, brain_params_team0)
policy = trainer.create_policy(brain_params_team1)
policy = trainer.create_policy(parsed_behavior_id1, brain_params_team1)
trainer.add_policy(parsed_behavior_id1, policy)
trajectory_queue1 = AgentManagerQueue(brain_params_team1.brain_name)
trainer.subscribe_trajectory_queue(trajectory_queue1)

# First policy encountered becomes policy trained by wrapped PPO
# This queue should remain empty after swap snapshot
policy = trainer.create_policy(brain_params_team0)
policy = trainer.create_policy(parsed_behavior_id0, brain_params_team0)
policy = trainer.create_policy(brain_params_team1)
policy = trainer.create_policy(parsed_behavior_id1, brain_params_team1)
trainer.add_policy(parsed_behavior_id1, policy)
policy_queue1 = AgentManagerQueue(brain_params_team1.brain_name)
trainer.publish_policy_queue(policy_queue1)

2
ml-agents/mlagents/trainers/tests/test_learn.py


None,
)
handle_dir_mock.assert_called_once_with(
"./models/ppo", "./summaries", False, False
"./models/ppo", "./summaries", False, False, None
)
StatsReporter.writers.clear() # make sure there aren't any writers as added by learn.py

55
ml-agents/mlagents/trainers/tests/test_nn_policy.py


import pytest
import os
from typing import Dict, Any
import numpy as np
from mlagents.tf_utils import tf

NUM_AGENTS = 12
def create_policy_mock(dummy_config, use_rnn, use_discrete, use_visual):
def create_policy_mock(
dummy_config: Dict[str, Any],
use_rnn: bool = False,
use_discrete: bool = True,
use_visual: bool = False,
load: bool = False,
seed: int = 0,
) -> NNPolicy:
mock_brain = mb.setup_mock_brain(
use_discrete,
use_visual,

trainer_parameters = dummy_config
trainer_parameters["keep_checkpoints"] = 3
trainer_parameters["use_recurrent"] = use_rnn
policy = NNPolicy(0, mock_brain, trainer_parameters, False, False)
policy = NNPolicy(seed, mock_brain, trainer_parameters, False, load)
def test_load_save(dummy_config, tmp_path):
path1 = os.path.join(tmp_path, "runid1")
path2 = os.path.join(tmp_path, "runid2")
trainer_params = dummy_config
trainer_params["model_path"] = path1
policy = create_policy_mock(trainer_params)
policy.initialize_or_load()
policy.save_model(2000)
assert len(os.listdir(tmp_path)) > 0
# Try load from this path
policy2 = create_policy_mock(trainer_params, load=True, seed=1)
policy2.initialize_or_load()
_compare_two_policies(policy, policy2)
# Try initialize from path 1
trainer_params["model_path"] = path2
trainer_params["init_path"] = path1
policy3 = create_policy_mock(trainer_params, load=False, seed=2)
policy3.initialize_or_load()
_compare_two_policies(policy2, policy3)
def _compare_two_policies(policy1: NNPolicy, policy2: NNPolicy) -> None:
"""
Make sure two policies have the same output for the same input.
"""
decision_step, _ = mb.create_steps_from_brainparams(policy1.brain, num_agents=1)
run_out1 = policy1.evaluate(decision_step, list(decision_step.agent_id))
run_out2 = policy2.evaluate(decision_step, list(decision_step.agent_id))
np.testing.assert_array_equal(run_out2["log_probs"], run_out1["log_probs"])
@pytest.mark.parametrize("discrete", [True, False], ids=["discrete", "continuous"])
@pytest.mark.parametrize("visual", [True, False], ids=["visual", "vector"])
@pytest.mark.parametrize("rnn", [True, False], ids=["rnn", "no_rnn"])

policy = create_policy_mock(
dummy_config, use_rnn=rnn, use_discrete=discrete, use_visual=visual
)
step = mb.create_batchedstep_from_brainparams(policy.brain, num_agents=NUM_AGENTS)
decision_step, terminal_step = mb.create_steps_from_brainparams(
policy.brain, num_agents=NUM_AGENTS
)
run_out = policy.evaluate(step, list(step.agent_id))
run_out = policy.evaluate(decision_step, list(decision_step.agent_id))
if discrete:
run_out["action"].shape == (NUM_AGENTS, len(DISCRETE_ACTION_SPACE))
else:

24
ml-agents/mlagents/trainers/tests/test_policy.py


from mlagents.trainers.policy.tf_policy import TFPolicy
from mlagents_envs.base_env import BatchedStepResult, AgentGroupSpec
from mlagents_envs.base_env import DecisionSteps, BehaviorSpec
from mlagents.trainers.action_info import ActionInfo
from unittest.mock import MagicMock
import numpy as np

test_seed = 3
policy = FakePolicy(test_seed, basic_mock_brain(), basic_params())
# Doesn't really matter what this is
dummy_groupspec = AgentGroupSpec([(1,)], "continuous", 1)
no_agent_step = BatchedStepResult.empty(dummy_groupspec)
dummy_groupspec = BehaviorSpec([(1,)], "continuous", 1)
no_agent_step = DecisionSteps.empty(dummy_groupspec)
result = policy.get_action(no_agent_step)
assert result == ActionInfo.empty()

policy = FakePolicy(test_seed, basic_mock_brain(), basic_params())
policy.evaluate = MagicMock(return_value={})
policy.save_memories = MagicMock()
step_with_agents = BatchedStepResult(
[],
np.array([], dtype=np.float32),
np.array([False], dtype=np.bool),
np.array([], dtype=np.bool),
np.array([0]),
None,
step_with_agents = DecisionSteps(
[], np.array([], dtype=np.float32), np.array([0]), None
)
result = policy.get_action(step_with_agents, worker_id=0)
assert result == ActionInfo(None, None, {}, [0])

"value": np.array([1.1], dtype=np.float32),
}
policy.evaluate = MagicMock(return_value=policy_eval_out)
step_with_agents = BatchedStepResult(
[],
np.array([], dtype=np.float32),
np.array([False], dtype=np.bool),
np.array([], dtype=np.bool),
np.array([0]),
None,
step_with_agents = DecisionSteps(
[], np.array([], dtype=np.float32), np.array([0]), None
)
result = policy.get_action(step_with_agents)
expected = ActionInfo(

Some files were not shown because too many files changed in this diff.
