
Merge commit 'f9c05a61d574305497789b5997f1ae3ea1b1ad3b' into develop-splitpolicyoptimizer

/develop/nopreviousactions
Ervin Teng, 4 years ago
Current commit: d4ee7346
127 files changed, 1,003 insertions and 2,394 deletions
Changed files (first 100 of 127 shown; per-file change counts in parentheses):
  1. .github/ISSUE_TEMPLATE/config.yml (8)
  2. .gitignore (4)
  3. .yamato/standalone-build-test.yml (3)
  4. Project/.gitignore (1)
  5. Project/Assets/ML-Agents/Examples/3DBall/Prefabs/3DBall.prefab (13)
  6. Project/Assets/ML-Agents/Examples/3DBall/Scenes/3DBall.unity (4)
  7. Project/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DAgent.cs (2)
  8. Project/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DHardAgent.cs (2)
  9. Project/Assets/ML-Agents/Examples/3DBall/TFModels/3DBall.nn.meta (1)
  10. Project/Assets/ML-Agents/Examples/3DBall/TFModels/3DBallHard.nn.meta (1)
  11. Project/Assets/ML-Agents/Examples/Basic/Scripts/BasicAgent.cs (4)
  12. Project/Assets/ML-Agents/Examples/Basic/TFModels/Basic.nn.meta (1)
  13. Project/Assets/ML-Agents/Examples/Bouncer/TFModels/Bouncer.nn.meta (1)
  14. Project/Assets/ML-Agents/Examples/Crawler/Scripts/CrawlerAgent.cs (1)
  15. Project/Assets/ML-Agents/Examples/Crawler/TFModels/CrawlerDynamic.nn.meta (1)
  16. Project/Assets/ML-Agents/Examples/Crawler/TFModels/CrawlerStatic.nn.meta (1)
  17. Project/Assets/ML-Agents/Examples/FoodCollector/Scripts/FoodCollectorArea.cs (2)
  18. Project/Assets/ML-Agents/Examples/FoodCollector/TFModels/FoodCollector.nn.meta (1)
  19. Project/Assets/ML-Agents/Examples/GridWorld/Scripts/GridAgent.cs (6)
  20. Project/Assets/ML-Agents/Examples/GridWorld/TFModels/GridWorld.nn.meta (1)
  21. Project/Assets/ML-Agents/Examples/Hallway/TFModels/Hallway.nn.meta (1)
  22. Project/Assets/ML-Agents/Examples/PushBlock/TFModels/PushBlock.nn.meta (1)
  23. Project/Assets/ML-Agents/Examples/Pyramids/Scripts/PyramidArea.cs (2)
  24. Project/Assets/ML-Agents/Examples/Pyramids/TFModels/Pyramids.nn.meta (1)
  25. Project/Assets/ML-Agents/Examples/Reacher/TFModels/Reacher.nn.meta (1)
  26. Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/AdjustTrainingTimescale.cs (2)
  27. Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/Area.cs (2)
  28. Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/CameraFollow.cs (2)
  29. Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/FlyCamera.cs (2)
  30. Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/GroundContact.cs (3)
  31. Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/JointDriveController.cs (3)
  32. Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ProjectSettingsOverrides.cs (99)
  33. Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/TargetContact.cs (2)
  34. Project/Assets/ML-Agents/Examples/Soccer/Prefabs/SoccerFieldTwos.prefab (884)
  35. Project/Assets/ML-Agents/Examples/Soccer/Scripts/AgentSoccer.cs (171)
  36. Project/Assets/ML-Agents/Examples/Soccer/Scripts/SoccerBallController.cs (6)
  37. Project/Assets/ML-Agents/Examples/Soccer/Scripts/SoccerFieldArea.cs (79)
  38. Project/Assets/ML-Agents/Examples/Soccer/Scripts/SoccerSettings.cs (6)
  39. Project/Assets/ML-Agents/Examples/Tennis/Scenes/Tennis.unity (12)
  40. Project/Assets/ML-Agents/Examples/Tennis/TFModels/Tennis.nn.meta (1)
  41. Project/Assets/ML-Agents/Examples/Walker/Scripts/WalkerAgent.cs (1)
  42. Project/Assets/ML-Agents/Examples/Walker/TFModels/Walker.nn.meta (1)
  43. Project/Assets/ML-Agents/Examples/WallJump/Scripts/WallJumpAgent.cs (2)
  44. Project/Assets/ML-Agents/Examples/WallJump/TFModels/BigWallJump.nn.meta (1)
  45. Project/Assets/ML-Agents/Examples/WallJump/TFModels/SmallWallJump.nn.meta (1)
  46. Project/ProjectSettings/TagManager.asset (10)
  47. README.md (2)
  48. com.unity.ml-agents/Editor/BehaviorParametersEditor.cs (2)
  49. com.unity.ml-agents/Editor/Unity.ML-Agents.Editor.asmdef (5)
  50. com.unity.ml-agents/Runtime/Academy.cs (24)
  51. com.unity.ml-agents/Runtime/ActionMasker.cs (4)
  52. com.unity.ml-agents/Runtime/Agent.cs (75)
  53. com.unity.ml-agents/Runtime/Constants.cs (2)
  54. com.unity.ml-agents/Runtime/DecisionRequester.cs (1)
  55. com.unity.ml-agents/Runtime/DemonstrationRecorder.cs (2)
  56. com.unity.ml-agents/Runtime/EpisodeIdCounter.cs (2)
  57. com.unity.ml-agents/Runtime/Grpc/GrpcExtensions.cs (24)
  58. com.unity.ml-agents/Runtime/Grpc/RpcCommunicator.cs (93)
  59. com.unity.ml-agents/Runtime/ICommunicator.cs (21)
  60. com.unity.ml-agents/Runtime/InferenceBrain/ApplierImpl.cs (66)
  61. com.unity.ml-agents/Runtime/InferenceBrain/BarracudaModelParamLoader.cs (2)
  62. com.unity.ml-agents/Runtime/InferenceBrain/GeneratorImpl.cs (20)
  63. com.unity.ml-agents/Runtime/InferenceBrain/ModelRunner.cs (52)
  64. com.unity.ml-agents/Runtime/InferenceBrain/TensorApplier.cs (15)
  65. com.unity.ml-agents/Runtime/InferenceBrain/TensorGenerator.cs (2)
  66. com.unity.ml-agents/Runtime/InferenceBrain/TensorNames.cs (2)
  67. com.unity.ml-agents/Runtime/InferenceBrain/TensorProxy.cs (6)
  68. com.unity.ml-agents/Runtime/Policy/BarracudaPolicy.cs (12)
  69. com.unity.ml-agents/Runtime/Policy/BehaviorParameters.cs (19)
  70. com.unity.ml-agents/Runtime/Policy/HeuristicPolicy.cs (16)
  71. com.unity.ml-agents/Runtime/Policy/IPolicy.cs (6)
  72. com.unity.ml-agents/Runtime/Policy/RemotePolicy.cs (27)
  73. com.unity.ml-agents/Runtime/Sensor/Observation.cs (2)
  74. com.unity.ml-agents/Runtime/Sensor/RayPerceptionSensor.cs (40)
  75. com.unity.ml-agents/Runtime/Sensor/RayPerceptionSensorComponentBase.cs (4)
  76. com.unity.ml-agents/Runtime/Sensor/SensorShapeValidator.cs (2)
  77. com.unity.ml-agents/Runtime/Sensor/StackingSensor.cs (2)
  78. com.unity.ml-agents/Runtime/Timer.cs (71)
  79. com.unity.ml-agents/Runtime/Unity.ML-Agents.asmdef (9)
  80. com.unity.ml-agents/Runtime/Utilities.cs (2)
  81. com.unity.ml-agents/Tests/Editor/EditModeTestInternalBrainTensorApplier.cs (45)
  82. com.unity.ml-agents/Tests/Editor/EditModeTestInternalBrainTensorGenerator.cs (4)
  83. com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs (50)
  84. com.unity.ml-agents/Tests/Editor/Sensor/RayPerceptionSensorTests.cs (84)
  85. com.unity.ml-agents/Tests/Editor/Unity.ML-Agents.Editor.Tests.asmdef (8)
  86. com.unity.ml-agents/package.json (4)
  87. config/curricula/wall_jump.yaml (12)
  88. config/sac_trainer_config.yaml (2)
  89. config/trainer_config.yaml (48)
  90. docs/Installation.md (30)
  91. docs/Learning-Environment-Create-New.md (6)
  92. docs/Learning-Environment-Design-Agents.md (2)
  93. docs/Learning-Environment-Examples.md (54)
  94. docs/Migrating.md (40)
  95. docs/Training-Curriculum-Learning.md (12)
  96. docs/Training-Generalized-Reinforcement-Learning-Agents.md (2)
  97. docs/images/tennis.png (999)
  98. gym-unity/gym_unity/__init__.py (2)
  99. ml-agents-envs/mlagents_envs/__init__.py (2)
  100. ml-agents-envs/mlagents_envs/environment.py (3)

8
.github/ISSUE_TEMPLATE/config.yml


blank_issues_enabled: false
contact_links:
- name: Discussion / General Questions
url: https://forum.unity.com/forums/ml-agents.453/
about: Discussion about ML-Agents, RL algorithms, or game integrations.
- name: Installation / Setup
url: https://forum.unity.com/forums/ml-agents.453/
about: Questions about python installation, initial connection between Unity and training, etc.
about: Please ask Installation / Setup and Discussion / General Questions in the Unity Forum.
about: Please ask other questions in the ML-Agents Unity Forum.

4
.gitignore


/envs
# Environemnt logfile
*UnitySDK.log
*Project.log
/UnitySDK/.vs/
/Project/.vs/
# Autogenerated VS/MD/Consulo solution and project files
/com.unity.ml-agentsExportedObj/

3
.yamato/standalone-build-test.yml


name: Test Mac Standalone {{ editor.version }}
agent:
type: Unity::VM::osx
image: ml-agents/ml-agents-bokken-mac:v0.1.3-475350
image: ml-agents/ml-agents-bokken-mac:0.1.4-492264
- pip install pyyaml
- python -u -m ml-agents.tests.yamato.standalone_build_tests
triggers:
pull_requests:

1
Project/.gitignore


/Assets/AssetStoreTools*
/Assets/Plugins*
/Assets/Demonstrations*
/Assets/ML-Agents/Timers*
/csharp_timers.json
# Environemnt logfile

13
Project/Assets/ML-Agents/Examples/3DBall/Prefabs/3DBall.prefab


- component: {fileID: 114368073295828880}
- component: {fileID: 114715123104194396}
- component: {fileID: 1306725529891448089}
- component: {fileID: 1758424554059689351}
m_Layer: 0
m_Name: Agent
m_TagString: Untagged

DecisionPeriod: 5
RepeatAction: 1
offsetStep: 0
--- !u!114 &1758424554059689351
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 1424713891854676}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 3a6da8f78a394c6ab027688eab81e04d, type: 3}
m_Name:
m_EditorClassIdentifier:
--- !u!1 &1533320402322554
GameObject:
m_ObjectHideFlags: 0

4
Project/Assets/ML-Agents/Examples/3DBall/Scenes/3DBall.unity


propertyPath: m_Name
value: 3DBall (1)
objectReference: {fileID: 0}
- target: {fileID: 1321468028730240, guid: cfa81c019162c4e3caf6e2999c6fdf48, type: 3}
propertyPath: m_IsActive
value: 1
objectReference: {fileID: 0}
- target: {fileID: 4679453577574622, guid: cfa81c019162c4e3caf6e2999c6fdf48, type: 3}
propertyPath: m_LocalPosition.x
value: 9

2
Project/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DAgent.cs


Mathf.Abs(ball.transform.position.x - gameObject.transform.position.x) > 3f ||
Mathf.Abs(ball.transform.position.z - gameObject.transform.position.z) > 3f)
{
Done();
Done();
}
else
{

2
Project/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DHardAgent.cs


Mathf.Abs(ball.transform.position.x - gameObject.transform.position.x) > 3f ||
Mathf.Abs(ball.transform.position.z - gameObject.transform.position.z) > 3f)
{
Done();
Done();
}
else
{

1
Project/Assets/ML-Agents/Examples/3DBall/TFModels/3DBall.nn.meta


ScriptedImporter:
fileIDToRecycleName:
11400000: main obj
11400002: model data
externalObjects: {}
userData:
assetBundleName:

1
Project/Assets/ML-Agents/Examples/3DBall/TFModels/3DBallHard.nn.meta


ScriptedImporter:
fileIDToRecycleName:
11400000: main obj
11400002: model data
externalObjects: {}
userData:
assetBundleName:

4
Project/Assets/ML-Agents/Examples/Basic/Scripts/BasicAgent.cs


if (m_Position == m_SmallGoalPosition)
{
Done();
Done();
Done();
Done();
}
}

1
Project/Assets/ML-Agents/Examples/Basic/TFModels/Basic.nn.meta


ScriptedImporter:
fileIDToRecycleName:
11400000: main obj
11400002: model data
externalObjects: {}
userData:
assetBundleName:

1
Project/Assets/ML-Agents/Examples/Bouncer/TFModels/Bouncer.nn.meta


ScriptedImporter:
fileIDToRecycleName:
11400000: main obj
11400002: model data
externalObjects: {}
userData:
assetBundleName:

1
Project/Assets/ML-Agents/Examples/Crawler/Scripts/CrawlerAgent.cs


using UnityEngine;
using MLAgents;
using MLAgentsExamples;
[RequireComponent(typeof(JointDriveController))] // Required to set joint forces
public class CrawlerAgent : Agent

1
Project/Assets/ML-Agents/Examples/Crawler/TFModels/CrawlerDynamic.nn.meta


ScriptedImporter:
fileIDToRecycleName:
11400000: main obj
11400002: model data
externalObjects: {}
userData:
assetBundleName:

1
Project/Assets/ML-Agents/Examples/Crawler/TFModels/CrawlerStatic.nn.meta


ScriptedImporter:
fileIDToRecycleName:
11400000: main obj
11400002: model data
externalObjects: {}
userData:
assetBundleName:

2
Project/Assets/ML-Agents/Examples/FoodCollector/Scripts/FoodCollectorArea.cs


using UnityEngine;
using MLAgents;
using MLAgentsExamples;
public class FoodCollectorArea : Area
{

1
Project/Assets/ML-Agents/Examples/FoodCollector/TFModels/FoodCollector.nn.meta


ScriptedImporter:
fileIDToRecycleName:
11400000: main obj
11400002: model data
externalObjects: {}
userData:
assetBundleName:

6
Project/Assets/ML-Agents/Examples/GridWorld/Scripts/GridAgent.cs


if (hit.Where(col => col.gameObject.CompareTag("goal")).ToArray().Length == 1)
{
Done();
Done();
if (hit.Where(col => col.gameObject.CompareTag("pit")).ToArray().Length == 1)
else if (hit.Where(col => col.gameObject.CompareTag("pit")).ToArray().Length == 1)
SetReward(-1f);
SetReward(-1f);
}
}
}

1
Project/Assets/ML-Agents/Examples/GridWorld/TFModels/GridWorld.nn.meta


ScriptedImporter:
fileIDToRecycleName:
11400000: main obj
11400002: model data
externalObjects: {}
userData:
assetBundleName:

1
Project/Assets/ML-Agents/Examples/Hallway/TFModels/Hallway.nn.meta


ScriptedImporter:
fileIDToRecycleName:
11400000: main obj
11400002: model data
externalObjects: {}
userData:
assetBundleName:

1
Project/Assets/ML-Agents/Examples/PushBlock/TFModels/PushBlock.nn.meta


ScriptedImporter:
fileIDToRecycleName:
11400000: main obj
11400002: model data
externalObjects: {}
userData:
assetBundleName:

2
Project/Assets/ML-Agents/Examples/Pyramids/Scripts/PyramidArea.cs


using UnityEngine;
using MLAgents;
using MLAgentsExamples;
public class PyramidArea : Area
{

1
Project/Assets/ML-Agents/Examples/Pyramids/TFModels/Pyramids.nn.meta


ScriptedImporter:
fileIDToRecycleName:
11400000: main obj
11400002: model data
externalObjects: {}
userData:
assetBundleName:

1
Project/Assets/ML-Agents/Examples/Reacher/TFModels/Reacher.nn.meta


ScriptedImporter:
fileIDToRecycleName:
11400000: main obj
11400002: model data
externalObjects: {}
userData:
assetBundleName:

2
Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/AdjustTrainingTimescale.cs


using UnityEngine;
namespace MLAgents
namespace MLAgentsExamples
{
public class AdjustTrainingTimescale : MonoBehaviour
{

2
Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/Area.cs


using UnityEngine;
namespace MLAgents
namespace MLAgentsExamples
{
public class Area : MonoBehaviour
{

2
Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/CameraFollow.cs


using UnityEngine;
namespace MLAgents
namespace MLAgentsExamples
{
public class CameraFollow : MonoBehaviour
{

2
Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/FlyCamera.cs


using UnityEngine;
namespace MLAgents
namespace MLAgentsExamples
{
public class FlyCamera : MonoBehaviour
{

3
Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/GroundContact.cs


using UnityEngine;
using MLAgents;
namespace MLAgents
namespace MLAgentsExamples
{
/// <summary>
/// This class contains logic for locomotion agents with joints which might make contact with the ground.

3
Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/JointDriveController.cs


using System.Collections.Generic;
using UnityEngine;
using UnityEngine.Serialization;
using MLAgents;
namespace MLAgents
namespace MLAgentsExamples
{
/// <summary>
/// Used to store relevant information for acting and learning for each body part in agent.

99
Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ProjectSettingsOverrides.cs


using UnityEngine;
using MLAgents;
public class ProjectSettingsOverrides : MonoBehaviour
namespace MLAgentsExamples
// Original values
float m_OriginalMonitorVerticalOffset;
Vector3 m_OriginalGravity;
float m_OriginalFixedDeltaTime;
float m_OriginalMaximumDeltaTime;
int m_OriginalSolverIterations;
int m_OriginalSolverVelocityIterations;
public class ProjectSettingsOverrides : MonoBehaviour
{
// Original values
float m_OriginalMonitorVerticalOffset;
Vector3 m_OriginalGravity;
float m_OriginalFixedDeltaTime;
float m_OriginalMaximumDeltaTime;
int m_OriginalSolverIterations;
int m_OriginalSolverVelocityIterations;
[Tooltip("Increase or decrease the scene gravity. Use ~3x to make things less floaty")]
public float gravityMultiplier = 1.0f;
[Tooltip("Increase or decrease the scene gravity. Use ~3x to make things less floaty")]
public float gravityMultiplier = 1.0f;
[Header("Display Settings")]
public float monitorVerticalOffset;
[Header("Display Settings")]
public float monitorVerticalOffset;
[Header("Advanced physics settings")]
[Tooltip("The interval in seconds at which physics and other fixed frame rate updates (like MonoBehaviour's FixedUpdate) are performed.")]
public float fixedDeltaTime = .02f;
[Tooltip("The maximum time a frame can take. Physics and other fixed frame rate updates (like MonoBehaviour's FixedUpdate) will be performed only for this duration of time per frame.")]
public float maximumDeltaTime = 1.0f / 3.0f;
[Tooltip("Determines how accurately Rigidbody joints and collision contacts are resolved. (default 6). Must be positive.")]
public int solverIterations = 6;
[Tooltip("Affects how accurately the Rigidbody joints and collision contacts are resolved. (default 1). Must be positive.")]
public int solverVelocityIterations = 1;
[Header("Advanced physics settings")]
[Tooltip("The interval in seconds at which physics and other fixed frame rate updates (like MonoBehaviour's FixedUpdate) are performed.")]
public float fixedDeltaTime = .02f;
[Tooltip("The maximum time a frame can take. Physics and other fixed frame rate updates (like MonoBehaviour's FixedUpdate) will be performed only for this duration of time per frame.")]
public float maximumDeltaTime = 1.0f / 3.0f;
[Tooltip("Determines how accurately Rigidbody joints and collision contacts are resolved. (default 6). Must be positive.")]
public int solverIterations = 6;
[Tooltip("Affects how accurately the Rigidbody joints and collision contacts are resolved. (default 1). Must be positive.")]
public int solverVelocityIterations = 1;
public void Awake()
{
// Save the original values
m_OriginalMonitorVerticalOffset = Monitor.verticalOffset;
m_OriginalGravity = Physics.gravity;
m_OriginalFixedDeltaTime = Time.fixedDeltaTime;
m_OriginalMaximumDeltaTime = Time.maximumDeltaTime;
m_OriginalSolverIterations = Physics.defaultSolverIterations;
m_OriginalSolverVelocityIterations = Physics.defaultSolverVelocityIterations;
public void Awake()
{
// Save the original values
m_OriginalMonitorVerticalOffset = Monitor.verticalOffset;
m_OriginalGravity = Physics.gravity;
m_OriginalFixedDeltaTime = Time.fixedDeltaTime;
m_OriginalMaximumDeltaTime = Time.maximumDeltaTime;
m_OriginalSolverIterations = Physics.defaultSolverIterations;
m_OriginalSolverVelocityIterations = Physics.defaultSolverVelocityIterations;
// Override
Monitor.verticalOffset = monitorVerticalOffset;
Physics.gravity *= gravityMultiplier;
Time.fixedDeltaTime = fixedDeltaTime;
Time.maximumDeltaTime = maximumDeltaTime;
Physics.defaultSolverIterations = solverIterations;
Physics.defaultSolverVelocityIterations = solverVelocityIterations;
// Override
Monitor.verticalOffset = monitorVerticalOffset;
Physics.gravity *= gravityMultiplier;
Time.fixedDeltaTime = fixedDeltaTime;
Time.maximumDeltaTime = maximumDeltaTime;
Physics.defaultSolverIterations = solverIterations;
Physics.defaultSolverVelocityIterations = solverVelocityIterations;
Academy.Instance.FloatProperties.RegisterCallback("gravity", f => { Physics.gravity = new Vector3(0, -f, 0); });
}
Academy.Instance.FloatProperties.RegisterCallback("gravity", f => { Physics.gravity = new Vector3(0, -f, 0); });
}
public void OnDestroy()
{
Monitor.verticalOffset = m_OriginalMonitorVerticalOffset;
Physics.gravity = m_OriginalGravity;
Time.fixedDeltaTime = m_OriginalFixedDeltaTime;
Time.maximumDeltaTime = m_OriginalMaximumDeltaTime;
Physics.defaultSolverIterations = m_OriginalSolverIterations;
Physics.defaultSolverVelocityIterations = m_OriginalSolverVelocityIterations;
public void OnDestroy()
{
Monitor.verticalOffset = m_OriginalMonitorVerticalOffset;
Physics.gravity = m_OriginalGravity;
Time.fixedDeltaTime = m_OriginalFixedDeltaTime;
Time.maximumDeltaTime = m_OriginalMaximumDeltaTime;
Physics.defaultSolverIterations = m_OriginalSolverIterations;
Physics.defaultSolverVelocityIterations = m_OriginalSolverVelocityIterations;
}
}
}
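
Aside from moving the class into the MLAgentsExamples namespace, the functional addition in this file is the gravity reset-parameter callback. Below is a minimal sketch of wiring up such a callback from a separate component, assuming the Academy.Instance.FloatProperties.RegisterCallback API exactly as it appears in the hunk above; the GravityFromTrainer component name is hypothetical and not part of this commit.

// --- Illustrative sketch (not part of the commit diff) ---
using UnityEngine;
using MLAgents;

// Hypothetical example component: reacts to a "gravity" float property sent
// from the training process, mirroring the RegisterCallback call above.
public class GravityFromTrainer : MonoBehaviour
{
    void Awake()
    {
        // When the trainer updates the "gravity" property, apply it to the scene.
        Academy.Instance.FloatProperties.RegisterCallback(
            "gravity", f => { Physics.gravity = new Vector3(0f, -f, 0f); });
    }
}
// --- end sketch ---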

2
Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/TargetContact.cs


using UnityEngine;
namespace MLAgents
namespace MLAgentsExamples
{
/// <summary>
/// This class contains logic for locomotion agents with joints which might make contact with a target.

884
Project/Assets/ML-Agents/Examples/Soccer/Prefabs/SoccerFieldTwos.prefab
File diff too large to display

171
Project/Assets/ML-Agents/Examples/Soccer/Scripts/AgentSoccer.cs


using System;
using UnityEngine;
using MLAgents;

// * opposing player
public enum Team
{
Purple,
Blue
}
public enum AgentRole
{
Striker,
Goalie
Blue = 0,
Purple = 1
[HideInInspector]
public AgentRole agentRole;
float m_KickPower;
int m_PlayerIndex;
public SoccerFieldArea area;

SoccerSettings m_SoccerSettings;
Renderer m_AgentRenderer;
BehaviorParameters m_BP;
Vector3 m_Transform;
public void ChooseRandomTeam()
public override void InitializeAgent()
team = (Team)Random.Range(0, 2);
if (team == Team.Purple)
base.InitializeAgent();
m_BP = gameObject.GetComponent<BehaviorParameters>();
if (m_BP.m_TeamID == (int)Team.Blue)
JoinPurpleTeam(agentRole);
team = Team.Blue;
m_Transform = new Vector3(transform.position.x - 4f, .5f, transform.position.z);
JoinBlueTeam(agentRole);
team = Team.Purple;
m_Transform = new Vector3(transform.position.x + 4f, .5f, transform.position.z);
}
public void JoinPurpleTeam(AgentRole role)
{
agentRole = role;
team = Team.Purple;
m_AgentRenderer.material = m_SoccerSettings.purpleMaterial;
tag = "purpleAgent";
}
public void JoinBlueTeam(AgentRole role)
{
agentRole = role;
team = Team.Blue;
m_AgentRenderer.material = m_SoccerSettings.blueMaterial;
tag = "blueAgent";
}
public override void InitializeAgent()
{
base.InitializeAgent();
m_AgentRenderer = GetComponentInChildren<Renderer>();
m_SoccerSettings = FindObjectOfType<SoccerSettings>();
agentRb = GetComponent<Rigidbody>();

var action = Mathf.FloorToInt(act[0]);
// Goalies and Strikers have slightly different action spaces.
if (agentRole == AgentRole.Goalie)
m_KickPower = 0f;
var forwardAxis = (int)act[0];
var rightAxis = (int)act[1];
var rotateAxis = (int)act[2];
switch (forwardAxis)
{
case 1:
dirToGo = transform.forward * 1f;
m_KickPower = 1f;
break;
case 2:
dirToGo = transform.forward * -1f;
break;
}
switch (rightAxis)
m_KickPower = 0f;
switch (action)
{
case 1:
dirToGo = transform.forward * 1f;
m_KickPower = 1f;
break;
case 2:
dirToGo = transform.forward * -1f;
break;
case 4:
dirToGo = transform.right * -1f;
break;
case 3:
dirToGo = transform.right * 1f;
break;
}
case 1:
dirToGo = transform.right * 0.3f;
break;
case 2:
dirToGo = transform.right * -0.3f;
break;
else
switch (rotateAxis)
m_KickPower = 0f;
switch (action)
{
case 1:
dirToGo = transform.forward * 1f;
m_KickPower = 1f;
break;
case 2:
dirToGo = transform.forward * -1f;
break;
case 3:
rotateDir = transform.up * 1f;
break;
case 4:
rotateDir = transform.up * -1f;
break;
case 5:
dirToGo = transform.right * -0.75f;
break;
case 6:
dirToGo = transform.right * 0.75f;
break;
}
case 1:
rotateDir = transform.up * -1f;
break;
case 2:
rotateDir = transform.up * 1f;
break;
transform.Rotate(rotateDir, Time.deltaTime * 100f);
agentRb.AddForce(dirToGo * m_SoccerSettings.agentRunSpeed,
ForceMode.VelocityChange);

{
// Existential penalty for strikers.
if (agentRole == AgentRole.Striker)
AddReward(-1f / 3000f);
MoveAgent(vectorAction);
}
public override float[] Heuristic()
{
var action = new float[3];
//forward
if (Input.GetKey(KeyCode.W))
{
action[0] = 1f;
}
if (Input.GetKey(KeyCode.S))
AddReward(-1f / 3000f);
action[0] = 2f;
// Existential bonus for goalies.
if (agentRole == AgentRole.Goalie)
//rotate
if (Input.GetKey(KeyCode.A))
AddReward(1f / 3000f);
action[2] = 1f;
MoveAgent(vectorAction);
if (Input.GetKey(KeyCode.D))
{
action[2] = 2f;
}
//right
if (Input.GetKey(KeyCode.E))
{
action[1] = 1f;
}
if (Input.GetKey(KeyCode.Q))
{
action[1] = 2f;
}
return action;
/// <summary>
/// Used to provide a "kick" to the ball.
/// </summary>

public override void AgentReset()
{
if (m_SoccerSettings.randomizePlayersTeamForTraining)
{
ChooseRandomTeam();
}
JoinPurpleTeam(agentRole);
JoinBlueTeam(agentRole);
transform.position = area.GetRandomSpawnPos(agentRole, team);
transform.position = m_Transform;
agentRb.velocity = Vector3.zero;
agentRb.angularVelocity = Vector3.zero;
SetResetParameters();
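
The hunk above replaces the single discrete action branch (and the Striker/Goalie role split) with three discrete branches, as the new Heuristic shows: act[0] for forward/back, act[1] for strafing, act[2] for rotation. Below is a reduced sketch of how those branches decode to motion, assuming the 0 = no-op, 1 = positive, 2 = negative convention used by the Heuristic; SoccerActionSketch is a hypothetical helper, not code from this commit.

// --- Illustrative sketch (not part of the commit diff) ---
using UnityEngine;

static class SoccerActionSketch
{
    // Decodes the three discrete branches (forward, right, rotate) into a
    // movement direction and a rotation direction, following the switch
    // statements visible in the diff above.
    public static void Decode(Transform t, float[] act,
                              out Vector3 dirToGo, out Vector3 rotateDir)
    {
        dirToGo = Vector3.zero;
        rotateDir = Vector3.zero;

        switch ((int)act[0])      // forward axis (also sets kick power in the real agent)
        {
            case 1: dirToGo = t.forward; break;
            case 2: dirToGo = -t.forward; break;
        }
        switch ((int)act[1])      // right axis (slower strafe, 0.3x, as in the diff)
        {
            case 1: dirToGo = t.right * 0.3f; break;
            case 2: dirToGo = -t.right * 0.3f; break;
        }
        switch ((int)act[2])      // rotate axis
        {
            case 1: rotateDir = -t.up; break;
            case 2: rotateDir = t.up; break;
        }
    }
}
// --- end sketch ---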

6
Project/Assets/ML-Agents/Examples/Soccer/Scripts/SoccerBallController.cs


{
[HideInInspector]
public SoccerFieldArea area;
public AgentSoccer lastTouchedBy; //who was the last to touch the ball
public string agentTag; //will be used to check if collided with a agent
public string purpleGoalTag; //will be used to check if collided with red goal
public string purpleGoalTag; //will be used to check if collided with purple goal
if (col.gameObject.CompareTag(purpleGoalTag)) //ball touched red goal
if (col.gameObject.CompareTag(purpleGoalTag)) //ball touched purple goal
{
area.GoalTouched(AgentSoccer.Team.Blue);
}

79
Project/Assets/ML-Agents/Examples/Soccer/Scripts/SoccerFieldArea.cs


SoccerSettings m_SoccerSettings;
public IEnumerator GoalScoredSwapGroundMaterial(Material mat, float time)
{
m_GroundRenderer.material = mat;
yield return new WaitForSeconds(time);
m_GroundRenderer.material = m_GroundMaterial;
}
void Awake()
{
m_SoccerSettings = FindObjectOfType<SoccerSettings>();

if (goalTextUI) goalTextUI.SetActive(false);
}
public void AllPlayersDone(float reward)
{
foreach (var ps in playerStates)
{
if (ps.agentScript.gameObject.activeInHierarchy)
{
if (reward != 0)
{
ps.agentScript.AddReward(reward);
}
ps.agentScript.Done();
}
}
}
public void GoalTouched(AgentSoccer.Team scoredTeam)
{
foreach (var ps in playerStates)

RewardOrPunishPlayer(ps, m_SoccerSettings.strikerReward, m_SoccerSettings.goalieReward);
ps.agentScript.AddReward(1);
RewardOrPunishPlayer(ps, m_SoccerSettings.strikerPunish, m_SoccerSettings.goaliePunish);
}
if (m_SoccerSettings.randomizePlayersTeamForTraining)
{
ps.agentScript.ChooseRandomTeam();
ps.agentScript.AddReward(-1);
ps.agentScript.Done(); //all agents need to be reset
if (scoredTeam == AgentSoccer.Team.Purple)
{
StartCoroutine(GoalScoredSwapGroundMaterial(m_SoccerSettings.purpleMaterial, 1));
}
else
{
StartCoroutine(GoalScoredSwapGroundMaterial(m_SoccerSettings.blueMaterial, 1));
}
if (goalTextUI)
{
StartCoroutine(ShowGoalUI());

public void RewardOrPunishPlayer(PlayerState ps, float striker, float goalie)
{
if (ps.agentScript.agentRole == AgentSoccer.AgentRole.Striker)
{
ps.agentScript.AddReward(striker);
}
if (ps.agentScript.agentRole == AgentSoccer.AgentRole.Goalie)
{
ps.agentScript.AddReward(goalie);
}
ps.agentScript.Done(); //all agents need to be reset
}
public Vector3 GetRandomSpawnPos(AgentSoccer.AgentRole role, AgentSoccer.Team team)
{
var xOffset = 0f;
if (role == AgentSoccer.AgentRole.Goalie)
{
xOffset = 13f;
}
if (role == AgentSoccer.AgentRole.Striker)
{
xOffset = 7f;
}
if (team == AgentSoccer.Team.Blue)
{
xOffset = xOffset * -1f;
}
var randomSpawnPos = ground.transform.position +
new Vector3(xOffset, 0f, 0f)
+ (Random.insideUnitSphere * 2);
randomSpawnPos.y = ground.transform.position.y + 2;
return randomSpawnPos;
}
new Vector3(0f, 0f, 0f)
+ (Random.insideUnitSphere * 2);
randomSpawnPos.y = ground.transform.position.y + 2;
new Vector3(0f, 0f, 0f);
randomSpawnPos.y = ground.transform.position.y + .5f;
return randomSpawnPos;
}
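
The GoalTouched changes above drop the configurable striker/goalie rewards in favor of a flat +1 for the scoring team and -1 for the conceding team, with every agent marked Done. Below is a reduced sketch of that logic; the team comparison (ps.agentScript.team == scoredTeam) is inferred from context rather than visible in the hunk, and playerStates comes from the surrounding SoccerFieldArea class.

// --- Illustrative sketch (not part of the commit diff) ---
public void GoalTouched(AgentSoccer.Team scoredTeam)
{
    foreach (var ps in playerStates)
    {
        if (ps.agentScript.team == scoredTeam)
        {
            ps.agentScript.AddReward(1);   // scoring team
        }
        else
        {
            ps.agentScript.AddReward(-1);  // conceding team
        }
        ps.agentScript.Done();             // all agents need to be reset
    }
    // Ground-material swap and goal UI handling are unchanged and omitted here.
}
// --- end sketch ---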

6
Project/Assets/ML-Agents/Examples/Soccer/Scripts/SoccerSettings.cs


public Material purpleMaterial;
public Material blueMaterial;
public bool randomizePlayersTeamForTraining = true;
public float strikerPunish; //if opponents scores, the striker gets this neg reward (-1)
public float strikerReward; //if team scores a goal they get a reward (+1)
public float goaliePunish; //if opponents score, goalie gets this neg reward (-1)
public float goalieReward; //if team scores, goalie gets this reward (currently 0...no reward. can play with this later)
}

12
Project/Assets/ML-Agents/Examples/Tennis/Scenes/Tennis.unity


m_Father: {fileID: 1184319693}
m_RootOrder: 1
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
m_AnchorMin: {x: 1, y: 1}
m_AnchorMax: {x: 1, y: 1}
m_AnchoredPosition: {x: -20, y: -50}
m_AnchorMin: {x: 1, y: 0}
m_AnchorMax: {x: 1, y: 0}
m_AnchoredPosition: {x: -20, y: 50}
m_SizeDelta: {x: 100, y: 50}
m_Pivot: {x: 0.5, y: 0.5}
--- !u!114 &1871669623

m_Father: {fileID: 1184319693}
m_RootOrder: 0
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
m_AnchorMin: {x: 0, y: 1}
m_AnchorMax: {x: 0, y: 1}
m_AnchoredPosition: {x: 100, y: -50}
m_AnchorMin: {x: 0, y: 0}
m_AnchorMax: {x: 0, y: 0}
m_AnchoredPosition: {x: 100, y: 50}
m_SizeDelta: {x: 100, y: 50}
m_Pivot: {x: 0.5, y: 0.5}
--- !u!114 &2073469452

1
Project/Assets/ML-Agents/Examples/Tennis/TFModels/Tennis.nn.meta


ScriptedImporter:
fileIDToRecycleName:
11400000: main obj
11400002: model data
externalObjects: {}
userData:
assetBundleName:

1
Project/Assets/ML-Agents/Examples/Walker/Scripts/WalkerAgent.cs


using UnityEngine;
using MLAgents;
using MLAgentsExamples;
public class WalkerAgent : Agent
{

1
Project/Assets/ML-Agents/Examples/Walker/TFModels/Walker.nn.meta


ScriptedImporter:
fileIDToRecycleName:
11400000: main obj
11400002: model data
externalObjects: {}
userData:
assetBundleName:

2
Project/Assets/ML-Agents/Examples/WallJump/Scripts/WallJumpAgent.cs


if ((!Physics.Raycast(m_AgentRb.position, Vector3.down, 20))
|| (!Physics.Raycast(m_ShortBlockRb.position, Vector3.down, 20)))
{
Done();
Done();
ResetBlock(m_ShortBlockRb);
StartCoroutine(
GoalScoredSwapGroundMaterial(m_WallJumpSettings.failMaterial, .5f));

1
Project/Assets/ML-Agents/Examples/WallJump/TFModels/BigWallJump.nn.meta


ScriptedImporter:
fileIDToRecycleName:
11400000: main obj
11400002: model data
externalObjects: {}
userData:
assetBundleName:

1
Project/Assets/ML-Agents/Examples/WallJump/TFModels/SmallWallJump.nn.meta


ScriptedImporter:
fileIDToRecycleName:
11400000: main obj
11400002: model data
externalObjects: {}
userData:
assetBundleName:

10
Project/ProjectSettings/TagManager.asset


-
-
- invisible
- ball
- goalieWall
- goalie
- strikerWall
- striker
-
-
-
-
-
-
-
-

2
README.md


In addition to our own documentation, here are some additional, relevant articles:
* [Unity AI - Unity 3D Artificial Intelligence](https://www.youtube.com/watch?v=bqsfkGbBU6k)
* [ML-Agents Penguins tutorial](https://learn.unity.com/project/ml-agents-penguins)
## Community and Feedback

2
com.unity.ml-agents/Editor/BehaviorParametersEditor.cs


var brainParameters = behaviorParameters.brainParameters;
if (model != null)
{
barracudaModel = ModelLoader.Load(model.Value);
barracudaModel = ModelLoader.Load(model);
}
if (brainParameters != null)
{

5
com.unity.ml-agents/Editor/Unity.ML-Agents.Editor.asmdef


{
"name": "Unity.ML-Agents.Editor",
"references": [
"Unity.ML-Agents"
"Unity.ML-Agents",
"Barracuda"
],
"includePlatforms": [
"Editor"

"defineConstraints": [],
"versionDefines": [],
"noEngineReferences": false
}
}

24
com.unity.ml-agents/Runtime/Academy.cs


/// <summary>
/// Helper class to step the Academy during FixedUpdate phase.
/// </summary>
public class AcademyFixedUpdateStepper : MonoBehaviour
internal class AcademyFixedUpdateStepper : MonoBehaviour
{
void FixedUpdate()
{

/// attached to it.
/// </remarks>
[HelpURL("https://github.com/Unity-Technologies/ml-agents/blob/master/" +
"docs/Learning-Environment-Design-Academy.md")]
"docs/Learning-Environment-Design.md")]
const string k_ApiVersion = "API-14-dev0";
const string k_ApiVersion = "API-15-dev0";
const int k_EditorTrainingPort = 5004;
// Lazy initializer pattern, see https://csharpindepth.com/articles/singleton#lazy

int m_TotalStepCount;
/// Pointer to the communicator currently in use by the Academy.
public ICommunicator Communicator;
internal ICommunicator Communicator;
bool m_Initialized;
List<ModelRunner> m_ModelRunners = new List<ModelRunner>();

// Signals to all the Agents at each environment step so they can use
// their Policy to decide on their next action.
public event System.Action DecideAction;
internal event System.Action DecideAction;
public event System.Action DestroyAction;
internal event System.Action DestroyAction;
public event System.Action<int> AgentSetStatus;
internal event System.Action<int> AgentSetStatus;
public event System.Action AgentSendState;
internal event System.Action AgentSendState;
public event System.Action AgentAct;
internal event System.Action AgentAct;
public event System.Action AgentForceReset;
internal event System.Action AgentForceReset;
// Signals that the Academy has been reset by the training process
public event System.Action OnEnvironmentReset;

/// Initialize the Academy if it hasn't already been initialized.
/// This method is always safe to call; it will have no effect if the Academy is already initialized.
/// </summary>
public void LazyInitialization()
internal void LazyInitialization()
{
if (!m_Initialized)
{

/// <param name="inferenceDevice"> The inference device (CPU or GPU)
/// the ModelRunner will use </param>
/// <returns> The ModelRunner compatible with the input settings</returns>
public ModelRunner GetOrCreateModelRunner(
internal ModelRunner GetOrCreateModelRunner(
NNModel model, BrainParameters brainParameters, InferenceDevice inferenceDevice)
{
var modelRunner = m_ModelRunners.Find(x => x.HasModel(model, inferenceDevice));

4
com.unity.ml-agents/Runtime/ActionMasker.cs


namespace MLAgents
{
public class ActionMasker
internal class ActionMasker
{
/// When using discrete control, is the starting indices of the actions
/// when all the branches are concatenated with each other.

readonly BrainParameters m_BrainParameters;
public ActionMasker(BrainParameters brainParameters)
internal ActionMasker(BrainParameters brainParameters)
{
m_BrainParameters = brainParameters;
}

75
com.unity.ml-agents/Runtime/Agent.cs


/// Struct that contains the action information sent from the Brain to the
/// Agent.
/// </summary>
public struct AgentAction
internal struct AgentAction
{
public float[] vectorActions;
}

/// This Id will be changed every time the Agent resets.
int m_EpisodeId;
/// Whether or not the Agent has been initialized already
bool m_Initialized;
/// Keeps track of the actions that are masked at each step.
ActionMasker m_ActionMasker;

/// Currently generated from attached SensorComponents, and a legacy VectorSensor
/// </summary>
[FormerlySerializedAs("m_Sensors")]
public List<ISensor> sensors;
internal List<ISensor> sensors;
public VectorSensor collectObservationsSensor;
internal VectorSensor collectObservationsSensor;
m_EpisodeId = EpisodeIdCounter.GetEpisodeId();
OnEnableHelper();
m_Recorder = GetComponent<DemonstrationRecorder>();
LazyInitialize();
void OnEnableHelper()
public void LazyInitialize()
if (m_Initialized)
{
return;
}
m_Initialized = true;
// Grab the "static" properties for the Agent.
m_EpisodeId = EpisodeIdCounter.GetEpisodeId();
m_PolicyFactory = GetComponent<BehaviorParameters>();
m_Recorder = GetComponent<DemonstrationRecorder>();
m_Info = new AgentInfo();
m_Action = new AgentAction();
sensors = new List<ISensor>();

Academy.Instance.AgentAct += AgentStep;
Academy.Instance.AgentForceReset += _AgentReset;
m_PolicyFactory = GetComponent<BehaviorParameters>();
}
/// Monobehavior function that is called when the attached GameObject

}
NotifyAgentDone();
m_Brain?.Dispose();
m_Initialized = false;
}
void NotifyAgentDone(bool maxStepReached = false)

m_Info.maxStepReached = maxStepReached;
// Request the last decision with no callbacks
// We request a decision so Python knows the Agent is done immediately
m_Brain?.RequestDecision(m_Info, sensors, (a) => {});
m_Brain?.RequestDecision(m_Info, sensors);
UpdateRewardStats();
// The Agent is done, so we give it a new episode Id
m_EpisodeId = EpisodeIdCounter.GetEpisodeId();
m_Reward = 0f;

return m_CumulativeReward;
}
void UpdateRewardStats()
{
var gaugeName = $"{m_PolicyFactory.behaviorName}.CumulativeReward";
TimerStack.Instance.SetGauge(gaugeName, GetCumulativeReward());
}
/// <summary>
/// Sets the done flag to true.
/// </summary>

/// Set up the list of ISensors on the Agent. By default, this will select any
/// SensorBase's attached to the Agent.
/// </summary>
public void InitializeSensors()
internal void InitializeSensors()
{
// Get all attached sensor components
SensorComponent[] attachedSensorComponents;

m_Info.maxStepReached = false;
m_Info.episodeId = m_EpisodeId;
m_Brain.RequestDecision(m_Info, sensors, UpdateAgentAction);
m_Brain.RequestDecision(m_Info, sensors);
if (m_Recorder != null && m_Recorder.record && Application.isEditor)
{

}
/// <summary>
/// Returns the last action that was decided on by the Agent (returns null if no decision has been made)
/// </summary>
public float[] GetAction()
{
return m_Action.vectorActions;
}
/// <summary>
/// This method will forcefully reset the agent and will also reset the hasAlreadyReset flag.
/// This way, even if the agent was already in the process of reseting, it will be reset again
/// and will not send a Done flag at the next step.

AgentReset();
}
public void UpdateAgentAction(AgentAction action)
{
m_Action = action;
}
/// <summary>
/// Updates the vector action.
/// </summary>
/// <param name="vectorActions">Vector actions.</param>
public void UpdateVectorAction(float[] vectorActions)
{
m_Action.vectorActions = vectorActions;
}
/// <summary>
/// Scales continuous action from [-1, 1] to arbitrary range.
/// </summary>

if ((m_RequestAction) && (m_Brain != null))
{
m_RequestAction = false;
AgentAction(m_Action.vectorActions);
if (m_Action.vectorActions != null)
{
AgentAction(m_Action.vectorActions);
}
m_Brain?.DecideAction();
m_Action.vectorActions = m_Brain?.DecideAction();
}
}
}
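
The Agent changes above move from a callback-push model (RequestDecision took an Action<AgentAction> and UpdateAgentAction stored the result) to a pull model where the agent fetches the action itself via m_Action.vectorActions = m_Brain?.DecideAction(). Below is a reduced sketch of the two shapes of the policy contract, with stand-in stub types; this is not the full IPolicy interface from the package.

// --- Illustrative sketch (not part of the commit diff) ---
using System;
using System.Collections.Generic;

// Stub types standing in for the real ML-Agents types.
struct AgentInfo {}
struct AgentAction { public float[] vectorActions; }
interface ISensor {}

// Before: the agent handed the policy a callback and was pushed an AgentAction.
interface IPolicyWithCallback
{
    void RequestDecision(AgentInfo info, List<ISensor> sensors, Action<AgentAction> onAction);
    void DecideAction();
}

// After: the agent only registers the request and pulls the float[] itself
// at step time, mirroring m_Action.vectorActions = m_Brain?.DecideAction();
interface IPolicyPull
{
    void RequestDecision(AgentInfo info, List<ISensor> sensors);
    float[] DecideAction();
}
// --- end sketch ---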

2
com.unity.ml-agents/Runtime/Constants.cs


/// <summary>
/// Grouping for use in AddComponentMenu (instead of nesting the menus).
/// </summary>
public enum MenuGroup
internal enum MenuGroup
{
Default = 0,
Sensors = 50

1
com.unity.ml-agents/Runtime/DecisionRequester.cs


/// A component that when attached to an Agent will automatically request decisions from it
/// at regular intervals.
/// </summary>
[AddComponentMenu("ML Agents/Decision Requester", (int)MenuGroup.Default)]
public class DecisionRequester : MonoBehaviour
{
[Range(1, 20)]

2
com.unity.ml-agents/Runtime/DemonstrationRecorder.cs


m_DemoStore.Initialize(
demonstrationName,
behaviorParams.brainParameters,
behaviorParams.behaviorName);
behaviorParams.fullyQualifiedBehaviorName);
Monitor.Log("Recording Demonstration of Agent: ", m_RecordingAgent.name);
}

2
com.unity.ml-agents/Runtime/EpisodeIdCounter.cs


namespace MLAgents
{
public static class EpisodeIdCounter
internal static class EpisodeIdCounter
{
private static int Counter;
public static int GetEpisodeId()

24
com.unity.ml-agents/Runtime/Grpc/GrpcExtensions.cs


namespace MLAgents
{
public static class GrpcExtensions
internal static class GrpcExtensions
internal static AgentInfoActionPairProto ToInfoActionPairProto(this AgentInfo ai)
public static AgentInfoActionPairProto ToInfoActionPairProto(this AgentInfo ai)
{
var agentInfoProto = ai.ToAgentInfoProto();

/// Converts a AgentInfo to a protobuf generated AgentInfoProto
/// </summary>
/// <returns>The protobuf version of the AgentInfo.</returns>
internal static AgentInfoProto ToAgentInfoProto(this AgentInfo ai)
public static AgentInfoProto ToAgentInfoProto(this AgentInfo ai)
{
var agentInfoProto = new AgentInfoProto
{

/// <param name="bp">The instance of BrainParameter to extend.</param>
/// <param name="name">The name of the brain.</param>
/// <param name="isTraining">Whether or not the Brain is training.</param>
internal static BrainParametersProto ToProto(this BrainParameters bp, string name, bool isTraining)
public static BrainParametersProto ToProto(this BrainParameters bp, string name, bool isTraining)
{
var brainParametersProto = new BrainParametersProto
{

/// <summary>
/// Convert metadata object to proto object.
/// </summary>
internal static DemonstrationMetaProto ToProto(this DemonstrationMetaData dm)
public static DemonstrationMetaProto ToProto(this DemonstrationMetaData dm)
{
var demoProto = new DemonstrationMetaProto
{

/// <summary>
/// Initialize metadata values based on proto object.
/// </summary>
internal static DemonstrationMetaData ToDemonstrationMetaData(this DemonstrationMetaProto demoProto)
public static DemonstrationMetaData ToDemonstrationMetaData(this DemonstrationMetaProto demoProto)
{
var dm = new DemonstrationMetaData
{

/// </summary>
/// <param name="bpp">An instance of a brain parameters protobuf object.</param>
/// <returns>A BrainParameters struct.</returns>
internal static BrainParameters ToBrainParameters(this BrainParametersProto bpp)
public static BrainParameters ToBrainParameters(this BrainParametersProto bpp)
{
var bp = new BrainParameters
{

return bp;
}
internal static UnityRLInitParameters ToUnityRLInitParameters(this UnityRLInitializationInputProto inputProto)
public static UnityRLInitParameters ToUnityRLInitParameters(this UnityRLInitializationInputProto inputProto)
{
return new UnityRLInitParameters
{

internal static AgentAction ToAgentAction(this AgentActionProto aap)
public static AgentAction ToAgentAction(this AgentActionProto aap)
{
return new AgentAction
{

internal static List<AgentAction> ToAgentActionList(this UnityRLInputProto.Types.ListAgentActionProto proto)
public static List<AgentAction> ToAgentActionList(this UnityRLInputProto.Types.ListAgentActionProto proto)
{
var agentActions = new List<AgentAction>(proto.Value.Count);
foreach (var ap in proto.Value)

return agentActions;
}
internal static ObservationProto ToProto(this Observation obs)
public static ObservationProto ToProto(this Observation obs)
{
ObservationProto obsProto = null;

/// <param name="sensor"></param>
/// <param name="writeAdapter"></param>
/// <returns></returns>
internal static ObservationProto GetObservationProto(this ISensor sensor, WriteAdapter writeAdapter)
public static ObservationProto GetObservationProto(this ISensor sensor, WriteAdapter writeAdapter)
{
var shape = sensor.GetObservationShape();
ObservationProto observationProto = null;

93
com.unity.ml-agents/Runtime/Grpc/RpcCommunicator.cs


namespace MLAgents
{
/// Responsible for communication with External using gRPC.
public class RpcCommunicator : ICommunicator
internal class RpcCommunicator : ICommunicator
public struct IdCallbackPair
{
public int AgentId;
public Action<AgentAction> Callback;
}
public event QuitCommandHandler QuitCommandReceived;
public event ResetCommandHandler ResetCommandReceived;

/// The default number of agents in the scene
const int k_NumAgents = 32;
Dictionary<string, List<IdCallbackPair>> m_ActionCallbacks = new Dictionary<string, List<IdCallbackPair>>();
Dictionary<string, List<int>> m_OrderedAgentsRequestingDecisions = new Dictionary<string, List<int>>();
Dictionary<string, Dictionary<int, AgentAction>> m_LastActionsReceived =
new Dictionary<string, Dictionary<int, AgentAction>>();
Dictionary<string, Dictionary<int, float[]>> m_LastActionsReceived =
new Dictionary<string, Dictionary<int, float[]>>();
// Brains that we have sent over the communicator with agents.
HashSet<string> m_SentBrainKeys = new HashSet<string>();

}
case CommandProto.Reset:
{
foreach (var brainName in m_ActionCallbacks.Keys)
foreach (var brainName in m_OrderedAgentsRequestingDecisions.Keys)
m_ActionCallbacks[brainName].Clear();
m_OrderedAgentsRequestingDecisions[brainName].Clear();
}
ResetCommandReceived?.Invoke();
return;

/// <summary>
/// Sends the observations of one Agent.
/// </summary>
/// <param name="brainKey">Batch Key.</param>
/// <param name="behaviorName">Batch Key.</param>
public void PutObservations(string brainKey, AgentInfo info, List<ISensor> sensors, Action<AgentAction> action)
public void PutObservations(string behaviorName, AgentInfo info, List<ISensor> sensors)
if (!m_SensorShapeValidators.ContainsKey(brainKey))
if (!m_SensorShapeValidators.ContainsKey(behaviorName))
m_SensorShapeValidators[brainKey] = new SensorShapeValidator();
m_SensorShapeValidators[behaviorName] = new SensorShapeValidator();
m_SensorShapeValidators[brainKey].ValidateSensors(sensors);
m_SensorShapeValidators[behaviorName].ValidateSensors(sensors);
#endif
using (TimerStack.Instance.Scoped("AgentInfo.ToProto"))

agentInfoProto.Observations.Add(obsProto);
}
}
m_CurrentUnityRlOutput.AgentInfos[brainKey].Value.Add(agentInfoProto);
m_CurrentUnityRlOutput.AgentInfos[behaviorName].Value.Add(agentInfoProto);
if (!m_ActionCallbacks.ContainsKey(brainKey))
if (!m_OrderedAgentsRequestingDecisions.ContainsKey(behaviorName))
{
m_OrderedAgentsRequestingDecisions[behaviorName] = new List<int>();
}
m_OrderedAgentsRequestingDecisions[behaviorName].Add(info.episodeId);
if (!m_LastActionsReceived.ContainsKey(behaviorName))
{
m_LastActionsReceived[behaviorName] = new Dictionary<int, float[]>();
}
m_LastActionsReceived[behaviorName][info.episodeId] = null;
if (info.done)
m_ActionCallbacks[brainKey] = new List<IdCallbackPair>();
m_LastActionsReceived[behaviorName].Remove(info.episodeId);
m_ActionCallbacks[brainKey].Add(new IdCallbackPair { AgentId = info.episodeId, Callback = action });
}
/// <summary>

UpdateEnvironmentWithInput(rlInput);
m_LastActionsReceived.Clear();
if (!m_ActionCallbacks[brainName].Any())
if (!m_OrderedAgentsRequestingDecisions[brainName].Any())
{
continue;
}

}
var agentActions = rlInput.AgentActions[brainName].ToAgentActionList();
var numAgents = m_ActionCallbacks[brainName].Count;
var agentActionDict = new Dictionary<int, AgentAction>(numAgents);
m_LastActionsReceived[brainName] = agentActionDict;
var numAgents = m_OrderedAgentsRequestingDecisions[brainName].Count;
var agentId = m_ActionCallbacks[brainName][i].AgentId;
agentActionDict[agentId] = agentAction;
m_ActionCallbacks[brainName][i].Callback.Invoke(agentAction);
var agentId = m_OrderedAgentsRequestingDecisions[brainName][i];
if (m_LastActionsReceived[brainName].ContainsKey(agentId))
{
m_LastActionsReceived[brainName][agentId] = agentAction.vectorActions;
}
foreach (var brainName in m_ActionCallbacks.Keys)
foreach (var brainName in m_OrderedAgentsRequestingDecisions.Keys)
m_ActionCallbacks[brainName].Clear();
m_OrderedAgentsRequestingDecisions[brainName].Clear();
public Dictionary<int, AgentAction> GetActions(string key)
public float[] GetActions(string behaviorName, int agentId)
return m_LastActionsReceived[key];
if (m_LastActionsReceived.ContainsKey(behaviorName))
{
if (m_LastActionsReceived[behaviorName].ContainsKey(agentId))
{
return m_LastActionsReceived[behaviorName][agentId];
}
}
return null;
}
/// <summary>

}
/// <summary>
/// Wraps the UnityOuptut into a message with the appropriate status.
/// Wraps the UnityOutput into a message with the appropriate status.
/// </summary>
/// <returns>The UnityMessage corresponding.</returns>
/// <param name="content">The UnityOutput to be wrapped.</param>

};
}
void CacheBrainParameters(string brainKey, BrainParameters brainParameters)
void CacheBrainParameters(string behaviorName, BrainParameters brainParameters)
if (m_SentBrainKeys.Contains(brainKey))
if (m_SentBrainKeys.Contains(behaviorName))
m_UnsentBrainKeys[brainKey] = brainParameters;
m_UnsentBrainKeys[behaviorName] = brainParameters;
foreach (var brainKey in m_UnsentBrainKeys.Keys)
foreach (var behaviorName in m_UnsentBrainKeys.Keys)
if (m_CurrentUnityRlOutput.AgentInfos.ContainsKey(brainKey))
if (m_CurrentUnityRlOutput.AgentInfos.ContainsKey(behaviorName))
{
if (output == null)
{

var brainParameters = m_UnsentBrainKeys[brainKey];
output.BrainParameters.Add(brainParameters.ToProto(brainKey, true));
var brainParameters = m_UnsentBrainKeys[behaviorName];
output.BrainParameters.Add(brainParameters.ToProto(behaviorName, true));
}
}
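
In RpcCommunicator the per-agent callback list (m_ActionCallbacks) is replaced by two pieces of state: the episode ids that requested a decision this step, in order, and the last float[] action received per behavior and agent. Below is a minimal sketch of that bookkeeping, assuming the simplified PutObservations/GetActions signatures shown in the hunk; the ActionBookkeeping class is a hypothetical reduction, not the real communicator.

// --- Illustrative sketch (not part of the commit diff) ---
using System.Collections.Generic;

class ActionBookkeeping
{
    // Episode ids, in the order their decisions were requested this step.
    readonly Dictionary<string, List<int>> m_OrderedAgentsRequestingDecisions =
        new Dictionary<string, List<int>>();

    // Last action received from the trainer, per behavior name and episode id.
    readonly Dictionary<string, Dictionary<int, float[]>> m_LastActionsReceived =
        new Dictionary<string, Dictionary<int, float[]>>();

    public void PutObservations(string behaviorName, int episodeId, bool done)
    {
        if (!m_OrderedAgentsRequestingDecisions.ContainsKey(behaviorName))
            m_OrderedAgentsRequestingDecisions[behaviorName] = new List<int>();
        m_OrderedAgentsRequestingDecisions[behaviorName].Add(episodeId);

        if (!m_LastActionsReceived.ContainsKey(behaviorName))
            m_LastActionsReceived[behaviorName] = new Dictionary<int, float[]>();
        m_LastActionsReceived[behaviorName][episodeId] = null;

        // Done agents take no further action, so drop their entry.
        if (done)
            m_LastActionsReceived[behaviorName].Remove(episodeId);
    }

    public float[] GetActions(string behaviorName, int episodeId)
    {
        Dictionary<int, float[]> forBehavior;
        if (m_LastActionsReceived.TryGetValue(behaviorName, out forBehavior))
        {
            float[] action;
            if (forBehavior.TryGetValue(episodeId, out action))
                return action;
        }
        return null;
    }
}
// --- end sketch ---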

21
com.unity.ml-agents/Runtime/ICommunicator.cs


namespace MLAgents
{
public struct CommunicatorInitParameters
internal struct CommunicatorInitParameters
{
/// <summary>
/// Port to listen for connections on.

/// </summary>
public string version;
}
public struct UnityRLInitParameters
internal struct UnityRLInitParameters
{
/// <summary>
/// An RNG seed sent from the python process to Unity.

public struct UnityRLInputParameters
internal struct UnityRLInputParameters
{
/// <summary>
/// Boolean sent back from python to indicate whether or not training is happening.

/// <summary>
/// Delegate for handling quite events sent back from the communicator.
/// </summary>
public delegate void QuitCommandHandler();
internal delegate void QuitCommandHandler();
public delegate void ResetCommandHandler();
internal delegate void ResetCommandHandler();
public delegate void RLInputReceivedHandler(UnityRLInputParameters inputParams);
internal delegate void RLInputReceivedHandler(UnityRLInputParameters inputParams);
/**
This is the interface of the Communicators.

UnityOutput and UnityInput can be extended to provide functionalities beyond RL
UnityRLOutput and UnityRLInput can be extended to provide new RL functionalities
*/
public interface ICommunicator : IDisposable
internal interface ICommunicator : IDisposable
{
/// <summary>
/// Quit was received by the communicator.

/// <param name="info">Agent info.</param>
/// <param name="sensors">The list of ISensors of the Agent.</param>
/// <param name="action">The action that will be called once the next AgentAction is ready.</param>
void PutObservations(string brainKey, AgentInfo info, List<ISensor> sensors, Action<AgentAction> action);
void PutObservations(string brainKey, AgentInfo info, List<ISensor> sensors);
/// <summary>
/// Signals the ICommunicator that the Agents are now ready to receive their action

/// <summary>
/// Gets the AgentActions based on the batching key.
/// </summary>
/// <param name="key">A key to identify which actions to get</param>
/// <param name="key">A key to identify which behavior actions to get</param>
/// <param name="agentId">A key to identify which Agent actions to get</param>
Dictionary<int, AgentAction> GetActions(string key);
float[] GetActions(string key, int agentId);
/// <summary>
/// Registers a side channel to the communicator. The side channel will exchange

66
com.unity.ml-agents/Runtime/InferenceBrain/ApplierImpl.cs


/// The Applier for the Continuous Action output tensor. Tensor is assumed to contain the
/// continuous action data of the agents in the batch.
/// </summary>
public class ContinuousActionOutputApplier : TensorApplier.IApplier
internal class ContinuousActionOutputApplier : TensorApplier.IApplier
public void Apply(TensorProxy tensorProxy, IEnumerable<AgentIdActionPair> actions)
public void Apply(TensorProxy tensorProxy, IEnumerable<int> actionIds, Dictionary<int, float[]> lastActions)
foreach (var idActionPair in actions)
foreach (int agentId in actionIds)
var actionValue = new float[actionSize];
for (var j = 0; j < actionSize; j++)
if (lastActions.ContainsKey(agentId))
actionValue[j] = tensorProxy.data[agentIndex, j];
var actionValue = lastActions[agentId];
if (actionValue == null)
{
actionValue = new float[actionSize];
lastActions[agentId] = actionValue;
}
for (var j = 0; j < actionSize; j++)
{
actionValue[j] = tensorProxy.data[agentIndex, j];
}
idActionPair.action.Invoke(new AgentAction { vectorActions = actionValue });
agentIndex++;
}
}

/// The Applier for the Discrete Action output tensor. Uses multinomial to sample discrete
/// actions from the logits contained in the tensor.
/// </summary>
public class DiscreteActionOutputApplier : TensorApplier.IApplier
internal class DiscreteActionOutputApplier : TensorApplier.IApplier
{
readonly int[] m_ActionSize;
readonly Multinomial m_Multinomial;

m_Allocator = allocator;
}
public void Apply(TensorProxy tensorProxy, IEnumerable<AgentIdActionPair> actions)
public void Apply(TensorProxy tensorProxy, IEnumerable<int> actionIds, Dictionary<int, float[]> lastActions)
var idActionPairList = actions as List<AgentIdActionPair> ?? actions.ToList();
var idActionPairList = actionIds as List<int> ?? actionIds.ToList();
var batchSize = idActionPairList.Count;
var actionValues = new float[batchSize, m_ActionSize.Length];
var startActionIndices = Utilities.CumSum(m_ActionSize);

outputTensor.data.Dispose();
}
var agentIndex = 0;
foreach (var idActionPair in idActionPairList)
foreach (int agentId in actionIds)
var actionVal = new float[m_ActionSize.Length];
for (var j = 0; j < m_ActionSize.Length; j++)
if (lastActions.ContainsKey(agentId))
actionVal[j] = actionValues[agentIndex, j];
var actionVal = lastActions[agentId];
if (actionVal == null)
{
actionVal = new float[m_ActionSize.Length];
lastActions[agentId] = actionVal;
}
for (var j = 0; j < m_ActionSize.Length; j++)
{
actionVal[j] = actionValues[agentIndex, j];
}
idActionPair.action.Invoke(new AgentAction { vectorActions = actionVal });
agentIndex++;
}
}

/// The Applier for the Memory output tensor. Tensor is assumed to contain the new
/// memory data of the agents in the batch.
/// </summary>
public class MemoryOutputApplier : TensorApplier.IApplier
internal class MemoryOutputApplier : TensorApplier.IApplier
{
Dictionary<int, List<float>> m_Memories;

m_Memories = memories;
}
public void Apply(TensorProxy tensorProxy, IEnumerable<AgentIdActionPair> actions)
public void Apply(TensorProxy tensorProxy, IEnumerable<int> actionIds, Dictionary<int, float[]> lastActions)
foreach (var idActionPair in actions)
foreach (int agentId in actionIds)
if (!m_Memories.TryGetValue(idActionPair.agentId, out memory)
if (!m_Memories.TryGetValue(agentId, out memory)
|| memory.Count < memorySize)
{
memory = new List<float>();

m_Memories[idActionPair.agentId] = memory;
m_Memories[agentId] = memory;
public class BarracudaMemoryOutputApplier : TensorApplier.IApplier
internal class BarracudaMemoryOutputApplier : TensorApplier.IApplier
{
readonly int m_MemoriesCount;
readonly int m_MemoryIndex;

m_Memories = memories;
}
public void Apply(TensorProxy tensorProxy, IEnumerable<AgentIdActionPair> actions)
public void Apply(TensorProxy tensorProxy, IEnumerable<int> actionIds, Dictionary<int, float[]> lastActions)
foreach (var idActionPair in actions)
foreach (int agentId in actionIds)
if (!m_Memories.TryGetValue(idActionPair.agentId, out memory)
if (!m_Memories.TryGetValue(agentId, out memory)
|| memory.Count < memorySize * m_MemoriesCount)
{
memory = new List<float>();

memory[memorySize * m_MemoryIndex + j] = tensorProxy.data[agentIndex, j];
}
m_Memories[idActionPair.agentId] = memory;
m_Memories[agentId] = memory;
agentIndex++;
}
}
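
All appliers now share the signature Apply(TensorProxy, IEnumerable<int> actionIds, Dictionary<int, float[]> lastActions) and write each batch row into lastActions instead of invoking a per-agent callback. Below is a simplified sketch of the continuous-action case, with a plain float[,] standing in for the TensorProxy data; this is an assumed reduction, not the shipped applier.

// --- Illustrative sketch (not part of the commit diff) ---
using System.Collections.Generic;

static class ContinuousApplierSketch
{
    // data[i, j] stands in for the Barracuda tensor indexer used by TensorProxy.
    public static void Apply(
        float[,] data, int actionSize,
        IEnumerable<int> actionIds, Dictionary<int, float[]> lastActions)
    {
        var agentIndex = 0;
        foreach (var agentId in actionIds)
        {
            // Only agents still expecting an action have an entry; done agents were removed.
            if (lastActions.ContainsKey(agentId))
            {
                var actionValue = lastActions[agentId] ?? new float[actionSize];
                lastActions[agentId] = actionValue;
                for (var j = 0; j < actionSize; j++)
                {
                    actionValue[j] = data[agentIndex, j];
                }
            }
            agentIndex++;
        }
    }
}
// --- end sketch ---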

2
com.unity.ml-agents/Runtime/InferenceBrain/BarracudaModelParamLoader.cs


/// Prepares the Tensors for the Learning Brain and exposes a list of failed checks if Model
/// and BrainParameters are incompatible.
/// </summary>
public class BarracudaModelParamLoader
internal class BarracudaModelParamLoader
{
enum ModelActionType
{

20
com.unity.ml-agents/Runtime/InferenceBrain/GeneratorImpl.cs


/// and initializes its content to be zeros. Will only work on 2-dimensional tensors.
/// The second dimension of the Tensor will not be modified.
/// </summary>
public class BiDimensionalOutputGenerator : TensorGenerator.IGenerator
internal class BiDimensionalOutputGenerator : TensorGenerator.IGenerator
{
readonly ITensorAllocator m_Allocator;

/// Generates the Tensor corresponding to the BatchSize input : Will be a one dimensional
/// integer array of size 1 containing the batch size.
/// </summary>
public class BatchSizeGenerator : TensorGenerator.IGenerator
internal class BatchSizeGenerator : TensorGenerator.IGenerator
{
readonly ITensorAllocator m_Allocator;

/// Note : the sequence length is always one since recurrent networks only predict for
/// one step at the time.
/// </summary>
public class SequenceLengthGenerator : TensorGenerator.IGenerator
internal class SequenceLengthGenerator : TensorGenerator.IGenerator
{
readonly ITensorAllocator m_Allocator;

/// It will use the Vector Observation data contained in the agentInfo to fill the data
/// of the tensor.
/// </summary>
public class VectorObservationGenerator : TensorGenerator.IGenerator
internal class VectorObservationGenerator : TensorGenerator.IGenerator
{
readonly ITensorAllocator m_Allocator;
List<int> m_SensorIndices = new List<int>();

/// It will use the Memory data contained in the agentInfo to fill the data
/// of the tensor.
/// </summary>
public class RecurrentInputGenerator : TensorGenerator.IGenerator
internal class RecurrentInputGenerator : TensorGenerator.IGenerator
{
private readonly ITensorAllocator m_Allocator;
Dictionary<int, List<float>> m_Memories;

}
}
public class BarracudaRecurrentInputGenerator : TensorGenerator.IGenerator
internal class BarracudaRecurrentInputGenerator : TensorGenerator.IGenerator
{
int m_MemoriesCount;
readonly int m_MemoryIndex;

/// It will use the previous action data contained in the agentInfo to fill the data
/// of the tensor.
/// </summary>
public class PreviousActionInputGenerator : TensorGenerator.IGenerator
internal class PreviousActionInputGenerator : TensorGenerator.IGenerator
{
readonly ITensorAllocator m_Allocator;

/// It will use the Action Mask data contained in the agentInfo to fill the data
/// of the tensor.
/// </summary>
public class ActionMaskInputGenerator : TensorGenerator.IGenerator
internal class ActionMaskInputGenerator : TensorGenerator.IGenerator
{
readonly ITensorAllocator m_Allocator;

/// dimensional float array of dimension [batchSize x actionSize].
/// It will generate random input data from a normal distribution.
/// </summary>
public class RandomNormalInputGenerator : TensorGenerator.IGenerator
internal class RandomNormalInputGenerator : TensorGenerator.IGenerator
{
readonly RandomNormal m_RandomNormal;
readonly ITensorAllocator m_Allocator;

/// It will use the Texture input data contained in the agentInfo to fill the data
/// of the tensor.
/// </summary>
public class VisualObservationInputGenerator : TensorGenerator.IGenerator
internal class VisualObservationInputGenerator : TensorGenerator.IGenerator
{
readonly int m_SensorIndex;
readonly ITensorAllocator m_Allocator;

52
com.unity.ml-agents/Runtime/InferenceBrain/ModelRunner.cs


namespace MLAgents.InferenceBrain
{
public struct AgentInfoSensorsPair
internal struct AgentInfoSensorsPair
}
public struct AgentIdActionPair
{
public int agentId;
public Action<AgentAction> action;
public class ModelRunner
internal class ModelRunner
List<AgentIdActionPair> m_ActionFuncs = new List<AgentIdActionPair>();
Dictionary<int, float[]> m_LastActionsReceived = new Dictionary<int, float[]>();
List<int> m_OrderedAgentsRequestingDecisions = new List<int>();
ITensorAllocator m_TensorAllocator;
TensorGenerator m_TensorGenerator;
TensorApplier m_TensorApplier;

D.logEnabled = m_Verbose;
barracudaModel = ModelLoader.Load(model.Value);
barracudaModel = ModelLoader.Load(model);
? BarracudaWorkerFactory.Type.ComputePrecompiled
: BarracudaWorkerFactory.Type.CSharp;
m_Engine = BarracudaWorkerFactory.CreateWorker(executionDevice, barracudaModel, m_Verbose);
? WorkerFactory.Type.ComputePrecompiled
: WorkerFactory.Type.CSharp;
m_Engine = WorkerFactory.CreateWorker(executionDevice, barracudaModel, m_Verbose);
}
else
{

var outputs = new List<TensorProxy>();
foreach (var n in names)
{
var output = m_Engine.Peek(n);
var output = m_Engine.PeekOutput(n);
outputs.Add(TensorUtils.TensorProxyFromBarracuda(output, n));
}

public void PutObservations(AgentInfo info, List<ISensor> sensors, Action<AgentAction> action)
public void PutObservations(AgentInfo info, List<ISensor> sensors)
{
#if DEBUG
m_SensorShapeValidator.ValidateSensors(sensors);

sensors = sensors
});
m_ActionFuncs.Add(new AgentIdActionPair { action = action, agentId = info.episodeId });
// We add the episodeId to this list to maintain the order in which the decisions were requested
m_OrderedAgentsRequestingDecisions.Add(info.episodeId);
if (!m_LastActionsReceived.ContainsKey(info.episodeId))
{
m_LastActionsReceived[info.episodeId] = null;
}
if (info.done)
{
// If the agent is done, we remove the key from the last action dictionary since no action
// should be taken.
m_LastActionsReceived.Remove(info.episodeId);
}
}
public void DecideBatch()

Profiler.BeginSample($"MLAgents.{m_Model.name}.ApplyTensors");
// Update the outputs
m_TensorApplier.ApplyTensors(m_InferenceOutputs, m_ActionFuncs);
m_TensorApplier.ApplyTensors(m_InferenceOutputs, m_OrderedAgentsRequestingDecisions, m_LastActionsReceived);
Profiler.EndSample();
Profiler.EndSample();

m_ActionFuncs.Clear();
m_OrderedAgentsRequestingDecisions.Clear();
}
public float[] GetAction(int agentId)
{
if (m_LastActionsReceived.ContainsKey(agentId))
{
return m_LastActionsReceived[agentId];
}
return null;
}
}
}
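Taken together, the ModelRunner changes replace the per-agent action callbacks with a last-action dictionary keyed by episode id. A minimal sketch of the assumed calling pattern follows; names mirror the diff, and the surrounding setup (model loading, brain parameters) is omitted.

// Sketch of the assumed calling pattern (not part of the diff):
float[] DecideFor(ModelRunner modelRunner, AgentInfo info, List<ISensor> sensors)
{
    // 1. Queue observations for an agent that requested a decision this step.
    modelRunner.PutObservations(info, sensors);
    // 2. Run inference once for the whole queued batch.
    modelRunner.DecideBatch();
    // 3. Read the action back by episode id; null if the agent was marked done.
    return modelRunner.GetAction(info.episodeId);
}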

15
com.unity.ml-agents/Runtime/InferenceBrain/TensorApplier.cs


/// This action takes as input the tensor and the Dictionary of Agent to AgentInfo for
/// the current batch.
/// </summary>
public class TensorApplier
internal class TensorApplier
{
/// <summary>
/// A tensor Applier's Execute method takes a tensor and a Dictionary of Agent to AgentInfo.

/// <param name="tensorProxy">
/// The Tensor containing the data to be applied to the Agents
/// </param>
/// <param name="agents">
/// List of Agents that will receive the values of the Tensor.
/// <param name="actionIds"> List of Agents Ids that will be updated using the tensor's data</param>
/// <param name="lastActions"> Dictionary of AgentId to Actions to be updated</param>
void Apply(TensorProxy tensorProxy, IEnumerable<AgentIdActionPair> actions);
void Apply(TensorProxy tensorProxy, IEnumerable<int> actionIds, Dictionary<int, float[]> lastActions);
}
readonly Dictionary<string, IApplier> m_Dict = new Dictionary<string, IApplier>();

/// Updates the state of the agents based on the data present in the tensor.
/// </summary>
/// <param name="tensors"> Enumerable of tensors containing the data.</param>
/// <param name="agents"> List of Agents that will be updated using the tensor's data</param>
/// <param name="actionIds"> List of Agents Ids that will be updated using the tensor's data</param>
/// <param name="lastActions"> Dictionary of AgentId to Actions to be updated</param>
IEnumerable<TensorProxy> tensors, IEnumerable<AgentIdActionPair> actions)
IEnumerable<TensorProxy> tensors, IEnumerable<int> actionIds, Dictionary<int, float[]> lastActions)
{
foreach (var tensor in tensors)
{

$"Unknown tensorProxy expected as output : {tensor.name}");
}
m_Dict[tensor.name].Apply(tensor, actions);
m_Dict[tensor.name].Apply(tensor, actionIds, lastActions);
}
}
}
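Under the new IApplier signature, an applier copies one row of the output tensor into lastActions for each agent id, preserving the order the decisions were requested in. The class below is a hypothetical example for a continuous action head, written to illustrate the contract; the real appliers in the diff follow the same pattern.

// Illustrative IApplier under the new signature (hypothetical example class):
internal class ExampleContinuousApplier : TensorApplier.IApplier
{
    public void Apply(TensorProxy tensorProxy, IEnumerable<int> actionIds, Dictionary<int, float[]> lastActions)
    {
        var actionSize = (int)tensorProxy.shape[tensorProxy.shape.Length - 1];
        var agentIndex = 0;
        foreach (int agentId in actionIds)
        {
            // Copy this agent's row of the tensor into the shared last-action dictionary.
            var actions = new float[actionSize];
            for (var j = 0; j < actionSize; j++)
            {
                actions[j] = tensorProxy.data[agentIndex, j];
            }
            lastActions[agentId] = actions;
            agentIndex++;
        }
    }
}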

2
com.unity.ml-agents/Runtime/InferenceBrain/TensorGenerator.cs


/// When the TensorProxy is an Output of the model, only the shape of the Tensor will be
/// modified using the current batch size. The data will be pre-filled with zeros.
/// </summary>
public class TensorGenerator
internal class TensorGenerator
{
public interface IGenerator
{

2
com.unity.ml-agents/Runtime/InferenceBrain/TensorNames.cs


/// <summary>
/// Contains the names of the input and output tensors for the Inference Brain.
/// </summary>
public static class TensorNames
internal static class TensorNames
{
public const string BatchSizePlaceholder = "batch_size";
public const string SequenceLengthPlaceholder = "sequence_length";

6
com.unity.ml-agents/Runtime/InferenceBrain/TensorProxy.cs


public Tensor data;
}
public static class TensorUtils
internal static class TensorUtils
{
public static void ResizeTensor(TensorProxy tensor, int batch, ITensorAllocator allocator)
{

{
if (src.height == 1 && src.width == 1)
{
return new long[] {src.batch, src.channels};
return new long[] { src.batch, src.channels };
return new long[] {src.batch, src.height, src.width, src.channels};
return new long[] { src.batch, src.height, src.width, src.channels };
}
public static TensorProxy TensorProxyFromBarracuda(Tensor src, string nameOverride = null)

12
com.unity.ml-agents/Runtime/Policy/BarracudaPolicy.cs


/// every step. It uses a ModelRunner that is shared across all
/// Barracuda Policies that use the same model and inference devices.
/// </summary>
public class BarracudaPolicy : IPolicy
internal class BarracudaPolicy : IPolicy
private int m_AgentId;
/// <summary>
/// Sensor shapes for the associated Agents. All Agents must have the same shapes for their Sensors.

}
/// <inheritdoc />
public void RequestDecision(AgentInfo info, List<ISensor> sensors, Action<AgentAction> action)
public void RequestDecision(AgentInfo info, List<ISensor> sensors)
m_ModelRunner?.PutObservations(info, sensors, action);
m_AgentId = info.episodeId;
m_ModelRunner?.PutObservations(info, sensors);
public void DecideAction()
public float[] DecideAction()
return m_ModelRunner?.GetAction(m_AgentId);
}
public void Dispose()

19
com.unity.ml-agents/Runtime/Policy/BehaviorParameters.cs


[HideInInspector]
[SerializeField]
string m_BehaviorName = "My Behavior";
[HideInInspector][SerializeField]
int m_TeamID = 0;
[HideInInspector]
[SerializeField]
public int m_TeamID = 0;
[HideInInspector]
[SerializeField]
[Tooltip("Use all Sensor components attached to child GameObjects of this Agent.")]

public string behaviorName
{
get { return m_BehaviorName + "?team=" + m_TeamID;}
get { return m_BehaviorName; }
}
/// <summary>
/// Returns the behavior name, concatenated with any other metadata (i.e. team id).
/// </summary>
public string fullyQualifiedBehaviorName
{
get { return m_BehaviorName + "?team=" + m_TeamID; }
public IPolicy GeneratePolicy(Func<float[]> heuristic)
internal IPolicy GeneratePolicy(Func<float[]> heuristic)
{
switch (m_BehaviorType)
{

case BehaviorType.Default:
if (Academy.Instance.IsCommunicatorOn)
{
return new RemotePolicy(m_BrainParameters, behaviorName);
return new RemotePolicy(m_BrainParameters, fullyQualifiedBehaviorName);
}
if (m_Model != null)
{
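With the split above, behaviorName now returns only the user-facing name while fullyQualifiedBehaviorName carries the team metadata. A small illustrative usage from within a MonoBehaviour, using the default field values shown in the diff:

// Illustrative only: how the two properties resolve with the defaults above.
var behaviorParams = GetComponent<BehaviorParameters>();
Debug.Log(behaviorParams.behaviorName);               // "My Behavior"
Debug.Log(behaviorParams.fullyQualifiedBehaviorName); // "My Behavior?team=0"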

16
com.unity.ml-agents/Runtime/Policy/HeuristicPolicy.cs


/// to take decisions each time the RequestDecision method is
/// called.
/// </summary>
public class HeuristicPolicy : IPolicy
internal class HeuristicPolicy : IPolicy
Action<AgentAction> m_ActionFunc;
float[] m_LastDecision;
/// <inheritdoc />
public HeuristicPolicy(Func<float[]> heuristic)

/// <inheritdoc />
public void RequestDecision(AgentInfo info, List<ISensor> sensors, Action<AgentAction> action)
public void RequestDecision(AgentInfo info, List<ISensor> sensors)
m_ActionFunc = action;
m_LastDecision = m_Heuristic.Invoke();
public void DecideAction()
public float[] DecideAction()
if (m_ActionFunc != null)
{
m_ActionFunc.Invoke(new AgentAction { vectorActions = m_Heuristic.Invoke() });
m_ActionFunc = null;
}
return m_LastDecision;
}
public void Dispose()

6
com.unity.ml-agents/Runtime/Policy/IPolicy.cs


/// will not be taken immediately but will be taken before or when
/// DecideAction is called.
/// </summary>
public interface IPolicy : IDisposable
internal interface IPolicy : IDisposable
{
/// <summary>
/// Signals the Brain that the Agent needs a Decision. The Policy

/// <param name="agent"></param>
void RequestDecision(AgentInfo info, List<ISensor> sensors, Action<AgentAction> action);
void RequestDecision(AgentInfo info, List<ISensor> sensors);
/// <summary>
/// Signals the Policy that if the Decision has not been taken yet,

void DecideAction();
float[] DecideAction();
}
}
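The reworked IPolicy contract is synchronous from the caller's point of view: RequestDecision records the request, and DecideAction now returns the float[] decision directly. A minimal conforming implementation looks like the sketch below (the class name is hypothetical; it mirrors the TestPolicy used in the edit-mode tests further down).

// Minimal policy satisfying the new interface (hypothetical example):
internal class NoOpPolicy : IPolicy
{
    public void RequestDecision(AgentInfo info, List<ISensor> sensors) { }
    public float[] DecideAction() { return new float[0]; }
    public void Dispose() { }
}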

27
com.unity.ml-agents/Runtime/Policy/RemotePolicy.cs


/// The Remote Policy only works when training.
/// When training your Agents, the RemotePolicy will be controlled by Python.
/// </summary>
public class RemotePolicy : IPolicy
internal class RemotePolicy : IPolicy
string m_BehaviorName;
protected ICommunicator m_Communicator;
/// <summary>
/// Sensor shapes for the associated Agents. All Agents must have the same shapes for their Sensors.
/// </summary>
List<int[]> m_SensorShapes;
int m_AgentId;
string m_FullyQualifiedBehaviorName;
internal ICommunicator m_Communicator;
string behaviorName)
string fullyQualifiedBehaviorName)
m_BehaviorName = behaviorName;
m_FullyQualifiedBehaviorName = fullyQualifiedBehaviorName;
m_Communicator.SubscribeBrain(m_BehaviorName, brainParameters);
m_Communicator.SubscribeBrain(m_FullyQualifiedBehaviorName, brainParameters);
public void RequestDecision(AgentInfo info, List<ISensor> sensors, Action<AgentAction> action)
public void RequestDecision(AgentInfo info, List<ISensor> sensors)
m_Communicator?.PutObservations(m_BehaviorName, info, sensors, action);
m_AgentId = info.episodeId;
m_Communicator?.PutObservations(m_FullyQualifiedBehaviorName, info, sensors);
public void DecideAction()
public float[] DecideAction()
return m_Communicator?.GetActions(m_FullyQualifiedBehaviorName, m_AgentId);
}
public void Dispose()

2
com.unity.ml-agents/Runtime/Sensor/Observation.cs


namespace MLAgents.Sensor
{
public struct Observation
internal struct Observation
{
/// <summary>
/// The compressed sensor data. Assumed to be non-null if CompressionType != CompressionType.None

40
com.unity.ml-agents/Runtime/Sensor/RayPerceptionSensor.cs


public Vector3 worldEnd;
public bool castHit;
public float hitFraction;
public float castRadius;
}
public void Reset()

/// nothing was hit.
///
/// </summary>
/// <param name="rayLength"></param>
/// <param name="unscaledRayLength"></param>
/// <param name="castRadius">Radius of the sphere to use for spherecasting. If 0 or less, rays are used
/// <param name="unscaledCastRadius">Radius of the sphere to use for spherecasting. If 0 or less, rays are used
/// instead - this may be faster, especially for complex environments.</param>
/// <param name="transform">Transform of the GameObject</param>
/// <param name="castType">Whether to perform the casts in 2D or 3D.</param>

public static void PerceiveStatic(float rayLength,
public static void PerceiveStatic(float unscaledRayLength,
float startOffset, float endOffset, float castRadius,
float startOffset, float endOffset, float unscaledCastRadius,
Transform transform, CastType castType, float[] perceptionBuffer,
int layerMask = Physics.DefaultRaycastLayers,
DebugDisplayInfo debugInfo = null)

if (castType == CastType.Cast3D)
{
startPositionLocal = new Vector3(0, startOffset, 0);
endPositionLocal = PolarToCartesian3D(rayLength, angle);
endPositionLocal = PolarToCartesian3D(unscaledRayLength, angle);
endPositionLocal.y += endOffset;
}
else

endPositionLocal = PolarToCartesian2D(rayLength, angle);
endPositionLocal = PolarToCartesian2D(unscaledRayLength, angle);
}
var startPositionWorld = transform.TransformPoint(startPositionLocal);

// If there is non-unity scale, |rayDirection| will be different from rayLength.
// We want to use this transformed ray length for determining cast length, hit fraction etc.
// We also use it to scale up or down the sphere or circle radii
var scaledRayLength = rayDirection.magnitude;
// Avoid 0/0 if unscaledRayLength is 0
var scaledCastRadius = unscaledRayLength > 0 ? unscaledCastRadius * scaledRayLength / unscaledRayLength : unscaledCastRadius;
// Do the cast and assign the hit information for each detectable object.
// sublist[0] <- did hit detectableObjects[0]

if (castType == CastType.Cast3D)
{
RaycastHit rayHit;
if (castRadius > 0f)
if (scaledCastRadius > 0f)
castHit = Physics.SphereCast(startPositionWorld, castRadius, rayDirection, out rayHit,
rayLength, layerMask);
castHit = Physics.SphereCast(startPositionWorld, scaledCastRadius, rayDirection, out rayHit,
scaledRayLength, layerMask);
rayLength, layerMask);
scaledRayLength, layerMask);
hitFraction = castHit ? rayHit.distance / rayLength : 1.0f;
// If scaledRayLength is 0, we still could have a hit with sphere casts (maybe?).
// To avoid 0/0, set the fraction to 0.
hitFraction = castHit ? (scaledRayLength > 0 ? rayHit.distance / scaledRayLength : 0.0f) : 1.0f;
if (castRadius > 0f)
if (scaledCastRadius > 0f)
rayHit = Physics2D.CircleCast(startPositionWorld, castRadius, rayDirection,
rayLength, layerMask);
rayHit = Physics2D.CircleCast(startPositionWorld, scaledCastRadius, rayDirection,
scaledRayLength, layerMask);
rayHit = Physics2D.Raycast(startPositionWorld, rayDirection, rayLength, layerMask);
rayHit = Physics2D.Raycast(startPositionWorld, rayDirection, scaledRayLength, layerMask);
}
castHit = rayHit;

debugInfo.rayInfos[rayIndex].worldEnd = endPositionWorld;
debugInfo.rayInfos[rayIndex].castHit = castHit;
debugInfo.rayInfos[rayIndex].hitFraction = hitFraction;
debugInfo.rayInfos[rayIndex].castRadius = scaledCastRadius;
}
else if (Application.isEditor)
{

4
com.unity.ml-agents/Runtime/Sensor/RayPerceptionSensorComponentBase.cs


// hit fraction ^2 will shift "far" hits closer to the hit color
var lerpT = rayInfo.hitFraction * rayInfo.hitFraction;
var color = Color.Lerp(rayHitColor, rayMissColor, lerpT);
color.a = alpha;
color.a *= alpha;
Gizmos.color = color;
Gizmos.DrawRay(startPositionWorld, rayDirection);

var hitRadius = Mathf.Max(sphereCastRadius, .05f);
var hitRadius = Mathf.Max(rayInfo.castRadius, .05f);
Gizmos.DrawWireSphere(startPositionWorld + rayDirection, hitRadius);
}
}
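The scaling change in RayPerceptionSensor.PerceiveStatic above boils down to rescaling the cast radius by the ratio of the transformed ray length to the configured one. A condensed restatement as a standalone helper (illustrative, not part of the diff): with a uniform scale of 2, a 20-unit ray becomes 40 units long and a 0.5 sphere radius becomes 1.0, which is what TestRaycastsScaled later asserts.

// Condensed restatement of the scaling logic introduced above (illustrative helper):
static float ScaleCastRadius(float unscaledCastRadius, float unscaledRayLength, float scaledRayLength)
{
    // Avoid 0/0 when the unscaled ray length is zero.
    return unscaledRayLength > 0f
        ? unscaledCastRadius * scaledRayLength / unscaledRayLength
        : unscaledCastRadius;
}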

2
com.unity.ml-agents/Runtime/Sensor/SensorShapeValidator.cs


namespace MLAgents.Sensor
{
public class SensorShapeValidator
internal class SensorShapeValidator
{
private List<int[]> m_SensorShapes;

2
com.unity.ml-agents/Runtime/Sensor/StackingSensor.cs


/// | t = now - 3 | t = now - 2 | t = now - 1 | t = now |
/// Internally, a circular buffer of arrays is used. The m_CurrentIndex represents the most recent observation.
/// </summary>
public class StackingSensor : ISensor
internal class StackingSensor : ISensor
{
/// <summary>
/// The wrapped sensor.

71
com.unity.ml-agents/Runtime/Timer.cs


using UnityEngine.Profiling;
using System.Runtime.Serialization;
using System.Runtime.Serialization.Json;
using UnityEngine.SceneManagement;
public class TimerNode
internal class TimerNode
{
static string s_Separator = ".";
static double s_TicksToSeconds = 1e-7; // 100 ns per tick

/// <summary>
/// Stop timing a block of code, and increment internal counts.
/// </summary>
public void End()
public void End(bool isRecording)
var elapsed = DateTime.Now.Ticks - m_TickStart;
m_TotalTicks += elapsed;
m_TickStart = 0;
m_NumCalls++;
if (isRecording)
{
var elapsed = DateTime.Now.Ticks - m_TickStart;
m_TotalTicks += elapsed;
m_TickStart = 0;
m_NumCalls++;
}
// Note that samplers are always updated regardless of recording state, to ensure matching start and ends.
m_Sampler?.End();
}

/// Tracks the most recent value of a metric. This is analogous to gauges in statsd.
/// </summary>
[DataContract]
public class GaugeNode
internal class GaugeNode
const float k_SmoothingFactor = .25f; // weight for exponential moving average.
[DataMember]
public float value;
[DataMember(Name = "min")]

[DataMember(Name = "weightedAverage")]
public float weightedAverage;
weightedAverage = value;
minValue = value;
maxValue = value;
count = 1;

{
minValue = Mathf.Min(minValue, newValue);
maxValue = Mathf.Max(maxValue, newValue);
// update exponential moving average
weightedAverage = (k_SmoothingFactor * newValue) + ((1f - k_SmoothingFactor) * weightedAverage);
value = newValue;
++count;
}
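As a quick check of the update rule above: with k_SmoothingFactor = 0.25, a previous weightedAverage of 10 and a new value of 20 give 0.25 * 20 + 0.75 * 10 = 12.5.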

Stack<TimerNode> m_Stack;
TimerNode m_RootNode;
// Whether or not new timers and gauges can be added.
bool m_Recording = true;
// Explicit static constructor to tell C# compiler
// not to mark type as beforefieldinit

get { return k_Instance; }
}
public TimerNode RootNode
internal TimerNode RootNode
}
public bool Recording
{
get { return m_Recording; }
set { m_Recording = value; }
if (!Recording)
{
return;
}
if (!float.IsNaN(value))
{
GaugeNode gauge;

void Pop()
{
var node = m_Stack.Pop();
node.End();
node.End(Recording);
}
/// <summary>

/// Potentially slow so call sparingly.
/// </summary>
/// <returns></returns>
public string DebugGetTimerString()
internal string DebugGetTimerString()
{
return m_RootNode.DebugGetTimerString();
}

/// <param name="filename"></param>
public void SaveJsonTimers(string filename = null)
{
if (filename == null)
# if UNITY_EDITOR || UNITY_STANDALONE_WIN || UNITY_STANDALONE_OSX || UNITY_STANDALONE_LINUX
try
{
if (filename == null)
{
var activeScene = SceneManager.GetActiveScene();
var timerDir = Path.Combine(Application.dataPath, "ML-Agents", "Timers");
Directory.CreateDirectory(timerDir);
filename = Path.Combine(timerDir, $"{activeScene.name}_timers.json");
}
var fs = new FileStream(filename, FileMode.Create, FileAccess.Write);
SaveJsonTimers(fs);
fs.Close();
}
catch (IOException)
var fullpath = Path.GetFullPath(".");
filename = $"{fullpath}/csharp_timers.json";
// It's possible we don't have write access to the directory.
Debug.LogWarning($"Unable to save timers to file {filename}");
var fs = new FileStream(filename, FileMode.Create, FileAccess.Write);
SaveJsonTimers(fs);
fs.Close();
#endif
}
/// <summary>

9
com.unity.ml-agents/Runtime/Unity.ML-Agents.asmdef


{
"name": "Unity.ML-Agents",
"references": [],
"references": [
"Barracuda"
],
"overrideReferences": true,
"overrideReferences": false,
"Barracuda.dll",
}
}

2
com.unity.ml-agents/Runtime/Utilities.cs


namespace MLAgents
{
public static class Utilities
internal static class Utilities
{
/// <summary>
/// Puts a Texture2D into a WriteAdapter.

45
com.unity.ml-agents/Tests/Editor/EditModeTestInternalBrainTensorApplier.cs


var applier = new ContinuousActionOutputApplier();
var action0 = new AgentAction();
var action1 = new AgentAction();
var callbacks = new List<AgentIdActionPair>()
{
new AgentIdActionPair {agentId = 0, action = (a) => action0 = a},
new AgentIdActionPair {agentId = 1, action = (a) => action1 = a}
};
var agentIds = new List<int>() { 0, 1 };
// Dictionary from AgentId to Action
var actionDict = new Dictionary<int, float[]>() { { 0, null }, { 1, null } };
applier.Apply(inputTensor, callbacks);
applier.Apply(inputTensor, agentIds, actionDict);
Assert.AreEqual(action0.vectorActions[0], 1);
Assert.AreEqual(action0.vectorActions[1], 2);
Assert.AreEqual(action0.vectorActions[2], 3);
Assert.AreEqual(actionDict[0][0], 1);
Assert.AreEqual(actionDict[0][1], 2);
Assert.AreEqual(actionDict[0][2], 3);
Assert.AreEqual(action1.vectorActions[0], 4);
Assert.AreEqual(action1.vectorActions[1], 5);
Assert.AreEqual(action1.vectorActions[2], 6);
Assert.AreEqual(actionDict[1][0], 4);
Assert.AreEqual(actionDict[1][1], 5);
Assert.AreEqual(actionDict[1][2], 6);
}
[Test]

var alloc = new TensorCachingAllocator();
var applier = new DiscreteActionOutputApplier(new[] { 2, 3 }, 0, alloc);
var action0 = new AgentAction();
var action1 = new AgentAction();
var callbacks = new List<AgentIdActionPair>()
{
new AgentIdActionPair {agentId = 0, action = (a) => action0 = a},
new AgentIdActionPair {agentId = 1, action = (a) => action1 = a}
};
var agentIds = new List<int>() { 0, 1 };
// Dictionary from AgentId to Action
var actionDict = new Dictionary<int, float[]>() { { 0, null }, { 1, null } };
applier.Apply(inputTensor, callbacks);
applier.Apply(inputTensor, agentIds, actionDict);
Assert.AreEqual(action0.vectorActions[0], 1);
Assert.AreEqual(action0.vectorActions[1], 1);
Assert.AreEqual(actionDict[0][0], 1);
Assert.AreEqual(actionDict[0][1], 1);
Assert.AreEqual(action1.vectorActions[0], 1);
Assert.AreEqual(action1.vectorActions[1], 2);
Assert.AreEqual(actionDict[1][0], 1);
Assert.AreEqual(actionDict[1][1], 2);
alloc.Dispose();
}
}

4
com.unity.ml-agents/Tests/Editor/EditModeTestInternalBrainTensorGenerator.cs


var agents = new List<TestAgent> { agentA, agentB };
foreach (var agent in agents)
{
var agentEnableMethod = typeof(Agent).GetMethod("OnEnableHelper",
BindingFlags.Instance | BindingFlags.NonPublic);
agentEnableMethod?.Invoke(agent, new object[] {});
agent.LazyInitialize();
}
agentA.collectObservationsSensor.AddObservation(new Vector3(1, 2, 3));
agentB.collectObservationsSensor.AddObservation(new Vector3(4, 5, 6));

50
com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs


using NUnit.Framework;
using System.Reflection;
using MLAgents.Sensor;
using System.Collections.Generic;
internal class TestPolicy : IPolicy
{
public void RequestDecision(AgentInfo info, List<ISensor> sensors) { }
public float[] DecideAction() { return new float[0]; }
public void Dispose() { }
}
public AgentInfo _Info
internal AgentInfo _Info
{
get
{

}
}
internal void SetPolicy(IPolicy policy)
{
typeof(Agent).GetField("m_Brain", BindingFlags.Instance | BindingFlags.NonPublic).SetValue(this, policy);
}
public int initializeAgentCalls;
public int collectObservationsCalls;
public int agentActionCalls;

return sensorName;
}
public void Update() {}
public void Update() { }
}
[TestFixture]

Assert.AreEqual(0, agent1.agentActionCalls);
Assert.AreEqual(0, agent2.agentActionCalls);
var agentEnableMethod = typeof(Agent).GetMethod("OnEnableHelper",
BindingFlags.Instance | BindingFlags.NonPublic);
agentEnableMethod?.Invoke(agent2, new object[] {});
agentEnableMethod?.Invoke(agent1, new object[] {});
agent2.LazyInitialize();
agent1.LazyInitialize();
// agent1 was not enabled when the academy started
// The agents have been initialized

var aca = Academy.Instance;
var agentEnableMethod = typeof(Agent).GetMethod(
"OnEnableHelper", BindingFlags.Instance | BindingFlags.NonPublic);
var decisionRequester = agent1.gameObject.AddComponent<DecisionRequester>();
decisionRequester.DecisionPeriod = 2;
decisionRequester.Awake();

agentEnableMethod?.Invoke(agent1, new object[] {});
agent1.LazyInitialize();
var numberAgent1Reset = 0;
var numberAgent2Initialization = 0;

//Agent 2 is only initialized at step 2
if (i == 2)
{
agentEnableMethod?.Invoke(agent2, new object[] {});
agent2.LazyInitialize();
numberAgent2Initialization += 1;
}

var aca = Academy.Instance;
var agentEnableMethod = typeof(Agent).GetMethod(
"OnEnableHelper", BindingFlags.Instance | BindingFlags.NonPublic);
agentEnableMethod?.Invoke(agent2, new object[] {});
agent2.LazyInitialize();
var numberAgent1Reset = 0;
var numberAgent2Reset = 0;

//Agent 1 is only initialized at step 2
if (i == 2)
{
agentEnableMethod?.Invoke(agent1, new object[] {});
agent1.LazyInitialize();
}
// Set agent 1 to done every 11 steps to test behavior
if (i % 11 == 5)

var agent2 = agentGo2.GetComponent<TestAgent>();
var aca = Academy.Instance;
var agentEnableMethod = typeof(Agent).GetMethod(
"OnEnableHelper", BindingFlags.Instance | BindingFlags.NonPublic);
agent1.maxStep = 20;
agentEnableMethod?.Invoke(agent2, new object[] {});
agentEnableMethod?.Invoke(agent1, new object[] {});
agent1.maxStep = 20;
agent2.LazyInitialize();
agent1.LazyInitialize();
agent2.SetPolicy(new TestPolicy());
var j = 0;
for (var i = 0; i < 500; i++)

84
com.unity.ml-agents/Tests/Editor/Sensor/RayPerceptionSensorTests.cs


}
}
}
[Test]
public void TestRaycastsScaled()
{
SetupScene();
var obj = new GameObject("agent");
var perception = obj.AddComponent<RayPerceptionSensorComponent3D>();
obj.transform.localScale = new Vector3(2, 2, 2);
perception.raysPerDirection = 0;
perception.maxRayDegrees = 45;
perception.rayLength = 20;
perception.detectableTags = new List<string>();
perception.detectableTags.Add(k_CubeTag);
var radii = new[] { 0f, .5f };
foreach (var castRadius in radii)
{
perception.sphereCastRadius = castRadius;
var sensor = perception.CreateSensor();
var expectedObs = (2 * perception.raysPerDirection + 1) * (perception.detectableTags.Count + 2);
Assert.AreEqual(sensor.GetObservationShape()[0], expectedObs);
var outputBuffer = new float[expectedObs];
WriteAdapter writer = new WriteAdapter();
writer.SetTarget(outputBuffer, sensor.GetObservationShape(), 0);
var numWritten = sensor.Write(writer);
Assert.AreEqual(numWritten, expectedObs);
// Expected hits:
// ray 0 should hit the cube at roughly 1/4 way
//
Assert.AreEqual(1.0f, outputBuffer[0]); // hit cube
Assert.AreEqual(0.0f, outputBuffer[1]); // missed unknown tag
// Hit is at z=9.0 in world space, ray length was 20
// But scale increases the cast size and the ray length
var scaledRayLength = 2 * perception.rayLength;
var scaledCastRadius = 2 * castRadius;
Assert.That(
outputBuffer[2], Is.EqualTo((9.5f - scaledCastRadius) / scaledRayLength).Within(.0005f)
);
}
}
[Test]
public void TestRayZeroLength()
{
// Place the cube touching the origin
var cube = GameObject.CreatePrimitive(PrimitiveType.Cube);
cube.transform.position = new Vector3(0, 0, .5f);
cube.tag = k_CubeTag;
Physics.SyncTransforms();
var obj = new GameObject("agent");
var perception = obj.AddComponent<RayPerceptionSensorComponent3D>();
perception.raysPerDirection = 0;
perception.rayLength = 0.0f;
perception.sphereCastRadius = .5f;
perception.detectableTags = new List<string>();
perception.detectableTags.Add(k_CubeTag);
{
// Set the layer mask to either the default, or one that ignores the close cube's layer
var sensor = perception.CreateSensor();
var expectedObs = (2 * perception.raysPerDirection + 1) * (perception.detectableTags.Count + 2);
Assert.AreEqual(sensor.GetObservationShape()[0], expectedObs);
var outputBuffer = new float[expectedObs];
WriteAdapter writer = new WriteAdapter();
writer.SetTarget(outputBuffer, sensor.GetObservationShape(), 0);
var numWritten = sensor.Write(writer);
Assert.AreEqual(numWritten, expectedObs);
// hit fraction is arbitrary but should be finite in [0,1]
Assert.GreaterOrEqual(outputBuffer[2], 0.0f);
Assert.LessOrEqual(outputBuffer[2], 1.0f);
}
}
}
}

8
com.unity.ml-agents/Tests/Editor/Unity.ML-Agents.Editor.Tests.asmdef


"name": "Unity.ML-Agents.Editor.Tests",
"references": [
"Unity.ML-Agents.Editor",
"Unity.ML-Agents"
"Unity.ML-Agents",
"Barracuda"
],
"optionalUnityReferences": [
"TestAssemblies"

"precompiledReferences": [
"System.IO.Abstractions.dll",
"System.IO.Abstractions.TestingHelpers.dll",
"Google.Protobuf.dll",
"Barracuda.dll"
"Google.Protobuf.dll"
}
}

4
com.unity.ml-agents/package.json


"displayName":"ML Agents",
"version": "0.13.0-preview",
"unity": "2018.4",
"description": "Add interactivity to your game with ML-Agents trained using Deep Reinforcement Learning. \n\nFor best results, use this text to summarize: \n\u25AA What the package does \n\u25AA How it can benefit the user \n\nNote: Special formatting characters are supported, including line breaks ('\\n') and bullets ('\\u25AA').",
"description": "Add interactivity to your game with ML-Agents trained using Deep Reinforcement Learning.",
"com.unity.barracuda": "0.3.2-preview"
"com.unity.barracuda": "0.5.0-preview"
}
}

12
config/curricula/wall_jump.yaml


big_wall_max_height: [4.0, 7.0, 8.0, 8.0]
SmallWallJump:
measure: progress
thresholds: [0.1, 0.3, 0.5]
min_lesson_length: 100
signal_smoothing: true
parameters:
small_wall_height: [1.5, 2.0, 2.5, 4.0]
measure: progress
thresholds: [0.1, 0.3, 0.5]
min_lesson_length: 100
signal_smoothing: true
parameters:
small_wall_height: [1.5, 2.0, 2.5, 4.0]

2
config/sac_trainer_config.yaml


Bouncer:
normalize: true
max_steps: 2.0e7
max_steps: 2.0e6
num_layers: 2
hidden_units: 64
summary_freq: 20000

48
config/trainer_config.yaml


Bouncer:
normalize: true
max_steps: 2.0e7
max_steps: 7.0e6
num_layers: 2
hidden_units: 64

batch_size: 128
buffer_size: 2048
beta: 5.0e-3
hidden_units: 256
summary_freq: 20000
time_horizon: 128
num_layers: 2
normalize: false
Striker:
max_steps: 5.0e6
learning_rate: 1e-3
batch_size: 128
num_epoch: 3
buffer_size: 2000
beta: 1.0e-2
hidden_units: 256
summary_freq: 20000
time_horizon: 128
num_layers: 2
normalize: false
Goalie:
max_steps: 5.0e6
learning_rate: 1e-3
batch_size: 320
num_epoch: 3
buffer_size: 2000
beta: 1.0e-2
hidden_units: 256
summary_freq: 20000
time_horizon: 128

Tennis:
normalize: true
max_steps: 2e7
max_steps: 5.0e7
batch_size: 1024
buffer_size: 10240
time_horizon: 1000
self_play:
window: 10
play_against_current_self_ratio: 0.5
save_steps: 50000
swap_steps: 50000
Soccer:
normalize: false
max_steps: 5.0e7
learning_rate_schedule: constant
batch_size: 2048
buffer_size: 20480
hidden_units: 512
time_horizon: 1000
num_layers: 2
self_play:
window: 10
play_against_current_self_ratio: 0.5

30
docs/Installation.md


to help you get started.
### Package Installation
The ML-Agents C# SDK is transitioning to a Unity Package. While we are working on getting it into the
official packages list, you can add the `com.unity.ml-agents` package to your project by
navigating to the menu `Window` -> `Package Manager`. In the package manager window click
on the `+` button.
If you intend to copy the `com.unity.ml-agents` folder in to your project, ensure that
you have the [Barracuda preview package](https://docs.unity3d.com/Packages/com.unity.barracuda@0.3/manual/index.html) installed.
<p align="center">
<img src="images/unity_package_manager_window.png"
alt="Linux Build Support"
width="500" border="10" />
</p>
To install the Barracuda package in later versions of Unity, navigate to the Package
Manager window by navigating to the menu `Window` -> `Package Manager`. Click on the
`Advanced` dropdown menu to the left of the search bar and make sure "Show Preview Packages"
is checked. Search for or select the `Barracuda` package and install the latest version.
**NOTE:** In Unity 2018.4 it's on the bottom right of the packages list, and in Unity 2019.3 it's on the top left of the packages list.
Select `Add package from disk...` and navigate into the
`com.unity.ml-agents` folder and select the `package.json` file.
<img src="images/barracuda-package.png"
alt="Barracuda Package Manager"
width="710" border="10"
height="569" />
<img src="images/unity_package_json.png"
alt="Linux Build Support"
width="500" border="10" />
If you are going to follow the examples from our documentation, you can open the `Project`
folder in Unity and start tinkering immediately.
The `ml-agents` subdirectory contains a Python package which provides deep reinforcement
learning trainers to use with Unity environments.

6
docs/Learning-Environment-Create-New.md


3. Select the Floor Plane to view its properties in the Inspector window.
4. Set Transform to Position = (0, 0, 0), Rotation = (0, 0, 0), Scale = (1, 1, 1).
5. On the Plane's Mesh Renderer, expand the Materials property and change the
default-material to *LightGridFloorSquare* (or any suitable material of your choice).
default-material to *GridMatFloor* (or any suitable material of your choice).
(To set a new material, click the small circle icon next to the current material
name. This opens the **Object Picker** dialog so that you can choose a

3. Select the Target Cube to view its properties in the Inspector window.
4. Set Transform to Position = (3, 0.5, 3), Rotation = (0, 0, 0), Scale = (1, 1, 1).
5. On the Cube's Mesh Renderer, expand the Materials property and change the
default-material to *Block*.
default-material to *AgentBlue*.
![The Target Cube in the Inspector window](images/mlagents-NewTutBlock.png)

3. Select the RollerAgent Sphere to view its properties in the Inspector window.
4. Set Transform to Position = (0, 0.5, 0), Rotation = (0, 0, 0), Scale = (1, 1, 1).
5. On the Sphere's Mesh Renderer, expand the Materials property and change the
default-material to *CheckerSquare*.
default-material to *Checkers_Ball*.
6. Click **Add Component**.
7. Add the Physics/Rigidbody component to the Sphere.

2
docs/Learning-Environment-Design-Agents.md


always cast forward, and this many rays are cast to the left and right.
* _Max Ray Degrees_ The angle (in degrees) for the outermost rays. 90 degrees
corresponds to the left and right of the agent.
* _ Sphere Cast Radius_ The size of the sphere used for sphere casting. If set
* _Sphere Cast Radius_ The size of the sphere used for sphere casting. If set
to 0, rays will be used instead of spheres. Rays may be more efficient,
especially in complex scenes.
* _Ray Length_ The length of the casts

54
docs/Learning-Environment-Examples.md


![Tennis](images/tennis.png)
* Set-up: Two-player game where agents control rackets to bounce ball over a
* Set-up: Two-player game where agents control rackets to hit a ball over the
* Goal: The agents must bounce ball between one another while not dropping or
sending ball out of bounds.
* Goal: The agents must hit the ball so that the opponent cannot hit a valid
return.
* +0.1 To agent when hitting ball over net.
* -0.1 To agent who let ball hit their ground, or hit ball out of bounds.
* +1.0 To the agent that wins the point. An agent wins a point by preventing
the opponent from hitting a valid return.
* -1.0 To the agent who loses the point.
* Vector Observation space: 8 variables corresponding to position and velocity
of ball and racket.
* Vector Action space: (Continuous) Size of 2, corresponding to movement
toward net or away from net, and jumping.
* Vector Observation space: 9 variables corresponding to position, velocity
and orientation of ball and racket.
* Vector Action space: (Continuous) Size of 3, corresponding to movement
toward net or away from net, jumping and rotation.
* angle: Angle of the racket from the vertical (Y) axis.
* Default: 55
* Recommended Minimum: 35
* Recommended Maximum: 65
* Default: 1
* Default: .5
* Benchmark Mean Reward: 2.5
## [Push Block](https://youtu.be/jKdw216ZgoE)

* Set-up: Environment where four agents compete in a 2 vs 2 toy soccer game.
* Goal:
* Striker: Get the ball into the opponent's goal.
* Goalie: Prevent the ball from entering its own goal.
* Agents: The environment contains four agents, with two different sets of
Behavior Parameters : Striker and Goalie.
* Get the ball into the opponent's goal while preventing
the ball from entering own goal.
* Goalie:
* Agents: The environment contains four agents, with the same
Behavior Parameters : Soccer.
* Striker:
* -0.1 When ball enters own team's goal.
* -0.001 Existential penalty.
* Goalie:
* +0.1 When ball enters opponents goal.
* +0.001 Existential bonus.
* -0.001 Existential penalty.
* Vector Observation space: 112 corresponding to local 14 ray casts, each
detecting 7 possible object types, along with the object's distance.
Perception is in 180 degree view from front of agent.
* Vector Action space: (Discrete) One Branch
* Striker: 6 actions corresponding to forward, backward, sideways movement,
* Vector Observation space: 336 corresponding to 11 ray-casts forward distributed over 120 degrees (264)
and 3 ray-casts backward distributed over 90 degrees each detecting 6 possible object types, along with the object's distance.
The forward ray-casts contribute 264 state dimensions and backward 72 state dimensions.
* Vector Action space: (Discrete) Three branched actions corresponding to forward, backward, sideways movement,
* Goalie: 4 actions corresponding to forward, backward, sideways movement.
* Visual Observations: None
* Float Properties: Two
* ball_scale: Specifies the scale of the ball in the 3 dimensions (equal across the three dimensions)

* Default: 9.81
* Recommended minimum: 6
* Recommended maximum: 20
* Benchmark Mean Reward (Striker & Goalie): 0 (the means will be inverse
of each other and criss crosses during training) __Note that our trainer is currently unable to consistently train this environment__
## Walker

40
docs/Migrating.md


# Migrating
## Migrating from 0.14 to latest
### Important changes
### Steps to Migrate
## Migrating from 0.13 to latest
## Migrating from 0.13 to 0.14
* The `UnitySDK` folder has been split into a Unity Package (`com.unity.ml-agents`) and an examples project (`Project`). Please follow the [Installation Guide](Installation.md) to get up and running with this new repo structure.
* Several changes were made to how agents are reset and marked as done:
* Calling `Done()` on the Agent will now reset it immediately and call the `AgentReset` virtual method. (This is to simplify the previous logic in which the Agent had to wait for the next `EnvironmentStep` to reset)
* The "Reset on Done" setting in AgentParameters was removed; this is now effectively always true. `AgentOnDone` virtual method on the Agent has been removed.
* The `--num-runs` command-line option has been removed.
* The "Reset on Done" setting in AgentParameters was removed; this is now effectively always true. `AgentOnDone` virtual method on the Agent has been removed.
* Agents will always request a decision after being marked as `Done()` and will no longer wait for the next call to `RequestDecision()`.
* The `agentParameters` field of the Agent has been removed. (Contained only `maxStep` information)
* `maxStep` is now a public field on the Agent. (Was moved from `agentParameters`)
* The `Info` field of the Agent has been made private. (Was only used internally and not meant to be modified outside of the Agent)
* The `GetReward()` method on the Agent has been removed. (It was being confused with `GetCumulativeReward()`)
* The `AgentAction` struct no longer contains a `value` field. (Value estimates were not set during inference)
* The `GetValueEstimate()` method on the Agent has been removed.
* The `UpdateValueAction()` method on the Agent has been removed.
* Calling `Done()` on the Agent will now reset it immediately and call the `AgentReset` virtual method. (This is to simplify the previous logic in which the Agent had to wait for the next `EnvironmentStep` to reset)
* The `--num-runs` command-line option has been removed from `mlagents-learn`.
* Several fields on the Agent were removed or made private in order to simplify the interface.
* The `agentParameters` field of the Agent has been removed. (Contained only `maxStep` information)
* `maxStep` is now a public field on the Agent. (Was moved from `agentParameters`)
* The `Info` field of the Agent has been made private. (Was only used internally and not meant to be modified outside of the Agent)
* The `GetReward()` method on the Agent has been removed. (It was being confused with `GetCumulativeReward()`)
* The `AgentAction` struct no longer contains a `value` field. (Value estimates were not set during inference)
* The `GetValueEstimate()` method on the Agent has been removed.
* The `UpdateValueAction()` method on the Agent has been removed.
* The deprecated `RayPerception3D` and `RayPerception2D` classes were removed, and the `legacyHitFractionBehavior` argument was removed from `RayPerceptionSensor.PerceiveStatic()`.
* RayPerceptionSensor was inconsistent in how it handle scale on the Agent's transform. It now scales the ray length and sphere size for casting as the transform's scale changes.
* Follow the instructions on how to install the `com.unity.ml-agents` package into your project in the [Installation Guide](Installation.md).
* If your Agent implemented `AgentOnDone` and did not have the checkbox `Reset On Done` checked in the inspector, you must call the code that was in `AgentOnDone` manually.
* If you give your Agent a reward or penalty at the end of an episode (e.g. for reaching a goal or falling off of a platform), make sure you call `AddReward()` or `SetReward()` *before* calling `Done()`. Previously, the order didn't matter.
* If you were not using `On Demand Decision` for your Agent, you **must** add a `DecisionRequester` component to your Agent GameObject and set its `Decision Period` field to the old `Decision Period` of the Agent.
* If you have a class that inherits from Academy:
* If the class didn't override any of the virtual methods and didn't store any additional data, you can just remove the old script from the scene.

* Move the AcademyStep code to MonoBehaviour.FixedUpdate
* Move the OnDestroy code to MonoBehaviour.OnDestroy or add it to the to Academy.DestroyAction action.
* Move the OnDestroy code to MonoBehaviour.OnDestroy.
* If your Agent implements `AgentOnDone` and your Agent does not have the checkbox `Reset On Done` checked in the inspector, you must call the code that was in `AgentOnDone` manually.
* If you have a model trained which uses RayPerceptionSensor and has non-1.0 scale in the Agent's transform, it must be retrained.
## Migrating from ML-Agents toolkit v0.12.0 to v0.13.0

* `UnitySDK/Assets/ML-Agents/Scripts/Communicator.cs` and its class `Communicator` have been renamed to `UnitySDK/Assets/ML-Agents/Scripts/ICommunicator.cs` and `ICommunicator` respectively.
* The `SpaceType` Enums `discrete`, and `continuous` have been renamed to `Discrete` and `Continuous`.
* We have removed the `Done` call as well as the capacity to set `Max Steps` on the Academy. Therefore an AcademyReset will never be triggered from C# (only from Python). If you want to reset the simulation after a
fixed number of steps, or when an event in the simulation occurs, we recommend looking at our multi-agent example environments (such as BananaCollector).
fixed number of steps, or when an event in the simulation occurs, we recommend looking at our multi-agent example environments (such as FoodCollector).
In our examples, groups of Agents can be reset through an "Area" that can reset groups of Agents.
* The import for `mlagents.envs.UnityEnvironment` was removed. If you are using the Python API, change `from mlagents_envs import UnityEnvironment` to `from mlagents_envs.environment import UnityEnvironment`.
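A minimal sketch of the reward-before-Done ordering called out in the 0.14 migration notes above, using a hypothetical reachedGoal flag inside an Agent subclass:

// Hypothetical end-of-episode handling after migrating to 0.14:
// the reward must be assigned before Done() is called.
if (reachedGoal)
{
    SetReward(1.0f);   // or AddReward(1.0f)
    Done();            // resets the Agent immediately and calls AgentReset()
}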

12
docs/Training-Curriculum-Learning.md


big_wall_min_height: [0.0, 4.0, 6.0, 8.0]
big_wall_max_height: [4.0, 7.0, 8.0, 8.0]
SmallWallJump:
measure: progress
thresholds: [0.1, 0.3, 0.5]
min_lesson_length: 100
signal_smoothing: true
parameters:
small_wall_height: [1.5, 2.0, 2.5, 4.0]
measure: progress
thresholds: [0.1, 0.3, 0.5]
min_lesson_length: 100
signal_smoothing: true
parameters:
small_wall_height: [1.5, 2.0, 2.5, 4.0]
```
At the top level of the config is the behavior name. The curriculum for each

2
docs/Training-Generalized-Reinforcement-Learning-Agents.md


* `sampler-type-sub-arguments` - Specify the sub-arguments depending on the `sampler-type`.
In the example above, this would correspond to the `intervals`
under the `sampler-type` `"multirange_uniform"` for the `Reset Parameter` called gravity`.
under the `sampler-type` `"multirange_uniform"` for the `Reset Parameter` called `gravity`.
The key name should match the name of the corresponding argument in the sampler definition.
(See below)

999
docs/images/tennis.png
File diff content is too large to display.

2
gym-unity/gym_unity/__init__.py


__version__ = "0.14.0.dev0"
__version__ = "0.15.0.dev0"

2
ml-agents-envs/mlagents_envs/__init__.py


__version__ = "0.14.0.dev0"
__version__ = "0.15.0.dev0"

3
ml-agents-envs/mlagents_envs/environment.py


class UnityEnvironment(BaseEnv):
SCALAR_ACTION_TYPES = (int, np.int32, np.int64, float, np.float32, np.float64)
SINGLE_BRAIN_ACTION_TYPES = SCALAR_ACTION_TYPES + (list, np.ndarray)
API_VERSION = "API-14-dev0"
API_VERSION = "API-15-dev0"
def __init__(
self,

:bool docker_training: Informs this class whether the process is being run within a container.
:bool no_graphics: Whether to run the Unity simulator in no-graphics mode
:int timeout_wait: Time (in seconds) to wait for connection from environment.
:bool train_mode: Whether to run in training mode, speeding up the simulation, by default.
:list args: Additional Unity command line arguments
:list side_channels: Additional side channel for no-rl communication with Unity
"""

Some files were not shown because too many files changed in this diff.
