
Merge remote-tracking branch 'origin/develop' into try-tf2-support

/develop-gpu-test
Chris Elion 5 years ago
Current commit
a1967c19
116 files changed, with 1605 additions and 3589 deletions
  1. UnitySDK/Assets/ML-Agents/Editor/BroadcastHubDrawer.cs (45 changes)
  2. UnitySDK/Assets/ML-Agents/Editor/DemonstrationImporter.cs (2 changes)
  3. UnitySDK/Assets/ML-Agents/Editor/Tests/EditModeTestInternalBrainTensorApplier.cs (15 changes)
  4. UnitySDK/Assets/ML-Agents/Editor/Tests/EditModeTestInternalBrainTensorGenerator.cs (26 changes)
  5. UnitySDK/Assets/ML-Agents/Editor/Tests/MLAgentsEditModeTest.cs (33 changes)
  6. UnitySDK/Assets/ML-Agents/Examples/3DBall/Scenes/3DBall.unity (6 changes)
  7. UnitySDK/Assets/ML-Agents/Examples/3DBall/Scenes/3DBallHard.unity (6 changes)
  8. UnitySDK/Assets/ML-Agents/Examples/Basic/Scenes/Basic.unity (4 changes)
  9. UnitySDK/Assets/ML-Agents/Examples/Bouncer/Scenes/Bouncer.unity (4 changes)
  10. UnitySDK/Assets/ML-Agents/Examples/Crawler/Scenes/CrawlerDynamicTarget.unity (4 changes)
  11. UnitySDK/Assets/ML-Agents/Examples/Crawler/Scenes/CrawlerStaticTarget.unity (4 changes)
  12. UnitySDK/Assets/ML-Agents/Examples/Crawler/Scripts/CrawlerAgent.cs (2 changes)
  13. UnitySDK/Assets/ML-Agents/Examples/FoodCollector/Scenes/FoodCollector.unity (6 changes)
  14. UnitySDK/Assets/ML-Agents/Examples/FoodCollector/Scenes/VisualFoodCollector.unity (4 changes)
  15. UnitySDK/Assets/ML-Agents/Examples/GridWorld/Scenes/GridWorld.unity (6 changes)
  16. UnitySDK/Assets/ML-Agents/Examples/Hallway/Scenes/Hallway.unity (4 changes)
  17. UnitySDK/Assets/ML-Agents/Examples/Hallway/Scenes/VisualHallway.unity (3 changes)
  18. UnitySDK/Assets/ML-Agents/Examples/PushBlock/Brains/PushBlockLearning.asset (7 changes)
  19. UnitySDK/Assets/ML-Agents/Examples/PushBlock/Scenes/PushBlock.unity (4 changes)
  20. UnitySDK/Assets/ML-Agents/Examples/PushBlock/Scenes/VisualPushBlock.unity (3 changes)
  21. UnitySDK/Assets/ML-Agents/Examples/Pyramids/Prefabs/VisualAreaPyramids.prefab (2 changes)
  22. UnitySDK/Assets/ML-Agents/Examples/Pyramids/Scenes/Pyramids.unity (4 changes)
  23. UnitySDK/Assets/ML-Agents/Examples/Pyramids/Scenes/VisualPyramids.unity (5 changes)
  24. UnitySDK/Assets/ML-Agents/Examples/Reacher/Scenes/Reacher.unity (4 changes)
  25. UnitySDK/Assets/ML-Agents/Examples/Soccer/Scenes/SoccerTwos.unity (6 changes)
  26. UnitySDK/Assets/ML-Agents/Examples/Tennis/Scenes/Tennis.unity (4 changes)
  27. UnitySDK/Assets/ML-Agents/Examples/Walker/Scenes/Walker.unity (4 changes)
  28. UnitySDK/Assets/ML-Agents/Examples/WallJump/Scenes/WallJump.unity (8 changes)
  29. UnitySDK/Assets/ML-Agents/Scripts/Academy.cs (242 changes)
  30. UnitySDK/Assets/ML-Agents/Scripts/Agent.cs (53 changes)
  31. UnitySDK/Assets/ML-Agents/Scripts/Brain.cs (49 changes)
  32. UnitySDK/Assets/ML-Agents/Scripts/BrainParameters.cs (41 changes)
  33. UnitySDK/Assets/ML-Agents/Scripts/BroadcastHub.cs (36 changes)
  34. UnitySDK/Assets/ML-Agents/Scripts/Demonstration.cs (1 change)
  35. UnitySDK/Assets/ML-Agents/Scripts/Grpc/GrpcExtensions.cs (113 changes)
  36. UnitySDK/Assets/ML-Agents/Scripts/Grpc/RpcCommunicator.cs (305 changes)
  37. UnitySDK/Assets/ML-Agents/Scripts/HeuristicBrain.cs (27 changes)
  38. UnitySDK/Assets/ML-Agents/Scripts/ICommunicator.cs (123 changes)
  39. UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/ApplierImpl.cs (23 changes)
  40. UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/GeneratorImpl.cs (49 changes)
  41. UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/TensorApplier.cs (14 changes)
  42. UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/TensorGenerator.cs (10 changes)
  43. UnitySDK/Assets/ML-Agents/Scripts/LearningBrain.cs (30 changes)
  44. UnitySDK/Assets/ML-Agents/Scripts/PlayerBrain.cs (5 changes)
  45. UnitySDK/Assets/ML-Agents/Scripts/ResetParameters.cs (25 changes)
  46. UnitySDK/UnitySDK.sln.DotSettings (1 change)
  47. docs/Basic-Guide.md (9 changes)
  48. docs/FAQ.md (6 changes)
  49. docs/Getting-Started-with-Balance-Ball.md (5 changes)
  50. docs/Installation.md (17 changes)
  51. docs/Learning-Environment-Create-New.md (4 changes)
  52. docs/Learning-Environment-Design-Academy.md (3 changes)
  53. docs/Learning-Environment-Design-Agents.md (3 changes)
  54. docs/Learning-Environment-Design-Brains.md (43 changes)
  55. docs/Learning-Environment-Design-Learning-Brains.md (12 changes)
  56. docs/Learning-Environment-Design-Player-Brains.md (4 changes)
  57. docs/Learning-Environment-Design.md (10 changes)
  58. docs/Learning-Environment-Examples.md (7 changes)
  59. docs/Learning-Environment-Executable.md (8 changes)
  60. docs/ML-Agents-Overview.md (14 changes)
  61. docs/Migrating.md (4 changes)
  62. docs/Python-API.md (30 changes)
  63. docs/Readme.md (25 changes)
  64. docs/Training-Behavioral-Cloning.md (63 changes)
  65. docs/Training-Imitation-Learning.md (5 changes)
  66. docs/Training-ML-Agents.md (110 changes)
  67. docs/Training-on-Amazon-Web-Service.md (35 changes)
  68. docs/Training-on-Microsoft-Azure.md (17 changes)
  69. docs/Unity-Inference-Engine.md (2 changes)
  70. docs/Using-Tensorboard.md (6 changes)
  71. ml-agents-envs/mlagents/envs/environment.py (32 changes)
  72. ml-agents-envs/mlagents/envs/tests/test_envs.py (2 changes)
  73. ml-agents/mlagents/trainers/bc/models.py (2 changes)
  74. ml-agents/mlagents/trainers/bc/trainer.py (9 changes)
  75. ml-agents/mlagents/trainers/ppo/trainer.py (11 changes)
  76. ml-agents/mlagents/trainers/tests/mock_brain.py (13 changes)
  77. ml-agents/mlagents/trainers/tests/test_barracuda_converter.py (29 changes)
  78. ml-agents/mlagents/trainers/tests/test_bc.py (22 changes)
  79. ml-agents/mlagents/trainers/tests/test_bcmodule.py (2 changes)
  80. ml-agents/mlagents/trainers/tests/test_ppo.py (85 changes)
  81. ml-agents/mlagents/trainers/tests/test_reward_signals.py (6 changes)
  82. ml-agents/mlagents/trainers/tests/test_sac.py (10 changes)
  83. ml-agents/mlagents/trainers/tests/test_trainer_util.py (76 changes)
  84. ml-agents/mlagents/trainers/trainer_util.py (10 changes)
  85. notebooks/getting-started.ipynb (4 changes)
  86. UnitySDK/Assets/ML-Agents/Editor/Tests/TimerTest.cs (36 changes)
  87. UnitySDK/Assets/ML-Agents/Editor/Tests/TimerTest.cs.meta (3 changes)
  88. UnitySDK/Assets/ML-Agents/Scripts/Timer.cs (343 changes)
  89. UnitySDK/Assets/ML-Agents/Scripts/Timer.cs.meta (11 changes)
  90. docs/Using-Virtual-Environment.md (53 changes)
  91. UnitySDK/Assets/ML-Agents/Editor/Builder.cs (14 changes)
  92. UnitySDK/Assets/ML-Agents/Editor/Builder.cs.meta (3 changes)
  93. UnitySDK/Assets/ML-Agents/Editor/BuilderUtils.cs (44 changes)
  94. UnitySDK/Assets/ML-Agents/Editor/BuilderUtils.cs.meta (3 changes)
  95. UnitySDK/Assets/ML-Agents/Examples/Bouncer/Scenes/BouncerIL.unity.meta (7 changes)
  96. UnitySDK/Assets/ML-Agents/Examples/Bouncer/Scenes/BouncerIL.unity (1001 changes)
  97. UnitySDK/Assets/ML-Agents/Examples/FoodCollector/Scenes/FoodCollectorIL.unity (880 changes)
  98. UnitySDK/Assets/ML-Agents/Examples/FoodCollector/Scenes/FoodCollectorIL.unity.meta (9 changes)
  99. UnitySDK/Assets/ML-Agents/Examples/Hallway/Scenes/HallwayIL.unity.meta (7 changes)
  100. UnitySDK/Assets/ML-Agents/Examples/Hallway/Scenes/HallwayIL.unity (653 changes)

UnitySDK/Assets/ML-Agents/Editor/BroadcastHubDrawer.cs (45 changes)


private const float k_LineHeight = 17f;
// The vertical space left below the BroadcastHub UI.
private const float k_ExtraSpaceBelow = 10f;
// The horizontal size of the Control checkbox
private const int k_ControlSize = 80;
/// <summary>
/// Computes the height of the Drawer depending on the property it is showing

position.y += k_LineHeight;
// This is the labels for each columns
var brainWidth = position.width - k_ControlSize;
var brainWidth = position.width;
var controlRect = new Rect(
position.x + brainWidth, position.y, k_ControlSize, position.height);
EditorGUI.LabelField(controlRect, "Control");
controlRect.y += k_LineHeight;
controlRect.x += 15;
DrawBrains(brainRect, controlRect);
DrawBrains(brainRect);
EditorGUI.indentLevel--;
EditorGUI.EndProperty();
}

}
/// <summary>
/// Draws the Brain and Control checkbox for the brains contained in the BroadCastHub.
/// Draws the Brain contained in the BroadcastHub.
/// <param name="controlRect">The Rect to draw the control checkbox.</param>
private void DrawBrains(Rect brainRect, Rect controlRect)
private void DrawBrains(Rect brainRect)
var exposedBrains = m_Hub.broadcastingBrains;
var brain = exposedBrains[index];
var controlledBrains = m_Hub.brainsToControl;
var brain = controlledBrains[index];
brainRect, brain, typeof(Brain), true) as Brain;
brainRect, brain, typeof(LearningBrain), true) as LearningBrain;
m_Hub.broadcastingBrains.RemoveAt(index);
var brainToInsert = exposedBrains.Contains(newBrain) ? null : newBrain;
exposedBrains.Insert(index, brainToInsert);
m_Hub.brainsToControl.RemoveAt(index);
var brainToInsert = controlledBrains.Contains(newBrain) ? null : newBrain;
controlledBrains.Insert(index, brainToInsert);
}
// This is the Rectangle for the control checkbox
EditorGUI.BeginChangeCheck();
if (brain is LearningBrain)
{
var isTraining = m_Hub.IsControlled(brain);
isTraining = EditorGUI.Toggle(controlRect, isTraining);
m_Hub.SetControlled(brain, isTraining);
}
controlRect.y += k_LineHeight;
if (EditorGUI.EndChangeCheck())
{
MarkSceneAsDirty();
}
}
}

{
if (m_Hub.Count > 0)
{
m_Hub.broadcastingBrains.RemoveAt(m_Hub.broadcastingBrains.Count - 1);
m_Hub.brainsToControl.RemoveAt(m_Hub.brainsToControl.Count - 1);
}
}

private void AddBrain()
{
m_Hub.broadcastingBrains.Add(null);
m_Hub.brainsToControl.Add(null);
}
}
}
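
The net effect of this hunk is that the drawer stops rendering the per-brain "Control" checkbox column: the hub now only tracks brains it controls, so the field type narrows from Brain to LearningBrain and the second Rect parameter disappears. Condensing the surviving lines above, the row-drawing loop plausibly reduces to the following sketch (the loop scaffolding and the MarkSceneAsDirty placement are assumptions; the rest is taken from the diff):

// Sketch: one ObjectField per controlled brain, no Control checkbox.
private void DrawBrains(Rect brainRect)
{
    var controlledBrains = m_Hub.brainsToControl;
    for (var index = 0; index < controlledBrains.Count; index++)
    {
        var brain = controlledBrains[index];
        EditorGUI.BeginChangeCheck();
        var newBrain = EditorGUI.ObjectField(
            brainRect, brain, typeof(LearningBrain), true) as LearningBrain;
        if (EditorGUI.EndChangeCheck())
        {
            // Re-inserting a brain that is already present stores null,
            // which prevents duplicates in the hub.
            controlledBrains.RemoveAt(index);
            var brainToInsert = controlledBrains.Contains(newBrain) ? null : newBrain;
            controlledBrains.Insert(index, brainToInsert);
            MarkSceneAsDirty();
        }
        brainRect.y += k_LineHeight;
    }
}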

UnitySDK/Assets/ML-Agents/Editor/DemonstrationImporter.cs (2 changes)


reader.Seek(DemonstrationStore.MetaDataBytes + 1, 0);
var brainParamsProto = BrainParametersProto.Parser.ParseDelimitedFrom(reader);
var brainParameters = new BrainParameters(brainParamsProto);
var brainParameters = brainParamsProto.ToBrainParameters();
reader.Close();
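
The one functional change here swaps a BrainParameters constructor overload for an extension method on the protobuf type (GrpcExtensions.cs also changes in this commit). A minimal sketch of that pattern, with a single illustrative field mapping only:

// Sketch of the extension-method conversion style the importer now uses.
// The real ToBrainParameters lives in Scripts/Grpc/GrpcExtensions.cs and
// maps every field; VectorObservationSize is shown here as one example.
public static class GrpcExtensionsSketch
{
    public static BrainParameters ToBrainParameters(this BrainParametersProto bpp)
    {
        return new BrainParameters
        {
            vectorObservationSize = bpp.VectorObservationSize
        };
    }
}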

UnitySDK/Assets/ML-Agents/Editor/Tests/EditModeTestInternalBrainTensorApplier.cs (15 changes)


using System.Collections.Generic;
using System.Linq;
using NUnit.Framework;
using UnityEngine;
using System.Reflection;

}
}
private Dictionary<Agent, AgentInfo> GetFakeAgentInfos()
private List<Agent> GetFakeAgentInfos()
var infoA = new AgentInfo();
var infoB = new AgentInfo();
return new Dictionary<Agent, AgentInfo>(){{agentA, infoA}, {agentB, infoB}};
return new List<Agent> {agentA, agentB};
}
[Test]

var applier = new ContinuousActionOutputApplier();
applier.Apply(inputTensor, agentInfos);
var agents = agentInfos.Keys.ToList();
var agents = agentInfos;
var agent = agents[0] as TestAgent;
Assert.NotNull(agent);

var alloc = new TensorCachingAllocator();
var applier = new DiscreteActionOutputApplier(new[] {2, 3}, 0, alloc);
applier.Apply(inputTensor, agentInfos);
var agents = agentInfos.Keys.ToList();
var agents = agentInfos;
var agent = agents[0] as TestAgent;
Assert.NotNull(agent);

var applier = new MemoryOutputApplier();
applier.Apply(inputTensor, agentInfos);
var agents = agentInfos.Keys.ToList();
var agents = agentInfos;
var agent = agents[0] as TestAgent;
Assert.NotNull(agent);

var applier = new ValueEstimateApplier();
applier.Apply(inputTensor, agentInfos);
var agents = agentInfos.Keys.ToList();
var agents = agentInfos;
var agent = agents[0] as TestAgent;
Assert.NotNull(agent);
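
All four appliers in this test now receive a plain List&lt;Agent&gt; instead of a Dictionary&lt;Agent, AgentInfo&gt;, which is why the .Keys.ToList() projections disappear. This follows from Agent now exposing its state as an Info property (see the Agent.cs hunks below); roughly:

// Before: appliers looked up per-agent state in the dictionary values.
// After: they read it straight off the agent (sketch; names from the diff).
var agents = GetFakeAgentInfos();    // now returns a List<Agent>
applier.Apply(inputTensor, agents);  // applier iterates agents and reads agent.Info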

UnitySDK/Assets/ML-Agents/Editor/Tests/EditModeTestInternalBrainTensorGenerator.cs (26 changes)


{
}
private Dictionary<Agent, AgentInfo> GetFakeAgentInfos()
private static IEnumerable<Agent> GetFakeAgentInfos()
var infoA = new AgentInfo()
var infoA = new AgentInfo
stackedVectorObservation = (new[] {1f, 2f, 3f}).ToList(),
stackedVectorObservation = new[] {1f, 2f, 3f}.ToList(),
actionMasks = null,
actionMasks = null
var infoB = new AgentInfo()
var infoB = new AgentInfo
stackedVectorObservation = (new[] {4f, 5f, 6f}).ToList(),
memories = (new[] {1f, 1f, 1f}).ToList(),
stackedVectorObservation = new[] {4f, 5f, 6f}.ToList(),
memories = new[] {1f, 1f, 1f}.ToList(),
agentA.Info = infoA;
agentB.Info = infoB;
return new Dictionary<Agent, AgentInfo>(){{agentA, infoA}, {agentB, infoB}};
return new List<Agent> {agentA, agentB};
}
[Test]

[Test]
public void GenerateVectorObservation()
{
var inputTensor = new TensorProxy()
var inputTensor = new TensorProxy
{
shape = new long[] {2, 3}
};

[Test]
public void GenerateRecurrentInput()
{
var inputTensor = new TensorProxy()
var inputTensor = new TensorProxy
{
shape = new long[] {2, 5}
};

[Test]
public void GeneratePreviousActionInput()
{
var inputTensor = new TensorProxy()
var inputTensor = new TensorProxy
{
shape = new long[] {2, 2},
valueType = TensorProxy.TensorType.Integer

[Test]
public void GenerateActionMaskInput()
{
var inputTensor = new TensorProxy()
var inputTensor = new TensorProxy
{
shape = new long[] {2, 5},
valueType = TensorProxy.TensorType.FloatingPoint

UnitySDK/Assets/ML-Agents/Editor/Tests/MLAgentsEditModeTest.cs (33 changes)


public override void AcademyReset()
{
}
public override void AcademyStep()

protected override void DecideAction()
{
numberOfCallsToDecideAction++;
m_AgentInfos.Clear();
m_Agents.Clear();
}
}

//This will call the method even though it is private
var academyInitializeMethod = typeof(Academy).GetMethod("InitializeEnvironment",
BindingFlags.Instance | BindingFlags.NonPublic);
academyInitializeMethod?.Invoke(aca, new object[] { });
academyInitializeMethod?.Invoke(aca, new object[] {});
Assert.AreEqual(1, aca.initializeAcademyCalls);
Assert.AreEqual(0, aca.GetEpisodeCount());
Assert.AreEqual(0, aca.GetStepCount());

agentEnableMethod?.Invoke(agent2, new object[] { aca });
academyInitializeMethod?.Invoke(aca, new object[] { });
academyInitializeMethod?.Invoke(aca, new object[] {});
agentEnableMethod?.Invoke(agent1, new object[] { aca });
Assert.AreEqual(false, agent1.IsDone());

var aca = acaGo.GetComponent<TestAcademy>();
var academyInitializeMethod = typeof(Academy).GetMethod("InitializeEnvironment",
BindingFlags.Instance | BindingFlags.NonPublic);
academyInitializeMethod?.Invoke(aca, new object[] { });
academyInitializeMethod?.Invoke(aca, new object[] {});
var academyStepMethod = typeof(Academy).GetMethod("EnvironmentStep",
BindingFlags.Instance | BindingFlags.NonPublic);

{
numberReset += 1;
}
academyStepMethod?.Invoke(aca, new object[] { });
academyStepMethod?.Invoke(aca, new object[] {});
}
}

agent2.GiveBrain(brain);
agentEnableMethod?.Invoke(agent1, new object[] { aca });
academyInitializeMethod?.Invoke(aca, new object[] { });
academyInitializeMethod?.Invoke(aca, new object[] {});
var academyStepMethod = typeof(Academy).GetMethod(
"EnvironmentStep", BindingFlags.Instance | BindingFlags.NonPublic);

requestAction += 1;
agent2.RequestAction();
}
academyStepMethod?.Invoke(aca, new object[] { });
academyStepMethod?.Invoke(aca, new object[] {});
}
}
}

var aca = acaGo.GetComponent<TestAcademy>();
var academyInitializeMethod = typeof(Academy).GetMethod(
"InitializeEnvironment", BindingFlags.Instance | BindingFlags.NonPublic);
academyInitializeMethod?.Invoke(aca, new object[] { });
academyInitializeMethod?.Invoke(aca, new object[] {});
var academyStepMethod = typeof(Academy).GetMethod(
"EnvironmentStep", BindingFlags.Instance | BindingFlags.NonPublic);

}
stepsSinceReset += 1;
academyStepMethod.Invoke((object)aca, new object[] { });
academyStepMethod.Invoke(aca, new object[] {});
}
}

agent2.GiveBrain(brain);
agentEnableMethod?.Invoke(agent2, new object[] { aca });
academyInitializeMethod?.Invoke(aca, new object[] { });
academyInitializeMethod?.Invoke(aca, new object[] {});
var numberAgent1Reset = 0;
var numberAgent2Reset = 0;

agent2StepSinceReset += 1;
//Agent 1 is only initialized at step 2
if (i < 2)
{ }
academyStepMethod?.Invoke(aca, new object[] { });
{}
academyStepMethod?.Invoke(aca, new object[] {});
}
}
}

agent2.GiveBrain(brain);
agentEnableMethod?.Invoke(agent2, new object[] { aca });
academyInitializeMethod?.Invoke(aca, new object[] { });
academyInitializeMethod?.Invoke(aca, new object[] {});
agentEnableMethod?.Invoke(agent1, new object[] { aca });
var agent1ResetOnDone = 0;

}
academyStepMethod?.Invoke(aca, new object[] { });
academyStepMethod?.Invoke(aca, new object[] {});
}
}

agent2.GiveBrain(brain);
agentEnableMethod?.Invoke(agent2, new object[] { aca });
academyInitializeMethod?.Invoke(aca, new object[] { });
academyInitializeMethod?.Invoke(aca, new object[] {});
agentEnableMethod?.Invoke(agent1, new object[] { aca });

Assert.LessOrEqual(Mathf.Abs(i * 0.1f - agent2.GetCumulativeReward()), 0.05f);
academyStepMethod?.Invoke(aca, new object[] { });
academyStepMethod?.Invoke(aca, new object[] {});
agent1.AddReward(10f);
if ((i % 21 == 0) && (i > 0))
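
Nearly all of the churn in this file is a whitespace cleanup of empty argument arrays ("new object[] { }" becomes "new object[] {}"); behavior is unchanged. For context, the pattern these tests rely on is reflection into the Academy's private lifecycle methods:

// The tests drive the private Academy lifecycle by reflection; the method
// names ("InitializeEnvironment", "EnvironmentStep") come from the hunks above.
var step = typeof(Academy).GetMethod(
    "EnvironmentStep", BindingFlags.Instance | BindingFlags.NonPublic);
step?.Invoke(aca, new object[] {});  // aca is the TestAcademy under test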

UnitySDK/Assets/ML-Agents/Examples/3DBall/Scenes/3DBall.unity (6 changes)


m_ReflectionIntensity: 1
m_CustomReflection: {fileID: 0}
m_Sun: {fileID: 0}
m_IndirectSpecularColor: {r: 0.4497121, g: 0.4997778, b: 0.5756369, a: 1}
m_IndirectSpecularColor: {r: 0.44971162, g: 0.49977726, b: 0.5756362, a: 1}
--- !u!157 &3
LightmapSettings:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
broadcastHub:
broadcastingBrains:
brainsToControl:
m_BrainsToControl: []
m_MaxSteps: 0
m_TrainingConfiguration:
width: 300
height: 200
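
The scene-file edits across the example environments all follow from one serialization change, repeated in every .unity hunk below: the hub's separate broadcasting list is gone and its controlled-brain list is serialized as m_BrainsToControl. Unity serializes fields under their C# names, so (as an assumption about the exact declaration, not a quote from BroadcastHub.cs) a change along these lines is what forces every scene to be resaved:

// Hypothetical shape of the hub field behind this YAML churn: a single
// serialized list (m_BrainsToControl in the scenes), possibly exposed
// through a brainsToControl accessor, replacing broadcastingBrains.
[SerializeField]
private List<LearningBrain> m_BrainsToControl = new List<LearningBrain>();
public List<LearningBrain> brainsToControl { get { return m_BrainsToControl; } }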

UnitySDK/Assets/ML-Agents/Examples/3DBall/Scenes/3DBallHard.unity (6 changes)


m_ReflectionIntensity: 1
m_CustomReflection: {fileID: 0}
m_Sun: {fileID: 0}
m_IndirectSpecularColor: {r: 0.4497121, g: 0.4997778, b: 0.5756369, a: 1}
m_IndirectSpecularColor: {r: 0.44971162, g: 0.49977726, b: 0.5756362, a: 1}
--- !u!157 &3
LightmapSettings:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
broadcastHub:
broadcastingBrains:
brainsToControl:
m_BrainsToControl: []
m_MaxSteps: 0
m_TrainingConfiguration:
width: 300
height: 200

UnitySDK/Assets/ML-Agents/Examples/Basic/Scenes/Basic.unity (4 changes)


m_Name:
m_EditorClassIdentifier:
broadcastHub:
broadcastingBrains:
brainsToControl:
m_BrainsToControl: []
m_MaxSteps: 0
m_TrainingConfiguration:
width: 80
height: 80

UnitySDK/Assets/ML-Agents/Examples/Bouncer/Scenes/Bouncer.unity (4 changes)


m_Name:
m_EditorClassIdentifier:
broadcastHub:
broadcastingBrains:
brainsToControl:
m_BrainsToControl: []
m_MaxSteps: 0
m_TrainingConfiguration:
width: 80
height: 80

UnitySDK/Assets/ML-Agents/Examples/Crawler/Scenes/CrawlerDynamicTarget.unity (4 changes)


m_Name:
m_EditorClassIdentifier:
broadcastHub:
broadcastingBrains:
brainsToControl:
m_BrainsToControl: []
m_MaxSteps: 0
m_TrainingConfiguration:
width: 80
height: 80

UnitySDK/Assets/ML-Agents/Examples/Crawler/Scenes/CrawlerStaticTarget.unity (4 changes)


m_Name:
m_EditorClassIdentifier:
broadcastHub:
broadcastingBrains:
brainsToControl:
m_BrainsToControl: []
m_MaxSteps: 0
m_TrainingConfiguration:
width: 1280
height: 720

UnitySDK/Assets/ML-Agents/Examples/Crawler/Scripts/CrawlerAgent.cs (2 changes)


public Transform ground;
public bool detectTargets;
public bool targetIsStatic = false;
public bool targetIsStatic;
public bool respawnTargetWhenTouched;
public float targetSpawnRadius;
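
The only change here drops a redundant initializer; C# zero-initializes fields, so both forms are identical in behavior:

// Field initializers to default values are no-ops in C#.
public bool targetIsStaticOld = false;  // old declaration (renamed for the sketch)
public bool targetIsStaticNew;          // new declaration; still defaults to false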

UnitySDK/Assets/ML-Agents/Examples/FoodCollector/Scenes/FoodCollector.unity (6 changes)


m_ReflectionIntensity: 1
m_CustomReflection: {fileID: 0}
m_Sun: {fileID: 0}
m_IndirectSpecularColor: {r: 0.4497121, g: 0.4997778, b: 0.5756369, a: 1}
m_IndirectSpecularColor: {r: 0.44971162, g: 0.49977726, b: 0.5756362, a: 1}
--- !u!157 &3
LightmapSettings:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
broadcastHub:
broadcastingBrains:
brainsToControl:
m_BrainsToControl: []
m_MaxSteps: 1500
m_TrainingConfiguration:
width: 500
height: 500

UnitySDK/Assets/ML-Agents/Examples/FoodCollector/Scenes/VisualFoodCollector.unity (4 changes)


m_Name:
m_EditorClassIdentifier:
broadcastHub:
broadcastingBrains:
brainsToControl:
m_BrainsToControl: []
m_MaxSteps: 0
m_TrainingConfiguration:
width: 500
height: 500

UnitySDK/Assets/ML-Agents/Examples/GridWorld/Scenes/GridWorld.unity (6 changes)


m_ReflectionIntensity: 1
m_CustomReflection: {fileID: 0}
m_Sun: {fileID: 0}
m_IndirectSpecularColor: {r: 0.4497121, g: 0.4997778, b: 0.5756369, a: 1}
m_IndirectSpecularColor: {r: 0.44971162, g: 0.49977726, b: 0.5756362, a: 1}
--- !u!157 &3
LightmapSettings:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
broadcastHub:
broadcastingBrains:
brainsToControl:
m_BrainsToControl: []
m_MaxSteps: 0
m_TrainingConfiguration:
width: 84
height: 84

UnitySDK/Assets/ML-Agents/Examples/Hallway/Scenes/Hallway.unity (4 changes)


m_Name:
m_EditorClassIdentifier:
broadcastHub:
broadcastingBrains:
brainsToControl:
m_BrainsToControl: []
m_MaxSteps: 0
m_TrainingConfiguration:
width: 128
height: 128

UnitySDK/Assets/ML-Agents/Examples/Hallway/Scenes/VisualHallway.unity (3 changes)


m_Name:
m_EditorClassIdentifier:
broadcastHub:
broadcastingBrains:
brainsToControl:
m_BrainsToControl: []
m_TrainingConfiguration:
width: 128
height: 128

UnitySDK/Assets/ML-Agents/Examples/PushBlock/Brains/PushBlockLearning.asset (7 changes)


m_Name: PushBlockLearning
m_EditorClassIdentifier:
brainParameters:
vectorObservationSize: 0
vectorObservationSize: 70
cameraResolutions:
- width: 84
height: 84
blackAndWhite: 0
cameraResolutions: []
vectorActionDescriptions:
-
vectorActionSpaceType: 0

UnitySDK/Assets/ML-Agents/Examples/PushBlock/Scenes/PushBlock.unity (4 changes)


m_Name:
m_EditorClassIdentifier:
broadcastHub:
broadcastingBrains:
brainsToControl:
m_BrainsToControl: []
m_MaxSteps: 0
m_TrainingConfiguration:
width: 400
height: 300

UnitySDK/Assets/ML-Agents/Examples/PushBlock/Scenes/VisualPushBlock.unity (3 changes)


m_Name:
m_EditorClassIdentifier:
broadcastHub:
broadcastingBrains:
brainsToControl:
m_BrainsToControl: []
m_TrainingConfiguration:
width: 1280
height: 720

UnitySDK/Assets/ML-Agents/Examples/Pyramids/Prefabs/VisualAreaPyramids.prefab (2 changes)


m_Script: {fileID: 11500000, guid: b8db44472779248d3be46895c4d562d5, type: 3}
m_Name:
m_EditorClassIdentifier:
brain: {fileID: 11400000, guid: 59a04e208fb8a423586adf25bf1fecd0, type: 2}
brain: {fileID: 11400000, guid: 60f0ffcd08c3b43a6bdc746cfc0c4059, type: 2}
agentParameters:
agentCameras:
- {fileID: 20712684238256298}

UnitySDK/Assets/ML-Agents/Examples/Pyramids/Scenes/Pyramids.unity (4 changes)


m_Name:
m_EditorClassIdentifier:
broadcastHub:
broadcastingBrains:
brainsToControl:
m_BrainsToControl: []
m_MaxSteps: 0
m_TrainingConfiguration:
width: 80
height: 80

UnitySDK/Assets/ML-Agents/Examples/Pyramids/Scenes/VisualPyramids.unity (5 changes)


m_Name:
m_EditorClassIdentifier:
broadcastHub:
broadcastingBrains:
brainsToControl:
m_BrainsToControl:
- {fileID: 11400000, guid: 60f0ffcd08c3b43a6bdc746cfc0c4059, type: 2}
m_MaxSteps: 0
m_TrainingConfiguration:
width: 80
height: 80

UnitySDK/Assets/ML-Agents/Examples/Reacher/Scenes/Reacher.unity (4 changes)


m_Name:
m_EditorClassIdentifier:
broadcastHub:
broadcastingBrains:
brainsToControl:
m_BrainsToControl: []
m_MaxSteps: 0
m_TrainingConfiguration:
width: 80
height: 80

UnitySDK/Assets/ML-Agents/Examples/Soccer/Scenes/SoccerTwos.unity (6 changes)


m_Name:
m_EditorClassIdentifier:
broadcastHub:
broadcastingBrains:
- {fileID: 11400000, guid: 29ed78b3e8fef4340b3a1f6954b88f18, type: 2}
brainsToControl:
m_BrainsToControl: []
m_MaxSteps: 0
- {fileID: 11400000, guid: 29ed78b3e8fef4340b3a1f6954b88f18, type: 2}
m_TrainingConfiguration:
width: 800
height: 500

UnitySDK/Assets/ML-Agents/Examples/Tennis/Scenes/Tennis.unity (4 changes)


m_Name:
m_EditorClassIdentifier:
broadcastHub:
broadcastingBrains:
brainsToControl:
m_BrainsToControl: []
m_MaxSteps: 25000
m_TrainingConfiguration:
width: 300
height: 200

UnitySDK/Assets/ML-Agents/Examples/Walker/Scenes/Walker.unity (4 changes)


m_Name:
m_EditorClassIdentifier:
broadcastHub:
broadcastingBrains:
brainsToControl:
m_BrainsToControl: []
m_MaxSteps: 0
m_TrainingConfiguration:
width: 80
height: 80

UnitySDK/Assets/ML-Agents/Examples/WallJump/Scenes/WallJump.unity (8 changes)


m_ReflectionIntensity: 1
m_CustomReflection: {fileID: 0}
m_Sun: {fileID: 0}
m_IndirectSpecularColor: {r: 0.44971484, g: 0.49977952, b: 0.57563835, a: 1}
m_IndirectSpecularColor: {r: 0.44971442, g: 0.499779, b: 0.5756377, a: 1}
--- !u!157 &3
LightmapSettings:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
broadcastHub:
broadcastingBrains:
- {fileID: 11400000, guid: b5f530c5bf8d64bf8a18df92e283bb9c, type: 2}
brainsToControl:
m_BrainsToControl: []
m_MaxSteps: 0
- {fileID: 11400000, guid: b5f530c5bf8d64bf8a18df92e283bb9c, type: 2}
m_TrainingConfiguration:
width: 80
height: 80

UnitySDK/Assets/ML-Agents/Scripts/Academy.cs (242 changes)


using UnityEngine;
using System.IO;
using System.Linq;
using UnityEngine.Serialization;
#if UNITY_EDITOR

/// The mode is determined by the presence or absence of a Communicator. In
/// the presence of a communicator, the academy is run in training mode where
/// the states and observations of each agent are sent through the
/// communicator. In the absence of a communciator, the academy is run in
/// communicator. In the absence of a communicator, the academy is run in
/// inference mode where the agent behavior is determined by the brain
/// attached to it (which may be internal, heuristic or player).
/// </remarks>

private Vector3 m_OriginalGravity;
/// Temporary storage for global fixedDeltaTime value
/// Used to restore oringal value when deriving Academy modifies it
/// Used to restore original value when deriving Academy modifies it
/// Used to restore oringal value when deriving Academy modifies it
/// Used to restore original value when deriving Academy modifies it
private float m_OriginalMaximumDeltaTime;
// Fields provided in the Inspector

/// </summary>
/// <remarks>
/// Default reset parameters are specified in the academy Editor, and can
/// be modified when training with an external Brain by passinga config
/// be modified when training with an external Brain by passing a config
/// dictionary at reset.
/// </remarks>
[SerializeField]

// Fields not provided in the Inspector.
/// Boolean flag indicating whether a communicator is accessible by the
/// environment. This also specifies whether the environment is in
/// Training or Inference mode.
bool m_IsCommunicatorOn;
/// Keeps track of the id of the last communicator message received.
/// Remains 0 if there are no communicators. Is used to ensure that
/// the same message is not used multiple times.
private ulong m_LastCommunicatorMessageNumber;
/// <summary>
/// Returns whether or not the communicator is on.
/// </summary>
/// <returns>
/// <c>true</c>, if communicator is on, <c>false</c> otherwise.
/// </returns>
bool IsCommunicatorOn
{
get { return m_Communicator != null; }
}
/// If true, the Academy will use inference settings. This field is
/// initialized in <see cref="Awake"/> depending on the presence

/// each time the environment is reset.
int m_EpisodeCount;
/// The number of steps completed within the current episide. Incremented
/// The number of steps completed within the current episode. Incremented
/// each time a step is taken in the environment. Is reset to 0 during
/// <see cref="AcademyReset"/>.
int m_StepCount;

/// engine settings at the next environment step.
bool m_ModeSwitched;
/// Pointer to the batcher currently in use by the Academy.
Batcher m_BrainBatcher;
/// Pointer to the communicator currently in use by the Academy.
ICommunicator m_Communicator;
// Flag used to keep track of the first time the Academy is reset.
bool m_FirstAcademyReset;

// they have requested a decision.
public event System.Action AgentAct;
// Sigals to all the agents each time the Academy force resets.
// Signals to all the agents each time the Academy force resets.
/// Monobehavior function called at the very beginning of environment
/// MonoBehavior function called at the very beginning of environment
/// creation. Academy uses this time to initialize internal data
/// structures, initialize the environment and check for the existence
/// of a communicator.

}
// Used to read Python-provided environment parameters
private int ReadArgs()
private static int ReadArgs()
{
var args = System.Environment.GetCommandLineArgs();
var inputPort = "";

m_OriginalMaximumDeltaTime = Time.maximumDeltaTime;
InitializeAcademy();
ICommunicator communicator;
var exposedBrains = broadcastHub.broadcastingBrains.Where(x => x != null).ToList();
var controlledBrains = broadcastHub.broadcastingBrains.Where(
x => x != null && x is LearningBrain && broadcastHub.IsControlled(x));
foreach (var brain1 in controlledBrains)
{
var brain = (LearningBrain)brain1;
brain.SetToControlledExternally();
}
var controlledBrains = broadcastHub.brainsToControl.Where(x => x != null).ToList();
// Try to launch the communicator by usig the arguments passed at launch
// Try to launch the communicator by using the arguments passed at launch
communicator = new RpcCommunicator(
new CommunicatorParameters
m_Communicator = new RpcCommunicator(
new CommunicatorInitParameters
// and if Unity is in Editor mode
// If there arn't, there is no need for a communicator and it is set
// to null
communicator = null;
if (controlledBrains.ToList().Count > 0)
#if UNITY_EDITOR
m_Communicator = null;
if (controlledBrains.Any())
communicator = new RpcCommunicator(
new CommunicatorParameters
m_Communicator = new RpcCommunicator(
new CommunicatorInitParameters
#endif
m_BrainBatcher = new Batcher(communicator);
foreach (var trainingBrain in exposedBrains)
foreach (var trainingBrain in controlledBrains)
trainingBrain.SetBatcher(m_BrainBatcher);
trainingBrain.SetCommunicator(m_Communicator);
if (communicator != null)
if (m_Communicator != null)
m_IsCommunicatorOn = true;
m_Communicator.QuitCommandReceived += OnQuitCommandReceived;
m_Communicator.ResetCommandReceived += OnResetCommand;
m_Communicator.RLInputReceived += OnRLInputReceived;
var academyParameters =
new CommunicatorObjects.UnityRLInitializationOutputProto();
academyParameters.Name = gameObject.name;
academyParameters.Version = k_ApiVersion;
foreach (var brain in exposedBrains)
// We try to exchange the first message with Python. If this fails, it means
// no Python Process is ready to train the environment. In this case, the
//environment must use Inference.
try
var bp = brain.brainParameters;
academyParameters.BrainParameters.Add(
bp.ToProto(brain.name, broadcastHub.IsControlled(brain)));
var unityRLInitParameters = m_Communicator.Initialize(
new CommunicatorInitParameters
{
version = k_ApiVersion,
name = gameObject.name,
brains = controlledBrains,
environmentResetParameters = new EnvironmentResetParameters
{
resetParameters = resetParameters,
customResetParameters = customResetParameters
}
}, broadcastHub);
Random.InitState(unityRLInitParameters.seed);
academyParameters.EnvironmentParameters =
new CommunicatorObjects.EnvironmentParametersProto();
foreach (var key in resetParameters.Keys)
catch
academyParameters.EnvironmentParameters.FloatParameters.Add(
key, resetParameters[key]
);
m_Communicator = null;
foreach (var brain in controlledBrains)
{
brain.SetCommunicator(null);
}
var pythonParameters = m_BrainBatcher.SendAcademyParameters(academyParameters);
Random.InitState(pythonParameters.Seed);
m_IsInference = !m_IsCommunicatorOn;
SetIsInference(!IsCommunicatorOn);
BrainDecideAction += () => { };
DestroyAction += () => { };
AgentSetStatus += (i) => { };
AgentResetIfDone += () => { };
AgentSendState += () => { };
AgentAct += () => { };
AgentForceReset += () => { };
BrainDecideAction += () => {};
DestroyAction += () => {};
AgentSetStatus += i => {};
AgentResetIfDone += () => {};
AgentSendState += () => {};
AgentAct += () => {};
AgentForceReset += () => {};
// Configure the environment using the configurations provided by
// the developer in the Editor.
SetIsInference(!m_BrainBatcher.GetIsTraining());
private void UpdateResetParameters()
static void OnQuitCommandReceived()
var newResetParameters = m_BrainBatcher.GetEnvironmentParameters();
if (newResetParameters != null)
#if UNITY_EDITOR
EditorApplication.isPlaying = false;
#endif
Application.Quit();
}
private void OnResetCommand(EnvironmentResetParameters newResetParameters)
{
UpdateResetParameters(newResetParameters);
ForcedFullReset();
}
void OnRLInputReceived(UnityRLInputParameters inputParams)
{
m_IsInference = !inputParams.isTraining;
}
private void UpdateResetParameters(EnvironmentResetParameters newResetParameters)
{
if (newResetParameters.resetParameters != null)
foreach (var kv in newResetParameters.FloatParameters)
foreach (var kv in newResetParameters.resetParameters)
customResetParameters = newResetParameters.CustomResetParameters;
customResetParameters = newResetParameters.customResetParameters;
}
/// <summary>

// This signals to the academy that at the next environment step
// the engine configurations need updating to the respective mode
// (i.e. training vs inference) configuraiton.
// (i.e. training vs inference) configuration.
m_ModeSwitched = true;
}
}

}
/// <summary>
/// Returns whether or not the communicator is on.
/// </summary>
/// <returns>
/// <c>true</c>, if communicator is on, <c>false</c> otherwise.
/// </returns>
public bool IsCommunicatorOn()
{
return m_IsCommunicatorOn;
}
/// <summary>
/// Forces the full reset. The done flags are not affected. Is either
/// called the first reset at inference and every external reset
/// at training.

m_ModeSwitched = false;
}
if ((m_IsCommunicatorOn) &&
(m_LastCommunicatorMessageNumber != m_BrainBatcher.GetNumberMessageReceived()))
{
m_LastCommunicatorMessageNumber = m_BrainBatcher.GetNumberMessageReceived();
if (m_BrainBatcher.GetCommand() ==
CommunicatorObjects.CommandProto.Reset)
{
UpdateResetParameters();
SetIsInference(!m_BrainBatcher.GetIsTraining());
ForcedFullReset();
}
if (m_BrainBatcher.GetCommand() ==
CommunicatorObjects.CommandProto.Quit)
{
#if UNITY_EDITOR
EditorApplication.isPlaying = false;
#endif
Application.Quit();
return;
}
}
else if (!m_FirstAcademyReset)
if (!m_FirstAcademyReset)
UpdateResetParameters();
AgentResetIfDone();
using (TimerStack.Instance.Scoped("AgentResetIfDone"))
{
AgentResetIfDone();
}
AgentSendState();
using (TimerStack.Instance.Scoped("AgentSendState"))
{
AgentSendState();
}
BrainDecideAction();
using (TimerStack.Instance.Scoped("BrainDecideAction"))
{
BrainDecideAction();
}
AcademyStep();
using (TimerStack.Instance.Scoped("AcademyStep"))
{
AcademyStep();
}
AgentAct();
using (TimerStack.Instance.Scoped("AgentAct"))
{
AgentAct();
}
m_StepCount += 1;
m_TotalStepCount += 1;

}
/// <summary>
/// Monobehavior function that dictates each environment step.
/// MonoBehaviour function that dictates each environment step.
/// </summary>
void FixedUpdate()
{

// Signal to listeners that the academy is being destroyed now
DestroyAction();
// TODO - Pass worker ID or some other identifier,
// so that multiple envs won't overwrite each others stats.
TimerStack.Instance.SaveJsonTimers();
}
}
}
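
Two themes run through this file's hunks: the Batcher indirection is deleted (the Academy now owns an ICommunicator directly, and brains get SetCommunicator instead of SetBatcher), and each phase of the environment step is wrapped in a TimerStack scope for the new Timer.cs profiling. The handshake logic visible above condenses to roughly the following sketch; initParams stands in for the CommunicatorInitParameters object built in the diff:

// Sketch of the new initialization flow: attempt the first exchange with
// Python; if nothing answers, fall back to inference mode by clearing the
// communicator on the Academy and on every controlled brain.
try
{
    var unityRLInitParameters = m_Communicator.Initialize(initParams, broadcastHub);
    Random.InitState(unityRLInitParameters.seed);
}
catch
{
    m_Communicator = null;
    foreach (var brain in controlledBrains)
    {
        brain.SetCommunicator(null);
    }
}
SetIsInference(!IsCommunicatorOn);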

UnitySDK/Assets/ML-Agents/Scripts/Agent.cs (53 changes)


using System.Collections.Generic;
using MLAgents.CommunicatorObjects;
using UnityEngine;

/// <summary>
/// User-customizable object for sending structured output from Unity to Python in response
/// to an action in addition to a scalar reward.
/// TODO(cgoy): All references to protobuf objects should be removed.
public CustomObservationProto customObservation;
public CommunicatorObjects.CustomObservationProto customObservation;
/// <summary>
/// Remove the visual observations from memory. Call at each timestep

public string textActions;
public List<float> memories;
public float value;
public CustomActionProto customAction;
/// TODO(cgoy): All references to protobuf objects should be removed.
public CommunicatorObjects.CustomActionProto customAction;
}
/// <summary>

/// Current Agent information (message sent to Brain).
AgentInfo m_Info;
public AgentInfo Info
{
get { return m_Info; }
set { m_Info = value; }
}
/// Current Agent action (message sent from Brain).
AgentAction m_Action;

m_Info.storedTextActions = m_Action.textActions;
m_Info.vectorObservation.Clear();
m_ActionMasker.ResetMask();
CollectObservations();
using (TimerStack.Instance.Scoped("CollectObservations"))
{
CollectObservations();
}
m_Info.actionMasks = m_ActionMasker.GetMask();
var param = brain.brainParameters;

m_Info.maxStepReached = m_MaxStepReached;
m_Info.id = m_Id;
brain.SendState(this, m_Info);
brain.SubscribeAgentForDecision(this);
if (m_Recorder != null && m_Recorder.record && Application.isEditor)
{

m_Info.textObservation = "";
}
public void ClearVisualObservations()
{
m_Info.ClearVisualObs();
}
/// <summary>

/// A custom action, defined by the user as custom protobuf message. Useful if the action is hard to encode
/// as either a flat vector or a single string.
/// </param>
public virtual void AgentAction(float[] vectorAction, string textAction, CustomActionProto customAction)
public virtual void AgentAction(float[] vectorAction, string textAction, CommunicatorObjects.CustomActionProto customAction)
{
// We fall back to not using the custom action if the subclassed Agent doesn't override this method.
AgentAction(vectorAction, textAction);

AgentReset();
}
public void UpdateAgentAction(AgentAction action)
{
m_Action = action;
}
/// <summary>
/// Updates the vector action.
/// </summary>

}
/// <summary>
/// Updates the text action.
/// </summary>
/// <param name="textActions">Text actions.</param>
public void UpdateTextAction(string textActions)
{
m_Action.textActions = textActions;
}
/// <summary>
/// Updates the custom action.
/// </summary>
/// <param name="customAction">Custom action.</param>
public void UpdateCustomAction(CustomActionProto customAction)
{
m_Action.customAction = customAction;
}
/// <summary>
/// Updates the value of the agent.
/// </summary>
public void UpdateValueAction(float value)

}
/// <summary>
/// Sets the status of the agent. Will request decisions or actions according
/// Sets the status of the agent. Will request decisions or actions according
/// to the Academy's stepcount.
/// </summary>
/// <param name="academyStepCounter">Number of current steps in episode</param>

/// Sets the custom observation for the agent for this episode.
/// </summary>
/// <param name="customObservation">New value of the agent's custom observation.</param>
public void SetCustomObservation(CustomObservationProto customObservation)
public void SetCustomObservation(CommunicatorObjects.CustomObservationProto customObservation)
{
m_Info.customObservation = customObservation;
}
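
This is the agent-side counterpart of the Brain changes: AgentInfo is now exposed through an Info property, so instead of pushing a copy of its state via SendState, the agent simply registers itself for a decision and the brain reads agent.Info later. Condensed from the hunks above:

// Old: brain.SendState(this, m_Info);
// New: the agent subscribes itself; its Info property carries the state.
m_Info.maxStepReached = m_MaxStepReached;
m_Info.id = m_Id;
brain.SubscribeAgentForDecision(this);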

UnitySDK/Assets/ML-Agents/Scripts/Brain.cs (49 changes)


using System;
using System.Collections.Generic;
using UnityEngine;

/// Brain receive data from Agents through calls to SendState. The brain then updates the
/// Brain receive data from Agents through calls to SubscribeAgentForDecision. The brain then updates the
/// actions of the agents at each FixedUpdate.
/// The Brain encapsulates the decision making process. Every Agent must be assigned a Brain,
/// but you can use the same Brain with more than one Agent. You can also create several

{
[SerializeField] public BrainParameters brainParameters;
protected Dictionary<Agent, AgentInfo> m_AgentInfos =
new Dictionary<Agent, AgentInfo>(1024);
/// <summary>
/// List of agents subscribed for decisions.
/// </summary>
protected List<Agent> m_Agents = new List<Agent>(1024);
protected Batcher m_BrainBatcher;
[System.NonSerialized]
[NonSerialized]
/// Sets the Batcher of the Brain. The brain will call the batcher at every step and give
/// it the agent's data using SendBrainInfo at each DecideAction call.
/// </summary>
/// <param name="batcher"> The Batcher the brain will use for the current session</param>
public void SetBatcher(Batcher batcher)
{
if (batcher == null)
{
m_BrainBatcher = null;
}
else
{
m_BrainBatcher = batcher;
m_BrainBatcher.SubscribeBrain(name);
}
LazyInitialize();
}
/// <summary>
/// Adds the data of an agent to the current batch so it will be processed in DecideAction.
/// Registers an agent to current batch so it will be processed in DecideAction.
/// <param name="info"></param>
public void SendState(Agent agent, AgentInfo info)
public void SubscribeAgentForDecision(Agent agent)
m_AgentInfos[agent] = info;
m_Agents.Add(agent);
}
/// <summary>

private void LazyInitialize()
protected void LazyInitialize()
{
if (!m_IsInitialized)
{

{
if (m_IsInitialized)
{
m_AgentInfos.Clear();
m_Agents.Clear();
m_IsInitialized = false;
}
}

/// </summary>
private void BrainDecideAction()
{
m_BrainBatcher?.SendBrainInfo(name, m_AgentInfos);
// Clear the agent Decision subscription collection for the next update cycle.
m_Agents.Clear();
/// Is called only once at the begening of the training or inference session.
/// Is called only once at the beginning of the training or inference session.
/// </summary>
protected abstract void Initialize();
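
Putting this file's hunks together: the brain keeps a per-step List&lt;Agent&gt; subscription list instead of a dictionary of copied AgentInfo, and clears it after every decision. A sketch of the resulting surface, assuming the surrounding class is otherwise unchanged:

// Agents subscribe each step; DecideAction (implemented by subclasses such
// as the TestBrain above) reads agent.Info, then the list is reset.
public void SubscribeAgentForDecision(Agent agent)
{
    LazyInitialize();   // assumption: mirrors the old SendState path
    m_Agents.Add(agent);
}

private void BrainDecideAction()
{
    DecideAction();     // abstract; subclasses consume m_Agents here
    // Clear the agent Decision subscription collection for the next update cycle.
    m_Agents.Clear();
}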

UnitySDK/Assets/ML-Agents/Scripts/BrainParameters.cs (41 changes)


using System;
using UnityEngine;
using System.Linq;
namespace MLAgents
{

Continuous
};
}
/// <summary>
/// The resolution of a camera used by an agent.

/// <summary>Defines if the action is discrete or continuous</summary>
public SpaceType vectorActionSpaceType = SpaceType.Discrete;
public BrainParameters()
{
}
/// <summary>
/// Converts Resolution protobuf array to C# Resolution array.
/// </summary>
private static Resolution[] ResolutionProtoToNative(
CommunicatorObjects.ResolutionProto[] resolutionProtos)
{
var localCameraResolutions = new Resolution[resolutionProtos.Length];
for (var i = 0; i < resolutionProtos.Length; i++)
{
localCameraResolutions[i] = new Resolution
{
height = resolutionProtos[i].Height,
width = resolutionProtos[i].Width,
blackAndWhite = resolutionProtos[i].GrayScale
};
}