
Merge remote-tracking branch 'origin/develop' into try-tf2-support

/develop-gpu-test
Chris Elion 5 years ago
Current commit
254c7d86
44 files changed, 462 insertions and 1763 deletions
  1. README.md (1)
  2. UnitySDK/Assets/ML-Agents/Editor/Tests/MLAgentsEditModeTest.cs (8)
  3. UnitySDK/Assets/ML-Agents/Scripts/Academy.cs (58)
  4. UnitySDK/Assets/ML-Agents/Scripts/Brain.cs (2)
  5. UnitySDK/Assets/ML-Agents/Scripts/Grpc/RpcCommunicator.cs (65)
  6. UnitySDK/Assets/ML-Agents/Scripts/ICommunicator.cs (17)
  7. UnitySDK/Assets/ML-Agents/Scripts/LearningBrain.cs (16)
  8. docs/Basic-Guide.md (12)
  9. docs/FAQ.md (7)
  10. docs/Getting-Started-with-Balance-Ball.md (6)
  11. docs/Learning-Environment-Create-New.md (22)
  12. docs/Learning-Environment-Design-Academy.md (3)
  13. docs/Learning-Environment-Design-Brains.md (3)
  14. docs/Learning-Environment-Design-Learning-Brains.md (2)
  15. docs/Learning-Environment-Design.md (4)
  16. docs/Learning-Environment-Executable.md (6)
  17. docs/ML-Agents-Overview.md (27)
  18. docs/Migrating.md (3)
  19. docs/Python-API.md (7)
  20. docs/Training-Behavioral-Cloning.md (2)
  21. docs/Training-ML-Agents.md (3)
  22. docs/Training-PPO.md (4)
  23. docs/Training-SAC.md (6)
  24. docs/images/academy.png (630)
  25. docs/images/mlagents-NewTutAcademy.png (157)
  26. markdown-link-check.config.json (4)
  27. ml-agents-envs/mlagents/envs/environment.py (26)
  28. ml-agents-envs/mlagents/envs/rpc_communicator.py (1)
  29. ml-agents/mlagents/trainers/learn.py (28)
  30. ml-agents/mlagents/trainers/sac/models.py (7)
  31. ml-agents/mlagents/trainers/tensorflow_to_barracuda.py (1)
  32. ml-agents/mlagents/trainers/tests/test_learn.py (9)
  33. ml-agents/mlagents/trainers/tests/test_simple_rl.py (7)
  34. ml-agents/mlagents/trainers/tests/test_trainer_controller.py (10)
  35. ml-agents/mlagents/trainers/tests/test_trainer_util.py (21)
  36. ml-agents/mlagents/trainers/trainer_controller.py (61)
  37. ml-agents/mlagents/trainers/trainer_util.py (160)
  38. UnitySDK/Assets/ML-Agents/Editor/BroadcastHubDrawer.cs.meta (3)
  39. UnitySDK/Assets/ML-Agents/Editor/BroadcastHubDrawer.cs (186)
  40. UnitySDK/Assets/ML-Agents/Scripts/BroadcastHub.cs.meta (3)
  41. UnitySDK/Assets/ML-Agents/Scripts/BroadcastHub.cs (46)
  42. docs/images/broadcast.png (236)
  43. docs/images/mlagents-SetBrainToTrain.png (243)
  44. ml-agents-envs/mlagents/envs/socket_communicator.py (102)

1
README.md


* Support for multiple environment configurations and training scenarios
* Train memory-enhanced agents using deep reinforcement learning
* Easily definable Curriculum Learning and Generalization scenarios
* Broadcasting of agent behavior for supervised learning
* Built-in support for Imitation Learning
* Flexible agent control with On Demand Decision Making
* Visualizing network outputs within the environment

8
UnitySDK/Assets/ML-Agents/Editor/Tests/MLAgentsEditModeTest.cs


var acaGo = new GameObject("TestAcademy");
acaGo.AddComponent<TestAcademy>();
var aca = acaGo.GetComponent<TestAcademy>();
aca.resetParameters = new ResetParameters();
Assert.AreEqual(0, aca.initializeAcademyCalls);
Assert.AreEqual(0, aca.GetStepCount());
Assert.AreEqual(0, aca.GetEpisodeCount());

var acaGo = new GameObject("TestAcademy");
acaGo.AddComponent<TestAcademy>();
var aca = acaGo.GetComponent<TestAcademy>();
aca.resetParameters = new ResetParameters();
var brain = TestBrain.Instantiate();
brain.brainParameters = new BrainParameters();
brain.brainParameters.vectorObservationSize = 0;

var acaGo = new GameObject("TestAcademy");
acaGo.AddComponent<TestAcademy>();
var aca = acaGo.GetComponent<TestAcademy>();
aca.resetParameters = new ResetParameters();
var academyInitializeMethod = typeof(Academy).GetMethod("InitializeEnvironment",
BindingFlags.Instance | BindingFlags.NonPublic);
academyInitializeMethod?.Invoke(aca, new object[] {});

var acaGo = new GameObject("TestAcademy");
acaGo.AddComponent<TestAcademy>();
var aca = acaGo.GetComponent<TestAcademy>();
aca.resetParameters = new ResetParameters();
var brain = TestBrain.Instantiate();

var acaGo = new GameObject("TestAcademy");
acaGo.AddComponent<TestAcademy>();
var aca = acaGo.GetComponent<TestAcademy>();
aca.resetParameters = new ResetParameters();
var academyInitializeMethod = typeof(Academy).GetMethod(
"InitializeEnvironment", BindingFlags.Instance | BindingFlags.NonPublic);
academyInitializeMethod?.Invoke(aca, new object[] {});

var acaGo = new GameObject("TestAcademy");
acaGo.AddComponent<TestAcademy>();
var aca = acaGo.GetComponent<TestAcademy>();
aca.resetParameters = new ResetParameters();
var brain = TestBrain.Instantiate();

var acaGo = new GameObject("TestAcademy");
acaGo.AddComponent<TestAcademy>();
var aca = acaGo.GetComponent<TestAcademy>();
aca.resetParameters = new ResetParameters();
var brain = TestBrain.Instantiate();

var acaGo = new GameObject("TestAcademy");
acaGo.AddComponent<TestAcademy>();
var aca = acaGo.GetComponent<TestAcademy>();
aca.resetParameters = new ResetParameters();
var brain = TestBrain.Instantiate();

58
UnitySDK/Assets/ML-Agents/Scripts/Academy.cs


"docs/Learning-Environment-Design-Academy.md")]
public abstract class Academy : MonoBehaviour
{
[SerializeField]
public BroadcastHub broadcastHub = new BroadcastHub();
private const string k_ApiVersion = "API-10";
/// Temporary storage for global gravity value

/// </returns>
bool IsCommunicatorOn
{
get { return m_Communicator != null; }
get { return Communicator != null; }
}
/// If true, the Academy will use inference settings. This field is

bool m_ModeSwitched;
/// Pointer to the communicator currently in use by the Academy.
ICommunicator m_Communicator;
public ICommunicator Communicator;
// Flag used to keep track of the first time the Academy is reset.
bool m_FirstAcademyReset;

InitializeAcademy();
var controlledBrains = broadcastHub.brainsToControl.Where(x => x != null).ToList();
m_Communicator = new RpcCommunicator(
Communicator = new RpcCommunicator(
// If it fails, we check if there are any external brains in the scene
// and if Unity is in Editor mode
// If there are : Launch the communicator on the default port
m_Communicator = null;
if (controlledBrains.Any())
{
m_Communicator = new RpcCommunicator(
new CommunicatorInitParameters
{
port = 5005
});
}
Communicator = new RpcCommunicator(
new CommunicatorInitParameters
{
port = 5004
});
foreach (var trainingBrain in controlledBrains)
{
trainingBrain.SetCommunicator(m_Communicator);
}
if (m_Communicator != null)
if (Communicator != null)
m_Communicator.QuitCommandReceived += OnQuitCommandReceived;
m_Communicator.ResetCommandReceived += OnResetCommand;
m_Communicator.RLInputReceived += OnRLInputReceived;
var unityRLInitParameters = m_Communicator.Initialize(
var unityRLInitParameters = Communicator.Initialize(
brains = controlledBrains,
}, broadcastHub);
});
m_Communicator = null;
foreach (var brain in controlledBrains)
{
brain.SetCommunicator(null);
}
Communicator = null;
}
if (Communicator != null){
Communicator.QuitCommandReceived += OnQuitCommandReceived;
Communicator.ResetCommandReceived += OnResetCommand;
Communicator.RLInputReceived += OnRLInputReceived;
}
}

SetIsInference(!IsCommunicatorOn);
BrainDecideAction += () => {};

ConfigureEnvironment();
m_ModeSwitched = false;
}
if (!m_FirstAcademyReset)
{
ForcedFullReset();

2
UnitySDK/Assets/ML-Agents/Scripts/Brain.cs


var academy = FindObjectOfType<Academy>();
if (academy)
{
m_IsInitialized = true;
m_IsInitialized = true;
}
}
}

65
UnitySDK/Assets/ML-Agents/Scripts/Grpc/RpcCommunicator.cs


Dictionary<string, bool> m_HasData =
new Dictionary<string, bool>();
/// Keeps track of which brains queried the batcher on the current step
/// Keeps track of which brains queried the communicator on the current step
Dictionary<string, bool> m_HasQueried =
new Dictionary<string, bool>();

/// The current UnityRLOutput to be sent when all the brains queried the batcher
/// The current UnityRLOutput to be sent when all the brains queried the communicator
private UnityRLInitializationOutputProto m_CurrentUnityRlInitializationOutput;
# if UNITY_EDITOR || UNITY_STANDALONE_WIN || UNITY_STANDALONE_OSX || UNITY_STANDALONE_LINUX

/// </summary>
/// <returns>The External Initialization Parameters received.</returns>
/// <param name="initParameters">The Unity Initialization Parameters to be sent.</param>
/// <param name="broadcastHub">The BroadcastHub to get the controlled brains.</param>
public UnityRLInitParameters Initialize(CommunicatorInitParameters initParameters,
BroadcastHub broadcastHub)
public UnityRLInitParameters Initialize(CommunicatorInitParameters initParameters)
{
var academyParameters = new UnityRLInitializationOutputProto
{

foreach (var brain in initParameters.brains)
{
academyParameters.BrainParameters.Add(brain.brainParameters.ToProto(
brain.name, true));
SubscribeBrain(brain.name);
}
academyParameters.EnvironmentParameters = new EnvironmentParametersProto();

return initializationInput.RlInitializationInput.ToUnityRLInitParameters();
}
/// <summary>
/// Adds the brain to the list of brains which will be sending information to External.
/// </summary>
/// <param name="brainKey">Brain key.</param>
public void SubscribeBrain(string brainKey, BrainParameters brainParameters)
{
m_HasQueried[brainKey] = false;
m_HasData[brainKey] = false;
m_CurrentAgents[brainKey] = new List<Agent>(k_NumAgents);
m_CurrentUnityRlOutput.AgentInfos.Add(
brainKey,
new CommunicatorObjects.UnityRLOutputProto.Types.ListAgentInfoProto());
if (m_CurrentUnityRlInitializationOutput == null){
m_CurrentUnityRlInitializationOutput = new CommunicatorObjects.UnityRLInitializationOutputProto();
}
m_CurrentUnityRlInitializationOutput.BrainParameters.Add(brainParameters.ToProto(brainKey, true));
}
void UpdateEnvironmentWithInput(UnityRLInputProto rlInput)
{
SendRLInputReceivedEvent(rlInput.IsTraining);

#region Sending and retrieving data
/// <summary>
/// Adds the brain to the list of brains which will be sending information to External.
/// </summary>
/// <param name="brainKey">Brain key.</param>
private void SubscribeBrain(string brainKey)
{
m_HasQueried[brainKey] = false;
m_HasData[brainKey] = false;
m_CurrentAgents[brainKey] = new List<Agent>(k_NumAgents);
m_CurrentUnityRlOutput.AgentInfos.Add(
brainKey,
new UnityRLOutputProto.Types.ListAgentInfoProto());
}
public void PutObservations(
string brainKey, IEnumerable<Agent> agents)
{

/// </summary>
void SendBatchedMessageHelper()
{
var input = Exchange(
new UnityOutputProto
{
RlOutput = m_CurrentUnityRlOutput
});
var message = new CommunicatorObjects.UnityOutputProto
{
RlOutput = m_CurrentUnityRlOutput,
};
if (m_CurrentUnityRlInitializationOutput != null)
{
message.RlInitializationOutput = m_CurrentUnityRlInitializationOutput;
}
var input = Exchange(message);
m_CurrentUnityRlInitializationOutput = null;
foreach (var k in m_CurrentUnityRlOutput.AgentInfos.Keys)
{

17
UnitySDK/Assets/ML-Agents/Scripts/ICommunicator.cs


/// </summary>
public string version;
/// <summary>
/// The list of brains parameters used for training.
/// </summary>
public IEnumerable<Brain> brains;
/// <summary>
/// The set of environment parameters defined by the user that will be sent to the communicator.
/// </summary>
public EnvironmentResetParameters environmentResetParameters;

/// </summary>
/// <returns>The External Initialization Parameters received.</returns>
/// <param name="initParameters">The Unity Initialization Parameters to be sent.</param>
/// <param name="broadcastHub">The BroadcastHub to get the controlled brains.</param>
UnityRLInitParameters Initialize(CommunicatorInitParameters initParameters,
BroadcastHub broadcastHub);
UnityRLInitParameters Initialize(CommunicatorInitParameters initParameters);
/// <summary>
/// Registers a new Brain to the Communicator.
/// </summary>
/// <param name="name">The name or key uniquely identifying the Brain</param>
/// <param name="brainParameters">The Parameters for the Brain being registered</param>
void SubscribeBrain(string name, BrainParameters brainParameters);
/// sent once all the brains that subscribed to the batcher have tried
/// sent once all the brains that were part of initialization have tried
/// to send information.
/// </summary>
/// <param name="key">Batch Key.</param>

16
UnitySDK/Assets/ML-Agents/Scripts/LearningBrain.cs


/// <summary>
/// The Learning Brain works differently if you are training it or not.
/// When training your Agents, drag the Learning Brain to the Academy's BroadcastHub.
/// When using a pretrained model, just drag the Model file into the
/// Model property of the Learning Brain.
/// When training your Agents, the LearningBrain will be controlled by Python.
/// When using a pretrained model, just drag the Model file into the
/// Model property of the Learning Brain and do not launch the Python training process.
/// least one LearningBrain in the BroadcastHub.
/// least one LearningBrain in the scene.
/// The property model corresponds to the Model currently attached to the Brain. Before
/// being used, a call to ReloadModel is required.
/// When the Learning Brain is not training, it uses a TensorFlow model to make decisions.

protected ICommunicator m_Communicator;
/// <summary>
/// Sets the Batcher of the Brain. The brain will call the communicator at every step and give
/// Sets the ICommunicator of the Brain. The brain will call the communicator at every step and give
public void SetCommunicator(ICommunicator communicator)
private void SetCommunicator(ICommunicator communicator)
m_Communicator?.SubscribeBrain(name, brainParameters);
}
/// <inheritdoc />

var comm = FindObjectOfType<Academy>()?.Communicator;
SetCommunicator(comm);
}
/// <summary>

12
docs/Basic-Guide.md


if you want to [use an executable](Learning-Environment-Executable.md) or to
`None` if you want to interact with the current scene in the Unity Editor.
Before building the environment or interacting with it in the editor, select `Ball3DAcademy` in the **Hierarchy** window of the Unity editor and make sure the `3DBallLearningBrain` is in the Broadcast Hub of the `Ball3DAcademy` component.
More information and documentation are provided on the
[Python API](Python-API.md) page.

to the training and which Brain is being trained. You can only perform training with
a `Learning Brain`.
1. Each platform agent needs an assigned `Learning Brain`. In this example, each platform agent was created using a prefab. To update all of the brains in each platform agent at once, you only need to update the platform agent prefab. In the **Project** window, go to the `Assets/ML-Agents/Examples/3DBall/Prefabs` folder. Expand `Game` and click on the `Platform` prefab. You should see the `Platform` prefab in the **Inspector** window. In the **Project** window, drag the **3DBallLearning** Brain located in `Assets/ML-Agents/Examples/3DBall/Brains` into the `Brain` property under `Ball 3D Agent (Script)` component in the **Inspector** window.
Each platform agent needs an assigned `Learning Brain`. In this example, each platform agent was created using a prefab. To update all of the brains in each platform agent at once, you only need to update the platform agent prefab. In the **Project** window, go to the `Assets/ML-Agents/Examples/3DBall/Prefabs` folder. Expand `Game` and click on the `Platform` prefab. You should see the `Platform` prefab in the **Inspector** window. In the **Project** window, drag the **3DBallLearning** Brain located in `Assets/ML-Agents/Examples/3DBall/Brains` into the `Brain` property under `Ball 3D Agent (Script)` component in the **Inspector** window.
2. In the **Hierarchy** window, select `Ball3DAcademy`.
3. In the **Project** window, go to `Assets/ML-Agents/Examples/3DBall/Brains` folder and drag the **3DBallLearning** Brain to the `Brains` property under `Broadcast Hub` in the `Ball3DAcademy` object in the **Inspector** window.
the agent) means that the Brain will be making decisions for that agent. Whereas dragging
a Brain into the Broadcast Hub means that the Brain will be exposed to the Python process.
![Set Brain to External](images/mlagents-SetBrainToTrain.png)
the agent) means that the Brain will be making decisions for that agent. If the Agent uses a
LearningBrain either Python controls the Brain or the model on the Brain does.
### Training the environment

7
docs/FAQ.md


There may be a number of possible causes:
* _Cause_: There may be no LearningBrain in the
`Broadcast Hub` of the Academy. In this case, the environment will not attempt
to communicate with Python. _Solution_: Click `Add New` in your Academy's
`Broadcast Hub`, and drag your LearningBrain asset into the `Brains` field.
You also need to assign this LearningBrain
asset to all of the Agents you wish to train.
* _Cause_: There may be no agent in the scene with a LearningBrain
* _Cause_: On OSX, the firewall may be preventing communication with the
environment. _Solution_: Add the built environment binary to the list of
exceptions on the firewall by following

6
docs/Getting-Started-with-Balance-Ball.md


The Academy object for the scene is placed on the Ball3DAcademy GameObject. When
you look at an Academy component in the inspector, you can see several
properties that control how the environment works.
The **Broadcast Hub** keeps track of which Brains will send data during training.
If a Brain is added to the hub, the data from this Brain will be sent to the external training
process.
The **Training Configuration** and **Inference Configuration** properties
set the graphics and timescale properties for the Unity application.
The Academy uses the **Training Configuration** during training and the

You can create new Brain assets by selecting `Assets ->
Create -> ML-Agents -> Brain`. There are 3 types of Brains.
The **Learning Brain** is a Brain that uses a trained neural network to make decisions.
When the **Learning Brain** is dragged into the **Broadcast Hub** in the Academy, the external process that is training the neural network will take over decision making for the agents
When Unity is connected to Python, the external process will be controlling the Brain.
The external process that is training the neural network will take over decision making for the agents
and ultimately generate a trained neural network. You can also use the
**Learning Brain** with a pre-trained model.
The **Heuristic** Brain allows you to hand-code the Agent logic by extending

22
docs/Learning-Environment-Create-New.md


5. Add your Agent subclasses to appropriate GameObjects, typically, the object
in the scene that represents the Agent in the simulation. Each Agent object
must be assigned a Brain object.
6. If training, drag the Brain in the BroadcastHub of the Academy.
[run the training process](Training-ML-Agents.md).
**Note:** If you are unfamiliar with Unity, refer to
[Learning the interface](https://docs.unity3d.com/Manual/LearningtheInterface.html)

The default settings for the Academy properties are also fine for this
environment, so we don't need to change anything for the RollerAcademy component
in the Inspector window. You may not have the RollerBrain in the Broadcast Hub yet,
more on that later.
in the Inspector window.
![The Academy properties](images/mlagents-NewTutAcademy.png)

Brain asset to the Agent, changing some of the Agent Component's properties, and
setting the Brain properties so that they are compatible with our Agent code.
1. In the Academy Inspector, add the `RollerBallBrain` and `RollerBallPlayer`
Brains to the **Broadcast Hub**.
2. Select the **RollerAgent** GameObject to show its properties in the Inspector
1. Select the **RollerAgent** GameObject to show its properties in the Inspector
3. Drag the Brain **RollerBallPlayer** from the Project window to the
2. Drag the Brain **RollerBallPlayer** from the Project window to the
4. Change **Decision Interval** from `1` to `10`.
5. Drag the Target GameObject from the Hierarchy window to the RollerAgent
3. Change **Decision Interval** from `1` to `10`.
4. Drag the Target GameObject from the Hierarchy window to the RollerAgent
Target field.
![Assign the Brain to the RollerAgent](images/mlagents-NewTutAssignBrain.png)

## Training the Environment
Now you can train the Agent. To get ready for training, you must first drag the
`RollerBallBrain` asset to the **RollerAgent** GameObject `Brain` field to change to the learning brain.
Then, select the Academy GameObject and drag
the RollerBallBrain item in the **Broadcast Hub** list. From there, the process is
`RollerBallBrain` asset to the **RollerAgent** GameObject `Brain` field to change
to the LearningBrain. From there, the process is
the same as described in [Training ML-Agents](Training-ML-Agents.md). Note that the
models will be created in the original ml-agents project folder, `ml-agents/models`.

Keep in mind:
* There can only be one Academy game object in a scene.
* You can only train Learning Brains that have been added to the Academy's Broadcast Hub list.

3
docs/Learning-Environment-Design-Academy.md


## Academy Properties
![Academy Inspector](images/academy.png)
* `Broadcast Hub` - Gathers the Brains that will communicate with the external
process. Any Brain added to the Broadcast Hub will be visible from the external
process and controllable from the external process and will thus be trainable.
* `Configuration` - The engine-level settings which correspond to rendering
quality and engine speed.
* `Width` - Width of the environment window in pixels.

3
docs/Learning-Environment-Design-Brains.md


**PlayerBrain** to map keyboard keys to Agent actions, which can be
useful to test your Agent code.
During training, use a **Learning Brain**
and drag it into the Academy's `Broadcast Hub`.
During training, use a **Learning Brain**.
When you want to use the trained model, import the model file into the Unity
project, add it to the **Model** property of the **Learning Brain**.

2
docs/Learning-Environment-Design-Learning-Brains.md


## Training Mode / External Control
When [running an ML-Agents training algorithm](Training-ML-Agents.md), at least
one Brain asset must be in the Academy's `Broadcast Hub`. This allows the training
one Agent must have a LearningBrain. This allows the training
process to collect the observations of Agents using that Brain and give the Agents
their actions.

4
docs/Learning-Environment-Design.md


search bar on top of the Scene Hierarchy window.
To Create a Brain, go to `Assets -> Create -> Ml-Agents` and select the
type of Brain you want to use. During training, use a **Learning Brain**
and drag it into the Academy's `Broadcast Hub`.
type of Brain you want to use. During training, use a **Learning Brain**.
When you want to use the trained model, import the model file into the Unity
project, add it to the **Model** property of the **Learning Brain**.
If the Python process is not active, the **Learning Brain** will not train but

* The training scene must start automatically when your Unity application is
launched by the training process.
* The scene must include an Academy with at least one Brain in the `Broadcast Hub`.
* The Academy must reset the scene to a valid starting point for each episode of
training.
* A training episode must have a definite end — either using `Max Steps` or by

6
docs/Learning-Environment-Executable.md


Make sure the Brains in the scene have the right type. For example, if you want
to be able to control your agents from Python, you will need to put the Brain
controlling the Agents to be a **Learning Brain** and drag it into the
Academy's `Broadcast Hub`. In the 3DBall
controlling the Agents to be a **Learning Brain**. In the 3DBall
scene, this can be done in the Platform GameObject within the Game prefab in
`Assets/ML-Agents/Examples/3DBall/Prefabs/`, or in each instance of the
Platform in the Scene.

5. Drag the `<brain_name>.nn` file from the Project window of
the Editor to the **Model** placeholder in the **Ball3DLearning**
inspector window.
6. Remove the **Ball3DLearning** from the Academy's `Broadcast Hub`
7. Press the Play button at the top of the editor.
6. Press the Play button at the top of the editor.

27
docs/ML-Agents-Overview.md


[TensorFlow](Background-TensorFlow.md) model. The embedded TensorFlow model
represents a learned policy and the Brain directly uses this model to
determine the action for each Agent. You can train a **Learning Brain**
by dragging it into the Academy's `Broadcast Hub` and launching the game with
the Python training process.
by launching the game with the Python training process.
- **Player** - where decisions are made using real input from a keyboard or
controller. Here, a human player is controlling the Agent and the observations
and rewards collected by the Brain are not used to control the Agent.

trained a Brain for the medics we could assign a medic on one team to the
trained Brain and assign the medic on the other team a Heuristic Brain with
hard-coded behaviors. We can then evaluate which medic is more effective.
As currently described, it may seem that the External Communicator and Python
API are only leveraged by the Learning Brain. This is not true. It is possible
to configure the Learning, Player and Heuristic Brains to also send the
observations, rewards and actions to the Python API through the External
Communicator (a feature called _broadcasting_). As we will see shortly, this
enables additional training modes.
<p align="center">
<img src="images/learning_environment.png"

In the previous mode, the Learning Brain was used for training to generate
a TensorFlow model that the Learning Brain can later use. However,
any user of the ML-Agents toolkit can leverage their own algorithms for
training. In this case, the Brain type would be set to Learning and be linked
to the BroadcastHub
training. In this case, the Brain type would be set to Learning
and the behaviors of all the Agents in the scene will be controlled within Python.
You can even turn your environment into a [gym.](../gym-unity/README.md)

a way to randomly sample Reset Parameters of the environment during training. See
[Training Generalized Reinforcement Learning Agents](Training-Generalized-Reinforcement-Learning-Agents.md)
to learn more about this feature.
- **Broadcasting** - As discussed earlier, a Learning Brain sends the
observations for all its Agents to the Python API when dragged into the
Academy's `Broadcast Hub` with the `Control` checkbox checked. This is helpful
for training and later inference. Broadcasting is a feature which can be
enabled for all types of Brains (Player, Learning, Heuristic) where the Agent
observations and actions are also sent to the Python API (despite the fact
that the Agent is **not** controlled by the Python API). This feature is
leveraged by Imitation Learning, where the observations and actions for a
Player Brain are used to learn the policies of an agent through demonstration.
However, this could also be helpful for the Heuristic and Learning Brains,
particularly when debugging agent behaviors. You can learn more about using
the broadcasting feature
[here](Learning-Environment-Design-Brains.md#using-the-broadcast-feature).
- **Cloud Training on AWS** - To facilitate using the ML-Agents toolkit on
Amazon Web Services (AWS) machines, we provide a

3
docs/Migrating.md


### Important Changes
* The definition of the gRPC service has changed.
* The online BC training feature has been removed.
* The BroadcastHub of the Academy no longer has a `Control` checkbox. All Learning Brains in the BroadcastHub will be considered as trainable (although the training will only be launched if the Python Process is ready and will use inference otherwise)
* The broadcast feature has been deprecated. Only LearningBrains can communicate with Python.
* The BroadcastHub has been deprecated. If there is a training Python process, all LearningBrains in the scene will automatically be trained. If there is no Python process, inference will be used.
#### Steps to Migrate
* In order to be able to train, make sure both your ML-Agents Python package and UnitySDK code come from the v0.11 release. Training will not work, for example, if you update the ML-Agents Python package, and only update the API Version in UnitySDK.

7
docs/Python-API.md


the ML-Agents SDK.
To communicate with an Agent in a Unity environment from a Python program, the
Agent must use a LearningBrain present in the Academy's `Broadcast Hub`.
Agent must use a LearningBrain.
actions for Agents with Brains in the
Academy's `Broadcast Hub`.
actions for Agents with LearningBrains.
_Notice: Currently communication between Unity and Python takes place over an
open socket without authentication. As such, please make sure that the network

observations = brainInfo.vector_observations
```
Note that if you have more than one Brain in the Academy's `Broadcast Hub`, you
Note that if you have more than one LearningBrain in the scene, you
must provide dictionaries from Brain names to arrays for `action`, `memory`
and `value`. For example: If you have two Learning Brains named `brain1` and
`brain2` each with one Agent taking two continuous actions, then you can
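
Below is a minimal sketch of what the passage above describes (the hunk is truncated in this excerpt). The brain names `brain1` and `brain2` are the hypothetical names used in the doc, and the keyword `vector_action` follows the `UnityEnvironment.step` signature of this release; treat it as an illustration rather than the exact doc example.

```python
# Minimal sketch (hypothetical brain names; assumes the UnityEnvironment API
# shipped with this release, where step() accepts per-brain dictionaries).
from mlagents.envs.environment import UnityEnvironment

env = UnityEnvironment(file_name=None)  # file_name=None connects to the Editor
env.reset(train_mode=True)

# With more than one LearningBrain, actions are keyed by brain name.
action = {
    "brain1": [1.0, 2.0],  # one Agent taking two continuous actions
    "brain2": [3.0, 4.0],
}
brain_infos = env.step(vector_action=action)
env.close()
```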

2
docs/Training-Behavioral-Cloning.md


1. Choose an agent you would like to learn to imitate some set of demonstrations.
2. Record a set of demonstration using the `Demonstration Recorder` (see [here](Training-Imitation-Learning.md)).
For illustrative purposes we will refer to this file as `AgentRecording.demo`.
3. Build the scene, assigning the agent a Learning Brain, and dragging it in the Broadcast Hub. For more information on Brains, see
3. Build the scene, assigning the agent a Learning Brain. For more information on Brains, see
[here](Learning-Environment-Design-Brains.md).
4. Open the `config/offline_bc_config.yaml` file.
5. Modify the `demo_path` parameter in the file to reference the path to the

3
docs/Training-ML-Agents.md


* `--base-port`: Specifies the starting port. Each concurrent Unity environment instance
will get assigned a port sequentially, starting from the `base-port`. Each instance
will use the port `(base_port + worker_id)`, where the `worker_id` is sequential IDs
given to each instance from 0 to `num_envs - 1`. Default is 5005.
given to each instance from 0 to `num_envs - 1`. Default is 5005. __Note:__ When
training using the Editor rather than an executable, the base port will be ignored.
* `--slow`: Specify this option to run the Unity environment at normal, game
speed. The `--slow` mode uses the **Time Scale** and **Target Frame Rate**
specified in the Academy's **Inference Configuration**. By default, training

4
docs/Training-PPO.md


`vis_encode_type` corresponds to the encoder type for encoding visual observations.
Valid options include:
* `simple` (default): a simple encoder which consists of two convolutional layers
* `nature_cnn`: CNN implementation proposed by Mnih et al.(https://www.nature.com/articles/nature14236),
* `nature_cnn`: [CNN implementation proposed by Mnih et al.](https://www.nature.com/articles/nature14236),
* `resnet`: IMPALA Resnet implementation (https://arxiv.org/abs/1802.01561),
* `resnet`: [IMPALA Resnet implementation](https://arxiv.org/abs/1802.01561),
consisting of three stacked layers, each with two residual blocks, making a
much larger network than the other two.

6
docs/Training-SAC.md


`vis_encode_type` corresponds to the encoder type for encoding visual observations.
Valid options include:
* `simple` (default): a simple encoder which consists of two convolutional layers
* `nature_cnn`: CNN implementation proposed by Mnih et al.(https://www.nature.com/articles/nature14236),
* `nature_cnn`: [CNN implementation proposed by Mnih et al.](https://www.nature.com/articles/nature14236),
* `resnet`: IMPALA Resnet implementation (https://arxiv.org/abs/1802.01561),
consisting of three stacked layers, each with two risidual blocks, making a
* `resnet`: [IMPALA Resnet implementation](https://arxiv.org/abs/1802.01561),
consisting of three stacked layers, each with two residual blocks, making a
much larger network than the other two.
Options: `simple`, `nature_cnn`, `resnet`

630
docs/images/academy.png

Width: 303 | Height: 326 | Size: 29 KiB

157
docs/images/mlagents-NewTutAcademy.png

Width: 351 | Height: 112 | Size: 14 KiB

4
markdown-link-check.config.json


{
"pattern": "^https://developer.nvidia.com/compute/machine-learning/cudnn/secure",
"comment": "Requires login"
},
{
"pattern": "^https?://bair.berkeley.edu",
"comment": "Temporary berkeley outage"
}
]
}

26
ml-agents-envs/mlagents/envs/environment.py


self._log_path = aca_params.log_path
self._brains: Dict[str, BrainParameters] = {}
self._external_brain_names: List[str] = []
for brain_param in aca_params.brain_parameters:
self._brains[brain_param.brain_name] = BrainParameters.from_proto(
brain_param
)
self._external_brain_names += [brain_param.brain_name]
self._num_external_brains = len(self._external_brain_names)
self._num_external_brains = 0
self._update_brain_parameters(aca_params)
if self._num_external_brains == 0:
logger.warning(
" No Learning Brains set to train found in the Unity Environment. "
"You will not be able to pass actions to your agent(s)."
)
@property
def logfile_path(self):

)
if outputs is None:
raise UnityCommunicationException("Communicator has stopped.")
self._update_brain_parameters(outputs.rl_initialization_output)
rl_output = outputs.rl_output
s = self._get_state(rl_output)
for _b in self._external_brain_names:

outputs = self.communicator.exchange(step_input)
if outputs is None:
raise UnityCommunicationException("Communicator has stopped.")
self._update_brain_parameters(outputs.rl_initialization_output)
rl_output = outputs.rl_output
state = self._get_state(rl_output)
for _b in self._external_brain_names:

self.worker_id, agent_info_list, self.brains[brain_name]
)
return _data
def _update_brain_parameters(
self, init_output: Optional[UnityRLInitializationOutputProto]
) -> None:
if init_output is not None:
for brain_param in init_output.brain_parameters:
self._brains[brain_param.brain_name] = BrainParameters.from_proto(
brain_param
)
self._external_brain_names = list(self._brains.keys())
self._num_external_brains = len(self._external_brain_names)
@timed
def _generate_step_input(

1
ml-agents-envs/mlagents/envs/rpc_communicator.py


raise UnityTimeOutException(
"The Unity environment took too long to respond. Make sure that :\n"
"\t The environment does not need user interaction to launch\n"
"\t The Academy's Broadcast Hub is configured correctly\n"
"\t The Agents are linked to the appropriate Brains\n"
"\t The environment and the Python interface have compatible versions."
)

28
ml-agents/mlagents/trainers/learn.py


# # Unity ML-Agents Toolkit
import logging
import argparse

from mlagents.trainers.trainer_controller import TrainerController
from mlagents.trainers.exception import TrainerError
from mlagents.trainers.meta_curriculum import MetaCurriculumError, MetaCurriculum
from mlagents.trainers.trainer_util import initialize_trainers, load_config
from mlagents.trainers.trainer_util import load_config, TrainerFactory
from mlagents.envs.environment import UnityEnvironment
from mlagents.envs.sampler_class import SamplerManager
from mlagents.envs.exception import SamplerException

nargs=argparse.REMAINDER,
help="Arguments passed to the Unity executable.",
)
args = parser.parse_args(argv)
return CommandLineOptions.from_argparse(args)

:param run_options: Command line arguments for training.
"""
# Docker Parameters
# Recognize and use docker volume if one is passed as an argument
if not options.docker_target_name:
model_path = "./models/{run_id}-{sub_id}".format(

summaries_dir = "/{docker_target_name}/summaries".format(
docker_target_name=options.docker_target_name
)
port = options.base_port + (sub_id * options.num_envs)
if options.env_path is None:
port = 5004 # This is the in Editor Training Port
options.base_port + (sub_id * options.num_envs),
port,
options.env_args,
)
env = SubprocessEnvManager(env_factory, options.num_envs)

sampler_manager, resampling_interval = create_sampler_manager(
options.sampler_file_path, env.reset_parameters, run_seed
)
trainers = initialize_trainers(
trainer_factory = TrainerFactory(
env.external_brains,
summaries_dir,
options.run_id,
model_path,

maybe_meta_curriculum,
options.multi_gpu,
)
trainers,
trainer_factory,
model_path,
summaries_dir,
options.run_id + "-" + str(sub_id),

sampler_manager,
resampling_interval,
)
# Begin training
tc.start_learning(env)

"Specified resampling-interval is not valid. Please provide"
" a positive integer value for resampling-interval"
)
sampler_manager = SamplerManager(sampler_config, run_seed)
return sampler_manager, resample_interval

) -> Optional[MetaCurriculum]:
if curriculum_folder is None:
return None
else:
meta_curriculum = MetaCurriculum(curriculum_folder, env.reset_parameters)
# TODO: Should be able to start learning at different lesson numbers

"name as the Brain "
"whose curriculum it defines."
)
return meta_curriculum

)
except Exception:
print("\n\n\tUnity Technologies\n")
options = parse_command_line()
trainer_logger = logging.getLogger("mlagents.trainers")
env_logger = logging.getLogger("mlagents.envs")

env_logger.setLevel("DEBUG")
if options.env_path is None and options.num_runs > 1:
raise TrainerError(
"It is not possible to launch more than one concurrent training session "

jobs = []
run_seed = options.seed
if options.num_runs == 1:
if options.seed == -1:
run_seed = np.random.randint(0, 10000)

7
ml-agents/mlagents/trainers/sac/models.py


)
# We assume m_size is divisible by 4
# Create the non-Policy inputs
# Use a default placeholder here so nothing has to be provided during
# Barracuda inference. Note that the default value is just the tiled input
# for the policy, which is thrown away.
self.other_memory_in = tf.placeholder(
self.other_memory_in = tf.placeholder_with_default(
input=tf.tile(self.inference_memory_in, [1, 3]),
dtype=tf.float32,
name="other_recurrent_in",
)
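
For context, here is a small self-contained illustration of why `tf.placeholder_with_default` helps here: the tensor falls back to its default when nothing is fed, so an exported graph (e.g. for Barracuda inference) can run without supplying this input. The shapes and values below are made up for the sketch and are not the ones used by the SAC model.

```python
# Illustrative TF 1.x sketch: a placeholder_with_default uses its default
# value when not fed, but can still be overridden through feed_dict.
import numpy as np
import tensorflow as tf

default_value = tf.zeros([1, 12], dtype=tf.float32)
other_memory_in = tf.placeholder_with_default(
    default_value, shape=[None, 12], name="other_recurrent_in"
)
out = other_memory_in + 1.0

with tf.Session() as sess:
    print(sess.run(out).shape)  # (1, 12): default value used, nothing fed
    print(sess.run(out, feed_dict={other_memory_in: np.ones((2, 12), np.float32)}).shape)  # (2, 12)
```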

1
ml-agents/mlagents/trainers/tensorflow_to_barracuda.py


"OneHot": Struct(id=67, rank=lambda inputs: inputs[0] + 1),
# Broadcast ops
"Add": Struct(id=100, rank=lambda inputs: np.max(inputs)),
"AddV2": Struct(id=100, rank=lambda inputs: np.max(inputs)),
"Sub": Struct(id=101, rank=lambda inputs: np.max(inputs)),
"Mul": Struct(id=102, rank=lambda inputs: np.max(inputs)),
"RealDiv": Struct(id=103, rank=lambda inputs: np.max(inputs)),

9
ml-agents/mlagents/trainers/tests/test_learn.py


return parse_command_line(args)
@patch("mlagents.trainers.learn.TrainerFactory")
load_config, create_environment_factory, subproc_env_mock, sampler_manager_mock
load_config,
create_environment_factory,
subproc_env_mock,
sampler_manager_mock,
trainer_factory_mock,
):
mock_env = MagicMock()
mock_env.external_brain_names = []

with patch.object(TrainerController, "start_learning", MagicMock()):
learn.run_training(0, 0, basic_options(), MagicMock())
mock_init.assert_called_once_with(
{},
trainer_factory_mock.return_value,
"./models/ppo-0",
"./summaries",
"ppo-0",

7
ml-agents/mlagents/trainers/tests/test_simple_rl.py


from mlagents.trainers.trainer_controller import TrainerController
from mlagents.trainers.trainer_util import initialize_trainers
from mlagents.trainers.trainer_util import TrainerFactory
from mlagents.envs.base_unity_environment import BaseUnityEnvironment
from mlagents.envs.brain import BrainInfo, AllBrainInfo, BrainParameters
from mlagents.envs.communicator_objects.agent_info_pb2 import AgentInfoProto

trainer_config = yaml.safe_load(config)
env_manager = SimpleEnvManager(env)
trainers = initialize_trainers(
trainer_factory = TrainerFactory(
external_brains=env_manager.external_brains,
summaries_dir=dir,
run_id=run_id,
model_path=dir,

)
tc = TrainerController(
trainers=trainers,
trainer_factory=trainer_factory,
summaries_dir=dir,
model_path=dir,
run_id=run_id,

10
ml-agents/mlagents/trainers/tests/test_trainer_controller.py


@pytest.fixture
def basic_trainer_controller():
return TrainerController(
trainer_factory=None,
model_path="test_model_path",
summaries_dir="test_summaries_dir",
run_id="test_run_id",

fast_simulation=True,
sampler_manager=SamplerManager({}),
resampling_interval=None,
trainers={},
)

seed = 27
TrainerController(
trainer_factory=None,
model_path="",
summaries_dir="",
run_id="1",

fast_simulation=True,
sampler_manager=SamplerManager({}),
resampling_interval=None,
trainers={},
)
numpy_random_seed.assert_called_with(seed)
tensorflow_set_seed.assert_called_with(seed)

def test_take_step_adds_experiences_to_trainer_and_trains():
tc, trainer_mock = trainer_controller_with_take_step_mocks()
old_step_info = EnvironmentStep(Mock(), Mock(), MagicMock())
new_step_info = EnvironmentStep(Mock(), Mock(), MagicMock())
action_info_dict = {"testbrain": MagicMock()}
old_step_info = EnvironmentStep(Mock(), Mock(), action_info_dict)
new_step_info = EnvironmentStep(Mock(), Mock(), action_info_dict)
trainer_mock.is_ready_update = MagicMock(return_value=True)
env_mock = MagicMock()

21
ml-agents/mlagents/trainers/tests/test_trainer_util.py


expected_config["normalize"] = False
brain_params_mock = BrainParametersMock()
BrainParametersMock.return_value.brain_name = "testbrain"
external_brains = {"testbrain": brain_params_mock}
def mock_constructor(self, brain, trainer_parameters, training, load, seed, run_id):

assert run_id == run_id
with patch.object(OfflineBCTrainer, "__init__", mock_constructor):
trainers = trainer_util.initialize_trainers(
trainer_factory = trainer_util.TrainerFactory(
external_brains=external_brains,
summaries_dir=summaries_dir,
run_id=run_id,
model_path=model_path,

seed=seed,
)
trainers = {}
for _, brain_parameters in external_brains.items():
trainers["testbrain"] = trainer_factory.generate(brain_parameters)
assert "testbrain" in trainers
assert isinstance(trainers["testbrain"], OfflineBCTrainer)

brain_params_mock = BrainParametersMock()
BrainParametersMock.return_value.brain_name = "testbrain"
external_brains = {"testbrain": BrainParametersMock()}
summaries_dir = "test_dir"
run_id = "testrun"

assert multi_gpu == multi_gpu
with patch.object(PPOTrainer, "__init__", mock_constructor):
trainers = trainer_util.initialize_trainers(
trainer_factory = trainer_util.TrainerFactory(
external_brains=external_brains,
summaries_dir=summaries_dir,
run_id=run_id,
model_path=model_path,

seed=seed,
)
trainers = {}
for brain_name, brain_parameters in external_brains.items():
trainers[brain_name] = trainer_factory.generate(brain_parameters)
assert "testbrain" in trainers
assert isinstance(trainers["testbrain"], PPOTrainer)

load_model = False
seed = 11
bad_config = dummy_bad_config()
BrainParametersMock.return_value.brain_name = "testbrain"
trainer_util.initialize_trainers(
trainer_factory = trainer_util.TrainerFactory(
external_brains=external_brains,
summaries_dir=summaries_dir,
run_id=run_id,
model_path=model_path,

seed=seed,
)
trainers = {}
for brain_name, brain_parameters in external_brains.items():
trainers[brain_name] = trainer_factory.generate(brain_parameters)
def test_load_config_missing_file():

61
ml-agents/mlagents/trainers/trainer_controller.py


import os
import json
import logging
from typing import Dict, List, Optional
from typing import Dict, List, Optional, Set
import numpy as np
from mlagents.trainers import tf

from mlagents.envs.timers import hierarchical_timer, get_timer_tree, timed
from mlagents.trainers.trainer import Trainer, TrainerMetrics
from mlagents.trainers.meta_curriculum import MetaCurriculum
from mlagents.trainers.trainer_util import TrainerFactory
trainers: Dict[str, Trainer],
trainer_factory: TrainerFactory,
model_path: str,
summaries_dir: str,
run_id: str,

resampling_interval: Optional[int],
):
"""
:param trainers: Trainers for each brain to train.
:param model_path: Path to save the model.
:param summaries_dir: Folder to save training summaries.
:param run_id: The sub-directory name for model and summary statistics

:param sampler_manager: SamplerManager object handles samplers for resampling the reset parameters.
:param resampling_interval: Specifies number of simulation steps after which reset parameters are resampled.
"""
self.trainers = trainers
self.trainers: Dict[str, Trainer] = {}
self.trainer_factory = trainer_factory
self.model_path = model_path
self.summaries_dir = summaries_dir
self.logger = logging.getLogger("mlagents.envs")

return (
any([t.get_step <= t.get_max_steps for k, t in self.trainers.items()])
or not self.train_model
)
) or len(self.trainers) == 0
def write_to_tensorboard(self, global_step: int) -> None:
for brain_name, trainer in self.trainers.items():

else:
trainer.write_summary(global_step, delta_train_start)
def start_trainer(self, trainer: Trainer, env_manager: EnvManager) -> None:
self.trainers[trainer.brain_name] = trainer
self.logger.info(trainer)
if self.train_model:
trainer.write_tensorboard_text("Hyperparameters", trainer.parameters)
env_manager.set_policy(trainer.brain_name, trainer.policy)
for _, t in self.trainers.items():
self.logger.info(t)
if self.train_model:
for brain_name, trainer in self.trainers.items():
trainer.write_tensorboard_text("Hyperparameters", trainer.parameters)
last_brain_names: Set[str] = set()
for brain_name, trainer in self.trainers.items():
env_manager.set_policy(brain_name, trainer.policy)
external_brains = set(env_manager.external_brains.keys())
new_brains = external_brains - last_brain_names
if last_brain_names != env_manager.external_brains.keys():
for name in new_brains:
trainer = self.trainer_factory.generate(
env_manager.external_brains[name]
)
self.start_trainer(trainer, env_manager)
last_brain_names = external_brains
n_steps = self.advance(env_manager)
for i in range(n_steps):
global_step += 1

)
else:
lessons_incremented = {}
generalization_reset = (
not self.sampler_manager.is_empty()
and (steps != 0)

time_start_step = time()
new_step_infos = env.step()
delta_time_step = time() - time_start_step
trainer.add_experiences(
step_info.previous_all_brain_info,
step_info.current_all_brain_info,
step_info.brain_name_to_action_info[brain_name].outputs,
)
trainer.process_experiences(
step_info.previous_all_brain_info, step_info.current_all_brain_info
)
if brain_name in step_info.brain_name_to_action_info:
trainer.add_experiences(
step_info.previous_all_brain_info,
step_info.current_all_brain_info,
step_info.brain_name_to_action_info[brain_name].outputs,
)
trainer.process_experiences(
step_info.previous_all_brain_info,
step_info.current_all_brain_info,
)
for brain_name, trainer in self.trainers.items():
if brain_name in self.trainer_metrics:
self.trainer_metrics[brain_name].add_delta_step(delta_time_step)
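
The change above replaces the up-front `trainers` dict with lazy creation: whenever the set of external brains reported by the environment changes, new trainers are generated from the `TrainerFactory` and started. A simplified sketch of that pattern follows; `controller` and `env_manager` stand in for the objects used in this file, and the helper name is illustrative.

```python
# Simplified sketch of the lazy trainer creation introduced above.
# `controller` is assumed to expose trainer_factory and start_trainer(),
# and `env_manager.external_brains` maps brain names to BrainParameters.
from typing import Set

def sync_trainers(controller, env_manager, last_brain_names: Set[str]) -> Set[str]:
    external_brains = set(env_manager.external_brains.keys())
    if last_brain_names != external_brains:
        for name in external_brains - last_brain_names:
            # Create a trainer the first time this brain name is seen.
            trainer = controller.trainer_factory.generate(env_manager.external_brains[name])
            controller.start_trainer(trainer, env_manager)
    return external_brains
```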

160
ml-agents/mlagents/trainers/trainer_util.py


from mlagents.trainers.bc.offline_trainer import OfflineBCTrainer
def initialize_trainers(
trainer_config: Dict[str, Any],
external_brains: Dict[str, BrainParameters],
class TrainerFactory:
def __init__(
self,
trainer_config: Any,
summaries_dir: str,
run_id: str,
model_path: str,
keep_checkpoints: int,
train_model: bool,
load_model: bool,
seed: int,
meta_curriculum: MetaCurriculum = None,
multi_gpu: bool = False,
):
self.trainer_config = trainer_config
self.summaries_dir = summaries_dir
self.run_id = run_id
self.model_path = model_path
self.keep_checkpoints = keep_checkpoints
self.train_model = train_model
self.load_model = load_model
self.seed = seed
self.meta_curriculum = meta_curriculum
self.multi_gpu = multi_gpu
def generate(self, brain_parameters: BrainParameters) -> Trainer:
return initialize_trainer(
self.trainer_config,
brain_parameters,
self.summaries_dir,
self.run_id,
self.model_path,
self.keep_checkpoints,
self.train_model,
self.load_model,
self.seed,
self.meta_curriculum,
self.multi_gpu,
)
def initialize_trainer(
trainer_config: Any,
brain_parameters: BrainParameters,
summaries_dir: str,
run_id: str,
model_path: str,

seed: int,
meta_curriculum: MetaCurriculum = None,
multi_gpu: bool = False,
) -> Dict[str, Trainer]:
) -> Trainer:
Initializes trainers given a provided trainer configuration and set of brains from the environment, as well as
Initializes a trainer given a provided trainer configuration and brain parameters, as well as
:param external_brains: BrainParameters provided by the Unity environment
:param brain_parameters: BrainParameters provided by the Unity environment
:param summaries_dir: Directory to store trainer summary statistics
:param run_id: Run ID to associate with this training run
:param model_path: Path to save the model

:param multi_gpu: Whether to use multi-GPU training
:return:
"""
trainers: Dict[str, Trainer] = {}
trainer_parameters_dict = {}
for brain_name in external_brains:
trainer_parameters = trainer_config["default"].copy()
trainer_parameters["summary_path"] = "{basedir}/{name}".format(
basedir=summaries_dir, name=str(run_id) + "_" + brain_name
trainer_parameters = trainer_config["default"].copy()
brain_name = brain_parameters.brain_name
trainer_parameters["summary_path"] = "{basedir}/{name}".format(
basedir=summaries_dir, name=str(run_id) + "_" + brain_name
)
trainer_parameters["model_path"] = "{basedir}/{name}".format(
basedir=model_path, name=brain_name
)
trainer_parameters["keep_checkpoints"] = keep_checkpoints
if brain_name in trainer_config:
_brain_key: Any = brain_name
while not isinstance(trainer_config[_brain_key], dict):
_brain_key = trainer_config[_brain_key]
trainer_parameters.update(trainer_config[_brain_key])
trainer = None
if trainer_parameters["trainer"] == "offline_bc":
trainer = OfflineBCTrainer(
brain_parameters, trainer_parameters, train_model, load_model, seed, run_id
trainer_parameters["model_path"] = "{basedir}/{name}".format(
basedir=model_path, name=brain_name
elif trainer_parameters["trainer"] == "ppo":
trainer = PPOTrainer(
brain_parameters,
meta_curriculum.brains_to_curriculums[brain_name].min_lesson_length
if meta_curriculum
else 1,
trainer_parameters,
train_model,
load_model,
seed,
run_id,
multi_gpu,
trainer_parameters["keep_checkpoints"] = keep_checkpoints
if brain_name in trainer_config:
_brain_key: Any = brain_name
while not isinstance(trainer_config[_brain_key], dict):
_brain_key = trainer_config[_brain_key]
trainer_parameters.update(trainer_config[_brain_key])
trainer_parameters_dict[brain_name] = trainer_parameters.copy()
for brain_name in external_brains:
if trainer_parameters_dict[brain_name]["trainer"] == "offline_bc":
trainers[brain_name] = OfflineBCTrainer(
external_brains[brain_name],
trainer_parameters_dict[brain_name],
train_model,
load_model,
seed,
run_id,
)
elif trainer_parameters_dict[brain_name]["trainer"] == "ppo":
trainers[brain_name] = PPOTrainer(
external_brains[brain_name],
meta_curriculum.brains_to_curriculums[brain_name].min_lesson_length
if meta_curriculum
else 1,
trainer_parameters_dict[brain_name],
train_model,
load_model,
seed,
run_id,
multi_gpu,
)
elif trainer_parameters_dict[brain_name]["trainer"] == "sac":
trainers[brain_name] = SACTrainer(
external_brains[brain_name],
meta_curriculum.brains_to_curriculums[brain_name].min_lesson_length
if meta_curriculum
else 1,
trainer_parameters_dict[brain_name],
train_model,
load_model,
seed,
run_id,
)
else:
raise UnityEnvironmentException(
"The trainer config contains "
"an unknown trainer type for "
"brain {}".format(brain_name)
)
return trainers
elif trainer_parameters["trainer"] == "sac":
trainer = SACTrainer(
brain_parameters,
meta_curriculum.brains_to_curriculums[brain_name].min_lesson_length
if meta_curriculum
else 1,
trainer_parameters,
train_model,
load_model,
seed,
run_id,
)
else:
raise UnityEnvironmentException(
"The trainer config contains "
"an unknown trainer type for "
"brain {}".format(brain_name)
)
return trainer
def load_config(config_path: str) -> Dict[str, Any]:
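
As a usage sketch (paths, run id, and the `external_brains` dict are illustrative), the factory is built once with the shared settings from the `__init__` shown above, and `generate()` is then called once per brain reported by the environment:

```python
# Usage sketch for the new TrainerFactory; keyword arguments mirror the
# __init__ shown above, and external_brains is assumed to come from the
# environment manager (e.g. env_manager.external_brains).
from mlagents.trainers.trainer_util import TrainerFactory, load_config

trainer_config = load_config("config/trainer_config.yaml")  # illustrative path
factory = TrainerFactory(
    trainer_config=trainer_config,
    summaries_dir="./summaries",
    run_id="example-run",
    model_path="./models/example-run",
    keep_checkpoints=5,
    train_model=True,
    load_model=False,
    seed=0,
)

trainers = {
    name: factory.generate(brain_params)
    for name, brain_params in external_brains.items()
}
```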

3
UnitySDK/Assets/ML-Agents/Editor/BroadcastHubDrawer.cs.meta


fileFormatVersion: 2
guid: aa1bef9e5833447ab7251fc6f7a3a609
timeCreated: 1536852419

186
UnitySDK/Assets/ML-Agents/Editor/BroadcastHubDrawer.cs


using UnityEngine;
using UnityEditor;
using System;
using UnityEditor.SceneManagement;
using UnityEngine.SceneManagement;
namespace MLAgents
{
/// <summary>
/// PropertyDrawer for BroadcastHub. Used to display the BroadcastHub in the Inspector.
/// </summary>
[CustomPropertyDrawer(typeof(BroadcastHub))]
public class BroadcastHubDrawer : PropertyDrawer
{
private BroadcastHub m_Hub;
// The height of a line in the Unity Inspectors
private const float k_LineHeight = 17f;
// The vertical space left below the BroadcastHub UI.
private const float k_ExtraSpaceBelow = 10f;
/// <summary>
/// Computes the height of the Drawer depending on the property it is showing
/// </summary>
/// <param name="property">The property that is being drawn.</param>
/// <param name="label">The label of the property being drawn.</param>
/// <returns>The vertical space needed to draw the property.</returns>
public override float GetPropertyHeight(SerializedProperty property, GUIContent label)
{
LazyInitializeHub(property);
var numLines = m_Hub.Count + 2 + (m_Hub.Count > 0 ? 1 : 0);
return (numLines) * k_LineHeight + k_ExtraSpaceBelow;
}
/// <inheritdoc />
public override void OnGUI(Rect position, SerializedProperty property, GUIContent label)
{
LazyInitializeHub(property);
position.height = k_LineHeight;
EditorGUI.LabelField(position, new GUIContent(label.text,
"The Broadcast Hub helps you define which Brains you want to expose to " +
"the external process"));
position.y += k_LineHeight;
EditorGUI.BeginProperty(position, label, property);
EditorGUI.indentLevel++;
DrawAddRemoveButtons(position);
position.y += k_LineHeight;
// These are the labels for each column
var brainWidth = position.width;
var brainRect = new Rect(
position.x, position.y, brainWidth, position.height);
if (m_Hub.Count > 0)
{
EditorGUI.LabelField(brainRect, "Brains");
brainRect.y += k_LineHeight;
}
DrawBrains(brainRect);
EditorGUI.indentLevel--;
EditorGUI.EndProperty();
}
/// <summary>
/// Draws the Add and Remove buttons.
/// </summary>
/// <param name="position">The position at which to draw.</param>
private void DrawAddRemoveButtons(Rect position)
{
// This is the rectangle for the Add button
var addButtonRect = position;
addButtonRect.x += 20;
if (m_Hub.Count > 0)
{
addButtonRect.width /= 2;
addButtonRect.width -= 24;
var buttonContent = new GUIContent(
"Add New", "Add a new Brain to the Broadcast Hub");
if (GUI.Button(addButtonRect, buttonContent, EditorStyles.miniButton))
{
MarkSceneAsDirty();
AddBrain();
}
// This is the rectangle for the Remove button
var removeButtonRect = position;
removeButtonRect.x = position.width / 2 + 15;
removeButtonRect.width = addButtonRect.width - 18;
buttonContent = new GUIContent(
"Remove Last", "Remove the last Brain from the Broadcast Hub");
if (GUI.Button(removeButtonRect, buttonContent, EditorStyles.miniButton))
{
MarkSceneAsDirty();
RemoveLastBrain();
}
}
else
{
addButtonRect.width -= 50;
var buttonContent = new GUIContent(
"Add Brain to Broadcast Hub", "Add a new Brain to the Broadcast Hub");
if (GUI.Button(addButtonRect, buttonContent, EditorStyles.miniButton))
{
MarkSceneAsDirty();
AddBrain();
}
}
}
/// <summary>
/// Draws the Brain contained in the BroadcastHub.
/// </summary>
/// <param name="brainRect">The Rect to draw the Brains.</param>
private void DrawBrains(Rect brainRect)
{
for (var index = 0; index < m_Hub.Count; index++)
{
var controlledBrains = m_Hub.brainsToControl;
var brain = controlledBrains[index];
// This is the rectangle for the brain
EditorGUI.BeginChangeCheck();
var newBrain = EditorGUI.ObjectField(
brainRect, brain, typeof(LearningBrain), true) as LearningBrain;
brainRect.y += k_LineHeight;
if (EditorGUI.EndChangeCheck())
{
MarkSceneAsDirty();
m_Hub.brainsToControl.RemoveAt(index);
var brainToInsert = controlledBrains.Contains(newBrain) ? null : newBrain;
controlledBrains.Insert(index, brainToInsert);
break;
}
}
}
/// <summary>
/// Lazy initializes the Drawer with the property to be drawn.
/// </summary>
/// <param name="property">The SerializedProperty of the BroadcastHub
/// to make the custom GUI for.</param>
private void LazyInitializeHub(SerializedProperty property)
{
if (m_Hub != null)
{
return;
}
var target = property.serializedObject.targetObject;
m_Hub = fieldInfo.GetValue(target) as BroadcastHub;
if (m_Hub == null)
{
m_Hub = new BroadcastHub();
fieldInfo.SetValue(target, m_Hub);
}
}
/// <summary>
/// Signals that the property has been modified and requires the scene to be saved for
/// the changes to persist. Only works when the Editor is not playing.
/// </summary>
private static void MarkSceneAsDirty()
{
if (!EditorApplication.isPlaying)
{
EditorSceneManager.MarkSceneDirty(SceneManager.GetActiveScene());
}
}
/// <summary>
/// Removes the last Brain from the BroadcastHub
/// </summary>
private void RemoveLastBrain()
{
if (m_Hub.Count > 0)
{
m_Hub.brainsToControl.RemoveAt(m_Hub.brainsToControl.Count - 1);
}
}
/// <summary>
/// Adds a new Brain to the BroadcastHub. The value of this brain will not be initialized.
/// </summary>
private void AddBrain()
{
m_Hub.brainsToControl.Add(null);
}
}
}

3
UnitySDK/Assets/ML-Agents/Scripts/BroadcastHub.cs.meta


fileFormatVersion: 2
guid: e43fd511c9f147e487d80e0bab3f6c6b
timeCreated: 1536851538

46
UnitySDK/Assets/ML-Agents/Scripts/BroadcastHub.cs


using System.Collections.Generic;
using UnityEngine;
using UnityEngine.Serialization;
namespace MLAgents
{
/// <summary>
/// BroadcastHub holds references to Brains and keeps track of whether or not the Brain should be
/// remotely controlled.
/// </summary>
[System.Serializable]
public class BroadcastHub
{
[FormerlySerializedAs("_brainsToControl")]
[SerializeField]
public List<LearningBrain> brainsToControl = new List<LearningBrain>();
/// <summary>
/// The number of Brains inside the BroadcastHub.
/// </summary>
public int Count
{
get { return brainsToControl.Count; }
}
/// <summary>
/// Sets a brain to controlled.
/// </summary>
/// <param name="brain"> The Brain that is being set to controlled</param>
public void SetControlled(LearningBrain brain)
{
if (!brainsToControl.Contains(brain))
{
brainsToControl.Add(brain);
}
}
/// <summary>
/// Removes all the Brains of the BroadcastHub
/// </summary>
public void Clear()
{
brainsToControl.Clear();
}
}
}

236
docs/images/broadcast.png


243
docs/images/mlagents-SetBrainToTrain.png


102
ml-agents-envs/mlagents/envs/socket_communicator.py


import logging
import socket
import struct
from typing import Optional
from .communicator import Communicator
from mlagents.envs.communicator_objects.unity_message_pb2 import UnityMessageProto
from mlagents.envs.communicator_objects.unity_output_pb2 import UnityOutputProto
from mlagents.envs.communicator_objects.unity_input_pb2 import UnityInputProto
from .exception import UnityTimeOutException
logger = logging.getLogger("mlagents.envs")
class SocketCommunicator(Communicator):
def __init__(self, worker_id=0, base_port=5005):
"""
Python side of the socket communication
:int base_port: Baseline port number to connect to Unity environment over. worker_id increments over this.
:int worker_id: Number to add to communication port (5005) [0]. Used for asynchronous agent scenarios.
"""
self.port = base_port + worker_id
self._buffer_size = 12000
self.worker_id = worker_id
self._socket = None
self._conn = None
def initialize(self, inputs: UnityInputProto) -> UnityOutputProto:
try:
# Establish communication socket
self._socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
self._socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
self._socket.bind(("localhost", self.port))
except Exception:
raise UnityTimeOutException(
"Couldn't start socket communication because worker number {} is still in use. "
"You may need to manually close a previously opened environment "
"or use a different worker number.".format(str(self.worker_id))
)
try:
self._socket.settimeout(30)
self._socket.listen(1)
self._conn, _ = self._socket.accept()
self._conn.settimeout(30)
except Exception:
raise UnityTimeOutException(
"The Unity environment took too long to respond. Make sure that :\n"
"\t The environment does not need user interaction to launch\n"
"\t The Academy's Broadcast Hub is configured correctly\n"
"\t The Agents are linked to the appropriate Brains\n"
"\t The environment and the Python interface have compatible versions."
)
message = UnityMessageProto()
message.header.status = 200
message.unity_input.CopyFrom(inputs)
self._communicator_send(message.SerializeToString())
initialization_output = UnityMessageProto()
initialization_output.ParseFromString(self._communicator_receive())
return initialization_output.unity_output
def _communicator_receive(self):
try:
s = self._conn.recv(self._buffer_size)
message_length = struct.unpack("I", bytearray(s[:4]))[0]
s = s[4:]
while len(s) != message_length:
s += self._conn.recv(self._buffer_size)
except socket.timeout:
raise UnityTimeOutException("The environment took too long to respond.")
return s
def _communicator_send(self, message):
self._conn.send(struct.pack("I", len(message)) + message)
def exchange(self, inputs: UnityInputProto) -> Optional[UnityOutputProto]:
message = UnityMessageProto()
message.header.status = 200
message.unity_input.CopyFrom(inputs)
self._communicator_send(message.SerializeToString())
outputs = UnityMessageProto()
outputs.ParseFromString(self._communicator_receive())
if outputs.header.status != 200:
return None
return outputs.unity_output
def close(self):
"""
Sends a shutdown signal to the unity environment, and closes the socket connection.
"""
if self._socket is not None and self._conn is not None:
message_input = UnityMessageProto()
message_input.header.status = 400
self._communicator_send(message_input.SerializeToString())
if self._socket is not None:
self._socket.close()
self._socket = None
if self._socket is not None:
self._conn.close()
self._conn = None