
Demonstration Recorder (#1240)

/develop-generalizationTraining-TrainerController
GitHub · 6 years ago
Current commit
3c9603d6
57 files changed, with 4009 additions and 412 deletions
  1. .gitignore (4)
  2. UnitySDK/Assets/ML-Agents/Scripts/Academy.cs (9)
  3. UnitySDK/Assets/ML-Agents/Scripts/Agent.cs (77)
  4. UnitySDK/Assets/ML-Agents/Scripts/Batcher.cs (152)
  5. UnitySDK/Assets/ML-Agents/Scripts/Brain.cs (47)
  6. UnitySDK/Assets/ML-Agents/Scripts/CoreBrainInternal.cs (71)
  7. config/trainer_config.yaml (25)
  8. docs/Training-Imitation-Learning.md (59)
  9. ml-agents/mlagents/envs/communicator_objects/__init__.py (1)
  10. ml-agents/mlagents/envs/communicator_objects/unity_to_external_pb2.py (9)
  11. ml-agents/mlagents/envs/environment.py (18)
  12. ml-agents/mlagents/trainers/__init__.py (4)
  13. ml-agents/mlagents/trainers/bc/__init__.py (3)
  14. ml-agents/mlagents/trainers/bc/policy.py (2)
  15. ml-agents/mlagents/trainers/bc/trainer.py (106)
  16. ml-agents/mlagents/trainers/buffer.py (2)
  17. ml-agents/mlagents/trainers/ppo/trainer.py (16)
  18. ml-agents/mlagents/trainers/trainer.py (25)
  19. ml-agents/mlagents/trainers/trainer_controller.py (12)
  20. ml-agents/tests/mock_communicator.py (16)
  21. ml-agents/tests/trainers/test_meta_curriculum.py (1)
  22. ml-agents/tests/trainers/test_trainer_controller.py (104)
  23. UnitySDK/Assets/ML-Agents/Editor/DemonstrationDrawer.cs (95)
  24. UnitySDK/Assets/ML-Agents/Editor/DemonstrationDrawer.cs.meta (11)
  25. UnitySDK/Assets/ML-Agents/Editor/DemonstrationImporter.cs (60)
  26. UnitySDK/Assets/ML-Agents/Editor/DemonstrationImporter.cs.meta (11)
  27. UnitySDK/Assets/ML-Agents/Editor/Tests/DemonstrationTests.cs (66)
  28. UnitySDK/Assets/ML-Agents/Editor/Tests/DemonstrationTests.cs.meta (11)
  29. UnitySDK/Assets/ML-Agents/Plugins/System.IO.Abstractions.TestingHelpers.dll (1001)
  30. UnitySDK/Assets/ML-Agents/Plugins/System.IO.Abstractions.TestingHelpers.dll.meta (30)
  31. UnitySDK/Assets/ML-Agents/Plugins/System.IO.Abstractions.dll (623)
  32. UnitySDK/Assets/ML-Agents/Plugins/System.IO.Abstractions.dll.meta (30)
  33. UnitySDK/Assets/ML-Agents/Resources.meta (8)
  34. UnitySDK/Assets/ML-Agents/Scripts/CommunicatorObjects/DemonstrationMetaProto.cs (289)
  35. UnitySDK/Assets/ML-Agents/Scripts/CommunicatorObjects/DemonstrationMetaProto.cs.meta (11)
  36. UnitySDK/Assets/ML-Agents/Scripts/Demonstration.cs (76)
  37. UnitySDK/Assets/ML-Agents/Scripts/Demonstration.cs.meta (11)
  38. UnitySDK/Assets/ML-Agents/Scripts/DemonstrationRecorder.cs (65)
  39. UnitySDK/Assets/ML-Agents/Scripts/DemonstrationRecorder.cs.meta (11)
  40. UnitySDK/Assets/ML-Agents/Scripts/DemonstrationStore.cs (138)
  41. UnitySDK/Assets/ML-Agents/Scripts/DemonstrationStore.cs.meta (3)
  42. UnitySDK/Assets/ML-Agents/Scripts/Utilities.cs (73)
  43. UnitySDK/Assets/ML-Agents/Scripts/Utilities.cs.meta (3)
  44. config/bc_config.yaml (55)
  45. docs/images/demo_component.png (102)
  46. docs/images/demo_inspector.png (198)
  47. ml-agents/mlagents/envs/communicator_objects/demonstration_meta_proto_pb2.py (98)
  48. ml-agents/mlagents/envs/utilities.py (19)
  49. ml-agents/mlagents/trainers/bc/offline_trainer.py (53)
  50. ml-agents/mlagents/trainers/bc/online_trainer.py (116)
  51. ml-agents/mlagents/trainers/demo_loader.py (151)
  52. ml-agents/tests/trainers/test.demo (60)
  53. ml-agents/tests/trainers/test_demo_loader.py (14)
  54. protobuf-definitions/proto/mlagents/envs/communicator_objects/demonstration_meta_proto.proto (12)
  55. UnitySDK/Assets/ML-Agents/Resources/DemoIcon.png (68)
  56. UnitySDK/Assets/ML-Agents/Resources/DemoIcon.png.meta (86)

4
.gitignore


/UnitySDK/Assets/AssetStoreTools*
/UnitySDK/Assets/Plugins*
/UnitySDK/Assets/Gizmos*
/UnitySDK/Assets/Demonstrations*
# Tensorflow Model Info
/models

/UnitySDK/Assets/ML-Agents/Plugins/Android*
/UnitySDK/Assets/ML-Agents/Plugins/iOS*
/UnitySDK/Assets/ML-Agents/Plugins/Computer*
/UnitySDK/Assets/ML-Agents/Plugins/System*
/UnitySDK/Assets/ML-Agents/Plugins/System.Numerics*
/UnitySDK/Assets/ML-Agents/Plugins/System.ValueTuple*
# Generated doc folders
/docs/html

9
UnitySDK/Assets/ML-Agents/Scripts/Academy.cs


}
}
brainBatcher = new MLAgents.Batcher(communicator);
brainBatcher = new Batcher(communicator);
// Initialize Brains and communicator (if present)
foreach (var brain in brains)

isCommunicatorOn = true;
var academyParameters =
new MLAgents.CommunicatorObjects.UnityRLInitializationOutput();
new CommunicatorObjects.UnityRLInitializationOutput();
academyParameters.Name = gameObject.name;
academyParameters.Version = kApiVersion;
foreach (var brain in brains)

MLAgents.Batcher.BrainParametersConvertor(
bp,
bp.ToProto(
(MLAgents.CommunicatorObjects.BrainTypeProto)
(CommunicatorObjects.BrainTypeProto)
brain.brainType));
}

77
UnitySDK/Assets/ML-Agents/Scripts/Agent.cs


using System.Collections.Generic;
using System.Linq;
using Google.Protobuf;
using MLAgents.CommunicatorObjects;
using UnityEngine;

/// to separate between different agents in the environment.
/// </summary>
public int id;
/// <summary>
/// Converts an AgentInfo to a protobuf-generated AgentInfoProto
/// </summary>
/// <returns>The protobuf version of the AgentInfo.</returns>
/// <param name="info">The AgentInfo to convert.</param>
public CommunicatorObjects.AgentInfoProto ToProto()
{
var agentInfoProto = new CommunicatorObjects.AgentInfoProto
{
StackedVectorObservation = {stackedVectorObservation},
StoredVectorActions = {storedVectorActions},
StoredTextActions = storedTextActions,
TextObservation = textObservation,
Reward = reward,
MaxStepReached = maxStepReached,
Done = done,
Id = id,
};
if (memories != null)
{
agentInfoProto.Memories.Add(memories);
}
if (actionMasks != null)
{
agentInfoProto.ActionMask.AddRange(actionMasks);
}
foreach (Texture2D obs in visualObservations)
{
agentInfoProto.VisualObservations.Add(
ByteString.CopyFrom(obs.EncodeToPNG())
);
}
return agentInfoProto;
}
}
/// <summary>

/// Array of Texture2D used to render to from render buffer before
/// transforming into float tensor.
Texture2D[] textureArray;
/// <summary>
/// Demonstration recorder.
/// </summary>
private DemonstrationRecorder recorder;
/// Monobehavior function that is called when the attached GameObject
/// becomes enabled or active.
void OnEnable()

{
textureArray[i] = new Texture2D(1, 1, TextureFormat.RGB24, false);
}
recorder = GetComponent<DemonstrationRecorder>();
}
/// Helper method for the <see cref="OnEnable"/> event, created to

/// </remarks>
public virtual void InitializeAgent()
{
}
/// <summary>

info.id = id;
brain.SendState(this, info);
if (recorder != null && recorder.record && Application.isEditor)
{
recorder.WriteExperience(info);
}
info.textObservation = "";
}

/// </remarks>
public virtual void CollectObservations()
{
}
/// <summary>

{
actionMasker.SetActionMask(0, actionIndices);
}
/// <summary>
/// Sets an action mask for discrete control agents. When used, the agent will not be
/// able to perform the action passed as argument at the next decision. If no branch is

/// <param name="actionIndex">The index of the masked action on branch 0</param>
protected void SetActionMask(int actionIndex)
{
actionMasker.SetActionMask(0, new int[1]{actionIndex});
actionMasker.SetActionMask(0, new int[1] {actionIndex});
/// <summary>
/// Sets an action mask for discrete control agents. When used, the agent will not be
/// able to perform the action passed as argument at the next decision. If no branch is

/// <param name="actionIndex">The index of the masked action</param>
protected void SetActionMask(int branch, int actionIndex)
{
actionMasker.SetActionMask(branch, new int[1]{actionIndex});
actionMasker.SetActionMask(branch, new int[1] {actionIndex});
}
/// <summary>

{
actionMasker.SetActionMask(branch, actionIndices);
}
/// <summary>
/// Adds a float observation to the vector observations of the agent.

info.vectorObservation.Add(observation.z);
info.vectorObservation.Add(observation.w);
}
/// <summary>
/// Adds a boolean observation to the vector observation of the agent.
/// Increases the size of the agent's vector observation by 1.

/// <param name="textAction">Text action.</param>
public virtual void AgentAction(float[] vectorAction, string textAction)
{
}
/// <summary>

/// </summary>
public virtual void AgentOnDone()
{
}
/// <summary>

/// </summary>
public virtual void AgentReset()
{
}
/// <summary>

{
action.textActions = textActions;
}
/// <summary>
/// Updates the value of the agent.
/// </summary>

var tempRT =
RenderTexture.GetTemporary(width, height, depth, format, readWrite);
if (width != texture2D.width || height != texture2D.height)
{
texture2D.Resize(width, height);

152
UnitySDK/Assets/ML-Agents/Scripts/Batcher.cs


/// Keeps track of which brains have data to send on the current step
Dictionary<string, bool> m_hasData =
new Dictionary<string, bool>();
new CommunicatorObjects.UnityRLOutput();
new CommunicatorObjects.UnityRLOutput();
/// Keeps track of last training mode sent by External
bool m_isTraining;

try
{
initializationInput = m_communicator.Initialize(
new CommunicatorObjects.UnityOutput
{
RlInitializationOutput = academyParameters
},
out input);
new CommunicatorObjects.UnityOutput
{
RlInitializationOutput = academyParameters
},
out input);
}
catch
{

}
/// <summary>
/// Converts an AgentInfo to a protobuf-generated AgentInfoProto
/// </summary>
/// <returns>The protobuf version of the AgentInfo.</returns>
/// <param name="info">The AgentInfo to convert.</param>
public static CommunicatorObjects.AgentInfoProto
AgentInfoConvertor(AgentInfo info)
{
var agentInfoProto = new CommunicatorObjects.AgentInfoProto
{
StackedVectorObservation = { info.stackedVectorObservation },
StoredVectorActions = { info.storedVectorActions },
StoredTextActions = info.storedTextActions,
TextObservation = info.textObservation,
Reward = info.reward,
MaxStepReached = info.maxStepReached,
Done = info.done,
Id = info.id,
};
if (info.memories != null)
{
agentInfoProto.Memories.Add(info.memories);
}
if (info.actionMasks != null)
{
agentInfoProto.ActionMask.AddRange(info.actionMasks);
}
foreach (Texture2D obs in info.visualObservations)
{
agentInfoProto.VisualObservations.Add(
ByteString.CopyFrom(obs.EncodeToPNG())
);
}
return agentInfoProto;
}
/// <summary>
/// Converts a Brain into a protobuf BrainParametersProto so it can be sent
/// </summary>
/// <returns>The BrainParametersProto generated.</returns>
/// <param name="brainParameters">The BrainParameters.</param>
/// <param name="name">The name of the brain.</param>
/// <param name="type">The type of brain.</param>
public static CommunicatorObjects.BrainParametersProto BrainParametersConvertor(
BrainParameters brainParameters, string name, CommunicatorObjects.BrainTypeProto type)
{
var brainParametersProto = new CommunicatorObjects.BrainParametersProto
{
VectorObservationSize = brainParameters.vectorObservationSize,
NumStackedVectorObservations = brainParameters.numStackedVectorObservations,
VectorActionSize = {brainParameters.vectorActionSize},
VectorActionSpaceType =
(CommunicatorObjects.SpaceTypeProto)brainParameters.vectorActionSpaceType,
BrainName = name,
BrainType = type
};
brainParametersProto.VectorActionDescriptions.AddRange(
brainParameters.vectorActionDescriptions);
foreach (resolution res in brainParameters.cameraResolutions)
{
brainParametersProto.CameraResolutions.Add(
new CommunicatorObjects.ResolutionProto
{
Width = res.width,
Height = res.height,
GrayScale = res.blackAndWhite
});
}
return brainParametersProto;
}
/// <summary>
/// Sends the brain info. If at least one brain has an agent in need of
/// a decision or if the academy is done, the data is sent via
/// Communicator. Else, a new step is realized. The data can only be

{
m_currentAgents[brainKey].Add(agent);
}
// If at least one agent has data to send, then append data to
// the message and update hasSentState
if (m_currentAgents[brainKey].Count > 0)

CommunicatorObjects.AgentInfoProto agentInfoProto =
AgentInfoConvertor(agentInfo[agent]);
CommunicatorObjects.AgentInfoProto agentInfoProto = agentInfo[agent].ToProto();
m_hasData[brainKey] = true;
}

m_currentUnityRLOutput.GlobalDone = m_academyDone;
SendBatchedMessageHelper();
}
// The message was just sent so we must reset hasSentState and
// triedSendState
foreach (string k in m_currentAgents.Keys)

void SendBatchedMessageHelper()
{
var input = m_communicator.Exchange(
new CommunicatorObjects.UnityOutput{
RlOutput = m_currentUnityRLOutput
});
new CommunicatorObjects.UnityOutput
{
RlOutput = m_currentUnityRLOutput
});
m_messagesReceived += 1;
foreach (string k in m_currentUnityRLOutput.AgentInfos.Keys)

if (input == null)
{
m_command = CommunicatorObjects.CommandProto.Quit;

}
foreach (var brainName in rlInput.AgentActions.Keys)
{
if (!m_currentAgents[brainName].Any())
if (!m_currentAgents[brainName].Any())
{
continue;
}
if (!rlInput.AgentActions[brainName].Value.Any())
{
continue;
}
for (var i = 0; i < m_currentAgents[brainName].Count(); i++)
{
var agent = m_currentAgents[brainName][i];
var action = rlInput.AgentActions[brainName].Value[i];
agent.UpdateVectorAction(
action.VectorActions.ToArray());
agent.UpdateMemoriesAction(
action.Memories.ToList());
agent.UpdateTextAction(
action.TextActions);
agent.UpdateValueAction(
action.Value);
}
continue;
}
if (!rlInput.AgentActions[brainName].Value.Any())
{
continue;
}
for (var i = 0; i < m_currentAgents[brainName].Count; i++)
{
var agent = m_currentAgents[brainName][i];
var action = rlInput.AgentActions[brainName].Value[i];
agent.UpdateVectorAction(action.VectorActions.ToArray());
agent.UpdateMemoriesAction(action.Memories.ToList());
agent.UpdateTextAction(action.TextActions);
agent.UpdateValueAction(action.Value);
}
}
}

47
UnitySDK/Assets/ML-Agents/Scripts/Brain.cs


public SpaceType vectorActionSpaceType = SpaceType.discrete;
/**< \brief Defines if the action is discrete or continuous */
/// <summary>
/// Converts a Brain into a protobuf BrainParametersProto so it can be sent
/// </summary>
/// <returns>The BrainParametersProto generated.</returns>
/// <param name="name">The name of the brain.</param>
/// <param name="type">The type of brain.</param>
public CommunicatorObjects.BrainParametersProto
ToProto(string name, CommunicatorObjects.BrainTypeProto type)
{
var brainParametersProto = new CommunicatorObjects.BrainParametersProto
{
VectorObservationSize = vectorObservationSize,
NumStackedVectorObservations = numStackedVectorObservations,
VectorActionSize = {vectorActionSize},
VectorActionSpaceType =
(CommunicatorObjects.SpaceTypeProto)vectorActionSpaceType,
BrainName = name,
BrainType = type
};
brainParametersProto.VectorActionDescriptions.AddRange(vectorActionDescriptions);
foreach (resolution res in cameraResolutions)
{
brainParametersProto.CameraResolutions.Add(
new CommunicatorObjects.ResolutionProto
{
Width = res.width,
Height = res.height,
GrayScale = res.blackAndWhite
});
}
return brainParametersProto;
}
public BrainParameters()
{
}
public BrainParameters(CommunicatorObjects.BrainParametersProto brainParametersProto)
{
vectorObservationSize = brainParametersProto.VectorObservationSize;
numStackedVectorObservations = brainParametersProto.NumStackedVectorObservations;
vectorActionSize = brainParametersProto.VectorActionSize.ToArray();
vectorActionDescriptions = brainParametersProto.VectorActionDescriptions.ToArray();
vectorActionSpaceType = (SpaceType)brainParametersProto.VectorActionSpaceType;
}
}
[HelpURL("https://github.com/Unity-Technologies/ml-agents/blob/master/" +

71
UnitySDK/Assets/ML-Agents/Scripts/CoreBrainInternal.cs


}
observationMatrixList.Add(
BatchVisualObservations(texturesHolder,
Utilities.TextureToFloatArray(texturesHolder,
brain.brainParameters.cameraResolutions[observationIndex].blackAndWhite));
}

#endif
}
/// <summary>
/// Converts a list of Texture2D into a Tensor.
/// </summary>
/// <returns>
/// A 4 dimensional float Tensor of dimension
/// [batch_size, height, width, channel].
/// Where batch_size is the number of input textures,
/// height corresponds to the height of the texture,
/// width corresponds to the width of the texture,
/// channel corresponds to the number of channels extracted from the
/// input textures (based on the input blackAndWhite flag:
/// 3 if the flag is false, 1 otherwise).
/// The values of the Tensor are between 0 and 1.
/// </returns>
/// <param name="textures">
/// The list of textures to be put into the tensor.
/// Note that the textures must have the same width and height.
/// </param>
/// <param name="blackAndWhite">
/// If set to <c>true</c> the textures
/// will be converted to grayscale before being stored in the tensor.
/// </param>
public static float[,,,] BatchVisualObservations(
List<Texture2D> textures, bool blackAndWhite)
{
int batchSize = textures.Count();
int width = textures[0].width;
int height = textures[0].height;
int pixels = 0;
if (blackAndWhite)
pixels = 1;
else
pixels = 3;
float[,,,] result = new float[batchSize, height, width, pixels];
float[] resultTemp = new float[batchSize * height * width * pixels];
int hwp = height * width * pixels;
int wp = width * pixels;
for (int b = 0; b < batchSize; b++)
{
Color32[] cc = textures[b].GetPixels32();
for (int h = height - 1; h >= 0; h--)
{
for (int w = 0; w < width; w++)
{
Color32 currentPixel = cc[(height - h - 1) * width + w];
if (!blackAndWhite)
{
// For Color32, the r, g and b values are between
// 0 and 255.
resultTemp[b * hwp + h * wp + w * pixels] = currentPixel.r / 255.0f;
resultTemp[b * hwp + h * wp + w * pixels + 1] = currentPixel.g / 255.0f;
resultTemp[b * hwp + h * wp + w * pixels + 2] = currentPixel.b / 255.0f;
}
else
{
resultTemp[b * hwp + h * wp + w * pixels] =
(currentPixel.r + currentPixel.g + currentPixel.b)
/ 3f / 255.0f;
}
}
}
}
System.Buffer.BlockCopy(resultTemp, 0, result, 0, batchSize * hwp * sizeof(float));
return result;
}
}
}

25
config/trainer_config.yaml


max_steps: 5.0e5
summary_freq: 2000
time_horizon: 3
StudentBrain:
trainer: imitation
max_steps: 10000
summary_freq: 1000
brain_to_imitate: TeacherBrain
batch_size: 16
batches_per_epoch: 5
num_layers: 4
hidden_units: 64
sequence_length: 16
buffer_size: 128
StudentRecurrentBrain:
trainer: imitation
max_steps: 10000
summary_freq: 1000
brain_to_imitate: TeacherBrain
batch_size: 16
batches_per_epoch: 5
num_layers: 4
hidden_units: 64
use_recurrent: true
sequence_length: 32
buffer_size: 128

59
docs/Training-Imitation-Learning.md


of training a medic NPC : instead of indirectly training a medic with the help
of a reward function, we can give the medic real world examples of observations
from the game and actions from a game controller to guide the medic's behavior.
More specifically, in this mode, the Brain type during training is set to Player
and all the actions performed with the controller (in addition to the agent
observations) will be recorded and sent to the Python API. The imitation
learning algorithm will then use these pairs of observations and actions from
the human player to learn a policy. [Video Link](https://youtu.be/kpb8ZkMBFYs).
Imitation Learning uses pairs of observations and actions
from a demonstration to learn a policy. [Video Link](https://youtu.be/kpb8ZkMBFYs).
## Recording Demonstrations
It is possible to record demonstrations of agent behavior from the Unity Editor, and save them as assets. These demonstrations contain information on the observations, actions, and rewards for a given agent during the recording session. They can be managed from the Editor, as well as used for training with Offline Behavioral Cloning (see below).
In order to record demonstrations from an agent, add the `Demonstration Recorder` component to a GameObject in the scene which contains an `Agent` component. Once added, it is possible to name the demonstration that will be recorded from the agent.
## Using Behavioral Cloning
<p align="center">
<img src="images/demo_component.png"
alt="BC Teacher Helper"
width="375" border="10" />
</p>
When `Record` is checked, a demonstration will be created whenever the scene is played from the Editor. Depending on the complexity of the task, anywhere from a few minutes to a few hours of demonstration data may be necessary to be useful for imitation learning. When you have recorded enough data, end the Editor play session, and a `.demo` file will be created in the `Assets/Demonstrations` folder. This file contains the demonstrations. Clicking on the file will provide metadata about the demonstration in the inspector.
<p align="center">
<img src="images/demo_inspector.png"
alt="BC Teacher Helper"
width="375" border="10" />
</p>
## Training with Behavioral Cloning
the simplest one of them is Behavioral Cloning. It works by collecting training
data from a teacher, and then simply uses it to directly learn a policy, in the
the simplest one of them is Behavioral Cloning. It works by collecting demonstrations from a teacher, and then simply uses them to directly learn a policy, in the
1. In order to use imitation learning in a scene, the first thing you will need
is to create two Brains, one which will be the "Teacher," and the other which
### Offline Training
With offline behavioral cloning, we can use demonstrations (`.demo` files) generated using the `Demonstration Recorder` as the dataset used to train a behavior.
1. Choose an agent you would like to have learn to imitate some set of demonstrations.
2. Record a set of demonstrations using the `Demonstration Recorder` (see above). For illustrative purposes we will refer to this file as `AgentRecording.demo`.
3. Build the scene, assigning the agent a Learning Brain, and set the Brain to Control in the Broadcast Hub. For more information on Brains, see [here](Learning-Environment-Design-Brains.md).
4. Open the `config/bc_config.yaml` file.
5. Modify the `demo_path` parameter in the file to reference the path to the demonstration file recorded in step 2. In our case this is: `./UnitySDK/Assets/Demonstrations/AgentRecording.demo`
6. Launch `mlagents-learn`, providing `./config/bc_config.yaml` as the config parameter, and your environment as the `--env` parameter (a sketch of such a config entry is shown below).
7. (Optional) Observe training performance using Tensorboard.
This will use the demonstration file to train a neural-network-driven agent to directly imitate the actions provided in the demonstration. The environment will launch and be used for evaluating the agent's performance during training.
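For reference, here is a minimal sketch of what an offline behavioral cloning entry in `config/bc_config.yaml` could look like. It is modeled on the `dummy_offline_bc_config` test fixture added in this change; the brain name and hyperparameter values below are illustrative rather than the shipped defaults.

```yaml
# Hypothetical offline_bc entry, modeled on the test fixture in
# ml-agents/tests/trainers/test_trainer_controller.py. Adjust the brain name,
# demo_path, and hyperparameters to your own scene.
StudentBrain:
    trainer: offline_bc
    demo_path: ./UnitySDK/Assets/Demonstrations/AgentRecording.demo
    batch_size: 32
    batches_per_epoch: 16
    hidden_units: 128
    learning_rate: 3.0e-4
    max_steps: 5.0e4
    num_layers: 2
    sequence_length: 64
    summary_freq: 1000
    use_recurrent: false
```

With an entry like this in place, step 6 amounts to something like `mlagents-learn ./config/bc_config.yaml --env=<your_env> --train`.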
### Online Training
It is also possible to provide demonstrations in real time during training, without pre-recording a demonstration file. The steps to do this are as follows (a configuration sketch follows these steps):
1. First create two Brains, one which will be the "Teacher," and the other which
will be the "Student." We will assume that the names of the Brain
`GameObject`s are "Teacher" and "Student" respectively.
2. Set the "Teacher" Brain to Player mode, and properly configure the inputs to

Assets folder (or a subdirectory within Assets of your choosing), and use
with `Internal` Brain.
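On the configuration side, the main difference from the pre-recorded case is that the student Brain's trainer is set to `online_bc` (this change renames the old `imitation` trainer) and `brain_to_imitate` points at the teacher Brain. Below is a sketch following the existing `StudentBrain` block shown in `config/trainer_config.yaml` above; the names and values are illustrative only.

```yaml
# Hypothetical online_bc (teacher/student) entry; values follow the StudentBrain
# block from config/trainer_config.yaml shown earlier and are illustrative only.
StudentBrain:
    trainer: online_bc
    brain_to_imitate: TeacherBrain
    max_steps: 10000
    summary_freq: 1000
    batch_size: 16
    batches_per_epoch: 5
    num_layers: 4
    hidden_units: 64
    sequence_length: 16
    buffer_size: 128
```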
### BC Teacher Helper
**BC Teacher Helper**
We provide a convenience utility, the `BC Teacher Helper` component, that you can add
to the Teacher Agent.

2. Reset the training buffer. This enables you to instruct the agents to forget
their buffer of recent experiences. This is useful if you'd like to get them
to quickly learn a new behavior. The default command to reset the buffer is
to press `C` on the keyboard.
to press `C` on the keyboard.

1
ml-agents/mlagents/envs/communicator_objects/__init__.py


from .brain_parameters_proto_pb2 import *
from .brain_type_proto_pb2 import *
from .command_proto_pb2 import *
from .demonstration_meta_proto_pb2 import *
from .engine_configuration_proto_pb2 import *
from .environment_parameters_proto_pb2 import *
from .header_pb2 import *

9
ml-agents/mlagents/envs/communicator_objects/unity_to_external_pb2.py


from google.protobuf import message as _message
from google.protobuf import reflection as _reflection
from google.protobuf import symbol_database as _symbol_database
from google.protobuf import descriptor_pb2
# @@protoc_insertion_point(imports)
_sym_db = _symbol_database.Default()

name='mlagents/envs/communicator_objects/unity_to_external.proto',
package='communicator_objects',
syntax='proto3',
serialized_options=_b('\252\002\034MLAgents.CommunicatorObjects'),
serialized_pb=_b('\n:mlagents/envs/communicator_objects/unity_to_external.proto\x12\x14\x63ommunicator_objects\x1a\x36mlagents/envs/communicator_objects/unity_message.proto2g\n\x0fUnityToExternal\x12T\n\x08\x45xchange\x12\".communicator_objects.UnityMessage\x1a\".communicator_objects.UnityMessage\"\x00\x42\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3')
,
dependencies=[mlagents_dot_envs_dot_communicator__objects_dot_unity__message__pb2.DESCRIPTOR,])

_sym_db.RegisterFileDescriptor(DESCRIPTOR)
DESCRIPTOR.has_options = True
DESCRIPTOR._options = _descriptor._ParseOptions(descriptor_pb2.FileOptions(), _b('\252\002\034MLAgents.CommunicatorObjects'))
DESCRIPTOR._options = None
_UNITYTOEXTERNAL = _descriptor.ServiceDescriptor(
name='UnityToExternal',

options=None,
serialized_options=None,
serialized_start=140,
serialized_end=243,
methods=[

containing_service=None,
input_type=mlagents_dot_envs_dot_communicator__objects_dot_unity__message__pb2._UNITYMESSAGE,
output_type=mlagents_dot_envs_dot_communicator__objects_dot_unity__message__pb2._UNITYMESSAGE,
options=None,
serialized_options=None,
),
])
_sym_db.RegisterServiceDescriptor(_UNITYTOEXTERNAL)

18
ml-agents/mlagents/envs/environment.py


import subprocess
from .brain import BrainInfo, BrainParameters, AllBrainInfo
from .utilities import process_pixels
from .exception import UnityEnvironmentException, UnityActionException, UnityTimeOutException
from .communicator_objects import UnityRLInput, UnityRLOutput, AgentActionProto,\

arr = [float(x) for x in arr]
return arr
@staticmethod
def _process_pixels(image_bytes, gray_scale):
"""
Converts byte array observation image into numpy array, re-sizes it, and optionally converts it to grey scale
:param image_bytes: input byte array corresponding to image
:return: processed numpy array of observation from environment
"""
s = bytearray(image_bytes)
image = Image.open(io.BytesIO(s))
s = np.array(image) / 255.0
if gray_scale:
s = np.mean(s, axis=2)
s = np.reshape(s, [s.shape[0], s.shape[1], 1])
return s
def _get_state(self, output: UnityRLOutput) -> (AllBrainInfo, bool):
"""
Collects experience information from all external brains in environment at current step.

agent_info_list = output.agentInfos[b].value
vis_obs = []
for i in range(self.brains[b].number_visual_observations):
obs = [self._process_pixels(x.visual_observations[i],
obs = [process_pixels(x.visual_observations[i],
self.brains[b].camera_resolutions[i]['blackAndWhite'])
for x in agent_info_list]
vis_obs += [np.array(obs)]

4
ml-agents/mlagents/trainers/__init__.py


from .models import *
from .trainer_controller import *
from .bc.models import *
from .bc.trainer import *
from .bc.offline_trainer import *
from .bc.online_trainer import *
from .bc.policy import *
from .ppo.models import *
from .ppo.trainer import *

from .demo_loader import *

3
ml-agents/mlagents/trainers/bc/__init__.py


from .models import *
from .trainer import *
from .online_trainer import *
from .offline_trainer import *
from .policy import *

2
ml-agents/mlagents/trainers/bc/policy.py


:param trainer_parameters: Defined training parameters.
:param load: Whether a pre-trained model will be loaded or a new one created.
"""
super().__init__(seed, brain, trainer_parameters)
super(BCPolicy, self).__init__(seed, brain, trainer_parameters)
with self.graph.as_default():
with self.graph.as_default():

106
ml-agents/mlagents/trainers/bc/trainer.py


# # Unity ML-Agents Toolkit
# ## ML-Agent Learning (Imitation)
# ## ML-Agent Learning (Behavioral Cloning)
# Contains an implementation of Behavioral Cloning Algorithm
import logging

from mlagents.trainers.buffer import Buffer
from mlagents.trainers.trainer import UnityTrainerException, Trainer
logger = logging.getLogger("mlagents.envs")
logger = logging.getLogger("mlagents.trainers")
class BehavioralCloningTrainer(Trainer):
"""The ImitationTrainer is an implementation of the imitation learning."""
class BCTrainer(Trainer):
"""The BCTrainer is an implementation of Behavioral Cloning."""
def __init__(self, brain, trainer_parameters, training, load, seed, run_id):
"""

:param seed: The seed the model will be initialized with
:param run_id: The identifier of the current run
"""
self.param_keys = ['brain_to_imitate', 'batch_size', 'time_horizon',
'summary_freq', 'max_steps',
'batches_per_epoch', 'use_recurrent',
'hidden_units', 'learning_rate', 'num_layers',
'sequence_length', 'memory_size', 'model_path']
for k in self.param_keys:
print(k)
print(k not in trainer_parameters)
if k not in trainer_parameters:
raise UnityTrainerException("The hyperparameter {0} could not be found for the Imitation trainer of "
"brain {1}.".format(k, brain.brain_name))
super(BehavioralCloningTrainer, self).__init__(brain, trainer_parameters, training, run_id)
super(BCTrainer, self).__init__(brain, trainer_parameters, training, run_id)
self.brain_name = brain.brain_name
self.brain_to_imitate = trainer_parameters['brain_to_imitate']
self.batches_per_epoch = trainer_parameters['batches_per_epoch']
self.n_sequences = max(int(trainer_parameters['batch_size'] / self.policy.sequence_length), 1)
self.n_sequences = 1
self.training_buffer = Buffer()
self.batches_per_epoch = trainer_parameters['batches_per_epoch']
self.demonstration_buffer = Buffer()
self.evaluation_buffer = Buffer()
def __str__(self):
return '''Hyperparameters for the Imitation Trainer of brain {0}: \n{1}'''.format(
self.brain_name, '\n'.join(['\t{0}:\t{1}'.format(x, self.trainer_parameters[x]) for x in self.param_keys]))
@property
def parameters(self):

else:
return run_out['action'], None, None, None, None
def add_experiences(self, curr_info: AllBrainInfo, next_info: AllBrainInfo, take_action_outputs):
def add_experiences(self, curr_info: AllBrainInfo, next_info: AllBrainInfo,
take_action_outputs):
"""
Adds experiences to each agent's experience history.
:param curr_info: Current AllBrainInfo (Dictionary of all current brains and corresponding BrainInfo).

# Used to collect teacher experience into training buffer
info_teacher = curr_info[self.brain_to_imitate]
next_info_teacher = next_info[self.brain_to_imitate]
for agent_id in info_teacher.agents:
self.training_buffer[agent_id].last_brain_info = info_teacher
for agent_id in next_info_teacher.agents:
stored_info_teacher = self.training_buffer[agent_id].last_brain_info
if stored_info_teacher is None:
continue
else:
idx = stored_info_teacher.agents.index(agent_id)
next_idx = next_info_teacher.agents.index(agent_id)
if stored_info_teacher.text_observations[idx] != "":
info_teacher_record, info_teacher_reset = \
stored_info_teacher.text_observations[idx].lower().split(",")
next_info_teacher_record, next_info_teacher_reset = next_info_teacher.text_observations[idx].\
lower().split(",")
if next_info_teacher_reset == "true":
self.training_buffer.reset_update_buffer()
else:
info_teacher_record, next_info_teacher_record = "true", "true"
if info_teacher_record == "true" and next_info_teacher_record == "true":
if not stored_info_teacher.local_done[idx]:
for i in range(self.policy.vis_obs_size):
self.training_buffer[agent_id]['visual_obs%d' % i]\
.append(stored_info_teacher.visual_observations[i][idx])
if self.policy.use_vec_obs:
self.training_buffer[agent_id]['vector_obs']\
.append(stored_info_teacher.vector_observations[idx])
if self.policy.use_recurrent:
if stored_info_teacher.memories.shape[1] == 0:
stored_info_teacher.memories = np.zeros((len(stored_info_teacher.agents),
self.policy.m_size))
self.training_buffer[agent_id]['memory'].append(stored_info_teacher.memories[idx])
self.training_buffer[agent_id]['actions'].append(next_info_teacher.
previous_vector_actions[next_idx])
# Used to collect information about student performance.
self.training_buffer[agent_id].last_brain_info = info_student
self.evaluation_buffer[agent_id].last_brain_info = info_student
# Used to collect information about student performance.
stored_info_student = self.training_buffer[agent_id].last_brain_info
stored_info_student = self.evaluation_buffer[agent_id].last_brain_info
if stored_info_student is None:
continue
else:

:param current_info: Current AllBrainInfo
:param next_info: Next AllBrainInfo
"""
info_teacher = next_info[self.brain_to_imitate]
for l in range(len(info_teacher.agents)):
teacher_action_list = len(self.training_buffer[info_teacher.agents[l]]['actions'])
horizon_reached = teacher_action_list > self.trainer_parameters['time_horizon']
teacher_filled = len(self.training_buffer[info_teacher.agents[l]]['actions']) > 0
if ((info_teacher.local_done[l] or horizon_reached) and teacher_filled):
agent_id = info_teacher.agents[l]
self.training_buffer.append_update_buffer(
agent_id, batch_size=None, training_length=self.policy.sequence_length)
self.training_buffer[agent_id].reset_agent()
info_student = next_info[self.brain_name]
for l in range(len(info_student.agents)):
if info_student.local_done[l]:

A signal that the Episode has ended. The buffer must be reset.
Get only called when the academy resets.
"""
self.training_buffer.reset_all()
self.evaluation_buffer.reset_local_buffers()
for agent_id in self.cumulative_rewards:
self.cumulative_rewards[agent_id] = 0
for agent_id in self.episode_steps:

Returns whether or not the trainer has enough elements to run update model
:return: A boolean corresponding to whether or not update_model() can be run
"""
return len(self.training_buffer.update_buffer['actions']) > self.n_sequences
return len(self.demonstration_buffer.update_buffer['actions']) > self.n_sequences
self.training_buffer.update_buffer.shuffle()
self.demonstration_buffer.update_buffer.shuffle()
num_batches = min(len(self.training_buffer.update_buffer['actions']) //
num_batches = min(len(self.demonstration_buffer.update_buffer['actions']) //
buffer = self.training_buffer.update_buffer
update_buffer = self.demonstration_buffer.update_buffer
mini_batch = buffer.make_mini_batch(start, end)
mini_batch = update_buffer.make_mini_batch(start, end)
run_out = self.policy.update(mini_batch, self.n_sequences)
loss = run_out['policy_loss']
batch_losses.append(loss)

2
ml-agents/mlagents/trainers/buffer.py


"""
self.update_buffer.reset_agent()
def reset_all(self):
def reset_local_buffers(self):
"""
Resets all the local local_buffers
"""

16
ml-agents/mlagents/trainers/ppo/trainer.py


from mlagents.trainers.ppo.policy import PPOPolicy
from mlagents.trainers.trainer import UnityTrainerException, Trainer
logger = logging.getLogger("mlagents.envs")
logger = logging.getLogger("mlagents.trainers")
class PPOTrainer(Trainer):

:param seed: The seed the model will be initialized with
:param run_id: The identifier of the current run
"""
super(PPOTrainer, self).__init__(brain, trainer_parameters, training, run_id)
self.param_keys = ['batch_size', 'beta', 'buffer_size', 'epsilon', 'gamma', 'hidden_units', 'lambd',
'learning_rate', 'max_steps', 'normalize', 'num_epoch', 'num_layers',
'time_horizon', 'sequence_length', 'summary_freq', 'use_recurrent',

for k in self.param_keys:
if k not in trainer_parameters:
raise UnityTrainerException("The hyperparameter {0} could not be found for the PPO trainer of "
"brain {1}.".format(k, brain.brain_name))
super(PPOTrainer, self).__init__(brain.brain_name, trainer_parameters, training, run_id)
self.check_param_keys()
self.policy = PPOPolicy(seed, brain, trainer_parameters,
self.is_training, load)

A signal that the Episode has ended. The buffer must be reset.
Get only called when the academy resets.
"""
self.training_buffer.reset_all()
self.training_buffer.reset_local_buffers()
for agent_id in self.cumulative_rewards:
self.cumulative_rewards[agent_id] = 0
for agent_id in self.episode_steps:

def update_policy(self):
"""
Uses training_buffer to update the policy.
Uses demonstration_buffer to update the policy.
"""
n_sequences = max(int(self.trainer_parameters['batch_size'] / self.policy.sequence_length), 1)
value_total, policy_total, forward_total, inverse_total = [], [], [], []

25
ml-agents/mlagents/trainers/trainer.py


class Trainer(object):
"""This class is the abstract class for the mlagents.trainers"""
"""This class is the base class for the mlagents.trainers"""
def __init__(self, brain_name, trainer_parameters, training, run_id):
def __init__(self, brain, trainer_parameters, training, run_id):
:param trainer_parameters: The parameters for the trainer (dictionary).
:param training: Whether the trainer is set for training.
:param run_id: The identifier of the current run
:BrainParameters brain: Brain to be trained.
:dict trainer_parameters: The parameters for the trainer (dictionary).
:bool training: Whether the trainer is set for training.
:int run_id: The identifier of the current run
self.brain_name = brain_name
self.param_keys = []
self.brain_name = brain.brain_name
self.run_id = run_id
self.trainer_parameters = trainer_parameters
self.is_training = training

def __str__(self):
return '''Empty Trainer'''
return '''{} Trainer'''.format(self.__class__)
def check_param_keys(self):
for k in self.param_keys:
if k not in self.trainer_parameters:
raise UnityTrainerException(
"The hyper-parameter {0} could not be found for the {1} trainer of "
"brain {2}.".format(k, self.__class__, self.brain_name))
@property
def parameters(self):

def update_policy(self):
"""
Uses training_buffer to update model.
Uses demonstration_buffer to update model.
"""
raise UnityTrainerException("The update_model method was not implemented.")

12
ml-agents/mlagents/trainers/trainer_controller.py


from mlagents.envs.exception import UnityEnvironmentException
from mlagents.trainers.ppo.trainer import PPOTrainer
from mlagents.trainers.bc.trainer import BehavioralCloningTrainer
from mlagents.trainers.bc.offline_trainer import OfflineBCTrainer
from mlagents.trainers.bc.online_trainer import OnlineBCTrainer
from mlagents.trainers.meta_curriculum import MetaCurriculum
from mlagents.trainers.exception import MetaCurriculumError

trainer_parameters[k] = trainer_config[_brain_key][k]
trainer_parameters_dict[brain_name] = trainer_parameters.copy()
for brain_name in self.env.external_brain_names:
if trainer_parameters_dict[brain_name]['trainer'] == 'imitation':
self.trainers[brain_name] = BehavioralCloningTrainer(
if trainer_parameters_dict[brain_name]['trainer'] == 'offline_bc':
self.trainers[brain_name] = OfflineBCTrainer(
self.env.brains[brain_name],
trainer_parameters_dict[brain_name], self.train_model,
self.load_model, self.seed, self.run_id)
elif trainer_parameters_dict[brain_name]['trainer'] == 'online_bc':
self.trainers[brain_name] = OnlineBCTrainer(
self.env.brains[brain_name],
trainer_parameters_dict[brain_name], self.train_model,
self.load_model, self.seed, self.run_id)

16
ml-agents/tests/mock_communicator.py


from mlagents.envs.communicator import Communicator
from mlagents.envs.communicator_objects import UnityMessage, UnityOutput, UnityInput,\
ResolutionProto, BrainParametersProto, UnityRLInitializationOutput,\
from mlagents.envs.communicator_objects import UnityMessage, UnityOutput, UnityInput, \
ResolutionProto, BrainParametersProto, UnityRLInitializationOutput, \
def __init__(self, discrete_action=False, visual_inputs=0, stack=True, num_agents=3):
def __init__(self, discrete_action=False, visual_inputs=0, stack=True, num_agents=3,
brain_name="RealFakeBrain", vec_obs_size=3):
"""
Python side of the grpc communication. Python is the client and Unity the server

self.visual_inputs = visual_inputs
self.has_been_closed = False
self.num_agents = num_agents
self.brain_name = brain_name
self.vec_obs_size = vec_obs_size
if stack:
self.num_stacks = 2
else:

height=40,
gray_scale=False) for i in range(self.visual_inputs)]
bp = BrainParametersProto(
vector_observation_size=3,
vector_observation_size=self.vec_obs_size,
brain_name="RealFakeBrain",
brain_name=self.brain_name,
brain_type=2
)
rl_init = UnityRLInitializationOutput(

UnityRLOutput.ListAgentInfoProto(value=list_agent_info)
global_done = False
try:
global_done = (inputs.rl_input.agent_actions["RealFakeBrain"].value[0].vector_actions[0] == -1)
fake_brain = inputs.rl_input.agent_actions["RealFakeBrain"]
global_done = (fake_brain.value[0].vector_actions[0] == -1)
except:
pass
result = UnityRLOutput(

1
ml-agents/tests/trainers/test_meta_curriculum.py


assert curriculum_b.lesson_num == 3
@patch('mlagents.trainers.Curriculum')
@patch('mlagents.trainers.Curriculum')
def test_increment_lessons(curriculum_a, curriculum_b, measure_vals):

104
ml-agents/tests/trainers/test_trainer_controller.py


import tensorflow as tf
from mlagents.trainers.trainer_controller import TrainerController
from mlagents.trainers.buffer import Buffer
from mlagents.trainers.bc.trainer import BehavioralCloningTrainer
from mlagents.trainers.curriculum import Curriculum
from mlagents.trainers.exception import CurriculumError
from mlagents.trainers.bc.offline_trainer import OfflineBCTrainer
from mlagents.trainers.bc.online_trainer import OnlineBCTrainer
@pytest.fixture
def dummy_start():
return '''{ "AcademyName": "RealFakeAcademy",
"resetParameters": {},
"brainNames": ["RealFakeBrain"],
"externalBrainNames": ["RealFakeBrain"],
"logPath":"RealFakePath",
"apiNumber":"API-5",
"brainParameters": [{
"vectorObservationSize": 3,
"numStackedVectorObservations" : 2,
"vectorActionSize": 2,
"memorySize": 0,
"cameraResolutions": [],
"vectorActionDescriptions": ["",""],
"vectorActionSpaceType": 1
}]
}'''.encode()
@pytest.fixture

@pytest.fixture
def dummy_bc_config():
def dummy_online_bc_config():
trainer: imitation
trainer: online_bc
brain_to_imitate: ExpertBrain
batches_per_epoch: 16
batch_size: 32

curiosity_enc_size: 1
''')
@pytest.fixture
def dummy_offline_bc_config():
return yaml.load(
'''
default:
trainer: offline_bc
demo_path: ./tests/trainers/test.demo
batches_per_epoch: 16
batch_size: 32
beta: 5.0e-3
buffer_size: 512
epsilon: 0.2
gamma: 0.99
hidden_units: 128
lambd: 0.95
learning_rate: 3.0e-4
max_steps: 5.0e4
normalize: true
num_epoch: 5
num_layers: 2
time_horizon: 64
sequence_length: 64
summary_freq: 1000
use_recurrent: false
memory_size: 8
use_curiosity: false
curiosity_strength: 0.0
curiosity_enc_size: 1
''')
@pytest.fixture
def dummy_bad_config():
return yaml.load(

discrete_action=True, visual_inputs=1)
tc = TrainerController(' ', ' ', 1, None, True, True, False, 1,
1, 1, 1, '', "tests/test_mlagents.trainers.py", False)
assert(tc.env.brain_names[0] == 'RealFakeBrain')
assert (tc.env.brain_names[0] == 'RealFakeBrain')
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')

discrete_action=True, visual_inputs=1)
mock_load.return_value = dummy_config
tc = TrainerController(' ', ' ', 1, None, True, True, False, 1,
1, 1, 1, '','', False)
1, 1, 1, '', '', False)
assert(len(config) == 1)
assert(config['default']['trainer'] == "ppo")
assert (len(config) == 1)
assert (config['default']['trainer'] == "ppo")
dummy_bc_config, dummy_bad_config):
dummy_offline_bc_config, dummy_online_bc_config, dummy_bad_config):
open_name = 'mlagents.trainers.trainer_controller' + '.open'
with mock.patch('yaml.load') as mock_load:
with mock.patch(open_name, create=True) as _:

config = tc._load_config()
tf.reset_default_graph()
tc._initialize_trainers(config)
assert(len(tc.trainers) == 1)
assert(isinstance(tc.trainers['RealFakeBrain'], PPOTrainer))
assert (len(tc.trainers) == 1)
assert (isinstance(tc.trainers['RealFakeBrain'], PPOTrainer))
# Test for Behavior Cloning Trainer
mock_load.return_value = dummy_bc_config
# Test for Online Behavior Cloning Trainer
mock_load.return_value = dummy_online_bc_config
assert(isinstance(tc.trainers['RealFakeBrain'], BehavioralCloningTrainer))
assert (isinstance(tc.trainers['RealFakeBrain'], OnlineBCTrainer))
# Test for proper exception when trainer name is incorrect
mock_load.return_value = dummy_bad_config

tc._initialize_trainers(config)
@mock.patch('mlagents.envs.UnityEnvironment.executable_launcher')
@mock.patch('mlagents.envs.UnityEnvironment.get_communicator')
def test_initialize_offline_trainers(mock_communicator, mock_launcher, dummy_config,
dummy_offline_bc_config, dummy_online_bc_config, dummy_bad_config):
open_name = 'mlagents.trainers.trainer_controller' + '.open'
with mock.patch('yaml.load') as mock_load:
with mock.patch(open_name, create=True) as _:
mock_communicator.return_value = MockCommunicator(
discrete_action=False, stack=False, visual_inputs=0,
brain_name="Ball3DBrain", vec_obs_size=8)
tc = TrainerController(' ', ' ', 1, None, True, False, False, 1, 1,
1, 1, '', "tests/test_mlagents.trainers.py",
False)
# Test for Offline Behavior Cloning Trainer
mock_load.return_value = dummy_offline_bc_config
config = tc._load_config()
tf.reset_default_graph()
tc._initialize_trainers(config)
assert (isinstance(tc.trainers['Ball3DBrain'], OfflineBCTrainer))

95
UnitySDK/Assets/ML-Agents/Editor/DemonstrationDrawer.cs


using System.Text;
using MLAgents;
using UnityEditor;
/// <summary>
/// Renders a custom UI for Demonstration Scriptable Object.
/// </summary>
[CustomEditor(typeof(Demonstration))]
[CanEditMultipleObjects]
public class DemonstrationEditor : Editor
{
SerializedProperty brainParameters;
SerializedProperty demoMetaData;
void OnEnable()
{
brainParameters = serializedObject.FindProperty("brainParameters");
demoMetaData = serializedObject.FindProperty("metaData");
}
/// <summary>
/// Renders Inspector UI for Demonstration metadata.
/// </summary>
void MakeMetaDataProperty(SerializedProperty property)
{
var nameProp = property.FindPropertyRelative("demonstrationName");
var expProp = property.FindPropertyRelative("numberExperiences");
var epiProp = property.FindPropertyRelative("numberEpisodes");
var rewProp = property.FindPropertyRelative("meanReward");
var nameLabel = nameProp.displayName + ": " + nameProp.stringValue;
var expLabel = expProp.displayName + ": " + expProp.intValue;
var epiLabel = epiProp.displayName + ": " + epiProp.intValue;
var rewLabel = rewProp.displayName + ": " + rewProp.floatValue;
EditorGUILayout.LabelField(nameLabel);
EditorGUILayout.LabelField(expLabel);
EditorGUILayout.LabelField(epiLabel);
EditorGUILayout.LabelField(rewLabel);
}
/// <summary>
/// Constructs label for action size array.
/// </summary>
static string BuildActionArrayLabel(SerializedProperty actionSizeProperty)
{
var actionSize = actionSizeProperty.arraySize;
StringBuilder actionLabel = new StringBuilder("[ ");
for (int i = 0; i < actionSize; i++)
{
actionLabel.Append(actionSizeProperty.GetArrayElementAtIndex(i).intValue);
if (i < actionSize - 1)
{
actionLabel.Append(", ");
}
}
actionLabel.Append(" ]");
return actionLabel.ToString();
}
/// <summary>
/// Renders Inspector UI for Brain Parameters of Demonstration.
/// </summary>
void MakeBrainParametersProperty(SerializedProperty property)
{
var vecObsSizeProp = property.FindPropertyRelative("vectorObservationSize");
var numStackedProp = property.FindPropertyRelative("numStackedVectorObservations");
var actSizeProperty = property.FindPropertyRelative("vectorActionSize");
var camResProp = property.FindPropertyRelative("cameraResolutions");
var actSpaceTypeProp = property.FindPropertyRelative("vectorActionSpaceType");
var vecObsSizeLabel = vecObsSizeProp.displayName + ": " + vecObsSizeProp.intValue;
var numStackedLabel = numStackedProp.displayName + ": " + numStackedProp.intValue;
var vecActSizeLabel = actSizeProperty.displayName + ": " + BuildActionArrayLabel(actSizeProperty);
var camResLabel = camResProp.displayName + ": " + camResProp.arraySize;
var actSpaceTypeLabel = actSpaceTypeProp.displayName + ": " + (SpaceType) actSpaceTypeProp.enumValueIndex;
EditorGUILayout.LabelField(vecObsSizeLabel);
EditorGUILayout.LabelField(numStackedLabel);
EditorGUILayout.LabelField(vecActSizeLabel);
EditorGUILayout.LabelField(camResLabel);
EditorGUILayout.LabelField(actSpaceTypeLabel);
}
public override void OnInspectorGUI()
{
serializedObject.Update();
EditorGUILayout.LabelField("Meta Data", EditorStyles.boldLabel);
MakeMetaDataProperty(demoMetaData);
EditorGUILayout.LabelField("Brain Parameters", EditorStyles.boldLabel);
MakeBrainParametersProperty(brainParameters);
serializedObject.ApplyModifiedProperties();
}
}

11
UnitySDK/Assets/ML-Agents/Editor/DemonstrationDrawer.cs.meta


fileFormatVersion: 2
guid: 84f9cd83f56c74790a51444a6cfe4945
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

60
UnitySDK/Assets/ML-Agents/Editor/DemonstrationImporter.cs


using System;
using System.IO;
using MLAgents.CommunicatorObjects;
using UnityEditor;
using UnityEngine;
using UnityEditor.Experimental.AssetImporters;
namespace MLAgents
{
/// <summary>
/// Asset Importer used to parse demonstration files.
/// </summary>
[ScriptedImporter(1, new[] {"demo"})]
public class DemonstrationImporter : ScriptedImporter
{
private const string IconPath = "Assets/ML-Agents/Resources/DemoIcon.png";
public override void OnImportAsset(AssetImportContext ctx)
{
var inputType = Path.GetExtension(ctx.assetPath);
if (inputType == null)
{
throw new Exception("Demonstration import error.");
}
try
{
// Read first two proto objects containing metadata and brain parameters.
Stream reader = File.OpenRead(ctx.assetPath);
var metaDataProto = DemonstrationMetaProto.Parser.ParseDelimitedFrom(reader);
var metaData = new DemonstrationMetaData(metaDataProto);
reader.Seek(DemonstrationStore.MetaDataBytes + 1, 0);
var brainParamsProto = BrainParametersProto.Parser.ParseDelimitedFrom(reader);
var brainParameters = new BrainParameters(brainParamsProto);
reader.Close();
var demonstration = ScriptableObject.CreateInstance<Demonstration>();
demonstration.Initialize(brainParameters, metaData);
userData = demonstration.ToString();
Texture2D texture = (Texture2D)
AssetDatabase.LoadAssetAtPath(IconPath, typeof(Texture2D));
#if UNITY_2017_3_OR_NEWER
ctx.AddObjectToAsset(ctx.assetPath, demonstration, texture);
ctx.SetMainObject(demonstration);
#else
ctx.SetMainAsset(ctx.assetPath, model);
#endif
}
catch
{
return;
}
}
}
}

11
UnitySDK/Assets/ML-Agents/Editor/DemonstrationImporter.cs.meta


fileFormatVersion: 2
guid: 7bd65ce151aaa4a41a45312543c56be1
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

66
UnitySDK/Assets/ML-Agents/Editor/Tests/DemonstrationTests.cs


using System.Collections.Generic;
using NUnit.Framework;
using UnityEngine;
using System.IO.Abstractions.TestingHelpers;
namespace MLAgents.Tests
{
public class DemonstrationTests : MonoBehaviour
{
private const string DemoDirecory = "Assets/Demonstrations/";
private const string ExtensionType = ".demo";
private const string DemoName = "Test";
[Test]
public void TestSanitization()
{
const string dirtyString = "abc123&!@";
const string knownCleanString = "abc123";
var cleanString = DemonstrationRecorder.SanitizeName(dirtyString);
Assert.AreNotEqual(dirtyString, cleanString);
Assert.AreEqual(cleanString, knownCleanString);
}
[Test]
public void TestStoreInitalize()
{
var fileSystem = new MockFileSystem();
var demoStore = new DemonstrationStore(fileSystem);
Assert.IsFalse(fileSystem.Directory.Exists(DemoDirecory));
var brainParameters = new BrainParameters
{
vectorObservationSize = 3,
numStackedVectorObservations = 2,
cameraResolutions = new [] {new resolution()},
vectorActionDescriptions = new [] {"TestActionA", "TestActionB"},
vectorActionSize = new [] {2, 2},
vectorActionSpaceType = SpaceType.discrete
};
demoStore.Initialize(DemoName, brainParameters, "TestBrain");
Assert.IsTrue(fileSystem.Directory.Exists(DemoDirecory));
Assert.IsTrue(fileSystem.FileExists(DemoDirecory + DemoName + ExtensionType));
var agentInfo = new AgentInfo
{
reward = 1f,
visualObservations = new List<Texture2D>(),
actionMasks = new []{false, true},
done = true,
id = 5,
maxStepReached = true,
memories = new List<float>(),
stackedVectorObservation = new List<float>() {1f, 1f, 1f},
storedTextActions = "TestAction",
storedVectorActions = new [] {0f, 1f},
textObservation = "TestAction",
};
demoStore.Record(agentInfo);
demoStore.Close();
}
}
}

11
UnitySDK/Assets/ML-Agents/Editor/Tests/DemonstrationTests.cs.meta


fileFormatVersion: 2
guid: 4c5a970f5b6be4b57b3bd7a5f84c3623
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

1001
UnitySDK/Assets/ML-Agents/Plugins/System.IO.Abstractions.TestingHelpers.dll
The file diff is too large to display.

30
UnitySDK/Assets/ML-Agents/Plugins/System.IO.Abstractions.TestingHelpers.dll.meta


fileFormatVersion: 2
guid: 2d7ba4e1037b64de5b860bcbe15755b3
PluginImporter:
externalObjects: {}
serializedVersion: 2
iconMap: {}
executionOrder: {}
isPreloaded: 0
isOverridable: 0
platformData:
- first:
Any:
second:
enabled: 1
settings: {}
- first:
Editor: Editor
second:
enabled: 0
settings:
DefaultValueInitialized: true
- first:
Windows Store Apps: WindowsStoreApps
second:
enabled: 0
settings:
CPU: AnyCPU
userData:
assetBundleName:
assetBundleVariant:

623
UnitySDK/Assets/ML-Agents/Plugins/System.IO.Abstractions.dll
The file diff is too large to display.

30
UnitySDK/Assets/ML-Agents/Plugins/System.IO.Abstractions.dll.meta


fileFormatVersion: 2
guid: b01205587773841ad95e8ceda347e8bd
PluginImporter:
externalObjects: {}
serializedVersion: 2
iconMap: {}
executionOrder: {}
isPreloaded: 0
isOverridable: 0
platformData:
- first:
Any:
second:
enabled: 1
settings: {}
- first:
Editor: Editor
second:
enabled: 0
settings:
DefaultValueInitialized: true
- first:
Windows Store Apps: WindowsStoreApps
second:
enabled: 0
settings:
CPU: AnyCPU
userData:
assetBundleName:
assetBundleVariant:

8
UnitySDK/Assets/ML-Agents/Resources.meta


fileFormatVersion: 2
guid: 1b3ab22264a5447df9e52684598ac3b0
folderAsset: yes
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

289
UnitySDK/Assets/ML-Agents/Scripts/CommunicatorObjects/DemonstrationMetaProto.cs


// <auto-generated>
// Generated by the protocol buffer compiler. DO NOT EDIT!
// source: mlagents/envs/communicator_objects/demonstration_meta_proto.proto
// </auto-generated>
#pragma warning disable 1591, 0612, 3021
#region Designer generated code
using pb = global::Google.Protobuf;
using pbc = global::Google.Protobuf.Collections;
using pbr = global::Google.Protobuf.Reflection;
using scg = global::System.Collections.Generic;
namespace MLAgents.CommunicatorObjects {
/// <summary>Holder for reflection information generated from mlagents/envs/communicator_objects/demonstration_meta_proto.proto</summary>
public static partial class DemonstrationMetaProtoReflection {
#region Descriptor
/// <summary>File descriptor for mlagents/envs/communicator_objects/demonstration_meta_proto.proto</summary>
public static pbr::FileDescriptor Descriptor {
get { return descriptor; }
}
private static pbr::FileDescriptor descriptor;
static DemonstrationMetaProtoReflection() {
byte[] descriptorData = global::System.Convert.FromBase64String(
string.Concat(
"CkFtbGFnZW50cy9lbnZzL2NvbW11bmljYXRvcl9vYmplY3RzL2RlbW9uc3Ry",
"YXRpb25fbWV0YV9wcm90by5wcm90bxIUY29tbXVuaWNhdG9yX29iamVjdHMi",
"jQEKFkRlbW9uc3RyYXRpb25NZXRhUHJvdG8SEwoLYXBpX3ZlcnNpb24YASAB",
"KAUSGgoSZGVtb25zdHJhdGlvbl9uYW1lGAIgASgJEhQKDG51bWJlcl9zdGVw",
"cxgDIAEoBRIXCg9udW1iZXJfZXBpc29kZXMYBCABKAUSEwoLbWVhbl9yZXdh",
"cmQYBSABKAJCH6oCHE1MQWdlbnRzLkNvbW11bmljYXRvck9iamVjdHNiBnBy",
"b3RvMw=="));
descriptor = pbr::FileDescriptor.FromGeneratedCode(descriptorData,
new pbr::FileDescriptor[] { },
new pbr::GeneratedClrTypeInfo(null, new pbr::GeneratedClrTypeInfo[] {
new pbr::GeneratedClrTypeInfo(typeof(global::MLAgents.CommunicatorObjects.DemonstrationMetaProto), global::MLAgents.CommunicatorObjects.DemonstrationMetaProto.Parser, new[]{ "ApiVersion", "DemonstrationName", "NumberSteps", "NumberEpisodes", "MeanReward" }, null, null, null)
}));
}
#endregion
}
#region Messages
public sealed partial class DemonstrationMetaProto : pb::IMessage<DemonstrationMetaProto> {
private static readonly pb::MessageParser<DemonstrationMetaProto> _parser = new pb::MessageParser<DemonstrationMetaProto>(() => new DemonstrationMetaProto());
private pb::UnknownFieldSet _unknownFields;
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public static pb::MessageParser<DemonstrationMetaProto> Parser { get { return _parser; } }
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public static pbr::MessageDescriptor Descriptor {
get { return global::MLAgents.CommunicatorObjects.DemonstrationMetaProtoReflection.Descriptor.MessageTypes[0]; }
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
pbr::MessageDescriptor pb::IMessage.Descriptor {
get { return Descriptor; }
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public DemonstrationMetaProto() {
OnConstruction();
}
partial void OnConstruction();
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public DemonstrationMetaProto(DemonstrationMetaProto other) : this() {
apiVersion_ = other.apiVersion_;
demonstrationName_ = other.demonstrationName_;
numberSteps_ = other.numberSteps_;
numberEpisodes_ = other.numberEpisodes_;
meanReward_ = other.meanReward_;
_unknownFields = pb::UnknownFieldSet.Clone(other._unknownFields);
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public DemonstrationMetaProto Clone() {
return new DemonstrationMetaProto(this);
}
/// <summary>Field number for the "api_version" field.</summary>
public const int ApiVersionFieldNumber = 1;
private int apiVersion_;
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public int ApiVersion {
get { return apiVersion_; }
set {
apiVersion_ = value;
}
}
/// <summary>Field number for the "demonstration_name" field.</summary>
public const int DemonstrationNameFieldNumber = 2;
private string demonstrationName_ = "";
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public string DemonstrationName {
get { return demonstrationName_; }
set {
demonstrationName_ = pb::ProtoPreconditions.CheckNotNull(value, "value");
}
}
/// <summary>Field number for the "number_steps" field.</summary>
public const int NumberStepsFieldNumber = 3;
private int numberSteps_;
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public int NumberSteps {
get { return numberSteps_; }
set {
numberSteps_ = value;
}
}
/// <summary>Field number for the "number_episodes" field.</summary>
public const int NumberEpisodesFieldNumber = 4;
private int numberEpisodes_;
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public int NumberEpisodes {
get { return numberEpisodes_; }
set {
numberEpisodes_ = value;
}
}
/// <summary>Field number for the "mean_reward" field.</summary>
public const int MeanRewardFieldNumber = 5;
private float meanReward_;
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public float MeanReward {
get { return meanReward_; }
set {
meanReward_ = value;
}
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public override bool Equals(object other) {
return Equals(other as DemonstrationMetaProto);
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public bool Equals(DemonstrationMetaProto other) {
if (ReferenceEquals(other, null)) {
return false;
}
if (ReferenceEquals(other, this)) {
return true;
}
if (ApiVersion != other.ApiVersion) return false;
if (DemonstrationName != other.DemonstrationName) return false;
if (NumberSteps != other.NumberSteps) return false;
if (NumberEpisodes != other.NumberEpisodes) return false;
if (!pbc::ProtobufEqualityComparers.BitwiseSingleEqualityComparer.Equals(MeanReward, other.MeanReward)) return false;
return Equals(_unknownFields, other._unknownFields);
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public override int GetHashCode() {
int hash = 1;
if (ApiVersion != 0) hash ^= ApiVersion.GetHashCode();
if (DemonstrationName.Length != 0) hash ^= DemonstrationName.GetHashCode();
if (NumberSteps != 0) hash ^= NumberSteps.GetHashCode();
if (NumberEpisodes != 0) hash ^= NumberEpisodes.GetHashCode();
if (MeanReward != 0F) hash ^= pbc::ProtobufEqualityComparers.BitwiseSingleEqualityComparer.GetHashCode(MeanReward);
if (_unknownFields != null) {
hash ^= _unknownFields.GetHashCode();
}
return hash;
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public override string ToString() {
return pb::JsonFormatter.ToDiagnosticString(this);
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public void WriteTo(pb::CodedOutputStream output) {
if (ApiVersion != 0) {
output.WriteRawTag(8);
output.WriteInt32(ApiVersion);
}
if (DemonstrationName.Length != 0) {
output.WriteRawTag(18);
output.WriteString(DemonstrationName);
}
if (NumberSteps != 0) {
output.WriteRawTag(24);
output.WriteInt32(NumberSteps);
}
if (NumberEpisodes != 0) {
output.WriteRawTag(32);
output.WriteInt32(NumberEpisodes);
}
if (MeanReward != 0F) {
output.WriteRawTag(45);
output.WriteFloat(MeanReward);
}
if (_unknownFields != null) {
_unknownFields.WriteTo(output);
}
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public int CalculateSize() {
int size = 0;
if (ApiVersion != 0) {
size += 1 + pb::CodedOutputStream.ComputeInt32Size(ApiVersion);
}
if (DemonstrationName.Length != 0) {
size += 1 + pb::CodedOutputStream.ComputeStringSize(DemonstrationName);
}
if (NumberSteps != 0) {
size += 1 + pb::CodedOutputStream.ComputeInt32Size(NumberSteps);
}
if (NumberEpisodes != 0) {
size += 1 + pb::CodedOutputStream.ComputeInt32Size(NumberEpisodes);
}
if (MeanReward != 0F) {
size += 1 + 4;
}
if (_unknownFields != null) {
size += _unknownFields.CalculateSize();
}
return size;
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public void MergeFrom(DemonstrationMetaProto other) {
if (other == null) {
return;
}
if (other.ApiVersion != 0) {
ApiVersion = other.ApiVersion;
}
if (other.DemonstrationName.Length != 0) {
DemonstrationName = other.DemonstrationName;
}
if (other.NumberSteps != 0) {
NumberSteps = other.NumberSteps;
}
if (other.NumberEpisodes != 0) {
NumberEpisodes = other.NumberEpisodes;
}
if (other.MeanReward != 0F) {
MeanReward = other.MeanReward;
}
_unknownFields = pb::UnknownFieldSet.MergeFrom(_unknownFields, other._unknownFields);
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public void MergeFrom(pb::CodedInputStream input) {
uint tag;
while ((tag = input.ReadTag()) != 0) {
switch(tag) {
default:
_unknownFields = pb::UnknownFieldSet.MergeFieldFrom(_unknownFields, input);
break;
case 8: {
ApiVersion = input.ReadInt32();
break;
}
case 18: {
DemonstrationName = input.ReadString();
break;
}
case 24: {
NumberSteps = input.ReadInt32();
break;
}
case 32: {
NumberEpisodes = input.ReadInt32();
break;
}
case 45: {
MeanReward = input.ReadFloat();
break;
}
}
}
}
}
#endregion
}
#endregion Designer generated code

11
UnitySDK/Assets/ML-Agents/Scripts/CommunicatorObjects/DemonstrationMetaProto.cs.meta


fileFormatVersion: 2
guid: f7abfeda342414e059423ef90ede4c30
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

76
UnitySDK/Assets/ML-Agents/Scripts/Demonstration.cs


using System;
using MLAgents.CommunicatorObjects;
using UnityEngine;
namespace MLAgents
{
/// <summary>
/// Demonstration Object. Contains meta-data regarding demonstration.
/// Used for imitation learning, or other forms of learning from data.
/// </summary>
[Serializable]
public class Demonstration : ScriptableObject
{
public DemonstrationMetaData metaData;
public BrainParameters brainParameters;
public void Initialize(BrainParameters brainParameters,
DemonstrationMetaData demonstrationMetaData)
{
this.brainParameters = brainParameters;
metaData = demonstrationMetaData;
}
}
/// <summary>
/// Demonstration meta-data.
/// Kept in a struct for easy serialization and deserialization.
/// </summary>
[Serializable]
public class DemonstrationMetaData
{
public int numberExperiences;
public int numberEpisodes;
public float meanReward;
public string demonstrationName;
public const int ApiVersion = 1;
/// <summary>
/// Constructor for initializing metadata to default values.
/// </summary>
public DemonstrationMetaData()
{
}
/// <summary>
/// Initialize metadata values based on proto object.
/// </summary>
public DemonstrationMetaData(DemonstrationMetaProto demoProto)
{
numberEpisodes = demoProto.NumberEpisodes;
numberExperiences = demoProto.NumberSteps;
meanReward = demoProto.MeanReward;
demonstrationName = demoProto.DemonstrationName;
if (demoProto.ApiVersion != ApiVersion)
{
throw new Exception("API versions of demonstration are incompatible.");
}
}
/// <summary>
/// Convert metadata object to proto object.
/// </summary>
public DemonstrationMetaProto ToProto()
{
var demoProto = new DemonstrationMetaProto
{
ApiVersion = ApiVersion,
MeanReward = meanReward,
NumberSteps = numberExperiences,
NumberEpisodes = numberEpisodes,
DemonstrationName = demonstrationName
};
return demoProto;
}
}
}

11
UnitySDK/Assets/ML-Agents/Scripts/Demonstration.cs.meta


fileFormatVersion: 2
guid: b651f66c75a1646c6ab48de06d0e13ef
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

65
UnitySDK/Assets/ML-Agents/Scripts/DemonstrationRecorder.cs


using UnityEngine;
using System.Text.RegularExpressions;
namespace MLAgents
{
/// <summary>
/// Demonstration Recorder Component.
/// </summary>
[RequireComponent(typeof(Agent))]
public class DemonstrationRecorder : MonoBehaviour
{
public bool record;
public string demonstrationName;
private Agent recordingAgent;
private string filePath;
private DemonstrationStore demoStore;
/// <summary>
/// Initializes Demonstration store.
/// </summary>
private void Start()
{
if (Application.isEditor && record)
{
recordingAgent = GetComponent<Agent>();
demoStore = new DemonstrationStore();
demonstrationName = SanitizeName(demonstrationName);
demoStore.Initialize(
demonstrationName,
recordingAgent.brain.brainParameters,
recordingAgent.brain.name);
Monitor.Log("Recording Demonstration of Agent: ", recordingAgent.name);
}
}
/// <summary>
/// Removes all characters except alphanumerics, spaces and hyphens from the demonstration name.
/// </summary>
public static string SanitizeName(string demoName)
{
var rgx = new Regex("[^a-zA-Z0-9 -]");
demoName = rgx.Replace(demoName, "");
return demoName;
}
/// <summary>
/// Forwards AgentInfo to Demonstration Store.
/// </summary>
public void WriteExperience(AgentInfo info)
{
demoStore.Record(info);
}
/// <summary>
/// Closes Demonstration store.
/// </summary>
private void OnApplicationQuit()
{
if (Application.isEditor && record)
{
demoStore.Close();
}
}
}
}
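For reference, SanitizeName keeps letters, digits, spaces and hyphens and strips everything else. A minimal Python sketch of the same rule (the function name below is illustrative only, not part of the SDK):

import re

def sanitize_name(demo_name: str) -> str:
    # Mirror of DemonstrationRecorder.SanitizeName: drop every character
    # that is not a letter, digit, space or hyphen.
    return re.sub(r"[^a-zA-Z0-9 -]", "", demo_name)

assert sanitize_name("My Demo_v2!") == "My Demov2"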

11
UnitySDK/Assets/ML-Agents/Scripts/DemonstrationRecorder.cs.meta


fileFormatVersion: 2
guid: 50f710d360a49461cad67ff5e6bcefe1
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

138
UnitySDK/Assets/ML-Agents/Scripts/DemonstrationStore.cs


using System.IO;
using System.IO.Abstractions;
using Google.Protobuf;
using MLAgents.CommunicatorObjects;
namespace MLAgents
{
/// <summary>
/// Responsible for writing demonstration data to file.
/// </summary>
public class DemonstrationStore
{
public const int MetaDataBytes = 32; // Number of bytes allocated to metadata in demo file.
private readonly IFileSystem fileSystem;
private const string DemoDirectory = "Assets/Demonstrations/";
private const string ExtensionType = ".demo";
private string filePath;
private DemonstrationMetaData metaData;
private Stream writer;
private float cumulativeReward;
public DemonstrationStore(IFileSystem fileSystem)
{
this.fileSystem = fileSystem;
}
public DemonstrationStore()
{
fileSystem = new FileSystem();
}
/// <summary>
/// Initializes the Demonstration Store, and writes initial data.
/// </summary>
public void Initialize(
string demonstrationName, BrainParameters brainParameters, string brainName)
{
CreateDirectory();
CreateDemonstrationFile(demonstrationName);
WriteBrainParameters(brainName, brainParameters);
}
/// <summary>
/// Checks for the existence of the Demonstrations directory
/// and creates it if it does not exist.
/// </summary>
private void CreateDirectory()
{
if (!fileSystem.Directory.Exists(DemoDirectory))
{
fileSystem.Directory.CreateDirectory(DemoDirectory);
}
}
/// <summary>
/// Creates demonstration file.
/// </summary>
private void CreateDemonstrationFile(string demonstrationName)
{
// Creates demonstration file.
var literalName = demonstrationName;
filePath = DemoDirectory + literalName + ExtensionType;
var uniqueNameCounter = 0;
while (fileSystem.File.Exists(filePath))
{
literalName = demonstrationName + "_" + uniqueNameCounter;
filePath = DemoDirectory + literalName + ExtensionType;
uniqueNameCounter++;
}
writer = fileSystem.File.Create(filePath);
metaData = new DemonstrationMetaData {demonstrationName = demonstrationName};
var metaProto = metaData.ToProto();
metaProto.WriteDelimitedTo(writer);
}
/// <summary>
/// Writes brain parameters to file.
/// </summary>
private void WriteBrainParameters(string brainName, BrainParameters brainParameters)
{
// Writes BrainParameters to file.
writer.Seek(MetaDataBytes + 1, 0);
var brainProto = brainParameters.ToProto(brainName, BrainTypeProto.Player);
brainProto.WriteDelimitedTo(writer);
}
/// <summary>
/// Write AgentInfo experience to file.
/// </summary>
public void Record(AgentInfo info)
{
// Increment meta-data counters.
metaData.numberExperiences++;
cumulativeReward += info.reward;
if (info.done)
{
EndEpisode();
}
// Write AgentInfo to file.
var agentProto = info.ToProto();
agentProto.WriteDelimitedTo(writer);
}
/// <summary>
/// Performs all clean-up necessary
/// </summary>
public void Close()
{
EndEpisode();
metaData.meanReward = cumulativeReward / metaData.numberEpisodes;
WriteMetadata();
writer.Close();
}
/// <summary>
/// Performs necessary episode-completion steps.
/// </summary>
private void EndEpisode()
{
metaData.numberEpisodes += 1;
}
/// <summary>
/// Writes meta-data.
/// </summary>
private void WriteMetadata()
{
var metaProto = metaData.ToProto();
var metaProtoBytes = metaProto.ToByteArray();
writer.Write(metaProtoBytes, 0, metaProtoBytes.Length);
writer.Seek(0, 0);
metaProto.WriteDelimitedTo(writer);
}
}
}
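The on-disk layout produced by DemonstrationStore is: a length-delimited DemonstrationMetaProto in a reserved region of MetaDataBytes (32) bytes, the BrainParameters proto starting at byte 33, and then one length-delimited AgentInfo proto per recorded step; Close() seeks back to byte 0 and rewrites the metadata with the final counts. A minimal reading sketch in Python under those assumptions (the file path is a placeholder), matching the INITIAL_POS = 33 convention used by demo_loader.py further below:

from google.protobuf.internal.decoder import _DecodeVarint32
from mlagents.envs.communicator_objects.demonstration_meta_proto_pb2 import (
    DemonstrationMetaProto)

with open("UnitySDK/Assets/Demonstrations/Hallway.demo", "rb") as fp:
    data = fp.read()

# The metadata message is length-prefixed and lives in the first 32 bytes.
size, pos = _DecodeVarint32(data, 0)
meta = DemonstrationMetaProto()
meta.ParseFromString(data[pos:pos + size])

# BrainParameters (and the per-step AgentInfo messages) start at byte 33.
pos = 33
print(meta.demonstration_name, meta.number_steps, meta.mean_reward)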

3
UnitySDK/Assets/ML-Agents/Scripts/DemonstrationStore.cs.meta


fileFormatVersion: 2
guid: a79c7ccb2cd042b5b1e710b9588d921b
timeCreated: 1537388072

73
UnitySDK/Assets/ML-Agents/Scripts/Utilities.cs


using UnityEngine;
using System.Collections;
using System.Collections.Generic;
namespace MLAgents
{
public class Utilities
{
/// <summary>
/// Converts a list of Texture2D into a Tensor.
/// </summary>
/// <returns>
/// A 4 dimensional float Tensor of dimension
/// [batch_size, height, width, channel].
/// Where batch_size is the number of input textures,
/// height corresponds to the height of the texture,
/// width corresponds to the width of the texture,
/// channel corresponds to the number of channels extracted from the
/// input textures (based on the input blackAndWhite flag:
/// 3 if the flag is false, 1 otherwise).
/// The values of the Tensor are between 0 and 1.
/// </returns>
/// <param name="textures">
/// The list of textures to be put into the tensor.
/// Note that the textures must have same width and height.
/// </param>
/// <param name="blackAndWhite">
/// If set to <c>true</c> the textures
/// will be converted to grayscale before being stored in the tensor.
/// </param>
public static float[,,,] TextureToFloatArray(
List<Texture2D> textures, bool blackAndWhite)
{
int batchSize = textures.Count;
int width = textures[0].width;
int height = textures[0].height;
var pixels = blackAndWhite ? 1 : 3;
float[,,,] result = new float[batchSize, height, width, pixels];
float[] resultTemp = new float[batchSize * height * width * pixels];
int hwp = height * width * pixels;
int wp = width * pixels;
for (int b = 0; b < batchSize; b++)
{
Color32[] cc = textures[b].GetPixels32();
for (int h = height - 1; h >= 0; h--)
{
for (int w = 0; w < width; w++)
{
Color32 currentPixel = cc[(height - h - 1) * width + w];
if (!blackAndWhite)
{
// For Color32, the r, g and b values are between
// 0 and 255.
resultTemp[b * hwp + h * wp + w * pixels] = currentPixel.r / 255.0f;
resultTemp[b * hwp + h * wp + w * pixels + 1] = currentPixel.g / 255.0f;
resultTemp[b * hwp + h * wp + w * pixels + 2] = currentPixel.b / 255.0f;
}
else
{
resultTemp[b * hwp + h * wp + w * pixels] =
(currentPixel.r + currentPixel.g + currentPixel.b)
/ 3f / 255.0f;
}
}
}
}
System.Buffer.BlockCopy(resultTemp, 0, result, 0, batchSize * hwp * sizeof(float));
return result;
}
}
}

3
UnitySDK/Assets/ML-Agents/Scripts/Utilities.cs.meta


fileFormatVersion: 2
guid: 0e664c25f496478c9c26df6688379f7e
timeCreated: 1537468595

55
config/bc_config.yaml


default:
trainer: offline_bc
batch_size: 64
beta: 5.0e-3
hidden_units: 128
learning_rate: 3.0e-4
max_steps: 5.0e4
memory_size: 256
batches_per_epoch: 10
num_epoch: 5
num_layers: 2
summary_freq: 1000
use_recurrent: false
sequence_length: 32
demo_path: ./UnitySDK/Assets/Demonstrations/Crawler_test.demo
HallwayBrain:
trainer: offline_bc
max_steps: 5.0e5
num_epoch: 5
batch_size: 64
batches_per_epoch: 5
num_layers: 2
hidden_units: 128
buffer_size: 512
use_recurrent: true
memory_size: 256
sequence_length: 32
demo_path: ./UnitySDK/Assets/Demonstrations/Hallway.demo
StudentBrain:
trainer: online_bc
max_steps: 10000
summary_freq: 1000
brain_to_imitate: TeacherBrain
batch_size: 16
batches_per_epoch: 5
num_layers: 4
hidden_units: 64
sequence_length: 16
buffer_size: 128
StudentRecurrentBrain:
trainer: online_bc
max_steps: 10000
summary_freq: 1000
brain_to_imitate: TeacherBrain
batch_size: 16
batches_per_epoch: 5
num_layers: 4
hidden_units: 64
use_recurrent: true
sequence_length: 32
buffer_size: 128

102
docs/images/demo_component.png

Before  After
Width: 882  |  Height: 150  |  Size: 30 KiB

198
docs/images/demo_inspector.png

Before  After
Width: 886  |  Height: 554  |  Size: 64 KiB

98
ml-agents/mlagents/envs/communicator_objects/demonstration_meta_proto_pb2.py


# Generated by the protocol buffer compiler. DO NOT EDIT!
# source: mlagents/envs/communicator_objects/demonstration_meta_proto.proto
import sys
_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
from google.protobuf import descriptor as _descriptor
from google.protobuf import message as _message
from google.protobuf import reflection as _reflection
from google.protobuf import symbol_database as _symbol_database
# @@protoc_insertion_point(imports)
_sym_db = _symbol_database.Default()
DESCRIPTOR = _descriptor.FileDescriptor(
name='mlagents/envs/communicator_objects/demonstration_meta_proto.proto',
package='communicator_objects',
syntax='proto3',
serialized_options=_b('\252\002\034MLAgents.CommunicatorObjects'),
serialized_pb=_b('\nAmlagents/envs/communicator_objects/demonstration_meta_proto.proto\x12\x14\x63ommunicator_objects\"\x8d\x01\n\x16\x44\x65monstrationMetaProto\x12\x13\n\x0b\x61pi_version\x18\x01 \x01(\x05\x12\x1a\n\x12\x64\x65monstration_name\x18\x02 \x01(\t\x12\x14\n\x0cnumber_steps\x18\x03 \x01(\x05\x12\x17\n\x0fnumber_episodes\x18\x04 \x01(\x05\x12\x13\n\x0bmean_reward\x18\x05 \x01(\x02\x42\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3')
)
_DEMONSTRATIONMETAPROTO = _descriptor.Descriptor(
name='DemonstrationMetaProto',
full_name='communicator_objects.DemonstrationMetaProto',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='api_version', full_name='communicator_objects.DemonstrationMetaProto.api_version', index=0,
number=1, type=5, cpp_type=1, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='demonstration_name', full_name='communicator_objects.DemonstrationMetaProto.demonstration_name', index=1,
number=2, type=9, cpp_type=9, label=1,
has_default_value=False, default_value=_b("").decode('utf-8'),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='number_steps', full_name='communicator_objects.DemonstrationMetaProto.number_steps', index=2,
number=3, type=5, cpp_type=1, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='number_episodes', full_name='communicator_objects.DemonstrationMetaProto.number_episodes', index=3,
number=4, type=5, cpp_type=1, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='mean_reward', full_name='communicator_objects.DemonstrationMetaProto.mean_reward', index=4,
number=5, type=2, cpp_type=6, label=1,
has_default_value=False, default_value=float(0),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
serialized_options=None, file=DESCRIPTOR),
],
extensions=[
],
nested_types=[],
enum_types=[
],
serialized_options=None,
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[
],
serialized_start=92,
serialized_end=233,
)
DESCRIPTOR.message_types_by_name['DemonstrationMetaProto'] = _DEMONSTRATIONMETAPROTO
_sym_db.RegisterFileDescriptor(DESCRIPTOR)
DemonstrationMetaProto = _reflection.GeneratedProtocolMessageType('DemonstrationMetaProto', (_message.Message,), dict(
DESCRIPTOR = _DEMONSTRATIONMETAPROTO,
__module__ = 'mlagents.envs.communicator_objects.demonstration_meta_proto_pb2'
# @@protoc_insertion_point(class_scope:communicator_objects.DemonstrationMetaProto)
))
_sym_db.RegisterMessage(DemonstrationMetaProto)
DESCRIPTOR._options = None
# @@protoc_insertion_point(module_scope)

19
ml-agents/mlagents/envs/utilities.py


from PIL import Image
import numpy as np
import io
def process_pixels(image_bytes, gray_scale):
"""
Converts a byte array observation image into a numpy array
and optionally converts it to grayscale.
:param image_bytes: input byte array corresponding to image
:param gray_scale: whether to average the channels into a single grayscale channel
:return: processed numpy array of observation from environment
"""
s = bytearray(image_bytes)
image = Image.open(io.BytesIO(s))
s = np.array(image) / 255.0
if gray_scale:
s = np.mean(s, axis=2)
s = np.reshape(s, [s.shape[0], s.shape[1], 1])
return s
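A small, self-contained check of process_pixels, assuming an in-memory PNG is a valid input (the 8x8 image below is only a test fixture):

import io
import numpy as np
from PIL import Image
from mlagents.envs.utilities import process_pixels

# Encode an 8x8 red RGB image as PNG bytes entirely in memory.
buf = io.BytesIO()
Image.new("RGB", (8, 8), color=(255, 0, 0)).save(buf, format="PNG")

rgb = process_pixels(buf.getvalue(), gray_scale=False)
gray = process_pixels(buf.getvalue(), gray_scale=True)

assert rgb.shape == (8, 8, 3) and gray.shape == (8, 8, 1)
assert 0.0 <= rgb.min() and rgb.max() <= 1.0  # values are scaled into [0, 1]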

53
ml-agents/mlagents/trainers/bc/offline_trainer.py


# # Unity ML-Agents Toolkit
# ## ML-Agent Learning (Behavioral Cloning)
# Contains an implementation of Behavioral Cloning Algorithm
import logging
from mlagents.trainers.bc.trainer import BCTrainer
from mlagents.trainers.demo_loader import demo_to_buffer
from mlagents.trainers.trainer import UnityTrainerException
logger = logging.getLogger("mlagents.trainers")
class OfflineBCTrainer(BCTrainer):
"""The OfflineBCTrainer is an implementation of Offline Behavioral Cloning."""
def __init__(self, brain, trainer_parameters, training, load, seed, run_id):
"""
Responsible for collecting experiences and training a Behavioral Cloning model.
:param trainer_parameters: The parameters for the trainer (dictionary).
:param training: Whether the trainer is set for training.
:param load: Whether the model should be loaded.
:param seed: The seed the model will be initialized with
:param run_id: The identifier of the current run
"""
super(OfflineBCTrainer, self).__init__(
brain, trainer_parameters, training, load, seed, run_id)
self.param_keys = ['batch_size', 'summary_freq', 'max_steps',
'batches_per_epoch', 'use_recurrent',
'hidden_units', 'learning_rate', 'num_layers',
'sequence_length', 'memory_size', 'model_path',
'demo_path']
self.check_param_keys()
self.batches_per_epoch = trainer_parameters['batches_per_epoch']
self.n_sequences = max(int(trainer_parameters['batch_size'] / self.policy.sequence_length),
1)
brain_params, self.demonstration_buffer = demo_to_buffer(
trainer_parameters['demo_path'],
self.policy.sequence_length)
if brain.__dict__ != brain_params.__dict__:
raise UnityTrainerException("The provided demonstration is not compatible with the "
"brain being used for performance evaluation.")
def __str__(self):
return '''Hyperparameters for the Imitation Trainer of brain {0}: \n{1}'''.format(
self.brain_name, '\n'.join(
['\t{0}:\t{1}'.format(x, self.trainer_parameters[x]) for x in self.param_keys]))

116
ml-agents/mlagents/trainers/bc/online_trainer.py


# # Unity ML-Agents Toolkit
# ## ML-Agent Learning (Behavioral Cloning)
# Contains an implementation of Behavioral Cloning Algorithm
import logging
import numpy as np
from mlagents.envs import AllBrainInfo
from mlagents.trainers.bc.trainer import BCTrainer
logger = logging.getLogger("mlagents.trainers")
class OnlineBCTrainer(BCTrainer):
"""The OnlineBCTrainer is an implementation of Online Behavioral Cloning."""
def __init__(self, brain, trainer_parameters, training, load, seed, run_id):
"""
Responsible for collecting experiences and training a Behavioral Cloning model.
:param trainer_parameters: The parameters for the trainer (dictionary).
:param training: Whether the trainer is set for training.
:param load: Whether the model should be loaded.
:param seed: The seed the model will be initialized with
:param run_id: The identifier of the current run
"""
super(OnlineBCTrainer, self).__init__(brain, trainer_parameters, training, load, seed,
run_id)
self.param_keys = ['brain_to_imitate', 'batch_size', 'time_horizon',
'summary_freq', 'max_steps',
'batches_per_epoch', 'use_recurrent',
'hidden_units', 'learning_rate', 'num_layers',
'sequence_length', 'memory_size', 'model_path']
self.check_param_keys()
self.brain_to_imitate = trainer_parameters['brain_to_imitate']
self.batches_per_epoch = trainer_parameters['batches_per_epoch']
self.n_sequences = max(int(trainer_parameters['batch_size'] / self.policy.sequence_length),
1)
def __str__(self):
return '''Hyperparameters for the Imitation Trainer of brain {0}: \n{1}'''.format(
self.brain_name, '\n'.join(
['\t{0}:\t{1}'.format(x, self.trainer_parameters[x]) for x in self.param_keys]))
def add_experiences(self, curr_info: AllBrainInfo, next_info: AllBrainInfo,
take_action_outputs):
"""
Adds experiences to each agent's experience history.
:param curr_info: Current AllBrainInfo (Dictionary of all current brains and corresponding BrainInfo).
:param next_info: Next AllBrainInfo (Dictionary of all current brains and corresponding BrainInfo).
:param take_action_outputs: The outputs of the take action method.
"""
# Used to collect teacher experience into training buffer
info_teacher = curr_info[self.brain_to_imitate]
next_info_teacher = next_info[self.brain_to_imitate]
for agent_id in info_teacher.agents:
self.demonstration_buffer[agent_id].last_brain_info = info_teacher
for agent_id in next_info_teacher.agents:
stored_info_teacher = self.demonstration_buffer[agent_id].last_brain_info
if stored_info_teacher is None:
continue
else:
idx = stored_info_teacher.agents.index(agent_id)
next_idx = next_info_teacher.agents.index(agent_id)
if stored_info_teacher.text_observations[idx] != "":
info_teacher_record, info_teacher_reset = \
stored_info_teacher.text_observations[idx].lower().split(",")
next_info_teacher_record, next_info_teacher_reset = \
next_info_teacher.text_observations[idx]. \
lower().split(",")
if next_info_teacher_reset == "true":
self.demonstration_buffer.reset_update_buffer()
else:
info_teacher_record, next_info_teacher_record = "true", "true"
if info_teacher_record == "true" and next_info_teacher_record == "true":
if not stored_info_teacher.local_done[idx]:
for i in range(self.policy.vis_obs_size):
self.demonstration_buffer[agent_id]['visual_obs%d' % i] \
.append(stored_info_teacher.visual_observations[i][idx])
if self.policy.use_vec_obs:
self.demonstration_buffer[agent_id]['vector_obs'] \
.append(stored_info_teacher.vector_observations[idx])
if self.policy.use_recurrent:
if stored_info_teacher.memories.shape[1] == 0:
stored_info_teacher.memories = np.zeros(
(len(stored_info_teacher.agents),
self.policy.m_size))
self.demonstration_buffer[agent_id]['memory'].append(
stored_info_teacher.memories[idx])
self.demonstration_buffer[agent_id]['actions'].append(
next_info_teacher.previous_vector_actions[next_idx])
super(OnlineBCTrainer, self).add_experiences(curr_info, next_info, take_action_outputs)
def process_experiences(self, current_info: AllBrainInfo, next_info: AllBrainInfo):
"""
Checks agent histories for processing condition, and processes them as necessary.
Processing involves calculating value and advantage targets for model updating step.
:param current_info: Current AllBrainInfo
:param next_info: Next AllBrainInfo
"""
info_teacher = next_info[self.brain_to_imitate]
for l in range(len(info_teacher.agents)):
teacher_action_list = len(self.demonstration_buffer[info_teacher.agents[l]]['actions'])
horizon_reached = teacher_action_list > self.trainer_parameters['time_horizon']
teacher_filled = len(self.demonstration_buffer[info_teacher.agents[l]]['actions']) > 0
if (info_teacher.local_done[l] or horizon_reached) and teacher_filled:
agent_id = info_teacher.agents[l]
self.demonstration_buffer.append_update_buffer(
agent_id, batch_size=None, training_length=self.policy.sequence_length)
self.demonstration_buffer[agent_id].reset_agent()
super(OnlineBCTrainer, self).process_experiences(current_info, next_info)
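The teacher-to-trainer handshake above assumes the teacher Brain publishes a text observation of the form "<record>,<reset>" (for example "true,false"): the first flag gates whether the step is copied into the demonstration buffer, and the second requests that the update buffer be cleared. A tiny sketch of that parsing convention, with hypothetical observation strings:

def parse_teacher_flags(text_observation: str):
    # Empty text observations default to "record everything, never reset",
    # mirroring the fallback branch in add_experiences above.
    if text_observation == "":
        return True, False
    record, reset = text_observation.lower().split(",")
    return record == "true", reset == "true"

assert parse_teacher_flags("True,False") == (True, False)
assert parse_teacher_flags("") == (True, False)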

151
ml-agents/mlagents/trainers/demo_loader.py


import numpy as np
import pathlib
import logging
from mlagents.trainers.buffer import Buffer
from mlagents.envs.brain import BrainParameters, BrainInfo
from mlagents.envs.utilities import process_pixels
from mlagents.envs.communicator_objects import *
from google.protobuf.internal.decoder import _DecodeVarint32
logger = logging.getLogger("mlagents.trainers")
def brain_param_proto_to_obj(brain_param_proto):
resolution = [{
"height": x.height,
"width": x.width,
"blackAndWhite": x.gray_scale
} for x in brain_param_proto.camera_resolutions]
brain_params = BrainParameters(brain_param_proto.brain_name, {
"vectorObservationSize": brain_param_proto.vector_observation_size,
"numStackedVectorObservations": brain_param_proto.num_stacked_vector_observations,
"cameraResolutions": resolution,
"vectorActionSize": brain_param_proto.vector_action_size,
"vectorActionDescriptions": brain_param_proto.vector_action_descriptions,
"vectorActionSpaceType": brain_param_proto.vector_action_space_type
})
return brain_params
def agent_info_proto_to_brain_info(agent_info, brain_params):
vis_obs = []
agent_info_list = [agent_info]
for i in range(brain_params.number_visual_observations):
obs = [process_pixels(x.visual_observations[i],
brain_params.camera_resolutions[i]['blackAndWhite'])
for x in agent_info_list]
vis_obs += [np.array(obs)]
if len(agent_info_list) == 0:
memory_size = 0
else:
memory_size = max([len(x.memories) for x in agent_info_list])
if memory_size == 0:
memory = np.zeros((0, 0))
else:
[x.memories.extend([0] * (memory_size - len(x.memories))) for x in agent_info_list]
memory = np.array([x.memories for x in agent_info_list])
total_num_actions = sum(brain_params.vector_action_space_size)
mask_actions = np.ones((len(agent_info_list), total_num_actions))
for agent_index, agent_info in enumerate(agent_info_list):
if agent_info.action_mask is not None:
if len(agent_info.action_mask) == total_num_actions:
mask_actions[agent_index, :] = [
0 if agent_info.action_mask[k] else 1 for k in range(total_num_actions)]
if any([np.isnan(x.reward) for x in agent_info_list]):
logger.warning("An agent had a NaN reward.")
if any([np.isnan(x.stacked_vector_observation).any() for x in agent_info_list]):
logger.warning("An agent had a NaN observation.")
brain_info = BrainInfo(
visual_observation=vis_obs,
vector_observation=np.nan_to_num(
np.array([x.stacked_vector_observation for x in agent_info_list])),
text_observations=[x.text_observation for x in agent_info_list],
memory=memory,
reward=[x.reward if not np.isnan(x.reward) else 0 for x in agent_info_list],
agents=[x.id for x in agent_info_list],
local_done=[x.done for x in agent_info_list],
vector_action=np.array([x.stored_vector_actions for x in agent_info_list]),
text_action=[x.stored_text_actions for x in agent_info_list],
max_reached=[x.max_step_reached for x in agent_info_list],
action_mask=mask_actions
)
return brain_info
def make_demo_buffer(brain_infos, brain_params, sequence_length):
# Create and populate buffer using experiences
demo_buffer = Buffer()
for idx, experience in enumerate(brain_infos):
if idx > len(brain_infos) - 2:
break
current_brain_info = brain_infos[idx]
next_brain_info = brain_infos[idx + 1]
demo_buffer[0].last_brain_info = current_brain_info
for i in range(brain_params.number_visual_observations):
demo_buffer[0]['visual_obs%d' % i] \
.append(current_brain_info.visual_observations[i][0])
if brain_params.vector_observation_space_size > 0:
demo_buffer[0]['vector_obs'] \
.append(current_brain_info.vector_observations[0])
demo_buffer[0]['actions'].append(next_brain_info.previous_vector_actions[0])
if next_brain_info.local_done[0]:
demo_buffer.append_update_buffer(0, batch_size=None,
training_length=sequence_length)
demo_buffer.reset_local_buffers()
demo_buffer.append_update_buffer(0, batch_size=None,
training_length=sequence_length)
return demo_buffer
def demo_to_buffer(file_path, sequence_length):
"""
Loads demonstration file and uses it to fill training buffer.
:param file_path: Location of demonstration file (.demo).
:param sequence_length: Length of trajectories to fill buffer.
:return:
"""
brain_params, brain_infos, _ = load_demonstration(file_path)
demo_buffer = make_demo_buffer(brain_infos, brain_params, sequence_length)
return brain_params, demo_buffer
def load_demonstration(file_path):
"""
Loads and parses a demonstration file.
:param file_path: Location of demonstration file (.demo).
:return: BrainParameter and list of BrainInfos containing demonstration data.
"""
INITIAL_POS = 33
file_extension = pathlib.Path(file_path).suffix
if file_extension != '.demo':
raise ValueError("The file is not a '.demo' file. Please provide a file with the "
"correct extension.")
brain_params = None
brain_infos = []
data = open(file_path, "rb").read()
next_pos, pos, obs_decoded = 0, 0, 0
total_expected = 0
while pos < len(data):
next_pos, pos = _DecodeVarint32(data, pos)
if obs_decoded == 0:
meta_data_proto = DemonstrationMetaProto()
meta_data_proto.ParseFromString(data[pos:pos + next_pos])
total_expected = meta_data_proto.number_steps
pos = INITIAL_POS
if obs_decoded == 1:
brain_param_proto = BrainParametersProto()
brain_param_proto.ParseFromString(data[pos:pos + next_pos])
brain_params = brain_param_proto_to_obj(brain_param_proto)
pos += next_pos
if obs_decoded > 1:
agent_info = AgentInfoProto()
agent_info.ParseFromString(data[pos:pos + next_pos])
brain_info = agent_info_proto_to_brain_info(agent_info, brain_params)
brain_infos.append(brain_info)
if len(brain_infos) == total_expected:
break
pos += next_pos
obs_decoded += 1
return brain_params, brain_infos, total_expected
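A minimal usage sketch of the loader, assuming a .demo file recorded as above exists at the path used in bc_config.yaml:

from mlagents.trainers.demo_loader import demo_to_buffer, load_demonstration

demo_path = "./UnitySDK/Assets/Demonstrations/Hallway.demo"

# Inspect the raw demonstration: brain parameters plus one BrainInfo per step.
brain_params, brain_infos, total_expected = load_demonstration(demo_path)
print(brain_params.brain_name, len(brain_infos), total_expected)

# Or convert it straight into a training buffer of length-32 trajectories,
# as OfflineBCTrainer does with trainer_parameters['demo_path'].
brain_params, demo_buffer = demo_to_buffer(demo_path, sequence_length=32)
print(len(demo_buffer.update_buffer['actions']))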

60
ml-agents/tests/trainers/test.demo


(Binary .demo file: a recorded "Test9" demonstration for brain Ball3DBrain; raw protobuf bytes not shown.)

14
ml-agents/tests/trainers/test_demo_loader.py


import unittest.mock as mock
import pytest
from mlagents.trainers.demo_loader import load_demonstration, make_demo_buffer
def test_load_demo():
brain_parameters, brain_infos, total_expected = load_demonstration('./tests/trainers/test.demo')
assert (brain_parameters.brain_name == "Ball3DBrain")
assert (brain_parameters.vector_observation_space_size == 8)
assert (len(brain_infos) == total_expected)
demo_buffer = make_demo_buffer(brain_infos, brain_parameters, 1)
assert (len(demo_buffer.update_buffer['actions']) == total_expected - 1)

12
protobuf-definitions/proto/mlagents/envs/communicator_objects/demonstration_meta_proto.proto


syntax = "proto3";
option csharp_namespace = "MLAgents.CommunicatorObjects";
package communicator_objects;
message DemonstrationMetaProto {
int32 api_version = 1;
string demonstration_name = 2;
int32 number_steps = 3;
int32 number_episodes = 4;
float mean_reward = 5;
}
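The same message is available from the generated Python module shown earlier; a quick round-trip sketch with made-up field values:

from mlagents.envs.communicator_objects.demonstration_meta_proto_pb2 import (
    DemonstrationMetaProto)

# Build a metadata message with illustrative values and serialize it.
meta = DemonstrationMetaProto(
    api_version=1,
    demonstration_name="Hallway",
    number_steps=5000,
    number_episodes=42,
    mean_reward=0.93,
)
payload = meta.SerializeToString()

# Parse it back and confirm the fields survived the round trip.
round_trip = DemonstrationMetaProto()
round_trip.ParseFromString(payload)
assert round_trip.number_steps == 5000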

68
UnitySDK/Assets/ML-Agents/Resources/DemoIcon.png

Before  After
Width: 256  |  Height: 256  |  Size: 11 KiB

86
UnitySDK/Assets/ML-Agents/Resources/DemoIcon.png.meta


fileFormatVersion: 2
guid: 3352a0e8d253b4a4ea3782a6d7e09d9b
TextureImporter:
fileIDToRecycleName: {}
externalObjects: {}
serializedVersion: 4
mipmaps:
mipMapMode: 0
enableMipMap: 1
sRGBTexture: 1
linearTexture: 0
fadeOut: 0
borderMipMap: 0
mipMapsPreserveCoverage: 0
alphaTestReferenceValue: 0.5
mipMapFadeDistanceStart: 1
mipMapFadeDistanceEnd: 3
bumpmap:
convertToNormalMap: 0
externalNormalMap: 0
heightScale: 0.25
normalMapFilter: 0
isReadable: 0
grayScaleToAlpha: 0
generateCubemap: 6
cubemapConvolution: 0
seamlessCubemap: 0
textureFormat: 1
maxTextureSize: 2048
textureSettings:
serializedVersion: 2
filterMode: -1
aniso: -1
mipBias: -1
wrapU: -1
wrapV: -1
wrapW: -1
nPOTScale: 1
lightmap: 0
compressionQuality: 50
spriteMode: 0
spriteExtrude: 1
spriteMeshType: 1
alignment: 0
spritePivot: {x: 0.5, y: 0.5}
spritePixelsToUnits: 100
spriteBorder: {x: 0, y: 0, z: 0, w: 0}
spriteGenerateFallbackPhysicsShape: 1
alphaUsage: 1
alphaIsTransparency: 1
spriteTessellationDetail: -1
textureType: 0
textureShape: 1
maxTextureSizeSet: 0
compressionQualitySet: 0
textureFormatSet: 0
platformSettings:
- buildTarget: DefaultTexturePlatform
maxTextureSize: 2048
resizeAlgorithm: 0
textureFormat: -1
textureCompression: 1
compressionQuality: 50
crunchedCompression: 0
allowsAlphaSplitting: 0
overridden: 0
androidETC2FallbackOverride: 0
- buildTarget: Standalone
maxTextureSize: 2048
resizeAlgorithm: 0
textureFormat: -1
textureCompression: 1
compressionQuality: 50
crunchedCompression: 0
allowsAlphaSplitting: 0
overridden: 0
androidETC2FallbackOverride: 0
spriteSheet:
serializedVersion: 2
sprites: []
outline: []
physicsShape: []
spritePackingTag:
userData:
assetBundleName:
assetBundleVariant: