
Merge remote-tracking branch 'origin/develop' into enable-flake8

/develop-generalizationTraining-TrainerController
Chris Elion, 5 years ago
Current commit
5d07ca1f
161 files changed, with 22,523 insertions and 476 deletions
1. .gitignore (5)
2. README.md (1)
3. UnitySDK/Assets/ML-Agents/Examples/3DBall/Scenes/3DBall.unity (10)
4. UnitySDK/Assets/ML-Agents/Examples/3DBall/Scenes/3DBallHard.unity (10)
5. UnitySDK/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DAcademy.cs (2)
6. UnitySDK/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DAgent.cs (19)
7. UnitySDK/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DHardAgent.cs (19)
8. UnitySDK/Assets/ML-Agents/Examples/Soccer/Scenes/SoccerTwos.unity (6)
9. UnitySDK/Assets/ML-Agents/Examples/Soccer/Scripts/AgentSoccer.cs (17)
10. UnitySDK/Assets/ML-Agents/Examples/Soccer/Scripts/SoccerAcademy.cs (2)
11. UnitySDK/Assets/ML-Agents/Examples/Soccer/Scripts/SoccerFieldArea.cs (25)
12. UnitySDK/Assets/ML-Agents/Examples/Tennis/Scenes/Tennis.unity (8)
13. UnitySDK/Assets/ML-Agents/Examples/Tennis/Scenes/TennisIL.unity (18)
14. UnitySDK/Assets/ML-Agents/Examples/Tennis/Scripts/HitWall.cs (2)
15. UnitySDK/Assets/ML-Agents/Examples/Tennis/Scripts/TennisAcademy.cs (1)
16. UnitySDK/Assets/ML-Agents/Examples/Tennis/Scripts/TennisAgent.cs (40)
17. docs/Background-Machine-Learning.md (2)
18. docs/Installation.md (19)
19. docs/Learning-Environment-Examples.md (38)
20. docs/Training-Imitation-Learning.md (125)
21. docs/Training-PPO.md (70)
22. docs/Training-RewardSignals.md (99)
23. docs/Using-Docker.md (23)
24. gym-unity/gym_unity/envs/unity_env.py (6)
25. ml-agents-envs/mlagents/envs/__init__.py (4)
26. ml-agents-envs/mlagents/envs/brain.py (13)
27. ml-agents-envs/mlagents/envs/environment.py (12)
28. ml-agents-envs/setup.py (2)
29. ml-agents/mlagents/trainers/__init__.py (3)
30. ml-agents/mlagents/trainers/bc/policy.py (4)
31. ml-agents/mlagents/trainers/components/reward_signals/curiosity/model.py (14)
32. ml-agents/mlagents/trainers/components/reward_signals/curiosity/signal.py (32)
33. ml-agents/mlagents/trainers/components/reward_signals/extrinsic/signal.py (17)
34. ml-agents/mlagents/trainers/components/reward_signals/reward_signal.py (22)
35. ml-agents/mlagents/trainers/components/reward_signals/reward_signal_factory.py (6)
36. ml-agents/mlagents/trainers/demo_loader.py (39)
37. ml-agents/mlagents/trainers/learn.py (7)
38. ml-agents/mlagents/trainers/models.py (37)
39. ml-agents/mlagents/trainers/ppo/policy.py (39)
40. ml-agents/mlagents/trainers/ppo/trainer.py (30)
41. ml-agents/mlagents/trainers/tests/mock_brain.py (49)
42. ml-agents/mlagents/trainers/tests/test_demo_loader.py (13)
43. ml-agents/mlagents/trainers/tests/test_learn.py (6)
44. ml-agents/mlagents/trainers/tests/test_policy.py (8)
45. ml-agents/mlagents/trainers/tests/test_ppo.py (76)
46. ml-agents/mlagents/trainers/tests/test_reward_signals.py (154)
47. ml-agents/mlagents/trainers/tests/test_trainer_controller.py (134)
48. ml-agents/mlagents/trainers/trainer.py (27)
49. ml-agents/mlagents/trainers/trainer_controller.py (187)
50. ml-agents/mlagents/trainers/tf_policy.py (14)
51. setup.cfg (6)
52. docs/Training-BehavioralCloning.md (92)
53. docs/images/mlagents-ImitationAndRL.png (80)
54. ml-agents-envs/mlagents/envs/env_manager.py (38)
55. ml-agents-envs/mlagents/envs/policy.py (10)
56. ml-agents-envs/mlagents/envs/subprocess_env_manager.py (187)
57. ml-agents-envs/mlagents/envs/tests/test_subprocess_env_manager.py (110)
58. ml-agents-envs/mlagents/envs/tests/test_timers.py (96)
59. ml-agents-envs/mlagents/envs/timers.py (181)
60. ml-agents/mlagents/trainers/tests/test_bcmodule.py (158)
61. ml-agents/mlagents/trainers/tests/testdcvis.demo (1001)
62. demos/Expert3DBall.demo (442)
63. demos/Expert3DBallHard.demo (1001)
64. demos/ExpertBanana.demo (1001)
65. demos/ExpertBasic.demo (171)
66. demos/ExpertBouncer.demo (198)
67. demos/ExpertCrawlerSta.demo (1001)
68. demos/ExpertGrid.demo (1001)
69. demos/ExpertHallway.demo (1001)
70. demos/ExpertPush.demo (1001)
71. demos/ExpertPyramid.demo (1001)
72. demos/ExpertReacher.demo (1001)
73. demos/ExpertSoccerGoal.demo (1001)
74. demos/ExpertSoccerStri.demo (1001)
75. demos/ExpertTennis.demo (1001)
76. demos/ExpertWalker.demo (1001)
77. docs/localized/KR/docs/Migrating.md (0)
78. docs/localized/KR/docs/Readme.md (0)
79. docs/localized/KR/docs/images/academy.png (115)
80. docs/localized/KR/docs/images/agent.png (70)
81. docs/localized/KR/docs/images/anaconda_default.PNG (664)
82. docs/localized/KR/docs/images/anaconda_install.PNG (635)
83. docs/localized/KR/docs/images/balance.png (1001)
84. docs/localized/KR/docs/images/banana.png (1001)
85. docs/localized/KR/docs/images/banner.png (611)
86. docs/localized/KR/docs/images/basic.png (69)
87. docs/localized/KR/docs/images/bc_teacher_helper.png (51)
88. docs/localized/KR/docs/images/bouncer.png (955)
89. docs/localized/KR/docs/images/brain.png (121)
90. docs/localized/KR/docs/images/broadcast.png (139)
91. docs/localized/KR/docs/images/conda_new.PNG (268)
92. docs/localized/KR/docs/images/crawler.png (1001)

.gitignore (5)


.DS_Store
.ipynb_checkpoints
# pytest cache
# pytest cache
*.pytest_cache/
# Ignore compiled protobuf files.

# Python virtual environment
venv/
.mypy_cache/
# Code coverage report
.coverage

README.md (1)


we welcome any enhancements and improvements from the community.
* [Chinese](docs/localized/zh-CN/)
* [Korean](docs/localized/KR/)
## License

UnitySDK/Assets/ML-Agents/Examples/3DBall/Scenes/3DBall.unity (10)


m_ReflectionIntensity: 1
m_CustomReflection: {fileID: 0}
m_Sun: {fileID: 0}
m_IndirectSpecularColor: {r: 0.44824862, g: 0.49827534, b: 0.57558274, a: 1}
m_IndirectSpecularColor: {r: 0.44824898, g: 0.49827564, b: 0.5755826, a: 1}
--- !u!157 &3
LightmapSettings:
m_ObjectHideFlags: 0

timeScale: 1
targetFrameRate: -1
resetParameters:
resetParameters: []
resetParameters:
- key: mass
value: 1
- key: gravity
value: 9.81
- key: scale
value: 1
--- !u!1 &1746325439
GameObject:
m_ObjectHideFlags: 0

UnitySDK/Assets/ML-Agents/Examples/3DBall/Scenes/3DBallHard.unity (10)


m_ReflectionIntensity: 1
m_CustomReflection: {fileID: 0}
m_Sun: {fileID: 0}
m_IndirectSpecularColor: {r: 0.45096254, g: 0.5008292, b: 0.5744089, a: 1}
m_IndirectSpecularColor: {r: 0.45096314, g: 0.50082976, b: 0.57440954, a: 1}
--- !u!157 &3
LightmapSettings:
m_ObjectHideFlags: 0

timeScale: 1
targetFrameRate: -1
resetParameters:
resetParameters: []
resetParameters:
- key: mass
value: 1
- key: gravity
value: 9.81
- key: scale
value: 1
--- !u!1001 &1591880668
Prefab:
m_ObjectHideFlags: 0

UnitySDK/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DAcademy.cs (2)


{
public override void AcademyReset()
{
Physics.gravity = new Vector3(0, -resetParameters["gravity"], 0);
}
public override void AcademyStep()

UnitySDK/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DAgent.cs (19)


[Header("Specific to Ball3D")]
public GameObject ball;
private Rigidbody ballRb;
private ResetParameters resetParams;
var academy = Object.FindObjectOfType<Academy>() as Academy;
resetParams = academy.resetParameters;
SetResetParameters();
}
public override void CollectObservations()

public override void AgentAction(float[] vectorAction, string textAction)
{
if (brain.brainParameters.vectorActionSpaceType == SpaceType.continuous)
{
var actionZ = 2f * Mathf.Clamp(vectorAction[0], -1f, 1f);

ballRb.velocity = new Vector3(0f, 0f, 0f);
ball.transform.position = new Vector3(Random.Range(-1.5f, 1.5f), 4f, Random.Range(-1.5f, 1.5f))
+ gameObject.transform.position;
//Reset the parameters when the Agent is reset.
SetResetParameters();
}
public void SetBall()
{
//Set the attributes of the ball by fetching the information from the academy
ballRb.mass = resetParams["mass"];
var scale = resetParams["scale"];
ball.transform.localScale = new Vector3(scale, scale, scale);
public void SetResetParameters()
{
SetBall();
}
}

UnitySDK/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DHardAgent.cs (19)


[Header("Specific to Ball3DHard")]
public GameObject ball;
private Rigidbody ballRb;
private ResetParameters resetParams;
var academy = Object.FindObjectOfType<Academy>() as Academy;
resetParams = academy.resetParameters;
SetResetParameters();
}
public override void CollectObservations()

public override void AgentAction(float[] vectorAction, string textAction)
{
if (brain.brainParameters.vectorActionSpaceType == SpaceType.continuous)
{
var actionZ = 2f * Mathf.Clamp(vectorAction[0], -1f, 1f);

}
public void SetBall()
{
//Set the attributes of the ball by fetching the information from the academy
ballRb.mass = resetParams["mass"];
var scale = resetParams["scale"];
ball.transform.localScale = new Vector3(scale, scale, scale);
}
public void SetResetParameters()
{
SetBall();
}
}

UnitySDK/Assets/ML-Agents/Examples/Soccer/Scenes/SoccerTwos.unity (6)


timeScale: 2
targetFrameRate: 60
resetParameters:
resetParameters: []
resetParameters:
- key: ball_scale
value: 7.5
- key: gravity
value: 9.81
brainStriker: {fileID: 11400000, guid: 29ed78b3e8fef4340b3a1f6954b88f18, type: 2}
brainGoalie: {fileID: 11400000, guid: 090fa5a8588f5433bb7f878e6f5ac954, type: 2}
redMaterial: {fileID: 2100000, guid: 776dd8b57653342839c3fb5f46ce664e, type: 2}

UnitySDK/Assets/ML-Agents/Examples/Soccer/Scripts/AgentSoccer.cs (17)


public enum Team
{
Red,
Red,
Striker,
Striker,
[HideInInspector]
public Rigidbody agentRb;
SoccerAcademy academy;

var playerState = new PlayerState
{
agentRB = agentRb,
startingPos = transform.position,
agentRB = agentRb,
startingPos = transform.position,
agentScript = this,
};
area.playerStates.Add(playerState);

transform.position = area.GetRandomSpawnPos(agentRole, team);
agentRb.velocity = Vector3.zero;
agentRb.angularVelocity = Vector3.zero;
SetResetParameters();
}
public void SetResetParameters()
{
area.ResetBall();
}
}

UnitySDK/Assets/ML-Agents/Examples/Soccer/Scripts/SoccerAcademy.cs (2)


}
public override void AcademyReset()
{
Physics.gravity = new Vector3(0, -resetParameters["gravity"], 0);
}
public override void AcademyStep()

UnitySDK/Assets/ML-Agents/Examples/Soccer/Scripts/SoccerFieldArea.cs (25)


[System.Serializable]
public class PlayerState
{
public int playerIndex;
public Rigidbody agentRB;
public Vector3 startingPos;
public AgentSoccer agentScript;
public int playerIndex;
public Rigidbody agentRB;
public Vector3 startingPos;
public AgentSoccer agentScript;
public float ballPosReward;
}

public GameObject ball;
[HideInInspector]
public Rigidbody ballRB;
public GameObject ground;
public GameObject ground;
public GameObject centerPitch;
SoccerBallController ballController;
public List<PlayerState> playerStates = new List<PlayerState>();

public IEnumerator GoalScoredSwapGroundMaterial(Material mat, float time)
{
groundRenderer.material = mat;
yield return new WaitForSeconds(time);
yield return new WaitForSeconds(time);
groundRenderer.material = groundMaterial;
}

academy = FindObjectOfType<SoccerAcademy>();
groundRenderer = centerPitch.GetComponent<Renderer>();
groundRenderer = centerPitch.GetComponent<Renderer>();
groundMaterial = groundRenderer.material;
canResetBall = true;
if (goalTextUI) { goalTextUI.SetActive(false); }

{
xOffset = xOffset * -1f;
}
var randomSpawnPos = ground.transform.position +
new Vector3(xOffset, 0f, 0f)
var randomSpawnPos = ground.transform.position +
new Vector3(xOffset, 0f, 0f)
+ (Random.insideUnitSphere * 2);
randomSpawnPos.y = ground.transform.position.y + 2;
return randomSpawnPos;

{
var randomSpawnPos = ground.transform.position +
new Vector3(0f, 0f, 0f)
var randomSpawnPos = ground.transform.position +
new Vector3(0f, 0f, 0f)
+ (Random.insideUnitSphere * 2);
randomSpawnPos.y = ground.transform.position.y + 2;
return randomSpawnPos;

ball.transform.position = GetBallSpawnPosition();
ballRB.velocity = Vector3.zero;
ballRB.angularVelocity = Vector3.zero;
var ballScale = academy.resetParameters["ball_scale"];
ballRB.transform.localScale = new Vector3(ballScale, ballScale, ballScale);
}
}

UnitySDK/Assets/ML-Agents/Examples/Tennis/Scenes/Tennis.unity (8)


timeScale: 1
targetFrameRate: 60
resetParameters:
resetParameters: []
resetParameters:
- key: gravity
value: 9.81
- key: angle
value: 55
- key: scale
value: 1
--- !u!1001 &1065879750
Prefab:
m_ObjectHideFlags: 0

UnitySDK/Assets/ML-Agents/Examples/Tennis/Scenes/TennisIL.unity (18)


brain: {fileID: 11400000, guid: 6bf6a586a645b471bb9bd1194ae0e229, type: 2}
agentParameters:
agentCameras: []
agentRenderTextures: []
maxStep: 5000
resetOnDone: 1
onDemandDecision: 0

score: 0
scoreText: {fileID: 2073469450}
opponent: {fileID: 1894084401}
angle: 0
scale: 0
--- !u!65 &348265184
BoxCollider:
m_ObjectHideFlags: 0

timeScale: 1
targetFrameRate: 60
resetParameters:
resetParameters: []
resetParameters:
- key: angle
value: 55
- key: scale
value: 1
- key: gravity
value: 9.81
--- !u!1 &1114726487
GameObject:
m_ObjectHideFlags: 0

brain: {fileID: 11400000, guid: 1674996276be448c2ad51fb139e21e05, type: 2}
agentParameters:
agentCameras: []
agentRenderTextures: []
maxStep: 5000
resetOnDone: 1
onDemandDecision: 0

score: 0
scoreText: {fileID: 1871669621}
opponent: {fileID: 348265181}
angle: 0
scale: 0
--- !u!65 &1894084404
BoxCollider:
m_ObjectHideFlags: 0

UnitySDK/Assets/ML-Agents/Examples/Tennis/Scripts/HitWall.cs (2)


lastAgentHit = collision.gameObject.name == "AgentA" ? 0 : 1;
}
}
}
}

UnitySDK/Assets/ML-Agents/Examples/Tennis/Scripts/TennisAcademy.cs (1)


public override void AcademyReset()
{
Physics.gravity = new Vector3(0, -resetParameters["gravity"], 0);
}
public override void AcademyStep()

UnitySDK/Assets/ML-Agents/Examples/Tennis/Scripts/TennisAgent.cs (40)


public bool invertX;
public int score;
public GameObject myArea;
public float angle;
public float scale;
private ResetParameters resetParams;
// Looks for the scoreboard based on the name of the gameObjects.
// Do not modify the names of the Score GameObjects

ballRb = ball.GetComponent<Rigidbody>();
var canvas = GameObject.Find(CanvasName);
GameObject scoreBoard;
var academy = Object.FindObjectOfType<Academy>() as Academy;
resetParams = academy.resetParameters;
if (invertX)
{
scoreBoard = canvas.transform.Find(ScoreBoardBName).gameObject;

scoreBoard = canvas.transform.Find(ScoreBoardAName).gameObject;
}
textComponent = scoreBoard.GetComponent<Text>();
SetResetParameters();
}
public override void CollectObservations()

{
var moveX = Mathf.Clamp(vectorAction[0], -1f, 1f) * invertMult;
var moveY = Mathf.Clamp(vectorAction[1], -1f, 1f);
if (moveY > 0.5 && transform.position.y - transform.parent.transform.position.y < -1.5f)
{
agentRb.velocity = new Vector3(agentRb.velocity.x, 7f, 0f);

if (invertX && transform.position.x - transform.parent.transform.position.x < -invertMult ||
if (invertX && transform.position.x - transform.parent.transform.position.x < -invertMult ||
transform.position = new Vector3(-invertMult + transform.parent.transform.position.x,
transform.position.y,
transform.position.z);
transform.position = new Vector3(-invertMult + transform.parent.transform.position.x,
transform.position.y,
transform.position.z);
}
textComponent.text = score.ToString();

transform.position = new Vector3(-invertMult * Random.Range(6f, 8f), -1.5f, 0f) + transform.parent.transform.position;
agentRb.velocity = new Vector3(0f, 0f, 0f);
SetResetParameters();
}
public void SetRacket()
{
angle = resetParams["angle"];
gameObject.transform.eulerAngles = new Vector3(
gameObject.transform.eulerAngles.x,
gameObject.transform.eulerAngles.y,
invertMult * angle
);
}
public void SetBall()
{
scale = resetParams["scale"];
ball.transform.localScale = new Vector3(scale, scale, scale);
}
public void SetResetParameters()
{
SetRacket();
SetBall();
}
}

docs/Background-Machine-Learning.md (2)


[Learning a policy](https://blogs.unity3d.com/2017/08/22/unity-ai-reinforcement-learning-with-q-learning/)
usually requires many trials and iterative policy updates. More specifically,
the robot is placed in several fire situations and over time learns an optimal
policy which allows it to put our fires more effectively. Obviously, we cannot
policy which allows it to put out fires more effectively. Obviously, we cannot
expect to train a robot repeatedly in the real world, particularly when fires
are involved. This is precisely why the use of
[Unity as a simulator](https://blogs.unity3d.com/2018/01/23/designing-safer-cities-through-simulations/)

docs/Installation.md (19)


### Install Python and mlagents Package
In order to use ML-Agents toolkit, you need Python 3.6 along with the
dependencies listed in the [setup.py file](../ml-agents/setup.py).
Some of the primary dependencies include:
- [TensorFlow](Background-TensorFlow.md) (Requires a CPU w/ AVX support)
- [Jupyter](Background-Jupyter.md)
[Download](https://www.python.org/downloads/) and install Python 3.6 if you do not
already have it.
In order to use ML-Agents toolkit, you need Python 3.6.
[Download](https://www.python.org/downloads/) and install Python 3.6 if you do not already have it.
To install the dependencies and `mlagents` Python package, run from the command line:
To install the `mlagents` Python package, run from the command line:
```sh
pip3 install mlagents

If you installed this correctly, you should be able to run
`mlagents-learn --help`, after which you will see the Unity logo and the command line
parameters you can use with `mlagents-learn`.
By installing the `mlagents` package, its dependencies listed in the [setup.py file](../ml-agents/setup.py) are also installed.
Some of the primary dependencies include:
- [TensorFlow](Background-TensorFlow.md) (Requires a CPU w/ AVX support)
- [Jupyter](Background-Jupyter.md)
**Notes:**

docs/Learning-Environment-Examples.md (38)


* Vector Action space: (Continuous) Size of 2, with one value corresponding to
X-rotation, and the other to Z-rotation.
* Visual Observations: None.
* Reset Parameters: None
* Reset Parameters: Three, corresponding to the following:
* scale: Specifies the scale of the ball in the 3 dimensions (equal across the three dimensions)
* Default: 1
* Recommended Minimum: 0.2
* Recommended Maximum: 5
* gravity: Magnitude of gravity
* Default: 9.81
* Recommended Minimum: 4
* Recommended Maximum: 105
* mass: Specifies mass of the ball
* Default: 1
* Recommended Minimum: 0.1
* Recommended Maximum: 20
* Benchmark Mean Reward: 100
## [GridWorld](https://youtu.be/gu8HE9WKEVI)

* Vector Action space: (Continuous) Size of 2, corresponding to movement
toward net or away from net, and jumping.
* Visual Observations: None.
* Reset Parameters: One, corresponding to size of ball.
* Reset Parameters: Three, corresponding to the following:
* angle: Angle of the racket from the vertical (Y) axis.
* Default: 55
* Recommended Minimum: 35
* Recommended Maximum: 65
* gravity: Magnitude of gravity
* Default: 9.81
* Recommended Minimum: 6
* Recommended Maximum: 20
* scale: Specifies the scale of the ball in the 3 dimensions (equal across the three dimensions)
* Default: 1
* Recommended Minimum: 0.2
* Recommended Maximum: 5
* Benchmark Mean Reward: 2.5
* Optional Imitation Learning scene: `TennisIL`.

as well as rotation.
* Goalie: 4 actions corresponding to forward, backward, sideways movement.
* Visual Observations: None.
* Reset Parameters: None
* Reset Parameters: Two, corresponding to the following:
* ball_scale: Specifies the scale of the ball in the 3 dimensions (equal across the three dimensions)
* Default: 7.5
* Recommended minimum: 4
* Recommended maximum: 10
* gravity: Magnitude of the gravity
* Default: 9.81
* Recommended minimum: 6
* Recommended maximum: 20
* Benchmark Mean Reward (Striker & Goalie Brain): 0 (the means will be inverse
of each other and criss-cross during training) __Note that our trainer is currently unable to consistently train this environment__
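
The reset parameters documented above can also be overridden from the Python API when resetting the environment. A minimal sketch, assuming the `UnityEnvironment.reset(config=..., train_mode=...)` signature from this version of `ml-agents-envs`; the executable name is illustrative:

```python
from mlagents.envs import UnityEnvironment

# Launch a built 3DBall executable (the path/name is illustrative).
env = UnityEnvironment(file_name="3DBall", worker_id=0)

# Override the reset parameters described above; omitted keys keep their defaults.
env.reset(train_mode=True, config={"mass": 0.5, "gravity": 4.0, "scale": 2.0})

env.close()
```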

docs/Training-Imitation-Learning.md (125)


Imitation Learning uses pairs of observations and actions from
a demonstration to learn a policy. [Video Link](https://youtu.be/kpb8ZkMBFYs).
Imitation learning can also be used to help reinforcement learning. Especially in
environments with sparse (i.e., infrequent or rare) rewards, the agent may never see
the reward and thus not learn from it. Curiosity helps the agent explore, but in some cases
it is easier to just show the agent how to achieve the reward. In these cases,
imitation learning can dramatically reduce the time it takes to solve the environment.
For instance, on the [Pyramids environment](Learning-Environment-Examples.md#pyramids),
just 6 episodes of demonstrations can reduce training steps by more than 4 times.
<p align="center">
<img src="images/mlagents-ImitationAndRL.png"
alt="Using Demonstrations with Reinforcement Learning"
width="350" border="0" />
</p>
ML-Agents provides several ways to learn from demonstrations. For most situations,
[GAIL](Training-RewardSignals.md#the-gail-reward-signal) is the preferred approach.
* To train using GAIL (Generative Adversarial Imitation Learning) you can add the
[GAIL reward signal](Training-RewardSignals.md#the-gail-reward-signal). GAIL can be
used with or without environment rewards, and works well when there are a limited
number of demonstrations.
* To help bootstrap reinforcement learning, you can enable
[pretraining](Training-PPO.md#optional-pretraining-using-demonstrations)
on the PPO trainer, in addition to using a small GAIL reward signal.
* To train an agent to exactly mimic demonstrations, you can use the
[Behavioral Cloning](Training-BehavioralCloning.md) trainer. Behavioral Cloning can be
used offline and online (in-editor), and learns very quickly. However, it usually is ineffective
on more complex environments without a large number of demonstrations.
## Recording Demonstrations
It is possible to record demonstrations of agent behavior from the Unity Editor,

alt="BC Teacher Helper"
width="375" border="10" />
</p>
## Training with Behavioral Cloning
There are a variety of possible imitation learning algorithms that can
be used; the simplest of them is Behavioral Cloning. It works by collecting
demonstrations from a teacher, and then simply uses them to directly learn a
policy, in the same way that supervised learning for image classification
or other traditional Machine Learning tasks works.
### Offline Training
With offline behavioral cloning, we can use demonstrations (`.demo` files)
generated using the `Demonstration Recorder` as the dataset used to train a behavior.
1. Choose an agent you would like to have learn to imitate some set of demonstrations.
2. Record a set of demonstration using the `Demonstration Recorder` (see above).
For illustrative purposes we will refer to this file as `AgentRecording.demo`.
3. Build the scene, assigning the agent a Learning Brain, and set the Brain to
Control in the Broadcast Hub. For more information on Brains, see
[here](Learning-Environment-Design-Brains.md).
4. Open the `config/offline_bc_config.yaml` file.
5. Modify the `demo_path` parameter in the file to reference the path to the
demonstration file recorded in step 2. In our case this is:
`./UnitySDK/Assets/Demonstrations/AgentRecording.demo`
6. Launch `mlagents-learn`, providing `./config/offline_bc_config.yaml`
as the config parameter, and include the `--run-id` and `--train` as usual.
Provide your environment as the `--env` parameter if it has been compiled
as standalone, or omit to train in the editor.
7. (Optional) Observe training performance using TensorBoard.
This will use the demonstration file to train a neural network driven agent
to directly imitate the actions provided in the demonstration. The environment
will launch and be used for evaluating the agent's performance during training.
### Online Training
It is also possible to provide demonstrations in realtime during training,
without pre-recording a demonstration file. The steps to do this are as follows:
1. First create two Brains, one which will be the "Teacher," and the other which
will be the "Student." We will assume that the names of the Brain
Assets are "Teacher" and "Student" respectively.
2. The "Teacher" Brain must be a **Player Brain**. You must properly
configure the inputs to map to the corresponding actions.
3. The "Student" Brain must be a **Learning Brain**.
4. The Brain Parameters of both the "Teacher" and "Student" Brains must be
compatible with the agent.
5. Drag both the "Teacher" and "Student" Brain into the Academy's `Broadcast Hub`
and check the `Control` checkbox on the "Student" Brain.
6. Link the Brains to the desired Agents (one Agent as the teacher and at least
one Agent as a student).
7. In `config/online_bc_config.yaml`, add an entry for the "Student" Brain. Set
the `trainer` parameter of this entry to `online_bc`, and the
`brain_to_imitate` parameter to the name of the teacher Brain: "Teacher".
Additionally, set `batches_per_epoch`, which controls how much training to do
each moment. Increase the `max_steps` option if you'd like to keep training
the Agents for a longer period of time.
8. Launch the training process with `mlagents-learn config/online_bc_config.yaml
--train --slow`, and press the :arrow_forward: button in Unity when the
message _"Start training by pressing the Play button in the Unity Editor"_ is
displayed on the screen
9. From the Unity window, control the Agent with the Teacher Brain by providing
"teacher demonstrations" of the behavior you would like to see.
10. Watch as the Agent(s) with the student Brain attached begin to behave
similarly to the demonstrations.
11. Once the Student Agents are exhibiting the desired behavior, end the training
process with `CTRL+C` from the command line.
12. Move the resulting `*.nn` file into the `TFModels` subdirectory of the
Assets folder (or a subdirectory within Assets of your choosing), and use it
with a `Learning` Brain.
**BC Teacher Helper**
We provide a convenience utility, the `BC Teacher Helper` component, which you can
add to the Teacher Agent.
<p align="center">
<img src="images/bc_teacher_helper.png"
alt="BC Teacher Helper"
width="375" border="10" />
</p>
This utility enables you to use keyboard shortcuts to do the following:
1. Start and stop recording experiences. This is useful in case you'd like to
interact with the game _but not have the agents learn from these
interactions_. The default command to toggle this is to press `R` on the
keyboard.
2. Reset the training buffer. This enables you to instruct the agents to forget
their buffer of recent experiences. This is useful if you'd like to get them
to quickly learn a new behavior. The default command to reset the buffer is
to press `C` on the keyboard.
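
As a quick sanity check on a recorded `.demo` file, it can be loaded from Python through the `demo_loader` module touched in this change. A minimal sketch, assuming `demo_to_buffer(file_path, sequence_length)` keeps the signature used by the trainers in this version; the path is the illustrative one from step 5 above:

```python
from mlagents.trainers.demo_loader import demo_to_buffer

# Load the recorded demonstration into a training buffer
# (sequence_length=1 is enough for a non-recurrent policy).
brain_params, demo_buffer = demo_to_buffer(
    "./UnitySDK/Assets/Demonstrations/AgentRecording.demo", sequence_length=1
)
print(brain_params.brain_name, brain_params.vector_action_space_size)
```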

docs/Training-PPO.md (70)


presented to an agent, see [Training with Curriculum
Learning](Training-Curriculum-Learning.md).
For information about imitation learning, which uses a different training
algorithm, see
For information about imitation learning from demonstrations, see
[Training with Imitation Learning](Training-Imitation-Learning.md).
## Best Practices when training with PPO

the agent will need to remember in order to successfully complete the task.
Typical Range: `64` - `512`
## (Optional) Pretraining Using Demonstrations
In some cases, you might want to bootstrap the agent's policy using behavior recorded
from a player. This can help guide the agent towards the reward. Pretraining adds
training operations that mimic a demonstration rather than attempting to maximize reward.
It is essentially equivalent to running [behavioral cloning](./Training-BehavioralCloning.md)
in-line with PPO.
To use pretraining, add a `pretraining` section to the trainer_config. For instance:
```
pretraining:
demo_path: ./demos/ExpertPyramid.demo
strength: 0.5
steps: 10000
```
Below are the available hyperparameters for pretraining.
### Strength
`strength` corresponds to the learning rate of the imitation relative to the learning
rate of PPO, and roughly corresponds to how strongly we allow the behavioral cloning
to influence the policy.
Typical Range: `0.1` - `0.5`
### Demo Path
`demo_path` is the path to your `.demo` file or directory of `.demo` files.
See the [imitation learning guide](Training-Imitation-Learning.md) for more on `.demo` files.
### Steps
During pretraining, it is often desirable to stop using demonstrations after the agent has
"seen" rewards, and allow it to optimize past the available demonstrations and/or generalize
outside of the provided demonstrations. `steps` corresponds to the training steps over which
pretraining is active. The learning rate of the pretrainer will anneal over the steps. Set
the steps to 0 for constant imitation over the entire training run.
### (Optional) Batch Size
`batch_size` is the number of demonstration experiences used for one iteration of a gradient
descent update. If not specified, it will default to the `batch_size` defined for PPO.
Typical Range (Continuous): `512` - `5120`
Typical Range (Discrete): `32` - `512`
### (Optional) Number of Epochs
`num_epoch` is the number of passes through the experience buffer during
gradient descent. If not specified, it will default to the number of epochs set for PPO.
Typical Range: `3` - `10`
### (Optional) Samples Per Update
`samples_per_update` is the maximum number of samples
to use during each imitation update. You may want to lower this if your demonstration
dataset is very large to avoid overfitting the policy on demonstrations. Set to 0
to train over all of the demonstrations at each update step.
Default Value: `0` (all)
Typical Range: Approximately equal to PPO's `buffer_size`
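
Putting the optional keys together, the `pretraining` section reaches the trainer as an ordinary dict once the YAML is parsed. A sketch of that dict with the hyperparameters described above; the values are illustrative placeholders, not recommendations:

```python
# The `pretraining` entry as the dict produced by parsing the YAML config.
pretraining_config = {
    "demo_path": "./demos/ExpertPyramid.demo",
    "strength": 0.5,
    "steps": 10000,
    "batch_size": 512,        # optional; defaults to PPO's batch_size
    "num_epoch": 3,           # optional; defaults to PPO's num_epoch
    "samples_per_update": 0,  # optional; 0 means use all demonstrations each update
}
```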
## Training Statistics

docs/Training-RewardSignals.md (99)


observation, but also not too small to prevent it from learning to differentiate between
demonstrated and actual behavior.
Default Value: 64
Default Value: `64`
Typical Range: `64` - `256`
#### Learning Rate

Default Value: `3e-4`
### The GAIL Reward Signal
GAIL, or [Generative Adversarial Imitation Learning](https://arxiv.org/abs/1606.03476), is an
imitation learning algorithm that uses an adversarial approach, in a similar vein to GANs
(Generative Adversarial Networks). In this framework, a second neural network, the
discriminator, is taught to distinguish whether an observation/action is from a demonstration, or
produced by the agent. This discriminator can then examine a new observation/action and provide it a
reward based on how close it believes this new observation/action is to the provided demonstrations.
At each training step, the agent tries to learn how to maximize this reward. Then, the
discriminator is trained to better distinguish between demonstrations and agent state/actions.
In this way, while the agent gets better and better at mimicking the demonstrations, the
discriminator keeps getting stricter and stricter and the agent must try harder to "fool" it.
This approach, when compared to [Behavioral Cloning](Training-BehavioralCloning.md), requires
far fewer demonstrations to be provided. After all, we are still learning a policy that happens
to be similar to the demonstration, not directly copying the behavior of the demonstrations. It
is also especially effective when combined with an Extrinsic signal, but can also be used
independently to purely learn from demonstration.
Using GAIL requires recorded demonstrations from your Unity environment. See the
[imitation learning guide](Training-Imitation-Learning.md) to learn more about recording demonstrations.
#### Strength
`strength` is the factor by which to multiply the raw reward. Note that when using GAIL
with an Extrinsic Signal, this value should be set lower if your demonstrations are
suboptimal (e.g. from a human), so that a trained agent will focus on receiving extrinsic
rewards instead of exactly copying the demonstrations. Keep the strength below about 0.1 in those cases.
Typical Range: `0.01` - `1.0`
#### Gamma
`gamma` corresponds to the discount factor for future rewards.
Typical Range: `0.8` - `0.9`
#### Demo Path
`demo_path` is the path to your `.demo` file or directory of `.demo` files. See the [imitation learning guide](Training-Imitation-Learning.md).
#### Encoding Size
`encoding_size` corresponds to the size of the hidden layer used by the discriminator.
This value should be small enough to encourage the discriminator to compress the original
observation, but also not too small to prevent it from learning to differentiate between
demonstrated and actual behavior. Dramatically increasing this size will also negatively affect
training times.
Default Value: `64`
Typical Range: `64` - `256`
#### Learning Rate
`learning_rate` is the learning rate used to update the discriminator.
This should typically be decreased if training is unstable, and the GAIL loss is unstable.
Default Value: `3e-4`
Typical Range: `1e-5` - `1e-3`
#### Use Actions
`use_actions` determines whether the discriminator should discriminate based on both
observations and actions, or just observations. Set to `True` if you want the agent to
mimic the actions from the demonstrations, and `False` if you'd rather have the agent
visit the same states as in the demonstrations but with possibly different actions.
Setting to `False` is more likely to be stable, especially with imperfect demonstrations,
but may learn slower.
Default Value: `false`
#### (Optional) Samples Per Update
`samples_per_update` is the maximum number of samples to use during each discriminator update. You may
want to lower this if your buffer size is very large to avoid overfitting the discriminator on current data.
If set to 0, we will use the minimum of buffer size and the number of demonstration samples.
Default Value: `0`
Typical Range: Approximately equal to [`buffer_size`](Training-PPO.md)
#### (Optional) Variational Discriminator Bottleneck
`use_vail` enables a [variational bottleneck](https://arxiv.org/abs/1810.00821) within the
GAIL discriminator. This forces the discriminator to learn a more general representation
and reduces its tendency to be "too good" at discriminating, making learning more stable.
However, it does increase training time. Enable this if you notice your imitation learning is
unstable, or unable to learn the task at hand.
Default Value: `false`
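
For reference, a `gail` entry under `reward_signals` with the hyperparameters described above would reach the trainer as a dict along these lines once the YAML is parsed; the values are illustrative placeholders drawn from the defaults and ranges listed in this section:

```python
# A reward_signals config entry for GAIL, as parsed from the trainer YAML.
reward_signals_config = {
    "gail": {
        "strength": 0.1,       # keep low when combined with an extrinsic signal
        "gamma": 0.9,
        "demo_path": "./demos/ExpertPyramid.demo",
        "encoding_size": 64,
        "learning_rate": 3e-4,
        "use_actions": False,
        "use_vail": False,
    }
}
```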

docs/Using-Docker.md (23)


the repository:
```sh
docker run --name <container-name> \
docker run -it --name <container-name> \
-p 6006:6006 \
<image-name>:latest \
--docker-target-name=unity-volume \
<trainer-config-file> \

To train with a `3DBall` environment executable, the command would be:
```sh
docker run --name 3DBallContainer.first.trial \
docker run -it --name 3DBallContainer.first.trial \
-p 6006:6006 \
--env=3DBall
--env=3DBall \
--train \
--run-id=3dball_first_trial
```

**NOTE** If you are training using docker for environments that use visual observations, you may need to increase the default memory that Docker allocates for the container. For example, see [here](https://docs.docker.com/docker-for-mac/#advanced) for instructions for Docker for Mac.
### Running Tensorboard
You can run Tensorboard to monitor your training instance on http://localhost:6006:
```sh
docker exec -it <container-name> tensorboard --logdir=/unity-volume/summaries --host=0.0.0.0
```
With our previous 3DBall example, this command would look like this:
```sh
docker exec -it 3DBallContainer.first.trial tensorboard --logdir=/unity-volume/summaries --host=0.0.0.0
```
For more details on Tensorboard, check out the documentation about [Using Tensorboard](Using-Tensorboard.md).
### Stopping Container and Saving State

gym-unity/gym_unity/envs/unity_env.py (6)


def _single_step(self, info):
if self.use_visual:
visual_obs = info.visual_observations
if isinstance(visual_obs, list):
visual_obs = np.array(visual_obs)
visual_obs_list.append(self._preprocess_single(obs[0, :, :, :]))
visual_obs_list.append(self._preprocess_single(obs[0]))
self.visual_obs = self._preprocess_single(visual_obs[0][0, :, :, :])
self.visual_obs = self._preprocess_single(visual_obs[0][0])
default_observation = self.visual_obs
else:
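
The preprocessing above runs inside the gym wrapper; for context, a minimal usage sketch, assuming the `UnityEnv` constructor of this gym-unity version and an illustrative `3DBall` executable:

```python
from gym_unity.envs import UnityEnv

# Wrap a Unity executable as a gym environment with visual observations enabled,
# so _single_step goes through the visual-observation branch shown above.
env = UnityEnv("3DBall", worker_id=0, use_visual=True)

obs = env.reset()
obs, reward, done, info = env.step(env.action_space.sample())
env.close()
```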

ml-agents-envs/mlagents/envs/__init__.py (4)


from .brain import *
from .brain import AllBrainInfo, BrainInfo, BrainParameters
from .action_info import ActionInfo, ActionInfoOutputs
from .policy import Policy
from .environment import *
from .exception import *

ml-agents-envs/mlagents/envs/brain.py (13)


return np.append(m1, m2, axis=0)
@staticmethod
def process_pixels(image_bytes, gray_scale):
def process_pixels(image_bytes: bytes, gray_scale: bool) -> np.ndarray:
"""
Converts byte array observation image into numpy array, re-sizes it,
and optionally converts it to grey scale

"""
s = bytearray(image_bytes)
image = Image.open(io.BytesIO(s))
image_bytearray = bytearray(image_bytes)
image = Image.open(io.BytesIO(image_bytearray))
s = np.array(image) / 255.0
if gray_scale:
s = np.mean(s, axis=2)

@staticmethod
def from_agent_proto(agent_info_list, brain_params):
def from_agent_proto(worker_id: int, agent_info_list, brain_params):
vis_obs = []
vis_obs: List[np.ndarray] = []
for i in range(brain_params.number_visual_observations):
obs = [
BrainInfo.process_pixels(

vector_obs = np.nan_to_num(
np.array([x.stacked_vector_observation for x in agent_info_list])
)
agents = [f"${worker_id}-{x.id}" for x in agent_info_list]
brain_info = BrainInfo(
visual_observation=vis_obs,
vector_observation=vector_obs,

agents=[x.id for x in agent_info_list],
agents=agents,
local_done=[x.done for x in agent_info_list],
vector_action=np.array([x.stored_vector_actions for x in agent_info_list]),
text_action=[list(x.stored_text_actions) for x in agent_info_list],
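
The `process_pixels` static method above can be exercised on its own; a minimal sketch, using a PNG file on disk as a stand-in for the image bytes Unity sends (the file name is illustrative):

```python
from mlagents.envs.brain import BrainInfo

# Raw PNG bytes stand in for a visual observation coming from Unity.
with open("observation.png", "rb") as f:
    image_bytes = f.read()

rgb = BrainInfo.process_pixels(image_bytes, gray_scale=False)  # float array scaled to [0, 1]
gray = BrainInfo.process_pixels(image_bytes, gray_scale=True)  # channel-averaged grayscale
print(rgb.shape, gray.shape)
```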

ml-agents-envs/mlagents/envs/environment.py (12)


docker_training: bool = False,
no_graphics: bool = False,
timeout_wait: int = 30,
args: list = [],
):
"""
Starts a new unity environment and establishes a connection with the environment.

:bool no_graphics: Whether to run the Unity simulator in no-graphics mode
:int timeout_wait: Time (in seconds) to wait for connection from environment.
:bool train_mode: Whether to run in training mode, speeding up the simulation, by default.
:list args: Additional Unity command line arguments
"""
atexit.register(self._close)

None
) # The process that is started. If None, no process was started
self.communicator = self.get_communicator(worker_id, base_port, timeout_wait)
self.worker_id = worker_id
# If the environment name is None, a new environment will not be launched
# and the communicator will directly try to connect to an existing unity environment.

"the worker-id must be 0 in order to connect with the Editor."
)
if file_name is not None:
self.executable_launcher(file_name, docker_training, no_graphics)
self.executable_launcher(file_name, docker_training, no_graphics, args)
else:
logger.info(
"Start training by pressing the Play button in the Unity Editor."

def reset_parameters(self):
return self._resetParameters
def executable_launcher(self, file_name, docker_training, no_graphics):
def executable_launcher(self, file_name, docker_training, no_graphics, args):
cwd = os.getcwd()
file_name = (
file_name.strip()

"--port",
str(self.port),
]
+ args
[launch_string, "--port", str(self.port)]
[launch_string, "--port", str(self.port)] + args
)
else:
"""

for brain_name in output.agentInfos:
agent_info_list = output.agentInfos[brain_name].value
_data[brain_name] = BrainInfo.from_agent_proto(
agent_info_list, self.brains[brain_name]
self.worker_id, agent_info_list, self.brains[brain_name]
)
return _data, global_done
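
The new `args` parameter threaded through `executable_launcher` above lets callers forward extra command line arguments to the launched Unity executable. A minimal sketch; the specific flags are only hypothetical examples, not required options:

```python
from mlagents.envs import UnityEnvironment

# Pass additional command line arguments to the Unity executable at launch.
# "-logFile unity.log" is a hypothetical example argument pair.
env = UnityEnvironment(
    file_name="3DBall",
    worker_id=0,
    args=["-logFile", "unity.log"],
)
env.close()
```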

ml-agents-envs/setup.py (2)


"pytest>=3.2.2,<4.0.0",
"protobuf>=3.6,<3.7",
"grpcio>=1.11.0,<1.12.0",
"cloudpickle==0.8.1",
"cloudpickle",
],
python_requires=">=3.5,<3.8",
)

ml-agents/mlagents/trainers/__init__.py (3)


from .action_info import *
from .buffer import *
from .curriculum import *
from .meta_curriculum import *

from .policy import *
from .tf_policy import *
from .trainer_controller import *
from .bc.models import *
from .bc.offline_trainer import *

ml-agents/mlagents/trainers/bc/policy.py (4)


import numpy as np
from mlagents.trainers.bc.models import BehavioralCloningModel
from mlagents.trainers.policy import Policy
from mlagents.trainers.tf_policy import TFPolicy
class BCPolicy(Policy):
class BCPolicy(TFPolicy):
def __init__(self, seed, brain, trainer_parameters, load):
"""
:param seed: Random seed.

ml-agents/mlagents/trainers/components/reward_signals/curiosity/model.py (14)


from typing import List, Tuple
import tensorflow as tf
from mlagents.trainers.models import LearningModel

"""
self.encoding_size = encoding_size
self.policy_model = policy_model
self.next_visual_in: List[tf.Tensor] = []
def create_curiosity_encoders(self):
def create_curiosity_encoders(self) -> Tuple[tf.Tensor, tf.Tensor]:
"""
Creates state encoders for current and future observations.
Used for implementation of Curiosity-driven Exploration by Self-supervised Prediction

encoded_next_state = tf.concat(encoded_next_state_list, axis=1)
return encoded_state, encoded_next_state
def create_inverse_model(self, encoded_state, encoded_next_state):
def create_inverse_model(
self, encoded_state: tf.Tensor, encoded_next_state: tf.Tensor
) -> None:
"""
Creates inverse model TensorFlow ops for Curiosity module.
Predicts action taken given current and future encoded states.

tf.dynamic_partition(cross_entropy, self.policy_model.mask, 2)[1]
)
def create_forward_model(self, encoded_state, encoded_next_state):
def create_forward_model(
self, encoded_state: tf.Tensor, encoded_next_state: tf.Tensor
) -> None:
"""
Creates forward model TensorFlow ops for Curiosity module.
Predicts encoded future state based on encoded current state and given action.

tf.dynamic_partition(squared_difference, self.policy_model.mask, 2)[1]
)
def create_loss(self, learning_rate):
def create_loss(self, learning_rate: float) -> None:
"""
Creates the loss node of the model as well as the update_batch optimizer to update the model.
:param learning_rate: The learning rate for the optimizer.

ml-agents/mlagents/trainers/components/reward_signals/curiosity/signal.py (32)


from typing import Any, Dict, List
from mlagents.envs.brain import BrainInfo
from mlagents.trainers.buffer import Buffer
from mlagents.trainers.policy import Policy
from mlagents.trainers.tf_policy import TFPolicy
policy: Policy,
policy: TFPolicy,
strength: float,
gamma: float,
encoding_size: int = 128,

"""
Creates the Curiosity reward generator
:param policy: The Learning Policy
:param encoding_size: The size of the Curiosity encoding
:param signal_strength: The scaling parameter for the reward. The scaled reward will be the unscaled
:param strength: The scaling parameter for the reward. The scaled reward will be the unscaled
:param gamma: The time discounting factor used for this reward.
:param encoding_size: The size of the hidden encoding layer for the ICM
:param learning_rate: The learning rate for the ICM.
:param num_epoch: The number of epochs to train over the training buffer for the ICM.
"""
super().__init__(policy, strength, gamma)
self.model = CuriosityModel(

}
self.has_updated = False
def evaluate(self, current_info, next_info):
def evaluate(
self, current_info: BrainInfo, next_info: BrainInfo
) -> RewardSignalResult:
"""
Evaluates the reward for the agents present in current_info given the next_info
:param current_info: The current BrainInfo.

return RewardSignalResult(scaled_reward, unscaled_reward)
@classmethod
def check_config(cls, config_dict):
def check_config(
cls, config_dict: Dict[str, Any], param_keys: List[str] = None
) -> None:
"""
Checks the config and throw an exception if a hyperparameter is missing. Curiosity requires strength,
gamma, and encoding size at minimum.

def update(self, update_buffer, num_sequences):
def update(self, update_buffer: Buffer, num_sequences: int) -> Dict[str, float]:
"""
Updates Curiosity model using training buffer. Divides training buffer into mini batches and performs
gradient descent.

"""
forward_total, inverse_total = [], []
forward_total: List[float] = []
inverse_total: List[float] = []
for _ in range(self.num_epoch):
update_buffer.shuffle()
buffer = update_buffer

}
return update_stats
def _update_batch(self, mini_batch, num_sequences):
def _update_batch(
self, mini_batch: Dict[str, np.ndarray], num_sequences: int
) -> Dict[str, float]:
"""
Updates model using buffer.
:param num_sequences: Number of trajectories in batch.

ml-agents/mlagents/trainers/components/reward_signals/extrinsic/signal.py (17)


from typing import Any, Dict, List
from mlagents.envs.brain import BrainInfo
from mlagents.trainers.buffer import Buffer
from mlagents.trainers.policy import Policy
from mlagents.trainers.tf_policy import TFPolicy
def __init__(self, policy: Policy, strength: float, gamma: float):
def __init__(self, policy: TFPolicy, strength: float, gamma: float):
"""
The extrinsic reward generator. Returns the reward received by the environment
:param policy: The Policy object (e.g. PPOPolicy) that this Reward Signal will apply to.

super().__init__(policy, strength, gamma)
@classmethod
def check_config(cls, config_dict):
def check_config(
cls, config_dict: Dict[str, Any], param_keys: List[str] = None
) -> None:
"""
Checks the config and throw an exception if a hyperparameter is missing. Extrinsic requires strength and gamma
at minimum.

def evaluate(self, current_info, next_info):
def evaluate(
self, current_info: BrainInfo, next_info: BrainInfo
) -> RewardSignalResult:
"""
Evaluates the reward for the agents present in current_info given the next_info
:param current_info: The current BrainInfo.

scaled_reward = self.strength * unscaled_reward
return RewardSignalResult(scaled_reward, unscaled_reward)
def update(self, update_buffer, num_sequences):
def update(self, update_buffer: Buffer, num_sequences: int) -> Dict[str, float]:
"""
This method does nothing, as there is nothing to update.
"""

ml-agents/mlagents/trainers/components/reward_signals/reward_signal.py (22)


import logging
from mlagents.trainers.trainer import UnityTrainerException
from mlagents.trainers.policy import Policy
from typing import Any, Dict, List
from collections import namedtuple
import numpy as np
import abc

from mlagents.envs.brain import BrainInfo
from mlagents.trainers.trainer import UnityTrainerException
from mlagents.trainers.tf_policy import TFPolicy
from mlagents.trainers.buffer import Buffer
logger = logging.getLogger("mlagents.trainers")
RewardSignalResult = namedtuple(

class RewardSignal(abc.ABC):
def __init__(self, policy: Policy, strength: float, gamma: float):
def __init__(self, policy: TFPolicy, strength: float, gamma: float):
"""
Initializes a reward signal. At minimum, you must pass in the policy it is being applied to,
the reward strength, and the gamma (discount factor.)

self.policy = policy
self.strength = strength
def evaluate(self, current_info, next_info):
def evaluate(
self, current_info: BrainInfo, next_info: BrainInfo
) -> RewardSignalResult:
"""
Evaluates the reward for the agents present in current_info given the next_info
:param current_info: The current BrainInfo.

return (
return RewardSignalResult(
def update(self, update_buffer, n_sequences):
def update(self, update_buffer: Buffer, num_sequences: int) -> Dict[str, float]:
"""
If the reward signal has an internal model (e.g. GAIL or Curiosity), update that model.
:param update_buffer: An AgentBuffer that contains the live data from which to update.

return {}
@classmethod
def check_config(cls, config_dict, param_keys=None):
def check_config(
cls, config_dict: Dict[str, Any], param_keys: List[str] = None
) -> None:
"""
Check the config dict, and throw an error if there are missing hyperparameters.
"""

ml-agents/mlagents/trainers/components/reward_signals/reward_signal_factory.py (6)


from mlagents.trainers.components.reward_signals.extrinsic.signal import (
ExtrinsicRewardSignal,
)
from mlagents.trainers.components.reward_signals.gail.signal import GAILRewardSignal
from mlagents.trainers.policy import Policy
from mlagents.trainers.tf_policy import TFPolicy
logger = logging.getLogger("mlagents.trainers")

"curiosity": CuriosityRewardSignal,
"gail": GAILRewardSignal,
policy: Policy, name: str, config_entry: Dict[str, Any]
policy: TFPolicy, name: str, config_entry: Dict[str, Any]
) -> RewardSignal:
"""
Creates a reward signal class based on the name and config entry provided as a dict.

ml-agents/mlagents/trainers/demo_loader.py (39)


import pathlib
import logging
import os
from typing import List, Tuple
from mlagents.envs.communicator_objects import *
from mlagents.envs.communicator_objects import (
AgentInfoProto,
BrainParametersProto,