
Merge branch 'master' into develop-splitpolicyoptimizer

/develop/nopreviousactions
Ervin Teng, 5 years ago
Current commit: db249ceb
280 files changed, with 4,249 insertions and 3,410 deletions
Changed files (first 100 of 280 shown; change count in parentheses):
1. .circleci/config.yml (2)
2. .github/ISSUE_TEMPLATE/config.yml (4)
3. .github/ISSUE_TEMPLATE/discussion---general-questions.md (9)
4. .gitignore (26)
5. .yamato/standalone-build-test.yml (2)
6. README.md (12)
7. config/gail_config.yaml (16)
8. config/sac_trainer_config.yaml (13)
9. config/trainer_config.yaml (9)
10. docs/Basic-Guide.md (10)
11. docs/Installation-Windows.md (4)
12. docs/Installation.md (10)
13. docs/Learning-Environment-Create-New.md (10)
14. docs/Learning-Environment-Examples.md (7)
15. docs/Learning-Environment-Executable.md (4)
16. docs/Migrating.md (12)
17. docs/Reward-Signals.md (2)
18. docs/Training-Curriculum-Learning.md (2)
19. docs/Training-PPO.md (2)
20. docs/Training-SAC.md (2)
21. docs/dox-ml-agents.conf (8)
22. docs/images/3dball_learning_brain.png (254)
23. docs/images/mlagents-NewProject.png (221)
24. docs/localized/KR/README.md (4)
25. docs/localized/zh-CN/README.md (2)
26. docs/localized/zh-CN/docs/Learning-Environment-Examples.md (2)
27. gym-unity/gym_unity/__init__.py (2)
28. ml-agents-envs/mlagents_envs/__init__.py (2)
29. ml-agents-envs/mlagents_envs/environment.py (2)
30. ml-agents/mlagents/tf_utils/__init__.py (1)
31. ml-agents/mlagents/tf_utils/tf.py (17)
32. ml-agents/mlagents/trainers/__init__.py (2)
33. ml-agents/mlagents/trainers/action_info.py (4)
34. ml-agents/mlagents/trainers/agent_processor.py (30)
35. ml-agents/mlagents/trainers/brain_conversion_utils.py (7)
36. ml-agents/mlagents/trainers/demo_loader.py (50)
37. ml-agents/mlagents/trainers/env_manager.py (73)
38. ml-agents/mlagents/trainers/ppo/trainer.py (3)
39. ml-agents/mlagents/trainers/sac/trainer.py (3)
40. ml-agents/mlagents/trainers/simple_env_manager.py (4)
41. ml-agents/mlagents/trainers/stats.py (16)
42. ml-agents/mlagents/trainers/subprocess_env_manager.py (4)
43. ml-agents/mlagents/trainers/tests/test_agent_processor.py (2)
44. ml-agents/mlagents/trainers/tests/test_bcmodule.py (4)
45. ml-agents/mlagents/trainers/tests/test_demo_loader.py (37)
46. ml-agents/mlagents/trainers/tests/test_policy.py (2)
47. ml-agents/mlagents/trainers/tests/test_ppo.py (24)
48. ml-agents/mlagents/trainers/tests/test_reward_signals.py (2)
49. ml-agents/mlagents/trainers/tests/test_sac.py (24)
50. ml-agents/mlagents/trainers/tests/test_simple_rl.py (33)
51. ml-agents/mlagents/trainers/tests/test_subprocess_env_manager.py (46)
52. ml-agents/mlagents/trainers/tests/test_trainer_controller.py (54)
53. ml-agents/mlagents/trainers/tf_policy.py (43)
54. ml-agents/mlagents/trainers/trainer.py (21)
55. ml-agents/mlagents/trainers/trainer_controller.py (82)
56. ml-agents/mlagents/trainers/trainer_util.py (12)
57. notebooks/getting-started.ipynb (2)
58. protobuf-definitions/README.md (2)
59. protobuf-definitions/make.sh (6)
60. protobuf-definitions/make_for_win.bat (6)
61. test_constraints_max_tf1_version.txt (3)
62. utils/validate_meta_files.py (2)
63. com.unity.ml-agents/CONTRIBUTING.md (2)
64. Project/ProjectSettings/ClusterInputManager.asset (6)
65. Project/ProjectSettings/GraphicsSettings.asset (5)
66. Project/ProjectSettings/InputManager.asset (295)
67. Project/ProjectSettings/NavMeshAreas.asset (2)
68. Project/ProjectSettings/TimeManager.asset (9)
69. Project/ProjectSettings/EditorBuildSettings.asset (1)
70. Project/ProjectSettings/ProjectSettings.asset (190)
71. Project/ProjectSettings/ProjectVersion.txt (1)
72. com.unity.ml-agents/Plugins/ProtoBuffer/runtimes/win/native/grpc_csharp_ext.x86.dll.meta (5)
73. com.unity.ml-agents/Plugins/ProtoBuffer/runtimes/win/native/grpc_csharp_ext.x64.dll.meta (5)
74. com.unity.ml-agents/Plugins/ProtoBuffer/runtimes/osx/native/libgrpc_csharp_ext.x64.bundle.meta (247)
75. com.unity.ml-agents/Plugins/ProtoBuffer/runtimes/linux/native/libgrpc_csharp_ext.x64.so.meta (5)
76. com.unity.ml-agents/Plugins/ProtoBuffer/Grpc.Core.dll.meta (5)
77. com.unity.ml-agents/Editor/DemonstrationDrawer.cs (1)
78. com.unity.ml-agents/Editor/BrainParametersDrawer.cs (2)
79. com.unity.ml-agents/Editor/BehaviorParametersEditor.cs (2)
80. com.unity.ml-agents/Editor/AgentEditor.cs (25)
81. com.unity.ml-agents/Tests/Editor/SideChannelTests.cs (2)
82. com.unity.ml-agents/Tests/Editor/Sensor/WriterAdapterTests.cs (6)
83. com.unity.ml-agents/Tests/Editor/Sensor/VectorSensorTests.cs (23)
84. com.unity.ml-agents/Tests/Editor/Sensor/StackingSensorTests.cs (26)
85. com.unity.ml-agents/Tests/Editor/Sensor/RayPerceptionSensorTests.cs (197)
86. com.unity.ml-agents/Tests/Editor/Sensor/FloatVisualSensorTests.cs (2)
87. com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs (107)
88. com.unity.ml-agents/Tests/Editor/EditModeTestInternalBrainTensorGenerator.cs (23)
89. com.unity.ml-agents/Tests/Editor/EditModeTestInternalBrainTensorApplier.cs (8)
90. com.unity.ml-agents/Tests/Editor/DemonstrationTests.cs (6)
91. Project/Assets/ML-Agents/Examples/WallJump/Prefabs/WallJumpArea.prefab (929)
92. Project/Assets/ML-Agents/Examples/Walker/Scripts/WalkerAgent.cs (93)
93. Project/Assets/ML-Agents/Examples/Walker/Prefabs/WalkerPair.prefab (988)
94. Project/Assets/ML-Agents/Examples/Tennis/TFModels/Tennis.nn (1001)
95. Project/Assets/ML-Agents/Examples/Tennis/Scripts/TennisArea.cs (2)
96. Project/Assets/ML-Agents/Examples/Tennis/Scripts/TennisAgent.cs (9)
97. Project/Assets/ML-Agents/Examples/Tennis/Scripts/HitWall.cs (157)
98. Project/Assets/ML-Agents/Examples/Tennis/Scenes/Tennis.unity (118)
99. Project/Assets/ML-Agents/Examples/Tennis/Prefabs/TennisArea.prefab (946)
100. Project/Assets/ML-Agents/Examples/Soccer/Prefabs/SoccerFieldTwos.prefab (932)

2
.circleci/config.yml


chmod +x Grpc.Tools.1.14.1/tools/linux_x64/protoc
chmod +x Grpc.Tools.1.14.1/tools/linux_x64/grpc_csharp_plugin
COMPILER=Grpc.Tools.1.14.1/tools/linux_x64 ./make.sh
CS_PROTO_PATH=UnitySDK/Assets/ML-Agents/Scripts/Grpc/CommunicatorObjects
CS_PROTO_PATH=com.unity.ml-agents/Runtime/Grpc/CommunicatorObjects
git diff --exit-code --quiet -- :/ ":(exclude,top)$CS_PROTO_PATH/*.meta" \
|| { GIT_ERR=$?; echo "protobufs need to be regenerated, apply the patch uploaded to artifacts."; \
echo "Apply the patch with the command: git apply proto.patch"; \

4
.github/ISSUE_TEMPLATE/config.yml


blank_issues_enabled: false
contact_links:
- name: ML-Agents Unity Forum
url: https://forum.unity.com/forums/ml-agents.453/
about: Please ask Installation / Setup and Discussion / General Questions in the Unity Forum.

9
.github/ISSUE_TEMPLATE/discussion---general-questions.md


---
Describe what you'd like to discuss.
For discussions, please post in [ML-Agents Unity Forum](https://forum.unity.com/forums/ml-agents.453/) instead of
creating a Github issue. Unity forums are the predominant community for Unity users and experts. By leveraging Unity
forums for general discussions and project help, we can keep Github issues for bugs, performance issues, and feature
requests for ML-Agents.
**Note**: The ML-Agents team has limited resources for education and community discussion. We'll participate as we are able, but encourage members of the community to support one another to discuss and support one another.
**Note**: The ML-Agents team has limited resources for education and community discussion. We'll participate as we are
able, but encourage members of the community to support one another to discuss and support one another.

26
.gitignore


/UnitySDK/[Ll]ibrary/
/UnitySDK/Logs/
/UnitySDK/[Tt]emp/
/UnitySDK/[Oo]bj/
/UnitySDK/[Bb]uild/
/UnitySDK/[Bb]uilds/
/UnitySDK/Assets/AssetStoreTools*
/UnitySDK/Assets/Plugins*
/UnitySDK/Assets/Demonstrations*
/UnitySDK/csharp_timers.json
# Tensorflow Model Info
/models
/summaries

/UnitySDK/.vs/
# Autogenerated VS/MD/Consulo solution and project files
/UnitySDKExportedObj/
/UnitySDK.consulo/
/com.unity.ml-agentsExportedObj/
/com.unity.ml-agents.consulo/
*.csproj
*.unityproj
*.sln

*.pidb.meta
# Unity3D Generated File On Crash Reports
/UnitySDK/sysinfo.txt
/com.unity.ml-agents/sysinfo.txt
# Builds
*.apk

*.x86_64
*.x86
# Tensorflow Sharp Files
/UnitySDK/Assets/ML-Agents/Plugins/Android*
/UnitySDK/Assets/ML-Agents/Plugins/iOS*
/UnitySDK/Assets/ML-Agents/Plugins/Computer*
/UnitySDK/Assets/ML-Agents/Plugins/System.Numerics*
/UnitySDK/Assets/ML-Agents/Plugins/System.ValueTuple*
/UnitySDK/Assets/ML-Agents/VideoRecorder*
/com.unity.ml-agents/VideoRecorder*
# Generated doc folders
/docs/html

2
.yamato/standalone-build-test.yml


variables:
UNITY_VERSION: {{ editor.version }}
commands:
- ./run-standalone-build-osx.sh
- python -u -m ml-agents.tests.yamato.standalone_build_tests
triggers:
pull_requests:
- targets:

12
README.md


The ML-Agents toolkit is an open-source project and we encourage and welcome
contributions. If you wish to contribute, be sure to review our
[contribution guidelines](CONTRIBUTING.md) and
[contribution guidelines](com.unity.ml-agents/CONTRIBUTING.md) and
If you run into any problems using the ML-Agents toolkit,
[submit an issue](https://github.com/Unity-Technologies/ml-agents/issues) and
make sure to include as much detail as possible.
For problems with the installation and setup of the ML-Agents toolkit, or
discussions about how to best setup or train your agents, please create a new
thread on the [Unity ML-Agents forum](https://forum.unity.com/forums/ml-agents.453/)
and make sure to include as much detail as possible.
If you run into any other problems using the ML-Agents toolkit, or have a specific
feature requests, please [submit a GitHub issue](https://github.com/Unity-Technologies/ml-agents/issues).
For any other questions or feedback, connect directly with the ML-Agents
team at ml-agents@unity3d.com.

16
config/gail_config.yaml


max_steps: 5.0e5
num_epoch: 3
behavioral_cloning:
demo_path: UnitySDK/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
demo_path: Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
strength: 0.5
steps: 10000
reward_signals:

strength: 0.01
gamma: 0.99
encoding_size: 128
demo_path: UnitySDK/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
demo_path: Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
CrawlerStatic:
normalize: true

num_layers: 3
hidden_units: 512
behavioral_cloning:
demo_path: UnitySDK/Assets/ML-Agents/Examples/Crawler/Demos/ExpertCrawlerSta.demo
demo_path: Project/Assets/ML-Agents/Examples/Crawler/Demos/ExpertCrawlerSta.demo
strength: 0.5
steps: 5000
reward_signals:

encoding_size: 128
demo_path: UnitySDK/Assets/ML-Agents/Examples/Crawler/Demos/ExpertCrawlerSta.demo
demo_path: Project/Assets/ML-Agents/Examples/Crawler/Demos/ExpertCrawlerSta.demo
PushBlock:
max_steps: 5.0e4

strength: 1.0
gamma: 0.99
encoding_size: 128
demo_path: UnitySDK/Assets/ML-Agents/Examples/PushBlock/Demos/ExpertPush.demo
demo_path: Project/Assets/ML-Agents/Examples/PushBlock/Demos/ExpertPush.demo
Hallway:
use_recurrent: true

strength: 0.1
gamma: 0.99
encoding_size: 128
demo_path: UnitySDK/Assets/ML-Agents/Examples/Hallway/Demos/ExpertHallway.demo
demo_path: Project/Assets/ML-Agents/Examples/Hallway/Demos/ExpertHallway.demo
FoodCollector:
batch_size: 64

strength: 0.1
gamma: 0.99
encoding_size: 128
demo_path: UnitySDK/Assets/ML-Agents/Examples/FoodCollector/Demos/ExpertFood.demo
demo_path: Project/Assets/ML-Agents/Examples/FoodCollector/Demos/ExpertFood.demo
demo_path: UnitySDK/Assets/ML-Agents/Examples/FoodCollector/Demos/ExpertFood.demo
demo_path: Project/Assets/ML-Agents/Examples/FoodCollector/Demos/ExpertFood.demo
strength: 1.0
steps: 0

13
config/sac_trainer_config.yaml


gamma: 0.99
encoding_size: 128
use_actions: true
demo_path: UnitySDK/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
demo_path: Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
VisualPyramids:
time_horizon: 128

gamma: 0.99
encoding_size: 128
use_actions: true
demo_path: UnitySDK/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
demo_path: Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
3DBall:
normalize: true

time_horizon: 1000
Tennis:
buffer_size: 500000
max_steps: 4e6
max_steps: 2e7
hidden_units: 256
self_play:
window: 10
play_against_current_self_ratio: 0.5
save_steps: 50000
swap_steps: 50000
CrawlerStatic:
normalize: true

9
config/trainer_config.yaml


Tennis:
normalize: true
max_steps: 4e6
max_steps: 2e7
learning_rate_schedule: constant
hidden_units: 256
self_play:
window: 10
play_against_current_self_ratio: 0.5
save_steps: 50000
swap_steps: 50000
CrawlerStatic:
normalize: true

10
docs/Basic-Guide.md


1. Launch Unity
2. On the Projects dialog, choose the **Open** option at the top of the window.
3. Using the file dialog that opens, locate the `UnitySDK` folder
3. Using the file dialog that opens, locate the `Project` folder
within the ML-Agents toolkit project and click **Open**.
4. Go to **Edit** > **Project Settings** > **Player**
5. For **each** of the platforms you target (**PC, Mac and Linux Standalone**,

![Platform Prefab](images/platform_prefab.png)
3. In the **Project** window, drag the **3DBallLearning** Model located in
`Assets/ML-Agents/Examples/3DBall/TFModels` into the `Model` property under `Ball 3D Agent (Script)` component in the **Inspector** window.
3. In the **Project** window, drag the **3DBall** Model located in
`Assets/ML-Agents/Examples/3DBall/TFModels` into the `Model` property under `Behavior Parameters (Script)` component in the Agent GameObject **Inspector** window.
4. You should notice that each `Agent` under each `3DBall` in the **Hierarchy** windows now contains **3DBallLearning** as `Model`. __Note__ : You can modify multiple game objects in a scene by selecting them all at
4. You should notice that each `Agent` under each `3DBall` in the **Hierarchy** windows now contains **3DBall** as `Model` on the `Behavior Parameters`. __Note__ : You can modify multiple game objects in a scene by selecting them all at
once using the search bar in the Scene Hierarchy.
8. Select the **InferenceDevice** to use for this model (CPU or GPU) on the Agent.
_Note: CPU is faster for the majority of ML-Agents toolkit generated models_

[above](#running-a-pre-trained-model).
1. Move your model file into
`UnitySDK/Assets/ML-Agents/Examples/3DBall/TFModels/`.
`Project/Assets/ML-Agents/Examples/3DBall/TFModels/`.
2. Open the Unity Editor, and select the **3DBall** scene as described above.
3. Select the **3DBall** prefab Agent object.
4. Drag the `<behavior_name>.nn` file from the Project window of

4
docs/Installation-Windows.md


If you don't want to use Git, you can always directly download all the files
[here](https://github.com/Unity-Technologies/ml-agents/archive/latest_release.zip).
The `UnitySDK` subdirectory contains the Unity Assets to add to your projects.
It also contains many [example environments](Learning-Environment-Examples.md)
The `com.unity.ml-agents` subdirectory contains the core code to add to your projects.
The `Project` subdirectory contains many [example environments](Learning-Environment-Examples.md)
to help you get started.
The `ml-agents` subdirectory contains a Python package which provides deep reinforcement

10
docs/Installation.md


The `--branch latest_release` option will switch to the tag of the latest stable release.
Omitting that will get the `master` branch which is potentially unstable.
The `UnitySDK` subdirectory contains the Unity Assets to add to your projects.
It also contains many [example environments](Learning-Environment-Examples.md)
The `com.unity.ml-agents` subdirectory contains the core code to add to your projects.
The `Project` subdirectory contains many [example environments](Learning-Environment-Examples.md)
If you intend to copy the `UnitySDK` folder in to your project, ensure that
If you intend to copy the `com.unity.ml-agents` folder in to your project, ensure that
To install the Barrcuda package in later versions of Unity, navigate to the Package
To install the Barracuda package in later versions of Unity, navigate to the Package
`Adavanced` dropdown menu to the left of the search bar and make sure "Show Preview Packages"
`Advanced` dropdown menu to the left of the search bar and make sure "Show Preview Packages"
is checked. Search for or select the `Barracuda` package and install the latest version.
<p align="center">

10
docs/Learning-Environment-Create-New.md


but is the default as of 2018.3.)
3. In a file system window, navigate to the folder containing your cloned
ML-Agents repository.
4. Drag the `ML-Agents` folder from `UnitySDK/Assets` to the Unity
Editor Project window. If you see console errors about Barracuda, make sure
you've installed Barracuda from the Unity Package Manager. More information
can be found in the [installation instructions](Installation.md) under
4. Open the `manifest.json` file in the `Packages` directory of your project.
Add the following line to your project's package dependencies:
```
"com.unity.ml-agents" : "file:<path_to_local_ml-agents_repo>/com.unity.ml-agents"
```
More information can be found in the [installation instructions](Installation.md) under
**Package Installation**.
Your Unity **Project** window should contain the following assets:

7
docs/Learning-Environment-Examples.md


The Unity ML-Agents toolkit contains an expanding set of example environments
which demonstrate various features of the platform. Environments are located in
`UnitySDK/Assets/ML-Agents/Examples` and summarized below. Additionally, our
`Project/Assets/ML-Agents/Examples` and summarized below. Additionally, our
[first ML Challenge](https://connect.unity.com/challenges/ml-agents-1) contains
environments created by the community.

researchers.
If you would like to contribute environments, please see our
[contribution guidelines](../CONTRIBUTING.md) page.
[contribution guidelines](../com.unity.ml-agents/CONTRIBUTING.md) page.
## Basic

* Goal: Move to the most reward state.
* Agents: The environment contains one agent.
* Agent Reward Function:
* -0.01 at each step
* +0.1 for arriving at suboptimal state.
* +1.0 for arriving at optimal state.
* Behavior Parameters:

* Visual Observations: None
* Float Properties: None
* Benchmark Mean Reward: 0.94
* Benchmark Mean Reward: 0.93
## [3DBall: 3D Balance Ball](https://youtu.be/dheeCO29-EI)

4
docs/Learning-Environment-Executable.md


1. Launch Unity.
2. On the Projects dialog, choose the **Open** option at the top of the window.
3. Using the file dialog that opens, locate the `UnitySDK` folder within the
3. Using the file dialog that opens, locate the `Project` folder within the
ML-Agents project and click **Open**.
4. In the **Project** window, navigate to the folder
`Assets/ML-Agents/Examples/3DBall/Scenes/`.

into your Agent by following the steps below:
1. Move your model file into
`UnitySDK/Assets/ML-Agents/Examples/3DBall/TFModels/`.
`Project/Assets/ML-Agents/Examples/3DBall/TFModels/`.
2. Open the Unity Editor, and select the **3DBall** scene as described above.
3. Select the **3DBall** prefab from the Project window and select **Agent**.
5. Drag the `<behavior_name>.nn` file from the Project window of

12
docs/Migrating.md


## Migrating from 0.13 to latest
### Important changes
* The `Decision Period` and `On Demand decision` checkbox have been removed from the Agent. On demand decision is now the default (calling `RequestDecision` on the Agent manually.)
* Agents will always request a decision after being marked as `Done()` and will no longer wait for the next call to `RequestDecision()`.
* The `agentParameters` field of the Agent has been removed. (Contained only `maxStep` information)
* `maxStep` is now a public field on the Agent. (Was moved from `agentParameters`)
* The `Info` field of the Agent has been made private. (Was only used internally and not meant to be modified outside of the Agent)
* The `GetReward()` method on the Agent has been removed. (It was being confused with `GetCumulativeReward()`)
* The `AgentAction` struct no longer contains a `value` field. (Value estimates were not set during inference)
* The `GetValueEstimate()` method on the Agent has been removed.
* The `UpdateValueAction()` method on the Agent has been removed.
* Calling `Done()` on the Agent will now reset it immediately and call the `AgentReset` virtual method. (This is to simplify the previous logic in which the Agent had to wait for the next `EnvironmentStep` to reset)
* If you were not using `On Demand Decision` for your Agent, you **must** add a `DecisionRequester` component to your Agent GameObject and set its `Decision Period` field to the old `Decision Period` of the Agent.
* If you have a class that inherits from Academy:
* If the class didn't override any of the virtual methods and didn't store any additional data, you can just remove the old script from the scene.
* If the class had additional data, create a new MonoBehaviour and store the data on this instead.

* Combine curriculum configs into a single file. See [the WallJump curricula](../config/curricula/wall_jump.yaml) for an example of the new curriculum config format.
A tool like https://www.json2yaml.com may be useful to help with the conversion.
* If your Agent implements `AgentOnDone` and your Agent does not have the checkbox `Reset On Done` checked in the inspector, you must call the code that was in `AgentOnDone` manually.
## Migrating from ML-Agents toolkit v0.12.0 to v0.13.0

2
docs/Reward-Signals.md


strength: 0.01
gamma: 0.99
encoding_size: 128
demo_path: UnitySDK/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
demo_path: Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
```
Each reward signal should define at least two parameters, `strength` and `gamma`, in addition

2
docs/Training-Curriculum-Learning.md


Once our curriculum is defined, we have to use the reset parameters we defined
and modify the environment from the Agent's `AgentReset()` function. See
[WallJumpAgent.cs](https://github.com/Unity-Technologies/ml-agents/blob/master/UnitySDK/Assets/ML-Agents/Examples/WallJump/Scripts/WallJumpAgent.cs)
[WallJumpAgent.cs](https://github.com/Unity-Technologies/ml-agents/blob/master/Project/Assets/ML-Agents/Examples/WallJump/Scripts/WallJumpAgent.cs)
for an example.

2
docs/Training-PPO.md


```
behavioral_cloning:
demo_path: ./UnitySDK/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
demo_path: ./Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
strength: 0.5
steps: 10000
```

2
docs/Training-SAC.md


```
behavioral_cloning:
demo_path: ./UnitySDK/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
demo_path: ./Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
strength: 0.5
steps: 10000
```

8
docs/dox-ml-agents.conf


# spaces. See also FILE_PATTERNS and EXTENSION_MAPPING
# Note: If this tag is empty the current directory is searched.
INPUT = ../UnitySDK/Assets/ML-Agents/Scripts/Academy.cs \
../UnitySDK/Assets/ML-Agents/Scripts/Agent.cs \
../UnitySDK/Assets/ML-Agents/Scripts/Monitor.cs \
../UnitySDK/Assets/ML-Agents/Scripts/Decision.cs
INPUT = ../Project/Assets/ML-Agents/Scripts/Academy.cs \
../Project/Assets/ML-Agents/Scripts/Agent.cs \
../Project/Assets/ML-Agents/Scripts/Monitor.cs \
../Project/Assets/ML-Agents/Scripts/Decision.cs
# This tag can be used to specify the character encoding of the source files
# that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses

254
docs/images/3dball_learning_brain.png

Before / After
Width: 413 | Height: 349 | Size: 35 KiB

221
docs/images/mlagents-NewProject.png

Before / After
Width: 193 | Height: 345 | Size: 20 KiB

4
docs/localized/KR/README.md


## 커뮤니티 그리고 피드백
ML-Agents toolkit은 오픈소스 프로젝트이며 컨트리뷰션을 환영합니다. 만약 컨트리뷰션을 원하시는 경우
[컨트리뷰션 가이드라인](CONTRIBUTING.md)과 [행동 규칙](CODE_OF_CONDUCT.md)을 검토해주십시오.
[컨트리뷰션 가이드라인](com/unity.ml-agents/CONTRIBUTING.md)과 [행동 규칙](CODE_OF_CONDUCT.md)을 검토해주십시오.
만약 ML-Agents toolkit을 사용하며 문제가 생긴다면, 가능한 많은 세부 사항을 포함하여 [이슈 제출](https://github.com/Unity-Technologies/ml-agents/issues)을 해주십시오.

장현준: totok682@naver.com
민규식: kyushikmin@gmail.com
민규식: kyushikmin@gmail.com

2
docs/localized/zh-CN/README.md


ML-Agents 是一个开源项目,我们鼓励并欢迎大家贡献自己的力量。
如果您想做出贡献,请务必查看我们的
[贡献准则](/CONTRIBUTING.md)和
[贡献准则](/com.unity.ml-agents/CONTRIBUTING.md)和
[行为准则](/CODE_OF_CONDUCT.md)。
您可以通过 Unity Connect 和 GitHub 与我们以及更广泛的社区进行交流:

2
docs/localized/zh-CN/docs/Learning-Environment-Examples.md


页面。
如果您想提交自己的环境,请参阅我们的
[贡献指南](/CONTRIBUTING.md)页面。
[贡献指南](/com.unity.ml-agents/CONTRIBUTING.md)页面。
## Basic

2
gym-unity/gym_unity/__init__.py


__version__ = "0.13.0"
__version__ = "0.14.0.dev0"

2
ml-agents-envs/mlagents_envs/__init__.py


__version__ = "0.13.0"
__version__ = "0.14.0.dev0"

2
ml-agents-envs/mlagents_envs/environment.py


class UnityEnvironment(BaseEnv):
SCALAR_ACTION_TYPES = (int, np.int32, np.int64, float, np.float32, np.float64)
SINGLE_BRAIN_ACTION_TYPES = SCALAR_ACTION_TYPES + (list, np.ndarray)
API_VERSION = "API-13"
API_VERSION = "API-14-dev0"
def __init__(
self,

1
ml-agents/mlagents/tf_utils/__init__.py


from mlagents.tf_utils.tf import tf as tf # noqa
from mlagents.tf_utils.tf import set_warnings_enabled # noqa
from mlagents.tf_utils.tf import generate_session_config # noqa

17
ml-agents/mlagents/tf_utils/tf.py


def set_warnings_enabled(is_enabled: bool) -> None:
"""
Enable or disable tensorflow warnings (notabley, this disables deprecation warnings.
Enable or disable tensorflow warnings (notably, this disables deprecation warnings.
def generate_session_config() -> tf.ConfigProto:
"""
Generate a ConfigProto to use for ML-Agents that doesn't consume all of the GPU memory
and allows for soft placement in the case of multi-GPU.
"""
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
# For multi-GPU training, set allow_soft_placement to True to allow
# placing the operation into an alternative device automatically
# to prevent exceptions if the device doesn't support the operation
# or the device does not exist
config.allow_soft_placement = True
return config
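For reference, a hedged sketch of how the config produced above would typically be consumed; the graph and session variables below are illustrative and not part of this commit, and only the `mlagents.tf_utils` exports shown in this diff are assumed.

```python
# Minimal sketch: pass the ConfigProto from generate_session_config to a
# TF1-style session. allow_growth and allow_soft_placement are already set.
from mlagents.tf_utils import tf, generate_session_config

graph = tf.Graph()
with graph.as_default():
    sess = tf.Session(config=generate_session_config(), graph=graph)
```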

2
ml-agents/mlagents/trainers/__init__.py


__version__ = "0.13.0"
__version__ = "0.14.0.dev0"

4
ml-agents/mlagents/trainers/action_info.py


value: Any
outputs: ActionInfoOutputs
agent_ids: List[AgentId]
@staticmethod
def empty() -> "ActionInfo":
return ActionInfo([], [], {}, [])
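A minimal illustration of the new `ActionInfo.empty()` helper: because `ActionInfo` is a NamedTuple, the convenience constructor compares equal to the hand-built empty value it replaces in the tests further down.

```python
from mlagents.trainers.action_info import ActionInfo

# The convenience constructor mirrors the literal previously used in tests.
assert ActionInfo.empty() == ActionInfo([], [], {}, [])
```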

30
ml-agents/mlagents/trainers/agent_processor.py


from mlagents.trainers.policy import Policy
from mlagents.trainers.action_info import ActionInfo, ActionInfoOutputs
from mlagents.trainers.stats import StatsReporter
from mlagents.trainers.env_manager import get_global_agent_id
from mlagents.trainers.brain_conversion_utils import get_global_agent_id
T = TypeVar("T")

"Policy/Learning Rate", take_action_outputs["learning_rate"]
)
terminated_agents: List[str] = []
# Make unique agent_ids that are global across workers
action_global_agent_ids = [
get_global_agent_id(worker_id, ag_id) for ag_id in previous_action.agent_ids

"Environment/Episode Length",
self.episode_steps.get(global_id, 0),
)
del self.episode_steps[global_id]
del self.episode_rewards[global_id]
terminated_agents += [global_id]
elif not curr_agent_step.done:
self.episode_steps[global_id] += 1

previous_action.agent_ids, take_action_outputs["action"]
)
for terminated_id in terminated_agents:
self._clean_agent_data(terminated_id)
def _clean_agent_data(self, global_id: str) -> None:
"""
Removes the data for an Agent.
"""
del self.experience_buffers[global_id]
del self.last_take_action_outputs[global_id]
del self.episode_steps[global_id]
del self.episode_rewards[global_id]
del self.last_step_result[global_id]
self.policy.remove_previous_action([global_id])
self.policy.remove_memories([global_id])
def publish_trajectory_queue(
self, trajectory_queue: "AgentManagerQueue[Trajectory]"
) -> None:

:param trajectory_queue: Trajectory queue to publish to.
"""
self.trajectory_queues.append(trajectory_queue)
def end_episode(self) -> None:
"""
Ends the episode, terminating the current trajectory and stopping stats collection for that
episode. Used for forceful reset (e.g. in curriculum or generalization training.)
"""
self.experience_buffers.clear()
self.episode_rewards.clear()
self.episode_steps.clear()
class AgentManagerQueue(Generic[T]):

7
ml-agents/mlagents/trainers/brain_conversion_utils.py


return BrainParameters(
name, int(vec_size), cam_res, a_size, [], vector_action_space_type
)
def get_global_agent_id(worker_id: int, agent_id: int) -> str:
"""
Create an agent id that is unique across environment workers using the worker_id.
"""
return f"${worker_id}-{agent_id}"
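As a small usage sketch of the relocated `get_global_agent_id` helper (the concrete worker and agent ids below are made up for illustration):

```python
from mlagents.trainers.brain_conversion_utils import get_global_agent_id

# The worker id is baked into the key, so agents with the same local id in
# different environment workers never collide.
assert get_global_agent_id(0, 7) != get_global_agent_id(1, 7)
print(get_global_agent_id(1, 7))  # -> "$1-7"
```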

50
ml-agents/mlagents/trainers/demo_loader.py


import pathlib
import logging
import os
from typing import List, Tuple

return brain_params, demo_buffer
def get_demo_files(path: str) -> List[str]:
"""
Retrieves the demonstration file(s) from a path.
:param path: Path of demonstration file or directory.
:return: List of demonstration files
Raises errors if |path| is invalid.
"""
if os.path.isfile(path):
if not path.endswith(".demo"):
raise ValueError("The path provided is not a '.demo' file.")
return [path]
elif os.path.isdir(path):
paths = [
os.path.join(path, name)
for name in os.listdir(path)
if name.endswith(".demo")
]
if not paths:
raise ValueError("There are no '.demo' files in the provided directory.")
return paths
else:
raise FileNotFoundError(
f"The demonstration file or directory {path} does not exist."
)
@timed
def load_demonstration(
file_path: str

# First 32 bytes of file dedicated to meta-data.
INITIAL_POS = 33
file_paths = []
if os.path.isdir(file_path):
all_files = os.listdir(file_path)
for _file in all_files:
if _file.endswith(".demo"):
file_paths.append(os.path.join(file_path, _file))
if not all_files:
raise ValueError("There are no '.demo' files in the provided directory.")
elif os.path.isfile(file_path):
file_paths.append(file_path)
file_extension = pathlib.Path(file_path).suffix
if file_extension != ".demo":
raise ValueError(
"The file is not a '.demo' file. Please provide a file with the "
"correct extension."
)
else:
raise FileNotFoundError(
"The demonstration file or directory {} does not exist.".format(file_path)
)
file_paths = get_demo_files(file_path)
group_spec = None
brain_param_proto = None
info_action_pairs = []
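A short, hypothetical usage sketch for the `get_demo_files` helper added above; the paths are invented for the example and the error behavior follows the code shown in this hunk.

```python
from mlagents.trainers.demo_loader import get_demo_files

# A single .demo file comes back as a one-element list.
files = get_demo_files("demos/ExpertPyramid.demo")

# A directory returns every .demo file it contains. An empty directory or a
# non-.demo file raises ValueError; a missing path raises FileNotFoundError.
all_demos = get_demo_files("demos/")
```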

73
ml-agents/mlagents/trainers/env_manager.py


from abc import ABC, abstractmethod
import logging
from mlagents.trainers.policy import Policy
from mlagents.trainers.tf_policy import TFPolicy
from mlagents.trainers.agent_processor import AgentManager, AgentManagerQueue
def get_global_agent_id(worker_id: int, agent_id: int) -> str:
"""
Create an agent id that is unique across environment workers using the worker_id.
"""
return f"${worker_id}-{agent_id}"
logger = logging.getLogger("mlagents.trainers")
class EnvironmentStep(NamedTuple):

class EnvManager(ABC):
def __init__(self):
self.policies: Dict[AgentGroup, Policy] = {}
self.policies: Dict[AgentGroup, TFPolicy] = {}
self.agent_managers: Dict[AgentGroup, AgentManager] = {}
self.first_step_infos: List[EnvironmentStep] = None
def set_policy(self, brain_name: AgentGroup, policy: Policy) -> None:
def set_policy(self, brain_name: AgentGroup, policy: TFPolicy) -> None:
if brain_name in self.agent_managers:
self.agent_managers[brain_name].policy = policy
def set_agent_manager(self, brain_name: AgentGroup, manager: AgentManager) -> None:
self.agent_managers[brain_name] = manager
def step(self) -> List[EnvironmentStep]:
def _step(self) -> List[EnvironmentStep]:
def reset(self, config: Dict = None) -> List[EnvironmentStep]:
def _reset_env(self, config: Dict = None) -> List[EnvironmentStep]:
def reset(self, config: Dict = None) -> int:
for manager in self.agent_managers.values():
manager.end_episode()
# Save the first step infos, after the reset.
# They will be processed on the first advance().
self.first_step_infos = self._reset_env(config)
return len(self.first_step_infos)
@property
@abstractmethod
def external_brains(self) -> Dict[AgentGroup, BrainParameters]:

@abstractmethod
def close(self):
pass
def advance(self):
# If we had just reset, process the first EnvironmentSteps.
# Note that we do it here instead of in reset() so that on the very first reset(),
# we can create the needed AgentManagers before calling advance() and processing the EnvironmentSteps.
if self.first_step_infos is not None:
self._process_step_infos(self.first_step_infos)
self.first_step_infos = None
# Get new policies if found
for brain_name in self.external_brains:
try:
_policy = self.agent_managers[brain_name].policy_queue.get_nowait()
self.set_policy(brain_name, _policy)
except AgentManagerQueue.Empty:
pass
# Step the environment
new_step_infos = self._step()
# Add to AgentProcessor
num_step_infos = self._process_step_infos(new_step_infos)
return num_step_infos
def _process_step_infos(self, step_infos: List[EnvironmentStep]) -> int:
for step_info in step_infos:
for name_behavior_id in step_info.name_behavior_ids:
if name_behavior_id not in self.agent_managers:
logger.warning(
"Agent manager was not created for behavior id {}.".format(
name_behavior_id
)
)
continue
self.agent_managers[name_behavior_id].add_experiences(
step_info.current_all_step_result[name_behavior_id],
step_info.worker_id,
step_info.brain_name_to_action_info.get(
name_behavior_id, ActionInfo.empty()
),
)
return len(step_infos)
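A partial sketch of how a custom environment manager lines up with the refactored interface: callers now go through `reset()` and `advance()`, while subclasses override the underscored hooks. The class name and the omitted members are assumptions for illustration, not part of this commit.

```python
from typing import Dict, List
from mlagents.trainers.env_manager import EnvManager, EnvironmentStep

class CustomEnvManager(EnvManager):
    """Partial sketch; external_brains, close, etc. are omitted here."""

    def _reset_env(self, config: Dict = None) -> List[EnvironmentStep]:
        # Reset the wrapped environment(s) and return their first steps.
        ...

    def _step(self) -> List[EnvironmentStep]:
        # Advance the wrapped environment(s) by one step.
        ...

# Training code calls manager.reset(config) once, then manager.advance() in a
# loop; the first EnvironmentSteps are processed on the first advance().
```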

3
ml-agents/mlagents/trainers/ppo/trainer.py


self.policy.initialize_or_load()
for _reward_signal in self.optimizer.reward_signals.keys():
self.collected_rewards[_reward_signal] = defaultdict(lambda: 0)
# Needed to resume loads properly
self.step = policy.get_current_step()
self.next_summary_step = self._get_next_summary_step()
def get_policy(self, name_behavior_id: str) -> TFPolicy:
"""

3
ml-agents/mlagents/trainers/sac/trainer.py


self.policy.initialize_or_load()
for _reward_signal in self.optimizer.reward_signals.keys():
self.collected_rewards[_reward_signal] = defaultdict(lambda: 0)
# Needed to resume loads properly
self.step = policy.get_current_step()
self.next_summary_step = self._get_next_summary_step()
def get_policy(self, name_behavior_id: str) -> TFPolicy:
"""

4
ml-agents/mlagents/trainers/simple_env_manager.py


self.previous_step: EnvironmentStep = EnvironmentStep.empty(0)
self.previous_all_action_info: Dict[str, ActionInfo] = {}
def step(self) -> List[EnvironmentStep]:
def _step(self) -> List[EnvironmentStep]:
all_action_info = self._take_step(self.previous_step)
self.previous_all_action_info = all_action_info

self.previous_step = step_info
return [step_info]
def reset(
def _reset_env(
self, config: Dict[AgentGroup, float] = None
) -> List[EnvironmentStep]: # type: ignore
if config is not None:

16
ml-agents/mlagents/trainers/stats.py


std: float
num: int
@staticmethod
def empty() -> "StatsSummary":
return StatsSummary(0.0, 0.0, 0)
class StatsWriter(abc.ABC):
"""

:param key: The type of statistic, e.g. Environment/Reward.
:returns: A StatsSummary NamedTuple containing (mean, std, count).
"""
return StatsSummary(
mean=np.mean(StatsReporter.stats_dict[self.category][key]),
std=np.std(StatsReporter.stats_dict[self.category][key]),
num=len(StatsReporter.stats_dict[self.category][key]),
)
if len(StatsReporter.stats_dict[self.category][key]) > 0:
return StatsSummary(
mean=np.mean(StatsReporter.stats_dict[self.category][key]),
std=np.std(StatsReporter.stats_dict[self.category][key]),
num=len(StatsReporter.stats_dict[self.category][key]),
)
return StatsSummary.empty()
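A minimal sketch of the new empty-summary behavior, assuming the `StatsReporter` constructor and `add_stat` method from this module; the category string below is hypothetical.

```python
from mlagents.trainers.stats import StatsReporter, StatsSummary

reporter = StatsReporter("example_category")  # hypothetical category name

# No values recorded yet, so the guard above avoids np.mean([]) and returns
# the empty summary instead.
assert reporter.get_stats_summaries("Environment/Reward") == StatsSummary.empty()

reporter.add_stat("Environment/Reward", 1.0)
assert reporter.get_stats_summaries("Environment/Reward").num == 1
```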

4
ml-agents/mlagents/trainers/subprocess_env_manager.py


env_worker.send("step", env_action_info)
env_worker.waiting = True
def step(self) -> List[EnvironmentStep]:
def _step(self) -> List[EnvironmentStep]:
# Queue steps for any workers which aren't in the "waiting" state.
self._queue_steps()

step_infos = self._postprocess_steps(worker_steps)
return step_infos
def reset(self, config: Optional[Dict] = None) -> List[EnvironmentStep]:
def _reset_env(self, config: Optional[Dict] = None) -> List[EnvironmentStep]:
while any(ew.waiting for ew in self.env_workers):
if not self.step_queue.empty():
step = self.step_queue.get_nowait()

2
ml-agents/mlagents/trainers/tests/test_agent_processor.py


)
processor.publish_trajectory_queue(tqueue)
# This is like the initial state after the env reset
processor.add_experiences(mock_step, 0, ActionInfo([], [], {}, []))
processor.add_experiences(mock_step, 0, ActionInfo.empty())
for _ in range(5):
processor.add_experiences(mock_step, 0, fake_action_info)

4
ml-agents/mlagents/trainers/tests/test_bcmodule.py


use_recurrent: false
memory_size: 8
behavioral_cloning:
demo_path: ./UnitySDK/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
demo_path: ./Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
strength: 1.0
steps: 10000000
reward_signals:

use_recurrent: false
vis_encode_type: simple
behavioral_cloning:
demo_path: ./UnitySDK/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
demo_path: ./Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
strength: 1.0
steps: 10000000
reward_signals:

37
ml-agents/mlagents/trainers/tests/test_demo_loader.py


import os
import numpy as np
import pytest
import tempfile
from mlagents.trainers.demo_loader import load_demonstration, demo_to_buffer
from mlagents.trainers.demo_loader import (
load_demonstration,
demo_to_buffer,
get_demo_files,
)
def test_load_demo():

_, demo_buffer = demo_to_buffer(path_prefix + "/test_demo_dir", 1)
assert len(demo_buffer["actions"]) == total_expected - 1
def test_edge_cases():
path_prefix = os.path.dirname(os.path.abspath(__file__))
# nonexistent file and directory
with pytest.raises(FileNotFoundError):
get_demo_files(os.path.join(path_prefix, "nonexistent_file.demo"))
with pytest.raises(FileNotFoundError):
get_demo_files(os.path.join(path_prefix, "nonexistent_directory"))
with tempfile.TemporaryDirectory() as tmpdirname:
# empty directory
with pytest.raises(ValueError):
get_demo_files(tmpdirname)
# invalid file
invalid_fname = os.path.join(tmpdirname, "mydemo.notademo")
with open(invalid_fname, "w") as f:
f.write("I'm not a demo")
with pytest.raises(ValueError):
get_demo_files(invalid_fname)
# invalid directory
with pytest.raises(ValueError):
get_demo_files(tmpdirname)
# valid file
valid_fname = os.path.join(tmpdirname, "mydemo.demo")
with open(valid_fname, "w") as f:
f.write("I'm a demo file")
assert get_demo_files(valid_fname) == [valid_fname]
# valid directory
assert get_demo_files(tmpdirname) == [valid_fname]

2
ml-agents/mlagents/trainers/tests/test_policy.py


dummy_groupspec = AgentGroupSpec([(1,)], "continuous", 1)
no_agent_step = BatchedStepResult.empty(dummy_groupspec)
result = policy.get_action(no_agent_step)
assert result == ActionInfo([], [], {}, [])
assert result == ActionInfo.empty()
def test_take_action_returns_nones_on_missing_values():

24
ml-agents/mlagents/trainers/tests/test_ppo.py


brain_params.brain_name, 0, trainer_params, True, False, 0, "0", False
)
policy_mock = mock.Mock(spec=NNPolicy)
policy_mock.get_current_step.return_value = 0
step_count = (
5
) # 10 hacked because this function is no longer called through trainer

for agent in reward.values():
assert agent == 0
assert trainer.stats_reporter.get_stats_summaries("Policy/Extrinsic Reward").num > 0
def test_add_get_policy(dummy_config):
brain_params = make_brain_parameters(
discrete_action=False, visual_inputs=0, vec_obs_size=6
)
dummy_config["summary_path"] = "./summaries/test_trainer_summary"
dummy_config["model_path"] = "./models/test_trainer_models/TestModel"
trainer = PPOTrainer(brain_params, 0, dummy_config, True, False, 0, "0", False)
policy = mock.Mock(spec=NNPolicy)
policy.get_current_step.return_value = 2000
trainer.add_policy(brain_params.brain_name, policy)
assert trainer.get_policy(brain_params.brain_name) == policy
# Make sure the summary steps were loaded properly
assert trainer.get_step == 2000
assert trainer.next_summary_step > 2000
# Test incorrect class of policy
policy = mock.Mock()
with pytest.raises(RuntimeError):
trainer.add_policy(brain_params, policy)
def test_normalization(dummy_config):

2
ml-agents/mlagents/trainers/tests/test_reward_signals.py


use_recurrent: false
vis_encode_type: simple
behavioral_cloning:
demo_path: ./UnitySDK/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
demo_path: ./Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
strength: 1.0
steps: 10000000
reward_signals:

24
ml-agents/mlagents/trainers/tests/test_sac.py


import pytest
from unittest import mock
import yaml
import numpy as np

policy = trainer2.create_policy(mock_brain)
trainer2.add_policy(mock_brain.brain_name, policy)
assert trainer2.update_buffer.num_experiences == buffer_len
def test_add_get_policy(dummy_config):
brain_params = make_brain_parameters(
discrete_action=False, visual_inputs=0, vec_obs_size=6
)
dummy_config["summary_path"] = "./summaries/test_trainer_summary"
dummy_config["model_path"] = "./models/test_trainer_models/TestModel"
trainer = SACTrainer(brain_params, 0, dummy_config, True, False, 0, "0")
policy = mock.Mock(spec=SACPolicy)
policy.get_current_step.return_value = 2000
trainer.add_policy(brain_params.brain_name, policy)
assert trainer.get_policy(brain_params.brain_name) == policy
# Make sure the summary steps were loaded properly
assert trainer.get_step == 2000
assert trainer.next_summary_step > 2000
# Test incorrect class of policy
policy = mock.Mock()
with pytest.raises(RuntimeError):
trainer.add_policy(brain_params, policy)
def test_process_trajectory(dummy_config):

33
ml-agents/mlagents/trainers/tests/test_simple_rl.py


gamma: 0.99
"""
GHOST_CONFIG = f"""
{BRAIN_NAME}:
trainer: ppo
batch_size: 16
beta: 5.0e-3
buffer_size: 64
epsilon: 0.2
hidden_units: 128
lambd: 0.95
learning_rate: 5.0e-3
max_steps: 2500
memory_size: 256
normalize: false
num_epoch: 3
num_layers: 2
time_horizon: 64
sequence_length: 64
summary_freq: 500
use_recurrent: false
reward_signals:
extrinsic:
strength: 1.0
gamma: 0.99
self_play:
save_step: 1000
"""
def _check_environment_trains(
env, config, meta_curriculum=None, success_threshold=0.99

def test_simple_sac(use_discrete):
env = Simple1DEnvironment(use_discrete=use_discrete)
_check_environment_trains(env, SAC_CONFIG)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_ghost(use_discrete):
env = Simple1DEnvironment(use_discrete=use_discrete)
_check_environment_trains(env, GHOST_CONFIG)

46
ml-agents/mlagents/trainers/tests/test_subprocess_env_manager.py


EnvironmentResponse,
StepResponse,
)
from mlagents.trainers.env_manager import EnvironmentStep
from mlagents_envs.base_env import BaseEnv
from mlagents_envs.side_channel.engine_configuration_channel import EngineConfig

mock_env_factory, EngineConfig.default_config(), 1
)
params = {"test": "params"}
manager.reset(params)
manager._reset_env(params)
manager.env_workers[0].send.assert_called_with("reset", (params))
def test_reset_collects_results_from_all_envs(self):

)
params = {"test": "params"}
res = manager.reset(params)
res = manager._reset_env(params)
for i, env in enumerate(manager.env_workers):
env.send.assert_called_with("reset", (params))
env.recv.assert_called()

manager.env_workers[2].previous_step = last_steps[2]
manager.env_workers[2].waiting = True
manager._take_step = Mock(return_value=step_mock)
res = manager.step()
res = manager._step()
for i, env in enumerate(manager.env_workers):
if i < 2:
env.send.assert_called_with("step", step_mock)

manager.env_workers[0].previous_step,
manager.env_workers[1].previous_step,
]
@mock.patch("mlagents.trainers.subprocess_env_manager.SubprocessEnvManager._step")
@mock.patch(
"mlagents.trainers.subprocess_env_manager.SubprocessEnvManager.external_brains",
new_callable=mock.PropertyMock,
)
def test_advance(self, external_brains_mock, step_mock):
brain_name = "testbrain"
action_info_dict = {brain_name: MagicMock()}
SubprocessEnvManager.create_worker = lambda em, worker_id, step_queue, env_factory, engine_c: MockEnvWorker(
worker_id, EnvironmentResponse("step", worker_id, worker_id)
)
env_manager = SubprocessEnvManager(
mock_env_factory, EngineConfig.default_config(), 3
)
external_brains_mock.return_value = [brain_name]
agent_manager_mock = mock.Mock()
env_manager.set_agent_manager(brain_name, agent_manager_mock)
step_info_dict = {brain_name: Mock()}
step_info = EnvironmentStep(step_info_dict, 0, action_info_dict)
step_mock.return_value = [step_info]
env_manager.advance()
# Test add_experiences
env_manager._step.assert_called_once()
agent_manager_mock.add_experiences.assert_called_once_with(
step_info.current_all_step_result[brain_name],
0,
step_info.brain_name_to_action_info[brain_name],
)
# Test policy queue
mock_policy = mock.Mock()
agent_manager_mock.policy_queue.get_nowait.return_value = mock_policy
env_manager.advance()
assert env_manager.policies[brain_name] == mock_policy
assert agent_manager_mock.policy == mock_policy

54
ml-agents/mlagents/trainers/tests/test_trainer_controller.py


from unittest.mock import MagicMock, Mock, patch
from unittest.mock import MagicMock, patch
from mlagents.trainers.subprocess_env_manager import EnvironmentStep
from mlagents.trainers.sampler_class import SamplerManager

return tc, trainer_mock
def test_take_step_adds_experiences_to_trainer_and_trains(
def test_advance_adds_experiences_to_trainer_and_trains(
action_info_dict = {brain_name: MagicMock()}
brain_info_dict = {brain_name: Mock()}
old_step_info = EnvironmentStep(brain_info_dict, 0, action_info_dict)
new_step_info = EnvironmentStep(brain_info_dict, 0, action_info_dict)
trainer_mock._is_ready_update = MagicMock(return_value=True)
env_mock.step.return_value = [new_step_info]
env_mock.reset.return_value = [old_step_info]
tc.brain_name_to_identifier[brain_name].add(brain_name)

env_mock.step.assert_called_once()
manager_mock = tc.managers[brain_name]
manager_mock.add_experiences.assert_called_once_with(
new_step_info.current_all_step_result[brain_name],
0,
new_step_info.brain_name_to_action_info[brain_name],
)
trainer_mock.advance.assert_called_once()
def test_take_step_if_not_training(trainer_controller_with_take_step_mocks):
tc, trainer_mock = trainer_controller_with_take_step_mocks
tc.train_model = False
brain_name = "testbrain"
action_info_dict = {brain_name: MagicMock()}
brain_info_dict = {brain_name: Mock()}
old_step_info = EnvironmentStep(brain_info_dict, 0, action_info_dict)
new_step_info = EnvironmentStep(brain_info_dict, 0, action_info_dict)
trainer_mock._is_ready_update = MagicMock(return_value=False)
env_mock = MagicMock()
env_mock.step.return_value = [new_step_info]
env_mock.reset.return_value = [old_step_info]
tc.brain_name_to_identifier[brain_name].add(brain_name)
tc.advance(env_mock)
env_mock.reset.assert_not_called()
env_mock.step.assert_called_once()
manager_mock = tc.managers[brain_name]
manager_mock.add_experiences.assert_called_once_with(
new_step_info.current_all_step_result[brain_name],
0,
new_step_info.brain_name_to_action_info[brain_name],
)
env_mock.advance.assert_called_once()
trainer_mock.advance.assert_called_once()

43
ml-agents/mlagents/trainers/tf_policy.py


import numpy as np
from mlagents.tf_utils import tf
from mlagents import tf_utils
from mlagents_envs.exception import UnityException
from mlagents.trainers.policy import Policy

from mlagents.trainers import tensorflow_to_barracuda as tf2bc
from mlagents.trainers.trajectory import SplitObservations
from mlagents.trainers.env_manager import get_global_agent_id
from mlagents.trainers.brain_conversion_utils import get_global_agent_id
from mlagents_envs.base_env import BatchedStepResult
from mlagents.trainers.models import LearningModel