
Merge branch 'soccer-2v1' into asymm-envs

/asymm-envs
Andrew Cohen, 4 years ago
Current commit
f41695b9
30 files changed, with 2481 additions and 931 deletions
  1. 4
      .yamato/standalone-build-test.yml
  2. 2
      .yamato/training-int-tests.yml
  3. 30
      Project/Assets/ML-Agents/Editor/Tests/StandaloneBuildTest.cs
  4. 6
      Project/Assets/ML-Agents/Examples/Soccer/Prefabs/StrikersVsGoalieField.prefab
  5. 1001
      Project/Assets/ML-Agents/Examples/Soccer/TFModels/Goalie.nn
  6. 1001
      Project/Assets/ML-Agents/Examples/Soccer/TFModels/Striker.nn
  7. 3
      README.md
  8. 16
      com.unity.ml-agents/CHANGELOG.md
  9. 20
      com.unity.ml-agents/Runtime/Sensors/StackingSensor.cs
  10. 30
      com.unity.ml-agents/Tests/Editor/PublicAPI/PublicApiValidation.cs
  11. 19
      docs/Getting-Started.md
  12. 2
      docs/Learning-Environment-Create-New.md
  13. 39
      docs/Learning-Environment-Examples.md
  14. 8
      docs/Learning-Environment-Executable.md
  15. 8
      docs/Migrating.md
  16. 2
      docs/Training-Curriculum-Learning.md
  17. 2
      docs/Training-Environment-Parameter-Randomization.md
  18. 38
      docs/Training-ML-Agents.md
  19. 18
      docs/Training-Self-Play.md
  20. 61
      ml-agents/mlagents/trainers/learn.py
  21. 7
      ml-agents/mlagents/trainers/policy/tf_policy.py
  22. 22
      ml-agents/mlagents/trainers/stats.py
  23. 18
      ml-agents/mlagents/trainers/tests/test_learn.py
  24. 23
      ml-agents/mlagents/trainers/tests/test_stats.py
  25. 22
      ml-agents/mlagents/trainers/tests/test_trainer_util.py
  26. 30
      ml-agents/mlagents/trainers/trainer_util.py
  27. 32
      ml-agents/tests/yamato/training_int_tests.py
  28. 10
      ml-agents/tests/yamato/yamato_utils.py
  29. 938
      docs/images/strikersvsgoalie.png
  30. 0
      ml-agents/tests/yamato/lowlevel_api_tests.py

4
.yamato/standalone-build-test.yml


- "*.md"
- "com.unity.ml-agents/*.md"
- "com.unity.ml-agents/**/*.md"
artifacts:
standalonebuild:
paths:
- "Project/testPlayer*/**"
{% endfor %}

2
.yamato/training-int-tests.yml


# to be disabled until the next release.
- python -u -m ml-agents.tests.yamato.training_int_tests --python=0.15.0
- python -u -m ml-agents.tests.yamato.training_int_tests --csharp=0.15.0
dependencies:
- .yamato/standalone-build-test.yml#test_mac_standalone_{{ editor.version }}
triggers:
cancel_old_ci: true
changes:

30
Project/Assets/ML-Agents/Editor/Tests/StandaloneBuildTest.cs


{
public class StandaloneBuildTest
{
const string k_outputCommandLineFlag = "--mlagents-build-output-path";
const string k_sceneCommandLineFlag = "--mlagents-build-scene-path";
string[] scenes = { "Assets/ML-Agents/Examples/3DBall/Scenes/3DBall.unity" };
var buildResult = BuildPipeline.BuildPlayer(scenes, "testPlayer", BuildTarget.StandaloneOSX, BuildOptions.None);
// Read commandline arguments for options
var outputPath = "testPlayer";
var scenePath = "Assets/ML-Agents/Examples/3DBall/Scenes/3DBall.unity";
var args = Environment.GetCommandLineArgs();
for (var i = 0; i < args.Length - 1; i++)
{
if (args[i] == k_outputCommandLineFlag)
{
outputPath = args[i + 1];
Debug.Log($"Overriding output path to {outputPath}");
}
else if (args[i] == k_sceneCommandLineFlag)
{
scenePath = args[i + 1];
}
}
string[] scenes = { scenePath };
var buildResult = BuildPipeline.BuildPlayer(
scenes,
outputPath,
BuildTarget.StandaloneOSX,
BuildOptions.None
);
var isOk = buildResult.summary.result == BuildResult.Succeeded;
var error = "";
foreach (var stepInfo in buildResult.steps)

6
Project/Assets/ML-Agents/Examples/Soccer/Prefabs/StrikersVsGoalieField.prefab


vectorActionSize: 030000000300000003000000
vectorActionDescriptions: []
vectorActionSpaceType: 0
m_Model: {fileID: 11400000, guid: 3760d1cef396640f486ebe96b48d4f37, type: 3}
m_Model: {fileID: 11400000, guid: e9c10c18f4eb745d19186a54dbe3ca2e, type: 3}
m_InferenceDevice: 0
m_BehaviorType: 0
m_BehaviorName: Goalie

vectorActionSize: 030000000300000003000000
vectorActionDescriptions: []
vectorActionSpaceType: 0
m_Model: {fileID: 11400000, guid: eef58e4530eb940e28760b9a668bc93c, type: 3}
m_Model: {fileID: 11400000, guid: 75a830685bf8e43918adc4783a2abebf, type: 3}
m_InferenceDevice: 0
m_BehaviorType: 0
m_BehaviorName: Striker

vectorActionSize: 030000000300000003000000
vectorActionDescriptions: []
vectorActionSpaceType: 0
m_Model: {fileID: 11400000, guid: eef58e4530eb940e28760b9a668bc93c, type: 3}
m_Model: {fileID: 11400000, guid: 75a830685bf8e43918adc4783a2abebf, type: 3}
m_InferenceDevice: 0
m_BehaviorType: 0
m_BehaviorName: Striker

1001
Project/Assets/ML-Agents/Examples/Soccer/TFModels/Goalie.nn
File diff is too large to display.

1001
Project/Assets/ML-Agents/Examples/Soccer/TFModels/Striker.nn
File diff is too large to display.

3
README.md


* Train using concurrent Unity environment instances
## Releases & Documentation
**Our latest, stable release is 0.15.0. Click
**Our latest, stable release is 0.15.1. Click
[here](https://github.com/Unity-Technologies/ml-agents/tree/latest_release/docs/Readme.md) to
get started with the latest release of ML-Agents.**

| **Version** | **Release Date** | **Source** | **Documentation** | **Download** |
|:-------:|:------:|:-------------:|:-------:|:------------:|
| **master (unstable)** | -- | [source](https://github.com/Unity-Technologies/ml-agents/tree/master) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/master/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/master.zip) |
| **0.15.1** | **March 30, 2020** | **[source](https://github.com/Unity-Technologies/ml-agents/tree/0.15.1)** | **[docs](https://github.com/Unity-Technologies/ml-agents/tree/0.15.1/docs/Readme.md)** | **[download](https://github.com/Unity-Technologies/ml-agents/archive/0.15.1.zip)** |
| **0.15.0** | **March 18, 2020** | **[source](https://github.com/Unity-Technologies/ml-agents/tree/0.15.0)** | **[docs](https://github.com/Unity-Technologies/ml-agents/tree/0.15.0/docs/Readme.md)** | **[download](https://github.com/Unity-Technologies/ml-agents/archive/0.15.0.zip)** |
| **0.14.1** | February 26, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/0.14.1) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/0.14.1/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/0.14.1.zip) |
| **0.14.0** | February 13, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/0.14.0) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/0.14.0/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/0.14.0.zip) |

16
com.unity.ml-agents/CHANGELOG.md


## [Unreleased]
### Major Changes
- The `--load` and `--train` command-line flags have been deprecated. Training now happens by default;
use `--resume` to resume training instead. (#3705)
- The Jupyter notebooks have been removed from the repository.
- Introduced the `SideChannelUtils` to register, unregister and access side channels.
- `Academy.FloatProperties` was removed, please use `SideChannelUtils.GetSideChannel<FloatPropertiesChannel>()` instead.

- Raise the wall in CrawlerStatic scene to prevent Agent from falling off. (#3650)
- Fixed an issue where specifying `vis_encode_type` was required only for SAC. (#3677)
- Fixed an issue where exceptions from environments provided a returncode of 0. (#3680)
- Running `mlagents-learn` with the same `--run-id` twice will no longer overwrite the existing files. (#3705)
- `StackingSensor` was changed from `internal` visibility to `public`
## [0.15.1-preview] - 2020-03-30
### Bug Fixes
- Raise the wall in CrawlerStatic scene to prevent Agent from falling off. (#3650)
- Fixed an issue where specifying `vis_encode_type` was required only for SAC. (#3677)
- Fixed an issue where exceptions from environments provided a returncode of 0. (#3680)
- Fixed an issue where logging output was not visible; logging levels are now set consistently (#3703).
- Fixed an issue where logging output was not visible; logging levels are now set consistently. (#3703)
## [0.15.0-preview] - 2020-03-18
### Major Changes

20
com.unity.ml-agents/Runtime/Sensors/StackingSensor.cs


/// For example, 4 stacked sets of observations would be output like
/// | t = now - 3 | t = now - 2 | t = now - 1 | t = now |
/// Internally, a circular buffer of arrays is used. The m_CurrentIndex represents the most recent observation.
///
/// Currently, compressed and multidimensional observations are not supported.
internal class StackingSensor : ISensor
public class StackingSensor : ISensor
{
/// <summary>
/// The wrapped sensor.

WriteAdapter m_LocalAdapter = new WriteAdapter();
/// <summary>
///
/// Initializes the sensor.
/// </summary>
/// <param name="wrapped">The wrapped sensor.</param>
/// <param name="numStackedObservations">Number of stacked observations to keep.</param>

m_Name = $"StackingSensor_size{numStackedObservations}_{wrapped.GetName()}";
if (wrapped.GetCompressionType() != SensorCompressionType.None)
{
throw new UnityAgentsException("StackingSensor doesn't support compressed observations.'");
}
if (shape.Length != 1)
{
throw new UnityAgentsException("Only 1-D observations are supported by StackingSensor");
}
m_Shape = new int[shape.Length];
m_UnstackedObservationSize = wrapped.ObservationSize();

}
}
/// <inheritdoc/>
public int Write(WriteAdapter adapter)
{
// First, call the wrapped sensor's write method. Make sure to use our own adapter, not the passed one.

m_CurrentIndex = (m_CurrentIndex + 1) % m_NumStackedObservations;
}
/// <inheritdoc/>
/// <inheritdoc/>
/// <inheritdoc/>
/// <inheritdoc/>
public virtual SensorCompressionType GetCompressionType()
{
return SensorCompressionType.None;

30
com.unity.ml-agents/Tests/Editor/PublicAPI/PublicApiValidation.cs


}
}
// Simple SensorComponent that sets up a StackingSensor
class StackingComponent : SensorComponent
{
public SensorComponent wrappedComponent;
public int numStacks;
public override ISensor CreateSensor()
{
var wrappedSensor = wrappedComponent.CreateSensor();
return new StackingSensor(wrappedSensor, numStacks);
}
public override int[] GetObservationShape()
{
int[] shape = (int[]) wrappedComponent.GetObservationShape().Clone();
for (var i = 0; i < shape.Length; i++)
{
shape[i] *= numStacks;
}
return shape;
}
}
[Test]
public void CheckSetupAgent()

sensorComponent.sensorName = "ray3d";
sensorComponent.detectableTags = new List<string> { "Player", "Respawn" };
sensorComponent.raysPerDirection = 3;
// Make a StackingSensor that wraps the RayPerceptionSensorComponent3D
// This isn't necessarily practical, just to ensure that it can be done
var wrappingSensorComponent = gameObject.AddComponent<StackingComponent>();
wrappingSensorComponent.wrappedComponent = sensorComponent;
wrappingSensorComponent.numStacks = 3;
// ISensor isn't set up yet.
Assert.IsNull(sensorComponent.raySensor);

19
docs/Getting-Started.md


2. Navigate to the folder where you cloned the ML-Agents toolkit repository.
**Note**: If you followed the default [installation](Installation.md), then
you should be able to run `mlagents-learn` from any directory.
3. Run `mlagents-learn <trainer-config-path> --run-id=<run-identifier> --train`
3. Run `mlagents-learn <trainer-config-path> --run-id=<run-identifier>`
training runs
- `--train` tells `mlagents-learn` to run a training session (rather
than inference)
training runs. Make sure to use one that hasn't been used already!
mlagents-learn config/trainer_config.yaml --run-id=firstRun --train
mlagents-learn config/trainer_config.yaml --run-id=firstRun
```
5. When the message _"Start training by pressing the Play button in the Unity

**Note**: If you're using Anaconda, don't forget to activate the ml-agents
environment first.
The `--train` flag tells the ML-Agents toolkit to run in training mode.
The `--time-scale=100` sets the `Time.TimeScale` value in Unity.
**Note**: You can train using an executable rather than the Editor. To do so,

command-line prompt. If you close the window manually, the `.nn` file
containing the trained model is not exported into the ml-agents folder.
You can press Ctrl+C to stop the training, and your trained model will be at
`models/<run-identifier>/<behavior_name>.nn` where
If you've quit the training early using Ctrl+C and want to resume training, run the
same command again, appending the `--resume` flag:
```sh
mlagents-learn config/trainer_config.yaml --run-id=firstRun --resume
```
Your trained model will be at `models/<run-identifier>/<behavior_name>.nn` where
`<behavior_name>` is the name of the `Behavior Name` of the agents corresponding to the model.
(**Note:** There is a known bug on Windows that causes the saving of the model to
fail when you terminate the training early; it's recommended to wait until Step

2
docs/Learning-Environment-Create-New.md


To train in the editor, run the following Python command from a Terminal or Console
window before pressing play:
mlagents-learn config/config.yaml --run-id=RollerBall-1 --train
mlagents-learn config/config.yaml --run-id=RollerBall-1
(where `config.yaml` is a copy of `trainer_config.yaml` that you have edited
to change the `batch_size` and `buffer_size` hyperparameters for your trainer.)

39
docs/Learning-Environment-Examples.md


* Goal:
* Get the ball into the opponent's goal while preventing
the ball from entering own goal.
* Goalie:
Behavior Parameters : Soccer.
Behavior Parameters : SoccerTwos.
* Agent Reward Function (dependent):
* +1 When ball enters opponent's goal.
* -1 When ball enters team's goal.

and 3 ray-casts backward distributed over 90 degrees each detecting 6 possible object types, along with the object's distance.
The forward ray-casts contribute 264 state dimensions and backward 72 state dimensions.
The forward ray-casts contribute 264 state dimensions and backward 72 state dimensions over three observation stacks.
* Vector Action space: (Discrete) Three branched actions corresponding to forward, backward, sideways movement,
as well as rotation.
* Visual Observations: None

* Default: 9.81
* Recommended minimum: 6
* Recommended maximum: 20
## Strikers Vs. Goalie
![StrikersVsGoalie](images/strikersvsgoalie.png)
* Set-up: Environment where two teams compete in a 2 vs 1 soccer variant.
* Goal:
* Striker: Get the ball into the opponent's goal.
* Goalie: Keep the ball out of the goal.
* Agents: The environment contains three agents: two Strikers and one Goalie.
Behavior Parameters : Striker, Goalie.
* Striker Agent Reward Function (dependent):
* +1 When ball enters opponent's goal.
* -0.001 Existential penalty.
* Goalie Agent Reward Function (dependent):
* -1 When ball enters goal.
* 0.001 Existential bonus.
* Behavior Parameters:
* Striker is the same as SoccerTwos above.
* Goalie Vector Observation space: 984 corresponding to 41 ray-casts distributed over 360 degrees
each detecting 6 possible object types, along with the object's distance and 3 observation stacks.
* Goalie Vector Action space: (Discrete) Three branched actions corresponding to forward, backward, sideways movement,
as well as rotation.
* Visual Observations: None
* Float Properties: Two
* ball_scale: Specifies the scale of the ball in the 3 dimensions (equal across the three dimensions)
* Default: 7.5
* Recommended minimum: 4
* Recommended maximum: 10
* gravity: Magnitude of the gravity
* Default: 9.81
* Recommended minimum: 6
* Recommended maximum: 20
## Walker

8
docs/Learning-Environment-Executable.md


followed the default [installation](Installation.md), then navigate to the
`ml-agents/` folder.
3. Run
`mlagents-learn <trainer-config-file> --env=<env_name> --run-id=<run-identifier> --train`
`mlagents-learn <trainer-config-file> --env=<env_name> --run-id=<run-identifier>`
Where:
* `<trainer-config-file>` is the file path of the trainer configuration yaml
* `<env_name>` is the name and path to the executable you exported from Unity

* And the `--train` tells `mlagents-learn` to run a training session (rather
than inference)
mlagents-learn ../config/trainer_config.yaml --env=3DBall --run-id=firstRun --train
mlagents-learn ../config/trainer_config.yaml --env=3DBall --run-id=firstRun
ml-agents$ mlagents-learn config/trainer_config.yaml --env=3DBall --run-id=first-run --train
ml-agents$ mlagents-learn config/trainer_config.yaml --env=3DBall --run-id=first-run
▄▄▄▓▓▓▓

8
docs/Migrating.md


## Migrating from 0.15 to latest
### Important changes
* The `--load` and `--train` command-line flags have been deprecated and replaced with `--resume` and `--inference`.
* Running with the same `--run-id` twice will now throw an error.
### Steps to Migrate
* Replace the `--load` flag with `--resume` when calling `mlagents-learn`, and don't use the `--train` flag as training
will happen by default. To run without training, use `--inference`.
* To force-overwrite files from a pre-existing run, add the `--force` command-line flag.
* The Jupyter notebooks have been removed from the repository.
* `Academy.FloatProperties` was removed.
* `Academy.RegisterSideChannel` and `Academy.UnregisterSideChannel` were removed.

* Replace `Academy.RegisterSideChannel` with `SideChannelUtils.RegisterSideChannel()`.
* Replace `Academy.UnregisterSideChannel` with `SideChannelUtils.UnregisterSideChannel`.
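For instance, a minimal sketch of a pre-0.15 command and its migrated equivalent (the config path and run-id are placeholders):

```sh
# Before: --train enabled training, --load continued from an existing model
mlagents-learn config/trainer_config.yaml --run-id=firstRun --train --load

# After: training is the default; --resume continues the same run-id
mlagents-learn config/trainer_config.yaml --run-id=firstRun --resume
```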
## Migrating from 0.14 to 0.15

2
docs/Training-Curriculum-Learning.md


to train agents in the Wall Jump environment with curriculum learning, we can run:
```sh
mlagents-learn config/trainer_config.yaml --curriculum=config/curricula/wall_jump.yaml --run-id=wall-jump-curriculum --train
mlagents-learn config/trainer_config.yaml --curriculum=config/curricula/wall_jump.yaml --run-id=wall-jump-curriculum
```
We can then keep track of the current lessons and progresses via TensorBoard.

2
docs/Training-Environment-Parameter-Randomization.md


```sh
mlagents-learn config/trainer_config.yaml --sampler=config/3dball_randomize.yaml
--run-id=3D-Ball-randomize --train
--run-id=3D-Ball-randomize
```
We can observe progress and metrics via Tensorboard.

38
docs/Training-ML-Agents.md


The basic command for training is:
```sh
mlagents-learn <trainer-config-file> --env=<env_name> --run-id=<run-identifier> --train
mlagents-learn <trainer-config-file> --env=<env_name> --run-id=<run-identifier>
```
where

environment you built in step 1:
```sh
mlagents-learn config/trainer_config.yaml --env=../../projects/Cats/CatsOnBicycles.app --run-id=cob_1 --train
mlagents-learn config/trainer_config.yaml --env=../../projects/Cats/CatsOnBicycles.app --run-id=cob_1
```
During a training session, the training program prints out and saves updates at

`models/cob_1/CatsOnBicycles_cob_1.nn`.
While this example used the default training hyperparameters, you can edit the
[training_config.yaml file](#training-config-file) with a text editor to set
[trainer_config.yaml file](#training-config-file) with a text editor to set
To interrupt training and save the current progress, hit Ctrl+C once and wait for the
model to be saved out.
### Loading an Existing Model
If you've quit training early using Ctrl+C, you can resume the training run by running
`mlagents-learn` again, specifying the same `<run-identifier>` and appending the `--resume` flag
to the command.
You can also use this mode to run inference of an already-trained model in Python.
Append both the `--resume` and `--inference` flags to do this. Note that if you want to run
inference in Unity, you should use the
[Unity Inference Engine](Getting-started#Running-a-pre-trained-model).
If you've already trained a model using the specified `<run-identifier>` and `--resume` is not
specified, you will not be able to continue with training. Use `--force` to force ML-Agents to
overwrite the existing data.
### Command Line Training Options
In addition to passing the path of the Unity executable containing your training

training. Defaults to 0.
* `--num-envs=<n>`: Specifies the number of concurrent Unity environment instances to
collect experiences from when training. Defaults to 1.
* `--run-id=<path>`: Specifies an identifier for each training run. This
* `--run-id=<run-identifier>`: Specifies an identifier for each training run. This
identifier is used to name the subdirectories in which the trained model and
summary statistics are saved as well as the saved model itself. The default id
is "ppo". If you use TensorBoard to view the training statistics, always set a

will use the port `(base_port + worker_id)`, where `worker_id` is a sequential ID
assigned to each instance, from 0 to `num_envs - 1`. Default is 5005. __Note:__ When
training using the Editor rather than an executable, the base port will be ignored.
* `--train`: Specifies whether to train model or only run in inference mode.
When training, **always** use the `--train` option.
* `--load`: If set, the training code loads an already trained model to
* `--inference`: Specifies whether to only run in inference mode. Omit to train the model.
To load an existing model, specify a run-id and combine with `--resume`.
* `--resume`: If set, the training code loads an already trained model to
training). When not set (the default), the neural network weights are randomly
initialized and an existing model is not loaded.
training). This option only works when the models exist, and have the same behavior names
as the current agents in your scene.
* `--force`: Attempting to train a model with a run-id that has been used before will
throw an error. Use `--force` to force-overwrite this run-id's summary and model data.
* `--no-graphics`: Specify this option to run the Unity executable in
`-batchmode` and doesn't initialize the graphics driver. Use this only if your
training doesn't involve visual observations (reading from Pixels). See
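Putting the options above together, a minimal sketch of typical invocations (the config, environment path, and run-id are placeholders reused from the example earlier on this page):

```sh
# Resume an interrupted training run under the same run-id
mlagents-learn config/trainer_config.yaml --env=../../projects/Cats/CatsOnBicycles.app --run-id=cob_1 --resume

# Run the already-trained model in Python inference mode (no further training)
mlagents-learn config/trainer_config.yaml --env=../../projects/Cats/CatsOnBicycles.app --run-id=cob_1 --resume --inference

# Start over, overwriting the existing summaries and model for this run-id
mlagents-learn config/trainer_config.yaml --env=../../projects/Cats/CatsOnBicycles.app --run-id=cob_1 --force
```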

18
docs/Training-Self-Play.md


A symmetric game is one in which opposing agents are equal in form, function and objective. Examples of symmetric games
are our Tennis and Soccer example environments. In reinforcement learning, this means both agents have the same observation and
action spaces and learn from the same reward function and so *they can share the same policy*. In asymmetric games,
this is not the case. Examples of asymmetric games are Hide and Seek or Strikers vs Goalie in Soccer. Agents in these
this is not the case. An example of an asymmetric game is Hide and Seek. Agents in these
necessarily ideal. Fortunately, both of these situations are supported with only a few extra command line
arguments and trainer configurations!
necessarily ideal.
With self-play, an agent learns in adversarial games by competing against fixed, past versions of its opponent
(which could be itself as in symmetric games) to provide a more stable, stationary learning environment. This is compared

![Team ID](images/team_id.png)
***Team ID must be 0 or an integer greater than 0. Negative numbers will cause unpredictable behavior.***
***Team ID must be 0 or an integer greater than 0.***
In symmetric games, since all agents (even on opposing teams) will share the same policy, they should have the same 'Behavior Name' in their
Behavior Parameters Script. In asymmetric games, they should have a different Behavior Name in their Behavior Parameters script.

For examples of how to use this feature, you can see the trainer configurations and agent prefabs for our Tennis, Soccer and Strikers Vs Goalie environments.
Tennis and Soccer provide examples of symmetric games whereas Strikers Vs Goalie provides an example of an asymmetric game.
For examples of how to use this feature, you can see the trainer configurations and agent prefabs for our Tennis and Soccer environments.
Tennis and Soccer provide examples of symmetric games. To train an asymmetric game, specify trainer configurations for each of your behavior names
and include the self-play hyperparameter hierarchy in both.
## Best Practices Training with Self-Play

The `swap_steps` parameter corresponds to the number of *ghost steps* (not trainer steps) between swapping the opponents policy with a different snapshot.
A 'ghost step' refers to a step taken by an agent *that is following a fixed policy and not learning*. The reason for this distinction is that in asymmetric games,
we may have teams with an unequal number of agents e.g. the 2v1 scenario in our Strikers Vs Goalie environment. The team with two agents collects
we may have teams with an unequal number of agents e.g. a 2v1 scenario. The team with two agents collects
twice as many agent steps per environment step as the team with one agent. Thus, these two values will need to be distinct to ensure that the same number
of trainer steps corresponds to the same number of opponent swaps for each team. The formula for `swap_steps` if
a user desires `x` swaps of a team with `num_agents` agents against an opponent team with `num_opponent_agents`

swap_steps = (num_agents / num_opponent_agents) * (team_change / x)
```
As an example, in our Strikers Vs Goalie environment, if we want the swap to occur `x=4` times during `team-change=200000` steps,
As an example, in a 2v1 scenario, if we want the swap to occur `x=4` times during `team-change=200000` steps,
the `swap_steps` for the team of one agent is:
```
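For concreteness, plugging the 2v1 numbers from the example above into the formula (the one-agent team has `num_agents=1`, its opponent `num_opponent_agents=2`, with `team_change=200000` and `x=4`) gives:

```
swap_steps = (1 / 2) * (200000 / 4) = 25000
```

By the same formula, the two-agent team would use `swap_steps = (2 / 1) * (200000 / 4) = 100000`.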

In a proper training run, the ELO of the agent should steadily increase. The absolute value of the ELO is less important than the change in ELO over training iterations.
Note, this implementation will support any number of teams but ELO is only applicable to games with two teams. It is ongoing work to implement
a reliable metric for measuring progress in these scenarios. These scenarios can still train, though as of now, reward and qualitative observations
a reliable metric for measuring progress in scenarios with three or more teams. These scenarios can still train, though as of now, reward and qualitative observations
are the only metric by which we can judge performance.

61
ml-agents/mlagents/trainers/learn.py


from mlagents import tf_utils
from mlagents.trainers.trainer_controller import TrainerController
from mlagents.trainers.meta_curriculum import MetaCurriculum
from mlagents.trainers.trainer_util import load_config, TrainerFactory
from mlagents.trainers.trainer_util import (
load_config,
TrainerFactory,
handle_existing_directories,
)
from mlagents.trainers.stats import (
TensorboardWriter,
CSVWriter,

default=False,
dest="load_model",
action="store_true",
help="Whether to load the model or randomly initialize",
help=argparse.SUPPRESS, # Deprecated but still usable for now.
)
argparser.add_argument(
"--resume",
default=False,
dest="resume",
action="store_true",
help="Resumes training from a checkpoint. Specify a --run-id to use this option.",
)
argparser.add_argument(
"--force",
default=False,
dest="force",
action="store_true",
help="Force-overwrite existing models and summaries for a run-id that has been used "
"before.",
)
argparser.add_argument(
"--run-id",

default=False,
dest="train_model",
action="store_true",
help="Whether to train model, or only run inference",
help=argparse.SUPPRESS,
)
argparser.add_argument(
"--inference",
default=False,
dest="inference",
action="store_true",
help="Run in Python inference mode (don't train). Use with --resume to load a model trained with an "
"existing run-id.",
)
argparser.add_argument(
"--base-port",

env_path: Optional[str] = parser.get_default("env_path")
run_id: str = parser.get_default("run_id")
load_model: bool = parser.get_default("load_model")
resume: bool = parser.get_default("resume")
force: bool = parser.get_default("force")
inference: bool = parser.get_default("inference")
save_freq: int = parser.get_default("save_freq")
keep_checkpoints: int = parser.get_default("keep_checkpoints")
base_port: int = parser.get_default("base_port")

argparse_args["sampler_config"] = load_config(
argparse_args["sampler_file_path"]
)
# Keep deprecated --load working, TODO: remove
argparse_args["resume"] = argparse_args["resume"] or argparse_args["load_model"]
# Since argparse accepts file paths in the config options which don't exist in CommandLineOptions,
# these keys will need to be deleted to use the **/splat operator below.
argparse_args.pop("sampler_file_path")

"Environment/Episode Length",
],
)
tb_writer = TensorboardWriter(summaries_dir)
handle_existing_directories(
model_path, summaries_dir, options.resume, options.force
)
tb_writer = TensorboardWriter(summaries_dir, clear_past_data=not options.resume)
gauge_write = GaugeWriter()
console_writer = ConsoleWriter()
StatsReporter.add_writer(tb_writer)

options.run_id,
model_path,
options.keep_checkpoints,
options.train_model,
options.load_model,
not options.inference,
options.resume,
run_seed,
maybe_meta_curriculum,
options.multi_gpu,

options.run_id,
options.save_freq,
maybe_meta_curriculum,
options.train_model,
not options.inference,
run_seed,
sampler_manager,
resampling_interval,

logger.debug("Configuration for this run:")
logger.debug(json.dumps(options._asdict(), indent=4))
# Options deprecation warnings
if options.load_model:
logger.warning(
"The --load option has been deprecated. Please use the --resume option instead."
)
if options.train_model:
logger.warning(
"The --train option has been deprecated. Train mode is now the default. Use "
"--inference to run in inference mode."
)
run_seed = options.seed
if options.cpu:

7
ml-agents/mlagents/trainers/policy/tf_policy.py


logger.info("Loading Model for brain {}".format(self.brain.brain_name))
ckpt = tf.train.get_checkpoint_state(self.model_path)
if ckpt is None:
logger.info(
"The model {0} could not be found. Make "
raise UnityPolicyException(
"The model {0} could not be loaded. Make "
"--run-id".format(self.model_path)
"--run-id. and that the previous run you are resuming from had the same "
"behavior names.".format(self.model_path)
)
self.saver.restore(self.sess, ckpt.model_checkpoint_path)

22
ml-agents/mlagents/trainers/stats.py


class TensorboardWriter(StatsWriter):
def __init__(self, base_dir: str):
def __init__(self, base_dir: str, clear_past_data: bool = False):
:param clear_past_data: Whether or not to clean up existing Tensorboard files associated with the base_dir and
category.
self._clear_past_data = clear_past_data
def write_stats(
self, category: str, values: Dict[str, StatsSummary], step: int

basedir=self.base_dir, category=category
)
os.makedirs(filewriter_dir, exist_ok=True)
if self._clear_past_data:
self._delete_all_events_files(filewriter_dir)
def _delete_all_events_files(self, directory_name: str) -> None:
for file_name in os.listdir(directory_name):
if file_name.startswith("events.out"):
logger.warning(
"{} was left over from a previous run. Deleting.".format(file_name)
)
full_fname = os.path.join(directory_name, file_name)
try:
os.remove(full_fname)
except OSError:
logger.warning(
"{} was left over from a previous run and "
"not deleted.".format(full_fname)
)
def add_property(
self, category: str, property_type: StatsPropertyType, value: Any

18
ml-agents/mlagents/trainers/tests/test_learn.py


return parse_command_line(args)
@patch("mlagents.trainers.learn.handle_existing_directories")
@patch("mlagents.trainers.learn.TrainerFactory")
@patch("mlagents.trainers.learn.SamplerManager")
@patch("mlagents.trainers.learn.SubprocessEnvManager")

subproc_env_mock,
sampler_manager_mock,
trainer_factory_mock,
handle_dir_mock,
):
mock_env = MagicMock()
mock_env.external_brain_names = []

"ppo",
50000,
None,
False,
True,
handle_dir_mock.assert_called_once_with(
"./models/ppo", "./summaries", False, False
)
StatsReporter.writers.clear() # make sure there aren't any writers as added by learn.py

assert opt.sampler_config is None
assert opt.keep_checkpoints == 5
assert opt.lesson == 0
assert opt.load_model is False
assert opt.resume is False
assert opt.inference is False
assert opt.train_model is False
assert opt.base_port == 5005
assert opt.num_envs == 1
assert opt.no_graphics is False

"--sampler=./mysample",
"--keep-checkpoints=42",
"--lesson=3",
"--load",
"--resume",
"--inference",
"--run-id=myawesomerun",
"--save-freq=123456",
"--seed=7890",

assert opt.sampler_config == {}
assert opt.keep_checkpoints == 42
assert opt.lesson == 3
assert opt.load_model is True
assert opt.train_model is True
assert opt.inference is True
assert opt.resume is True
@patch("builtins.open", new_callable=mock_open, read_data="{}")

23
ml-agents/mlagents/trainers/tests/test_stats.py


import tempfile
import unittest
import csv
import time
from mlagents.trainers.stats import (
StatsReporter,

# Test write_stats
category = "category1"
with tempfile.TemporaryDirectory(prefix="unittest-") as base_dir:
tb_writer = TensorboardWriter(base_dir)
tb_writer = TensorboardWriter(base_dir, clear_past_data=False)
statssummary1 = StatsSummary(mean=1.0, std=1.0, num=1)
tb_writer.write_stats("category1", {"key1": statssummary1}, 10)

"category1", StatsPropertyType.HYPERPARAMETERS, {"example": 1.0}
)
assert mock_filewriter.return_value.add_summary.call_count > 1
def test_tensorboard_writer_clear(tmp_path):
tb_writer = TensorboardWriter(tmp_path, clear_past_data=False)
statssummary1 = StatsSummary(mean=1.0, std=1.0, num=1)
tb_writer.write_stats("category1", {"key1": statssummary1}, 10)
# TB has some sort of timeout before making a new file
time.sleep(1.0)
assert len(os.listdir(os.path.join(tmp_path, "category1"))) > 0
# See if creating a new one doesn't delete it
tb_writer = TensorboardWriter(tmp_path, clear_past_data=False)
tb_writer.write_stats("category1", {"key1": statssummary1}, 10)
assert len(os.listdir(os.path.join(tmp_path, "category1"))) > 1
time.sleep(1.0)
# See if creating a new one deletes old ones
tb_writer = TensorboardWriter(tmp_path, clear_past_data=True)
tb_writer.write_stats("category1", {"key1": statssummary1}, 10)
assert len(os.listdir(os.path.join(tmp_path, "category1"))) == 1
def test_csv_writer():

22
ml-agents/mlagents/trainers/tests/test_trainer_util.py


import pytest
import yaml
import io
import os
from unittest.mock import patch
from mlagents.trainers import trainer_util

with pytest.raises(TrainerConfigError):
fp = io.StringIO(file_contents)
_load_config(fp)
def test_existing_directories(tmp_path):
model_path = os.path.join(tmp_path, "runid")
# Unused summary path
summary_path = os.path.join(tmp_path, "runid")
# Test fresh new unused path - should do nothing.
trainer_util.handle_existing_directories(model_path, summary_path, False, False)
# Test resume with fresh path - should throw an exception.
with pytest.raises(UnityTrainerException):
trainer_util.handle_existing_directories(model_path, summary_path, True, False)
# make a directory
os.mkdir(model_path)
# Test try to train w.o. force, should complain
with pytest.raises(UnityTrainerException):
trainer_util.handle_existing_directories(model_path, summary_path, False, False)
# Test try to train w/ resume - should work
trainer_util.handle_existing_directories(model_path, summary_path, True, False)
# Test try to train w/ force - should work
trainer_util.handle_existing_directories(model_path, summary_path, False, True)

30
ml-agents/mlagents/trainers/trainer_util.py


"Error parsing yaml file. Please check for formatting errors. "
"A tool such as http://www.yamllint.com/ can be helpful with this."
) from e
def handle_existing_directories(
model_path: str, summary_path: str, resume: bool, force: bool
) -> None:
"""
Validates that if the run_id model exists, we do not overwrite it unless --force is specified.
Throws an exception if resume isn't specified and run_id exists. Throws an exception
if --resume is specified and run-id was not found.
:param model_path: The model path specified.
:param summary_path: The summary path to be used.
:param resume: Whether or not the --resume flag was passed.
:param force: Whether or not the --force flag was passed.
"""
model_path_exists = os.path.isdir(model_path)
if model_path_exists:
if not resume and not force:
raise UnityTrainerException(
"Previous data from this run-id was found. "
"Either specify a new run-id, use --resume to resume this run, "
"or use the --force parameter to overwrite existing data."
)
else:
if resume:
raise UnityTrainerException(
"Previous data from this run-id was not found. "
"Train a new run by removing the --resume flag."
)

32
ml-agents/tests/yamato/training_int_tests.py


base_path = get_base_path()
print(f"Running in base path {base_path}")
# Only build the standalone player if we're overriding the C# version
# Otherwise we'll use the one built earlier in the pipeline.
# We can't rely on the old C# code recognizing the commandline argument to set the output
# So rename testPlayer (containing the most recent build) to something else temporarily
full_player_path = os.path.join("Project", "testPlayer.app")
temp_player_path = os.path.join("Project", "temp_testPlayer.app")
final_player_path = os.path.join("Project", f"testPlayer_{csharp_version}.app")
os.rename(full_player_path, temp_player_path)
build_returncode = run_standalone_build(base_path)
build_returncode = run_standalone_build(base_path)
if build_returncode != 0:
print("Standalone build FAILED!")
sys.exit(build_returncode)
if build_returncode != 0:
print("Standalone build FAILED!")
sys.exit(build_returncode)
# Now rename the newly-built executable, and restore the old one
os.rename(full_player_path, final_player_path)
os.rename(temp_player_path, full_player_path)
standalone_player_path = f"testPlayer_{csharp_version}"
else:
standalone_player_path = "testPlayer"
venv_path = init_venv(python_version)

buffer_size=10,
)
# TODO pass scene name and exe destination to build
# TODO make sure we fail if the exe isn't found - see MLA-559
mla_learn_cmd = f"mlagents-learn override.yaml --train --env=Project/testPlayer --run-id={run_id} --no-graphics --env-args -logFile -" # noqa
mla_learn_cmd = (
f"mlagents-learn override.yaml --train --env=Project/{standalone_player_path} "
f"--run-id={run_id} --no-graphics --env-args -logFile -"
) # noqa
res = subprocess.run(
f"source {venv_path}/bin/activate; {mla_learn_cmd}", shell=True
)

10
ml-agents/tests/yamato/yamato_utils.py


return os.getcwd()
def run_standalone_build(base_path: str, verbose: bool = False) -> int:
def run_standalone_build(
base_path: str, verbose: bool = False, output_path: str = None
) -> int:
Run BuildStandalonePlayerOSX test to produce a player at Project/testPlayer
:param base_path:
:return:
Run BuildStandalonePlayerOSX test to produce a player. The location defaults to Project/testPlayer.
"""
unity_exe = get_unity_executable_path()
print(f"Running BuildStandalonePlayerOSX via {unity_exe}")

]
if verbose:
test_args += ["-logfile", "-"]
if output_path is not None:
test_args += ["--mlagents-build-output-path", output_path]
print(f"{' '.join(test_args)} ...")
timeout = 30 * 60 # 30 minutes, just in case

938
docs/images/strikersvsgoalie.png
File diff is too large to display.

0
ml-agents/tests/yamato/lowlevel_api_tests.py
