Merge branch 'soccer-2v1' into asymm-envs

5 年前 · 02d26c3f
--- a/Project/Assets/ML-Agents/Examples/Soccer/TFModels/SoccerTwos.nn
+++ b/Project/Assets/ML-Agents/Examples/Soccer/TFModels/SoccerTwos.nn
--- a/docs/Migrating.md
+++ b/docs/Migrating.md
 ### Important changes
 * The `--load` and `--train` command-line flags have been deprecated and replaced with `--resume` and `--inference`.
 * Running with the same `--run-id` twice will now throw an error.
+* The `play_against_current_self_ratio` self-play trainer hyperparameter has been renamed to `play_against_latest_model_ratio`.

 ### Steps to Migrate
 * Replace the `--load` flag with `--resume` when calling `mlagents-learn`, and don't use the `--train` flag as training
--- a/ml-agents/mlagents/trainers/behavior_id_utils.py
+++ b/ml-agents/mlagents/trainers/behavior_id_utils.py

 class BehaviorIdentifiers(NamedTuple):
    """
-    BehaviorIdentifiers is a named tuple if the identifiers that uniquely distinguish
+    BehaviorIdentifiers is a named tuple of the identifiers that uniquely distinguish
    an agent encountered in the trainer_controller. The named tuple consists of the
    fully qualified behavior name, the brain name (corresponds to trainer
    in the trainer controller) and the team id.  In the future, this can be extended
--- a/ml-agents/mlagents/trainers/ghost/controller.py
+++ b/ml-agents/mlagents/trainers/ghost/controller.py
 logger = get_logger(__name__)


-class GhostController(object):
+class GhostController:
    """
    GhostController contains a queue of team ids. GhostTrainers subscribe to the GhostController and query
    it to get the current learning team.  The GhostController cycles through team ids every 'swap_interval'
--- a/ml-agents/mlagents/trainers/ghost/trainer.py
+++ b/ml-agents/mlagents/trainers/ghost/trainer.py
        self.play_against_latest_model_ratio = self_play_parameters.get(
            "play_against_latest_model_ratio", 0.5
        )
+        if (
+            self.play_against_latest_model_ratio > 1.0
+            or self.play_against_latest_model_ratio < 0.0
+        ):
+            logger.warning(
+                "The play_against_latest_model_ratio is not between 0 and 1."
+            )
+
        self.steps_between_save = self_play_parameters.get("save_steps", 20000)
        self.steps_between_swap = self_play_parameters.get("swap_steps", 20000)
        self.steps_to_train_team = self_play_parameters.get("team_change", 100000)