Merge branch 'develop-team-change-reset' into asymm-envs

5 年前 · e0aa5cee
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
    hooks:
    -   id: markdown-link-check
        name: markdown-link-check
-        # markdown-link-check doesn't support multiple files on the commandline, so this hacks around that.
-        # Note that you must install the package separately via npm. For example:
-        #  brew install npm; npm install -g markdown-link-check
-        entry: bash -c 'for i in "$@"; do markdown-link-check -c markdown-link-check.fast.json "$i"; done' --
-        language: system
+        entry: utils/run_markdown_link_check.py
+        language: script
        types: [markdown]
        # Don't check localized files since their target might not be localized.
        exclude: ".*localized.*"
        name: markdown-link-check-full
-        entry: bash -c 'for i in "$@"; do markdown-link-check -c markdown-link-check.full.json "$i"; done' --
-        language: system
+        entry: utils/run_markdown_link_check.py
+        language: script
+        # Don't check localized files since their target might not be localized.
+        # Only run manually, e.g. pre-commit run --hook-stage manual markdown-link-check-full
+        args: [--check-remote]
    -   id: validate-versions
        name: validate library versions
        language: script
--- a/com.unity.ml-agents/Tests/Editor/ModelRunnerTest.cs
+++ b/com.unity.ml-agents/Tests/Editor/ModelRunnerTest.cs
    [TestFixture]
    public class ModelRunnerTest
    {
-        const string k_continuous2vis8vec2actionPath = "Packages/com.unity.ml-agents/Tests/Editor/Resources/continuous2vis8vec2action.nn";
-        const string k_discrete1vis0vec_2_3action_recurrModelPath = "Packages/com.unity.ml-agents/Tests/Editor/Resources/discrete1vis0vec_2_3action_recurr.nn";
+        const string k_continuous2vis8vec2actionPath = "Packages/com.unity.ml-agents/Tests/Editor/TestModels/continuous2vis8vec2action.nn";
+        const string k_discrete1vis0vec_2_3action_recurrModelPath = "Packages/com.unity.ml-agents/Tests/Editor/TestModels/discrete1vis0vec_2_3action_recurr.nn";
        NNModel continuous2vis8vec2actionModel;
        NNModel discrete1vis0vec_2_3action_recurrModel;
        Test3DSensorComponent sensor_21_20_3;
--- a/com.unity.ml-agents/Tests/Editor/ParameterLoaderTest.cs
+++ b/com.unity.ml-agents/Tests/Editor/ParameterLoaderTest.cs
    [TestFixture]
    public class ParameterLoaderTest
    {
-        const string k_continuous2vis8vec2actionPath = "Packages/com.unity.ml-agents/Tests/Editor/Resources/continuous2vis8vec2action.nn";
-        const string k_discrete1vis0vec_2_3action_recurrModelPath = "Packages/com.unity.ml-agents/Tests/Editor/Resources/discrete1vis0vec_2_3action_recurr.nn";
+        const string k_continuous2vis8vec2actionPath = "Packages/com.unity.ml-agents/Tests/Editor/TestModels/continuous2vis8vec2action.nn";
+        const string k_discrete1vis0vec_2_3action_recurrModelPath = "Packages/com.unity.ml-agents/Tests/Editor/TestModels/discrete1vis0vec_2_3action_recurr.nn";
        NNModel continuous2vis8vec2actionModel;
        NNModel discrete1vis0vec_2_3action_recurrModel;
        Test3DSensorComponent sensor_21_20_3;
--- a/docs/Installation.md
+++ b/docs/Installation.md
 just cloned. You can add the `com.unity.ml-agents` package to
 your project by navigating to the menu `Window`  -> `Package Manager`. In the package manager
 window click on the `+` button. Select `Add package from disk...` and navigate into the
-`com.unity.ml-agents` folder and select the `package.json` folder.
+`com.unity.ml-agents` folder and select the `package.json` file.

 **NOTE:** In Unity 2018.4 it's on the bottom right of the packages list, and in Unity 2019.3 it's
 on the top left of the packages list.
--- a/docs/Training-Curriculum-Learning.md
+++ b/docs/Training-Curriculum-Learning.md

 ## An Instructional Example

-*[**Note**: The example provided below is for instructional purposes, and was based on an early version of the [Wall Jump example environment](Example-Environments.md). As such, it is not possible to directly replicate the results here using that environment.]*
+*[**Note**: The example provided below is for instructional purposes, and was based on an early version of the [Wall Jump example environment](Learning-Environment-Examples.md).
+As such, it is not possible to directly replicate the results here using that environment.]*

 Imagine a task in which an agent needs to scale a wall to arrive at a goal. The
 starting point when training an agent to accomplish this task will be a random

 You can then keep track of the current lessons and progresses via TensorBoard.

-__Note__: If you are resuming a training session that uses curriculum, please pass the number of the last-reached lesson using the `--lesson` flag when running `mlagents-learn`.
+__Note__: If you are resuming a training session that uses curriculum, please pass the number of the last-reached lesson using the `--lesson` flag when running `mlagents-learn`.
--- a/docs/Training-on-Microsoft-Azure.md
+++ b/docs/Training-on-Microsoft-Azure.md

 A pre-configured virtual machine image is available in the Azure Marketplace and
 is nearly completely ready for training. You can start by deploying the
-[Data Science Virtual Machine for Linux (Ubuntu)](https://azuremarketplace.microsoft.com/en-us/marketplace/apps/microsoft-dsvm.linux-data-science-vm-ubuntu)
+[Data Science Virtual Machine for Linux (Ubuntu)](https://azuremarketplace.microsoft.com/en-us/marketplace/apps/microsoft-dsvm.ubuntu-1804)
 into your Azure subscription.

 Note that, if you choose to deploy the image to an
--- a/ml-agents/mlagents/trainers/agent_processor.py
+++ b/ml-agents/mlagents/trainers/agent_processor.py
            if not terminated:
                self.episode_steps[global_id] += 1

-            # if the trajectory is too long, we truncate it
+            # Add a trajectory segment to the buffer if terminal or the length has reached the time horizon
            if (
                len(self.experience_buffers[global_id]) >= self.max_trajectory_length
                or terminated
--- a/ml-agents/mlagents/trainers/ghost/controller.py
+++ b/ml-agents/mlagents/trainers/ghost/controller.py
        self._learning_team: int = -1
        # Dict from team id to GhostTrainer for ELO calculation
        self._ghost_trainers: Dict[int, GhostTrainer] = {}
+        # Signals to the trainer controller to perform a hard reset
+        self._reset = False

    @property
    def get_learning_team(self) -> int:
        """
        return self._learning_team
+
+    @property
+    def reset(self) -> bool:
+        """
+        Whether or not a team change occurred since this property was last read.
+        Reading the property consumes the signal: the internal flag is cleared
+        before returning, so a subsequent read yields False until the flag is
+        set again. Causes full reset in trainer_controller.
+        :return: The truth value of the team changing
+        """
+        # Capture the flag before clearing it so the caller still sees the signal.
+        change_team = self._reset
+        if self._reset:
+            self._reset = False
+        return change_team

    def subscribe_team_id(self, team_id: int, trainer: GhostTrainer) -> None:
        """
        logger.debug(
            "Learning team {} swapped on step {}".format(self._learning_team, step)
        )
+        self._reset = True

    # Adapted from https://github.com/Unity-Technologies/ml-agents/pull/1975 and
    # https://metinmediamath.wordpress.com/2013/11/27/how-to-calculate-the-elo-rating-including-example/
--- a/ml-agents/mlagents/trainers/learn.py
+++ b/ml-agents/mlagents/trainers/learn.py
 from mlagents import tf_utils
 from mlagents.trainers.trainer_controller import TrainerController
 from mlagents.trainers.meta_curriculum import MetaCurriculum
+from mlagents.trainers.ghost.controller import GhostController
 from mlagents.trainers.trainer_util import (
    load_config,
    TrainerFactory,
        sampler_manager, resampling_interval = create_sampler_manager(
            options.sampler_config, run_seed
        )
+        ghost_controller = GhostController()
+
        trainer_factory = TrainerFactory(
            options.trainer_config,
            summaries_dir,
            not options.inference,
            options.resume,
            run_seed,
+            ghost_controller,
            maybe_init_path,
            maybe_meta_curriculum,
            options.multi_gpu,
            run_seed,
            sampler_manager,
            resampling_interval,
+            ghost_controller,
        )

    # Begin training
--- a/ml-agents/mlagents/trainers/trainer_controller.py
+++ b/ml-agents/mlagents/trainers/trainer_controller.py
 from mlagents.trainers.trainer_util import TrainerFactory
 from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
 from mlagents.trainers.agent_processor import AgentManager
+from mlagents.trainers.ghost.controller import GhostController


 class TrainerController(object):
        training_seed: int,
        sampler_manager: SamplerManager,
        resampling_interval: Optional[int],
+        ghost_controller: GhostController,
    ):
        """
        :param model_path: Path to save the model.
        self.meta_curriculum = meta_curriculum
        self.sampler_manager = sampler_manager
        self.resampling_interval = resampling_interval
+        self.ghost_controller = ghost_controller

        self.trainer_threads: List[threading.Thread] = []
        self.kill_trainers = False
            and (self.resampling_interval)
            and (steps % self.resampling_interval == 0)
        )
-        if meta_curriculum_reset or generalization_reset:
+        if meta_curriculum_reset or generalization_reset or self.ghost_controller.reset:
            self.end_trainer_episodes(env, lessons_incremented)

    @timed
--- a/ml-agents/mlagents/trainers/trainer_util.py
+++ b/ml-agents/mlagents/trainers/trainer_util.py
        train_model: bool,
        load_model: bool,
        seed: int,
+        ghost_controller: GhostController,
        init_path: str = None,
        meta_curriculum: MetaCurriculum = None,
        multi_gpu: bool = False,
        self.seed = seed
        self.meta_curriculum = meta_curriculum
        self.multi_gpu = multi_gpu
-        self.ghost_controller = GhostController()
+        self.ghost_controller = ghost_controller

    def generate(self, brain_name: str) -> Trainer:
        return initialize_trainer(
--- a/ml-agents/setup.py
+++ b/ml-agents/setup.py
        "six>=1.12.0",
        "tensorflow>=1.7,<3.0",
        'pypiwin32==223;platform_system=="Windows"',
+        # We don't actually need six, but tensorflow does, and pip seems
+        # to get confused and install the wrong version.
+        "six>=1.12.0",
    ],
    python_requires=">=3.6.1",
    entry_points={
--- a/com.unity.ml-agents/Tests/Editor/TestModels.meta
+++ b/com.unity.ml-agents/Tests/Editor/TestModels.meta
+fileFormatVersion: 2
+guid: 95997790219c547e584c3cb50122a95f
+folderAsset: yes
+DefaultImporter:
+  externalObjects: {}
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: 
--- a/utils/run_markdown_link_check.py
+++ b/utils/run_markdown_link_check.py
+#!/usr/bin/env python3
+
+import argparse
+import subprocess
+
+if __name__ == "__main__":
+    # markdown-link-check doesn't support multiple files on the commandline, so this hacks around that.
+    # Note that you must install the package separately via npm. For example:
+    #  brew install npm; npm install -g markdown-link-check
+    #
+    # Usage: run_markdown_link_check.py [--check-remote] [files ...]
+    #   --check-remote  use markdown-link-check.full.json instead of
+    #                   markdown-link-check.fast.json as the config file.
+    # Exits non-zero if markdown-link-check fails for any of the given files.
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--check-remote", action="store_true")
+    parser.add_argument("files", nargs="*")
+    args = parser.parse_args()
+
+    config_file = (
+        "markdown-link-check.full.json"
+        if args.check_remote
+        else "markdown-link-check.fast.json"
+    )
+
+    # Check every file instead of aborting on the first failure, so a single
+    # run reports all broken links rather than only the first offending file.
+    failed_files = []
+    for f in args.files:
+        subprocess_args = ["markdown-link-check", "-c", config_file, f]
+        if subprocess.call(subprocess_args) != 0:
+            failed_files.append(f)
+
+    if failed_files:
+        # SystemExit with a string prints it to stderr and exits with status 1,
+        # which fails the hook without a Python traceback.
+        raise SystemExit(
+            "markdown-link-check failed for: " + ", ".join(failed_files)
+        )
--- a/com.unity.ml-agents/Tests/Editor/Resources.meta
+++ b/com.unity.ml-agents/Tests/Editor/Resources.meta
-fileFormatVersion: 2
-guid: 22f1c3c8541da48e480c6b921343c2ee
-folderAsset: yes
-DefaultImporter:
-  externalObjects: {}
-  userData: 
-  assetBundleName: 
-  assetBundleVariant: 
--- a//com.unity.ml-agents/Tests/Editor/TestModels
+++ b//com.unity.ml-agents/Tests/Editor/TestModels