Merge branch 'self-play-mutex' into soccer-2v1

5 年前 · 6ade2ddc
--- a/2
+++ b/2
 # so allow enough ports for several environments.
 EXPOSE 5004-5050

-ENTRYPOINT ["mlagents-learn"]
+ENTRYPOINT ["xvfb-run", "--auto-servernum", "--server-args='-screen 0 640x480x24'", "mlagents-learn"]
--- a/docs/Training-Self-Play.md
+++ b/docs/Training-Self-Play.md

 Recommended Range : 10000-100000

-### Play against current best ratio
+### Play against latest model ratio
-an agent will play against the current opponent. With probability
+an agent will play against the latest opponent policy. With probability
 1 - `play_against_latest_model_ratio`, the agent will play against a snapshot of its
 opponent from a past iteration.

 Note that this implementation supports any number of teams, but ELO is only applicable to games with two teams. Implementing
 a reliable metric for measuring progress in scenarios with more than two teams is ongoing work. Such scenarios can still be trained, though as of now, reward and qualitative observations
 are the only metrics by which we can judge performance.
-
--- a/docs/Using-Docker.md
+++ b/docs/Using-Docker.md
           -p 5005:5005 \
           -p 6006:6006 \
           <image-name>:latest \
-           --docker-target-name=unity-volume \
           <trainer-config-file> \
           --env=<environment-name> \
           --train \
 - `source`: Reference to the path in your host OS where you will store the Unity
  executable.
 - `target`: Tells Docker to mount the `source` path as a disk with this name.
- `docker-target-name`: Tells the ML-Agents Python package what the name of the
-  disk where it can read the Unity executable and store the graph. **This should
-  therefore be identical to `target`.**
 - `trainer-config-file`, `train`, `run-id`: ML-Agents arguments passed to
  `mlagents-learn`. `trainer-config-file` is the filename of the trainer config
  file, `train` trains the algorithm, and `run-id` is used to tag each
           -p 5005:5005 \
           -p 6006:6006 \
           balance.ball.v0.1:latest 3DBall \
-           --docker-target-name=unity-volume \
-           trainer_config.yaml \
-           --env=3DBall \
+           /unity-volume/trainer_config.yaml \
+           --env=/unity-volume/3DBall \
           --train \
           --run-id=3dball_first_trial
 ```
--- a/ml-agents-envs/mlagents_envs/environment.py
+++ b/ml-agents-envs/mlagents_envs/environment.py
        worker_id: int = 0,
        base_port: Optional[int] = None,
        seed: int = 0,
-        docker_training: bool = False,
        no_graphics: bool = False,
        timeout_wait: int = 60,
        args: Optional[List[str]] = None,
        :int base_port: Baseline port number to connect to Unity environment over. worker_id increments over this.
        If no environment is specified (i.e. file_name is None), the DEFAULT_EDITOR_PORT will be used.
        :int worker_id: Offset from base_port. Used for training multiple environments simultaneously.
-        :bool docker_training: Informs this class whether the process is being run within a container.
        :bool no_graphics: Whether to run the Unity simulator in no-graphics mode
        :int timeout_wait: Time (in seconds) to wait for connection from environment.
        :list args: Additional Unity command line arguments
                "the worker-id must be 0 in order to connect with the Editor."
            )
        if file_name is not None:
-            self.executable_launcher(file_name, docker_training, no_graphics, args)
+            self.executable_launcher(file_name, no_graphics, args)
        else:
            logger.info(
                f"Listening on port {self.port}. "
                launch_string = candidates[0]
        return launch_string

-    def executable_launcher(self, file_name, docker_training, no_graphics, args):
+    def executable_launcher(self, file_name, no_graphics, args):
        launch_string = self.validate_environment_path(file_name)
        if launch_string is None:
            self._close(0)
        else:
            logger.debug("This is the launch string {}".format(launch_string))
            # Launch Unity environment
-            if not docker_training:
-                subprocess_args = [launch_string]
-                if no_graphics:
-                    subprocess_args += ["-nographics", "-batchmode"]
-                subprocess_args += [
-                    UnityEnvironment.PORT_COMMAND_LINE_ARG,
-                    str(self.port),
-                ]
-                subprocess_args += args
-                try:
-                    self.proc1 = subprocess.Popen(
-                        subprocess_args,
-                        # start_new_session=True means that signals to the parent python process
-                        # (e.g. SIGINT from keyboard interrupt) will not be sent to the new process on POSIX platforms.
-                        # This is generally good since we want the environment to have a chance to shutdown,
-                        # but may be undesirable in come cases; if so, we'll add a command-line toggle.
-                        # Note that on Windows, the CTRL_C signal will still be sent.
-                        start_new_session=True,
-                    )
-                except PermissionError as perm:
-                    # This is likely due to missing read or execute permissions on file.
-                    raise UnityEnvironmentException(
-                        f"Error when trying to launch environment - make sure "
-                        f"permissions are set correctly. For example "
-                        f'"chmod -R 755 {launch_string}"'
-                    ) from perm
-
-            else:
-                # Comments for future maintenance:
-                #     xvfb-run is a wrapper around Xvfb, a virtual xserver where all
-                #     rendering is done to virtual memory. It automatically creates a
-                #     new virtual server automatically picking a server number `auto-servernum`.
-                #     The server is passed the arguments using `server-args`, we are telling
-                #     Xvfb to create Screen number 0 with width 640, height 480 and depth 24 bits.
-                #     Note that 640 X 480 are the default width and height. The main reason for
-                #     us to add this is because we'd like to change the depth from the default
-                #     of 8 bits to 24.
-                #     Unfortunately, this means that we will need to pass the arguments through
-                #     a shell which is why we set `shell=True`. Now, this adds its own
-                #     complications. E.g SIGINT can bounce off the shell and not get propagated
-                #     to the child processes. This is why we add `exec`, so that the shell gets
-                #     launched, the arguments are passed to `xvfb-run`. `exec` replaces the shell
-                #     we created with `xvfb`.
-                #
-                docker_ls = (
-                    f"exec xvfb-run --auto-servernum --server-args='-screen 0 640x480x24'"
-                    f" {launch_string} {UnityEnvironment.PORT_COMMAND_LINE_ARG} {self.port}"
-                )
-
+            subprocess_args = [launch_string]
+            if no_graphics:
+                subprocess_args += ["-nographics", "-batchmode"]
+            subprocess_args += [UnityEnvironment.PORT_COMMAND_LINE_ARG, str(self.port)]
+            subprocess_args += args
+            try:
-                    docker_ls,
-                    stdout=subprocess.PIPE,
-                    stderr=subprocess.PIPE,
-                    shell=True,
+                    subprocess_args,
+                    # start_new_session=True means that signals to the parent python process
+                    # (e.g. SIGINT from keyboard interrupt) will not be sent to the new process on POSIX platforms.
+                    # This is generally good since we want the environment to have a chance to shutdown,
+                    # but may be undesirable in some cases; if so, we'll add a command-line toggle.
+                    # Note that on Windows, the CTRL_C signal will still be sent.
+                    start_new_session=True,
+            except PermissionError as perm:
+                # This is likely due to missing read or execute permissions on file.
+                raise UnityEnvironmentException(
+                    f"Error when trying to launch environment - make sure "
+                    f"permissions are set correctly. For example "
+                    f'"chmod -R 755 {launch_string}"'
+                ) from perm

    def _update_group_specs(self, output: UnityOutputProto) -> None:
        init_output = output.rl_initialization_output
--- a/ml-agents/mlagents/trainers/learn.py
+++ b/ml-agents/mlagents/trainers/learn.py
 import argparse

 import os
-import glob
-import shutil
 import numpy as np
 import json

        help="Number of parallel environments to use for training",
    )
    argparser.add_argument(
-        "--docker-target-name",
-        default=None,
-        dest="docker_target_name",
-        help="Docker volume to store training-specific files",
-    )
-    argparser.add_argument(
        "--no-graphics",
        default=False,
        action="store_true",
    no_graphics: bool = parser.get_default("no_graphics")
    multi_gpu: bool = parser.get_default("multi_gpu")
    sampler_config: Optional[Dict] = None
-    docker_target_name: Optional[str] = parser.get_default("docker_target_name")
    env_args: Optional[List[str]] = parser.get_default("env_args")
    cpu: bool = parser.get_default("cpu")
    width: int = parser.get_default("width")
          configs loaded from files.
        """
        argparse_args = vars(args)
-        docker_target_name = argparse_args["docker_target_name"]
-        if docker_target_name is not None:
-            trainer_config_path = f"/{docker_target_name}/{trainer_config_path}"
-            if curriculum_config_path is not None:
-                curriculum_config_path = (
-                    f"/{docker_target_name}/{curriculum_config_path}"
-                )
        argparse_args["trainer_config"] = load_config(trainer_config_path)
        if curriculum_config_path is not None:
            argparse_args["curriculum_config"] = load_config(curriculum_config_path)
    :param run_options: Command line arguments for training.
    """
    with hierarchical_timer("run_training.setup"):
-        # Recognize and use docker volume if one is passed as an argument
-        if not options.docker_target_name:
-            model_path = f"./models/{options.run_id}"
-            summaries_dir = "./summaries"
-        else:
-            model_path = f"/{options.docker_target_name}/models/{options.run_id}"
-            summaries_dir = f"/{options.docker_target_name}/summaries"
+        model_path = f"./models/{options.run_id}"
+        summaries_dir = "./summaries"
        port = options.base_port

        # Configure CSV, Tensorboard Writers and StatsReporter
        if options.env_path is None:
            port = UnityEnvironment.DEFAULT_EDITOR_PORT
        env_factory = create_environment_factory(
-            options.env_path,
-            options.docker_target_name,
-            options.no_graphics,
-            run_seed,
-            port,
-            options.env_args,
+            options.env_path, options.no_graphics, run_seed, port, options.env_args
        )
        engine_config = EngineConfig(
            options.width,
        return meta_curriculum


-def prepare_for_docker_run(docker_target_name, env_path):
-    for f in glob.glob(
-        "/{docker_target_name}/*".format(docker_target_name=docker_target_name)
-    ):
-        if env_path in f:
-            try:
-                b = os.path.basename(f)
-                if os.path.isdir(f):
-                    shutil.copytree(f, "/ml-agents/{b}".format(b=b))
-                else:
-                    src_f = "/{docker_target_name}/{b}".format(
-                        docker_target_name=docker_target_name, b=b
-                    )
-                    dst_f = "/ml-agents/{b}".format(b=b)
-                    shutil.copyfile(src_f, dst_f)
-                    os.chmod(dst_f, 0o775)  # Make executable
-            except Exception as e:
-                logging.getLogger("mlagents.trainers").info(e)
-    env_path = "/ml-agents/{env_path}".format(env_path=env_path)
-    return env_path
-
-
-    docker_target_name: Optional[str],
    no_graphics: bool,
    seed: int,
    start_port: int,
            raise UnityEnvironmentException(
                f"Couldn't launch the {env_path} environment. Provided filename does not match any environments."
            )
-    docker_training = docker_target_name is not None
-    if docker_training and env_path is not None:
-        #     Comments for future maintenance:
-        #         Some OS/VM instances (e.g. COS GCP Image) mount filesystems
-        #         with COS flag which prevents execution of the Unity scene,
-        #         to get around this, we will copy the executable into the
-        #         container.
-        # Navigate in docker path and find env_path and copy it.
-        env_path = prepare_for_docker_run(docker_target_name, env_path)

    def create_unity_environment(
        worker_id: int, side_channels: List[SideChannel]
            file_name=env_path,
            worker_id=worker_id,
            seed=env_seed,
-            docker_training=docker_training,
            no_graphics=no_graphics,
            base_port=start_port,
            args=env_args,
--- a/ml-agents/mlagents/trainers/tests/test_learn.py
+++ b/ml-agents/mlagents/trainers/tests/test_learn.py
    StatsReporter.writers.clear()  # make sure there aren't any writers as added by learn.py


-@patch("mlagents.trainers.learn.SamplerManager")
-@patch("mlagents.trainers.learn.SubprocessEnvManager")
-@patch("mlagents.trainers.learn.create_environment_factory")
-@patch("mlagents.trainers.learn.load_config")
-def test_docker_target_path(
-    load_config, create_environment_factory, subproc_env_mock, sampler_manager_mock
-):
-    mock_env = MagicMock()
-    mock_env.external_brain_names = []
-    mock_env.academy_name = "TestAcademyName"
-    create_environment_factory.return_value = mock_env
-    trainer_config_mock = MagicMock()
-    load_config.return_value = trainer_config_mock
-
-    options_with_docker_target = basic_options({"--docker-target-name": "dockertarget"})
-
-    mock_init = MagicMock(return_value=None)
-    with patch.object(TrainerController, "__init__", mock_init):
-        with patch.object(TrainerController, "start_learning", MagicMock()):
-            learn.run_training(0, options_with_docker_target)
-            mock_init.assert_called_once()
-            assert mock_init.call_args[0][1] == "/dockertarget/models/ppo"
-            assert mock_init.call_args[0][2] == "/dockertarget/summaries"
-    StatsReporter.writers.clear()  # make sure there aren't any writers as added by learn.py
-
-
-            docker_target_name=None,
            no_graphics=True,
            seed=None,
            start_port=8000,
    assert opt.train_model is False
    assert opt.base_port == 5005
    assert opt.num_envs == 1
-    assert opt.docker_target_name is None
    assert opt.no_graphics is False
    assert opt.debug is False
    assert opt.env_args is None
        "--train",
        "--base-port=4004",
        "--num-envs=2",
-        "--docker-target-name=mydockertarget",
        "--no-graphics",
        "--debug",
    ]
    assert opt.train_model is True
    assert opt.base_port == 4004
    assert opt.num_envs == 2
-    assert opt.docker_target_name == "mydockertarget"
    assert opt.no_graphics is True
    assert opt.debug is True