浏览代码

Merge branch 'self-play-mutex' into soccer-2v1

/asymm-envs
Andrew Cohen 5 年前
当前提交
6ade2ddc
共有 6 个文件被更改,包括 30 次插入160 次删除
  1. 2
      Dockerfile
  2. 5
      docs/Training-Self-Play.md
  3. 9
      docs/Using-Docker.md
  4. 79
      ml-agents-envs/mlagents_envs/environment.py
  5. 65
      ml-agents/mlagents/trainers/learn.py
  6. 30
      ml-agents/mlagents/trainers/tests/test_learn.py

2
Dockerfile


# so allow enough ports for several environments.
EXPOSE 5004-5050
ENTRYPOINT ["mlagents-learn"]
ENTRYPOINT ["xvfb-run", "--auto-servernum", "--server-args='-screen 0 640x480x24'", "mlagents-learn"]

5
docs/Training-Self-Play.md


Recommended Range : 10000-100000
### Play against current best ratio
### Play against latest model ratio
an agent will play against the current opponent. With probability
an agent will play against the latest opponent policy. With probability
1 - `play_against_latest_model_ratio`, the agent will play against a snapshot of its
opponent from a past iteration.

Note that this implementation supports any number of teams, but ELO is only applicable to games with two teams. Implementing
a reliable metric for measuring progress in scenarios with more than two teams is ongoing work. Such scenarios can still be trained, though as of now, reward and qualitative observations
are the only metrics by which we can judge performance.

9
docs/Using-Docker.md


-p 5005:5005 \
-p 6006:6006 \
<image-name>:latest \
--docker-target-name=unity-volume \
<trainer-config-file> \
--env=<environment-name> \
--train \

- `source`: Reference to the path in your host OS where you will store the Unity
executable.
- `target`: Tells Docker to mount the `source` path as a disk with this name.
- `docker-target-name`: Tells the ML-Agents Python package the name of the
disk from which it can read the Unity executable and on which it can store the graph. **This should
therefore be identical to `target`.**
- `trainer-config-file`, `train`, `run-id`: ML-Agents arguments passed to
`mlagents-learn`. `trainer-config-file` is the filename of the trainer config
file, `train` trains the algorithm, and `run-id` is used to tag each

-p 5005:5005 \
-p 6006:6006 \
balance.ball.v0.1:latest 3DBall \
--docker-target-name=unity-volume \
trainer_config.yaml \
--env=3DBall \
/unity-volume/trainer_config.yaml \
--env=/unity-volume/3DBall \
--train \
--run-id=3dball_first_trial
```

79
ml-agents-envs/mlagents_envs/environment.py


worker_id: int = 0,
base_port: Optional[int] = None,
seed: int = 0,
docker_training: bool = False,
no_graphics: bool = False,
timeout_wait: int = 60,
args: Optional[List[str]] = None,

:int base_port: Baseline port number to connect to Unity environment over. worker_id increments over this.
If no environment is specified (i.e. file_name is None), the DEFAULT_EDITOR_PORT will be used.
:int worker_id: Offset from base_port. Used for training multiple environments simultaneously.
:bool docker_training: Informs this class whether the process is being run within a container.
:bool no_graphics: Whether to run the Unity simulator in no-graphics mode
:int timeout_wait: Time (in seconds) to wait for connection from environment.
:list args: Additional Unity command line arguments

"the worker-id must be 0 in order to connect with the Editor."
)
if file_name is not None:
self.executable_launcher(file_name, docker_training, no_graphics, args)
self.executable_launcher(file_name, no_graphics, args)
else:
logger.info(
f"Listening on port {self.port}. "

launch_string = candidates[0]
return launch_string
def executable_launcher(self, file_name, docker_training, no_graphics, args):
def executable_launcher(self, file_name, no_graphics, args):
launch_string = self.validate_environment_path(file_name)
if launch_string is None:
self._close(0)

else:
logger.debug("This is the launch string {}".format(launch_string))
# Launch Unity environment
if not docker_training:
subprocess_args = [launch_string]
if no_graphics:
subprocess_args += ["-nographics", "-batchmode"]
subprocess_args += [
UnityEnvironment.PORT_COMMAND_LINE_ARG,
str(self.port),
]
subprocess_args += args
try:
self.proc1 = subprocess.Popen(
subprocess_args,
# start_new_session=True means that signals to the parent python process
# (e.g. SIGINT from keyboard interrupt) will not be sent to the new process on POSIX platforms.
# This is generally good since we want the environment to have a chance to shutdown,
# but may be undesirable in some cases; if so, we'll add a command-line toggle.
# Note that on Windows, the CTRL_C signal will still be sent.
start_new_session=True,
)
except PermissionError as perm:
# This is likely due to missing read or execute permissions on file.
raise UnityEnvironmentException(
f"Error when trying to launch environment - make sure "
f"permissions are set correctly. For example "
f'"chmod -R 755 {launch_string}"'
) from perm
else:
# Comments for future maintenance:
# xvfb-run is a wrapper around Xvfb, a virtual xserver where all
# rendering is done to virtual memory. It creates a new virtual server,
# automatically picking a server number (`auto-servernum`).
# The server is passed the arguments using `server-args`, we are telling
# Xvfb to create Screen number 0 with width 640, height 480 and depth 24 bits.
# Note that 640 X 480 are the default width and height. The main reason for
# us to add this is because we'd like to change the depth from the default
# of 8 bits to 24.
# Unfortunately, this means that we will need to pass the arguments through
# a shell which is why we set `shell=True`. Now, this adds its own
# complications. E.g SIGINT can bounce off the shell and not get propagated
# to the child processes. This is why we add `exec`, so that the shell gets
# launched, the arguments are passed to `xvfb-run`. `exec` replaces the shell
# we created with `xvfb`.
#
docker_ls = (
f"exec xvfb-run --auto-servernum --server-args='-screen 0 640x480x24'"
f" {launch_string} {UnityEnvironment.PORT_COMMAND_LINE_ARG} {self.port}"
)
subprocess_args = [launch_string]
if no_graphics:
subprocess_args += ["-nographics", "-batchmode"]
subprocess_args += [UnityEnvironment.PORT_COMMAND_LINE_ARG, str(self.port)]
subprocess_args += args
try:
docker_ls,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
shell=True,
subprocess_args,
# start_new_session=True means that signals to the parent python process
# (e.g. SIGINT from keyboard interrupt) will not be sent to the new process on POSIX platforms.
# This is generally good since we want the environment to have a chance to shutdown,
# but may be undesirable in some cases; if so, we'll add a command-line toggle.
# Note that on Windows, the CTRL_C signal will still be sent.
start_new_session=True,
except PermissionError as perm:
# This is likely due to missing read or execute permissions on file.
raise UnityEnvironmentException(
f"Error when trying to launch environment - make sure "
f"permissions are set correctly. For example "
f'"chmod -R 755 {launch_string}"'
) from perm
def _update_group_specs(self, output: UnityOutputProto) -> None:
init_output = output.rl_initialization_output

65
ml-agents/mlagents/trainers/learn.py


import argparse
import os
import glob
import shutil
import numpy as np
import json

help="Number of parallel environments to use for training",
)
argparser.add_argument(
"--docker-target-name",
default=None,
dest="docker_target_name",
help="Docker volume to store training-specific files",
)
argparser.add_argument(
"--no-graphics",
default=False,
action="store_true",

no_graphics: bool = parser.get_default("no_graphics")
multi_gpu: bool = parser.get_default("multi_gpu")
sampler_config: Optional[Dict] = None
docker_target_name: Optional[str] = parser.get_default("docker_target_name")
env_args: Optional[List[str]] = parser.get_default("env_args")
cpu: bool = parser.get_default("cpu")
width: int = parser.get_default("width")

configs loaded from files.
"""
argparse_args = vars(args)
docker_target_name = argparse_args["docker_target_name"]
if docker_target_name is not None:
trainer_config_path = f"/{docker_target_name}/{trainer_config_path}"
if curriculum_config_path is not None:
curriculum_config_path = (
f"/{docker_target_name}/{curriculum_config_path}"
)
argparse_args["trainer_config"] = load_config(trainer_config_path)
if curriculum_config_path is not None:
argparse_args["curriculum_config"] = load_config(curriculum_config_path)

:param run_options: Command line arguments for training.
"""
with hierarchical_timer("run_training.setup"):
# Recognize and use docker volume if one is passed as an argument
if not options.docker_target_name:
model_path = f"./models/{options.run_id}"
summaries_dir = "./summaries"
else:
model_path = f"/{options.docker_target_name}/models/{options.run_id}"
summaries_dir = f"/{options.docker_target_name}/summaries"
model_path = f"./models/{options.run_id}"
summaries_dir = "./summaries"
port = options.base_port
# Configure CSV, Tensorboard Writers and StatsReporter

if options.env_path is None:
port = UnityEnvironment.DEFAULT_EDITOR_PORT
env_factory = create_environment_factory(
options.env_path,
options.docker_target_name,
options.no_graphics,
run_seed,
port,
options.env_args,
options.env_path, options.no_graphics, run_seed, port, options.env_args
)
engine_config = EngineConfig(
options.width,

return meta_curriculum
def prepare_for_docker_run(docker_target_name, env_path):
    """Copy a Unity environment from a mounted Docker volume into the container.

    Every entry directly under ``/<docker_target_name>/`` whose path contains
    ``env_path`` is copied into ``/ml-agents/``. Directories are copied
    recursively; regular files are copied and then made executable.

    Copying is best effort: a failure on one entry is logged as a warning and
    the remaining entries are still processed.

    :param docker_target_name: Name of the mounted volume (the directory
        directly under ``/``) to search.
    :param env_path: Environment name to look for; matched as a substring of
        each candidate path.
    :return: The environment path relocated under ``/ml-agents/``.
    """
    logger = logging.getLogger("mlagents.trainers")
    for src in glob.glob(f"/{docker_target_name}/*"):
        if env_path not in src:
            continue
        # Use the path glob actually returned instead of rebuilding it from
        # the volume name, so the source always refers to the matched entry.
        dst = f"/ml-agents/{os.path.basename(src)}"
        try:
            if os.path.isdir(src):
                shutil.copytree(src, dst)
            else:
                shutil.copyfile(src, dst)
                os.chmod(dst, 0o775)  # Make executable
        except OSError as err:
            # shutil.Error and SameFileError are OSError subclasses, so every
            # copy/chmod failure is caught here without hiding programming
            # errors such as TypeError.
            logger.warning("Could not copy %s to %s: %s", src, dst, err)
    return f"/ml-agents/{env_path}"
docker_target_name: Optional[str],
no_graphics: bool,
seed: int,
start_port: int,

raise UnityEnvironmentException(
f"Couldn't launch the {env_path} environment. Provided filename does not match any environments."
)
docker_training = docker_target_name is not None
if docker_training and env_path is not None:
# Comments for future maintenance:
# Some OS/VM instances (e.g. COS GCP Image) mount filesystems
# with COS flag which prevents execution of the Unity scene,
# to get around this, we will copy the executable into the
# container.
# Navigate in docker path and find env_path and copy it.
env_path = prepare_for_docker_run(docker_target_name, env_path)
def create_unity_environment(
worker_id: int, side_channels: List[SideChannel]

file_name=env_path,
worker_id=worker_id,
seed=env_seed,
docker_training=docker_training,
no_graphics=no_graphics,
base_port=start_port,
args=env_args,

30
ml-agents/mlagents/trainers/tests/test_learn.py


StatsReporter.writers.clear() # make sure there aren't any writers as added by learn.py
# Test: when --docker-target-name is supplied, run_training must hand
# TrainerController model/summary paths rooted at that docker volume.
# The patches below replace SamplerManager, SubprocessEnvManager,
# create_environment_factory and load_config inside mlagents.trainers.learn;
# the mocks are injected as arguments in bottom-up decorator order.
@patch("mlagents.trainers.learn.SamplerManager")
@patch("mlagents.trainers.learn.SubprocessEnvManager")
@patch("mlagents.trainers.learn.create_environment_factory")
@patch("mlagents.trainers.learn.load_config")
def test_docker_target_path(
load_config, create_environment_factory, subproc_env_mock, sampler_manager_mock
):
# Stand-in environment returned by the patched environment factory.
mock_env = MagicMock()
mock_env.external_brain_names = []
mock_env.academy_name = "TestAcademyName"
create_environment_factory.return_value = mock_env
# load_config returns a mock so no trainer config file is read from disk.
trainer_config_mock = MagicMock()
load_config.return_value = trainer_config_mock
options_with_docker_target = basic_options({"--docker-target-name": "dockertarget"})
# Replace TrainerController.__init__ with a mock (returning None, as __init__
# must) so the positional arguments it receives can be inspected.
mock_init = MagicMock(return_value=None)
with patch.object(TrainerController, "__init__", mock_init):
with patch.object(TrainerController, "start_learning", MagicMock()):
learn.run_training(0, options_with_docker_target)
mock_init.assert_called_once()
# Positional args 1 and 2 are expected to be the model path and summaries dir
# under the docker volume; "ppo" is presumably the run id set by basic_options
# (TODO confirm against that helper).
assert mock_init.call_args[0][1] == "/dockertarget/models/ppo"
assert mock_init.call_args[0][2] == "/dockertarget/summaries"
StatsReporter.writers.clear() # make sure there aren't any writers as added by learn.py
docker_target_name=None,
no_graphics=True,
seed=None,
start_port=8000,

assert opt.train_model is False
assert opt.base_port == 5005
assert opt.num_envs == 1
assert opt.docker_target_name is None
assert opt.no_graphics is False
assert opt.debug is False
assert opt.env_args is None

"--train",
"--base-port=4004",
"--num-envs=2",
"--docker-target-name=mydockertarget",
"--no-graphics",
"--debug",
]

assert opt.train_model is True
assert opt.base_port == 4004
assert opt.num_envs == 2
assert opt.docker_target_name == "mydockertarget"
assert opt.no_graphics is True
assert opt.debug is True

正在加载...
取消
保存