您最多选择25个主题
主题必须以中文或者字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符
443 行
15 KiB
443 行
15 KiB
# # Unity ML-Agents Toolkit
|
|
|
|
import logging
|
|
import argparse
|
|
|
|
from multiprocessing import Process, Queue
|
|
import os
|
|
import glob
|
|
import shutil
|
|
import numpy as np
|
|
|
|
from typing import Any, Callable, Optional, List, NamedTuple
|
|
|
|
|
|
from mlagents.trainers.trainer_controller import TrainerController
|
|
from mlagents.trainers.exception import TrainerError
|
|
from mlagents.trainers.meta_curriculum import MetaCurriculumError, MetaCurriculum
|
|
from mlagents.trainers.trainer_util import initialize_trainers, load_config
|
|
from mlagents.envs.environment import UnityEnvironment
|
|
from mlagents.envs.sampler_class import SamplerManager
|
|
from mlagents.envs.exception import SamplerException
|
|
from mlagents.envs.base_unity_environment import BaseUnityEnvironment
|
|
from mlagents.envs.subprocess_env_manager import SubprocessEnvManager
|
|
import horovod.tensorflow as hvd
|
|
|
|
|
|
class CommandLineOptions(NamedTuple):
    """
    Immutable bundle of the values parsed from the command line.

    ``from_argparse`` expands a namespace's attributes as keyword arguments,
    so every attribute name on that namespace must be one of the fields below.
    The declaration order is part of the interface because a NamedTuple can
    also be constructed positionally.
    """

    # General run settings.
    debug: bool
    num_runs: int
    seed: int
    env_path: str
    run_id: str
    # Model handling.
    load_model: bool
    train_model: bool
    save_freq: int
    keep_checkpoints: int
    # Environment launch settings.
    base_port: int
    num_envs: int
    curriculum_folder: Optional[str]
    lesson: int
    slow: bool
    no_graphics: bool
    multi_gpu: bool  # ?
    # Paths and pass-through arguments.
    trainer_config_path: str
    sampler_file_path: Optional[str]
    docker_target_name: Optional[str]
    env_args: Optional[List[str]]

    @property
    def fast_simulation(self) -> bool:
        """Logical negation of the ``slow`` field."""
        return not self.slow

    @staticmethod
    def from_argparse(args: Any) -> "CommandLineOptions":
        """
        Builds the options tuple from a parsed-arguments object.
        :param args: Object whose attribute dictionary (``vars(args)``) maps
            field names to values.
        :return: A new CommandLineOptions populated from those attributes.
        """
        parsed_values = vars(args)
        return CommandLineOptions(**parsed_values)
|
|
|
|
|
|
def parse_command_line(argv: Optional[List[str]] = None) -> CommandLineOptions:
    """
    Declares the command-line interface and parses it.
    :param argv: Argument strings to parse; passed unchanged to
        ``ArgumentParser.parse_args`` (``None`` lets argparse use its default).
    :return: The parsed values wrapped in a CommandLineOptions tuple.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    add = parser.add_argument

    # The only positional argument: the trainer configuration file.
    add("trainer_config_path")

    # Environment, curriculum and sampler locations.
    add("--env", dest="env_path", default=None, help="Name of the Unity executable ")
    add(
        "--curriculum",
        dest="curriculum_folder",
        default=None,
        help="Curriculum json directory for environment",
    )
    add(
        "--sampler",
        dest="sampler_file_path",
        default=None,
        help="Reset parameter yaml file for environment",
    )

    # Checkpointing, lesson selection and model loading.
    add(
        "--keep-checkpoints",
        type=int,
        default=5,
        help="How many model checkpoints to keep",
    )
    add("--lesson", type=int, default=0, help="Start learning from this lesson")
    add(
        "--load",
        dest="load_model",
        action="store_true",
        default=False,
        help="Whether to load the model or randomly initialize",
    )

    # Run identity, concurrency, save cadence and seeding.
    add(
        "--run-id",
        default="ppo",
        help="The directory name for model and summary statistics",
    )
    add("--num-runs", type=int, default=1, help="Number of concurrent training sessions")
    add("--save-freq", type=int, default=50000, help="Frequency at which to save model")
    add("--seed", type=int, default=-1, help="Random seed used for training")
    add("--slow", action="store_true", help="Whether to run the game at training speed")
    add(
        "--train",
        dest="train_model",
        action="store_true",
        default=False,
        help="Whether to train model, or only run inference",
    )

    # Environment communication and deployment.
    add(
        "--base-port",
        type=int,
        default=5005,
        help="Base port for environment communication",
    )
    add(
        "--num-envs",
        type=int,
        default=1,
        help="Number of parallel environments to use for training",
    )
    add(
        "--docker-target-name",
        dest="docker_target_name",
        default=None,
        help="Docker volume to store training-specific files",
    )
    add(
        "--no-graphics",
        action="store_true",
        default=False,
        help="Whether to run the environment in no-graphics mode",
    )
    add(
        "--debug",
        action="store_true",
        default=False,
        help="Whether to run ML-Agents in debug mode with detailed logging",
    )
    add(
        "--multi-gpu",
        action="store_true",
        default=False,
        help="Setting this flag enables the use of multiple GPU's (if available) during training",
    )

    # REMAINDER collects every argument that follows --env-args.
    add(
        "--env-args",
        nargs=argparse.REMAINDER,
        default=None,
        help="Arguments passed to the Unity executable.",
    )

    namespace = parser.parse_args(argv)
    return CommandLineOptions.from_argparse(namespace)
|
|
|
|
|
|
def run_training(
    sub_id: int, run_seed: int, options: CommandLineOptions, process_queue: Queue
) -> None:
    """
    Launches a single training session and blocks until learning returns.
    :param sub_id: Unique id for training session; used in the model path,
        the controller's run name and the environment port offset.
    :param run_seed: Random seed used for training.
    :param options: Parsed command line arguments.
    :param process_queue: Queue used to send signal back to main; ``True`` is
        put on it once the trainer controller has been constructed.
    """
    docker_root = options.docker_target_name
    config_path = options.trainer_config_path
    curriculum_dir = options.curriculum_folder

    # When a docker volume is given, every path is re-rooted under it.
    if docker_root:
        config_path = "/{docker_target_name}/{trainer_config_path}".format(
            docker_target_name=docker_root, trainer_config_path=config_path
        )
        if curriculum_dir is not None:
            curriculum_dir = "/{docker_target_name}/{curriculum_folder}".format(
                docker_target_name=docker_root, curriculum_folder=curriculum_dir
            )
        model_path = "/{docker_target_name}/models/{run_id}-{sub_id}".format(
            docker_target_name=docker_root, run_id=options.run_id, sub_id=sub_id
        )
        summaries_dir = "/{docker_target_name}/summaries".format(
            docker_target_name=docker_root
        )
    else:
        model_path = "./models/{run_id}-{sub_id}".format(
            run_id=options.run_id, sub_id=sub_id
        )
        summaries_dir = "./summaries"

    trainer_config = load_config(config_path)

    # Each session gets its own block of num_envs ports above the base port.
    first_port = options.base_port + (sub_id * options.num_envs)
    env_factory = create_environment_factory(
        options.env_path,
        options.docker_target_name,
        options.no_graphics,
        run_seed,
        first_port,
        options.env_args,
    )
    env = SubprocessEnvManager(env_factory, options.num_envs)
    maybe_meta_curriculum = try_create_meta_curriculum(
        curriculum_dir, env, options.lesson
    )
    sampler_manager, resampling_interval = create_sampler_manager(
        options.sampler_file_path, env.reset_parameters, run_seed
    )

    # Horovod is initialised before the trainers are built.
    hvd.init()
    trainers = initialize_trainers(
        trainer_config,
        env.external_brains,
        summaries_dir,
        options.run_id,
        model_path,
        options.keep_checkpoints,
        options.train_model,
        options.load_model,
        run_seed,
        maybe_meta_curriculum,
        options.multi_gpu,
    )

    # Create controller and begin training.
    run_name = options.run_id + "-" + str(sub_id)
    controller = TrainerController(
        trainers,
        model_path,
        summaries_dir,
        run_name,
        options.save_freq,
        maybe_meta_curriculum,
        options.train_model,
        run_seed,
        options.fast_simulation,
        sampler_manager,
        resampling_interval,
    )

    # Signal that environment has been launched.
    process_queue.put(True)

    # Begin training
    controller.start_learning(env)
|
|
|
|
|
|
def create_sampler_manager(sampler_file_path, env_reset_params, run_seed=None):
    """
    Builds the SamplerManager and extracts the resampling interval.
    :param sampler_file_path: Path of the sampler config file, or None. When
        None, the manager is built with a None config and the interval is None.
    :param env_reset_params: Accepted for interface compatibility; not read
        by this function.
    :param run_seed: Seed forwarded to SamplerManager.
    :return: Tuple of (sampler_manager, resample_interval).
    :raises SamplerException: If the config lacks the 'resampling-interval'
        key, or its value is not a positive integer.
    """
    sampler_config = None
    resample_interval = None
    if sampler_file_path is not None:
        sampler_config = load_config(sampler_file_path)
        if "resampling-interval" not in sampler_config:
            raise SamplerException(
                "Resampling interval was not specified in the sampler file."
                " Please specify it with the 'resampling-interval' key in the sampler config file."
            )
        # pop() removes the key, so the config handed to SamplerManager below
        # no longer contains it.
        resample_interval = sampler_config.pop("resampling-interval")
        # The type check must come first: ordering a str or None against 0
        # raises TypeError, which would bypass the intended SamplerException.
        if not isinstance(resample_interval, int) or resample_interval <= 0:
            raise SamplerException(
                "Specified resampling-interval is not valid. Please provide"
                " a positive integer value for resampling-interval"
            )
    sampler_manager = SamplerManager(sampler_config, run_seed)
    return sampler_manager, resample_interval
|
|
|
|
|
|
def try_create_meta_curriculum(
    curriculum_folder: Optional[str], env: SubprocessEnvManager, lesson: int
) -> Optional[MetaCurriculum]:
    """
    Creates a MetaCurriculum when a curriculum folder was supplied.
    :param curriculum_folder: Folder passed to MetaCurriculum, or None.
    :param env: Environment manager; its ``reset_parameters`` are passed to
        MetaCurriculum and its ``external_brains`` keys are used to validate
        the curriculum's brain names.
    :param lesson: Lesson number applied to every curriculum.
    :return: The configured MetaCurriculum, or None when no folder was given.
    :raises MetaCurriculumError: If a curriculum names a brain that is not
        among the environment's external brains.
    """
    if curriculum_folder is None:
        return None

    meta_curriculum = MetaCurriculum(curriculum_folder, env.reset_parameters)
    # TODO: Should be able to start learning at different lesson numbers
    # for each curriculum.
    meta_curriculum.set_all_curriculums_to_lesson_num(lesson)

    for brain_name in meta_curriculum.brains_to_curriculums.keys():
        # external_brains is deliberately re-read on every iteration, exactly
        # as the original loop does.
        if brain_name in env.external_brains.keys():
            continue
        raise MetaCurriculumError(
            "One of the curricula defined in "
            + curriculum_folder
            + " does not have a corresponding Brain. Check that the "
            "curriculum file has the same name as the Brain "
            "whose curriculum it defines."
        )
    return meta_curriculum
|
|
|
|
|
|
def prepare_for_docker_run(docker_target_name, env_path):
    """
    Copies matching entries from the docker volume into /ml-agents.
    :param docker_target_name: Name of the top-level directory that is
        globbed as ``/<docker_target_name>/*``.
    :param env_path: Entries whose path contains this substring are copied.
    :return: ``env_path`` re-rooted under ``/ml-agents``.

    Directories are copied recursively; plain files are copied and then
    chmod-ed to 0o775. Any exception raised while copying an entry is logged
    at info level on the "mlagents.trainers" logger and otherwise ignored.
    """
    volume_glob = "/{docker_target_name}/*".format(
        docker_target_name=docker_target_name
    )
    for entry in glob.glob(volume_glob):
        if env_path not in entry:
            continue
        try:
            entry_name = os.path.basename(entry)
            destination = "/ml-agents/{b}".format(b=entry_name)
            if os.path.isdir(entry):
                shutil.copytree(entry, destination)
            else:
                origin = "/{docker_target_name}/{b}".format(
                    docker_target_name=docker_target_name, b=entry_name
                )
                shutil.copyfile(origin, destination)
                os.chmod(destination, 0o775)  # Make executable
        except Exception as err:
            logging.getLogger("mlagents.trainers").info(err)
    return "/ml-agents/{env_path}".format(env_path=env_path)
|
|
|
|
|
|
def create_environment_factory(
    env_path: Optional[str],
    docker_target_name: Optional[str],
    no_graphics: bool,
    seed: Optional[int],
    start_port: int,
    env_args: Optional[List[str]],
) -> Callable[[int], BaseUnityEnvironment]:
    """
    Returns a factory that builds a UnityEnvironment for a given worker id.
    :param env_path: Path of the Unity executable, or None.
    :param docker_target_name: Docker volume name, or None; when both this
        and ``env_path`` are set the executable is first copied via
        ``prepare_for_docker_run``.
    :param no_graphics: Forwarded to UnityEnvironment.
    :param seed: Seed given to every environment. Only ``None`` selects a
        per-worker seed from a randomly generated pool; ``0`` is a valid seed.
    :param start_port: Forwarded to UnityEnvironment as ``base_port``.
    :param env_args: Forwarded to UnityEnvironment as ``args``.
    :return: Callable taking ``worker_id`` and returning a new environment.
    """
    if env_path is not None:
        # Strip out executable extensions if passed.
        # NOTE(review): str.replace removes these substrings anywhere in the
        # path, not only at its end - confirm that is acceptable.
        env_path = (
            env_path.strip()
            .replace(".app", "")
            .replace(".exe", "")
            .replace(".x86_64", "")
            .replace(".x86", "")
        )
    docker_training = docker_target_name is not None
    if docker_training and env_path is not None:
        # Comments for future maintenance:
        #     Some OS/VM instances (e.g. COS GCP Image) mount filesystems
        #     with COS flag which prevents execution of the Unity scene,
        #     to get around this, we will copy the executable into the
        #     container.
        # Navigate in docker path and find env_path and copy it.
        env_path = prepare_for_docker_run(docker_target_name, env_path)
    seed_count = 10000
    seed_pool = [np.random.randint(0, seed_count) for _ in range(seed_count)]

    def create_unity_environment(worker_id: int) -> UnityEnvironment:
        env_seed = seed
        # Compare against None explicitly: a truthiness test would discard an
        # explicitly requested seed of 0 and replace it with a random one.
        if env_seed is None:
            env_seed = seed_pool[worker_id % len(seed_pool)]
        return UnityEnvironment(
            file_name=env_path,
            worker_id=worker_id,
            seed=env_seed,
            docker_training=docker_training,
            no_graphics=no_graphics,
            base_port=start_port,
            args=env_args,
        )

    return create_unity_environment
|
|
|
|
|
|
def main():
    """
    Command-line entry point.

    Prints the banner, parses the command line, configures the
    "mlagents.trainers" and "mlagents.envs" loggers, then runs training.
    A single run executes in this process; multiple runs are each started in
    their own Process, one after another, waiting for each to signal that it
    has launched before the next is started.
    :raises TrainerError: If more than one run is requested without an
        environment executable.
    """
    # Imported locally so the launch-wait loop below does not depend on the
    # module-level import block.
    from queue import Empty

    try:
        print(
            """

                        ▄▄▄▓▓▓▓
                   ╓▓▓▓▓▓▓█▓▓▓▓▓
              ,▄▄▄m▀▀▀'  ,▓▓▓▀▓▓▄                           ▓▓▓  ▓▓▌
            ▄▓▓▓▀'      ▄▓▓▀  ▓▓▓      ▄▄     ▄▄ ,▄▄ ▄▄▄▄   ,▄▄ ▄▓▓▌▄ ▄▄▄    ,▄▄
          ▄▓▓▓▀        ▄▓▓▀   ▐▓▓▌     ▓▓▌   ▐▓▓ ▐▓▓▓▀▀▀▓▓▌ ▓▓▓ ▀▓▓▌▀ ^▓▓▌  ╒▓▓▌
        ▄▓▓▓▓▓▄▄▄▄▄▄▄▄▓▓▓      ▓▀      ▓▓▌   ▐▓▓ ▐▓▓    ▓▓▓ ▓▓▓  ▓▓▌   ▐▓▓▄ ▓▓▌
        ▀▓▓▓▓▀▀▀▀▀▀▀▀▀▀▓▓▄     ▓▓      ▓▓▌   ▐▓▓ ▐▓▓    ▓▓▓ ▓▓▓  ▓▓▌    ▐▓▓▐▓▓
          ^█▓▓▓        ▀▓▓▄   ▐▓▓▌     ▓▓▓▓▄▓▓▓▓ ▐▓▓    ▓▓▓ ▓▓▓  ▓▓▓▄    ▓▓▓▓`
            '▀▓▓▓▄      ^▓▓▓  ▓▓▓       └▀▀▀▀ ▀▀ ^▀▀    `▀▀ `▀▀   '▀▀    ▐▓▓▌
               ▀▀▀▀▓▄▄▄   ▓▓▓▓▓▓,                                      ▓▓▓▓▀
                   `▀█▓▓▓▓▓▓▓▓▓▌
                        ¬`▀▀▀█▓

        """
        )
    except Exception:
        # Fallback banner if printing the block-character art fails.
        print("\n\n\tUnity Technologies\n")

    options = parse_command_line()
    trainer_logger = logging.getLogger("mlagents.trainers")
    env_logger = logging.getLogger("mlagents.envs")
    trainer_logger.info(options)
    if options.debug:
        trainer_logger.setLevel("DEBUG")
        env_logger.setLevel("DEBUG")

    if options.env_path is None and options.num_runs > 1:
        raise TrainerError(
            "It is not possible to launch more than one concurrent training session "
            "when training from the editor."
        )

    jobs = []
    run_seed = options.seed

    if options.num_runs == 1:
        # A seed of -1 means "pick one at random".
        if options.seed == -1:
            run_seed = np.random.randint(0, 10000)
        run_training(0, run_seed, options, Queue())
    else:
        for i in range(options.num_runs):
            if options.seed == -1:
                run_seed = np.random.randint(0, 10000)
            process_queue = Queue()
            p = Process(target=run_training, args=(i, run_seed, options, process_queue))
            jobs.append(p)
            p.start()
            # Wait for signal that environment has successfully launched.
            # A plain blocking get() would hang forever if the child exits
            # before signalling, so poll with a timeout and check liveness.
            while True:
                # Sample liveness BEFORE the get: if the child was already
                # dead and the queue is still empty after the timeout, no
                # signal can ever arrive.
                child_alive = p.is_alive()
                try:
                    if process_queue.get(timeout=1.0) is True:
                        break
                except Empty:
                    if not child_alive:
                        trainer_logger.error(
                            "Training session %s exited with code %s before "
                            "signalling that it had launched.",
                            i,
                            p.exitcode,
                        )
                        break

    # Wait for jobs to complete.  Otherwise we'll have an extra
    # unhandled KeyboardInterrupt if we end early.
    try:
        for job in jobs:
            job.join()
    except KeyboardInterrupt:
        pass
|
|
|
|
|
|
# Entry-point guard: lets a Python debugger (or the interpreter) run this
# script directly; nothing is executed when the module is merely imported.
if __name__ == "__main__":
    main()
|