Unity 机器学习代理工具包 (ML-Agents) 是一个开源项目,它使游戏和模拟能够作为训练智能代理的环境。
您最多选择25个主题 主题必须以中文或者字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符
 
 
 
 
 

482 行
17 KiB

# # Unity ML-Agents Toolkit
import logging
import argparse
import os
import glob
import shutil
import numpy as np
import json
from typing import Callable, Optional, List, NamedTuple, Dict
import mlagents.trainers
import mlagents_envs
from mlagents import tf_utils
from mlagents.trainers.trainer_controller import TrainerController
from mlagents.trainers.meta_curriculum import MetaCurriculum
from mlagents.trainers.trainer_util import load_config, TrainerFactory
from mlagents.trainers.stats import TensorboardWriter, CSVWriter, StatsReporter
from mlagents_envs.environment import UnityEnvironment
from mlagents.trainers.sampler_class import SamplerManager
from mlagents.trainers.exception import SamplerException
from mlagents_envs.base_env import BaseEnv
from mlagents.trainers.subprocess_env_manager import SubprocessEnvManager
from mlagents_envs.side_channel.side_channel import SideChannel
from mlagents_envs.side_channel.engine_configuration_channel import EngineConfig
def _create_parser():
argparser = argparse.ArgumentParser(
formatter_class=argparse.ArgumentDefaultsHelpFormatter
)
argparser.add_argument("trainer_config_path")
argparser.add_argument(
"--env", default=None, dest="env_path", help="Name of the Unity executable "
)
argparser.add_argument(
"--curriculum",
default=None,
dest="curriculum_config_path",
help="Curriculum config yaml file for environment",
)
argparser.add_argument(
"--sampler",
default=None,
dest="sampler_file_path",
help="Reset parameter yaml file for environment",
)
argparser.add_argument(
"--keep-checkpoints",
default=5,
type=int,
help="How many model checkpoints to keep",
)
argparser.add_argument(
"--lesson", default=0, type=int, help="Start learning from this lesson"
)
argparser.add_argument(
"--load",
default=False,
dest="load_model",
action="store_true",
help="Whether to load the model or randomly initialize",
)
argparser.add_argument(
"--run-id",
default="ppo",
help="The directory name for model and summary statistics",
)
argparser.add_argument(
"--save-freq", default=50000, type=int, help="Frequency at which to save model"
)
argparser.add_argument(
"--seed", default=-1, type=int, help="Random seed used for training"
)
argparser.add_argument(
"--train",
default=False,
dest="train_model",
action="store_true",
help="Whether to train model, or only run inference",
)
argparser.add_argument(
"--base-port",
default=5005,
type=int,
help="Base port for environment communication",
)
argparser.add_argument(
"--num-envs",
default=1,
type=int,
help="Number of parallel environments to use for training",
)
argparser.add_argument(
"--docker-target-name",
default=None,
dest="docker_target_name",
help="Docker volume to store training-specific files",
)
argparser.add_argument(
"--no-graphics",
default=False,
action="store_true",
help="Whether to run the environment in no-graphics mode",
)
argparser.add_argument(
"--debug",
default=False,
action="store_true",
help="Whether to run ML-Agents in debug mode with detailed logging",
)
argparser.add_argument(
"--multi-gpu",
default=False,
action="store_true",
help="Setting this flag enables the use of multiple GPU's (if available) during training",
)
argparser.add_argument(
"--env-args",
default=None,
nargs=argparse.REMAINDER,
help="Arguments passed to the Unity executable.",
)
argparser.add_argument(
"--cpu", default=False, action="store_true", help="Run with CPU only"
)
argparser.add_argument("--version", action="version", version="")
eng_conf = argparser.add_argument_group(title="Engine Configuration")
eng_conf.add_argument(
"--width",
default=84,
type=int,
help="The width of the executable window of the environment(s)",
)
eng_conf.add_argument(
"--height",
default=84,
type=int,
help="The height of the executable window of the environment(s)",
)
eng_conf.add_argument(
"--quality-level",
default=5,
type=int,
help="The quality level of the environment(s)",
)
eng_conf.add_argument(
"--time-scale",
default=20,
type=float,
help="The time scale of the Unity environment(s)",
)
eng_conf.add_argument(
"--target-frame-rate",
default=-1,
type=int,
help="The target frame rate of the Unity environment(s)",
)
return argparser
parser = _create_parser()
class RunOptions(NamedTuple):
trainer_config: Dict
debug: bool = parser.get_default("debug")
seed: int = parser.get_default("seed")
env_path: Optional[str] = parser.get_default("env_path")
run_id: str = parser.get_default("run_id")
load_model: bool = parser.get_default("load_model")
train_model: bool = parser.get_default("train_model")
save_freq: int = parser.get_default("save_freq")
keep_checkpoints: int = parser.get_default("keep_checkpoints")
base_port: int = parser.get_default("base_port")
num_envs: int = parser.get_default("num_envs")
curriculum_config: Optional[Dict] = None
lesson: int = parser.get_default("lesson")
no_graphics: bool = parser.get_default("no_graphics")
multi_gpu: bool = parser.get_default("multi_gpu")
sampler_config: Optional[Dict] = None
docker_target_name: Optional[str] = parser.get_default("docker_target_name")
env_args: Optional[List[str]] = parser.get_default("env_args")
cpu: bool = parser.get_default("cpu")
width: int = parser.get_default("width")
height: int = parser.get_default("height")
quality_level: int = parser.get_default("quality_level")
time_scale: float = parser.get_default("time_scale")
target_frame_rate: int = parser.get_default("target_frame_rate")
@staticmethod
def from_argparse(args: argparse.Namespace) -> "RunOptions":
"""
Takes an argparse.Namespace as specified in `parse_command_line`, loads input configuration files
from file paths, and converts to a CommandLineOptions instance.
:param args: collection of command-line parameters passed to mlagents-learn
:return: CommandLineOptions representing the passed in arguments, with trainer config, curriculum and sampler
configs loaded from files.
"""
argparse_args = vars(args)
docker_target_name = argparse_args["docker_target_name"]
trainer_config_path = argparse_args["trainer_config_path"]
curriculum_config_path = argparse_args["curriculum_config_path"]
if docker_target_name is not None:
trainer_config_path = f"/{docker_target_name}/{trainer_config_path}"
if curriculum_config_path is not None:
curriculum_config_path = (
f"/{docker_target_name}/{curriculum_config_path}"
)
argparse_args["trainer_config"] = load_config(trainer_config_path)
if curriculum_config_path is not None:
argparse_args["curriculum_config"] = load_config(curriculum_config_path)
if argparse_args["sampler_file_path"] is not None:
argparse_args["sampler_config"] = load_config(
argparse_args["sampler_file_path"]
)
# Since argparse accepts file paths in the config options which don't exist in CommandLineOptions,
# these keys will need to be deleted to use the **/splat operator below.
argparse_args.pop("sampler_file_path")
argparse_args.pop("curriculum_config_path")
argparse_args.pop("trainer_config_path")
return RunOptions(**vars(args))
def get_version_string() -> str:
# pylint: disable=no-member
return f""" Version information:
ml-agents: {mlagents.trainers.__version__},
ml-agents-envs: {mlagents_envs.__version__},
Communicator API: {UnityEnvironment.API_VERSION},
TensorFlow: {tf_utils.tf.__version__}"""
def parse_command_line(argv: Optional[List[str]] = None) -> RunOptions:
args = parser.parse_args(argv)
return RunOptions.from_argparse(args)
def run_training(run_seed: int, options: RunOptions) -> None:
"""
Launches training session.
:param options: parsed command line arguments
:param run_seed: Random seed used for training.
:param run_options: Command line arguments for training.
"""
# Recognize and use docker volume if one is passed as an argument
if not options.docker_target_name:
model_path = f"./models/{options.run_id}"
summaries_dir = "./summaries"
else:
model_path = f"/{options.docker_target_name}/models/{options.run_id}"
summaries_dir = f"/{options.docker_target_name}/summaries"
port = options.base_port
# Configure CSV, Tensorboard Writers and StatsReporter
# We assume reward and episode length are needed in the CSV.
csv_writer = CSVWriter(
summaries_dir,
required_fields=["Environment/Cumulative Reward", "Environment/Episode Length"],
)
tb_writer = TensorboardWriter(summaries_dir)
StatsReporter.add_writer(tb_writer)
StatsReporter.add_writer(csv_writer)
if options.env_path is None:
port = 5004 # This is the in Editor Training Port
env_factory = create_environment_factory(
options.env_path,
options.docker_target_name,
options.no_graphics,
run_seed,
port,
options.env_args,
)
engine_config = EngineConfig(
options.width,
options.height,
options.quality_level,
options.time_scale,
options.target_frame_rate,
)
env_manager = SubprocessEnvManager(env_factory, engine_config, options.num_envs)
maybe_meta_curriculum = try_create_meta_curriculum(
options.curriculum_config, env_manager, options.lesson
)
sampler_manager, resampling_interval = create_sampler_manager(
options.sampler_config, run_seed
)
trainer_factory = TrainerFactory(
options.trainer_config,
summaries_dir,
options.run_id,
model_path,
options.keep_checkpoints,
options.train_model,
options.load_model,
run_seed,
maybe_meta_curriculum,
options.multi_gpu,
)
# Create controller and begin training.
tc = TrainerController(
trainer_factory,
model_path,
summaries_dir,
options.run_id,
options.save_freq,
maybe_meta_curriculum,
options.train_model,
run_seed,
sampler_manager,
resampling_interval,
)
# Begin training
try:
tc.start_learning(env_manager)
finally:
env_manager.close()
def create_sampler_manager(sampler_config, run_seed=None):
resample_interval = None
if sampler_config is not None:
if "resampling-interval" in sampler_config:
# Filter arguments that do not exist in the environment
resample_interval = sampler_config.pop("resampling-interval")
if (resample_interval <= 0) or (not isinstance(resample_interval, int)):
raise SamplerException(
"Specified resampling-interval is not valid. Please provide"
" a positive integer value for resampling-interval"
)
else:
raise SamplerException(
"Resampling interval was not specified in the sampler file."
" Please specify it with the 'resampling-interval' key in the sampler config file."
)
sampler_manager = SamplerManager(sampler_config, run_seed)
return sampler_manager, resample_interval
def try_create_meta_curriculum(
curriculum_config: Optional[Dict], env: SubprocessEnvManager, lesson: int
) -> Optional[MetaCurriculum]:
if curriculum_config is None:
return None
else:
meta_curriculum = MetaCurriculum(curriculum_config)
# TODO: Should be able to start learning at different lesson numbers
# for each curriculum.
meta_curriculum.set_all_curricula_to_lesson_num(lesson)
return meta_curriculum
def prepare_for_docker_run(docker_target_name, env_path):
for f in glob.glob(
"/{docker_target_name}/*".format(docker_target_name=docker_target_name)
):
if env_path in f:
try:
b = os.path.basename(f)
if os.path.isdir(f):
shutil.copytree(f, "/ml-agents/{b}".format(b=b))
else:
src_f = "/{docker_target_name}/{b}".format(
docker_target_name=docker_target_name, b=b
)
dst_f = "/ml-agents/{b}".format(b=b)
shutil.copyfile(src_f, dst_f)
os.chmod(dst_f, 0o775) # Make executable
except Exception as e:
logging.getLogger("mlagents.trainers").info(e)
env_path = "/ml-agents/{env_path}".format(env_path=env_path)
return env_path
def create_environment_factory(
env_path: Optional[str],
docker_target_name: Optional[str],
no_graphics: bool,
seed: Optional[int],
start_port: int,
env_args: Optional[List[str]],
) -> Callable[[int, List[SideChannel]], BaseEnv]:
if env_path is not None:
# Strip out executable extensions if passed
env_path = (
env_path.strip()
.replace(".app", "")
.replace(".exe", "")
.replace(".x86_64", "")
.replace(".x86", "")
)
docker_training = docker_target_name is not None
if docker_training and env_path is not None:
# Comments for future maintenance:
# Some OS/VM instances (e.g. COS GCP Image) mount filesystems
# with COS flag which prevents execution of the Unity scene,
# to get around this, we will copy the executable into the
# container.
# Navigate in docker path and find env_path and copy it.
env_path = prepare_for_docker_run(docker_target_name, env_path)
seed_count = 10000
seed_pool = [np.random.randint(0, seed_count) for _ in range(seed_count)]
def create_unity_environment(
worker_id: int, side_channels: List[SideChannel]
) -> UnityEnvironment:
env_seed = seed
if not env_seed:
env_seed = seed_pool[worker_id % len(seed_pool)]
return UnityEnvironment(
file_name=env_path,
worker_id=worker_id,
seed=env_seed,
docker_training=docker_training,
no_graphics=no_graphics,
base_port=start_port,
args=env_args,
side_channels=side_channels,
)
return create_unity_environment
def run_cli(options: RunOptions) -> None:
try:
print(
"""
▄▄▄▓▓▓▓
╓▓▓▓▓▓▓█▓▓▓▓▓
,▄▄▄m▀▀▀' ,▓▓▓▀▓▓▄ ▓▓▓ ▓▓▌
▄▓▓▓▀' ▄▓▓▀ ▓▓▓ ▄▄ ▄▄ ,▄▄ ▄▄▄▄ ,▄▄ ▄▓▓▌▄ ▄▄▄ ,▄▄
▄▓▓▓▀ ▄▓▓▀ ▐▓▓▌ ▓▓▌ ▐▓▓ ▐▓▓▓▀▀▀▓▓▌ ▓▓▓ ▀▓▓▌▀ ^▓▓▌ ╒▓▓▌
▄▓▓▓▓▓▄▄▄▄▄▄▄▄▓▓▓ ▓▀ ▓▓▌ ▐▓▓ ▐▓▓ ▓▓▓ ▓▓▓ ▓▓▌ ▐▓▓▄ ▓▓▌
▀▓▓▓▓▀▀▀▀▀▀▀▀▀▀▓▓▄ ▓▓ ▓▓▌ ▐▓▓ ▐▓▓ ▓▓▓ ▓▓▓ ▓▓▌ ▐▓▓▐▓▓
^█▓▓▓ ▀▓▓▄ ▐▓▓▌ ▓▓▓▓▄▓▓▓▓ ▐▓▓ ▓▓▓ ▓▓▓ ▓▓▓▄ ▓▓▓▓`
'▀▓▓▓▄ ^▓▓▓ ▓▓▓ └▀▀▀▀ ▀▀ ^▀▀ `▀▀ `▀▀ '▀▀ ▐▓▓▌
▀▀▀▀▓▄▄▄ ▓▓▓▓▓▓, ▓▓▓▓▀
`▀█▓▓▓▓▓▓▓▓▓▌
¬`▀▀▀█▓
"""
)
except Exception:
print("\n\n\tUnity Technologies\n")
print(get_version_string())
trainer_logger = logging.getLogger("mlagents.trainers")
env_logger = logging.getLogger("mlagents_envs")
if options.debug:
trainer_logger.setLevel("DEBUG")
env_logger.setLevel("DEBUG")
else:
# disable noisy warnings from tensorflow.
tf_utils.set_warnings_enabled(False)
trainer_logger.debug("Configuration for this run:")
trainer_logger.debug(json.dumps(options._asdict(), indent=4))
run_seed = options.seed
if options.cpu:
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
if options.seed == -1:
run_seed = np.random.randint(0, 10000)
run_training(run_seed, options)
def main():
run_cli(parse_command_line())
# For python debugger to directly run this script
if __name__ == "__main__":
main()