# # Unity ML-Agents Toolkit import logging import argparse import os import glob import shutil import numpy as np from typing import Any, Callable, Optional, List, NamedTuple import mlagents.trainers import mlagents_envs from mlagents import tf_utils from mlagents.trainers.trainer_controller import TrainerController from mlagents.trainers.meta_curriculum import MetaCurriculum from mlagents.trainers.trainer_util import load_config, TrainerFactory from mlagents.trainers.stats import TensorboardWriter, CSVWriter, StatsReporter from mlagents_envs.environment import UnityEnvironment from mlagents.trainers.sampler_class import SamplerManager from mlagents.trainers.exception import SamplerException from mlagents_envs.base_env import BaseEnv from mlagents.trainers.subprocess_env_manager import SubprocessEnvManager from mlagents_envs.side_channel.side_channel import SideChannel from mlagents_envs.side_channel.engine_configuration_channel import EngineConfig class CommandLineOptions(NamedTuple): debug: bool seed: int env_path: str run_id: str load_model: bool train_model: bool save_freq: int keep_checkpoints: int base_port: int num_envs: int curriculum_folder: Optional[str] lesson: int no_graphics: bool multi_gpu: bool # ? trainer_config_path: str sampler_file_path: Optional[str] docker_target_name: Optional[str] env_args: Optional[List[str]] cpu: bool width: int height: int quality_level: int time_scale: float target_frame_rate: int @staticmethod def from_argparse(args: Any) -> "CommandLineOptions": return CommandLineOptions(**vars(args)) def get_version_string() -> str: # pylint: disable=no-member return f""" Version information: ml-agents: {mlagents.trainers.__version__}, ml-agents-envs: {mlagents_envs.__version__}, Communicator API: {UnityEnvironment.API_VERSION}, TensorFlow: {tf_utils.tf.__version__}""" def parse_command_line(argv: Optional[List[str]] = None) -> CommandLineOptions: parser = argparse.ArgumentParser( formatter_class=argparse.ArgumentDefaultsHelpFormatter ) parser.add_argument("trainer_config_path") parser.add_argument( "--env", default=None, dest="env_path", help="Name of the Unity executable " ) parser.add_argument( "--curriculum", default=None, dest="curriculum_folder", help="Curriculum json directory for environment", ) parser.add_argument( "--sampler", default=None, dest="sampler_file_path", help="Reset parameter yaml file for environment", ) parser.add_argument( "--keep-checkpoints", default=5, type=int, help="How many model checkpoints to keep", ) parser.add_argument( "--lesson", default=0, type=int, help="Start learning from this lesson" ) parser.add_argument( "--load", default=False, dest="load_model", action="store_true", help="Whether to load the model or randomly initialize", ) parser.add_argument( "--run-id", default="ppo", help="The directory name for model and summary statistics", ) parser.add_argument( "--save-freq", default=50000, type=int, help="Frequency at which to save model" ) parser.add_argument( "--seed", default=-1, type=int, help="Random seed used for training" ) parser.add_argument( "--train", default=False, dest="train_model", action="store_true", help="Whether to train model, or only run inference", ) parser.add_argument( "--base-port", default=5005, type=int, help="Base port for environment communication", ) parser.add_argument( "--num-envs", default=1, type=int, help="Number of parallel environments to use for training", ) parser.add_argument( "--docker-target-name", default=None, dest="docker_target_name", help="Docker volume to store training-specific files", ) parser.add_argument( "--no-graphics", default=False, action="store_true", help="Whether to run the environment in no-graphics mode", ) parser.add_argument( "--debug", default=False, action="store_true", help="Whether to run ML-Agents in debug mode with detailed logging", ) parser.add_argument( "--multi-gpu", default=False, action="store_true", help="Setting this flag enables the use of multiple GPU's (if available) during training", ) parser.add_argument( "--env-args", default=None, nargs=argparse.REMAINDER, help="Arguments passed to the Unity executable.", ) parser.add_argument( "--cpu", default=False, action="store_true", help="Run with CPU only" ) parser.add_argument("--version", action="version", version="") eng_conf = parser.add_argument_group(title="Engine Configuration") eng_conf.add_argument( "--width", default=84, type=int, help="The width of the executable window of the environment(s)", ) eng_conf.add_argument( "--height", default=84, type=int, help="The height of the executable window of the environment(s)", ) eng_conf.add_argument( "--quality-level", default=5, type=int, help="The quality level of the environment(s)", ) eng_conf.add_argument( "--time-scale", default=20, type=float, help="The time scale of the Unity environment(s)", ) eng_conf.add_argument( "--target-frame-rate", default=-1, type=int, help="The target frame rate of the Unity environment(s)", ) args = parser.parse_args(argv) return CommandLineOptions.from_argparse(args) def run_training(run_seed: int, options: CommandLineOptions) -> None: """ Launches training session. :param options: parsed command line arguments :param run_seed: Random seed used for training. :param run_options: Command line arguments for training. """ # Docker Parameters trainer_config_path = options.trainer_config_path curriculum_folder = options.curriculum_folder # Recognize and use docker volume if one is passed as an argument if not options.docker_target_name: model_path = f"./models/{options.run_id}" summaries_dir = "./summaries" else: trainer_config_path = f"/{options.docker_target_name}/{trainer_config_path}" if curriculum_folder is not None: curriculum_folder = f"/{options.docker_target_name}/{curriculum_folder}" model_path = f"/{options.docker_target_name}/models/{options.run_id}" summaries_dir = f"/{options.docker_target_name}/summaries" trainer_config = load_config(trainer_config_path) port = options.base_port # Configure CSV, Tensorboard Writers and StatsReporter # We assume reward and episode length are needed in the CSV. csv_writer = CSVWriter( summaries_dir, required_fields=["Environment/Cumulative Reward", "Environment/Episode Length"], ) tb_writer = TensorboardWriter(summaries_dir) StatsReporter.add_writer(tb_writer) StatsReporter.add_writer(csv_writer) if options.env_path is None: port = 5004 # This is the in Editor Training Port env_factory = create_environment_factory( options.env_path, options.docker_target_name, options.no_graphics, run_seed, port, options.env_args, ) engine_config = EngineConfig( options.width, options.height, options.quality_level, options.time_scale, options.target_frame_rate, ) env_manager = SubprocessEnvManager(env_factory, engine_config, options.num_envs) maybe_meta_curriculum = try_create_meta_curriculum( curriculum_folder, env_manager, options.lesson ) sampler_manager, resampling_interval = create_sampler_manager( options.sampler_file_path, run_seed ) trainer_factory = TrainerFactory( trainer_config, summaries_dir, options.run_id, model_path, options.keep_checkpoints, options.train_model, options.load_model, run_seed, maybe_meta_curriculum, options.multi_gpu, ) # Create controller and begin training. tc = TrainerController( trainer_factory, model_path, summaries_dir, options.run_id, options.save_freq, maybe_meta_curriculum, options.train_model, run_seed, sampler_manager, resampling_interval, ) # Begin training try: tc.start_learning(env_manager) finally: env_manager.close() def create_sampler_manager(sampler_file_path, run_seed=None): sampler_config = None resample_interval = None if sampler_file_path is not None: sampler_config = load_config(sampler_file_path) if "resampling-interval" in sampler_config: # Filter arguments that do not exist in the environment resample_interval = sampler_config.pop("resampling-interval") if (resample_interval <= 0) or (not isinstance(resample_interval, int)): raise SamplerException( "Specified resampling-interval is not valid. Please provide" " a positive integer value for resampling-interval" ) else: raise SamplerException( "Resampling interval was not specified in the sampler file." " Please specify it with the 'resampling-interval' key in the sampler config file." ) sampler_manager = SamplerManager(sampler_config, run_seed) return sampler_manager, resample_interval def try_create_meta_curriculum( curriculum_folder: Optional[str], env: SubprocessEnvManager, lesson: int ) -> Optional[MetaCurriculum]: if curriculum_folder is None: return None else: meta_curriculum = MetaCurriculum.from_directory(curriculum_folder) # TODO: Should be able to start learning at different lesson numbers # for each curriculum. meta_curriculum.set_all_curricula_to_lesson_num(lesson) return meta_curriculum def prepare_for_docker_run(docker_target_name, env_path): for f in glob.glob( "/{docker_target_name}/*".format(docker_target_name=docker_target_name) ): if env_path in f: try: b = os.path.basename(f) if os.path.isdir(f): shutil.copytree(f, "/ml-agents/{b}".format(b=b)) else: src_f = "/{docker_target_name}/{b}".format( docker_target_name=docker_target_name, b=b ) dst_f = "/ml-agents/{b}".format(b=b) shutil.copyfile(src_f, dst_f) os.chmod(dst_f, 0o775) # Make executable except Exception as e: logging.getLogger("mlagents.trainers").info(e) env_path = "/ml-agents/{env_path}".format(env_path=env_path) return env_path def create_environment_factory( env_path: str, docker_target_name: Optional[str], no_graphics: bool, seed: Optional[int], start_port: int, env_args: Optional[List[str]], ) -> Callable[[int, List[SideChannel]], BaseEnv]: if env_path is not None: # Strip out executable extensions if passed env_path = ( env_path.strip() .replace(".app", "") .replace(".exe", "") .replace(".x86_64", "") .replace(".x86", "") ) docker_training = docker_target_name is not None if docker_training and env_path is not None: # Comments for future maintenance: # Some OS/VM instances (e.g. COS GCP Image) mount filesystems # with COS flag which prevents execution of the Unity scene, # to get around this, we will copy the executable into the # container. # Navigate in docker path and find env_path and copy it. env_path = prepare_for_docker_run(docker_target_name, env_path) seed_count = 10000 seed_pool = [np.random.randint(0, seed_count) for _ in range(seed_count)] def create_unity_environment( worker_id: int, side_channels: List[SideChannel] ) -> UnityEnvironment: env_seed = seed if not env_seed: env_seed = seed_pool[worker_id % len(seed_pool)] return UnityEnvironment( file_name=env_path, worker_id=worker_id, seed=env_seed, docker_training=docker_training, no_graphics=no_graphics, base_port=start_port, args=env_args, side_channels=side_channels, ) return create_unity_environment def main(): try: print( """ ▄▄▄▓▓▓▓ ╓▓▓▓▓▓▓█▓▓▓▓▓ ,▄▄▄m▀▀▀' ,▓▓▓▀▓▓▄ ▓▓▓ ▓▓▌ ▄▓▓▓▀' ▄▓▓▀ ▓▓▓ ▄▄ ▄▄ ,▄▄ ▄▄▄▄ ,▄▄ ▄▓▓▌▄ ▄▄▄ ,▄▄ ▄▓▓▓▀ ▄▓▓▀ ▐▓▓▌ ▓▓▌ ▐▓▓ ▐▓▓▓▀▀▀▓▓▌ ▓▓▓ ▀▓▓▌▀ ^▓▓▌ ╒▓▓▌ ▄▓▓▓▓▓▄▄▄▄▄▄▄▄▓▓▓ ▓▀ ▓▓▌ ▐▓▓ ▐▓▓ ▓▓▓ ▓▓▓ ▓▓▌ ▐▓▓▄ ▓▓▌ ▀▓▓▓▓▀▀▀▀▀▀▀▀▀▀▓▓▄ ▓▓ ▓▓▌ ▐▓▓ ▐▓▓ ▓▓▓ ▓▓▓ ▓▓▌ ▐▓▓▐▓▓ ^█▓▓▓ ▀▓▓▄ ▐▓▓▌ ▓▓▓▓▄▓▓▓▓ ▐▓▓ ▓▓▓ ▓▓▓ ▓▓▓▄ ▓▓▓▓` '▀▓▓▓▄ ^▓▓▓ ▓▓▓ └▀▀▀▀ ▀▀ ^▀▀ `▀▀ `▀▀ '▀▀ ▐▓▓▌ ▀▀▀▀▓▄▄▄ ▓▓▓▓▓▓, ▓▓▓▓▀ `▀█▓▓▓▓▓▓▓▓▓▌ ¬`▀▀▀█▓ """ ) except Exception: print("\n\n\tUnity Technologies\n") print(get_version_string()) options = parse_command_line() trainer_logger = logging.getLogger("mlagents.trainers") env_logger = logging.getLogger("mlagents_envs") trainer_logger.info(options) if options.debug: trainer_logger.setLevel("DEBUG") env_logger.setLevel("DEBUG") else: # disable noisy warnings from tensorflow. tf_utils.set_warnings_enabled(False) run_seed = options.seed if options.cpu: os.environ["CUDA_VISIBLE_DEVICES"] = "-1" if options.seed == -1: run_seed = np.random.randint(0, 10000) run_training(run_seed, options) # For python debugger to directly run this script if __name__ == "__main__": main()