ml-agents/ml-agents/mlagents/trainers/learn.py


								# # Unity ML-Agents Toolkit

								import logging

								import argparse


								import os

								import glob

								import shutil

								import numpy as np

								import json


								from typing import Callable, Optional, List, NamedTuple, Dict


								import mlagents.trainers

								import mlagents_envs

								from mlagents import tf_utils

								from mlagents.trainers.trainer_controller import TrainerController

								from mlagents.trainers.meta_curriculum import MetaCurriculum

								from mlagents.trainers.trainer_util import load_config, TrainerFactory

								from mlagents.trainers.stats import TensorboardWriter, CSVWriter, StatsReporter

								from mlagents_envs.environment import UnityEnvironment

								from mlagents.trainers.sampler_class import SamplerManager

								from mlagents.trainers.exception import SamplerException

								from mlagents_envs.base_env import BaseEnv

								from mlagents.trainers.subprocess_env_manager import SubprocessEnvManager

								from mlagents_envs.side_channel.side_channel import SideChannel

								from mlagents_envs.side_channel.engine_configuration_channel import EngineConfig


								def _create_parser():

								    argparser = argparse.ArgumentParser(

								        formatter_class=argparse.ArgumentDefaultsHelpFormatter

								    )

								    argparser.add_argument("trainer_config_path")

								    argparser.add_argument(

								        "--env", default=None, dest="env_path", help="Name of the Unity executable "

								    )

								    argparser.add_argument(

								        "--curriculum",

								        default=None,

								        dest="curriculum_config_path",

								        help="Curriculum config yaml file for environment",

								    )

								    argparser.add_argument(

								        "--sampler",

								        default=None,

								        dest="sampler_file_path",

								        help="Reset parameter yaml file for environment",

								    )

								    argparser.add_argument(

								        "--keep-checkpoints",

								        default=5,

								        type=int,

								        help="How many model checkpoints to keep",

								    )

								    argparser.add_argument(

								        "--lesson", default=0, type=int, help="Start learning from this lesson"

								    )

								    argparser.add_argument(

								        "--load",

								        default=False,

								        dest="load_model",

								        action="store_true",

								        help="Whether to load the model or randomly initialize",

								    )

								    argparser.add_argument(

								        "--run-id",

								        default="ppo",

								        help="The directory name for model and summary statistics",

								    )

								    argparser.add_argument(

								        "--save-freq", default=50000, type=int, help="Frequency at which to save model"

								    )

								    argparser.add_argument(

								        "--seed", default=-1, type=int, help="Random seed used for training"

								    )

								    argparser.add_argument(

								        "--train",

								        default=False,

								        dest="train_model",

								        action="store_true",

								        help="Whether to train model, or only run inference",

								    )

								    argparser.add_argument(

								        "--base-port",

								        default=5005,

								        type=int,

								        help="Base port for environment communication",

								    )

								    argparser.add_argument(

								        "--num-envs",

								        default=1,

								        type=int,

								        help="Number of parallel environments to use for training",

								    )

								    argparser.add_argument(

								        "--docker-target-name",

								        default=None,

								        dest="docker_target_name",

								        help="Docker volume to store training-specific files",

								    )

								    argparser.add_argument(

								        "--no-graphics",

								        default=False,

								        action="store_true",

								        help="Whether to run the environment in no-graphics mode",

								    )

								    argparser.add_argument(

								        "--debug",

								        default=False,

								        action="store_true",

								        help="Whether to run ML-Agents in debug mode with detailed logging",

								    )

								    argparser.add_argument(

								        "--multi-gpu",

								        default=False,

								        action="store_true",

								        help="Setting this flag enables the use of multiple GPU's (if available) during training",

								    )

								    argparser.add_argument(

								        "--env-args",

								        default=None,

								        nargs=argparse.REMAINDER,

								        help="Arguments passed to the Unity executable.",

								    )

								    argparser.add_argument(

								        "--cpu", default=False, action="store_true", help="Run with CPU only"

								    )


								    argparser.add_argument("--version", action="version", version="")


								    eng_conf = argparser.add_argument_group(title="Engine Configuration")

								    eng_conf.add_argument(

								        "--width",

								        default=84,

								        type=int,

								        help="The width of the executable window of the environment(s)",

								    )

								    eng_conf.add_argument(

								        "--height",

								        default=84,

								        type=int,

								        help="The height of the executable window of the environment(s)",

								    )

								    eng_conf.add_argument(

								        "--quality-level",

								        default=5,

								        type=int,

								        help="The quality level of the environment(s)",

								    )

								    eng_conf.add_argument(

								        "--time-scale",

								        default=20,

								        type=float,

								        help="The time scale of the Unity environment(s)",

								    )

								    eng_conf.add_argument(

								        "--target-frame-rate",

								        default=-1,

								        type=int,

								        help="The target frame rate of the Unity environment(s)",

								    )

								    return argparser


								parser = _create_parser()


								class RunOptions(NamedTuple):

								    trainer_config: Dict

								    debug: bool = parser.get_default("debug")

								    seed: int = parser.get_default("seed")

								    env_path: Optional[str] = parser.get_default("env_path")

								    run_id: str = parser.get_default("run_id")

								    load_model: bool = parser.get_default("load_model")

								    train_model: bool = parser.get_default("train_model")

								    save_freq: int = parser.get_default("save_freq")

								    keep_checkpoints: int = parser.get_default("keep_checkpoints")

								    base_port: int = parser.get_default("base_port")

								    num_envs: int = parser.get_default("num_envs")

								    curriculum_config: Optional[Dict] = None

								    lesson: int = parser.get_default("lesson")

								    no_graphics: bool = parser.get_default("no_graphics")

								    multi_gpu: bool = parser.get_default("multi_gpu")

								    sampler_config: Optional[Dict] = None

								    docker_target_name: Optional[str] = parser.get_default("docker_target_name")

								    env_args: Optional[List[str]] = parser.get_default("env_args")

								    cpu: bool = parser.get_default("cpu")

								    width: int = parser.get_default("width")

								    height: int = parser.get_default("height")

								    quality_level: int = parser.get_default("quality_level")

								    time_scale: float = parser.get_default("time_scale")

								    target_frame_rate: int = parser.get_default("target_frame_rate")


								    @staticmethod

								    def from_argparse(args: argparse.Namespace) -> "RunOptions":

								        """

								        Takes an argparse.Namespace as specified in `parse_command_line`, loads input configuration files

								        from file paths, and converts to a CommandLineOptions instance.

								        :param args: collection of command-line parameters passed to mlagents-learn

								        :return: CommandLineOptions representing the passed in arguments, with trainer config, curriculum and sampler

								          configs loaded from files.

								        """

								        argparse_args = vars(args)

								        docker_target_name = argparse_args["docker_target_name"]

								        trainer_config_path = argparse_args["trainer_config_path"]

								        curriculum_config_path = argparse_args["curriculum_config_path"]

								        if docker_target_name is not None:

								            trainer_config_path = f"/{docker_target_name}/{trainer_config_path}"

								            if curriculum_config_path is not None:

								                curriculum_config_path = (

								                    f"/{docker_target_name}/{curriculum_config_path}"

								                )

								        argparse_args["trainer_config"] = load_config(trainer_config_path)

								        if curriculum_config_path is not None:

								            argparse_args["curriculum_config"] = load_config(curriculum_config_path)

								        if argparse_args["sampler_file_path"] is not None:

								            argparse_args["sampler_config"] = load_config(

								                argparse_args["sampler_file_path"]

								            )


								        # Since argparse accepts file paths in the config options which don't exist in CommandLineOptions,

								        # these keys will need to be deleted to use the **/splat operator below.

								        argparse_args.pop("sampler_file_path")

								        argparse_args.pop("curriculum_config_path")

								        argparse_args.pop("trainer_config_path")

								        return RunOptions(**vars(args))


								def get_version_string() -> str:

								    # pylint: disable=no-member

								    return f""" Version information:

								  ml-agents: {mlagents.trainers.__version__},

								  ml-agents-envs: {mlagents_envs.__version__},

								  Communicator API: {UnityEnvironment.API_VERSION},

								  TensorFlow: {tf_utils.tf.__version__}"""


								def parse_command_line(argv: Optional[List[str]] = None) -> RunOptions:

								    args = parser.parse_args(argv)

								    return RunOptions.from_argparse(args)


								def run_training(run_seed: int, options: RunOptions) -> None:

								    """

								    Launches training session.

								    :param options: parsed command line arguments

								    :param run_seed: Random seed used for training.

								    :param run_options: Command line arguments for training.

								    """

								    # Recognize and use docker volume if one is passed as an argument

								    if not options.docker_target_name:

								        model_path = f"./models/{options.run_id}"

								        summaries_dir = "./summaries"

								    else:

								        model_path = f"/{options.docker_target_name}/models/{options.run_id}"

								        summaries_dir = f"/{options.docker_target_name}/summaries"

								    port = options.base_port


								    # Configure CSV, Tensorboard Writers and StatsReporter

								    # We assume reward and episode length are needed in the CSV.

								    csv_writer = CSVWriter(

								        summaries_dir,

								        required_fields=["Environment/Cumulative Reward", "Environment/Episode Length"],

								    )

								    tb_writer = TensorboardWriter(summaries_dir)

								    StatsReporter.add_writer(tb_writer)

								    StatsReporter.add_writer(csv_writer)


								    if options.env_path is None:

								        port = 5004  # This is the in Editor Training Port

								    env_factory = create_environment_factory(

								        options.env_path,

								        options.docker_target_name,

								        options.no_graphics,

								        run_seed,

								        port,

								        options.env_args,

								    )

								    engine_config = EngineConfig(

								        options.width,

								        options.height,

								        options.quality_level,

								        options.time_scale,

								        options.target_frame_rate,

								    )

								    env_manager = SubprocessEnvManager(env_factory, engine_config, options.num_envs)

								    maybe_meta_curriculum = try_create_meta_curriculum(

								        options.curriculum_config, env_manager, options.lesson

								    )

								    sampler_manager, resampling_interval = create_sampler_manager(

								        options.sampler_config, run_seed

								    )

								    trainer_factory = TrainerFactory(

								        options.trainer_config,

								        summaries_dir,

								        options.run_id,

								        model_path,

								        options.keep_checkpoints,

								        options.train_model,

								        options.load_model,

								        run_seed,

								        maybe_meta_curriculum,

								        options.multi_gpu,

								    )

								    # Create controller and begin training.

								    tc = TrainerController(

								        trainer_factory,

								        model_path,

								        summaries_dir,

								        options.run_id,

								        options.save_freq,

								        maybe_meta_curriculum,

								        options.train_model,

								        run_seed,

								        sampler_manager,

								        resampling_interval,

								    )

								    # Begin training

								    try:

								        tc.start_learning(env_manager)

								    finally:

								        env_manager.close()


								def create_sampler_manager(sampler_config, run_seed=None):

								    resample_interval = None

								    if sampler_config is not None:

								        if "resampling-interval" in sampler_config:

								            # Filter arguments that do not exist in the environment

								            resample_interval = sampler_config.pop("resampling-interval")

								            if (resample_interval <= 0) or (not isinstance(resample_interval, int)):

								                raise SamplerException(

								                    "Specified resampling-interval is not valid. Please provide"

								                    " a positive integer value for resampling-interval"

								                )


								        else:

								            raise SamplerException(

								                "Resampling interval was not specified in the sampler file."

								                " Please specify it with the 'resampling-interval' key in the sampler config file."

								            )


								    sampler_manager = SamplerManager(sampler_config, run_seed)

								    return sampler_manager, resample_interval


								def try_create_meta_curriculum(

								    curriculum_config: Optional[Dict], env: SubprocessEnvManager, lesson: int

								) -> Optional[MetaCurriculum]:

								    if curriculum_config is None:

								        return None

								    else:

								        meta_curriculum = MetaCurriculum(curriculum_config)

								        # TODO: Should be able to start learning at different lesson numbers

								        # for each curriculum.

								        meta_curriculum.set_all_curricula_to_lesson_num(lesson)

								        return meta_curriculum


								def prepare_for_docker_run(docker_target_name, env_path):

								    for f in glob.glob(

								        "/{docker_target_name}/*".format(docker_target_name=docker_target_name)

								    ):

								        if env_path in f:

								            try:

								                b = os.path.basename(f)

								                if os.path.isdir(f):

								                    shutil.copytree(f, "/ml-agents/{b}".format(b=b))

								                else:

								                    src_f = "/{docker_target_name}/{b}".format(

								                        docker_target_name=docker_target_name, b=b

								                    )

								                    dst_f = "/ml-agents/{b}".format(b=b)

								                    shutil.copyfile(src_f, dst_f)

								                    os.chmod(dst_f, 0o775)  # Make executable

								            except Exception as e:

								                logging.getLogger("mlagents.trainers").info(e)

								    env_path = "/ml-agents/{env_path}".format(env_path=env_path)

								    return env_path


								def create_environment_factory(

								    env_path: Optional[str],

								    docker_target_name: Optional[str],

								    no_graphics: bool,

								    seed: Optional[int],

								    start_port: int,

								    env_args: Optional[List[str]],

								) -> Callable[[int, List[SideChannel]], BaseEnv]:

								    if env_path is not None:

								        # Strip out executable extensions if passed

								        env_path = (

								            env_path.strip()

								            .replace(".app", "")

								            .replace(".exe", "")

								            .replace(".x86_64", "")

								            .replace(".x86", "")

								        )

								    docker_training = docker_target_name is not None

								    if docker_training and env_path is not None:

								        #     Comments for future maintenance:

								        #         Some OS/VM instances (e.g. COS GCP Image) mount filesystems

								        #         with COS flag which prevents execution of the Unity scene,

								        #         to get around this, we will copy the executable into the

								        #         container.

								        # Navigate in docker path and find env_path and copy it.

								        env_path = prepare_for_docker_run(docker_target_name, env_path)

								    seed_count = 10000

								    seed_pool = [np.random.randint(0, seed_count) for _ in range(seed_count)]


								    def create_unity_environment(

								        worker_id: int, side_channels: List[SideChannel]

								    ) -> UnityEnvironment:

								        env_seed = seed

								        if not env_seed:

								            env_seed = seed_pool[worker_id % len(seed_pool)]

								        return UnityEnvironment(

								            file_name=env_path,

								            worker_id=worker_id,

								            seed=env_seed,

								            docker_training=docker_training,

								            no_graphics=no_graphics,

								            base_port=start_port,

								            args=env_args,

								            side_channels=side_channels,

								        )


								    return create_unity_environment


								def run_cli(options: RunOptions) -> None:

								    try:

								        print(

								            """


								                        ▄▄▄▓▓▓▓

								                   ╓▓▓▓▓▓▓█▓▓▓▓▓

								              ,▄▄▄m▀▀▀'  ,▓▓▓▀▓▓▄                           ▓▓▓  ▓▓▌

								            ▄▓▓▓▀'      ▄▓▓▀  ▓▓▓      ▄▄     ▄▄ ,▄▄ ▄▄▄▄   ,▄▄ ▄▓▓▌▄ ▄▄▄    ,▄▄

								          ▄▓▓▓▀        ▄▓▓▀   ▐▓▓▌     ▓▓▌   ▐▓▓ ▐▓▓▓▀▀▀▓▓▌ ▓▓▓ ▀▓▓▌▀ ^▓▓▌  ╒▓▓▌

								        ▄▓▓▓▓▓▄▄▄▄▄▄▄▄▓▓▓      ▓▀      ▓▓▌   ▐▓▓ ▐▓▓    ▓▓▓ ▓▓▓  ▓▓▌   ▐▓▓▄ ▓▓▌

								        ▀▓▓▓▓▀▀▀▀▀▀▀▀▀▀▓▓▄     ▓▓      ▓▓▌   ▐▓▓ ▐▓▓    ▓▓▓ ▓▓▓  ▓▓▌    ▐▓▓▐▓▓

								          ^█▓▓▓        ▀▓▓▄   ▐▓▓▌     ▓▓▓▓▄▓▓▓▓ ▐▓▓    ▓▓▓ ▓▓▓  ▓▓▓▄    ▓▓▓▓`

								            '▀▓▓▓▄      ^▓▓▓  ▓▓▓       └▀▀▀▀ ▀▀ ^▀▀    `▀▀ `▀▀   '▀▀    ▐▓▓▌

								               ▀▀▀▀▓▄▄▄   ▓▓▓▓▓▓,                                      ▓▓▓▓▀

								                   `▀█▓▓▓▓▓▓▓▓▓▌

								                        ¬`▀▀▀█▓


								        """

								        )

								    except Exception:

								        print("\n\n\tUnity Technologies\n")

								    print(get_version_string())

								    trainer_logger = logging.getLogger("mlagents.trainers")

								    env_logger = logging.getLogger("mlagents_envs")


								    if options.debug:

								        trainer_logger.setLevel("DEBUG")

								        env_logger.setLevel("DEBUG")

								    else:

								        # disable noisy warnings from tensorflow.

								        tf_utils.set_warnings_enabled(False)


								    trainer_logger.debug("Configuration for this run:")

								    trainer_logger.debug(json.dumps(options._asdict(), indent=4))


								    run_seed = options.seed

								    if options.cpu:

								        os.environ["CUDA_VISIBLE_DEVICES"] = "-1"


								    if options.seed == -1:

								        run_seed = np.random.randint(0, 10000)

								    run_training(run_seed, options)


								def main():

								    run_cli(parse_command_line())


								# For python debugger to directly run this script

								if __name__ == "__main__":

								    main()