ml-agents/ml-agents/mlagents/trainers/learn.py


								# # Unity ML-Agents Toolkit

								import argparse


								import os

								import numpy as np

								import json


								from typing import Callable, Optional, List, NamedTuple, Dict


								import mlagents.trainers

								import mlagents_envs

								from mlagents import tf_utils

								from mlagents.trainers.trainer_controller import TrainerController

								from mlagents.trainers.meta_curriculum import MetaCurriculum

								from mlagents.trainers.trainer_util import (

								    load_config,

								    TrainerFactory,

								    handle_existing_directories,

								)

								from mlagents.trainers.stats import (

								    TensorboardWriter,

								    CSVWriter,

								    StatsReporter,

								    GaugeWriter,

								    ConsoleWriter,

								)

								from mlagents_envs.environment import UnityEnvironment

								from mlagents.trainers.sampler_class import SamplerManager

								from mlagents.trainers.exception import SamplerException

								from mlagents_envs.base_env import BaseEnv

								from mlagents.trainers.subprocess_env_manager import SubprocessEnvManager

								from mlagents_envs.side_channel.side_channel import SideChannel

								from mlagents_envs.side_channel.engine_configuration_channel import EngineConfig

								from mlagents_envs.exception import UnityEnvironmentException

								from mlagents_envs.timers import (

								    hierarchical_timer,

								    get_timer_tree,

								    add_metadata as add_timer_metadata,

								)

								from mlagents_envs import logging_util


								# Module-level logger, obtained from logging_util under this module's name.
								logger = logging_util.get_logger(__name__)


								def _create_parser():
								    """
								    Builds the argparse parser for the mlagents-learn command line.
								    One positional argument (the trainer config path) is registered, followed by the
								    optional training flags and an "Engine Configuration" group for the Unity
								    window/time settings.
								    :return: The configured argparse.ArgumentParser.
								    """
								    argparser = argparse.ArgumentParser(
								        formatter_class=argparse.ArgumentDefaultsHelpFormatter
								    )
								    argparser.add_argument("trainer_config_path")
								    argparser.add_argument(
								        "--env",
								        default=None,
								        dest="env_path",
								        help="Path to the Unity executable to train",
								    )
								    argparser.add_argument(
								        "--curriculum",
								        default=None,
								        dest="curriculum_config_path",
								        help="YAML file for defining the lessons for curriculum training",
								    )
								    argparser.add_argument(
								        "--lesson",
								        default=0,
								        type=int,
								        help="The lesson to start with when performing curriculum training",
								    )
								    argparser.add_argument(
								        "--sampler",
								        default=None,
								        dest="sampler_file_path",
								        help="YAML file for defining the sampler for environment parameter randomization",
								    )
								    # Adjacent string literals are concatenated verbatim, so each fragment must end
								    # with a space to keep the words apart in the rendered help.
								    argparser.add_argument(
								        "--keep-checkpoints",
								        default=5,
								        type=int,
								        help="The maximum number of model checkpoints to keep. Checkpoints are saved after the "
								        "number of steps specified by the save-freq option. Once the maximum number of checkpoints "
								        "has been reached, the oldest checkpoint is deleted when saving a new checkpoint.",
								    )
								    argparser.add_argument(
								        "--load",
								        default=False,
								        dest="load_model",
								        action="store_true",
								        help=argparse.SUPPRESS,  # Deprecated but still usable for now.
								    )
								    argparser.add_argument(
								        "--resume",
								        default=False,
								        dest="resume",
								        action="store_true",
								        help="Whether to resume training from a checkpoint. Specify a --run-id to use this option. "
								        "If set, the training code loads an already trained model to initialize the neural network "
								        "before resuming training. This option is only valid when the models exist, and have the same "
								        "behavior names as the current agents in your scene.",
								    )
								    argparser.add_argument(
								        "--force",
								        default=False,
								        dest="force",
								        action="store_true",
								        help="Whether to force-overwrite this run-id's existing summary and model data. (Without "
								        "this flag, attempting to train a model with a run-id that has been used before will throw "
								        "an error.)",
								    )
								    argparser.add_argument(
								        "--run-id",
								        default="ppo",
								        help="The identifier for the training run. This identifier is used to name the "
								        "subdirectories in which the trained model and summary statistics are saved as well "
								        "as the saved model itself. If you use TensorBoard to view the training statistics, "
								        "always set a unique run-id for each training run. (The statistics for all runs with the "
								        "same id are combined as if they were produced by the same session.)",
								    )
								    argparser.add_argument(
								        "--initialize-from",
								        metavar="RUN_ID",
								        default=None,
								        help="Specify a previously saved run ID from which to initialize the model. "
								        "This can be used, for instance, to fine-tune an existing model on a new environment. "
								        "Note that the previously saved models must have the same behavior parameters as your "
								        "current environment.",
								    )
								    argparser.add_argument(
								        "--save-freq",
								        default=50000,
								        type=int,
								        help="How often (in steps) to save the model during training",
								    )
								    argparser.add_argument(
								        "--seed",
								        default=-1,
								        type=int,
								        help="A number to use as a seed for the random number generator used by the training code",
								    )
								    argparser.add_argument(
								        "--train",
								        default=False,
								        dest="train_model",
								        action="store_true",
								        help=argparse.SUPPRESS,  # Deprecated: hidden from --help but still accepted.
								    )
								    argparser.add_argument(
								        "--inference",
								        default=False,
								        dest="inference",
								        action="store_true",
								        help="Whether to run in Python inference mode (i.e. no training). Use with --resume to load "
								        "a model trained with an existing run ID.",
								    )
								    argparser.add_argument(
								        "--base-port",
								        default=UnityEnvironment.BASE_ENVIRONMENT_PORT,
								        type=int,
								        help="The starting port for environment communication. Each concurrent Unity environment "
								        "instance will get assigned a port sequentially, starting from the base-port. Each instance "
								        "will use the port (base_port + worker_id), where the worker_id is sequential IDs given to "
								        "each instance from 0 to (num_envs - 1). Note that when training using the Editor rather "
								        "than an executable, the base port will be ignored.",
								    )
								    argparser.add_argument(
								        "--num-envs",
								        default=1,
								        type=int,
								        help="The number of concurrent Unity environment instances to collect experiences "
								        "from when training",
								    )
								    argparser.add_argument(
								        "--no-graphics",
								        default=False,
								        action="store_true",
								        help="Whether to run the Unity executable in no-graphics mode (i.e. without initializing "
								        "the graphics driver). Use this only if your agents don't use visual observations.",
								    )
								    argparser.add_argument(
								        "--debug",
								        default=False,
								        action="store_true",
								        help="Whether to enable debug-level logging for some parts of the code",
								    )
								    # REMAINDER swallows everything after --env-args, so this flag must come last
								    # on the command line.
								    argparser.add_argument(
								        "--env-args",
								        default=None,
								        nargs=argparse.REMAINDER,
								        help="Arguments passed to the Unity executable. Be aware that the standalone build will also "
								        "process these as Unity Command Line Arguments. You should choose different argument names if "
								        "you want to create environment-specific arguments. All arguments after this flag will be "
								        "passed to the executable.",
								    )
								    argparser.add_argument(
								        "--cpu",
								        default=False,
								        action="store_true",
								        help="Forces training using CPU only",
								    )

								    # NOTE(review): the version text is empty, so "--version" prints a blank line
								    # and exits. Confirm whether the version information should be shown here.
								    argparser.add_argument("--version", action="version", version="")

								    eng_conf = argparser.add_argument_group(title="Engine Configuration")
								    eng_conf.add_argument(
								        "--width",
								        default=84,
								        type=int,
								        help="The width of the executable window of the environment(s) in pixels "
								        "(ignored for editor training).",
								    )
								    eng_conf.add_argument(
								        "--height",
								        default=84,
								        type=int,
								        help="The height of the executable window of the environment(s) in pixels "
								        "(ignored for editor training).",
								    )
								    eng_conf.add_argument(
								        "--quality-level",
								        default=5,
								        type=int,
								        help="The quality level of the environment(s). Equivalent to calling "
								        "QualitySettings.SetQualityLevel in Unity.",
								    )
								    eng_conf.add_argument(
								        "--time-scale",
								        default=20,
								        type=float,
								        help="The time scale of the Unity environment(s). Equivalent to setting "
								        "Time.timeScale in Unity.",
								    )
								    eng_conf.add_argument(
								        "--target-frame-rate",
								        default=-1,
								        type=int,
								        help="The target frame rate of the Unity environment(s). Equivalent to setting "
								        "Application.targetFrameRate in Unity.",
								    )
								    return argparser


								# Built once at import time: RunOptions reads its field defaults from this parser
								# and parse_command_line() uses it to parse the command line.
								parser = _create_parser()


								class RunOptions(NamedTuple):
								    """
								    Immutable collection of the settings for one mlagents-learn run.
								    Apart from the loaded configuration dictionaries, every field defaults to the
								    default of the matching command-line argument of the module-level parser.
								    """

								    trainer_config: Dict
								    debug: bool = parser.get_default("debug")
								    seed: int = parser.get_default("seed")
								    env_path: Optional[str] = parser.get_default("env_path")
								    run_id: str = parser.get_default("run_id")
								    # The parser default is None, i.e. "do not initialize from another run".
								    initialize_from: Optional[str] = parser.get_default("initialize_from")
								    load_model: bool = parser.get_default("load_model")
								    resume: bool = parser.get_default("resume")
								    force: bool = parser.get_default("force")
								    train_model: bool = parser.get_default("train_model")
								    inference: bool = parser.get_default("inference")
								    save_freq: int = parser.get_default("save_freq")
								    keep_checkpoints: int = parser.get_default("keep_checkpoints")
								    base_port: int = parser.get_default("base_port")
								    num_envs: int = parser.get_default("num_envs")
								    curriculum_config: Optional[Dict] = None
								    lesson: int = parser.get_default("lesson")
								    no_graphics: bool = parser.get_default("no_graphics")
								    # The parser registers no "multi_gpu" destination, so get_default() yields
								    # None; coerce it so the field honours its bool annotation.
								    multi_gpu: bool = bool(parser.get_default("multi_gpu"))
								    sampler_config: Optional[Dict] = None
								    env_args: Optional[List[str]] = parser.get_default("env_args")
								    cpu: bool = parser.get_default("cpu")
								    width: int = parser.get_default("width")
								    height: int = parser.get_default("height")
								    quality_level: int = parser.get_default("quality_level")
								    time_scale: float = parser.get_default("time_scale")
								    target_frame_rate: int = parser.get_default("target_frame_rate")

								    @staticmethod
								    def from_argparse(args: argparse.Namespace) -> "RunOptions":
								        """
								        Takes an argparse.Namespace as specified in `parse_command_line`, loads input configuration files
								        from file paths, and converts to a RunOptions instance.
								        :param args: collection of command-line parameters passed to mlagents-learn
								        :return: RunOptions representing the passed in arguments, with trainer config, curriculum and sampler
								          configs loaded from files.
								        """
								        # Work on a copy: vars() returns the Namespace's own __dict__, and editing
								        # it in place would strip attributes from the caller's object.
								        argparse_args = dict(vars(args))
								        # argparse accepts file paths which don't exist as RunOptions fields, so
								        # they are removed here and replaced by the loaded configurations.
								        trainer_config_path = argparse_args.pop("trainer_config_path")
								        curriculum_config_path = argparse_args.pop("curriculum_config_path")
								        sampler_file_path = argparse_args.pop("sampler_file_path")

								        argparse_args["trainer_config"] = load_config(trainer_config_path)
								        if curriculum_config_path is not None:
								            argparse_args["curriculum_config"] = load_config(curriculum_config_path)
								        if sampler_file_path is not None:
								            argparse_args["sampler_config"] = load_config(sampler_file_path)
								        # Keep deprecated --load working, TODO: remove
								        argparse_args["resume"] = argparse_args["resume"] or argparse_args["load_model"]
								        return RunOptions(**argparse_args)


								def get_version_string() -> str:

								    """
								    Builds the multi-line version banner for this run.
								    :return: Text listing the ml-agents, ml-agents-envs, Communicator API and
								      TensorFlow versions.
								    """

								    # NOTE(review): the continuation lines of the literal below carry this file's
								    # leading whitespace and blank lines, which all end up in the returned text.
								    # Confirm that layout is intended.

								    # pylint: disable=no-member

								    return f""" Version information:

								  ml-agents: {mlagents.trainers.__version__},

								  ml-agents-envs: {mlagents_envs.__version__},

								  Communicator API: {UnityEnvironment.API_VERSION},

								  TensorFlow: {tf_utils.tf.__version__}"""


								def parse_command_line(argv: Optional[List[str]] = None) -> RunOptions:
								    """
								    Parses command-line arguments with the module-level parser.
								    :param argv: Argument strings handed to parser.parse_args; with None, argparse
								      reads the process arguments.
								    :return: RunOptions built from the parsed namespace by RunOptions.from_argparse.
								    """
								    return RunOptions.from_argparse(parser.parse_args(argv))


								def run_training(run_seed: int, options: RunOptions) -> None:
								    """
								    Launches training session.
								    :param run_seed: Random seed used for training.
								    :param options: Parsed command line arguments for training.
								    """
								    with hierarchical_timer("run_training.setup"):
								        model_path = f"./models/{options.run_id}"
								        maybe_init_path = (
								            f"./models/{options.initialize_from}" if options.initialize_from else None
								        )
								        summaries_dir = "./summaries"
								        port = options.base_port

								        # Configure CSV, Tensorboard Writers and StatsReporter
								        # We assume reward and episode length are needed in the CSV.
								        csv_writer = CSVWriter(
								            summaries_dir,
								            required_fields=[
								                "Environment/Cumulative Reward",
								                "Environment/Episode Length",
								            ],
								        )
								        handle_existing_directories(
								            model_path, summaries_dir, options.resume, options.force, maybe_init_path
								        )
								        tb_writer = TensorboardWriter(summaries_dir, clear_past_data=not options.resume)
								        gauge_write = GaugeWriter()
								        console_writer = ConsoleWriter()
								        StatsReporter.add_writer(tb_writer)
								        StatsReporter.add_writer(csv_writer)
								        StatsReporter.add_writer(gauge_write)
								        StatsReporter.add_writer(console_writer)

								        # Without an executable path the editor port replaces the base port.
								        if options.env_path is None:
								            port = UnityEnvironment.DEFAULT_EDITOR_PORT
								        env_factory = create_environment_factory(
								            options.env_path, options.no_graphics, run_seed, port, options.env_args
								        )
								        engine_config = EngineConfig(
								            options.width,
								            options.height,
								            options.quality_level,
								            options.time_scale,
								            options.target_frame_rate,
								        )
								        env_manager = SubprocessEnvManager(env_factory, engine_config, options.num_envs)
								        try:
								            maybe_meta_curriculum = try_create_meta_curriculum(
								                options.curriculum_config, env_manager, options.lesson
								            )
								            sampler_manager, resampling_interval = create_sampler_manager(
								                options.sampler_config, run_seed
								            )
								            trainer_factory = TrainerFactory(
								                options.trainer_config,
								                summaries_dir,
								                options.run_id,
								                model_path,
								                options.keep_checkpoints,
								                not options.inference,
								                options.resume,
								                run_seed,
								                maybe_init_path,
								                maybe_meta_curriculum,
								                options.multi_gpu,
								            )
								            # Create controller and begin training.
								            tc = TrainerController(
								                trainer_factory,
								                model_path,
								                summaries_dir,
								                options.run_id,
								                options.save_freq,
								                maybe_meta_curriculum,
								                not options.inference,
								                run_seed,
								                sampler_manager,
								                resampling_interval,
								            )
								        except BaseException:
								            # The env manager already exists at this point; without this, a
								            # failure in the remaining setup (e.g. an invalid sampler config)
								            # would skip the close() in the training block below.
								            env_manager.close()
								            raise

								    # Begin training
								    try:
								        tc.start_learning(env_manager)
								    finally:
								        # Always release the environments and persist the timers, even if
								        # training raised.
								        env_manager.close()
								        write_timing_tree(summaries_dir, options.run_id)


								def write_timing_tree(summaries_dir: str, run_id: str) -> None:
								    """
								    Dumps the current timer tree as JSON to "<summaries_dir>/<run_id>_timers.json".
								    A missing target location is logged as a warning rather than raised.
								    :param summaries_dir: Directory that receives the timers file.
								    :param run_id: Identifier of the run, used as the file name prefix.
								    """
								    destination = f"{summaries_dir}/{run_id}_timers.json"
								    try:
								        with open(destination, "w") as timers_file:
								            json.dump(get_timer_tree(), timers_file, indent=4)
								    except FileNotFoundError:
								        logger.warning(
								            f"Unable to save to {destination}. Make sure the directory exists"
								        )


								def create_sampler_manager(sampler_config, run_seed=None):
								    """
								    Creates the SamplerManager and extracts the resampling interval.
								    :param sampler_config: Sampler configuration dictionary, or None when no sampler
								      file was given. The "resampling-interval" key is removed from this
								      dictionary in place.
								    :param run_seed: Seed forwarded to the SamplerManager.
								    :return: Tuple of (SamplerManager, resampling interval); the interval is None
								      when sampler_config is None.
								    :raises SamplerException: If the config lacks "resampling-interval" or its value
								      is not a positive integer.
								    """
								    resample_interval = None
								    if sampler_config is not None:
								        if "resampling-interval" not in sampler_config:
								            raise SamplerException(
								                "Resampling interval was not specified in the sampler file."
								                " Please specify it with the 'resampling-interval' key in the sampler config file."
								            )
								        # Taken out of the config before the rest is handed to SamplerManager.
								        resample_interval = sampler_config.pop("resampling-interval")
								        # Check the type first: comparing a str or None with 0 would raise
								        # TypeError instead of the intended SamplerException. bool is rejected
								        # explicitly because it is a subclass of int.
								        is_integer = isinstance(resample_interval, int) and not isinstance(
								            resample_interval, bool
								        )
								        if not is_integer or resample_interval <= 0:
								            raise SamplerException(
								                "Specified resampling-interval is not valid. Please provide"
								                " a positive integer value for resampling-interval"
								            )

								    sampler_manager = SamplerManager(sampler_config, run_seed)
								    return sampler_manager, resample_interval


								def try_create_meta_curriculum(
								    curriculum_config: Optional[Dict], env: SubprocessEnvManager, lesson: int
								) -> Optional[MetaCurriculum]:
								    """
								    Builds a MetaCurriculum when a curriculum configuration is present.
								    :param curriculum_config: Loaded curriculum configuration, or None.
								    :param env: Environment manager; not used by this function.
								    :param lesson: Lesson number applied to every curriculum.
								    :return: The MetaCurriculum, or None when curriculum_config is None.
								    """
								    if curriculum_config is None:
								        return None

								    curricula = MetaCurriculum(curriculum_config)
								    # TODO: Should be able to start learning at different lesson numbers
								    # for each curriculum.
								    curricula.set_all_curricula_to_lesson_num(lesson)
								    return curricula


								def create_environment_factory(
								    env_path: Optional[str],
								    no_graphics: bool,
								    seed: int,
								    start_port: int,
								    env_args: Optional[List[str]],
								) -> Callable[[int, List[SideChannel]], BaseEnv]:
								    """
								    Returns a callable that creates one UnityEnvironment per worker.
								    :param env_path: Path of the Unity executable, or None.
								    :param no_graphics: Forwarded to UnityEnvironment as no_graphics.
								    :param seed: Base seed; each worker receives seed + worker_id.
								    :param start_port: Forwarded to UnityEnvironment as base_port.
								    :param env_args: Extra arguments forwarded to UnityEnvironment.
								    :return: Function taking (worker_id, side_channels) and returning the environment.
								    :raises UnityEnvironmentException: If env_path is given but does not validate.
								    """
								    # Reject an unusable executable path up front, before any worker is created.
								    if (
								        env_path is not None
								        and UnityEnvironment.validate_environment_path(env_path) is None
								    ):
								        raise UnityEnvironmentException(
								            f"Couldn't launch the {env_path} environment. Provided filename does not match any environments."
								        )

								    def create_unity_environment(
								        worker_id: int, side_channels: List[SideChannel]
								    ) -> UnityEnvironment:
								        # Offsetting by the worker id gives every environment its own seed.
								        return UnityEnvironment(
								            file_name=env_path,
								            worker_id=worker_id,
								            seed=seed + worker_id,
								            no_graphics=no_graphics,
								            base_port=start_port,
								            args=env_args,
								            side_channels=side_channels,
								        )

								    return create_unity_environment


								def run_cli(options: RunOptions) -> None:
								    """
								    Prints the banner and version information, configures logging, and starts training.
								    :param options: Run settings; debug, load_model, train_model, cpu and seed are
								      read here, and the whole object is passed on to run_training.
								    """
								    banner = """


								                        ▄▄▄▓▓▓▓

								                   ╓▓▓▓▓▓▓█▓▓▓▓▓

								              ,▄▄▄m▀▀▀'  ,▓▓▓▀▓▓▄                           ▓▓▓  ▓▓▌

								            ▄▓▓▓▀'      ▄▓▓▀  ▓▓▓      ▄▄     ▄▄ ,▄▄ ▄▄▄▄   ,▄▄ ▄▓▓▌▄ ▄▄▄    ,▄▄

								          ▄▓▓▓▀        ▄▓▓▀   ▐▓▓▌     ▓▓▌   ▐▓▓ ▐▓▓▓▀▀▀▓▓▌ ▓▓▓ ▀▓▓▌▀ ^▓▓▌  ╒▓▓▌

								        ▄▓▓▓▓▓▄▄▄▄▄▄▄▄▓▓▓      ▓▀      ▓▓▌   ▐▓▓ ▐▓▓    ▓▓▓ ▓▓▓  ▓▓▌   ▐▓▓▄ ▓▓▌

								        ▀▓▓▓▓▀▀▀▀▀▀▀▀▀▀▓▓▄     ▓▓      ▓▓▌   ▐▓▓ ▐▓▓    ▓▓▓ ▓▓▓  ▓▓▌    ▐▓▓▐▓▓

								          ^█▓▓▓        ▀▓▓▄   ▐▓▓▌     ▓▓▓▓▄▓▓▓▓ ▐▓▓    ▓▓▓ ▓▓▓  ▓▓▓▄    ▓▓▓▓`

								            '▀▓▓▓▄      ^▓▓▓  ▓▓▓       └▀▀▀▀ ▀▀ ^▀▀    `▀▀ `▀▀   '▀▀    ▐▓▓▌

								               ▀▀▀▀▓▄▄▄   ▓▓▓▓▓▓,                                      ▓▓▓▓▀

								                   `▀█▓▓▓▓▓▓▓▓▓▌

								                        ¬`▀▀▀█▓


								        """
								    try:
								        print(banner)
								    except Exception:
								        # Fall back to a plain-text banner if printing the logo fails.
								        print("\n\n\tUnity Technologies\n")
								    print(get_version_string())

								    log_level = logging_util.DEBUG if options.debug else logging_util.INFO
								    if not options.debug:
								        # disable noisy warnings from tensorflow
								        tf_utils.set_warnings_enabled(False)
								    logging_util.set_log_level(log_level)

								    logger.debug("Configuration for this run:")
								    logger.debug(json.dumps(options._asdict(), indent=4))

								    # Options deprecation warnings
								    if options.load_model:
								        logger.warning(
								            "The --load option has been deprecated. "
								            "Please use the --resume option instead."
								        )
								    if options.train_model:
								        logger.warning(
								            "The --train option has been deprecated. Train mode is now the default. "
								            "Use --inference to run in inference mode."
								        )

								    if options.cpu:
								        os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

								    # Add some timer metadata
								    add_timer_metadata("mlagents_version", mlagents.trainers.__version__)
								    add_timer_metadata("mlagents_envs_version", mlagents_envs.__version__)
								    add_timer_metadata("communication_protocol_version", UnityEnvironment.API_VERSION)
								    add_timer_metadata("tensorflow_version", tf_utils.tf.__version__)

								    # A seed of -1 means "not chosen": draw one for this run instead.
								    if options.seed == -1:
								        run_seed = np.random.randint(0, 10000)
								    else:
								        run_seed = options.seed
								    run_training(run_seed, options)


								def main():
								    """Entry point: parses the command line and runs the CLI with the result."""
								    cli_options = parse_command_line()
								    run_cli(cli_options)


								# Allows this file to be run directly as a script, e.g. under a Python debugger.

								if __name__ == "__main__":

								    main()