[refactor] Allow full RunOptions to be specified in trainer configuration YAML (#3815)

5 年前 · 7e0032f5
--- a/com.unity.ml-agents/CHANGELOG.md
+++ b/com.unity.ml-agents/CHANGELOG.md
 will allow use with python 3.8 using tensorflow 2.2.0rc3.
 - `UnityRLCapabilities` was added to help inform users when RL features are mismatched between C# and Python packages. (#3831)
 - Unity Player logs are now written out to the results directory. (#3877)
+- Run configuration YAML files are written out to the results directory at the end of the run. (#3815)

 ### Bug Fixes

--- a/ml-agents/mlagents/trainers/learn.py
+++ b/ml-agents/mlagents/trainers/learn.py
    GaugeWriter,
    ConsoleWriter,
 )
+from mlagents.trainers.cli_utils import (
+    StoreConfigFile,
+    DetectDefault,
+    DetectDefaultStoreTrue,
+)
 from mlagents_envs.environment import UnityEnvironment
 from mlagents.trainers.sampler_class import SamplerManager
 from mlagents.trainers.exception import SamplerException, TrainerConfigError
    argparser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
-    argparser.add_argument("trainer_config_path")
+    argparser.add_argument("trainer_config_path", action=StoreConfigFile)
+        action=DetectDefault,
    )
    argparser.add_argument(
        "--lesson",
+        action=DetectDefault,
    )
    argparser.add_argument(
        "--keep-checkpoints",
        "number of steps specified by the save-freq option. Once the maximum number of checkpoints"
        "has been reached, the oldest checkpoint is deleted when saving a new checkpoint.",
+        action=DetectDefault,
-        action="store_true",
+        action=DetectDefaultStoreTrue,
        help=argparse.SUPPRESS,  # Deprecated but still usable for now.
    )
    argparser.add_argument(
-        action="store_true",
+        action=DetectDefaultStoreTrue,
        help="Whether to resume training from a checkpoint. Specify a --run-id to use this option. "
        "If set, the training code loads an already trained model to initialize the neural network "
        "before resuming training. This option is only valid when the models exist, and have the same "
        "--force",
        default=False,
        dest="force",
-        action="store_true",
+        action=DetectDefaultStoreTrue,
        help="Whether to force-overwrite this run-id's existing summary and model data. (Without "
        "this flag, attempting to train a model with a run-id that has been used before will throw "
        "an error.",
        "as the saved model itself. If you use TensorBoard to view the training statistics, "
        "always set a unique run-id for each training run. (The statistics for all runs with the "
        "same id are combined as if they were produced by a the same session.)",
+        action=DetectDefault,
    )
    argparser.add_argument(
        "--initialize-from",
        "This can be used, for instance, to fine-tune an existing model on a new environment. "
        "Note that the previously saved models must have the same behavior parameters as your "
        "current environment.",
+        action=DetectDefault,
    )
    argparser.add_argument(
        "--save-freq",
+        action=DetectDefault,
    )
    argparser.add_argument(
        "--seed",
+        action=DetectDefault,
-        action="store_true",
+        action=DetectDefaultStoreTrue,
        help=argparse.SUPPRESS,
    )
    argparser.add_argument(
-        action="store_true",
+        action=DetectDefaultStoreTrue,
        help="Whether to run in Python inference mode (i.e. no training). Use with --resume to load "
        "a model trained with an existing run ID.",
    )
        "will use the port (base_port + worker_id), where the worker_id is sequential IDs given to "
        "each instance from 0 to (num_envs - 1). Note that when training using the Editor rather "
        "than an executable, the base port will be ignored.",
+        action=DetectDefault,
    )
    argparser.add_argument(
        "--num-envs",
        "from when training",
+        action=DetectDefault,
-        action="store_true",
+        action=DetectDefaultStoreTrue,
        help="Whether to run the Unity executable in no-graphics mode (i.e. without initializing "
        "the graphics driver. Use this only if your agents don't use visual observations.",
    )
-        action="store_true",
+        action=DetectDefaultStoreTrue,
        help="Whether to enable debug-level logging for some parts of the code",
    )
    argparser.add_argument(
        "process these as Unity Command Line Arguments. You should choose different argument names if "
        "you want to create environment-specific arguments. All arguments after this flag will be "
        "passed to the executable.",
+        action=DetectDefault,
-        action="store_true",
+        action=DetectDefaultStoreTrue,
        help="Forces training using CPU only",
    )

        type=int,
        help="The width of the executable window of the environment(s) in pixels "
        "(ignored for editor training).",
+        action=DetectDefault,
    )
    eng_conf.add_argument(
        "--height",
        "(ignored for editor training)",
+        action=DetectDefault,
    )
    eng_conf.add_argument(
        "--quality-level",
        "QualitySettings.SetQualityLevel in Unity.",
+        action=DetectDefault,
    )
    eng_conf.add_argument(
        "--time-scale",
        "Time.timeScale in Unity.",
+        action=DetectDefault,
    )
    eng_conf.add_argument(
        "--target-frame-rate",
        "Application.targetFrameRate in Unity.",
+        action=DetectDefault,
    )
    eng_conf.add_argument(
        "--capture-frame-rate",
        "Time.captureFramerate in Unity.",
+        action=DetectDefault,
    )
    return argparser

          configs loaded from files.
        """
        argparse_args = vars(args)
-        config_path = argparse_args["trainer_config_path"]
-        # Load YAML and apply overrides as needed
+        run_options_dict = {}
+        run_options_dict.update(argparse_args)
+        config_path = StoreConfigFile.trainer_config_path
+
+        # Load YAML
-        try:
-            argparse_args["behaviors"] = yaml_config["behaviors"]
-        except KeyError:
+        # This is the only option that is not optional and has no defaults.
+        if "behaviors" not in yaml_config:
+        # Use the YAML file values for all values not specified in the CLI.
+        for key, val in yaml_config.items():
+            # Detect bad config options
+            if not hasattr(RunOptions, key):
+                raise TrainerConfigError(
+                    "The option {} was specified in your YAML file, but is invalid.".format(
+                        key
+                    )
+                )
+            if key not in DetectDefault.non_default_args:
+                run_options_dict[key] = val
-        argparse_args["parameter_randomization"] = yaml_config.get(
-            "parameter_randomization", None
+        # Keep deprecated --load working, TODO: remove
+        run_options_dict["resume"] = (
+            run_options_dict["resume"] or run_options_dict["load_model"]
-        # Keep deprecated --load working, TODO: remove
-        argparse_args["resume"] = argparse_args["resume"] or argparse_args["load_model"]
-        # Since argparse accepts file paths in the config options which don't exist in CommandLineOptions,
-        # these keys will need to be deleted to use the **/splat operator below.
-        argparse_args.pop("trainer_config_path")
-        return RunOptions(**vars(args))
+        return RunOptions(**run_options_dict)


 def get_version_string() -> str:
--- a/ml-agents/mlagents/trainers/tests/test_learn.py
+++ b/ml-agents/mlagents/trainers/tests/test_learn.py
 from unittest.mock import MagicMock, patch, mock_open
 from mlagents.trainers import learn
 from mlagents.trainers.trainer_controller import TrainerController
-from mlagents.trainers.learn import parse_command_line
+from mlagents.trainers.learn import parse_command_line, DetectDefault
 from mlagents_envs.exception import UnityEnvironmentException
 from mlagents.trainers.stats import StatsReporter

 MOCK_YAML = """
    behaviors:
        {}
+    """
+
+MOCK_PARAMETER_YAML = """
+    behaviors:
+        {}
+    env_path: "./oldenvfile"
+    keep_checkpoints: 34
+    lesson: 2
+    run_id: uselessrun
+    save_freq: 654321
+    seed: 9870
+    base_port: 4001
+    num_envs: 4
+    debug: false
    """

 MOCK_SAMPLER_CURRICULUM_YAML = """

@patch("builtins.open", new_callable=mock_open, read_data=MOCK_YAML)
 def test_commandline_args(mock_file):
-
-
    # Test with defaults
    opt = parse_command_line(["mytrainerpath"])
    assert opt.behaviors == {}
    assert opt.debug is False
    assert opt.env_args is None

+    full_args = [
+        "mytrainerpath",
+        "--env=./myenvfile",
+        "--keep-checkpoints=42",
+        "--lesson=3",
+        "--resume",
+        "--inference",
+        "--run-id=myawesomerun",
+        "--save-freq=123456",
+        "--seed=7890",
+        "--train",
+        "--base-port=4004",
+        "--num-envs=2",
+        "--no-graphics",
+        "--debug",
+    ]
+
+    opt = parse_command_line(full_args)
+    assert opt.behaviors == {}
+    assert opt.env_path == "./myenvfile"
+    assert opt.parameter_randomization is None
+    assert opt.keep_checkpoints == 42
+    assert opt.lesson == 3
+    assert opt.run_id == "myawesomerun"
+    assert opt.save_freq == 123456
+    assert opt.seed == 7890
+    assert opt.base_port == 4004
+    assert opt.num_envs == 2
+    assert opt.no_graphics is True
+    assert opt.debug is True
+    assert opt.inference is True
+    assert opt.resume is True
+
+
+@patch("builtins.open", new_callable=mock_open, read_data=MOCK_PARAMETER_YAML)
+def test_yaml_args(mock_file):
+    # Test with opts loaded from YAML
+    DetectDefault.non_default_args.clear()
+    opt = parse_command_line(["mytrainerpath"])
+    assert opt.behaviors == {}
+    assert opt.env_path == "./oldenvfile"
+    assert opt.parameter_randomization is None
+    assert opt.keep_checkpoints == 34
+    assert opt.lesson == 2
+    assert opt.run_id == "uselessrun"
+    assert opt.save_freq == 654321
+    assert opt.seed == 9870
+    assert opt.base_port == 4001
+    assert opt.num_envs == 4
+    assert opt.no_graphics is False
+    assert opt.debug is False
+    assert opt.env_args is None
+    # Test that CLI overrides YAML
    full_args = [
        "mytrainerpath",
        "--env=./myenvfile",
--- a/ml-agents/mlagents/trainers/cli_utils.py
+++ b/ml-agents/mlagents/trainers/cli_utils.py
+from typing import Set
+import argparse
+
+
+class DetectDefault(argparse.Action):
+    """
+    Internal custom argparse Action that stores an option's value and records
+    that the option was explicitly supplied on the command line.
+
+    argparse only invokes an Action when its option actually appears in the
+    parsed arguments, so every ``dest`` found in ``non_default_args`` was set
+    by the user rather than left at its default. The options-building code in
+    learn.py consults this set so that YAML values are only applied to keys
+    that were not given on the CLI.
+    """
+
+    # Destinations of every option seen so far. This is a class attribute, so
+    # it is shared by all DetectDefault (and subclass) instances, and nothing
+    # in this class resets it; code that parses more than once (e.g. the unit
+    # tests) has to call ``DetectDefault.non_default_args.clear()`` itself.
+    non_default_args: Set[str] = set()
+
+    def __call__(self, arg_parser, namespace, values, option_string=None):
+        # Same effect as the built-in "store" action ...
+        setattr(namespace, self.dest, values)
+        # ... plus remembering that this destination was explicitly provided.
+        DetectDefault.non_default_args.add(self.dest)
+
+
+class DetectDefaultStoreTrue(DetectDefault):
+    """
+    Internal class to help detect arguments that aren't default.
+    Used for store_true arguments: a drop-in replacement for
+    ``action="store_true"`` that also records the flag's ``dest`` in
+    ``DetectDefault.non_default_args`` when the flag is present.
+    """
+
+    def __init__(self, nargs=0, **kwargs):
+        # nargs=0 makes the option a bare flag that consumes no value from
+        # the command line.
+        super().__init__(nargs=nargs, **kwargs)
+
+    def __call__(self, arg_parser, namespace, values, option_string=None):
+        # ``values`` is ignored; the presence of the flag always stores True.
+        # The parent implementation also registers ``self.dest`` as non-default.
+        super().__call__(arg_parser, namespace, True, option_string)
+
+
+class StoreConfigFile(argparse.Action):
+    """
+    Custom Action to store the config file location not as part of the CLI args.
+    This is because we want to maintain an equivalence between the config file's
+    contents and the args themselves.
+
+    The path is kept on the class (``StoreConfigFile.trainer_config_path``)
+    instead of on the parsed namespace, so ``vars(namespace)`` does not contain
+    a ``trainer_config_path`` key.
+    """
+
+    # Annotation only: the attribute does not exist until __call__ has run, so
+    # reading it before the arguments are parsed raises AttributeError. It is a
+    # class attribute, so a later parse overwrites the value of an earlier one.
+    trainer_config_path: str
+
+    def __call__(self, arg_parser, namespace, values, option_string=None):
+        # Remove the attribute for this dest from the namespace so the config
+        # path is not carried along with the other parsed options.
+        delattr(namespace, self.dest)
+        StoreConfigFile.trainer_config_path = values