浏览代码

[refactor] Allow full RunOptions to be specified in trainer configuration YAML (#3815)

/whitepaper-experiments
GitHub 4 年前
当前提交
7e0032f5
共有 4 个文件被更改,包括 162 次插入25 次删除
  1. 1
      com.unity.ml-agents/CHANGELOG.md
  2. 74
      ml-agents/mlagents/trainers/learn.py
  3. 71
      ml-agents/mlagents/trainers/tests/test_learn.py
  4. 41
      ml-agents/mlagents/trainers/cli_utils.py

1
com.unity.ml-agents/CHANGELOG.md


will allow use with python 3.8 using tensorflow 2.2.0rc3.
- `UnityRLCapabilities` was added to help inform users when RL features are mismatched between C# and Python packages. (#3831)
- Unity Player logs are now written out to the results directory. (#3877)
- Run configuration YAML files are written out to the results directory at the end of the run. (#3815)
### Bug Fixes

74
ml-agents/mlagents/trainers/learn.py


GaugeWriter,
ConsoleWriter,
)
from mlagents.trainers.cli_utils import (
StoreConfigFile,
DetectDefault,
DetectDefaultStoreTrue,
)
from mlagents_envs.environment import UnityEnvironment
from mlagents.trainers.sampler_class import SamplerManager
from mlagents.trainers.exception import SamplerException, TrainerConfigError

argparser = argparse.ArgumentParser(
formatter_class=argparse.ArgumentDefaultsHelpFormatter
)
argparser.add_argument("trainer_config_path")
argparser.add_argument("trainer_config_path", action=StoreConfigFile)
action=DetectDefault,
)
argparser.add_argument(
"--lesson",

action=DetectDefault,
)
argparser.add_argument(
"--keep-checkpoints",

"number of steps specified by the save-freq option. Once the maximum number of checkpoints"
"has been reached, the oldest checkpoint is deleted when saving a new checkpoint.",
action=DetectDefault,
action="store_true",
action=DetectDefaultStoreTrue,
help=argparse.SUPPRESS, # Deprecated but still usable for now.
)
argparser.add_argument(

action="store_true",
action=DetectDefaultStoreTrue,
help="Whether to resume training from a checkpoint. Specify a --run-id to use this option. "
"If set, the training code loads an already trained model to initialize the neural network "
"before resuming training. This option is only valid when the models exist, and have the same "

"--force",
default=False,
dest="force",
action="store_true",
action=DetectDefaultStoreTrue,
help="Whether to force-overwrite this run-id's existing summary and model data. (Without "
"this flag, attempting to train a model with a run-id that has been used before will throw "
"an error.",

"as the saved model itself. If you use TensorBoard to view the training statistics, "
"always set a unique run-id for each training run. (The statistics for all runs with the "
"same id are combined as if they were produced by a the same session.)",
action=DetectDefault,
)
argparser.add_argument(
"--initialize-from",

"This can be used, for instance, to fine-tune an existing model on a new environment. "
"Note that the previously saved models must have the same behavior parameters as your "
"current environment.",
action=DetectDefault,
)
argparser.add_argument(
"--save-freq",

action=DetectDefault,
)
argparser.add_argument(
"--seed",

action=DetectDefault,
action="store_true",
action=DetectDefaultStoreTrue,
help=argparse.SUPPRESS,
)
argparser.add_argument(

action="store_true",
action=DetectDefaultStoreTrue,
help="Whether to run in Python inference mode (i.e. no training). Use with --resume to load "
"a model trained with an existing run ID.",
)

"will use the port (base_port + worker_id), where the worker_id is sequential IDs given to "
"each instance from 0 to (num_envs - 1). Note that when training using the Editor rather "
"than an executable, the base port will be ignored.",
action=DetectDefault,
)
argparser.add_argument(
"--num-envs",

"from when training",
action=DetectDefault,
action="store_true",
action=DetectDefaultStoreTrue,
help="Whether to run the Unity executable in no-graphics mode (i.e. without initializing "
"the graphics driver. Use this only if your agents don't use visual observations.",
)

action="store_true",
action=DetectDefaultStoreTrue,
help="Whether to enable debug-level logging for some parts of the code",
)
argparser.add_argument(

"process these as Unity Command Line Arguments. You should choose different argument names if "
"you want to create environment-specific arguments. All arguments after this flag will be "
"passed to the executable.",
action=DetectDefault,
action="store_true",
action=DetectDefaultStoreTrue,
help="Forces training using CPU only",
)

type=int,
help="The width of the executable window of the environment(s) in pixels "
"(ignored for editor training).",
action=DetectDefault,
)
eng_conf.add_argument(
"--height",

"(ignored for editor training)",
action=DetectDefault,
)
eng_conf.add_argument(
"--quality-level",

"QualitySettings.SetQualityLevel in Unity.",
action=DetectDefault,
)
eng_conf.add_argument(
"--time-scale",

"Time.timeScale in Unity.",
action=DetectDefault,
)
eng_conf.add_argument(
"--target-frame-rate",

"Application.targetFrameRate in Unity.",
action=DetectDefault,
)
eng_conf.add_argument(
"--capture-frame-rate",

"Time.captureFramerate in Unity.",
action=DetectDefault,
)
return argparser

configs loaded from files.
"""
argparse_args = vars(args)
config_path = argparse_args["trainer_config_path"]
# Load YAML and apply overrides as needed
run_options_dict = {}
run_options_dict.update(argparse_args)
config_path = StoreConfigFile.trainer_config_path
# Load YAML
try:
argparse_args["behaviors"] = yaml_config["behaviors"]
except KeyError:
# This is the only option that is not optional and has no defaults.
if "behaviors" not in yaml_config:
# Use the YAML file values for all values not specified in the CLI.
for key, val in yaml_config.items():
# Detect bad config options
if not hasattr(RunOptions, key):
raise TrainerConfigError(
"The option {} was specified in your YAML file, but is invalid.".format(
key
)
)
if key not in DetectDefault.non_default_args:
run_options_dict[key] = val
argparse_args["parameter_randomization"] = yaml_config.get(
"parameter_randomization", None
# Keep deprecated --load working, TODO: remove
run_options_dict["resume"] = (
run_options_dict["resume"] or run_options_dict["load_model"]
# Keep deprecated --load working, TODO: remove
argparse_args["resume"] = argparse_args["resume"] or argparse_args["load_model"]
# Since argparse accepts file paths in the config options which don't exist in CommandLineOptions,
# these keys will need to be deleted to use the **/splat operator below.
argparse_args.pop("trainer_config_path")
return RunOptions(**vars(args))
return RunOptions(**run_options_dict)
def get_version_string() -> str:

71
ml-agents/mlagents/trainers/tests/test_learn.py


from unittest.mock import MagicMock, patch, mock_open
from mlagents.trainers import learn
from mlagents.trainers.trainer_controller import TrainerController
from mlagents.trainers.learn import parse_command_line
from mlagents.trainers.learn import parse_command_line, DetectDefault
from mlagents_envs.exception import UnityEnvironmentException
from mlagents.trainers.stats import StatsReporter

MOCK_YAML = """
behaviors:
{}
"""
MOCK_PARAMETER_YAML = """
behaviors:
{}
env_path: "./oldenvfile"
keep_checkpoints: 34
lesson: 2
run_id: uselessrun
save_freq: 654321
seed: 9870
base_port: 4001
num_envs: 4
debug: false
"""
MOCK_SAMPLER_CURRICULUM_YAML = """

@patch("builtins.open", new_callable=mock_open, read_data=MOCK_YAML)
def test_commandline_args(mock_file):
# Test with defaults
opt = parse_command_line(["mytrainerpath"])
assert opt.behaviors == {}

assert opt.debug is False
assert opt.env_args is None
full_args = [
"mytrainerpath",
"--env=./myenvfile",
"--keep-checkpoints=42",
"--lesson=3",
"--resume",
"--inference",
"--run-id=myawesomerun",
"--save-freq=123456",
"--seed=7890",
"--train",
"--base-port=4004",
"--num-envs=2",
"--no-graphics",
"--debug",
]
opt = parse_command_line(full_args)
assert opt.behaviors == {}
assert opt.env_path == "./myenvfile"
assert opt.parameter_randomization is None
assert opt.keep_checkpoints == 42
assert opt.lesson == 3
assert opt.run_id == "myawesomerun"
assert opt.save_freq == 123456
assert opt.seed == 7890
assert opt.base_port == 4004
assert opt.num_envs == 2
assert opt.no_graphics is True
assert opt.debug is True
assert opt.inference is True
assert opt.resume is True
@patch("builtins.open", new_callable=mock_open, read_data=MOCK_PARAMETER_YAML)
def test_yaml_args(mock_file):
# Test with opts loaded from YAML
DetectDefault.non_default_args.clear()
opt = parse_command_line(["mytrainerpath"])
assert opt.behaviors == {}
assert opt.env_path == "./oldenvfile"
assert opt.parameter_randomization is None
assert opt.keep_checkpoints == 34
assert opt.lesson == 2
assert opt.run_id == "uselessrun"
assert opt.save_freq == 654321
assert opt.seed == 9870
assert opt.base_port == 4001
assert opt.num_envs == 4
assert opt.no_graphics is False
assert opt.debug is False
assert opt.env_args is None
# Test that CLI overrides YAML
full_args = [
"mytrainerpath",
"--env=./myenvfile",

41
ml-agents/mlagents/trainers/cli_utils.py


from typing import Set
import argparse
class DetectDefault(argparse.Action):
    """
    Internal argparse Action that records which arguments were explicitly
    supplied instead of being left at their defaults.

    Whenever the action fires, the parsed value is stored on the namespace
    and the argument's destination name is added to ``non_default_args``.
    """

    # Destination names of every argument this action has handled. The set
    # lives on the class, so it is shared by all instances and subclasses.
    non_default_args: Set[str] = set()

    def __call__(self, arg_parser, namespace, values, option_string=None):
        dest_name = self.dest
        # Store the value exactly as argparse handed it to us.
        setattr(namespace, dest_name, values)
        # Remember that this destination was set explicitly.
        DetectDefault.non_default_args.add(dest_name)
class DetectDefaultStoreTrue(DetectDefault):
    """
    Flag variant of DetectDefault, intended for arguments that would
    otherwise use ``store_true``.

    ``nargs`` defaults to 0, and whenever the flag is present the value
    ``True`` is stored through the parent class's ``__call__``.
    """

    def __init__(self, nargs=0, **kwargs):
        # Fold nargs back into the keyword arguments before delegating.
        kwargs["nargs"] = nargs
        super().__init__(**kwargs)

    def __call__(self, arg_parser, namespace, values, option_string=None):
        # Whatever argparse passed as `values` is ignored; a present flag
        # always means True.
        flag_value = True
        super().__call__(arg_parser, namespace, flag_value, option_string)
class StoreConfigFile(argparse.Action):
    """
    Custom Action to store the config file location not as part of the CLI args.
    This is because we want to maintain an equivalence between the config file's
    contents and the args themselves.

    When the action fires, the argument's destination attribute is removed
    from the namespace (if present) and the parsed value is saved on the
    class as ``StoreConfigFile.trainer_config_path``.
    """

    # Most recently parsed config file location. Only annotated here; it is
    # assigned the first time the action runs.
    trainer_config_path: str

    def __call__(self, arg_parser, namespace, values, option_string=None):
        # The attribute may be missing: argparse does not pre-populate it when
        # the argument uses default=argparse.SUPPRESS, and an earlier
        # invocation of this action will already have removed it. Guard the
        # removal so those cases do not raise AttributeError.
        if hasattr(namespace, self.dest):
            delattr(namespace, self.dest)
        StoreConfigFile.trainer_config_path = values
正在加载...
取消
保存