[refactor] Improve config upgrade script and add test (#4056)

Branch: /MLA-1734-demo-provider
Committed by GitHub · 4 years ago
Commit: ee1098d1

5 files changed, 334 insertions(+), 116 deletions(-):

1. docs/Migrating.md (8 changed lines)
2. docs/Training-ML-Agents.md (4 changed lines)
3. ml-agents/mlagents/trainers/tests/test_config_conversion.py (191 lines, new file)
4. ml-agents/mlagents/trainers/upgrade_config.py (137 lines, new file)
5. config/upgrade_config.py (110 lines, deleted)

docs/Migrating.md (8 changed lines)

```diff
 To start at a different lesson, modify your Curriculum configuration.
 ### Steps to Migrate
-- To upgrade your configuration files, an upgrade script has been provided. Run `python config/update_config.py
-  -h` to see the script usage.
+- To upgrade your configuration files, an upgrade script has been provided. Run
+  `python -m mlagents.trainers.upgrade_config -h` to see the script usage. Note that you will have
+  to upgrade to/install the current version of ML-Agents before running the script.
-- If your training uses [curriculum](Training-ML-Agents.md#curriculum-learning), move those configurations under
-  the `Behavior Name` section.
+- If your training uses [curriculum](Training-ML-Agents.md#curriculum-learning), move those configurations under a `curriculum` section.
 - If your training uses [parameter randomization](Training-ML-Agents.md#environment-parameter-randomization), move
   the contents of the sampler config to `parameter_randomization` in the main trainer configuration.
 - If you are using `UnityEnvironment` directly, replace `max_step` with `interrupted`.
```
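In practice, converting a full set of old files in one pass looks something like `python -m mlagents.trainers.upgrade_config trainer_config.yaml upgraded_config.yaml --curriculum curriculum.yaml --sampler sampler.yaml`. The file names here are illustrative, but the two positionals (input trainer config, output path) and the two optional flags match the argparse definition in the new script below.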

docs/Training-ML-Agents.md (4 changed lines)

```diff
 **NOTE:** The configuration file format has been changed from 0.17.0 and onwards. To convert
 an old set of configuration files (trainer config, curriculum, and sampler files) to the new
-format, a script has been provided. Run `python config/upgrade_config.py -h` in your console
-to see the script's usage.
+format, a script has been provided. Run `python -m mlagents.trainers.upgrade_config -h` in your
+console to see the script's usage.
 ### Behavior Configurations
```
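To make the format change concrete, here is a rough sketch of what the converted output could look like for the `testbehavior` PPO config used in the tests below. This is illustrative, not captured tool output: the key names (`behaviors`, `trainer_type`, `hyperparameters`, `network_settings`) follow the dict built up in `convert_behaviors`, and the values are taken from the test fixture; exact field placement depends on the `TrainerSettings`/`NetworkSettings` definitions in `mlagents.trainers.settings`.

```yaml
behaviors:
  testbehavior:
    trainer_type: ppo
    hyperparameters:          # keys absorbed into PPOSettings
      batch_size: 256
      buffer_size: 64
      learning_rate: 5.0e-3
      beta: 5.0e-3
      epsilon: 0.2
      lambd: 0.95
      num_epoch: 3
    network_settings:         # keys absorbed into NetworkSettings
      normalize: false
      hidden_units: 32
      num_layers: 2
    reward_signals:
      curiosity:
        strength: 1.0
        gamma: 0.99
        encoding_size: 128
    max_steps: 2500           # remaining keys land on the base TrainerSettings
    time_horizon: 64
    summary_freq: 500
```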

ml-agents/mlagents/trainers/tests/test_config_conversion.py (191 lines, new file)

```python
import yaml
import pytest
from unittest import mock
from argparse import Namespace

from mlagents.trainers.upgrade_config import convert_behaviors, main, remove_nones
from mlagents.trainers.settings import (
    TrainerType,
    PPOSettings,
    SACSettings,
    RewardSignalType,
)

BRAIN_NAME = "testbehavior"

# Check one per category
BATCH_SIZE = 256
HIDDEN_UNITS = 32
SUMMARY_FREQ = 500

PPO_CONFIG = f"""
default:
    trainer: ppo
    batch_size: 1024
    beta: 5.0e-3
    buffer_size: 10240
    epsilon: 0.2
    hidden_units: 128
    lambd: 0.95
    learning_rate: 3.0e-4
    learning_rate_schedule: linear
    max_steps: 5.0e5
    memory_size: 256
    normalize: false
    num_epoch: 3
    num_layers: 2
    time_horizon: 64
    sequence_length: 64
    summary_freq: 10000
    use_recurrent: false
    vis_encode_type: simple
    reward_signals:
        extrinsic:
            strength: 1.0
            gamma: 0.99

{BRAIN_NAME}:
    trainer: ppo
    batch_size: {BATCH_SIZE}
    beta: 5.0e-3
    buffer_size: 64
    epsilon: 0.2
    hidden_units: {HIDDEN_UNITS}
    lambd: 0.95
    learning_rate: 5.0e-3
    max_steps: 2500
    memory_size: 256
    normalize: false
    num_epoch: 3
    num_layers: 2
    time_horizon: 64
    sequence_length: 64
    summary_freq: {SUMMARY_FREQ}
    use_recurrent: false
    reward_signals:
        curiosity:
            strength: 1.0
            gamma: 0.99
            encoding_size: 128
"""

SAC_CONFIG = f"""
default:
    trainer: sac
    batch_size: 128
    buffer_size: 50000
    buffer_init_steps: 0
    hidden_units: 128
    init_entcoef: 1.0
    learning_rate: 3.0e-4
    learning_rate_schedule: constant
    max_steps: 5.0e5
    memory_size: 256
    normalize: false
    num_update: 1
    train_interval: 1
    num_layers: 2
    time_horizon: 64
    sequence_length: 64
    summary_freq: 10000
    tau: 0.005
    use_recurrent: false
    vis_encode_type: simple
    reward_signals:
        extrinsic:
            strength: 1.0
            gamma: 0.99

{BRAIN_NAME}:
    trainer: sac
    batch_size: {BATCH_SIZE}
    buffer_size: 64
    buffer_init_steps: 100
    hidden_units: {HIDDEN_UNITS}
    init_entcoef: 0.01
    learning_rate: 3.0e-4
    max_steps: 1000
    memory_size: 256
    normalize: false
    num_update: 1
    train_interval: 1
    num_layers: 1
    time_horizon: 64
    sequence_length: 64
    summary_freq: {SUMMARY_FREQ}
    tau: 0.005
    use_recurrent: false
    curiosity_enc_size: 128
    demo_path: None
    vis_encode_type: simple
    reward_signals:
        curiosity:
            strength: 1.0
            gamma: 0.99
            encoding_size: 128
"""


@pytest.mark.parametrize("use_recurrent", [True, False])
@pytest.mark.parametrize("trainer_type", [TrainerType.PPO, TrainerType.SAC])
def test_convert_behaviors(trainer_type, use_recurrent):
    if trainer_type == TrainerType.PPO:
        trainer_config = PPO_CONFIG
        trainer_settings_type = PPOSettings
    elif trainer_type == TrainerType.SAC:
        trainer_config = SAC_CONFIG
        trainer_settings_type = SACSettings

    old_config = yaml.safe_load(trainer_config)
    old_config[BRAIN_NAME]["use_recurrent"] = use_recurrent
    new_config = convert_behaviors(old_config)

    # Test that the new config can be converted to TrainerSettings w/o exceptions
    trainer_settings = new_config[BRAIN_NAME]

    # Test that the trainer_settings contains the settings for BRAIN_NAME and
    # the defaults where specified
    assert trainer_settings.trainer_type == trainer_type
    assert isinstance(trainer_settings.hyperparameters, trainer_settings_type)
    assert trainer_settings.hyperparameters.batch_size == BATCH_SIZE
    assert trainer_settings.network_settings.hidden_units == HIDDEN_UNITS
    assert RewardSignalType.CURIOSITY in trainer_settings.reward_signals


@mock.patch("mlagents.trainers.upgrade_config.convert_behaviors")
@mock.patch("mlagents.trainers.upgrade_config.remove_nones")
@mock.patch("mlagents.trainers.upgrade_config.write_to_yaml_file")
@mock.patch("mlagents.trainers.upgrade_config.parse_args")
@mock.patch("mlagents.trainers.upgrade_config.load_config")
def test_main(mock_load, mock_parse, yaml_write_mock, remove_none_mock, mock_convert):
    test_output_file = "test.yaml"
    mock_load.side_effect = [
        yaml.safe_load(PPO_CONFIG),
        "test_curriculum_config",
        "test_sampler_config",
    ]
    mock_args = Namespace(
        trainer_config_path="mock",
        output_config_path=test_output_file,
        curriculum="test",
        sampler="test",
    )
    mock_parse.return_value = mock_args
    mock_convert.return_value = "test_converted_config"
    dict_without_nones = mock.Mock(name="nonones")
    remove_none_mock.return_value = dict_without_nones

    main()
    saved_dict = remove_none_mock.call_args[0][0]
    # Check that the output of the remove_none call is here
    yaml_write_mock.assert_called_with(dict_without_nones, test_output_file)
    assert saved_dict["behaviors"] == "test_converted_config"
    assert saved_dict["curriculum"] == "test_curriculum_config"
    assert saved_dict["parameter_randomization"] == "test_sampler_config"


def test_remove_nones():
    dict_with_nones = {"hello": {"hello2": 2, "hello3": None}, "hello4": None}
    dict_without_nones = {"hello": {"hello2": 2}}
    output = remove_nones(dict_with_nones)
    assert output == dict_without_nones
```
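These tests run under plain pytest, e.g. `pytest ml-agents/mlagents/trainers/tests/test_config_conversion.py`. The stacked `parametrize` decorators expand `test_convert_behaviors` into four cases (PPO and SAC, each with `use_recurrent` on and off), while `test_main` mocks out argument parsing and all file I/O, so the CLI path is exercised without touching disk.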

ml-agents/mlagents/trainers/upgrade_config.py (137 lines, new file)

```python
# NOTE: This upgrade script is a temporary measure for the transition between the old-format
# configuration file and the new format. It will be marked for deprecation once the
# Python CLI and configuration files are finalized, and removed the following release.
import attr
import cattr
import yaml
from typing import Dict, Any
import argparse
from mlagents.trainers.settings import TrainerSettings, NetworkSettings, TrainerType
from mlagents.trainers.cli_utils import load_config
from mlagents.trainers.exception import TrainerConfigError


# Take an existing trainer config (e.g. trainer_config.yaml) and turn it into the new format.
def convert_behaviors(old_trainer_config: Dict[str, Any]) -> Dict[str, Any]:
    all_behavior_config_dict = {}
    default_config = old_trainer_config.get("default", {})
    for behavior_name, config in old_trainer_config.items():
        if behavior_name != "default":
            config = default_config.copy()
            config.update(old_trainer_config[behavior_name])

            # Convert to split TrainerSettings, Hyperparameters, NetworkSettings
            # Set trainer_type and get appropriate hyperparameter settings
            try:
                trainer_type = config["trainer"]
            except KeyError:
                raise TrainerConfigError(
                    "Config doesn't specify a trainer type. "
                    "Please specify trainer: in your config."
                )
            new_config = {}
            new_config["trainer_type"] = trainer_type
            hyperparam_cls = TrainerType(trainer_type).to_settings()
            # Try to absorb as much as possible into the hyperparam_cls
            new_config["hyperparameters"] = cattr.structure(config, hyperparam_cls)

            # Try to absorb as much as possible into the network settings
            new_config["network_settings"] = cattr.structure(config, NetworkSettings)
            # Deal with recurrent
            try:
                if config["use_recurrent"]:
                    new_config[
                        "network_settings"
                    ].memory = NetworkSettings.MemorySettings(
                        sequence_length=config["sequence_length"],
                        memory_size=config["memory_size"],
                    )
            except KeyError:
                raise TrainerConfigError(
                    "Config doesn't specify use_recurrent. "
                    "Please specify true or false for use_recurrent in your config."
                )

            # Absorb the rest into the base TrainerSettings
            for key, val in config.items():
                if key in attr.fields_dict(TrainerSettings):
                    new_config[key] = val

            # Structure the whole thing
            all_behavior_config_dict[behavior_name] = cattr.structure(
                new_config, TrainerSettings
            )
    return all_behavior_config_dict


def write_to_yaml_file(unstructed_config: Dict[str, Any], output_config: str) -> None:
    with open(output_config, "w") as f:
        try:
            yaml.dump(unstructed_config, f, sort_keys=False)
        except TypeError:  # Older versions of pyyaml don't support sort_keys
            yaml.dump(unstructed_config, f)


def remove_nones(config: Dict[Any, Any]) -> Dict[str, Any]:
    new_config = {}
    for key, val in config.items():
        if isinstance(val, dict):
            new_config[key] = remove_nones(val)
        elif val is not None:
            new_config[key] = val
    return new_config


def parse_args():
    argparser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    argparser.add_argument(
        "trainer_config_path",
        help="Path to old format (<=0.16.X) trainer configuration YAML.",
    )
    argparser.add_argument(
        "--curriculum",
        help="Path to old format (<=0.16.X) curriculum configuration YAML.",
        default=None,
    )
    argparser.add_argument(
        "--sampler",
        help="Path to old format (<=0.16.X) parameter randomization configuration YAML.",
        default=None,
    )
    argparser.add_argument(
        "output_config_path", help="Path to write converted YAML file."
    )
    args = argparser.parse_args()
    return args


def main() -> None:
    args = parse_args()
    print(
        f"Converting {args.trainer_config_path} and saving to {args.output_config_path}."
    )

    old_config = load_config(args.trainer_config_path)
    behavior_config_dict = convert_behaviors(old_config)
    full_config = {"behaviors": behavior_config_dict}

    # Convert curriculum and sampler. Note that we don't validate these; if it was correct
    # before it should be correct now.
    if args.curriculum is not None:
        curriculum_config_dict = load_config(args.curriculum)
        full_config["curriculum"] = curriculum_config_dict

    if args.sampler is not None:
        sampler_config_dict = load_config(args.sampler)
        full_config["parameter_randomization"] = sampler_config_dict

    # Convert config to dict
    unstructed_config = cattr.unstructure(full_config)
    unstructed_config = remove_nones(unstructed_config)
    write_to_yaml_file(unstructed_config, args.output_config_path)


if __name__ == "__main__":
    main()
```
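For readers who want to drive the conversion from Python rather than the command line, here is a minimal sketch using only functions defined in the module above; it mirrors the steps `main()` performs. The inline YAML and the output path are hypothetical, and this assumes the old-format config parses the same way `load_config` would parse a file.

```python
import cattr
import yaml

from mlagents.trainers.upgrade_config import (
    convert_behaviors,
    remove_nones,
    write_to_yaml_file,
)

# Hypothetical old-format (<=0.16.X) trainer config; a real one would come from load_config().
OLD_YAML = """
default:
    trainer: ppo
    use_recurrent: false
mybehavior:
    batch_size: 256
"""

old_config = yaml.safe_load(OLD_YAML)
# convert_behaviors returns a dict of behavior name -> structured TrainerSettings
full_config = {"behaviors": convert_behaviors(old_config)}
# Unstructure the attrs-based settings back into plain dicts, then drop None values,
# exactly as main() does before writing.
unstructed_config = remove_nones(cattr.unstructure(full_config))
write_to_yaml_file(unstructed_config, "upgraded_config.yaml")  # hypothetical output path
```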

config/upgrade_config.py (110 lines, deleted)

```python
import attr
import cattr
import yaml
from typing import Dict, Any
import argparse
from mlagents.trainers.settings import TrainerSettings, NetworkSettings, TrainerType
from mlagents.trainers.cli_utils import load_config


# Take an existing trainer config (e.g. trainer_config.yaml) and turn it into the new format.
def convert_behaviors(old_trainer_config: Dict[str, Any]) -> Dict[str, Any]:
    all_behavior_config_dict = {}
    default_config = old_trainer_config.get("default", {})
    for behavior_name, config in old_trainer_config.items():
        if behavior_name != "default":
            config = default_config.copy()
            config.update(old_trainer_config[behavior_name])

            # Convert to split TrainerSettings, Hyperparameters, NetworkSettings
            # Set trainer_type and get appropriate hyperparameter settings
            trainer_type = config["trainer"]
            new_config = {}
            new_config["trainer_type"] = trainer_type
            hyperparam_cls = TrainerType(trainer_type).to_settings()
            # Try to absorb as much as possible into the hyperparam_cls
            new_config["hyperparameters"] = cattr.structure(config, hyperparam_cls)

            # Try to absorb as much as possible into the network settings
            new_config["network_settings"] = cattr.structure(config, NetworkSettings)
            # Deal with recurrent
            if config["use_recurrent"]:
                new_config["network_settings"].memory = NetworkSettings.MemorySettings(
                    sequence_length=config["sequence_length"],
                    memory_size=config["memory_size"],
                )

            # Absorb the rest into the base TrainerSettings
            for key, val in config.items():
                if key in attr.fields_dict(TrainerSettings):
                    new_config[key] = val

            # Structure the whole thing
            all_behavior_config_dict[behavior_name] = cattr.structure(
                new_config, TrainerSettings
            )
    return all_behavior_config_dict


def write_to_yaml_file(config: Dict[str, Any], output_config: str):
    unstructed_config = cattr.unstructure(config)
    unstructed_config = remove_nones(unstructed_config)
    with open(output_config, "w") as f:
        try:
            yaml.dump(unstructed_config, f, sort_keys=False)
        except TypeError:  # Older versions of pyyaml don't support sort_keys
            yaml.dump(unstructed_config, f)


def remove_nones(config: Dict[Any, Any]):
    new_config = {}
    for key, val in config.items():
        if isinstance(val, dict):
            new_config[key] = remove_nones(val)
        elif val is not None:
            new_config[key] = val
    return new_config


if __name__ == "__main__":
    argparser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    argparser.add_argument(
        "trainer_config_path",
        help="Path to old format (<=0.16.X) trainer configuration YAML.",
    )
    argparser.add_argument(
        "--curriculum",
        help="Path to old format (<=0.16.X) curriculum configuration YAML.",
        default=None,
    )
    argparser.add_argument(
        "--sampler",
        help="Path to old format (<=0.16.X) parameter randomization configuration YAML.",
        default=None,
    )
    argparser.add_argument(
        "output_config_path", help="Path to write converted YAML file."
    )
    args = argparser.parse_args()

    print(
        f"Converting {args.trainer_config_path} and saving to {args.output_config_path}."
    )

    old_config = load_config(args.trainer_config_path)
    behavior_config_dict = convert_behaviors(old_config)
    full_config = {"behaviors": behavior_config_dict}

    # Convert curriculum and sampler. note that we don't validate these; if it was correct
    # before it should be correct now.
    if args.curriculum is not None:
        curriculum_config_dict = load_config(args.curriculum)
        full_config["curriculum"] = curriculum_config_dict

    if args.sampler is not None:
        sampler_config_dict = load_config(args.curriculum)
        full_config["parameter_randomization"] = sampler_config_dict

    write_to_yaml_file(full_config, args.output_config_path)
```
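Comparing this deleted script against the new module shows three functional changes in the refactor: the standalone script under `config/` becomes an importable, testable module (with a `parse_args`/`main` split) under `mlagents.trainers`; missing `trainer:` and `use_recurrent:` keys now raise an explicit `TrainerConfigError` instead of an unhandled `KeyError`; and the sampler branch, which here loads `args.curriculum`, is corrected to load `args.sampler`.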