GitHub
4 年前
当前提交
ee1098d1
共有 5 个文件被更改,包括 334 次插入 和 116 次删除
-
8docs/Migrating.md
-
4docs/Training-ML-Agents.md
-
191ml-agents/mlagents/trainers/tests/test_config_conversion.py
-
137ml-agents/mlagents/trainers/upgrade_config.py
-
110config/upgrade_config.py
|
|||
import yaml |
|||
import pytest |
|||
from unittest import mock |
|||
from argparse import Namespace |
|||
|
|||
from mlagents.trainers.upgrade_config import convert_behaviors, main, remove_nones |
|||
from mlagents.trainers.settings import ( |
|||
TrainerType, |
|||
PPOSettings, |
|||
SACSettings, |
|||
RewardSignalType, |
|||
) |
|||
|
|||
# Behavior name used by both test configs and asserted on by the tests below.
BRAIN_NAME = "testbehavior"

# Check one per category
# (one hyperparameter, one network setting, one base TrainerSettings field):
BATCH_SIZE = 256
HIDDEN_UNITS = 32
SUMMARY_FREQ = 500

# Old-format (<=0.16.X) PPO trainer config: a "default" section plus a
# per-behavior section whose keys override the defaults.
PPO_CONFIG = f"""
default:
    trainer: ppo
    batch_size: 1024
    beta: 5.0e-3
    buffer_size: 10240
    epsilon: 0.2
    hidden_units: 128
    lambd: 0.95
    learning_rate: 3.0e-4
    learning_rate_schedule: linear
    max_steps: 5.0e5
    memory_size: 256
    normalize: false
    num_epoch: 3
    num_layers: 2
    time_horizon: 64
    sequence_length: 64
    summary_freq: 10000
    use_recurrent: false
    vis_encode_type: simple
    reward_signals:
        extrinsic:
            strength: 1.0
            gamma: 0.99

{BRAIN_NAME}:
    trainer: ppo
    batch_size: {BATCH_SIZE}
    beta: 5.0e-3
    buffer_size: 64
    epsilon: 0.2
    hidden_units: {HIDDEN_UNITS}
    lambd: 0.95
    learning_rate: 5.0e-3
    max_steps: 2500
    memory_size: 256
    normalize: false
    num_epoch: 3
    num_layers: 2
    time_horizon: 64
    sequence_length: 64
    summary_freq: {SUMMARY_FREQ}
    use_recurrent: false
    reward_signals:
        curiosity:
            strength: 1.0
            gamma: 0.99
            encoding_size: 128
"""

# Old-format (<=0.16.X) SAC trainer config, same structure as PPO_CONFIG.
SAC_CONFIG = f"""
default:
    trainer: sac
    batch_size: 128
    buffer_size: 50000
    buffer_init_steps: 0
    hidden_units: 128
    init_entcoef: 1.0
    learning_rate: 3.0e-4
    learning_rate_schedule: constant
    max_steps: 5.0e5
    memory_size: 256
    normalize: false
    num_update: 1
    train_interval: 1
    num_layers: 2
    time_horizon: 64
    sequence_length: 64
    summary_freq: 10000
    tau: 0.005
    use_recurrent: false
    vis_encode_type: simple
    reward_signals:
        extrinsic:
            strength: 1.0
            gamma: 0.99

{BRAIN_NAME}:
    trainer: sac
    batch_size: {BATCH_SIZE}
    buffer_size: 64
    buffer_init_steps: 100
    hidden_units: {HIDDEN_UNITS}
    init_entcoef: 0.01
    learning_rate: 3.0e-4
    max_steps: 1000
    memory_size: 256
    normalize: false
    num_update: 1
    train_interval: 1
    num_layers: 1
    time_horizon: 64
    sequence_length: 64
    summary_freq: {SUMMARY_FREQ}
    tau: 0.005
    use_recurrent: false
    curiosity_enc_size: 128
    demo_path: None
    vis_encode_type: simple
    reward_signals:
        curiosity:
            strength: 1.0
            gamma: 0.99
            encoding_size: 128
"""
|||
|
|||
|
|||
@pytest.mark.parametrize("use_recurrent", [True, False])
@pytest.mark.parametrize("trainer_type", [TrainerType.PPO, TrainerType.SAC])
def test_convert_behaviors(trainer_type, use_recurrent):
    """Old-format PPO/SAC configs convert into structured TrainerSettings.

    Checks one field per category: trainer_type, hyperparameters,
    network_settings, and reward_signals.
    """
    if trainer_type == TrainerType.PPO:
        trainer_config = PPO_CONFIG
        trainer_settings_type = PPOSettings
    elif trainer_type == TrainerType.SAC:
        trainer_config = SAC_CONFIG
        trainer_settings_type = SACSettings

    # safe_load: configs are plain data, and yaml.load() without an explicit
    # Loader is deprecated/unsafe (matches the safe_load usage in test_main).
    old_config = yaml.safe_load(trainer_config)
    old_config[BRAIN_NAME]["use_recurrent"] = use_recurrent
    new_config = convert_behaviors(old_config)

    # Test that the new config can be converted to TrainerSettings w/o exceptions
    trainer_settings = new_config[BRAIN_NAME]

    # Test that the trainer_settings contains the settings for BRAIN_NAME and
    # the defaults where specified
    assert trainer_settings.trainer_type == trainer_type
    assert isinstance(trainer_settings.hyperparameters, trainer_settings_type)
    assert trainer_settings.hyperparameters.batch_size == BATCH_SIZE
    assert trainer_settings.network_settings.hidden_units == HIDDEN_UNITS
    assert RewardSignalType.CURIOSITY in trainer_settings.reward_signals
|||
|
|||
|
|||
@mock.patch("mlagents.trainers.upgrade_config.convert_behaviors")
@mock.patch("mlagents.trainers.upgrade_config.remove_nones")
@mock.patch("mlagents.trainers.upgrade_config.write_to_yaml_file")
@mock.patch("mlagents.trainers.upgrade_config.parse_args")
@mock.patch("mlagents.trainers.upgrade_config.load_config")
def test_main(mock_load, mock_parse, yaml_write_mock, remove_none_mock, mock_convert):
    """main() wires config loading, conversion, none-removal and YAML writing."""
    output_path = "test.yaml"
    # load_config is called three times: trainer config, then curriculum,
    # then sampler.
    mock_load.side_effect = [
        yaml.safe_load(PPO_CONFIG),
        "test_curriculum_config",
        "test_sampler_config",
    ]
    mock_parse.return_value = Namespace(
        trainer_config_path="mock",
        output_config_path=output_path,
        curriculum="test",
        sampler="test",
    )
    mock_convert.return_value = "test_converted_config"
    cleaned_dict = mock.Mock(name="nonones")
    remove_none_mock.return_value = cleaned_dict

    main()

    # The assembled config must be what was passed through remove_nones ...
    assembled = remove_none_mock.call_args[0][0]
    # ... and its cleaned result must be exactly what gets written to disk.
    yaml_write_mock.assert_called_with(cleaned_dict, output_path)
    assert assembled["behaviors"] == "test_converted_config"
    assert assembled["curriculum"] == "test_curriculum_config"
    assert assembled["parameter_randomization"] == "test_sampler_config"
|||
|
|||
|
|||
def test_remove_nones():
    """None-valued entries are dropped at every nesting level."""
    nested = {"hello": {"hello2": 2, "hello3": None}, "hello4": None}
    assert remove_nones(nested) == {"hello": {"hello2": 2}}
|
|||
# NOTE: This upgrade script is a temporary measure for the transition between the old-format |
|||
# configuration file and the new format. It will be marked for deprecation once the |
|||
# Python CLI and configuration files are finalized, and removed the following release. |
|||
|
|||
import attr |
|||
import cattr |
|||
import yaml |
|||
from typing import Dict, Any |
|||
import argparse |
|||
from mlagents.trainers.settings import TrainerSettings, NetworkSettings, TrainerType |
|||
from mlagents.trainers.cli_utils import load_config |
|||
from mlagents.trainers.exception import TrainerConfigError |
|||
|
|||
|
|||
# Take an existing trainer config (e.g. trainer_config.yaml) and turn it into the new format.
def convert_behaviors(old_trainer_config: Dict[str, Any]) -> Dict[str, Any]:
    """Convert an old-format trainer config dict to {behavior: TrainerSettings}.

    Each non-"default" section is shallow-merged on top of the "default"
    section, then split into trainer_type, hyperparameters, network_settings,
    and the remaining base TrainerSettings fields.

    :param old_trainer_config: Parsed YAML of the old (<=0.16.X) trainer config.
    :return: Dict mapping behavior name to a structured TrainerSettings.
    :raises TrainerConfigError: if a behavior omits "trainer" or
        "use_recurrent", or sets use_recurrent without the memory settings.
    """
    all_behavior_config_dict = {}
    default_config = old_trainer_config.get("default", {})
    for behavior_name, config in old_trainer_config.items():
        if behavior_name != "default":
            # Shallow merge: behavior-specific keys override the defaults.
            config = default_config.copy()
            config.update(old_trainer_config[behavior_name])

            # Convert to split TrainerSettings, Hyperparameters, NetworkSettings
            # Set trainer_type and get appropriate hyperparameter settings
            try:
                trainer_type = config["trainer"]
            except KeyError:
                raise TrainerConfigError(
                    "Config doesn't specify a trainer type. "
                    "Please specify trainer: in your config."
                )
            new_config = {}
            new_config["trainer_type"] = trainer_type
            hyperparam_cls = TrainerType(trainer_type).to_settings()
            # Try to absorb as much as possible into the hyperparam_cls
            new_config["hyperparameters"] = cattr.structure(config, hyperparam_cls)

            # Try to absorb as much as possible into the network settings
            new_config["network_settings"] = cattr.structure(config, NetworkSettings)
            # Deal with recurrent. The try is narrowed to the use_recurrent
            # lookup only: previously a missing sequence_length/memory_size
            # also raised the misleading "doesn't specify use_recurrent" error.
            try:
                use_recurrent = config["use_recurrent"]
            except KeyError:
                raise TrainerConfigError(
                    "Config doesn't specify use_recurrent. "
                    "Please specify true or false for use_recurrent in your config."
                )
            if use_recurrent:
                try:
                    new_config[
                        "network_settings"
                    ].memory = NetworkSettings.MemorySettings(
                        sequence_length=config["sequence_length"],
                        memory_size=config["memory_size"],
                    )
                except KeyError as e:
                    # Attribute the error to the key that is actually missing.
                    raise TrainerConfigError(
                        f"Config specifies use_recurrent but doesn't specify {e}. "
                        "Please specify it in your config."
                    )
            # Absorb the rest into the base TrainerSettings
            for key, val in config.items():
                if key in attr.fields_dict(TrainerSettings):
                    new_config[key] = val

            # Structure the whole thing
            all_behavior_config_dict[behavior_name] = cattr.structure(
                new_config, TrainerSettings
            )
    return all_behavior_config_dict
|||
|
|||
|
|||
def write_to_yaml_file(unstructed_config: Dict[str, Any], output_config: str) -> None:
    """Dump an already-unstructured config dict to *output_config* as YAML.

    Preserves insertion order when the installed pyyaml supports it.
    """
    with open(output_config, "w") as out_file:
        try:
            # sort_keys=False keeps the dict's insertion order in the output.
            yaml.dump(unstructed_config, out_file, sort_keys=False)
        except TypeError:  # Older versions of pyyaml don't support sort_keys
            yaml.dump(unstructed_config, out_file)
|||
|
|||
|
|||
def remove_nones(config: Dict[Any, Any]) -> Dict[str, Any]:
    """Return a copy of *config* with None-valued entries removed.

    Recurses into nested dicts; a nested dict is kept even if it ends up empty.
    """
    return {
        key: remove_nones(val) if isinstance(val, dict) else val
        for key, val in config.items()
        if isinstance(val, dict) or val is not None
    }
|||
|
|||
|
|||
def parse_args():
    """Build and parse the command-line arguments for the upgrade script.

    Positional: trainer_config_path, output_config_path (in that order).
    Optional: --curriculum and --sampler paths, both defaulting to None.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument(
        "trainer_config_path",
        help="Path to old format (<=0.16.X) trainer configuration YAML.",
    )
    parser.add_argument(
        "--curriculum",
        help="Path to old format (<=0.16.X) curriculum configuration YAML.",
        default=None,
    )
    parser.add_argument(
        "--sampler",
        help="Path to old format (<=0.16.X) parameter randomization configuration YAML.",
        default=None,
    )
    parser.add_argument(
        "output_config_path", help="Path to write converted YAML file."
    )
    return parser.parse_args()
|||
|
|||
|
|||
def main() -> None:
    """Entry point: convert an old-format config file and write the new format."""
    args = parse_args()
    print(
        f"Converting {args.trainer_config_path} and saving to {args.output_config_path}."
    )

    # Behaviors are the only section that needs real conversion.
    full_config = {"behaviors": convert_behaviors(load_config(args.trainer_config_path))}

    # Convert curriculum and sampler. note that we don't validate these; if it was correct
    # before it should be correct now.
    if args.curriculum is not None:
        full_config["curriculum"] = load_config(args.curriculum)

    if args.sampler is not None:
        full_config["parameter_randomization"] = load_config(args.sampler)

    # Convert config to dict, then strip None entries before writing.
    unstructed_config = remove_nones(cattr.unstructure(full_config))
    write_to_yaml_file(unstructed_config, args.output_config_path)


if __name__ == "__main__":
    main()
|
|||
import attr |
|||
import cattr |
|||
import yaml |
|||
from typing import Dict, Any |
|||
import argparse |
|||
from mlagents.trainers.settings import TrainerSettings, NetworkSettings, TrainerType |
|||
from mlagents.trainers.cli_utils import load_config |
|||
|
|||
|
|||
# Take an existing trainer config (e.g. trainer_config.yaml) and turn it into the new format.
def convert_behaviors(old_trainer_config: Dict[str, Any]) -> Dict[str, Any]:
    """Convert each old-format behavior section into a structured TrainerSettings.

    :param old_trainer_config: Parsed YAML dict of the old (<=0.16.X) trainer config.
    :return: Dict mapping behavior name to a cattr-structured TrainerSettings.
    """
    all_behavior_config_dict = {}
    # The "default" section supplies fallback values for every other section.
    default_config = old_trainer_config.get("default", {})
    for behavior_name, config in old_trainer_config.items():
        if behavior_name != "default":
            # Shallow merge: behavior-specific keys override the defaults.
            config = default_config.copy()
            config.update(old_trainer_config[behavior_name])

            # Convert to split TrainerSettings, Hyperparameters, NetworkSettings
            # Set trainer_type and get appropriate hyperparameter settings
            # NOTE(review): raises a bare KeyError (no friendly message) if
            # "trainer" is missing from both sections.
            trainer_type = config["trainer"]
            new_config = {}
            new_config["trainer_type"] = trainer_type
            hyperparam_cls = TrainerType(trainer_type).to_settings()
            # Try to absorb as much as possible into the hyperparam_cls
            new_config["hyperparameters"] = cattr.structure(config, hyperparam_cls)

            # Try to absorb as much as possible into the network settings
            new_config["network_settings"] = cattr.structure(config, NetworkSettings)
            # Deal with recurrent
            # NOTE(review): likewise raises KeyError if "use_recurrent" (or,
            # when true, "sequence_length"/"memory_size") is absent.
            if config["use_recurrent"]:
                new_config["network_settings"].memory = NetworkSettings.MemorySettings(
                    sequence_length=config["sequence_length"],
                    memory_size=config["memory_size"],
                )

            # Absorb the rest into the base TrainerSettings
            for key, val in config.items():
                if key in attr.fields_dict(TrainerSettings):
                    new_config[key] = val

            # Structure the whole thing
            all_behavior_config_dict[behavior_name] = cattr.structure(
                new_config, TrainerSettings
            )
    return all_behavior_config_dict
|||
|
|||
|
|||
def write_to_yaml_file(config: Dict[str, Any], output_config: str):
    """Unstructure *config*, strip None-valued entries, and dump it as YAML."""
    plain_config = cattr.unstructure(config)
    plain_config = remove_nones(plain_config)
    with open(output_config, "w") as out_file:
        try:
            # sort_keys=False preserves the dict's insertion order on output.
            yaml.dump(plain_config, out_file, sort_keys=False)
        except TypeError:  # Older versions of pyyaml don't support sort_keys
            yaml.dump(plain_config, out_file)
|||
|
|||
|
|||
def remove_nones(config: Dict[Any, Any]):
    """Return a copy of *config* without None-valued entries, recursively.

    Nested dicts are cleaned in turn and kept even when they become empty.
    """
    return {
        key: remove_nones(val) if isinstance(val, dict) else val
        for key, val in config.items()
        if isinstance(val, dict) or val is not None
    }
|||
|
|||
|
|||
if __name__ == "__main__":
    # Command-line entry point: parse paths, convert, and write the new YAML.
    argparser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    argparser.add_argument(
        "trainer_config_path",
        help="Path to old format (<=0.16.X) trainer configuration YAML.",
    )
    argparser.add_argument(
        "--curriculum",
        help="Path to old format (<=0.16.X) curriculum configuration YAML.",
        default=None,
    )
    argparser.add_argument(
        "--sampler",
        help="Path to old format (<=0.16.X) parameter randomization configuration YAML.",
        default=None,
    )
    argparser.add_argument(
        "output_config_path", help="Path to write converted YAML file."
    )
    args = argparser.parse_args()
    print(
        f"Converting {args.trainer_config_path} and saving to {args.output_config_path}."
    )

    old_config = load_config(args.trainer_config_path)
    behavior_config_dict = convert_behaviors(old_config)
    full_config = {"behaviors": behavior_config_dict}

    # Convert curriculum and sampler. note that we don't validate these; if it was correct
    # before it should be correct now.
    if args.curriculum is not None:
        curriculum_config_dict = load_config(args.curriculum)
        full_config["curriculum"] = curriculum_config_dict

    if args.sampler is not None:
        # Bug fix: this previously loaded args.curriculum, so --sampler was
        # ignored and the curriculum file ended up under
        # "parameter_randomization".
        sampler_config_dict = load_config(args.sampler)
        full_config["parameter_randomization"] = sampler_config_dict

    write_to_yaml_file(full_config, args.output_config_path)
撰写
预览
正在加载...
取消
保存
Reference in new issue