GitHub
4 年前
当前提交
ee1098d1
共有 5 个文件被更改,包括 334 次插入 和 116 次删除
-
8docs/Migrating.md
-
4docs/Training-ML-Agents.md
-
191ml-agents/mlagents/trainers/tests/test_config_conversion.py
-
137ml-agents/mlagents/trainers/upgrade_config.py
-
110config/upgrade_config.py
|
|||
import yaml |
|||
import pytest |
|||
from unittest import mock |
|||
from argparse import Namespace |
|||
|
|||
from mlagents.trainers.upgrade_config import convert_behaviors, main, remove_nones |
|||
from mlagents.trainers.settings import ( |
|||
TrainerType, |
|||
PPOSettings, |
|||
SACSettings, |
|||
RewardSignalType, |
|||
) |
|||
|
|||
# Behavior name used by both test configs and asserted on by the tests below.
BRAIN_NAME = "testbehavior"

# Check one per category
# (one hyperparameter, one network setting, one base TrainerSettings field):
BATCH_SIZE = 256
HIDDEN_UNITS = 32
SUMMARY_FREQ = 500

# Old-format (<=0.16.X) PPO trainer config: a "default" section plus a
# per-behavior section whose keys override the defaults.
PPO_CONFIG = f"""
default:
    trainer: ppo
    batch_size: 1024
    beta: 5.0e-3
    buffer_size: 10240
    epsilon: 0.2
    hidden_units: 128
    lambd: 0.95
    learning_rate: 3.0e-4
    learning_rate_schedule: linear
    max_steps: 5.0e5
    memory_size: 256
    normalize: false
    num_epoch: 3
    num_layers: 2
    time_horizon: 64
    sequence_length: 64
    summary_freq: 10000
    use_recurrent: false
    vis_encode_type: simple
    reward_signals:
        extrinsic:
            strength: 1.0
            gamma: 0.99

{BRAIN_NAME}:
    trainer: ppo
    batch_size: {BATCH_SIZE}
    beta: 5.0e-3
    buffer_size: 64
    epsilon: 0.2
    hidden_units: {HIDDEN_UNITS}
    lambd: 0.95
    learning_rate: 5.0e-3
    max_steps: 2500
    memory_size: 256
    normalize: false
    num_epoch: 3
    num_layers: 2
    time_horizon: 64
    sequence_length: 64
    summary_freq: {SUMMARY_FREQ}
    use_recurrent: false
    reward_signals:
        curiosity:
            strength: 1.0
            gamma: 0.99
            encoding_size: 128
"""

# Old-format (<=0.16.X) SAC trainer config, same structure as PPO_CONFIG.
SAC_CONFIG = f"""
default:
    trainer: sac
    batch_size: 128
    buffer_size: 50000
    buffer_init_steps: 0
    hidden_units: 128
    init_entcoef: 1.0
    learning_rate: 3.0e-4
    learning_rate_schedule: constant
    max_steps: 5.0e5
    memory_size: 256
    normalize: false
    num_update: 1
    train_interval: 1
    num_layers: 2
    time_horizon: 64
    sequence_length: 64
    summary_freq: 10000
    tau: 0.005
    use_recurrent: false
    vis_encode_type: simple
    reward_signals:
        extrinsic:
            strength: 1.0
            gamma: 0.99

{BRAIN_NAME}:
    trainer: sac
    batch_size: {BATCH_SIZE}
    buffer_size: 64
    buffer_init_steps: 100
    hidden_units: {HIDDEN_UNITS}
    init_entcoef: 0.01
    learning_rate: 3.0e-4
    max_steps: 1000
    memory_size: 256
    normalize: false
    num_update: 1
    train_interval: 1
    num_layers: 1
    time_horizon: 64
    sequence_length: 64
    summary_freq: {SUMMARY_FREQ}
    tau: 0.005
    use_recurrent: false
    curiosity_enc_size: 128
    demo_path: None
    vis_encode_type: simple
    reward_signals:
        curiosity:
            strength: 1.0
            gamma: 0.99
            encoding_size: 128
"""
|||
|
|||
|
|||
@pytest.mark.parametrize("use_recurrent", [True, False])
@pytest.mark.parametrize("trainer_type", [TrainerType.PPO, TrainerType.SAC])
def test_convert_behaviors(trainer_type, use_recurrent):
    """Old-format PPO/SAC configs convert into structured TrainerSettings.

    Checks one field per category: trainer_type, hyperparameters,
    network_settings, and reward_signals.
    """
    if trainer_type == TrainerType.PPO:
        trainer_config = PPO_CONFIG
        trainer_settings_type = PPOSettings
    elif trainer_type == TrainerType.SAC:
        trainer_config = SAC_CONFIG
        trainer_settings_type = SACSettings

    # safe_load: configs are plain data, and yaml.load() without an explicit
    # Loader is deprecated/unsafe (matches the safe_load usage in test_main).
    old_config = yaml.safe_load(trainer_config)
    old_config[BRAIN_NAME]["use_recurrent"] = use_recurrent
    new_config = convert_behaviors(old_config)

    # Test that the new config can be converted to TrainerSettings w/o exceptions
    trainer_settings = new_config[BRAIN_NAME]

    # Test that the trainer_settings contains the settings for BRAIN_NAME and
    # the defaults where specified
    assert trainer_settings.trainer_type == trainer_type
    assert isinstance(trainer_settings.hyperparameters, trainer_settings_type)
    assert trainer_settings.hyperparameters.batch_size == BATCH_SIZE
    assert trainer_settings.network_settings.hidden_units == HIDDEN_UNITS
    assert RewardSignalType.CURIOSITY in trainer_settings.reward_signals
|||
|
|||
|
|||
@mock.patch("mlagents.trainers.upgrade_config.convert_behaviors")
@mock.patch("mlagents.trainers.upgrade_config.remove_nones")
@mock.patch("mlagents.trainers.upgrade_config.write_to_yaml_file")
@mock.patch("mlagents.trainers.upgrade_config.parse_args")
@mock.patch("mlagents.trainers.upgrade_config.load_config")
def test_main(mock_load, mock_parse, yaml_write_mock, remove_none_mock, mock_convert):
    """main() wires config loading, conversion, none-removal and YAML writing."""
    output_path = "test.yaml"
    # load_config is called three times: trainer config, then curriculum,
    # then sampler.
    mock_load.side_effect = [
        yaml.safe_load(PPO_CONFIG),
        "test_curriculum_config",
        "test_sampler_config",
    ]
    mock_parse.return_value = Namespace(
        trainer_config_path="mock",
        output_config_path=output_path,
        curriculum="test",
        sampler="test",
    )
    mock_convert.return_value = "test_converted_config"
    cleaned_dict = mock.Mock(name="nonones")
    remove_none_mock.return_value = cleaned_dict

    main()

    # The assembled config must be what was passed through remove_nones ...
    assembled = remove_none_mock.call_args[0][0]
    # ... and its cleaned result must be exactly what gets written to disk.
    yaml_write_mock.assert_called_with(cleaned_dict, output_path)
    assert assembled["behaviors"] == "test_converted_config"
    assert assembled["curriculum"] == "test_curriculum_config"
    assert assembled["parameter_randomization"] == "test_sampler_config"
|||
|
|||
|
|||
def test_remove_nones():
    """None-valued entries are dropped at every nesting level."""
    nested = {"hello": {"hello2": 2, "hello3": None}, "hello4": None}
    assert remove_nones(nested) == {"hello": {"hello2": 2}}
|
|||
# NOTE: This upgrade script is a temporary measure for the transition between the old-format |
|||
# configuration file and the new format. It will be marked for deprecation once the |
|||
# Python CLI and configuration files are finalized, and removed the following release. |
|||
|
|||
import attr |
|||
import cattr |
|||
import yaml |
|||
from typing import Dict, Any |
|||
import argparse |
|||
from mlagents.trainers.settings import TrainerSettings, NetworkSettings, TrainerType |
|||
from mlagents.trainers.cli_utils import load_config |
|||
from mlagents.trainers.exception import TrainerConfigError |
|||
|
|||
|
|||
# Take an existing trainer config (e.g. trainer_config.yaml) and turn it into the new format.
def convert_behaviors(old_trainer_config: Dict[str, Any]) -> Dict[str, Any]:
    """Convert an old-format trainer config dict to {behavior: TrainerSettings}.

    Each non-"default" section is shallow-merged on top of the "default"
    section, then split into trainer_type, hyperparameters, network_settings,
    and the remaining base TrainerSettings fields.

    :param old_trainer_config: Parsed YAML of the old (<=0.16.X) trainer config.
    :return: Dict mapping behavior name to a structured TrainerSettings.
    :raises TrainerConfigError: if a behavior omits "trainer" or
        "use_recurrent", or sets use_recurrent without the memory settings.
    """
    all_behavior_config_dict = {}
    default_config = old_trainer_config.get("default", {})
    for behavior_name, config in old_trainer_config.items():
        if behavior_name != "default":
            # Shallow merge: behavior-specific keys override the defaults.
            config = default_config.copy()
            config.update(old_trainer_config[behavior_name])

            # Convert to split TrainerSettings, Hyperparameters, NetworkSettings
            # Set trainer_type and get appropriate hyperparameter settings
            try:
                trainer_type = config["trainer"]
            except KeyError:
                raise TrainerConfigError(
                    "Config doesn't specify a trainer type. "
                    "Please specify trainer: in your config."
                )
            new_config = {}
            new_config["trainer_type"] = trainer_type
            hyperparam_cls = TrainerType(trainer_type).to_settings()
            # Try to absorb as much as possible into the hyperparam_cls
            new_config["hyperparameters"] = cattr.structure(config, hyperparam_cls)

            # Try to absorb as much as possible into the network settings
            new_config["network_settings"] = cattr.structure(config, NetworkSettings)
            # Deal with recurrent. The try is narrowed to the use_recurrent
            # lookup only: previously a missing sequence_length/memory_size
            # also raised the misleading "doesn't specify use_recurrent" error.
            try:
                use_recurrent = config["use_recurrent"]
            except KeyError:
                raise TrainerConfigError(
                    "Config doesn't specify use_recurrent. "
                    "Please specify true or false for use_recurrent in your config."
                )
            if use_recurrent:
                try:
                    new_config[
                        "network_settings"
                    ].memory = NetworkSettings.MemorySettings(
                        sequence_length=config["sequence_length"],
                        memory_size=config["memory_size"],
                    )
                except KeyError as e:
                    # Attribute the error to the key that is actually missing.
                    raise TrainerConfigError(
                        f"Config specifies use_recurrent but doesn't specify {e}. "
                        "Please specify it in your config."
                    )
            # Absorb the rest into the base TrainerSettings
            for key, val in config.items():
                if key in attr.fields_dict(TrainerSettings):
                    new_config[key] = val

            # Structure the whole thing
            all_behavior_config_dict[behavior_name] = cattr.structure(
                new_config, TrainerSettings
            )
    return all_behavior_config_dict
|||
|
|||
|
|||
def write_to_yaml_file(unstructed_config: Dict[str, Any], output_config: str) -> None:
    """Dump an already-unstructured config dict to *output_config* as YAML.

    Preserves insertion order when the installed pyyaml supports it.
    """
    with open(output_config, "w") as out_file:
        try:
            # sort_keys=False keeps the dict's insertion order in the output.
            yaml.dump(unstructed_config, out_file, sort_keys=False)
        except TypeError:  # Older versions of pyyaml don't support sort_keys
            yaml.dump(unstructed_config, out_file)
|||
|
|||
|
|||
def remove_nones(config: Dict[Any, Any]) -> Dict[str, Any]:
    """Return a copy of *config* with None-valued entries removed.

    Recurses into nested dicts; a nested dict is kept even if it ends up empty.
    """
    return {
        key: remove_nones(val) if isinstance(val, dict) else val
        for key, val in config.items()
        if isinstance(val, dict) or val is not None
    }
|||
|
|||
|
|||
def parse_args():
    """Build and parse the command-line arguments for the upgrade script.

    Positional: trainer_config_path, output_config_path (in that order).
    Optional: --curriculum and --sampler paths, both defaulting to None.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument(
        "trainer_config_path",
        help="Path to old format (<=0.16.X) trainer configuration YAML.",
    )
    parser.add_argument(
        "--curriculum",
        help="Path to old format (<=0.16.X) curriculum configuration YAML.",
        default=None,
    )
    parser.add_argument(
        "--sampler",
        help="Path to old format (<=0.16.X) parameter randomization configuration YAML.",
        default=None,
    )
    parser.add_argument(
        "output_config_path", help="Path to write converted YAML file."
    )
    return parser.parse_args()
|||
|
|||
|
|||
def main() -> None:
    """Entry point: convert an old-format config file and write the new format."""
    args = parse_args()
    print(
        f"Converting {args.trainer_config_path} and saving to {args.output_config_path}."
    )

    # Behaviors are the only section that needs real conversion.
    full_config = {"behaviors": convert_behaviors(load_config(args.trainer_config_path))}

    # Convert curriculum and sampler. note that we don't validate these; if it was correct
    # before it should be correct now.
    if args.curriculum is not None:
        full_config["curriculum"] = load_config(args.curriculum)

    if args.sampler is not None:
        full_config["parameter_randomization"] = load_config(args.sampler)

    # Convert config to dict, then strip None entries before writing.
    unstructed_config = remove_nones(cattr.unstructure(full_config))
    write_to_yaml_file(unstructed_config, args.output_config_path)


if __name__ == "__main__":
    main()
|
|||
import attr |
|||
import cattr |
|||
import yaml |
|||
from typing import Dict, Any |
|||
import argparse |
|||
from mlagents.trainers.settings import TrainerSettings, NetworkSettings, TrainerType |
|||
from mlagents.trainers.cli_utils import load_config |
|||
|
|||
|
|||
# Take an existing trainer config (e.g. trainer_config.yaml) and turn it into the new format.
def convert_behaviors(old_trainer_config: Dict[str, Any]) -> Dict[str, Any]:
    """Convert each old-format behavior section into a structured TrainerSettings.

    :param old_trainer_config: Parsed YAML dict of the old (<=0.16.X) trainer config.
    :return: Dict mapping behavior name to a cattr-structured TrainerSettings.
    """
    all_behavior_config_dict = {}
    # The "default" section supplies fallback values for every other section.
    default_config = old_trainer_config.get("default", {})
    for behavior_name, config in old_trainer_config.items():
        if behavior_name != "default":
            # Shallow merge: behavior-specific keys override the defaults.
            config = default_config.copy()
            config.update(old_trainer_config[behavior_name])

            # Convert to split TrainerSettings, Hyperparameters, NetworkSettings
            # Set trainer_type and get appropriate hyperparameter settings
            # NOTE(review): raises a bare KeyError (no friendly message) if
            # "trainer" is missing from both sections.
            trainer_type = config["trainer"]
            new_config = {}
            new_config["trainer_type"] = trainer_type
            hyperparam_cls = TrainerType(trainer_type).to_settings()
            # Try to absorb as much as possible into the hyperparam_cls
            new_config["hyperparameters"] = cattr.structure(config, hyperparam_cls)

            # Try to absorb as much as possible into the network settings
            new_config["network_settings"] = cattr.structure(config, NetworkSettings)
            # Deal with recurrent
            # NOTE(review): likewise raises KeyError if "use_recurrent" (or,
            # when true, "sequence_length"/"memory_size") is absent.
            if config["use_recurrent"]:
                new_config["network_settings"].memory = NetworkSettings.MemorySettings(
                    sequence_length=config["sequence_length"],
                    memory_size=config["memory_size"],
                )

            # Absorb the rest into the base TrainerSettings
            for key, val in config.items():
                if key in attr.fields_dict(TrainerSettings):
                    new_config[key] = val

            # Structure the whole thing
            all_behavior_config_dict[behavior_name] = cattr.structure(
                new_config, TrainerSettings
            )
    return all_behavior_config_dict
|||
|
|||
|
|||
def write_to_yaml_file(config: Dict[str, Any], output_config: str):
    """Unstructure *config*, strip None-valued entries, and dump it as YAML."""
    plain_config = cattr.unstructure(config)
    plain_config = remove_nones(plain_config)
    with open(output_config, "w") as out_file:
        try:
            # sort_keys=False preserves the dict's insertion order on output.
            yaml.dump(plain_config, out_file, sort_keys=False)
        except TypeError:  # Older versions of pyyaml don't support sort_keys
            yaml.dump(plain_config, out_file)
|||
|
|||
|
|||
def remove_nones(config: Dict[Any, Any]):
    """Return a copy of *config* without None-valued entries, recursively.

    Nested dicts are cleaned in turn and kept even when they become empty.
    """
    return {
        key: remove_nones(val) if isinstance(val, dict) else val
        for key, val in config.items()
        if isinstance(val, dict) or val is not None
    }
|||
|
|||
|
|||
if __name__ == "__main__":
    # Command-line entry point: parse paths, convert, and write the new YAML.
    argparser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    argparser.add_argument(
        "trainer_config_path",
        help="Path to old format (<=0.16.X) trainer configuration YAML.",
    )
    argparser.add_argument(
        "--curriculum",
        help="Path to old format (<=0.16.X) curriculum configuration YAML.",
        default=None,
    )
    argparser.add_argument(
        "--sampler",
        help="Path to old format (<=0.16.X) parameter randomization configuration YAML.",
        default=None,
    )
    argparser.add_argument(
        "output_config_path", help="Path to write converted YAML file."
    )
    args = argparser.parse_args()
    print(
        f"Converting {args.trainer_config_path} and saving to {args.output_config_path}."
    )

    old_config = load_config(args.trainer_config_path)
    behavior_config_dict = convert_behaviors(old_config)
    full_config = {"behaviors": behavior_config_dict}

    # Convert curriculum and sampler. note that we don't validate these; if it was correct
    # before it should be correct now.
    if args.curriculum is not None:
        curriculum_config_dict = load_config(args.curriculum)
        full_config["curriculum"] = curriculum_config_dict

    if args.sampler is not None:
        # Bug fix: this previously loaded args.curriculum, so --sampler was
        # ignored and the curriculum file ended up under
        # "parameter_randomization".
        sampler_config_dict = load_config(args.sampler)
        full_config["parameter_randomization"] = sampler_config_dict

    write_to_yaml_file(full_config, args.output_config_path)
撰写
预览
正在加载...
取消
保存
Reference in new issue