
[debug] Require all behavior names to have a matching YAML entry (#5210) (#5296)

* Add strict check to settings.py

* Remove warning from trainer factory, add test

* Add changelog

* Fix test

* Update changelog

* Remove strict CLI options

* Remove strict option, rename, make strict default

* Remove newline

* Update comments

* Set default dict to actually default to a default dict

* Fix tests

* Fix tests again

* Default trainer dict to requiring all fields

* Fix settings typing

* Use logger

* Add default_settings to error

(cherry picked from commit 86a4070bad4f5bca201db57f29117362c62617d0)
/release_17_branch
GitHub · 3 years ago
Current commit
bff0a5d2
5 files changed, with 66 insertions and 13 deletions
1. com.unity.ml-agents/CHANGELOG.md (2 changes)
2. ml-agents/mlagents/trainers/settings.py (35 changes)
3. ml-agents/mlagents/trainers/tests/test_settings.py (35 changes)
4. ml-agents/mlagents/trainers/tests/test_trainer_util.py (2 changes)
5. ml-agents/mlagents/trainers/trainer/trainer_factory.py (5 changes)

com.unity.ml-agents/CHANGELOG.md (2 changes)


#### ml-agents / ml-agents-envs / gym-unity (Python)
- Some console output has been moved from `info` to `debug` and will not be printed by default. If you want all messages to be printed, you can run `mlagents-learn` with the `--debug` option or add the line `debug: true` at the top of the YAML config file. (#5211)
- When using a configuration YAML, it is required to define all behaviors found in a Unity executable in the trainer configuration YAML, or to specify `default_settings`. (#5210)
- The embedding size of attention layers used when a BufferSensor is in the scene has been changed. It is now fixed to 128 units. It might be impossible to resume training from a checkpoint of a previous version. (#5272)
### Bug Fixes
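For illustration, a trainer configuration YAML that satisfies the new requirement could look like the sketch below. The behavior names (`3DBall`, `Walker`) and the hyperparameter values are made up; what matters is the structure: every behavior in the executable either has its own entry under `behaviors:` or falls back to `default_settings:`. With neither, looking up an unlisted behavior name now raises a `TrainerConfigError` instead of silently training with defaults.

```yaml
# Hypothetical example; behavior names and values are illustrative only.
default_settings:
  trainer_type: ppo
  max_steps: 500000

behaviors:
  3DBall:                  # explicit entry overrides default_settings for this behavior
    trainer_type: ppo
    max_steps: 2000000
  # Any other behavior in the executable (e.g. Walker) falls back to default_settings.
  # Without the default_settings section above, it would have to be listed here too.
```

Running `mlagents-learn` without a configuration file at all keeps the previous lenient behavior, since the strict check is only enabled when a YAML file is loaded.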

ml-agents/mlagents/trainers/settings.py (35 changes)


         )

     @staticmethod
-    def dict_to_defaultdict(d: Dict, t: type) -> DefaultDict:
+    def dict_to_trainerdict(d: Dict, t: type) -> "TrainerSettings.DefaultTrainerDict":
         return TrainerSettings.DefaultTrainerDict(
             cattr.structure(d, Dict[str, TrainerSettings])
         )

                 super().__init__(*args)
             else:
                 super().__init__(TrainerSettings, *args)
+            self._config_specified = True
+
+        def set_config_specified(self, require_config_specified: bool) -> None:
+            self._config_specified = require_config_specified
+
-                return copy.deepcopy(TrainerSettings.default_override)
+                self[key] = copy.deepcopy(TrainerSettings.default_override)
+            elif self._config_specified:
+                raise TrainerConfigError(
+                    f"The behavior name {key} has not been specified in the trainer configuration. "
+                    f"Please add an entry in the configuration file for {key}, or set default_settings."
+                )
-                return TrainerSettings()
+            else:
+                logger.warn(
+                    f"Behavior name {key} does not match any behaviors specified "
+                    f"in the trainer configuration file. A default configuration will be used."
+                )
+                self[key] = TrainerSettings()
+            return self[key]
# COMMAND LINE #########################################################################

 @attr.s(auto_attribs=True)
 class RunOptions(ExportableSettings):
     default_settings: Optional[TrainerSettings] = None
-    behaviors: DefaultDict[str, TrainerSettings] = attr.ib(
+    behaviors: TrainerSettings.DefaultTrainerDict = attr.ib(
         factory=TrainerSettings.DefaultTrainerDict
     )
     env_settings: EnvironmentSettings = attr.ib(factory=EnvironmentSettings)

     # These are options that are relevant to the run itself, and not the engine or environment.
     # They will be left here.
     debug: bool = parser.get_default("debug")

-    # Strict conversion
+    # Convert to settings while making sure all fields are valid
     cattr.register_structure_hook(EnvironmentSettings, strict_to_cls)
     cattr.register_structure_hook(EngineSettings, strict_to_cls)
     cattr.register_structure_hook(CheckpointSettings, strict_to_cls)

     )
     cattr.register_structure_hook(TrainerSettings, TrainerSettings.structure)
     cattr.register_structure_hook(
-        DefaultDict[str, TrainerSettings], TrainerSettings.dict_to_defaultdict
+        TrainerSettings.DefaultTrainerDict, TrainerSettings.dict_to_trainerdict
     )
     cattr.register_unstructure_hook(collections.defaultdict, defaultdict_to_dict)
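As an aside, the `cattr.register_structure_hook` calls above are what turn each sub-dictionary of the parsed YAML into its settings class. A minimal, self-contained sketch of that mechanism follows; `EngineConfig` and `strict_to_engine` are made-up names standing in for the real settings classes and the `strict_to_cls` helper.

```python
import attr
import cattr


@attr.s(auto_attribs=True)
class EngineConfig:
    width: int = 84
    height: int = 84


def strict_to_engine(d: dict, t: type) -> EngineConfig:
    # Mirror the "strict" idea: reject keys that are not fields of the class.
    unknown = set(d) - {f.name for f in attr.fields(EngineConfig)}
    if unknown:
        raise ValueError(f"Unknown options: {unknown}")
    return EngineConfig(**d)


cattr.register_structure_hook(EngineConfig, strict_to_engine)

print(cattr.structure({"width": 128}, EngineConfig))  # EngineConfig(width=128, height=84)
# cattr.structure({"widht": 128}, EngineConfig) would raise ValueError.
```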

"engine_settings": {},
"torch_settings": {},
}
_require_all_behaviors = True
else:
# If we're not loading from a file, we don't require all behavior names to be specified.
_require_all_behaviors = False
# Use the YAML file values for all values not specified in the CLI.
for key in configured_dict.keys():

configured_dict[key] = val
final_runoptions = RunOptions.from_dict(configured_dict)
# Need check to bypass type checking but keep structure on dict working
if isinstance(final_runoptions.behaviors, TrainerSettings.DefaultTrainerDict):
# configure whether or not we should require all behavior names to be found in the config YAML
final_runoptions.behaviors.set_config_specified(_require_all_behaviors)
return final_runoptions
@staticmethod
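Taken together, the settings.py changes come down to one pattern: a `defaultdict` subclass whose `__missing__` either fabricates a default entry or raises, depending on how the `RunOptions` were created. The standalone sketch below illustrates that pattern; `StrictDefaultDict` and `Settings` are simplified, hypothetical stand-ins for `TrainerSettings.DefaultTrainerDict` and `TrainerSettings`, and `KeyError` stands in for `TrainerConfigError`.

```python
import collections
import copy
from typing import Any


class Settings:
    """Stand-in for TrainerSettings: one illustrative field plus a class-level override."""

    default_override = None  # would be set when a default_settings section is given

    def __init__(self, max_steps: int = 500000) -> None:
        self.max_steps = max_steps


class StrictDefaultDict(collections.defaultdict):
    """Simplified version of the pattern in this diff: unknown keys either get a
    default entry or raise, depending on whether a config file was specified."""

    def __init__(self, *args):
        super().__init__(Settings, *args)
        self._config_specified = True  # strict by default, i.e. config came from a YAML file

    def set_config_specified(self, require_config_specified: bool) -> None:
        self._config_specified = require_config_specified

    def __missing__(self, key: Any) -> Settings:
        if Settings.default_override is not None:
            # A default_settings block was provided: copy it for this behavior name.
            self[key] = copy.deepcopy(Settings.default_override)
        elif self._config_specified:
            # Strict mode: the YAML must name every behavior.
            raise KeyError(f"Behavior {key} missing from the trainer configuration")
        else:
            # Lenient mode (no YAML at all): fall back to plain defaults.
            self[key] = Settings()
        return self[key]


config = StrictDefaultDict()
config.set_config_specified(False)       # pretend no YAML file was given
print(config["SomeBehavior"].max_steps)  # 500000, entry created on first access
```

In the real change, `from_argparse` calls `set_config_specified(False)` only when no configuration file was supplied, which is why purely CLI-driven runs keep the old lenient behavior while YAML-driven runs become strict.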

ml-agents/mlagents/trainers/tests/test_settings.py (35 changes)


     Verify that a new config will have a PPO trainer with extrinsic rewards.
     """
     blank_runoptions = RunOptions()
+    blank_runoptions.behaviors.set_config_specified(False)
     assert (
         RewardSignalType.EXTRINSIC in blank_runoptions.behaviors["test"].reward_signals
     )

     default_settings_cls = cattr.structure(default_settings, TrainerSettings)
     check_if_different(default_settings_cls, run_options.behaviors["test2"])
-    # Check that an existing beehavior overrides the defaults in specified fields
+    # Check that an existing behavior overrides the defaults in specified fields
     test1_settings = run_options.behaviors["test1"]
     assert test1_settings.max_steps == 2
     assert test1_settings.network_settings.hidden_units == 2000

         test1_settings.network_settings.hidden_units == default_settings_cls.network_settings.hidden_units
     check_if_different(test1_settings, default_settings_cls)

+def test_config_specified():
+    # Test require all behavior names to be specified (or not)
+    # Remove any pre-set defaults
+    TrainerSettings.default_override = None
+    behaviors = {"test1": {"max_steps": 2, "network_settings": {"hidden_units": 2000}}}
+    run_options_dict = {"behaviors": behaviors}
+    ro = RunOptions.from_dict(run_options_dict)
+    # Don't require all behavior names
+    ro.behaviors.set_config_specified(False)
+    # Test that we can grab an entry that is not in the dict.
+    assert isinstance(ro.behaviors["test2"], TrainerSettings)
+
+    # Create strict RunOptions with no default_settings
+    run_options_dict = {"behaviors": behaviors}
+    ro = RunOptions.from_dict(run_options_dict)
+    # Require all behavior names
+    ro.behaviors.set_config_specified(True)
+    with pytest.raises(TrainerConfigError):
+        # Variable must be accessed otherwise Python won't query the dict
+        print(ro.behaviors["test2"])
+
+    # Create strict RunOptions with default settings
+    default_settings = {"max_steps": 1, "network_settings": {"num_layers": 1000}}
+    run_options_dict = {"default_settings": default_settings, "behaviors": behaviors}
+    ro = RunOptions.from_dict(run_options_dict)
+    # Require all behavior names
+    ro.behaviors.set_config_specified(True)
+    # Test that we can grab an entry that is not in the dict.
+    assert isinstance(ro.behaviors["test2"], TrainerSettings)

def test_pickle():

ml-agents/mlagents/trainers/tests/test_trainer_util.py (2 changes)


"""
brain_name = "testbrain"
no_default_config = RunOptions().behaviors
# Pretend this was created without a YAML file
no_default_config.set_config_specified(False)
trainer_factory = TrainerFactory(
trainer_config=no_default_config,

ml-agents/mlagents/trainers/trainer/trainer_factory.py (5 changes)


         self.ghost_controller = GhostController()

     def generate(self, behavior_name: str) -> Trainer:
-        if behavior_name not in self.trainer_config.keys():
-            logger.warning(
-                f"Behavior name {behavior_name} does not match any behaviors specified"
-                f"in the trainer configuration file: {sorted(self.trainer_config.keys())}"
-            )
         trainer_settings = self.trainer_config[behavior_name]
         return TrainerFactory._initialize_trainer(
             trainer_settings,
