[refactor] Store and restore state along with checkpoints (#4025)

4 年前 · f5435876
--- a/com.unity.ml-agents/CHANGELOG.md
+++ b/com.unity.ml-agents/CHANGELOG.md
 - `use_visual` and `allow_multiple_visual_obs` in the `UnityToGymWrapper` constructor
 were replaced by `allow_multiple_obs` which allows one or more visual observations and
 vector observations to be used simultaneously. (#3981) Thank you @shakenes !
-### Minor Changes
-#### com.unity.ml-agents (C#)
- `ObservableAttribute` was added. Adding the attribute to fields or properties on an Agent will allow it to generate
-  observations via reflection. (#3925, #4006)
-#### ml-agents / ml-agents-envs / gym-unity (Python)
 - Curriculum and Parameter Randomization configurations have been merged
  into the main training configuration file. Note that this means training
  configuration files are now environment-specific. (#3791)
  directory. (#3829)
+- When using Curriculum, the current lesson will resume if training is quit and resumed. As such,
+  the `--lesson` CLI option has been removed. (#4025)
+### Minor Changes
+#### com.unity.ml-agents (C#)
+- `ObservableAttribute` was added. Adding the attribute to fields or properties on an Agent will allow it to generate
+  observations via reflection. (#3925, #4006)
+#### ml-agents / ml-agents-envs / gym-unity (Python)
 - Unity Player logs are now written out to the results directory. (#3877)
 - Run configuration YAML files are written out to the results directory at the end of the run. (#3815)
 - When trying to load/resume from a checkpoint created with an earlier verison of ML-Agents,
--- a/docs/Migrating.md
+++ b/docs/Migrating.md
 - `use_visual` and `allow_multiple_visual_obs` in the `UnityToGymWrapper` constructor
 were replaced by `allow_multiple_obs` which allows one or more visual observations and
 vector observations to be used simultaneously.
+- `--lesson` has been removed from the CLI. Lessons will resume when using `--resume`.
+  To start at a different lesson, modify your Curriculum configuration.

 ### Steps to Migrate
 - To upgrade your configuration files, an upgrade script has been provided. Run `python config/update_config.py
--- a/docs/Training-ML-Agents.md
+++ b/docs/Training-ML-Agents.md
 mlagents-learn config/ppo/WallJump_curriculum.yaml --run-id=wall-jump-curriculum
 ```

-We can then keep track of the current lessons and progresses via TensorBoard.
-
-**Note**: If you are resuming a training session that uses curriculum, please
-pass the number of the last-reached lesson using the `--lesson` flag when
-running `mlagents-learn`.
+We can then keep track of the current lessons and progresses via TensorBoard. If you've terminated
+the run, you can resume it using `--resume` and lesson progress will start off where it
+ended.

 ### Environment Parameter Randomization

--- a/ml-agents/README.md
+++ b/ml-agents/README.md
  cooperative behavior among different agents is not stable.
 - Resuming self-play from a checkpoint resets the reported ELO to the default
  value.
- Resuming curriculum learning from a checkpoint requires the last lesson be
-  specified using the `--lesson` CLI option
--- a/ml-agents/mlagents/trainers/cli_utils.py
+++ b/ml-agents/mlagents/trainers/cli_utils.py
        action=DetectDefault,
    )
    argparser.add_argument(
-        "--lesson",
-        default=0,
-        type=int,
-        help="The lesson to start with when performing curriculum training",
-        action=DetectDefault,
-    )
-    argparser.add_argument(
        "--load",
        default=False,
        dest="load_model",
--- a/ml-agents/mlagents/trainers/learn.py
+++ b/ml-agents/mlagents/trainers/learn.py
 from mlagents.trainers.sampler_class import SamplerManager
 from mlagents.trainers.exception import SamplerException
 from mlagents.trainers.settings import RunOptions
+from mlagents.trainers.training_status import GlobalTrainingStatus
 from mlagents_envs.base_env import BaseEnv
 from mlagents.trainers.subprocess_env_manager import SubprocessEnvManager
 from mlagents_envs.side_channel.side_channel import SideChannel
 from mlagents_envs import logging_util

 logger = logging_util.get_logger(__name__)
+
+TRAINING_STATUS_FILE_NAME = "training_status.json"


 def get_version_string() -> str:
        )
        # Make run logs directory
        os.makedirs(run_logs_dir, exist_ok=True)
+        # Load any needed states
+        if checkpoint_settings.resume:
+            GlobalTrainingStatus.load_state(
+                os.path.join(run_logs_dir, "training_status.json")
+            )
        # Configure CSV, Tensorboard Writers and StatsReporter
        # We assume reward and episode length are needed in the CSV.
        csv_writer = CSVWriter(
            env_factory, engine_config, env_settings.num_envs
        )
        maybe_meta_curriculum = try_create_meta_curriculum(
-            options.curriculum, env_manager, checkpoint_settings.lesson
+            options.curriculum, env_manager, restore=checkpoint_settings.resume
        )
        sampler_manager, resampling_interval = create_sampler_manager(
            options.parameter_randomization, run_seed
        env_manager.close()
        write_run_options(write_path, options)
        write_timing_tree(run_logs_dir)
+        write_training_status(run_logs_dir)


 def write_run_options(output_dir: str, run_options: RunOptions) -> None:
        )


+def write_training_status(output_dir: str) -> None:
+    GlobalTrainingStatus.save_state(os.path.join(output_dir, TRAINING_STATUS_FILE_NAME))
+
+
 def write_timing_tree(output_dir: str) -> None:
    timing_path = os.path.join(output_dir, "timers.json")
    try:


 def try_create_meta_curriculum(
-    curriculum_config: Optional[Dict], env: SubprocessEnvManager, lesson: int
+    curriculum_config: Optional[Dict], env: SubprocessEnvManager, restore: bool = False
-        # TODO: Should be able to start learning at different lesson numbers
-        # for each curriculum.
-        meta_curriculum.set_all_curricula_to_lesson_num(lesson)
+        if restore:
+            meta_curriculum.try_restore_all_curriculum()
        return meta_curriculum


--- a/ml-agents/mlagents/trainers/meta_curriculum.py
+++ b/ml-agents/mlagents/trainers/meta_curriculum.py
 from typing import Dict, Set
 from mlagents.trainers.curriculum import Curriculum
 from mlagents.trainers.settings import CurriculumSettings
+from mlagents.trainers.training_status import GlobalTrainingStatus, StatusType

 from mlagents_envs.logging_util import get_logger

                )
        return ret

-    def set_all_curricula_to_lesson_num(self, lesson_num):
-        """Sets all the curricula in this meta curriculum to a specified
-        lesson number.
-
-        Args:
-            lesson_num (int): The lesson number which all the curricula will
-                be set to.
+    def try_restore_all_curriculum(self):
-        for _, curriculum in self.brains_to_curricula.items():
-            curriculum.lesson_num = lesson_num
+        Tries to restore all the curriculums to what is saved in training_status.json
+        """
+
+        for brain_name, curriculum in self.brains_to_curricula.items():
+            lesson_num = GlobalTrainingStatus.get_parameter_state(
+                brain_name, StatusType.LESSON_NUM
+            )
+            if lesson_num is not None:
+                logger.info(
+                    f"Resuming curriculum for {brain_name} at lesson {lesson_num}."
+                )
+                curriculum.lesson_num = lesson_num
+            else:
+                curriculum.lesson_num = 0

    def get_config(self):
        """Get the combined configuration of all curricula in this
--- a/ml-agents/mlagents/trainers/settings.py
+++ b/ml-agents/mlagents/trainers/settings.py
    force: bool = parser.get_default("force")
    train_model: bool = parser.get_default("train_model")
    inference: bool = parser.get_default("inference")
-    lesson: int = parser.get_default("lesson")


@attr.s(auto_attribs=True)
--- a/ml-agents/mlagents/trainers/tests/test_learn.py
+++ b/ml-agents/mlagents/trainers/tests/test_learn.py
        base_port: 4001
        seed: 9870
    checkpoint_settings:
-        lesson: 2
        run_id: uselessrun
        save_freq: 654321
    debug: false
    assert opt.behaviors == {}
    assert opt.env_settings.env_path is None
    assert opt.parameter_randomization is None
-    assert opt.checkpoint_settings.lesson == 0
    assert opt.checkpoint_settings.resume is False
    assert opt.checkpoint_settings.inference is False
    assert opt.checkpoint_settings.run_id == "ppo"
    full_args = [
        "mytrainerpath",
        "--env=./myenvfile",
-        "--lesson=3",
        "--resume",
        "--inference",
        "--run-id=myawesomerun",
    assert opt.behaviors == {}
    assert opt.env_settings.env_path == "./myenvfile"
    assert opt.parameter_randomization is None
-    assert opt.checkpoint_settings.lesson == 3
    assert opt.checkpoint_settings.run_id == "myawesomerun"
    assert opt.checkpoint_settings.save_freq == 123456
    assert opt.env_settings.seed == 7890
    assert opt.behaviors == {}
    assert opt.env_settings.env_path == "./oldenvfile"
    assert opt.parameter_randomization is None
-    assert opt.checkpoint_settings.lesson == 2
    assert opt.checkpoint_settings.run_id == "uselessrun"
    assert opt.checkpoint_settings.save_freq == 654321
    assert opt.env_settings.seed == 9870
    full_args = [
        "mytrainerpath",
        "--env=./myenvfile",
-        "--lesson=3",
        "--resume",
        "--inference",
        "--run-id=myawesomerun",
    assert opt.behaviors == {}
    assert opt.env_settings.env_path == "./myenvfile"
    assert opt.parameter_randomization is None
-    assert opt.checkpoint_settings.lesson == 3
    assert opt.checkpoint_settings.run_id == "myawesomerun"
    assert opt.checkpoint_settings.save_freq == 123456
    assert opt.env_settings.seed == 7890
--- a/ml-agents/mlagents/trainers/tests/test_meta_curriculum.py
+++ b/ml-agents/mlagents/trainers/tests/test_meta_curriculum.py
 import pytest
-from unittest.mock import patch, Mock
+from unittest.mock import patch, Mock, call

 from mlagents.trainers.meta_curriculum import MetaCurriculum

 )
 from mlagents.trainers.tests.test_curriculum import dummy_curriculum_config
 from mlagents.trainers.settings import CurriculumSettings
+from mlagents.trainers.training_status import StatusType


@pytest.fixture
    curriculum_b.increment_lesson.assert_not_called()


-def test_set_all_curriculums_to_lesson_num():
+@patch("mlagents.trainers.meta_curriculum.GlobalTrainingStatus")
+def test_restore_curriculums(mock_trainingstatus):
-
-    meta_curriculum.set_all_curricula_to_lesson_num(2)
-
+    # Test restore to value
+    mock_trainingstatus.get_parameter_state.return_value = 2
+    meta_curriculum.try_restore_all_curriculum()
+    mock_trainingstatus.get_parameter_state.assert_has_calls(
+        [call("Brain1", StatusType.LESSON_NUM), call("Brain2", StatusType.LESSON_NUM)],
+        any_order=True,
+    )
+
+    # Test restore to None
+    mock_trainingstatus.get_parameter_state.return_value = None
+    meta_curriculum.try_restore_all_curriculum()
+
+    assert meta_curriculum.brains_to_curricula["Brain1"].lesson_num == 0
+    assert meta_curriculum.brains_to_curricula["Brain2"].lesson_num == 0


 def test_get_config():
--- a/ml-agents/mlagents/trainers/trainer_controller.py
+++ b/ml-agents/mlagents/trainers/trainer_controller.py
 from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
 from mlagents.trainers.agent_processor import AgentManager
 from mlagents.trainers.settings import CurriculumSettings
+from mlagents.trainers.training_status import GlobalTrainingStatus, StatusType


 class TrainerController(object):
                if brain_name in self.trainers:
                    self.trainers[brain_name].stats_reporter.set_stat(
                        "Environment/Lesson", curr.lesson_num
+                    )
+                    GlobalTrainingStatus.set_parameter_state(
+                        brain_name, StatusType.LESSON_NUM, curr.lesson_num
                    )

        for trainer in self.trainers.values():
--- a/ml-agents/mlagents/trainers/tests/test_training_status.py
+++ b/ml-agents/mlagents/trainers/tests/test_training_status.py
+import os
+import unittest
+import json
+from enum import Enum
+
+from mlagents.trainers.training_status import (
+    StatusType,
+    StatusMetaData,
+    GlobalTrainingStatus,
+)
+
+
+def test_globaltrainingstatus(tmpdir):
+    path_dir = os.path.join(tmpdir, "test.json")
+
+    GlobalTrainingStatus.set_parameter_state("Category1", StatusType.LESSON_NUM, 3)
+    GlobalTrainingStatus.save_state(path_dir)
+
+    with open(path_dir, "r") as fp:
+        test_json = json.load(fp)
+
+    assert "Category1" in test_json
+    assert StatusType.LESSON_NUM.value in test_json["Category1"]
+    assert test_json["Category1"][StatusType.LESSON_NUM.value] == 3
+    assert "metadata" in test_json
+
+    GlobalTrainingStatus.load_state(path_dir)
+    restored_val = GlobalTrainingStatus.get_parameter_state(
+        "Category1", StatusType.LESSON_NUM
+    )
+    assert restored_val == 3
+
+    # Test unknown categories and status types (keys)
+    unknown_category = GlobalTrainingStatus.get_parameter_state(
+        "Category3", StatusType.LESSON_NUM
+    )
+
+    class FakeStatusType(Enum):
+        NOTAREALKEY = "notarealkey"
+
+    unknown_key = GlobalTrainingStatus.get_parameter_state(
+        "Category1", FakeStatusType.NOTAREALKEY
+    )
+    assert unknown_category is None
+    assert unknown_key is None
+
+
+class StatsMetaDataTest(unittest.TestCase):
+    def test_metadata_compare(self):
+        # Test write_stats
+        with self.assertLogs("mlagents.trainers", level="WARNING") as cm:
+            default_metadata = StatusMetaData()
+            version_statsmetadata = StatusMetaData(mlagents_version="test")
+            default_metadata.check_compatibility(version_statsmetadata)
+
+            tf_version_statsmetadata = StatusMetaData(tensorflow_version="test")
+            default_metadata.check_compatibility(tf_version_statsmetadata)
+
+        # Assert that 2 warnings have been thrown
+        assert len(cm.output) == 2
--- a/ml-agents/mlagents/trainers/training_status.py
+++ b/ml-agents/mlagents/trainers/training_status.py
+from typing import Dict, Any
+from enum import Enum
+from collections import defaultdict
+import json
+import attr
+import cattr
+
+from mlagents.tf_utils import tf
+from mlagents_envs.logging_util import get_logger
+from mlagents.trainers import __version__
+from mlagents.trainers.exception import TrainerError
+
+logger = get_logger(__name__)
+
+STATUS_FORMAT_VERSION = "0.1.0"
+
+
+class StatusType(Enum):
+    LESSON_NUM = "lesson_num"
+    STATS_METADATA = "metadata"
+
+
+@attr.s(auto_attribs=True)
+class StatusMetaData:
+    stats_format_version: str = STATUS_FORMAT_VERSION
+    mlagents_version: str = __version__
+    tensorflow_version: str = tf.__version__
+
+    def to_dict(self) -> Dict[str, str]:
+        return cattr.unstructure(self)
+
+    @staticmethod
+    def from_dict(import_dict: Dict[str, str]) -> "StatusMetaData":
+        return cattr.structure(import_dict, StatusMetaData)
+
+    def check_compatibility(self, other: "StatusMetaData") -> None:
+        """
+        Check compatibility with a loaded StatsMetaData and warn the user
+        if versions mismatch. This is used for resuming from old checkpoints.
+        """
+        # This should cover all stats version mismatches as well.
+        if self.mlagents_version != other.mlagents_version:
+            logger.warning(
+                "Checkpoint was loaded from a different version of ML-Agents. Some things may not resume properly."
+            )
+        if self.tensorflow_version != other.tensorflow_version:
+            logger.warning(
+                "Tensorflow checkpoint was saved with a different version of Tensorflow. Model may not resume properly."
+            )
+
+
+class GlobalTrainingStatus:
+    """
+    GlobalTrainingStatus class that contains static methods to save global training status and
+    load it on a resume. These are values that might be needed for the training resume that
+    cannot/should not be captured in a model checkpoint, such as curriclum lesson.
+    """
+
+    saved_state: Dict[str, Dict[str, Any]] = defaultdict(lambda: {})
+
+    @staticmethod
+    def load_state(path: str) -> None:
+        """
+        Load a JSON file that contains saved state.
+        :param path: Path to the JSON file containing the state.
+        """
+        try:
+            with open(path, "r") as f:
+                loaded_dict = json.load(f)
+            # Compare the metadata
+            _metadata = loaded_dict[StatusType.STATS_METADATA.value]
+            StatusMetaData.from_dict(_metadata).check_compatibility(StatusMetaData())
+            # Update saved state.
+            GlobalTrainingStatus.saved_state.update(loaded_dict)
+        except FileNotFoundError:
+            logger.warning(
+                "Training status file not found. Not all functions will resume properly."
+            )
+        except KeyError:
+            raise TrainerError(
+                "Metadata not found, resuming from an incompatible version of ML-Agents."
+            )
+
+    @staticmethod
+    def save_state(path: str) -> None:
+        """
+        Save a JSON file that contains saved state.
+        :param path: Path to the JSON file containing the state.
+        """
+        GlobalTrainingStatus.saved_state[
+            StatusType.STATS_METADATA.value
+        ] = StatusMetaData().to_dict()
+        with open(path, "w") as f:
+            json.dump(GlobalTrainingStatus.saved_state, f, indent=4)
+
+    @staticmethod
+    def set_parameter_state(category: str, key: StatusType, value: Any) -> None:
+        """
+        Stores an arbitrary-named parameter in the global saved state.
+        :param category: The category (usually behavior name) of the parameter.
+        :param key: The parameter, e.g. lesson number.
+        :param value: The value.
+        """
+        GlobalTrainingStatus.saved_state[category][key.value] = value
+
+    @staticmethod
+    def get_parameter_state(category: str, key: StatusType) -> Any:
+        """
+        Loads an arbitrary-named parameter from training_status.json.
+        If not found, returns None.
+        :param category: The category (usually behavior name) of the parameter.
+        :param key: The statistic, e.g. lesson number.
+        :param value: The value.
+        """
+        return GlobalTrainingStatus.saved_state[category].get(key.value, None)