Rebase develop

5 年前 · f331e5b7
--- a/ml-agents-envs/mlagents/envs/exception.py
+++ b/ml-agents-envs/mlagents/envs/exception.py

    pass

-class SamplerException(UnityException):
+class LessonControllerError(UnityException):
-    Related to errors with the sampler actions.
+    Any error related to the configuration of lesson controller
    """

    pass
--- a/ml-agents-envs/mlagents/envs/sampler_class.py
+++ b/ml-agents-envs/mlagents/envs/sampler_class.py
 import numpy as np
-<<<<<<< HEAD
-=======
-from functools import *
->>>>>>> Removed check_key and replaced with **param_dict for implicit type checks
-<<<<<<< HEAD
-=======
-class SamplerException(Exception):
-    pass
-
-class Sampler(ABC): 
->>>>>>> Removed check_key and replaced with **param_dict for implicit type checks

 class Sampler(ABC):
    @abstractmethod

 class UniformSampler(Sampler):
-<<<<<<< HEAD
    """
    Uniformly draws a single sample in the range [min_value, max_value).
    """
        self.max_value = max_value

    def sample_parameter(self) -> float:
-=======
-    # kwargs acts as a sink for extra unneeded args
-    def __init__(self, min_value, max_value, **kwargs):
-        self.min_value = min_value
-        self.max_value = max_value
-
-    def sample_parameter(self):
->>>>>>> Removed check_key and replaced with **param_dict for implicit type checks
-<<<<<<< HEAD

 class MultiRangeUniformSampler(Sampler):
    """
        cur_min, cur_max = self.intervals[
            np.random.choice(len(self.intervals), p=self.interval_weights)
        ]
-=======
-class MultiRangeUniformSampler(Sampler):
-    def __init__(self, intervals, **kwargs):
-        self.intervals = intervals
-        # Measure the length of the intervals
-        self.interval_lengths = list(map(lambda x: abs(x[1] - x[0]), self.intervals))
-        # Cumulative size of the intervals
-        self.cum_interval_length = reduce(lambda x,y: x + y, self.interval_lengths, 0)
-        # Assign weights to an interval proportionate to the interval size
-        self.interval_weights = list(map(lambda x: x/self.cum_interval_length, self.interval_lengths))
-    
-    
-    def sample_parameter(self):
-        cur_min, cur_max = self.intervals[np.random.choice(len(self.intervals), p=self.interval_weights)]
->>>>>>> Removed check_key and replaced with **param_dict for implicit type checks
-<<<<<<< HEAD
    """
    Draw a single sample value from a normal (gaussian) distribution.
    This sampler is characterized by the mean and the standard deviation.

    def sample_parameter(self) -> float:
        return np.random.normal(self.mean, self.st_dev)
-=======
-    def __init__(self, mean, var, **kwargs):
-        self.mean = mean
-        self.var = var
-    
-    def sample_parameter(self):
-        return np.random.normal(self.mean, self.var)
->>>>>>> Removed check_key and replaced with **param_dict for implicit type checks


 class SamplerFactory:
--- a/ml-agents/mlagents/trainers/exception.py
+++ b/ml-agents/mlagents/trainers/exception.py
    """

    pass
-
-class LessonControllerError(TrainerError):
-    """
-    Any error related to the configuration of lesson controller
-    """
-
-    pass
--- a/ml-agents/mlagents/trainers/learn.py
+++ b/ml-agents/mlagents/trainers/learn.py
 from mlagents.trainers import MetaCurriculumError, MetaCurriculum
 from mlagents.trainers.trainer_util import initialize_trainers
 from mlagents.envs import UnityEnvironment
+from mlagents.envs.lesson_controller import LessonController
 from mlagents.envs.sampler_class import SamplerManager
 from mlagents.envs.exception import UnityEnvironmentException, SamplerException
 from mlagents.envs.base_unity_environment import BaseUnityEnvironment
    sampler_file_path = (
        run_options["--sampler"] if run_options["--sampler"] != "None" else None
    )
+    lesson_config_path = (
+        run_options["--lesson-config"] if run_options["--lesson-config"] != "None" else None
+    )

    # Recognize and use docker volume if one is passed as an argument
    if not docker_target_name:
            docker_target_name=docker_target_name
        )

-    sampler = None
-    lesson_config = None
-    if sampler_file_path is not None:
-        sampler = load_config(sampler_file_path)
-        lesson_config = LessonController(lesson_config_path)
-    sampler_manager = SamplerManager(sampler)
-
-
    trainer_config = load_config(trainer_config_path)
    env_factory = create_environment_factory(
        env_path,
    )
    env = SubprocessEnvManager(env_factory, num_envs)
    maybe_meta_curriculum = try_create_meta_curriculum(curriculum_folder, env, lesson)
-    sampler_manager, resampling_interval = create_sampler_manager(
-        sampler_file_path, env.reset_parameters
+    sampler_manager, lesson_config = create_sampler_manager(
+        sampler_file_path, env.reset_parameters, lesson_config_path
    )

    trainers = initialize_trainers(
        run_seed,
        fast_simulation,
        sampler_manager,
-        resampling_interval,
+        lesson_config,
    )

    # Signal that environment has been launched.
    tc.start_learning(env)


-def create_sampler_manager(sampler_file_path, env_reset_params):
+def create_sampler_manager(sampler_file_path, env_reset_params, lesson_config_path):
-        if "resampling-interval" in sampler_config:
-            # Filter arguments that do not exist in the environment
-            resample_interval = sampler_config.pop("resampling-interval")
-            if (resample_interval <= 0) or (not isinstance(resample_interval, int)):
-                raise SamplerException(
-                    "Specified resampling-interval is not valid. Please provide"
-                    " a positive integer value for resampling-interval"
-                )
-        else:
-            raise SamplerException(
-                "Resampling interval was not specified in the sampler file."
-                " Please specify it with the 'resampling-interval' key in the sampler config file."
-            )
+        # if "resampling-interval" in sampler_config:
+        #     # Filter arguments that do not exist in the environment
+        #     resample_interval = sampler_config.pop("resampling-interval")
+        #     if (resample_interval <= 0) or (not isinstance(resample_interval, int)):
+        #         raise SamplerException(
+        #             "Specified resampling-interval is not valid. Please provide"
+        #             " a positive integer value for resampling-interval"
+        #         )
+        # else:
+        #     raise SamplerException(
+        #         "Resampling interval was not specified in the sampler file."
+        #         " Please specify it with the 'resampling-interval' key in the sampler config file."
+        #     )
-    return sampler_manager, resample_interval
+    lesson_controller = LessonController(lesson_config_path)
+    return sampler_manager, lesson_controller


 def try_create_meta_curriculum(
      --env=<file>                Name of the Unity executable [default: None].
      --curriculum=<directory>    Curriculum json directory for environment [default: None].
      --sampler=<file>            Reset parameter yaml file for environment [default: None].
+      --lesson-config=<file>      Indicate how to change lessons for generalization training [default: None].
      --keep-checkpoints=<n>      How many model checkpoints to keep [default: 5].
      --lesson=<n>                Start learning from this lesson [default: 0].
      --load                      Whether to load the model or randomly initialize [default: False].
--- a/ml-agents/mlagents/trainers/trainer_controller.py
+++ b/ml-agents/mlagents/trainers/trainer_controller.py
    UnityCommunicationException,
 )
 from mlagents.envs.sampler_class import SamplerManager
+from mlagents.envs.lesson_controller import LessonController
 from mlagents.envs.timers import hierarchical_timer, get_timer_tree, timed
 from mlagents.trainers import Trainer, TrainerMetrics
 from mlagents.trainers.meta_curriculum import MetaCurriculum
        training_seed: int,
        fast_simulation: bool,
        sampler_manager: SamplerManager,
-        resampling_interval: Optional[int],
+        lesson_controller: LessonController,
    ):
        """
        :param trainers: Trainers for each brain to train.
        :param train: Whether to train model, or only run inference.
        :param training_seed: Seed to use for Numpy and Tensorflow random number generation.
        :param sampler_manager: SamplerManager object handles samplers for resampling the reset parameters.
-        :param resampling_interval: Specifies number of simulation steps after which reset parameters are resampled.
+        :param lesson_controller: Specifies a controller to indicate when to resample the reset parameters.
        """
        self.trainers = trainers
        self.model_path = model_path
        self.training_start_time = time()
        self.fast_simulation = fast_simulation
        self.sampler_manager = sampler_manager
-        self.resampling_interval = resampling_interval
+        self.lesson_controller = lesson_controller
-        brain_names_to_measure_vals = {}
+            brain_names_to_measure_vals = {}
            for (
                brain_name,
                curriculum,
                elif curriculum.measure == "reward":
                    measure_val = np.mean(self.trainers[brain_name].reward_buffer)
                    brain_names_to_measure_vals[brain_name] = measure_val
-        else:
+            return brain_names_to_measure_vals
+
+        elif ((self.sampler_manager is not None) and (self.lesson_controller is not None)):
+            brain_names_to_measure_vals = {}
-                measure_val = np.mean(trainer.reward_buffer)
-                brain_names_to_measure_vals[brain_name] = measure_val
-        return brain_names_to_measure_vals
+                if (self.lesson_controller.measure == "progress"):
+                    measure_val = (
+                        trainer.get_step
+                        / trainer.get_max_steps
+                    )
+                    brain_names_to_measure_vals[brain_name]  = measure_val
+                elif (self.lesson_controller.measure == "reward"):
+                    measure_val = np.mean(trainer.reward_buffer)
+                    brain_names_to_measure_vals[brain_name] = measure_val
+            return brain_names_to_measure_vals
+
+        else:
+            return None

    def _save_model(self):
        """
        generalization_reset = (
            not self.sampler_manager.is_empty()
            and (steps != 0)
-            and (self.resampling_interval)
-            and (steps % self.resampling_interval == 0)
+            and (any(lessons_incremented.values()))
        )
        if meta_curriculum_reset or generalization_reset:
            self.end_trainer_episodes(env, lessons_incremented)
--- a/ml-agents/mlagents/trainers/lesson_controller.py
+++ b/ml-agents/mlagents/trainers/lesson_controller.py
-import yaml
-import math
-import logging
-
-from .exception import LessonControllerError
-
-logger = logging.getLogger("mlagents.trainers")
-
-
-class LessonController:
-    def __init__(self, location):
-        """
-        Initializes a Curriculum object.
-        :param location: Path to yaml file defining reset configuration
-        """
-        self.measure = None
-        try:
-            with open(location) as data_file:
-                data = yaml.load(data_file)
-        except IOError:
-            raise LessonControllerError("The file {0} could not be found.".format(location))
-        except UnicodeDecodeError:
-            raise LessonControllerError("There was an error decoding {}".format(location))
-        self.smoothing_value = 0
-        self.check_keys(data, location)
-        self.measure = data["measure"]
-        self.thresholds = data["thresholds"]
-        self.min_lesson_length = data["min_lesson_length"]
-        self.signal_smoothing = data["signal_smoothing"]
-        self.max_lesson_num = len(self.thresholds)
-        self._lesson_num = 0
-        self.test_lesson_length = (data["test_lesson_length"] 
-                                if "test_lesson_length" in data 
-                                else 1000)
-
-
-    def check_keys(self, data, location):
-        for key in [
-            "measure",
-            "thresholds",
-            "min_lesson_length",
-            "signal_smoothing",
-        ]:
-            if key not in data:
-                raise LessonControllerError(
-                    "{0} does not contain a " "{1} field.".format(location, key)
-                )
-
-
-    @property
-    def lesson_num(self):
-        return self._lesson_num
-
-    @lesson_num.setter
-    def lesson_num(self, lesson_num):
-        self._lesson_num = max(0, min(lesson_num, self.max_lesson_num))
-
-    def _lesson_ready_to_increment(self, brain_name, reward_buff_size):
-        """Determines whether the curriculum of a specified brain is ready
-        to attempt an increment.
-
-        Args:
-            brain_name (str): The name of the brain whose curriculum will be
-                checked for readiness.
-            reward_buff_size (int): The size of the reward buffer of the trainer
-                that corresponds to the specified brain.
-
-        Returns:
-            Whether the curriculum of the specified brain should attempt to
-            increment its lesson.
-        """
-        return (reward_buff_size >= self.min_lesson_length)
-
-    def change_lesson(self, measure_val):
-        """
-        Increments the lesson number if threshold met
-
-        :param measure_val: A dict of brain name to measure value.
-        :return Whether the lesson was incremented.
-        """
-        if (not measure_val) or math.isnan(measure_val):
-            return False
-        if self.signal_smoothing:
-            measure_val = self.smoothing_value * 0.25 + 0.75 * measure_val
-            self.smoothing_value = measure_val
-
-        if (self.lesson_num < self.max_lesson_num):
-            if measure_val >= self.thresholds[self.lesson_num]:
-                    self.lesson_num += 1
-                    logger.info(
-                        "Lesson changed. Now in lesson {0}".format(
-                            self.lesson_num + 1,
-                        )
-                    )
-                    return True
-        return False
-
-    def check_change_lesson(self, measure_vals, reward_buff_sizes=None):
-        """Checks if the brain met the threshold defined performance. 
-        Note that calling this method does not guarantee the
-        lesson of a brain will increment. The lesson will
-        only increment if the specified measure threshold defined in the
-        param_reset_config has been reached and the minimum number of episodes in the
-        lesson have been completed.
-
-        Args:
-            measure_vals (dict): A dict of brain name to measure value.
-            reward_buff_sizes (dict): A dict of brain names to the size of their
-                corresponding reward buffers.
-
-        Returns:
-            A dict from brain name to whether that brain's lesson was changed.
-        """
-        ret = {}
-        if reward_buff_sizes:
-            for brain_name, buff_size in reward_buff_sizes.items():
-                if self._lesson_ready_to_increment(brain_name, buff_size):
-                    measure_val = measure_vals[brain_name]
-                    ret[brain_name] = self.change_lesson(measure_val)
-        else:
-            for brain_name, measure_val in measure_vals.items():
-                ret[brain_name] = self.change_lesson(measure_val)
-        return ret
-