浏览代码

Rebase develop

/develop-generalizationTraining-TrainerController
sankalp04 5 年前
当前提交
f331e5b7
共有 6 个文件被更改,包括 49 次插入214 次删除
  1. 4
      ml-agents-envs/mlagents/envs/exception.py
  2. 45
      ml-agents-envs/mlagents/envs/sampler_class.py
  3. 7
      ml-agents/mlagents/trainers/exception.py
  4. 50
      ml-agents/mlagents/trainers/learn.py
  5. 33
      ml-agents/mlagents/trainers/trainer_controller.py
  6. 124
      ml-agents/mlagents/trainers/lesson_controller.py

4
ml-agents-envs/mlagents/envs/exception.py


pass
class SamplerException(UnityException):
class LessonControllerError(UnityException):
Related to errors with the sampler actions.
Any error related to the configuration of lesson controller
"""
pass

45
ml-agents-envs/mlagents/envs/sampler_class.py


import numpy as np
<<<<<<< HEAD
=======
from functools import *
>>>>>>> Removed check_key and replaced with **param_dict for implicit type checks
<<<<<<< HEAD
=======
class SamplerException(Exception):
pass
class Sampler(ABC):
>>>>>>> Removed check_key and replaced with **param_dict for implicit type checks
class Sampler(ABC):
@abstractmethod

class UniformSampler(Sampler):
<<<<<<< HEAD
"""
Uniformly draws a single sample in the range [min_value, max_value).
"""

self.max_value = max_value
def sample_parameter(self) -> float:
=======
# kwargs acts as a sink for extra unneeded args
def __init__(self, min_value, max_value, **kwargs):
self.min_value = min_value
self.max_value = max_value
def sample_parameter(self):
>>>>>>> Removed check_key and replaced with **param_dict for implicit type checks
<<<<<<< HEAD
class MultiRangeUniformSampler(Sampler):
"""

cur_min, cur_max = self.intervals[
np.random.choice(len(self.intervals), p=self.interval_weights)
]
=======
class MultiRangeUniformSampler(Sampler):
def __init__(self, intervals, **kwargs):
self.intervals = intervals
# Measure the length of the intervals
self.interval_lengths = list(map(lambda x: abs(x[1] - x[0]), self.intervals))
# Cumulative size of the intervals
self.cum_interval_length = reduce(lambda x,y: x + y, self.interval_lengths, 0)
# Assign weights to an interval proportionate to the interval size
self.interval_weights = list(map(lambda x: x/self.cum_interval_length, self.interval_lengths))
def sample_parameter(self):
cur_min, cur_max = self.intervals[np.random.choice(len(self.intervals), p=self.interval_weights)]
>>>>>>> Removed check_key and replaced with **param_dict for implicit type checks
<<<<<<< HEAD
"""
Draw a single sample value from a normal (gaussian) distribution.
This sampler is characterized by the mean and the standard deviation.

def sample_parameter(self) -> float:
return np.random.normal(self.mean, self.st_dev)
=======
def __init__(self, mean, var, **kwargs):
self.mean = mean
self.var = var
def sample_parameter(self):
return np.random.normal(self.mean, self.var)
>>>>>>> Removed check_key and replaced with **param_dict for implicit type checks
class SamplerFactory:

7
ml-agents/mlagents/trainers/exception.py


"""
pass
class LessonControllerError(TrainerError):
"""
Any error related to the configuration of lesson controller
"""
pass

50
ml-agents/mlagents/trainers/learn.py


from mlagents.trainers import MetaCurriculumError, MetaCurriculum
from mlagents.trainers.trainer_util import initialize_trainers
from mlagents.envs import UnityEnvironment
from mlagents.envs.lesson_controller import LessonController
from mlagents.envs.sampler_class import SamplerManager
from mlagents.envs.exception import UnityEnvironmentException, SamplerException
from mlagents.envs.base_unity_environment import BaseUnityEnvironment

sampler_file_path = (
run_options["--sampler"] if run_options["--sampler"] != "None" else None
)
lesson_config_path = (
run_options["--lesson-config"] if run_options["--lesson-config"] != "None" else None
)
# Recognize and use docker volume if one is passed as an argument
if not docker_target_name:

docker_target_name=docker_target_name
)
sampler = None
lesson_config = None
if sampler_file_path is not None:
sampler = load_config(sampler_file_path)
lesson_config = LessonController(lesson_config_path)
sampler_manager = SamplerManager(sampler)
trainer_config = load_config(trainer_config_path)
env_factory = create_environment_factory(
env_path,

)
env = SubprocessEnvManager(env_factory, num_envs)
maybe_meta_curriculum = try_create_meta_curriculum(curriculum_folder, env, lesson)
sampler_manager, resampling_interval = create_sampler_manager(
sampler_file_path, env.reset_parameters
sampler_manager, lesson_config = create_sampler_manager(
sampler_file_path, env.reset_parameters, lesson_config_path
)
trainers = initialize_trainers(

run_seed,
fast_simulation,
sampler_manager,
resampling_interval,
lesson_config,
)
# Signal that environment has been launched.

tc.start_learning(env)
def create_sampler_manager(sampler_file_path, env_reset_params):
def create_sampler_manager(sampler_file_path, env_reset_params, lesson_config_path):
if "resampling-interval" in sampler_config:
# Filter arguments that do not exist in the environment
resample_interval = sampler_config.pop("resampling-interval")
if (resample_interval <= 0) or (not isinstance(resample_interval, int)):
raise SamplerException(
"Specified resampling-interval is not valid. Please provide"
" a positive integer value for resampling-interval"
)
else:
raise SamplerException(
"Resampling interval was not specified in the sampler file."
" Please specify it with the 'resampling-interval' key in the sampler config file."
)
# if "resampling-interval" in sampler_config:
# # Filter arguments that do not exist in the environment
# resample_interval = sampler_config.pop("resampling-interval")
# if (resample_interval <= 0) or (not isinstance(resample_interval, int)):
# raise SamplerException(
# "Specified resampling-interval is not valid. Please provide"
# " a positive integer value for resampling-interval"
# )
# else:
# raise SamplerException(
# "Resampling interval was not specified in the sampler file."
# " Please specify it with the 'resampling-interval' key in the sampler config file."
# )
return sampler_manager, resample_interval
lesson_controller = LessonController(lesson_config_path)
return sampler_manager, lesson_controller
def try_create_meta_curriculum(

--env=<file> Name of the Unity executable [default: None].
--curriculum=<directory> Curriculum json directory for environment [default: None].
--sampler=<file> Reset parameter yaml file for environment [default: None].
--lesson-config=<file> Indicate how to change lessons for generalization training [default: None].
--keep-checkpoints=<n> How many model checkpoints to keep [default: 5].
--lesson=<n> Start learning from this lesson [default: 0].
--load Whether to load the model or randomly initialize [default: False].

33
ml-agents/mlagents/trainers/trainer_controller.py


UnityCommunicationException,
)
from mlagents.envs.sampler_class import SamplerManager
from mlagents.envs.lesson_controller import LessonController
from mlagents.envs.timers import hierarchical_timer, get_timer_tree, timed
from mlagents.trainers import Trainer, TrainerMetrics
from mlagents.trainers.meta_curriculum import MetaCurriculum

training_seed: int,
fast_simulation: bool,
sampler_manager: SamplerManager,
resampling_interval: Optional[int],
lesson_controller: LessonController,
):
"""
:param trainers: Trainers for each brain to train.

:param train: Whether to train model, or only run inference.
:param training_seed: Seed to use for Numpy and Tensorflow random number generation.
:param sampler_manager: SamplerManager object handles samplers for resampling the reset parameters.
:param resampling_interval: Specifies number of simulation steps after which reset parameters are resampled.
:param lesson_controller: Specifies a controller to indicate when to resample the reset parameters.
"""
self.trainers = trainers
self.model_path = model_path

self.training_start_time = time()
self.fast_simulation = fast_simulation
self.sampler_manager = sampler_manager
self.resampling_interval = resampling_interval
self.lesson_controller = lesson_controller
brain_names_to_measure_vals = {}
brain_names_to_measure_vals = {}
for (
brain_name,
curriculum,

elif curriculum.measure == "reward":
measure_val = np.mean(self.trainers[brain_name].reward_buffer)
brain_names_to_measure_vals[brain_name] = measure_val
else:
return brain_names_to_measure_vals
elif ((self.sampler_manager is not None) and (self.lesson_controller is not None)):
brain_names_to_measure_vals = {}
measure_val = np.mean(trainer.reward_buffer)
brain_names_to_measure_vals[brain_name] = measure_val
return brain_names_to_measure_vals
if (self.lesson_controller.measure == "progress"):
measure_val = (
trainer.get_step
/ trainer.get_max_steps
)
brain_names_to_measure_vals[brain_name] = measure_val
elif (self.lesson_controller.measure == "reward"):
measure_val = np.mean(trainer.reward_buffer)
brain_names_to_measure_vals[brain_name] = measure_val
return brain_names_to_measure_vals
else:
return None
def _save_model(self):
"""

generalization_reset = (
not self.sampler_manager.is_empty()
and (steps != 0)
and (self.resampling_interval)
and (steps % self.resampling_interval == 0)
and (any(lessons_incremented.values()))
)
if meta_curriculum_reset or generalization_reset:
self.end_trainer_episodes(env, lessons_incremented)

124
ml-agents/mlagents/trainers/lesson_controller.py


import yaml
import math
import logging
from .exception import LessonControllerError
logger = logging.getLogger("mlagents.trainers")
class LessonController:
    """Tracks which lesson generalization training is in and when to advance.

    The controller is configured from a YAML file that must define the keys
    ``measure``, ``thresholds``, ``min_lesson_length`` and
    ``signal_smoothing``; ``test_lesson_length`` is optional (default 1000).

    NOTE(review): a single lesson counter and a single smoothing value are
    shared by every brain passed to ``check_change_lesson``, so several
    brains can advance the lesson more than once in one call - confirm this
    is intended.
    """

    def __init__(self, location):
        """
        Initializes a LessonController object.
        :param location: Path to yaml file defining reset configuration
        :raises LessonControllerError: If the file cannot be read or decoded,
            is not a YAML mapping, or is missing a required key.
        """
        self.measure = None
        try:
            with open(location) as data_file:
                # safe_load only builds plain Python containers/scalars, which
                # is all this config needs; yaml.load without a Loader can
                # construct arbitrary objects and is deprecated.
                data = yaml.safe_load(data_file)
        except IOError as err:
            raise LessonControllerError(
                "The file {0} could not be found.".format(location)
            ) from err
        except UnicodeDecodeError as err:
            raise LessonControllerError(
                "There was an error decoding {}".format(location)
            ) from err
        # Last smoothed measure; seeds the filter used in change_lesson.
        self.smoothing_value = 0
        self.check_keys(data, location)
        self.measure = data["measure"]
        self.thresholds = data["thresholds"]
        self.min_lesson_length = data["min_lesson_length"]
        self.signal_smoothing = data["signal_smoothing"]
        # One lesson per threshold; lesson_num is clamped to [0, max_lesson_num].
        self.max_lesson_num = len(self.thresholds)
        self._lesson_num = 0
        self.test_lesson_length = data.get("test_lesson_length", 1000)

    def check_keys(self, data, location):
        """
        Validates that the loaded configuration has every required field.
        :param data: Parsed content of the configuration file.
        :param location: Path of the configuration file (used in messages).
        :raises LessonControllerError: If data is not a mapping or a required
            key is absent.
        """
        # An empty YAML document parses to None and a top-level list parses
        # to a list; report both as configuration errors instead of letting
        # a TypeError escape from the membership/index operations below.
        if not isinstance(data, dict):
            raise LessonControllerError(
                "{0} does not contain a valid lesson controller "
                "configuration.".format(location)
            )
        for key in [
            "measure",
            "thresholds",
            "min_lesson_length",
            "signal_smoothing",
        ]:
            if key not in data:
                raise LessonControllerError(
                    "{0} does not contain a " "{1} field.".format(location, key)
                )

    @property
    def lesson_num(self):
        """Index of the current lesson (0-based)."""
        return self._lesson_num

    @lesson_num.setter
    def lesson_num(self, lesson_num):
        # Clamp so the lesson index never leaves [0, max_lesson_num].
        self._lesson_num = max(0, min(lesson_num, self.max_lesson_num))

    def _lesson_ready_to_increment(self, brain_name, reward_buff_size):
        """Determines whether enough episodes were collected to attempt an
        increment.

        Args:
            brain_name (str): The name of the brain being checked. Currently
                unused; the readiness test depends only on the buffer size.
            reward_buff_size (int): The size of the reward buffer of the
                trainer that corresponds to the specified brain.

        Returns:
            Whether reward_buff_size has reached min_lesson_length.
        """
        return reward_buff_size >= self.min_lesson_length

    def change_lesson(self, measure_val):
        """
        Increments the lesson number if threshold met
        :param measure_val: The measure value (a number) to compare against
            the threshold of the current lesson. None and NaN are ignored.
        :return Whether the lesson was incremented.
        """
        # Only a missing or NaN measure is skipped. A value of exactly 0 is a
        # legitimate measurement (e.g. a mean reward of 0.0) and must still
        # feed the smoothing filter and the threshold comparison.
        if measure_val is None or math.isnan(measure_val):
            return False
        if self.signal_smoothing:
            # Weighted blend of the previous smoothed value and the new one.
            measure_val = self.smoothing_value * 0.25 + 0.75 * measure_val
            self.smoothing_value = measure_val
        if self.lesson_num < self.max_lesson_num:
            if measure_val >= self.thresholds[self.lesson_num]:
                self.lesson_num += 1
                logger.info(
                    "Lesson changed. Now in lesson {0}".format(
                        self.lesson_num + 1,
                    )
                )
                return True
        return False

    def check_change_lesson(self, measure_vals, reward_buff_sizes=None):
        """Checks if the brain met the threshold defined performance.
        Note that calling this method does not guarantee the
        lesson of a brain will increment. The lesson will
        only increment if the specified measure threshold defined in the
        param_reset_config has been reached and the minimum number of
        episodes in the lesson have been completed.

        Args:
            measure_vals (dict): A dict of brain name to measure value.
            reward_buff_sizes (dict): A dict of brain names to the size of
                their corresponding reward buffers. When omitted or empty,
                every brain in measure_vals is evaluated without the
                minimum-length check.

        Returns:
            A dict from brain name to whether that brain's lesson was
            changed. Brains whose reward buffer is still too small are
            left out of the dict.
        """
        ret = {}
        if reward_buff_sizes:
            for brain_name, buff_size in reward_buff_sizes.items():
                if self._lesson_ready_to_increment(brain_name, buff_size):
                    measure_val = measure_vals[brain_name]
                    ret[brain_name] = self.change_lesson(measure_val)
        else:
            for brain_name, measure_val in measure_vals.items():
                ret[brain_name] = self.change_lesson(measure_val)
        return ret
正在加载...
取消
保存