
Change reset parameters based on reward or progress metric

/develop-generalizationTraining-TrainerController
sankalp04 5 years ago
Current commit
c9ea139f
1 changed file with 122 additions and 0 deletions
ml-agents/mlagents/trainers/lesson_controller.py
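
For context, the new LessonController loads its reset configuration from a YAML file and requires the four keys validated in check_keys below. A minimal sketch of writing such a file follows; the values and the file name reset_config.yaml are illustrative assumptions, not part of this commit.

# Illustrative only: produce a reset configuration containing the keys the
# controller validates; the specific values here are example assumptions.
import yaml

reset_config = {
    "measure": "reward",         # metric compared against the thresholds
    "thresholds": [0.5, 0.75],   # one threshold per lesson transition
    "min_lesson_length": 100,    # reward-buffer size required before a change
    "signal_smoothing": True,    # exponentially smooth the measure value
}

with open("reset_config.yaml", "w") as f:
    yaml.safe_dump(reset_config, f)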


import os
import yaml
import math
import logging

from .exception import LessonControllerError

logger = logging.getLogger("mlagents.trainers")


class LessonController(object):
    def __init__(self, location):
        """
        Initializes a LessonController object.

        :param location: Path to a YAML file defining the reset configuration.
        """
        self.measure = None
        try:
            with open(location) as data_file:
                self.data = yaml.safe_load(data_file)
        except IOError:
            raise LessonControllerError(
                "The file {0} could not be found.".format(location)
            )
        except UnicodeDecodeError:
            raise LessonControllerError(
                "There was an error decoding {}".format(location)
            )
        self.smoothing_value = 0
        self.check_keys(location)
        self.measure = self.data["measure"]
        self.thresholds = self.data["thresholds"]
        self.min_lesson_length = self.data["min_lesson_length"]
        self.max_lesson_num = len(self.thresholds)
        self._lesson_num = 0
    def check_keys(self, location):
        """Raises a LessonControllerError if a required configuration key is missing."""
        for key in [
            "measure",
            "thresholds",
            "min_lesson_length",
            "signal_smoothing",
        ]:
            if key not in self.data:
                raise LessonControllerError(
                    "{0} does not contain a {1} field.".format(location, key)
                )
    @property
    def lesson_num(self):
        return self._lesson_num

    @lesson_num.setter
    def lesson_num(self, lesson_num):
        self._lesson_num = max(0, min(lesson_num, self.max_lesson_num))
    def _lesson_ready_to_increment(self, brain_name, reward_buff_size):
        """Determines whether the curriculum of a specified brain is ready
        to attempt an increment.

        Args:
            brain_name (str): The name of the brain whose curriculum will be
                checked for readiness.
            reward_buff_size (int): The size of the reward buffer of the trainer
                that corresponds to the specified brain.

        Returns:
            Whether the curriculum of the specified brain should attempt to
            increment its lesson.
        """
        return reward_buff_size >= self.min_lesson_length
    def change_lesson(self, measure_val):
        """
        Increments the lesson number if the current threshold is met.

        :param measure_val: The measure value (reward or progress) for a single brain.
        :return: Whether the lesson was incremented.
        """
        if not self.data or not measure_val or math.isnan(measure_val):
            return False
        if self.data["signal_smoothing"]:
            measure_val = self.smoothing_value * 0.25 + 0.75 * measure_val
            self.smoothing_value = measure_val
        if self.lesson_num < self.max_lesson_num:
            if measure_val >= self.data["thresholds"][self.lesson_num]:
                self.lesson_num += 1
                logger.info(
                    "Lesson changed. Now in lesson {0}".format(self.lesson_num + 1)
                )
                return True
        return False
    def check_change_lesson(self, measure_vals, reward_buff_sizes=None):
        """Checks whether each brain has met the threshold-defined performance.

        Note that calling this method does not guarantee the lesson of a brain
        will increment. The lesson will only increment if the measure threshold
        defined in the param_reset_config has been reached and the minimum
        number of episodes in the lesson have been completed.

        Args:
            measure_vals (dict): A dict of brain name to measure value.
            reward_buff_sizes (dict): A dict of brain names to the size of their
                corresponding reward buffers.

        Returns:
            A dict from brain name to whether that brain's lesson was changed.
        """
        ret = {}
        if reward_buff_sizes:
            for brain_name, buff_size in reward_buff_sizes.items():
                if self._lesson_ready_to_increment(brain_name, buff_size):
                    measure_val = measure_vals[brain_name]
                    ret[brain_name] = self.change_lesson(measure_val)
        else:
            for brain_name, measure_val in measure_vals.items():
                ret[brain_name] = self.change_lesson(measure_val)
        return ret