浏览代码

Fixing bugs, updating tests.

- Added more unit tests for school module.
- Fixed bugs found during testing with PushBlock env.
/develop-generalizationTraining-TrainerController
Deric Pang 7 年前
当前提交
c88c7e42
共有 6 个文件被更改,包括 67 次插入和 57 次删除
  1. 16
      python/tests/test_curriculum.py
  2. 9
      python/tests/test_school.py
  3. 42
      python/unitytrainers/curriculum.py
  4. 27
      python/unitytrainers/school.py
  5. 7
      python/unitytrainers/trainer.py
  6. 23
      python/unitytrainers/trainer_controller.py

16
python/tests/test_curriculum.py


def test_init_curriculum_happy_path(mock_file, location, default_reset_parameters):
curriculum = Curriculum(location, default_reset_parameters)
assert curriculum.lesson_number == 0
assert curriculum.lesson_num == 0
assert curriculum.measure == 'reward'

@patch('builtins.open', new_callable=mock_open, read_data=dummy_curriculum_json_str)
def test_increment_lesson(mock_file, location, default_reset_parameters):
curriculum = Curriculum(location, default_reset_parameters)
assert curriculum.lesson_number == 0
assert curriculum.lesson_num == 0
curriculum.lesson_number = 1
assert curriculum.lesson_number == 1
curriculum.lesson_num = 1
assert curriculum.lesson_num == 1
assert curriculum.lesson_number == 1
assert curriculum.lesson_num == 1
assert curriculum.lesson_number == 1
assert curriculum.lesson_num == 1
assert curriculum.lesson_number == 2
assert curriculum.lesson_num == 2
@patch('builtins.open', new_callable=mock_open, read_data=dummy_curriculum_json_str)

curriculum.lesson_number = 2
curriculum.lesson_num = 2
assert curriculum.get_config() == {'param1': 0.3, 'param2': 20, 'param3': 0.7}
assert curriculum.get_config(0) == {"param1": 0.7, "param2": 100, "param3": 0.2}

9
python/tests/test_school.py


import pytest
from unittest.mock import patch
from unittest.mock import patch, call
from unitytrainers import School

@patch('unitytrainers.Curriculum.__init__', return_value=None)
@patch('os.listdir', return_value=['TestBrain1.json', 'TestBrain2.json'])
def test_init_school_happy_path(listdir, curriculum_mock, default_reset_parameters):
print(curriculum_mock)
def test_init_school_happy_path(listdir, mock_curriculum, default_reset_parameters):
school = School('test-school/', default_reset_parameters)
assert len(school.brains_to_curriculums) == 2

calls = [call('test-school/TestBrain1.json', default_reset_parameters), call('test-school/TestBrain2.json', default_reset_parameters)]
mock_curriculum.assert_has_calls(calls)

42
python/unitytrainers/curriculum.py


:param default_reset_parameters: Set of reset parameters for environment.
"""
self.lesson_length = 0
self.max_lesson_number = 0
self._measure_type = None
self._lesson_number = 0
self.max_lesson_num = 0
self.measure = None
self._lesson_num = 0
if location is None:
self.data = None

raise CurriculumError("{0} does not contain a "
"{1} field.".format(location, key))
self.smoothing_value = 0
self._measure_type = self.data['measure']
self.max_lesson_number = len(self.data['thresholds'])
self.measure = self.data['measure']
self.max_lesson_num = len(self.data['thresholds'])
parameters = self.data['parameters']
for key in parameters:

"the Environment".format(key, location))
if len(parameters[key]) != self.max_lesson_number + 1:
if len(parameters[key]) != self.max_lesson_num + 1:
self.max_lesson_number + 1, len(parameters[key])))
@property
def measure(self):
return self._measure_type
self.max_lesson_num + 1, len(parameters[key])))
def lesson_number(self):
return self._lesson_number
def lesson_num(self):
return self._lesson_num
@lesson_number.setter
def lesson_number(self, lesson_number):
@lesson_num.setter
def lesson_num(self, lesson_num):
self._lesson_number = max(0, min(lesson_number, self.max_lesson_number))
self._lesson_num = max(0, min(lesson_num, self.max_lesson_num))
def increment_lesson(self, progress):
"""

progress = self.smoothing_value * 0.25 + 0.75 * progress
self.smoothing_value = progress
self.lesson_length += 1
if self.lesson_number < self.max_lesson_number:
if ((progress > self.data['thresholds'][self.lesson_number]) and
if self.lesson_num < self.max_lesson_num:
if ((progress > self.data['thresholds'][self.lesson_num]) and
self.lesson_number += 1
self.lesson_num += 1
config[key] = parameters[key][self.lesson_number]
config[key] = parameters[key][self.lesson_num]
.format(self.lesson_number,
.format(self.lesson_num,
', '.join([str(x) + ' -> ' + str(config[x]) for x in config])))
def get_config(self, lesson=None):

if self.data is None:
return {}
if lesson is None:
lesson = self.lesson_number
lesson = max(0, min(lesson, self.max_lesson_number))
lesson = self.lesson_num
lesson = max(0, min(lesson, self.max_lesson_num))
config = {}
parameters = self.data["parameters"]
for key in parameters:

27
python/unitytrainers/school.py


self._brains_to_curriculums = None
else:
self._brains_to_curriculums = {}
for location in os.listdir(curriculum_folder):
brain_name = location.split('.')[0]
self._brains_to_curriculums[brain_name] = Curriculum(location, default_reset_parameters)
for curriculum_filename in os.listdir(curriculum_folder):
brain_name = curriculum_filename.split('.')[0]
curriculum_filepath = os.path.join(curriculum_folder, curriculum_filename)
self._brains_to_curriculums[brain_name] = Curriculum(curriculum_filepath, default_reset_parameters)
@property
def lesson_nums(self):
    """Return the current lesson number of every curriculum, keyed by brain name.

    :return: Dict mapping each brain name to its curriculum's ``lesson_num``.
        Empty when there are no curriculums.
    """
    # NOTE(review): the constructor can leave the mapping as None (no
    # curriculum folder); treat that as "no curriculums" -- confirm.
    curriculums = self.brains_to_curriculums or {}
    # Iterate with .items(): looping over the dict itself yields only the
    # keys, so unpacking into (brain_name, curriculum) would fail.
    return {brain_name: curriculum.lesson_num
            for brain_name, curriculum in curriculums.items()}

@lesson_nums.setter
def lesson_nums(self, lesson_nums):
    """Set the lesson number of the curriculum belonging to each listed brain.

    :param lesson_nums: Dict mapping brain name to the lesson number to assign.
    """
    for brain_name, lesson in lesson_nums.items():
        self.brains_to_curriculums[brain_name].lesson_num = lesson
def increment_lessons(self, progresses):
for brain_name, progress in progresses.items():

def set_lesson_nums(self, lesson_nums):
for brain_name, lesson in lesson_nums.items():
self.brains_to_curriculums[brain_name].lesson_number = lesson
def set_all_curriculums_to_lesson_num(self, lesson_num):
    """Assign the same lesson number to every curriculum in the school.

    :param lesson_num: Lesson number to assign to each curriculum.
    """
    # NOTE(review): the constructor can leave the mapping as None (no
    # curriculum folder); treat that as "nothing to update" -- confirm.
    curriculums = self.brains_to_curriculums or {}
    # Only the curriculum objects are needed, so iterate the values directly.
    for curriculum in curriculums.values():
        curriculum.lesson_num = lesson_num
def get_config(self):

parameters = curriculum.data["parameters"]
for key in parameters:
config[key] = parameters[key][curriculum.lesson_number]
config[key] = parameters[key][curriculum.lesson_num]
return config

7
python/unitytrainers/trainer.py


"""
raise UnityTrainerException("The update_model method was not implemented.")
def write_summary(self, lesson_number):
def write_summary(self, lesson_nums):
:param lesson_number: The lesson the trainer is at.
:param lesson_nums: The lesson the trainer is at.
"""
if (self.get_step % self.trainer_parameters['summary_freq'] == 0 and self.get_step != 0 and
self.is_training and self.get_step <= self.get_max_steps):

stat_mean = float(np.mean(self.stats[key]))
summary.value.add(tag='Info/{}'.format(key), simple_value=stat_mean)
self.stats[key] = []
summary.value.add(tag='Info/Lesson', simple_value=lesson_number)
# summary.value.add(tag='Info/Lesson', simple_value=lesson_number)
self.write_tensorboard_text('LessonNumbers', lesson_nums)
self.summary_writer.add_summary(summary, self.get_step)
self.summary_writer.flush()

23
python/unitytrainers/trainer_controller.py


class TrainerController(object):
def __init__(self, env_path, run_id, save_freq, curriculum_folder, fast_simulation, load, train,
worker_id, keep_checkpoints, lesson_nums, seed, docker_target_name, trainer_config_path,
worker_id, keep_checkpoints, lesson, seed, docker_target_name, trainer_config_path,
no_graphics):
"""
:param env_path: Location to the environment executable to be loaded.

:param train: Whether to train model, or only run inference
:param worker_id: Number to add to communication port (5005). Used for multi-environment
:param keep_checkpoints: How many model checkpoints to keep
:param lesson_nums: Dict from brain name to starting lesson number
:param lesson: Start learning from this lesson
:param seed: Random seed used for training.
:param docker_target_name: Name of docker volume that will contain all data.
:param trainer_config_path: Fully qualified path to location of trainer configuration file

self.logger = logging.getLogger("unityagents")
self.run_id = run_id
self.save_freq = save_freq
self.lesson_nums = lesson_nums
self.lesson = lesson
self.fast_simulation = fast_simulation
self.load_model = load
self.train_model = train

if self.curriculum_folder is not None:
brain_names_to_progresses = {}
for brain_name, curriculum in self.school.brains_to_curriculums.items():
if curriculum.measure_type == "progress":
if curriculum.measure == "progress":
elif curriculum.measure_type == "reward":
elif curriculum.measure == "reward":
progress = self.trainers[brain_name].get_last_reward
brain_names_to_progresses[brain_name] = progress
return brain_names_to_progresses

def _initialize_trainers(self, trainer_config, sess):
trainer_parameters_dict = {}
# TODO: This probably doesn't need to be reinitialized.
self.trainers = {}
for brain_name in self.env.external_brain_names:
trainer_parameters = trainer_config['default'].copy()

.format(model_path))
def start_learning(self):
self.school.set_lesson_nums(self.lesson_nums)
# TODO: Should be able to start learning at different lesson numbers for each curriculum.
self.school.set_all_curriculums_to_lesson_num(self.lesson)
trainer_config = self._load_config()
self._create_model_path(self.model_path)

else:
sess.run(init)
global_step = 0 # This is only for saving the model
self.school.increment_lessons(self._get_progresses)
# TODO: Environment needs a new reset method which takes into account all reset params from all
# brains.
self.school.increment_lessons(self._get_progresses())
curr_info = self.env.reset(config=self.school.get_config(), train_mode=self.fast_simulation)
if self.train_model:
for brain_name, trainer in self.trainers.items():

if self.env.global_done:
self.school.increment_lessons(self._get_progresses())
# TODO: Environment needs a new reset method which takes into account all reset params from all
# brains.
curr_info = self.env.reset(config=self.school.get_config(), train_mode=self.fast_simulation)
for brain_name, trainer in self.trainers.items():
trainer.end_episode()

# Perform gradient descent with experience buffer
trainer.update_model()
# Write training statistics to Tensorboard.
# TODO: Not sure how to replace this line.
#trainer.write_summary(self.curriculum.lesson_number)
trainer.write_summary(self.school.lesson_nums)
if self.train_model and trainer.get_step <= trainer.get_max_steps:
trainer.increment_step_and_update_last_reward()
if self.train_model:

正在加载...
取消
保存