浏览代码

Multi-curriculum support added.

- New school module maps brains to curriculums.
/develop-generalizationTraining-TrainerController
Deric Pang 7 年前
当前提交
c6617b70
共有 9 个文件被更改,包括 137 次插入56 次删除
  1. 8
      docs/Training-Curriculum-Learning.md
  2. 17
      python/curricula/push_curriculum.json
  3. 21
      python/tests/test_curriculum.py
  4. 1
      python/unitytrainers/__init__.py
  5. 10
      python/unitytrainers/curriculum.py
  6. 67
      python/unitytrainers/trainer_controller.py
  7. 4
      unity-environment/Assets/ML-Agents/Examples/PushBlock/Scripts/PushAgentBasic.cs
  8. 21
      python/tests/test_school.py
  9. 44
      python/unitytrainers/school.py

8
docs/Training-Curriculum-Learning.md


## How-To
So how does it work? In order to define a curriculum, the first step is to decide which
In order to define a curriculum, the first step is to decide which
varies is the height of the wall. We can define this as a reset parameter in the Academy
varies is the height of the wall. We define this as a `Reset Parameter` in the Academy
taken place, or what the average reward the agent has received in the recent past is.
taken place, or what the average reward the agent has received in the recent past is.
Finally, we have to use the reset parameter we defined and modify the environment from
the agent's `AgentReset()` function.
Once these are in place, we simply launch learn.py using the `--curriculum-file` flag to
point to the JSON file, and PPO will train using Curriculum Learning. Of course we can
then keep track of the current lesson and progress via TensorBoard.

17
python/curricula/push_curriculum.json


{
"measure" : "progress",
"thresholds" : [0.1],
"min_lesson_length" : 2,
"signal_smoothing" : true,
"parameters" :
"PushBlockBrain" :
"goal_width" : [25.0, 5.0],
"goal_length" : [5.0, 1.0]
"measure" : "progress",
"thresholds" : [0.1],
"min_lesson_length" : 2,
"signal_smoothing" : true,
"parameters" :
{
"goal_width" : [25.0, 5.0],
"goal_length" : [5.0, 1.0]
}
}
}

21
python/tests/test_curriculum.py


}
'''
@pytest.fixture
def location():
    """Filename of the curriculum JSON used by the happy-path tests."""
    return 'TestBrain.json'
@pytest.fixture
def default_reset_parameters():
    """Reset parameters every test curriculum is validated against."""
    return dict(param1=1, param2=1, param3=1)
def test_init_curriculum_happy_path(mock_file):
curriculum = Curriculum('TestBrain.json', {"param1": 1, "param2": 1, "param3": 1})
def test_init_curriculum_happy_path(mock_file, location, default_reset_parameters):
curriculum = Curriculum(location, default_reset_parameters)
assert curriculum.lesson_number == 0
assert curriculum.measure == 'reward'

def test_init_curriculum_bad_curriculum_raises_error(mock_file):
def test_init_curriculum_bad_curriculum_raises_error(mock_file, location, default_reset_parameters):
Curriculum('TestBrainCurriculum.json', {"param1": 1, "param2": 1, "param3": 1})
Curriculum(location, default_reset_parameters)
def test_increment_lesson(mock_file):
curriculum = Curriculum('TestBrain.json', {"param1": 1, "param2": 1, "param3": 1})
def test_increment_lesson(mock_file, location, default_reset_parameters):
curriculum = Curriculum(location, default_reset_parameters)
assert curriculum.lesson_number == 0
curriculum.lesson_number = 1

1
python/unitytrainers/__init__.py


from .buffer import *
from .curriculum import *
from .school import *
from .models import *
from .trainer_controller import *
from .bc.models import *

10
python/unitytrainers/curriculum.py


self.max_lesson_number = 0
self._measure_type = None
self._lesson_number = 0
if location is None:
self.data = None
else:

if key not in self.data:
raise CurriculumError("{0} does not contain a "
"{1} field.".format(location, key))
parameters = self.data['parameters']
self.measure_type = self.data['measure']
self.smoothing_value = 0
self._measure_type = self.data['measure']
parameters = self.data['parameters']
for key in parameters:
if len(parameters[key]) != self.max_lesson_number + 1:
raise CurriculumError(
"The parameter {0} in Curriculum {1} must have {2} values "

@property
def measure(self):
return self.measure_type
return self._measure_type
@property
def lesson_number(self):

67
python/unitytrainers/trainer_controller.py


from tensorflow.python.tools import freeze_graph
from unitytrainers.ppo.trainer import PPOTrainer
from unitytrainers.bc.trainer import BehavioralCloningTrainer
from unitytrainers import Curriculum
from unitytrainers import School
def __init__(self, env_path, run_id, save_freq, curriculum_file, fast_simulation, load, train,
worker_id, keep_checkpoints, lesson, seed, docker_target_name, trainer_config_path,
def __init__(self, env_path, run_id, save_freq, curriculum_folder, fast_simulation, load, train,
worker_id, keep_checkpoints, lesson_nums, seed, docker_target_name, trainer_config_path,
:param curriculum_file: Curriculum json file for environment
:param curriculum_folder: Folder containing JSON curriculums for the env
:param lesson: Start learning from this lesson
:param lesson_nums: Dict from brain name to starting lesson number
:param seed: Random seed used for training.
:param docker_target_name: Name of docker volume that will contain all data.
:param trainer_config_path: Fully qualified path to location of trainer configuration file

if docker_target_name == '':
self.docker_training = False
self.model_path = './models/{run_id}'.format(run_id=run_id)
self.curriculum_file = curriculum_file
self.curriculum_folder = curriculum_folder
self.summaries_dir = './summaries'
else:
self.docker_training = True

if env_path is not None:
env_path = '/{docker_target_name}/{env_name}'.format(docker_target_name=docker_target_name,
env_name=env_path)
if curriculum_file is None:
self.curriculum_file = None
if curriculum_folder is None:
self.curriculum_folder = None
self.curriculum_file = '/{docker_target_name}/{curriculum_file}'.format(
self.curriculum_folder = '/{docker_target_name}/{curriculum_file}'.format(
curriculum_file=curriculum_file)
curriculum_folder=curriculum_folder)
self.lesson = lesson
self.lesson_nums = lesson_nums
self.fast_simulation = fast_simulation
self.load_model = load
self.train_model = train

self.env_name = 'editor_'+self.env.academy_name
else:
self.env_name = os.path.basename(os.path.normpath(env_path)) # Extract out name of environment
self.curriculum = Curriculum(self.curriculum_file, self.env._resetParameters)
self.school = School(self.curriculum_folder, self.env._resetParameters)
# NOTE(review): lifted from a diff view; the original leading indentation was
# lost in extraction, so these lines appear flat. This is the single-curriculum
# variant the commit appears to replace with the per-brain _get_progresses.
def _get_progress(self):
# Returns the scalar measure that drives curriculum lesson advancement,
# or None when no curriculum file is configured.
if self.curriculum_file is not None:
progress = 0
# "progress" measure: mean fraction of max training steps, averaged
# over all external brains.
if self.curriculum.measure_type == "progress":
for brain_name in self.env.external_brain_names:
progress += self.trainers[brain_name].get_step / self.trainers[brain_name].get_max_steps
return progress / len(self.env.external_brain_names)
# "reward" measure: sum of each brain's most recent reward.
# NOTE(review): any other measure_type falls through and implicitly
# returns None — presumably unintended; verify against Curriculum.
elif self.curriculum.measure_type == "reward":
for brain_name in self.env.external_brain_names:
progress += self.trainers[brain_name].get_last_reward
return progress
else:
return None
# NOTE(review): lifted from a diff view; the original leading indentation was
# lost in extraction, so these lines appear flat.
def _get_progresses(self):
# Returns a dict mapping each brain name to the measure driving its own
# curriculum's lesson advancement, or None when no curriculum folder is
# configured.
if self.curriculum_folder is not None:
brain_names_to_progresses = {}
for brain_name, curriculum in self.school.brains_to_curriculums.items():
# "progress" measure: fraction of the trainer's max steps completed.
if curriculum.measure_type == "progress":
progress = self.trainers[brain_name].get_step / self.trainers[brain_name].get_max_steps
brain_names_to_progresses[brain_name] = progress
# "reward" measure: the trainer's most recent reward.
# NOTE(review): a brain whose curriculum has any other measure_type
# is silently omitted from the dict — confirm that is intended.
elif curriculum.measure_type == "reward":
progress = self.trainers[brain_name].get_last_reward
brain_names_to_progresses[brain_name] = progress
return brain_names_to_progresses
else:
return None

.format(model_path))
def start_learning(self):
self.curriculum.set_lesson_number(self.lesson)
self.school.set_lesson_nums(self.lesson_nums)
trainer_config = self._load_config()
self._create_model_path(self.model_path)

self._initialize_trainers(trainer_config, sess)
for k, t in self.trainers.items():
for _, t in self.trainers.items():
self.logger.info(t)
init = tf.global_variables_initializer()
saver = tf.train.Saver(max_to_keep=self.keep_checkpoints)

else:
sess.run(init)
global_step = 0 # This is only for saving the model
self.curriculum.increment_lesson(self._get_progress())
curr_info = self.env.reset(config=self.curriculum.get_config(), train_mode=self.fast_simulation)
self.school.increment_lessons(self._get_progresses)
# TODO: Environment needs a new reset method which takes into account all reset params from all
# brains.
curr_info = self.env.reset(config=self.school.get_config(), train_mode=self.fast_simulation)
if self.train_model:
for brain_name, trainer in self.trainers.items():
trainer.write_tensorboard_text('Hyperparameters', trainer.parameters)

self.curriculum.increment_lesson(self._get_progress())
curr_info = self.env.reset(config=self.curriculum.get_config(), train_mode=self.fast_simulation)
self.school.increment_lessons(self._get_progresses())
# TODO: Environment needs a new reset method which takes into account all reset params from all
# brains.
curr_info = self.env.reset(config=self.school.get_config(), train_mode=self.fast_simulation)
for brain_name, trainer in self.trainers.items():
trainer.end_episode()
# Decide and take an action

# Perform gradient descent with experience buffer
trainer.update_model()
# Write training statistics to Tensorboard.
trainer.write_summary(self.curriculum.lesson_number)
# TODO: Not sure how to replace this line.
#trainer.write_summary(self.curriculum.lesson_number)
if self.train_model and trainer.get_step <= trainer.get_max_steps:
trainer.increment_step_and_update_last_reward()
if self.train_model:

4
unity-environment/Assets/ML-Agents/Examples/PushBlock/Scripts/PushAgentBasic.cs


transform.position = GetRandomSpawnPos();
agentRB.velocity = Vector3.zero;
agentRB.angularVelocity = Vector3.zero;
}
private void FixedUpdate()
{
// Set the size of the goal according to the current lesson in the
// curriculum.
goal.transform.localScale = new Vector3(

21
python/tests/test_school.py


import pytest
from unittest.mock import patch
from unitytrainers import School
@pytest.fixture
def default_reset_parameters():
    """Reset parameters shared by the School tests."""
    return dict(param1=1, param2=1, param3=1)
@patch('unitytrainers.Curriculum.__init__', return_value=None)
@patch('os.listdir', return_value=['TestBrain1.json', 'TestBrain2.json'])
def test_init_school_happy_path(listdir, curriculum_mock, default_reset_parameters):
    """A School creates one Curriculum per JSON file in the folder, keyed by
    the file's basename (the brain name).

    Curriculum.__init__ is mocked out so no real file is opened.
    """
    # Removed leftover debug print(curriculum_mock) from the committed test.
    school = School('test-school/', default_reset_parameters)

    assert len(school.brains_to_curriculums) == 2
    assert 'TestBrain1' in school.brains_to_curriculums
    assert 'TestBrain2' in school.brains_to_curriculums

44
python/unitytrainers/school.py


"""
A School holds many curriculums. The School tracks which brains are following which curriculums.
"""
import os
from unitytrainers import Curriculum
class School:
    """Maps each brain to the Curriculum it follows.

    A School loads one Curriculum per JSON file found in a curriculum
    folder; a file's basename (minus extension) is taken to be the name
    of the brain that follows that curriculum.
    """

    def __init__(self, curriculum_folder, default_reset_parameters):
        """
        Initializes a School object.

        :param curriculum_folder: Folder of per-brain curriculum JSON files,
            or None when curriculum learning is not in use.
        :param default_reset_parameters: Environment reset parameters each
            curriculum is validated against.
        """
        if curriculum_folder is None:
            self._brains_to_curriculums = None
        else:
            self._brains_to_curriculums = {}
            for filename in os.listdir(curriculum_folder):
                brain_name = os.path.splitext(filename)[0]
                # os.listdir yields bare filenames; join with the folder so
                # the curriculum JSON opens regardless of the current working
                # directory (passing just `filename` only worked by accident).
                curriculum_path = os.path.join(curriculum_folder, filename)
                self._brains_to_curriculums[brain_name] = \
                    Curriculum(curriculum_path, default_reset_parameters)

    @property
    def brains_to_curriculums(self):
        """Dict from brain name to its Curriculum, or None when disabled."""
        return self._brains_to_curriculums

    def increment_lessons(self, progresses):
        """Advance each named brain's curriculum by its progress measure.

        No-op when curriculum learning is disabled (folder was None), so
        callers may invoke this unconditionally.

        :param progresses: Dict from brain name to progress measure.
        """
        if self._brains_to_curriculums is None:
            return
        for brain_name, progress in progresses.items():
            self._brains_to_curriculums[brain_name].increment_lesson(progress)

    def set_lesson_nums(self, lesson_nums):
        """Jump each named brain's curriculum to the given lesson number.

        No-op when curriculum learning is disabled.

        :param lesson_nums: Dict from brain name to lesson number.
        """
        if self._brains_to_curriculums is None:
            return
        for brain_name, lesson in lesson_nums.items():
            self._brains_to_curriculums[brain_name].lesson_number = lesson

    def get_config(self):
        """Merge the current-lesson reset parameters of every curriculum.

        Returns an empty dict when curriculum learning is disabled, so the
        result can always be passed as an environment reset config. A
        parameter defined by several curriculums is overwritten: the last
        curriculum iterated wins.
        """
        config = {}
        if self._brains_to_curriculums is None:
            return config
        for curriculum in self._brains_to_curriculums.values():
            parameters = curriculum.data["parameters"]
            for key in parameters:
                config[key] = parameters[key][curriculum.lesson_number]
        return config
正在加载...
取消
保存