浏览代码

Merge pull request #968 from dericp/develop-curriculum-learning-refactor

Curriculum learning moved from environment to trainer.
/develop-generalizationTraining-TrainerController
GitHub 7 年前
当前提交
34035176
共有 10 个文件被更改,包括 101 次插入93 次删除
  1. 2
      docs/Training-Curriculum-Learning.md
  2. 57
      python/tests/test_unityagents.py
  3. 58
      python/tests/test_unitytrainers.py
  4. 1
      python/unityagents/__init__.py
  5. 24
      python/unityagents/environment.py
  6. 2
      python/unitytrainers/__init__.py
  7. 21
      python/unitytrainers/trainer_controller.py
  8. 14
      python/unitytrainers/curriculum.py
  9. 15
      python/unitytrainers/exception.py
  10. 0
      /python/unitytrainers/curriculum.py

2
docs/Training-Curriculum-Learning.md


structure of the curriculum. Within it we can set at what points in the training process
our wall height will change, either based on the percentage of training steps which have
taken place, or what the average reward the agent has received in the recent past is.
Once these are in place, we simply launch ppo.py using the `–curriculum-file` flag to
Once these are in place, we simply launch learn.py using the `--curriculum-file` flag to
point to the JSON file, and PPO will train using Curriculum Learning. Of course we can
then keep track of the current lesson and progress via TensorBoard.

57
python/tests/test_unityagents.py


import json
import unittest.mock as mock
import pytest
import struct

from unityagents import UnityEnvironment, UnityEnvironmentException, UnityActionException, \
BrainInfo, Curriculum
BrainInfo
# Well-formed curriculum fixture: three thresholds, and every parameter
# lists four values.
_DUMMY_CURRICULUM_JSON = '''{
"measure" : "reward",
"thresholds" : [10, 20, 50],
"min_lesson_length" : 3,
"signal_smoothing" : true,
"parameters" :
{
"param1" : [0.7, 0.5, 0.3, 0.1],
"param2" : [100, 50, 20, 15],
"param3" : [0.2, 0.3, 0.7, 0.9]
}
}'''
dummy_curriculum = json.loads(_DUMMY_CURRICULUM_JSON)
# Malformed curriculum fixture: "param2" lists only three values while
# "param1" and "param3" list four. test_curriculum expects building a
# Curriculum from this data to raise.
_BAD_CURRICULUM_JSON = '''{
"measure" : "reward",
"thresholds" : [10, 20, 50],
"min_lesson_length" : 3,
"signal_smoothing" : false,
"parameters" :
{
"param1" : [0.7, 0.5, 0.3, 0.1],
"param2" : [100, 50, 20],
"param3" : [0.2, 0.3, 0.7, 0.9]
}
}'''
bad_curriculum = json.loads(_BAD_CURRICULUM_JSON)
def test_handles_bad_filename():

env.close()
assert not env._loaded
assert comm.has_been_closed
def test_curriculum():
    """Check Curriculum validation errors and lesson progression.

    ``json.load`` is patched so the curriculum contents come from the
    module-level ``bad_curriculum`` / ``dummy_curriculum`` fixtures instead
    of from whatever the location argument points at.
    """
    open_target = '%s.open' % __name__
    location = 'tests/test_unityagents.py'
    with mock.patch('json.load') as fake_load, \
            mock.patch(open_target, create=True) as fake_open:
        fake_open.return_value = 0

        # The malformed fixture must be rejected.
        fake_load.return_value = bad_curriculum
        with pytest.raises(UnityEnvironmentException):
            Curriculum(location, {"param1": 1, "param2": 1, "param3": 1})

        # The valid fixture is rejected when the defaults omit "param3".
        fake_load.return_value = dummy_curriculum
        with pytest.raises(UnityEnvironmentException):
            Curriculum(location, {"param1": 1, "param2": 1})

        lessons = Curriculum(location, {"param1": 1, "param2": 1, "param3": 1})
        assert lessons.get_lesson_number == 0

        lessons.set_lesson_number(1)
        assert lessons.get_lesson_number == 1

        lessons.increment_lesson(10)
        assert lessons.get_lesson_number == 1

        lessons.increment_lesson(30)
        lessons.increment_lesson(30)
        assert lessons.get_lesson_number == 1
        assert lessons.lesson_length == 3

        # One more report moves on to lesson 2 and resets the length counter.
        lessons.increment_lesson(30)
        assert lessons.get_config() == {'param1': 0.3, 'param2': 20, 'param3': 0.7}
        assert lessons.get_config(0) == {"param1": 0.7, "param2": 100, "param3": 0.2}
        assert lessons.lesson_length == 0
        assert lessons.get_lesson_number == 2
if __name__ == '__main__':

58
python/tests/test_unitytrainers.py


import json
import yaml
import unittest.mock as mock
import pytest

from unitytrainers.models import *
from unitytrainers.ppo.trainer import PPOTrainer
from unitytrainers.bc.trainer import BehavioralCloningTrainer
from unityagents import UnityEnvironmentException
from unitytrainers.curriculum import Curriculum
from unitytrainers.exception import CurriculumError
from unityagents.exception import UnityEnvironmentException
from .mock_communicator import MockCommunicator
dummy_start = '''{

memory_size: 8
''')
# Well-formed curriculum fixture: three thresholds, and every parameter
# lists four values.
_DUMMY_CURRICULUM_JSON = '''{
"measure" : "reward",
"thresholds" : [10, 20, 50],
"min_lesson_length" : 3,
"signal_smoothing" : true,
"parameters" :
{
"param1" : [0.7, 0.5, 0.3, 0.1],
"param2" : [100, 50, 20, 15],
"param3" : [0.2, 0.3, 0.7, 0.9]
}
}'''
dummy_curriculum = json.loads(_DUMMY_CURRICULUM_JSON)
# Malformed curriculum fixture: "param2" lists only three values while
# "param1" and "param3" list four. test_curriculum expects building a
# Curriculum from this data to raise.
_BAD_CURRICULUM_JSON = '''{
"measure" : "reward",
"thresholds" : [10, 20, 50],
"min_lesson_length" : 3,
"signal_smoothing" : false,
"parameters" :
{
"param1" : [0.7, 0.5, 0.3, 0.1],
"param2" : [100, 50, 20],
"param3" : [0.2, 0.3, 0.7, 0.9]
}
}'''
bad_curriculum = json.loads(_BAD_CURRICULUM_JSON)
@mock.patch('unityagents.UnityEnvironment.executable_launcher')
@mock.patch('unityagents.UnityEnvironment.get_communicator')

batch_size=None, training_length=2)
assert len(b.update_buffer['action']) == 10
assert np.array(b.update_buffer['action']).shape == (10, 2, 2)
def test_curriculum():
    """Check Curriculum validation errors and lesson progression.

    ``json.load`` is patched so the curriculum contents come from the
    module-level ``bad_curriculum`` / ``dummy_curriculum`` fixtures instead
    of from whatever the location argument points at.
    """
    open_target = '%s.open' % __name__
    location = 'tests/test_unityagents.py'
    with mock.patch('json.load') as fake_load, \
            mock.patch(open_target, create=True) as fake_open:
        fake_open.return_value = 0

        # The malformed fixture must be rejected.
        fake_load.return_value = bad_curriculum
        with pytest.raises(CurriculumError):
            Curriculum(location, {"param1": 1, "param2": 1, "param3": 1})

        # The valid fixture is rejected when the defaults omit "param3".
        fake_load.return_value = dummy_curriculum
        with pytest.raises(CurriculumError):
            Curriculum(location, {"param1": 1, "param2": 1})

        lessons = Curriculum(location, {"param1": 1, "param2": 1, "param3": 1})
        assert lessons.get_lesson_number == 0

        lessons.set_lesson_number(1)
        assert lessons.get_lesson_number == 1

        lessons.increment_lesson(10)
        assert lessons.get_lesson_number == 1

        lessons.increment_lesson(30)
        lessons.increment_lesson(30)
        assert lessons.get_lesson_number == 1
        assert lessons.lesson_length == 3

        # One more report moves on to lesson 2 and resets the length counter.
        lessons.increment_lesson(30)
        assert lessons.get_config() == {'param1': 0.3, 'param2': 20, 'param3': 0.7}
        assert lessons.get_config(0) == {"param1": 0.7, "param2": 100, "param3": 0.2}
        assert lessons.lesson_length == 0
        assert lessons.get_lesson_number == 2
if __name__ == '__main__':

1
python/unityagents/__init__.py


from .environment import *
from .brain import *
from .exception import *
from .curriculum import *

24
python/unityagents/environment.py


from .brain import BrainInfo, BrainParameters, AllBrainInfo
from .exception import UnityEnvironmentException, UnityActionException, UnityTimeOutException
from .curriculum import Curriculum
from communicator_objects import UnityRLInput, UnityRLOutput, AgentActionProto,\
EnvironmentParametersProto, UnityRLInitializationInput, UnityRLInitializationOutput,\

class UnityEnvironment(object):
def __init__(self, file_name=None, worker_id=0,
base_port=5005, curriculum=None,
seed=0, docker_training=False, no_graphics=False):
base_port=5005, seed=0,
docker_training=False, no_graphics=False):
"""
Starts a new unity environment and establishes a connection with the environment.
Notice: Currently communication between Unity and Python takes place over an open socket without authentication.

self._num_brains = len(self._brain_names)
self._num_external_brains = len(self._external_brain_names)
self._resetParameters = dict(aca_params.environment_parameters.float_parameters) # TODO
self._curriculum = Curriculum(curriculum, self._resetParameters)
@property
def curriculum(self):
"""Return the Curriculum object stored on this environment as ``_curriculum``."""
return self._curriculum
@property
def logfile_path(self):

# return SocketCommunicator(worker_id, base_port)
def __str__(self):
_new_reset_param = self._curriculum.get_config()
for k in _new_reset_param:
self._resetParameters[k] = _new_reset_param[k]
Lesson number : {3}
Reset Parameters :\n\t\t{4}'''.format(self._academy_name, str(self._num_brains),
str(self._num_external_brains), self._curriculum.get_lesson_number,
"\n\t\t".join([str(k) + " -> " + str(self._resetParameters[k])
Reset Parameters :\n\t\t{3}'''.format(self._academy_name, str(self._num_brains),
str(self._num_external_brains),
"\n\t\t".join([str(k) + " -> " + str(self._resetParameters[k])
def reset(self, train_mode=True, config=None, lesson=None) -> AllBrainInfo:
def reset(self, config=None, train_mode=True) -> AllBrainInfo:
config = self._curriculum.get_config(lesson)
config = self._resetParameters
elif config != {}:
logger.info("\nAcademy Reset with parameters : \t{0}"
.format(', '.join([str(x) + ' -> ' + str(config[x]) for x in config])))

2
python/unitytrainers/__init__.py


from .buffer import *
from .curriculum import *
from .models import *
from .trainer_controller import *
from .bc.models import *

from .exception import *

21
python/unitytrainers/trainer_controller.py


from tensorflow.python.tools import freeze_graph
from unitytrainers.ppo.trainer import PPOTrainer
from unitytrainers.bc.trainer import BehavioralCloningTrainer
from unitytrainers import Curriculum
from unityagents import UnityEnvironment, UnityEnvironmentException

np.random.seed(self.seed)
tf.set_random_seed(self.seed)
self.env = UnityEnvironment(file_name=env_path, worker_id=self.worker_id,
curriculum=self.curriculum_file, seed=self.seed,
docker_training=self.docker_training,
seed=self.seed, docker_training=self.docker_training,
self.curriculum = Curriculum(curriculum_file, self.env._resetParameters)
if self.env.curriculum.measure_type == "progress":
if self.curriculum.measure_type == "progress":
elif self.env.curriculum.measure_type == "reward":
elif self.curriculum.measure_type == "reward":
for brain_name in self.env.external_brain_names:
progress += self.trainers[brain_name].get_last_reward
return progress

.format(model_path))
def start_learning(self):
self.env.curriculum.set_lesson_number(self.lesson)
self.curriculum.set_lesson_number(self.lesson)
trainer_config = self._load_config()
self._create_model_path(self.model_path)

else:
sess.run(init)
global_step = 0 # This is only for saving the model
self.env.curriculum.increment_lesson(self._get_progress())
curr_info = self.env.reset(train_mode=self.fast_simulation)
self.curriculum.increment_lesson(self._get_progress())
curr_info = self.env.reset(config=self.curriculum.get_config(), train_mode=self.fast_simulation)
if self.train_model:
for brain_name, trainer in self.trainers.items():
trainer.write_tensorboard_text('Hyperparameters', trainer.parameters)

self.env.curriculum.increment_lesson(self._get_progress())
curr_info = self.env.reset(train_mode=self.fast_simulation)
self.curriculum.increment_lesson(self._get_progress())
curr_info = self.env.reset(config=self.curriculum.get_config(), train_mode=self.fast_simulation)
for brain_name, trainer in self.trainers.items():
trainer.end_episode()
# Decide and take an action

# Perform gradient descent with experience buffer
trainer.update_model()
# Write training statistics to Tensorboard.
trainer.write_summary(self.env.curriculum.lesson_number)
trainer.write_summary(self.curriculum.lesson_number)
if self.train_model and trainer.get_step <= trainer.get_max_steps:
trainer.increment_step_and_update_last_reward()
if self.train_model:

14
python/unitytrainers/curriculum.py


import json
from .exception import UnityEnvironmentException
from .exception import CurriculumError
import logging

with open(location) as data_file:
self.data = json.load(data_file)
except IOError:
raise UnityEnvironmentException(
raise CurriculumError(
raise UnityEnvironmentException("There was an error decoding {}".format(location))
raise CurriculumError("There was an error decoding {}".format(location))
raise UnityEnvironmentException("{0} does not contain a "
raise CurriculumError("{0} does not contain a "
"{1} field.".format(location, key))
parameters = self.data['parameters']
self.measure_type = self.data['measure']

raise UnityEnvironmentException(
raise CurriculumError(
raise UnityEnvironmentException(
raise CurriculumError(
"The parameter {0} in Curriculum {1} must have {2} values "
"but {3} were found".format(key, location,
self.max_lesson_number + 1, len(parameters[key])))

def increment_lesson(self, progress):
"""
Increments the lesson number depending on the progree given.
Increments the lesson number depending on the progress given.
:param progress: Measure of progress (either reward or percentage steps completed).
"""
if self.data is None or progress is None:

15
python/unitytrainers/exception.py


"""
Contains exceptions for the unitytrainers package.
"""
class TrainerError(Exception):
    """Base class for any error related to the trainers in the ML-Agents Toolkit."""
class CurriculumError(TrainerError):
    """Raised for any error related to training with a curriculum."""

/python/unityagents/curriculum.py → /python/unitytrainers/curriculum.py

正在加载...
取消
保存