Merge pull request #968 from dericp/develop-curriculum-learning-refactor

Curriculum learning moved from environment to trainer.
7 years ago · 34035176
--- a/docs/Training-Curriculum-Learning.md
+++ b/docs/Training-Curriculum-Learning.md
 structure of the curriculum. Within it we can set at what points in the training process 
 our wall height will change, either based on the percentage of training steps which have 
 taken place, or what the average reward the agent has received in the recent past is. 
-Once these are in place, we simply launch ppo.py using the `–curriculum-file` flag to 
+Once these are in place, we simply launch learn.py using the `--curriculum-file` flag to 
 point to the JSON file, and PPO will train using Curriculum Learning. Of course we can 
 then keep track of the current lesson and progress via TensorBoard.

--- a/python/tests/test_unityagents.py
+++ b/python/tests/test_unityagents.py
-import json
 import unittest.mock as mock
 import pytest
 import struct
 from unityagents import UnityEnvironment, UnityEnvironmentException, UnityActionException, \
-    BrainInfo, Curriculum
+    BrainInfo
-
-
-dummy_curriculum = json.loads('''{
-    "measure" : "reward",
-    "thresholds" : [10, 20, 50],
-    "min_lesson_length" : 3,
-    "signal_smoothing" : true, 
-    "parameters" : 
-    {
-        "param1" : [0.7, 0.5, 0.3, 0.1],
-        "param2" : [100, 50, 20, 15],
-        "param3" : [0.2, 0.3, 0.7, 0.9]
-    }
-}''')
-bad_curriculum = json.loads('''{
-    "measure" : "reward",
-    "thresholds" : [10, 20, 50],
-    "min_lesson_length" : 3,
-    "signal_smoothing" : false, 
-    "parameters" : 
-    {
-        "param1" : [0.7, 0.5, 0.3, 0.1],
-        "param2" : [100, 50, 20],
-        "param3" : [0.2, 0.3, 0.7, 0.9]
-    }
-}''')


 def test_handles_bad_filename():
    env.close()
    assert not env._loaded
    assert comm.has_been_closed
-
-
-def test_curriculum():
-    open_name = '%s.open' % __name__
-    with mock.patch('json.load') as mock_load:
-        with mock.patch(open_name, create=True) as mock_open:
-            mock_open.return_value = 0
-            mock_load.return_value = bad_curriculum
-            with pytest.raises(UnityEnvironmentException):
-                Curriculum('tests/test_unityagents.py', {"param1": 1, "param2": 1, "param3": 1})
-            mock_load.return_value = dummy_curriculum
-            with pytest.raises(UnityEnvironmentException):
-                Curriculum('tests/test_unityagents.py', {"param1": 1, "param2": 1})
-            curriculum = Curriculum('tests/test_unityagents.py', {"param1": 1, "param2": 1, "param3": 1})
-            assert curriculum.get_lesson_number == 0
-            curriculum.set_lesson_number(1)
-            assert curriculum.get_lesson_number == 1
-            curriculum.increment_lesson(10)
-            assert curriculum.get_lesson_number == 1
-            curriculum.increment_lesson(30)
-            curriculum.increment_lesson(30)
-            assert curriculum.get_lesson_number == 1
-            assert curriculum.lesson_length == 3
-            curriculum.increment_lesson(30)
-            assert curriculum.get_config() == {'param1': 0.3, 'param2': 20, 'param3': 0.7}
-            assert curriculum.get_config(0) == {"param1": 0.7, "param2": 100, "param3": 0.2}
-            assert curriculum.lesson_length == 0
-            assert curriculum.get_lesson_number == 2


 if __name__ == '__main__':
--- a/python/tests/test_unitytrainers.py
+++ b/python/tests/test_unitytrainers.py
+import json
 import yaml
 import unittest.mock as mock
 import pytest
 from unitytrainers.models import *
 from unitytrainers.ppo.trainer import PPOTrainer
 from unitytrainers.bc.trainer import BehavioralCloningTrainer
-from unityagents import UnityEnvironmentException
+from unitytrainers.curriculum import Curriculum
+from unitytrainers.exception import CurriculumError
+from unityagents.exception import UnityEnvironmentException
 from .mock_communicator import MockCommunicator

 dummy_start = '''{
    memory_size: 8
 ''')

+dummy_curriculum = json.loads('''{
+    "measure" : "reward",
+    "thresholds" : [10, 20, 50],
+    "min_lesson_length" : 3,
+    "signal_smoothing" : true,
+    "parameters" :
+    {
+        "param1" : [0.7, 0.5, 0.3, 0.1],
+        "param2" : [100, 50, 20, 15],
+        "param3" : [0.2, 0.3, 0.7, 0.9]
+    }
+}''')
+bad_curriculum = json.loads('''{
+    "measure" : "reward",
+    "thresholds" : [10, 20, 50],
+    "min_lesson_length" : 3,
+    "signal_smoothing" : false,
+    "parameters" :
+    {
+        "param1" : [0.7, 0.5, 0.3, 0.1],
+        "param2" : [100, 50, 20],
+        "param3" : [0.2, 0.3, 0.7, 0.9]
+    }
+}''')
+

@mock.patch('unityagents.UnityEnvironment.executable_launcher')
@mock.patch('unityagents.UnityEnvironment.get_communicator')
                           batch_size=None, training_length=2)
    assert len(b.update_buffer['action']) == 10
    assert np.array(b.update_buffer['action']).shape == (10, 2, 2)
+
+
+def test_curriculum():
+    open_name = '%s.open' % __name__
+    with mock.patch('json.load') as mock_load:
+        with mock.patch(open_name, create=True) as mock_open:
+            mock_open.return_value = 0
+            mock_load.return_value = bad_curriculum
+            with pytest.raises(CurriculumError):
+                Curriculum('tests/test_unityagents.py', {"param1": 1, "param2": 1, "param3": 1})
+            mock_load.return_value = dummy_curriculum
+            with pytest.raises(CurriculumError):
+                Curriculum('tests/test_unityagents.py', {"param1": 1, "param2": 1})
+            curriculum = Curriculum('tests/test_unityagents.py', {"param1": 1, "param2": 1, "param3": 1})
+            assert curriculum.get_lesson_number == 0
+            curriculum.set_lesson_number(1)
+            assert curriculum.get_lesson_number == 1
+            curriculum.increment_lesson(10)
+            assert curriculum.get_lesson_number == 1
+            curriculum.increment_lesson(30)
+            curriculum.increment_lesson(30)
+            assert curriculum.get_lesson_number == 1
+            assert curriculum.lesson_length == 3
+            curriculum.increment_lesson(30)
+            assert curriculum.get_config() == {'param1': 0.3, 'param2': 20, 'param3': 0.7}
+            assert curriculum.get_config(0) == {"param1": 0.7, "param2": 100, "param3": 0.2}
+            assert curriculum.lesson_length == 0
+            assert curriculum.get_lesson_number == 2


 if __name__ == '__main__':
--- a/python/unityagents/init.py
+++ b/python/unityagents/init.py
 from .environment import *
 from .brain import *
 from .exception import *
-from .curriculum import *
--- a/python/unityagents/environment.py
+++ b/python/unityagents/environment.py

 from .brain import BrainInfo, BrainParameters, AllBrainInfo
 from .exception import UnityEnvironmentException, UnityActionException, UnityTimeOutException
-from .curriculum import Curriculum

 from communicator_objects import UnityRLInput, UnityRLOutput, AgentActionProto,\
    EnvironmentParametersProto, UnityRLInitializationInput, UnityRLInitializationOutput,\

 class UnityEnvironment(object):
    def __init__(self, file_name=None, worker_id=0,
-                 base_port=5005, curriculum=None,
-                 seed=0, docker_training=False, no_graphics=False):
+                 base_port=5005, seed=0,
+                 docker_training=False, no_graphics=False):
        """
        Starts a new unity environment and establishes a connection with the environment.
        Notice: Currently communication between Unity and Python takes place over an open socket without authentication.
        self._num_brains = len(self._brain_names)
        self._num_external_brains = len(self._external_brain_names)
        self._resetParameters = dict(aca_params.environment_parameters.float_parameters) # TODO
-        self._curriculum = Curriculum(curriculum, self._resetParameters)
-
-    @property
-    def curriculum(self):
-        return self._curriculum

    @property
    def logfile_path(self):
        # return SocketCommunicator(worker_id, base_port)

    def __str__(self):
-        _new_reset_param = self._curriculum.get_config()
-        for k in _new_reset_param:
-            self._resetParameters[k] = _new_reset_param[k]
-        Lesson number : {3}
-        Reset Parameters :\n\t\t{4}'''.format(self._academy_name, str(self._num_brains),
-                                 str(self._num_external_brains), self._curriculum.get_lesson_number,
-                                  "\n\t\t".join([str(k) + " -> " + str(self._resetParameters[k])
+        Reset Parameters :\n\t\t{3}'''.format(self._academy_name, str(self._num_brains),
+                                 str(self._num_external_brains),
+                                 "\n\t\t".join([str(k) + " -> " + str(self._resetParameters[k])
-    def reset(self, train_mode=True, config=None, lesson=None) -> AllBrainInfo:
+    def reset(self, config=None, train_mode=True) -> AllBrainInfo:
-            config = self._curriculum.get_config(lesson)
+            config = self._resetParameters
        elif config != {}:
            logger.info("\nAcademy Reset with parameters : \t{0}"
                        .format(', '.join([str(x) + ' -> ' + str(config[x]) for x in config])))
--- a/python/unitytrainers/init.py
+++ b/python/unitytrainers/init.py
 from .buffer import *
+from .curriculum import *
 from .models import *
 from .trainer_controller import *
 from .bc.models import *
+from .exception import *
--- a/python/unitytrainers/trainer_controller.py
+++ b/python/unitytrainers/trainer_controller.py
 from tensorflow.python.tools import freeze_graph
 from unitytrainers.ppo.trainer import PPOTrainer
 from unitytrainers.bc.trainer import BehavioralCloningTrainer
+from unitytrainers import Curriculum
 from unityagents import UnityEnvironment, UnityEnvironmentException


        np.random.seed(self.seed)
        tf.set_random_seed(self.seed)
        self.env = UnityEnvironment(file_name=env_path, worker_id=self.worker_id,
-                                    curriculum=self.curriculum_file, seed=self.seed,
-                                    docker_training=self.docker_training,
+                                    seed=self.seed, docker_training=self.docker_training,
+        self.curriculum = Curriculum(curriculum_file, self.env._resetParameters)
-            if self.env.curriculum.measure_type == "progress":
+            if self.curriculum.measure_type == "progress":
-            elif self.env.curriculum.measure_type == "reward":
+            elif self.curriculum.measure_type == "reward":
                for brain_name in self.env.external_brain_names:
                    progress += self.trainers[brain_name].get_last_reward
                return progress
                                            .format(model_path))

    def start_learning(self):
-        self.env.curriculum.set_lesson_number(self.lesson)
+        self.curriculum.set_lesson_number(self.lesson)
        trainer_config = self._load_config()
        self._create_model_path(self.model_path)

            else:
                sess.run(init)
            global_step = 0  # This is only for saving the model
-            self.env.curriculum.increment_lesson(self._get_progress())
-            curr_info = self.env.reset(train_mode=self.fast_simulation)
+            self.curriculum.increment_lesson(self._get_progress())
+            curr_info = self.env.reset(config=self.curriculum.get_config(), train_mode=self.fast_simulation)
            if self.train_model:
                for brain_name, trainer in self.trainers.items():
                    trainer.write_tensorboard_text('Hyperparameters', trainer.parameters)
-                        self.env.curriculum.increment_lesson(self._get_progress())
-                        curr_info = self.env.reset(train_mode=self.fast_simulation)
+                        self.curriculum.increment_lesson(self._get_progress())
+                        curr_info = self.env.reset(config=self.curriculum.get_config(), train_mode=self.fast_simulation)
                        for brain_name, trainer in self.trainers.items():
                            trainer.end_episode()
                    # Decide and take an action
                            # Perform gradient descent with experience buffer
                            trainer.update_model()
                        # Write training statistics to Tensorboard.
-                        trainer.write_summary(self.env.curriculum.lesson_number)
+                        trainer.write_summary(self.curriculum.lesson_number)
                        if self.train_model and trainer.get_step <= trainer.get_max_steps:
                            trainer.increment_step_and_update_last_reward()
                    if self.train_model:
--- a/python/unitytrainers/curriculum.py
+++ b/python/unitytrainers/curriculum.py
 import json

-from .exception import UnityEnvironmentException
+from .exception import CurriculumError

 import logging

                with open(location) as data_file:
                    self.data = json.load(data_file)
            except IOError:
-                raise UnityEnvironmentException(
+                raise CurriculumError(
-                raise UnityEnvironmentException("There was an error decoding {}".format(location))
+                raise CurriculumError("There was an error decoding {}".format(location))
-                    raise UnityEnvironmentException("{0} does not contain a "
+                    raise CurriculumError("{0} does not contain a "
                                                    "{1} field.".format(location, key))
            parameters = self.data['parameters']
            self.measure_type = self.data['measure']
-                    raise UnityEnvironmentException(
+                    raise CurriculumError(
-                    raise UnityEnvironmentException(
+                    raise CurriculumError(
                        "The parameter {0} in Curriculum {1} must have {2} values "
                        "but {3} were found".format(key, location,
                                                    self.max_lesson_number + 1, len(parameters[key])))

    def increment_lesson(self, progress):
        """
-        Increments the lesson number depending on the progree given.
+        Increments the lesson number depending on the progress given.
        :param progress: Measure of progress (either reward or percentage steps completed).
        """
        if self.data is None or progress is None:
--- a/python/unitytrainers/exception.py
+++ b/python/unitytrainers/exception.py
+"""
+Contains exceptions for the unitytrainers package.
+"""
+
+class TrainerError(Exception):
+    """
+    Any error related to the trainers in the ML-Agents Toolkit.
+    """
+    pass
+
+class CurriculumError(TrainerError):
+    """
+    Any error related to training with a curriculum.
+    """
+    pass
--- a//python/unitytrainers/curriculum.py
+++ b//python/unitytrainers/curriculum.py