Trainer controller lines wrapped.

7 years ago · 032446de
--- a/python/unitytrainers/trainer_controller.py
+++ b/python/unitytrainers/trainer_controller.py
 # # Unity ML-Agents Toolkit
 # ## ML-Agent Learning
-# Launches unitytrainers for each External Brains in a Unity Environment
+"""Launches unitytrainers for each External Brains in a Unity Environment."""
-import logging
+import logging
+
-
 import numpy as np
 import tensorflow as tf
 from tensorflow.python.tools import freeze_graph


 class TrainerController(object):
-    def __init__(self, env_path, run_id, save_freq, curriculum_folder, fast_simulation, load, train,
-                 worker_id, keep_checkpoints, lesson, seed, docker_target_name, trainer_config_path,
+    def __init__(self, env_path, run_id, save_freq, curriculum_folder,
+                 fast_simulation, load, train, worker_id, keep_checkpoints,
+                 lesson, seed, docker_target_name, trainer_config_path,
-        :param curriculum_folder: Folder containing JSON curriculums for the env
-        :param fast_simulation: Whether to run the game at training speed
-        :param load: Whether to load the model or randomly initialize
-        :param train: Whether to train model, or only run inference
-        :param worker_id: Number to add to communication port (5005). Used for multi-environment
-        :param keep_checkpoints: How many model checkpoints to keep
-        :param lesson: Start learning from this lesson
+        :param curriculum_folder: Folder containing JSON curriculums for the
+               environment.
+        :param fast_simulation: Whether to run the game at training speed.
+        :param load: Whether to load the model or randomly initialize.
+        :param train: Whether to train model, or only run inference.
+        :param worker_id: Number to add to communication port (5005).
+               Used for multi-environment
+        :param keep_checkpoints: How many model checkpoints to keep.
+        :param lesson: Start learning from this lesson.
-        :param docker_target_name: Name of docker volume that will contain all data.
-        :param trainer_config_path: Fully qualified path to location of trainer configuration file
-        :param no_graphics: Whether to run the Unity simulator in no-graphics mode
+        :param docker_target_name: Name of docker volume that will contain all
+               data.
+        :param trainer_config_path: Fully qualified path to location of trainer
+               configuration file.
+        :param no_graphics: Whether to run the Unity simulator in no-graphics
+                            mode.
+            # Strip out executable extensions if passed
-                        .replace('.x86', ''))  # Strip out executable extensions if passed
+                        .replace('.x86', ''))

        # Recognize and use docker volume if one is passed as an argument
        if docker_target_name == '':
                docker_target_name=docker_target_name,
                run_id=run_id)
            if env_path is not None:
-                env_path = '/{docker_target_name}/{env_name}'.format(docker_target_name=docker_target_name,
-                                                                     env_name=env_path)
+                env_path = '/{docker_target_name}/{env_name}'.format(
+                    docker_target_name=docker_target_name, env_name=env_path)
-                self.curriculum_folder = '/{docker_target_name}/{curriculum_file}'.format(
+                self.curriculum_folder = \
+                    '/{docker_target_name}/{curriculum_file}'.format(
-            self.summaries_dir = '/{docker_target_name}/summaries'.format(docker_target_name=docker_target_name)
+            self.summaries_dir = '/{docker_target_name}/summaries'.format(
+                docker_target_name=docker_target_name)

        self.logger = logging.getLogger("unityagents")
        self.run_id = run_id
        self.seed = seed
        np.random.seed(self.seed)
        tf.set_random_seed(self.seed)
-        self.env = UnityEnvironment(file_name=env_path, worker_id=self.worker_id,
-                                    seed=self.seed, docker_training=self.docker_training,
+        self.env = UnityEnvironment(file_name=env_path,
+                                    worker_id=self.worker_id,
+                                    seed=self.seed,
+                                    docker_training=self.docker_training,
-            self.env_name = os.path.basename(os.path.normpath(env_path))  # Extract out name of environment
+            # Extract out name of environment
+            self.env_name = os.path.basename(os.path.normpath(env_path))
-            self.meta_curriculum = MetaCurriculum(self.curriculum_folder, self.env._resetParameters)
+            self.meta_curriculum = MetaCurriculum(self.curriculum_folder,
+                self.env._resetParameters)
-        if self.meta_curriculum is not None:
+        if self.meta_curriculum:
            for brain_name in self.meta_curriculum.brains_to_curriculums.keys():
                if brain_name not in self.env.external_brain_names:
                    raise MetaCurriculumError('One of the curriculums '
                                              'whose curriculum it defines.')

    def _get_progresses(self):
-        if self.meta_curriculum is not None:
+        if self.meta_curriculum:
-            for brain_name, curriculum in self.meta_curriculum.brains_to_curriculums.items():
+            for brain_name, curriculum \
+                in self.meta_curriculum.brains_to_curriculums.items():
-                    progress = self.trainers[brain_name].get_step / self.trainers[brain_name].get_max_steps
+                    progress = (self.trainers[brain_name].get_step /
+                        self.trainers[brain_name].get_max_steps)
                    brain_names_to_progresses[brain_name] = progress
                elif curriculum.measure == "reward":
                    progress = self.trainers[brain_name].get_last_reward
                if scope == '/':
                    scope = ''
                scopes += [scope]
-                if self.trainers[brain_name].parameters["trainer"] == "imitation":
+                if self.trainers[brain_name].parameters["trainer"] \
+                   == "imitation":
-                    nodes += [scope + x for x in ["action", "value_estimate", "action_probs", "value_estimate"]]
+                    nodes += [scope + x for x in ["action", "value_estimate",
+                        "action_probs", "value_estimate"]]
-                    nodes += [scope + x for x in ["recurrent_out", "memory_size"]]
+                    nodes += [scope + x for x in ["recurrent_out",
+                                                  "memory_size"]]
        if len(scopes) > 1:
            self.logger.info("List of available scopes :")
            for scope in scopes:
        """
        last_checkpoint = self.model_path + '/model-' + str(steps) + '.cptk'
        saver.save(sess, last_checkpoint)
-        tf.train.write_graph(sess.graph_def, self.model_path, 'raw_graph_def.pb', as_text=False)
+        tf.train.write_graph(sess.graph_def, self.model_path,
+                             'raw_graph_def.pb', as_text=False)
        self.logger.info("Saved Model")

    def _export_graph(self):
        target_nodes = ','.join(self._process_graph())
        ckpt = tf.train.get_checkpoint_state(self.model_path)
-        freeze_graph.freeze_graph(input_graph=self.model_path + '/raw_graph_def.pb',
-                                  input_binary=True,
-                                  input_checkpoint=ckpt.model_checkpoint_path,
-                                  output_node_names=target_nodes,
-                                  output_graph=self.model_path + '/' + self.env_name + "_" + self.run_id + '.bytes',
-                                  clear_devices=True, initializer_nodes="", input_saver="",
-                                  restore_op_name="save/restore_all", filename_tensor_name="save/Const:0")
+        freeze_graph.freeze_graph(
+            input_graph=self.model_path + '/raw_graph_def.pb',
+            input_binary=True,
+            input_checkpoint=ckpt.model_checkpoint_path,
+            output_node_names=target_nodes,
+            output_graph=(self.model_path + '/' + self.env_name + "_"
+                + self.run_id + '.bytes'),
+            clear_devices=True, initializer_nodes="", input_saver="",
+            restore_op_name="save/restore_all",
+            filename_tensor_name="save/Const:0")

    def _initialize_trainers(self, trainer_config, sess):
        trainer_parameters_dict = {}
            trainer_parameters_dict[brain_name] = trainer_parameters.copy()
        for brain_name in self.env.external_brain_names:
            if trainer_parameters_dict[brain_name]['trainer'] == "imitation":
-                self.trainers[brain_name] = BehavioralCloningTrainer(sess, self.env, brain_name,
-                                                                     trainer_parameters_dict[brain_name],
-                                                                     self.train_model, self.seed, self.run_id)
+                self.trainers[brain_name] = BehavioralCloningTrainer(
+                    sess, self.env, brain_name,
+                    trainer_parameters_dict[brain_name], self.train_model,
+                    self.seed, self.run_id)
-                self.trainers[brain_name] = PPOTrainer(sess, self.env, brain_name, trainer_parameters_dict[brain_name],
-                                                       self.train_model, self.seed, self.run_id)
+                self.trainers[brain_name] = PPOTrainer(
+                    sess, self.env, brain_name,
+                    trainer_parameters_dict[brain_name],
+                    self.train_model, self.seed, self.run_id)
-                raise UnityEnvironmentException("The trainer config contains an unknown trainer type for brain {}"
+                raise UnityEnvironmentException('The trainer config contains '
+                                                'an unknown trainer type for '
+                                                'brain {}'
                                                .format(brain_name))

    def _load_config(self):
                return trainer_config
        except IOError:
-            raise UnityEnvironmentException("""Parameter file could not be found here {}.
-                                            Will use default Hyper parameters"""
+            raise UnityEnvironmentException('Parameter file could not be found '
+                                            'here {}. Will use default Hyper '
+                                            'parameters.'
-            raise UnityEnvironmentException("There was an error decoding Trainer Config from this path : {}"
+            raise UnityEnvironmentException('There was an error decoding '
+                                            'Trainer Config from this path : {}'
                                            .format(self.trainer_config_path))

    @staticmethod
                os.makedirs(model_path)
        except Exception:
-            raise UnityEnvironmentException("The folder {} containing the generated model could not be accessed."
-                                            " Please make sure the permissions are set correctly."
+            raise UnityEnvironmentException('The folder {} containing the '
+                                            'generated model could not be '
+                                            'accessed. Please make sure the '
+                                            'permissions are set correctly.'
-        # TODO: Should be able to start learning at different lesson numbers for each curriculum.
+        # TODO: Should be able to start learning at different lesson numbers
+        # for each curriculum.
        if self.meta_curriculum is not None:
            self.meta_curriculum.set_all_curriculums_to_lesson_num(self.lesson)
        trainer_config = self._load_config()
                self.logger.info('Loading Model...')
                ckpt = tf.train.get_checkpoint_state(self.model_path)
                if ckpt is None:
-                    self.logger.info('The model {0} could not be found. Make sure you specified the right '
-                                     '--run-id'.format(self.model_path))
+                    self.logger.info('The model {0} could not be found. Make '
+                                     'sure you specified the right '
+                                     '--run-id'
+                                     .format(self.model_path))
                saver.restore(sess, ckpt.model_checkpoint_path)
            else:
                sess.run(init)
-                curr_info = self.env.reset(config=self.meta_curriculum.get_config(), train_mode=self.fast_simulation)
+                curr_info = self.env.reset(
+                    config=self.meta_curriculum.get_config(),
+                    train_mode=self.fast_simulation)
-                    trainer.write_tensorboard_text('Hyperparameters', trainer.parameters)
+                    trainer.write_tensorboard_text('Hyperparameters',
+                                                   trainer.parameters)
-                while any([t.get_step <= t.get_max_steps for k, t in self.trainers.items()]) or not self.train_model:
+                while any([t.get_step <= t.get_max_steps \
+                           for k, t in self.trainers.items()]) \
+                      or not self.train_model:
-                            self.meta_curriculum.increment_lessons(self._get_progresses())
-                            curr_info = self.env.reset(config=self.meta_curriculum.get_config(), train_mode=self.fast_simulation)
+                            self.meta_curriculum.increment_lessons(
+                                self._get_progresses())
+                            curr_info = self.env.reset(
+                                config=self.meta_curriculum.get_config(),
+                                train_mode=self.fast_simulation)
-                            curr_info = self.env.reset(train_mode=self.fast_simulation)
+                            curr_info = self.env.reset(
+                                train_mode=self.fast_simulation)
                        for brain_name, trainer in self.trainers.items():
                            trainer.end_episode()
                    # Decide and take an action
                         take_action_memories[brain_name],
                         take_action_text[brain_name],
                         take_action_value[brain_name],
-                         take_action_outputs[brain_name]) = trainer.take_action(curr_info)
-                    new_info = self.env.step(vector_action=take_action_vector, memory=take_action_memories,
-                                             text_action=take_action_text, value=take_action_value)
+                         take_action_outputs[brain_name]) = \
+                            trainer.take_action(curr_info)
+                    new_info = self.env.step(vector_action=take_action_vector,
+                                             memory=take_action_memories,
+                                             text_action=take_action_text,
+                                             value=take_action_value)
-                        trainer.add_experiences(curr_info, new_info, take_action_outputs[brain_name])
+                        trainer.add_experiences(curr_info, new_info,
+                            take_action_outputs[brain_name])
-                        if trainer.is_ready_update() and self.train_model and trainer.get_step <= trainer.get_max_steps:
+                        if trainer.is_ready_update() and self.train_model \
+                           and trainer.get_step <= trainer.get_max_steps:
-                            trainer.write_summary(lesson=self.meta_curriculum.brains_to_curriculums[brain_name].lesson_num)
+                            trainer.write_summary(
+                                lesson=self.meta_curriculum
+                                           .brains_to_curriculums[brain_name]
+                                           .lesson_num)
-                        if self.train_model and trainer.get_step <= trainer.get_max_steps:
+                        if self.train_model \
+                           and trainer.get_step <= trainer.get_max_steps:
-                    if global_step % self.save_freq == 0 and global_step != 0 and self.train_model:
+                    if global_step % self.save_freq == 0 and global_step != 0 \
+                       and self.train_model:
                        # Save Tensorflow model
                        self._save_model(sess, steps=global_step, saver=saver)
                    curr_info = new_info
            except KeyboardInterrupt:
-                print('--------------------------Now saving model-------------------------')
+                print('--------------------------Now saving model--------------'
+                      '-----------')
-                    self.logger.info("Learning was interrupted. Please wait while the graph is generated.")
+                    self.logger.info('Learning was interrupted. Please wait '
+                                     'while the graph is generated.')
                    self._save_model(sess, steps=global_step, saver=saver)
                pass
        self.env.close()