Enable concurrent sessions

7 years ago · d7338050
--- a/python/learn.py
+++ b/python/learn.py
 import logging

 import os
+import ray
 from docopt import docopt

 from unitytrainers.trainer_controller import TrainerController
      --keep-checkpoints=<n>     How many model checkpoints to keep [default: 5].
      --lesson=<n>               Start learning from this lesson [default: 0].
      --load                     Whether to load the model or randomly initialize [default: False].
-      --run-id=<path>            The sub-directory name for model and summary statistics [default: ppo]. 
+      --run-id=<path>            The sub-directory name for model and summary statistics [default: ppo].
+      --num-runs=<n>             Number of runs of session [default: 1]. 
      --save-freq=<n>            Frequency at which to save model [default: 50000].
      --seed=<n>                 Random seed used for training [default: -1].
      --slow                     Whether to run the game at training speed [default: False].

    # General parameters
    run_id = options['--run-id']
+    num_runs = int(options['--num-runs'])
    seed = int(options['--seed'])
    load_model = options['--load']
    train_model = options['--train']
    base_path = os.path.dirname(__file__)
    TRAINER_CONFIG_PATH = os.path.abspath(os.path.join(base_path, "trainer_config.yaml"))

-    tc = TrainerController(env_path, run_id, save_freq, curriculum_file, fast_simulation, load_model, train_model,
-                           worker_id, keep_checkpoints, lesson, seed, docker_target_name, TRAINER_CONFIG_PATH)
-    tc.start_learning()
+    @ray.remote
+    def run_training(sub_id):
+        tc = TrainerController(env_path, run_id+"-"+str(sub_id), save_freq, curriculum_file, fast_simulation,
+                               load_model, train_model, worker_id+sub_id, keep_checkpoints, lesson, seed,
+                               docker_target_name, TRAINER_CONFIG_PATH)
+        tc.start_learning()
+
+    ray.init()
+    ray.get([run_training.remote(i) for i in range(num_runs)])
--- a/python/unitytrainers/bc/trainer.py
+++ b/python/unitytrainers/bc/trainer.py
 class BehavioralCloningTrainer(Trainer):
    """The BehavioralCloningTrainer is an implementation of behavioral cloning (imitation learning)."""

-    def __init__(self, sess, env, brain_name, trainer_parameters, training, seed):
+    def __init__(self, sess, env, brain_name, trainer_parameters, training, seed, run_id):
        """
        Responsible for collecting experiences and training the behavioral cloning (imitation) model.
        :param sess: Tensorflow session.
                raise UnityTrainerException("The hyperparameter {0} could not be found for the Imitation trainer of "
                                            "brain {1}.".format(k, brain_name))

-        super(BehavioralCloningTrainer, self).__init__(sess, env, brain_name, trainer_parameters, training)
+        super(BehavioralCloningTrainer, self).__init__(sess, env, brain_name, trainer_parameters, training, run_id)

        self.variable_scope = trainer_parameters['graph_scope']
        self.brain_to_imitate = trainer_parameters['brain_to_imitate']
--- a/python/unitytrainers/ppo/trainer.py
+++ b/python/unitytrainers/ppo/trainer.py
 class PPOTrainer(Trainer):
    """The PPOTrainer is an implementation of the PPO algorithm."""

-    def __init__(self, sess, env, brain_name, trainer_parameters, training, seed):
+    def __init__(self, sess, env, brain_name, trainer_parameters, training, seed, run_id):
        """
        Responsible for collecting experiences and training PPO model.
        :param sess: Tensorflow session.
                raise UnityTrainerException("The hyperparameter {0} could not be found for the PPO trainer of "
                                            "brain {1}.".format(k, brain_name))

-        super(PPOTrainer, self).__init__(sess, env, brain_name, trainer_parameters, training)
+        super(PPOTrainer, self).__init__(sess, env, brain_name, trainer_parameters, training, run_id)

        self.use_recurrent = trainer_parameters["use_recurrent"]
        self.use_curiosity = bool(trainer_parameters['use_curiosity'])
--- a/python/unitytrainers/trainer.py
+++ b/python/unitytrainers/trainer.py
 class Trainer(object):
    """This class is the abstract class for the unitytrainers"""

-    def __init__(self, sess, env, brain_name, trainer_parameters, training):
+    def __init__(self, sess, env, brain_name, trainer_parameters, training, run_id):
        """
        Responsible for collecting experiences and training a neural network model.
        :param sess: Tensorflow session.
        """
        self.brain_name = brain_name
+        self.run_id = run_id
        self.brain = env.brains[self.brain_name]
        self.trainer_parameters = trainer_parameters
        self.is_training = training
            steps = self.get_step
            if len(self.stats['cumulative_reward']) > 0:
                mean_reward = np.mean(self.stats['cumulative_reward'])
-                logger.info(" {}: Step: {}. Mean Reward: {:0.3f}. Std of Reward: {:0.3f}."
-                            .format(self.brain_name, steps,
+                logger.info("{} {}: Step: {}. Mean Reward: {:0.3f}. Std of Reward: {:0.3f}."
+                            .format(self.run_id, self.brain_name, steps,
                                    mean_reward, np.std(self.stats['cumulative_reward'])))
            else:
                logger.info(" {}: Step: {}. No episode was completed since last summary."
--- a/python/unitytrainers/trainer_controller.py
+++ b/python/unitytrainers/trainer_controller.py
            if trainer_parameters_dict[brain_name]['trainer'] == "imitation":
                self.trainers[brain_name] = BehavioralCloningTrainer(sess, self.env, brain_name,
                                                                     trainer_parameters_dict[brain_name],
-                                                                     self.train_model, self.seed)
+                                                                     self.train_model, self.seed, self.run_id)
-                                                       self.train_model, self.seed)
+                                                       self.train_model, self.seed, self.run_id)
            else:
                raise UnityEnvironmentException("The trainer config contains an unknown trainer type for brain {}"
                                                .format(brain_name))