Merge pull request #1004 from Unity-Technologies/develop-runs

Enable multiple runs in learn.py
7 年前 · ef3025e6
--- a/docs/Training-ML-Agents.md
+++ b/docs/Training-ML-Agents.md
 * `--keep-checkpoints=<n>` – Specify the maximum number of model checkpoints to keep. Checkpoints are saved after the number of steps specified by the `save-freq` option. Once the maximum number of checkpoints has been reached, the oldest checkpoint is deleted when saving a new checkpoint. Defaults to 5.
 * `--lesson=<n>` – Specify which lesson to start with when performing curriculum training. Defaults to 0.
 * `--load` – If set, the training code loads an already trained model to initialize the neural network before training. The learning code looks for the model in `python/models/<run-id>/` (which is also where it saves models at the end of training). When not set (the default), the neural network weights are randomly initialized and an existing model is not loaded.
+* `--num-runs=<n>` – Sets the number of concurrent training sessions to perform. Defaults to 1. Set to a higher value when benchmarking performance and multiple training sessions are desired. Training sessions are independent, and do not improve learning performance.
 * `--run-id=<path>` – Specifies an identifier for each training run. This identifier is used to name the subdirectories in which the trained model and summary statistics are saved as well as the saved model itself. The default id is "ppo". If you use TensorBoard to view the training statistics, always set a unique run-id for each training run. (The statistics for all runs with the same id are combined as if they were produced by the same session.)
 * `--save-freq=<n>` – Specifies how often (in steps) to save the model during training. Defaults to 50000.
 * `--seed=<n>` – Specifies a number to use as a seed for the random number generator used by the training code.
--- a/python/learn.py
+++ b/python/learn.py
 import logging

 import os
+import multiprocessing
-
+from unitytrainers.exception import TrainerError

 if __name__ == '__main__':
    print('''
      --keep-checkpoints=<n>     How many model checkpoints to keep [default: 5].
      --lesson=<n>               Start learning from this lesson [default: 0].
      --load                     Whether to load the model or randomly initialize [default: False].
-      --run-id=<path>            The sub-directory name for model and summary statistics [default: ppo]. 
+      --run-id=<path>            The sub-directory name for model and summary statistics [default: ppo].
+      --num-runs=<n>             Number of concurrent training sessions [default: 1]. 
      --save-freq=<n>            Frequency at which to save model [default: 50000].
      --seed=<n>                 Random seed used for training [default: -1].
      --slow                     Whether to run the game at training speed [default: False].

    # General parameters
    run_id = options['--run-id']
+    num_runs = int(options['--num-runs'])
    seed = int(options['--seed'])
    load_model = options['--load']
    train_model = options['--train']
    base_path = os.path.dirname(__file__)
    TRAINER_CONFIG_PATH = os.path.abspath(os.path.join(base_path, "trainer_config.yaml"))

-    tc = TrainerController(env_path, run_id, save_freq, curriculum_file, fast_simulation, load_model, train_model,
-                           worker_id, keep_checkpoints, lesson, seed, docker_target_name, TRAINER_CONFIG_PATH,
-                           no_graphics)
-    tc.start_learning()
+
+    def run_training(sub_id):
+        tc = TrainerController(env_path, run_id + "-" + str(sub_id), save_freq, curriculum_file, fast_simulation,
+                               load_model, train_model, worker_id + sub_id, keep_checkpoints, lesson, seed,
+                               docker_target_name, TRAINER_CONFIG_PATH, no_graphics)
+        tc.start_learning()
+
+
+    if env_path is None and num_runs > 1:
+        raise TrainerError("It is not possible to launch more than one concurrent training session "
+                           "when training from the editor")
+
+    jobs = []
+    for i in range(num_runs):
+        p = multiprocessing.Process(target=run_training, args=(i,))
+        jobs.append(p)
+        p.start()
--- a/python/unitytrainers/bc/trainer.py
+++ b/python/unitytrainers/bc/trainer.py
 class BehavioralCloningTrainer(Trainer):
    """The ImitationTrainer is an implementation of the imitation learning."""

-    def __init__(self, sess, env, brain_name, trainer_parameters, training, seed):
+    def __init__(self, sess, env, brain_name, trainer_parameters, training, seed, run_id):
        """
        Responsible for collecting experiences and training PPO model.
        :param sess: Tensorflow session.
                raise UnityTrainerException("The hyperparameter {0} could not be found for the Imitation trainer of "
                                            "brain {1}.".format(k, brain_name))

-        super(BehavioralCloningTrainer, self).__init__(sess, env, brain_name, trainer_parameters, training)
+        super(BehavioralCloningTrainer, self).__init__(sess, env, brain_name, trainer_parameters, training, run_id)

        self.variable_scope = trainer_parameters['graph_scope']
        self.brain_to_imitate = trainer_parameters['brain_to_imitate']
--- a/python/unitytrainers/ppo/trainer.py
+++ b/python/unitytrainers/ppo/trainer.py
 class PPOTrainer(Trainer):
    """The PPOTrainer is an implementation of the PPO algorithm."""

-    def __init__(self, sess, env, brain_name, trainer_parameters, training, seed):
+    def __init__(self, sess, env, brain_name, trainer_parameters, training, seed, run_id):
        """
        Responsible for collecting experiences and training PPO model.
        :param sess: Tensorflow session.
                raise UnityTrainerException("The hyperparameter {0} could not be found for the PPO trainer of "
                                            "brain {1}.".format(k, brain_name))

-        super(PPOTrainer, self).__init__(sess, env, brain_name, trainer_parameters, training)
+        super(PPOTrainer, self).__init__(sess, env, brain_name, trainer_parameters, training, run_id)

        self.use_recurrent = trainer_parameters["use_recurrent"]
        self.use_curiosity = bool(trainer_parameters['use_curiosity'])
--- a/python/unitytrainers/trainer.py
+++ b/python/unitytrainers/trainer.py
 class Trainer(object):
    """This class is the abstract class for the unitytrainers"""

-    def __init__(self, sess, env, brain_name, trainer_parameters, training):
+    def __init__(self, sess, env, brain_name, trainer_parameters, training, run_id):
        """
        Responsible for collecting experiences and training a neural network model.
        :param sess: Tensorflow session.
        """
        self.brain_name = brain_name
+        self.run_id = run_id
        self.brain = env.brains[self.brain_name]
        self.trainer_parameters = trainer_parameters
        self.is_training = training
                self.is_training and self.get_step <= self.get_max_steps):
            if len(self.stats['cumulative_reward']) > 0:
                mean_reward = np.mean(self.stats['cumulative_reward'])
-                logger.info(" {}: Step: {}. Mean Reward: {:0.3f}. Std of Reward: {:0.3f}."
-                            .format(self.brain_name, self.get_step,
+                logger.info("{}: {}: Step: {}. Mean Reward: {:0.3f}. Std of Reward: {:0.3f}."
+                            .format(self.run_id, self.brain_name, self.get_step,
-                logger.info(" {}: Step: {}. No episode was completed since last summary."
-                            .format(self.brain_name, self.get_step))
+                logger.info("{}: {}: Step: {}. No episode was completed since last summary."
+                            .format(self.run_id, self.brain_name, self.get_step))
            summary = tf.Summary()
            for key in self.stats:
                if len(self.stats[key]) > 0:
--- a/python/unitytrainers/trainer_controller.py
+++ b/python/unitytrainers/trainer_controller.py
            if trainer_parameters_dict[brain_name]['trainer'] == "imitation":
                self.trainers[brain_name] = BehavioralCloningTrainer(sess, self.env, brain_name,
                                                                     trainer_parameters_dict[brain_name],
-                                                                     self.train_model, self.seed)
+                                                                     self.train_model, self.seed, self.run_id)
-                                                       self.train_model, self.seed)
+                                                       self.train_model, self.seed, self.run_id)
            else:
                raise UnityEnvironmentException("The trainer config contains an unknown trainer type for brain {}"
                                                .format(brain_name))