浏览代码

Merge pull request #1004 from Unity-Technologies/develop-runs

Enable multiple runs in learn.py
/develop-generalizationTraining-TrainerController
GitHub 7 年前
当前提交
ef3025e6
共有 6 个文件被更改,包括 35 次插入17 次删除
  1. 1
      docs/Training-ML-Agents.md
  2. 28
      python/learn.py
  3. 4
      python/unitytrainers/bc/trainer.py
  4. 4
      python/unitytrainers/ppo/trainer.py
  5. 11
      python/unitytrainers/trainer.py
  6. 4
      python/unitytrainers/trainer_controller.py

1
docs/Training-ML-Agents.md


* `--keep-checkpoints=<n>` – Specify the maximum number of model checkpoints to keep. Checkpoints are saved after the number of steps specified by the `save-freq` option. Once the maximum number of checkpoints has been reached, the oldest checkpoint is deleted when saving a new checkpoint. Defaults to 5.
* `--lesson=<n>` – Specify which lesson to start with when performing curriculum training. Defaults to 0.
* `--load` – If set, the training code loads an already trained model to initialize the neural network before training. The learning code looks for the model in `python/models/<run-id>/` (which is also where it saves models at the end of training). When not set (the default), the neural network weights are randomly initialized and an existing model is not loaded.
* `--num-runs=<n>` – Sets the number of concurrent training sessions to perform. Defaults to 1. Set to a higher value when benchmarking performance and multiple training sessions are desired. Training sessions are independent of one another, and running several of them does not improve learning performance.
* `--run-id=<path>` – Specifies an identifier for each training run. This identifier is used to name the subdirectories in which the trained model and summary statistics are saved as well as the saved model itself. The default id is "ppo". If you use TensorBoard to view the training statistics, always set a unique run-id for each training run. (The statistics for all runs with the same id are combined as if they were produced by the same session.)
* `--save-freq=<n>` – Specifies how often (in steps) to save the model during training. Defaults to 50000.
* `--seed=<n>` – Specifies a number to use as a seed for the random number generator used by the training code.

28
python/learn.py


import logging
import os
import multiprocessing
from unitytrainers.exception import TrainerError
if __name__ == '__main__':
print('''

--keep-checkpoints=<n> How many model checkpoints to keep [default: 5].
--lesson=<n> Start learning from this lesson [default: 0].
--load Whether to load the model or randomly initialize [default: False].
--run-id=<path> The sub-directory name for model and summary statistics [default: ppo].
--run-id=<path> The sub-directory name for model and summary statistics [default: ppo].
--num-runs=<n> Number of concurrent training sessions [default: 1].
--save-freq=<n> Frequency at which to save model [default: 50000].
--seed=<n> Random seed used for training [default: -1].
--slow Whether to run the game at training speed [default: False].

# General parameters
run_id = options['--run-id']
num_runs = int(options['--num-runs'])
seed = int(options['--seed'])
load_model = options['--load']
train_model = options['--train']

base_path = os.path.dirname(__file__)
TRAINER_CONFIG_PATH = os.path.abspath(os.path.join(base_path, "trainer_config.yaml"))
tc = TrainerController(env_path, run_id, save_freq, curriculum_file, fast_simulation, load_model, train_model,
worker_id, keep_checkpoints, lesson, seed, docker_target_name, TRAINER_CONFIG_PATH,
no_graphics)
tc.start_learning()
def run_training(sub_id):
    """
    Launches one training session for the given sub-run index.
    :param sub_id: Index of the session. It is appended to the run id (as "<run_id>-<sub_id>")
    and added to the base worker id before the TrainerController is created.
    """
    # Derive the per-session identifiers from the shared command-line options.
    session_run_id = run_id + "-" + str(sub_id)
    session_worker_id = worker_id + sub_id
    controller = TrainerController(env_path, session_run_id, save_freq, curriculum_file, fast_simulation,
                                   load_model, train_model, session_worker_id, keep_checkpoints, lesson, seed,
                                   docker_target_name, TRAINER_CONFIG_PATH, no_graphics)
    controller.start_learning()
# Several concurrent sessions are rejected when no environment path was given
# (the error message describes this case as training from the editor).
if num_runs > 1 and env_path is None:
    raise TrainerError("It is not possible to launch more than one concurrent training session "
                       "when training from the editor")
# Start one process per requested run, keeping a handle to each in `jobs`.
jobs = []
for sub_id in range(num_runs):
    process = multiprocessing.Process(target=run_training, args=(sub_id,))
    jobs.append(process)
    process.start()

4
python/unitytrainers/bc/trainer.py


class BehavioralCloningTrainer(Trainer):
"""The ImitationTrainer is an implementation of the imitation learning."""
def __init__(self, sess, env, brain_name, trainer_parameters, training, seed):
def __init__(self, sess, env, brain_name, trainer_parameters, training, seed, run_id):
"""
Responsible for collecting experiences and training PPO model.
:param sess: Tensorflow session.

raise UnityTrainerException("The hyperparameter {0} could not be found for the Imitation trainer of "
"brain {1}.".format(k, brain_name))
super(BehavioralCloningTrainer, self).__init__(sess, env, brain_name, trainer_parameters, training)
super(BehavioralCloningTrainer, self).__init__(sess, env, brain_name, trainer_parameters, training, run_id)
self.variable_scope = trainer_parameters['graph_scope']
self.brain_to_imitate = trainer_parameters['brain_to_imitate']

4
python/unitytrainers/ppo/trainer.py


class PPOTrainer(Trainer):
"""The PPOTrainer is an implementation of the PPO algorithm."""
def __init__(self, sess, env, brain_name, trainer_parameters, training, seed):
def __init__(self, sess, env, brain_name, trainer_parameters, training, seed, run_id):
"""
Responsible for collecting experiences and training PPO model.
:param sess: Tensorflow session.

raise UnityTrainerException("The hyperparameter {0} could not be found for the PPO trainer of "
"brain {1}.".format(k, brain_name))
super(PPOTrainer, self).__init__(sess, env, brain_name, trainer_parameters, training)
super(PPOTrainer, self).__init__(sess, env, brain_name, trainer_parameters, training, run_id)
self.use_recurrent = trainer_parameters["use_recurrent"]
self.use_curiosity = bool(trainer_parameters['use_curiosity'])

11
python/unitytrainers/trainer.py


class Trainer(object):
"""This class is the abstract class for the unitytrainers"""
def __init__(self, sess, env, brain_name, trainer_parameters, training):
def __init__(self, sess, env, brain_name, trainer_parameters, training, run_id):
"""
Responsible for collecting experiences and training a neural network model.
:param sess: Tensorflow session.

"""
self.brain_name = brain_name
self.run_id = run_id
self.brain = env.brains[self.brain_name]
self.trainer_parameters = trainer_parameters
self.is_training = training

self.is_training and self.get_step <= self.get_max_steps):
if len(self.stats['cumulative_reward']) > 0:
mean_reward = np.mean(self.stats['cumulative_reward'])
logger.info(" {}: Step: {}. Mean Reward: {:0.3f}. Std of Reward: {:0.3f}."
.format(self.brain_name, self.get_step,
logger.info("{}: {}: Step: {}. Mean Reward: {:0.3f}. Std of Reward: {:0.3f}."
.format(self.run_id, self.brain_name, self.get_step,
logger.info(" {}: Step: {}. No episode was completed since last summary."
.format(self.brain_name, self.get_step))
logger.info("{}: {}: Step: {}. No episode was completed since last summary."
.format(self.run_id, self.brain_name, self.get_step))
summary = tf.Summary()
for key in self.stats:
if len(self.stats[key]) > 0:

4
python/unitytrainers/trainer_controller.py


if trainer_parameters_dict[brain_name]['trainer'] == "imitation":
self.trainers[brain_name] = BehavioralCloningTrainer(sess, self.env, brain_name,
trainer_parameters_dict[brain_name],
self.train_model, self.seed)
self.train_model, self.seed, self.run_id)
self.train_model, self.seed)
self.train_model, self.seed, self.run_id)
else:
raise UnityEnvironmentException("The trainer config contains an unknown trainer type for brain {}"
.format(brain_name))

正在加载...
取消
保存