浏览代码

Enable concurrent sessions

/develop-generalizationTraining-TrainerController
Arthur Juliani 7 年前
当前提交
d7338050
共有 5 个文件被更改,包括 23 次插入和 13 次删除
  1. 17
      python/learn.py
  2. 4
      python/unitytrainers/bc/trainer.py
  3. 4
      python/unitytrainers/ppo/trainer.py
  4. 7
      python/unitytrainers/trainer.py
  5. 4
      python/unitytrainers/trainer_controller.py

17
python/learn.py


import logging
import os
import ray
from docopt import docopt
from unitytrainers.trainer_controller import TrainerController

--keep-checkpoints=<n> How many model checkpoints to keep [default: 5].
--lesson=<n> Start learning from this lesson [default: 0].
--load Whether to load the model or randomly initialize [default: False].
--run-id=<path> The sub-directory name for model and summary statistics [default: ppo].
--run-id=<path> The sub-directory name for model and summary statistics [default: ppo].
--num-runs=<n> Number of runs of session [default: 1].
--save-freq=<n> Frequency at which to save model [default: 50000].
--seed=<n> Random seed used for training [default: -1].
--slow Whether to run the game at training speed [default: False].

# General parameters
run_id = options['--run-id']
num_runs = int(options['--num-runs'])
seed = int(options['--seed'])
load_model = options['--load']
train_model = options['--train']

base_path = os.path.dirname(__file__)
TRAINER_CONFIG_PATH = os.path.abspath(os.path.join(base_path, "trainer_config.yaml"))
tc = TrainerController(env_path, run_id, save_freq, curriculum_file, fast_simulation, load_model, train_model,
worker_id, keep_checkpoints, lesson, seed, docker_target_name, TRAINER_CONFIG_PATH)
tc.start_learning()
@ray.remote
def run_training(sub_id):
    """Run a single training session as a Ray remote task.

    Builds a TrainerController for this session and calls its
    ``start_learning()`` method. The session's run id is the shared
    ``run_id`` with ``"-<sub_id>"`` appended, and its worker id is the shared
    ``worker_id`` plus ``sub_id``; every other setting is passed through
    unchanged from the enclosing scope.

    :param sub_id: Index of this session; it is appended to the run id and
        added to the worker id.
    """
    # Each session gets its own run id suffix.
    session_run_id = run_id + "-" + str(sub_id)
    # NOTE(review): presumably the worker id is offset so that concurrent
    # sessions do not collide on the same environment port; confirm.
    session_worker_id = worker_id + sub_id
    controller = TrainerController(
        env_path, session_run_id, save_freq, curriculum_file, fast_simulation,
        load_model, train_model, session_worker_id, keep_checkpoints, lesson, seed,
        docker_target_name, TRAINER_CONFIG_PATH,
    )
    controller.start_learning()
ray.init()
ray.get([run_training.remote(i) for i in range(num_runs)])

4
python/unitytrainers/bc/trainer.py


class BehavioralCloningTrainer(Trainer):
"""The BehavioralCloningTrainer is an implementation of imitation learning."""
def __init__(self, sess, env, brain_name, trainer_parameters, training, seed):
def __init__(self, sess, env, brain_name, trainer_parameters, training, seed, run_id):
"""
Responsible for collecting experiences and training the behavioral cloning model.
:param sess: Tensorflow session.

raise UnityTrainerException("The hyperparameter {0} could not be found for the Imitation trainer of "
"brain {1}.".format(k, brain_name))
super(BehavioralCloningTrainer, self).__init__(sess, env, brain_name, trainer_parameters, training)
super(BehavioralCloningTrainer, self).__init__(sess, env, brain_name, trainer_parameters, training, run_id)
self.variable_scope = trainer_parameters['graph_scope']
self.brain_to_imitate = trainer_parameters['brain_to_imitate']

4
python/unitytrainers/ppo/trainer.py


class PPOTrainer(Trainer):
"""The PPOTrainer is an implementation of the PPO algorithm."""
def __init__(self, sess, env, brain_name, trainer_parameters, training, seed):
def __init__(self, sess, env, brain_name, trainer_parameters, training, seed, run_id):
"""
Responsible for collecting experiences and training PPO model.
:param sess: Tensorflow session.

raise UnityTrainerException("The hyperparameter {0} could not be found for the PPO trainer of "
"brain {1}.".format(k, brain_name))
super(PPOTrainer, self).__init__(sess, env, brain_name, trainer_parameters, training)
super(PPOTrainer, self).__init__(sess, env, brain_name, trainer_parameters, training, run_id)
self.use_recurrent = trainer_parameters["use_recurrent"]
self.use_curiosity = bool(trainer_parameters['use_curiosity'])

7
python/unitytrainers/trainer.py


class Trainer(object):
"""This class is the abstract class for the unitytrainers"""
def __init__(self, sess, env, brain_name, trainer_parameters, training):
def __init__(self, sess, env, brain_name, trainer_parameters, training, run_id):
"""
Responsible for collecting experiences and training a neural network model.
:param sess: Tensorflow session.

"""
self.brain_name = brain_name
self.run_id = run_id
self.brain = env.brains[self.brain_name]
self.trainer_parameters = trainer_parameters
self.is_training = training

steps = self.get_step
if len(self.stats['cumulative_reward']) > 0:
mean_reward = np.mean(self.stats['cumulative_reward'])
logger.info(" {}: Step: {}. Mean Reward: {:0.3f}. Std of Reward: {:0.3f}."
.format(self.brain_name, steps,
logger.info("{} {}: Step: {}. Mean Reward: {:0.3f}. Std of Reward: {:0.3f}."
.format(self.run_id, self.brain_name, steps,
mean_reward, np.std(self.stats['cumulative_reward'])))
else:
logger.info(" {}: Step: {}. No episode was completed since last summary."

4
python/unitytrainers/trainer_controller.py


if trainer_parameters_dict[brain_name]['trainer'] == "imitation":
self.trainers[brain_name] = BehavioralCloningTrainer(sess, self.env, brain_name,
trainer_parameters_dict[brain_name],
self.train_model, self.seed)
self.train_model, self.seed, self.run_id)
self.train_model, self.seed)
self.train_model, self.seed, self.run_id)
else:
raise UnityEnvironmentException("The trainer config contains an unknown trainer type for brain {}"
.format(brain_name))

正在加载...
取消
保存