|
|
|
|
|
|
# # Unity ML-Agents Toolkit |
|
|
|
# ## ML-Agent Learning |
|
|
|
# Launches unitytrainers for each External Brain in a Unity Environment |
|
|
|
"""Launches unitytrainers for each External Brains in a Unity Environment.""" |
|
|
|
import logging |
|
|
|
import logging |
|
|
|
|
|
|
|
|
|
|
|
import numpy as np |
|
|
|
import tensorflow as tf |
|
|
|
from tensorflow.python.tools import freeze_graph |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TrainerController(object): |
|
|
|
def __init__(self, env_path, run_id, save_freq, curriculum_folder, fast_simulation, load, train, |
|
|
|
worker_id, keep_checkpoints, lesson, seed, docker_target_name, trainer_config_path, |
|
|
|
def __init__(self, env_path, run_id, save_freq, curriculum_folder, |
|
|
|
fast_simulation, load, train, worker_id, keep_checkpoints, |
|
|
|
lesson, seed, docker_target_name, trainer_config_path, |
|
|
|
:param curriculum_folder: Folder containing JSON curriculums for the env |
|
|
|
:param fast_simulation: Whether to run the game at training speed |
|
|
|
:param load: Whether to load the model or randomly initialize |
|
|
|
:param train: Whether to train model, or only run inference |
|
|
|
:param worker_id: Number to add to communication port (5005). Used for multi-environment runs. |
|
|
|
:param keep_checkpoints: How many model checkpoints to keep |
|
|
|
:param lesson: Start learning from this lesson |
|
|
|
:param curriculum_folder: Folder containing JSON curriculums for the |
|
|
|
environment. |
|
|
|
:param fast_simulation: Whether to run the game at training speed. |
|
|
|
:param load: Whether to load the model or randomly initialize. |
|
|
|
:param train: Whether to train model, or only run inference. |
|
|
|
:param worker_id: Number to add to communication port (5005). |
|
|
|
Used for multi-environment runs. |
|
|
|
:param keep_checkpoints: How many model checkpoints to keep. |
|
|
|
:param lesson: Start learning from this lesson. |
|
|
|
:param docker_target_name: Name of docker volume that will contain all data. |
|
|
|
:param trainer_config_path: Fully qualified path to location of trainer configuration file |
|
|
|
:param no_graphics: Whether to run the Unity simulator in no-graphics mode |
|
|
|
:param docker_target_name: Name of docker volume that will contain all |
|
|
|
data. |
|
|
|
:param trainer_config_path: Fully qualified path to location of trainer |
|
|
|
configuration file. |
|
|
|
:param no_graphics: Whether to run the Unity simulator in no-graphics |
|
|
|
mode. |
|
|
|
# Strip out executable extensions if passed |
|
|
|
.replace('.x86', '')) # Strip out executable extensions if passed |
|
|
|
.replace('.x86', '')) |
|
|
|
|
|
|
|
# Recognize and use docker volume if one is passed as an argument |
|
|
|
if docker_target_name == '': |
|
|
|
|
|
|
docker_target_name=docker_target_name, |
|
|
|
run_id=run_id) |
|
|
|
if env_path is not None: |
|
|
|
env_path = '/{docker_target_name}/{env_name}'.format(docker_target_name=docker_target_name, |
|
|
|
env_name=env_path) |
|
|
|
env_path = '/{docker_target_name}/{env_name}'.format( |
|
|
|
docker_target_name=docker_target_name, env_name=env_path) |
|
|
|
self.curriculum_folder = '/{docker_target_name}/{curriculum_file}'.format( |
|
|
|
self.curriculum_folder = \ |
|
|
|
'/{docker_target_name}/{curriculum_file}'.format( |
|
|
|
self.summaries_dir = '/{docker_target_name}/summaries'.format(docker_target_name=docker_target_name) |
|
|
|
self.summaries_dir = '/{docker_target_name}/summaries'.format( |
|
|
|
docker_target_name=docker_target_name) |
|
|
|
|
|
|
|
self.logger = logging.getLogger("unityagents") |
|
|
|
self.run_id = run_id |
|
|
|
|
|
|
self.seed = seed |
|
|
|
np.random.seed(self.seed) |
|
|
|
tf.set_random_seed(self.seed) |
|
|
|
self.env = UnityEnvironment(file_name=env_path, worker_id=self.worker_id, |
|
|
|
seed=self.seed, docker_training=self.docker_training, |
|
|
|
self.env = UnityEnvironment(file_name=env_path, |
|
|
|
worker_id=self.worker_id, |
|
|
|
seed=self.seed, |
|
|
|
docker_training=self.docker_training, |
|
|
|
self.env_name = os.path.basename(os.path.normpath(env_path)) # Extract out name of environment |
|
|
|
# Extract out name of environment |
|
|
|
self.env_name = os.path.basename(os.path.normpath(env_path)) |
|
|
|
self.meta_curriculum = MetaCurriculum(self.curriculum_folder, self.env._resetParameters) |
|
|
|
self.meta_curriculum = MetaCurriculum(self.curriculum_folder, |
|
|
|
self.env._resetParameters) |
|
|
|
if self.meta_curriculum is not None: |
|
|
|
if self.meta_curriculum: |
|
|
|
for brain_name in self.meta_curriculum.brains_to_curriculums.keys(): |
|
|
|
if brain_name not in self.env.external_brain_names: |
|
|
|
raise MetaCurriculumError('One of the curriculums ' |
|
|
|
|
|
|
'whose curriculum it defines.') |
|
|
|
|
|
|
|
def _get_progresses(self): |
|
|
|
if self.meta_curriculum is not None: |
|
|
|
if self.meta_curriculum: |
|
|
|
for brain_name, curriculum in self.meta_curriculum.brains_to_curriculums.items(): |
|
|
|
for brain_name, curriculum \ |
|
|
|
in self.meta_curriculum.brains_to_curriculums.items(): |
|
|
|
progress = self.trainers[brain_name].get_step / self.trainers[brain_name].get_max_steps |
|
|
|
progress = (self.trainers[brain_name].get_step / |
|
|
|
self.trainers[brain_name].get_max_steps) |
|
|
|
brain_names_to_progresses[brain_name] = progress |
|
|
|
elif curriculum.measure == "reward": |
|
|
|
progress = self.trainers[brain_name].get_last_reward |
|
|
|
|
|
|
if scope == '/': |
|
|
|
scope = '' |
|
|
|
scopes += [scope] |
|
|
|
if self.trainers[brain_name].parameters["trainer"] == "imitation": |
|
|
|
if self.trainers[brain_name].parameters["trainer"] \ |
|
|
|
== "imitation": |
|
|
|
nodes += [scope + x for x in ["action", "value_estimate", "action_probs", "value_estimate"]] |
|
|
|
nodes += [scope + x for x in ["action", "value_estimate", |
|
|
|
"action_probs", "value_estimate"]] |
|
|
|
nodes += [scope + x for x in ["recurrent_out", "memory_size"]] |
|
|
|
nodes += [scope + x for x in ["recurrent_out", |
|
|
|
"memory_size"]] |
|
|
|
if len(scopes) > 1: |
|
|
|
self.logger.info("List of available scopes :") |
|
|
|
for scope in scopes: |
|
|
|
|
|
|
""" |
|
|
|
last_checkpoint = self.model_path + '/model-' + str(steps) + '.cptk' |
|
|
|
saver.save(sess, last_checkpoint) |
|
|
|
tf.train.write_graph(sess.graph_def, self.model_path, 'raw_graph_def.pb', as_text=False) |
|
|
|
tf.train.write_graph(sess.graph_def, self.model_path, |
|
|
|
'raw_graph_def.pb', as_text=False) |
|
|
|
self.logger.info("Saved Model") |
|
|
|
|
|
|
|
def _export_graph(self): |
|
|
|
|
|
|
target_nodes = ','.join(self._process_graph()) |
|
|
|
ckpt = tf.train.get_checkpoint_state(self.model_path) |
|
|
|
freeze_graph.freeze_graph(input_graph=self.model_path + '/raw_graph_def.pb', |
|
|
|
input_binary=True, |
|
|
|
input_checkpoint=ckpt.model_checkpoint_path, |
|
|
|
output_node_names=target_nodes, |
|
|
|
output_graph=self.model_path + '/' + self.env_name + "_" + self.run_id + '.bytes', |
|
|
|
clear_devices=True, initializer_nodes="", input_saver="", |
|
|
|
restore_op_name="save/restore_all", filename_tensor_name="save/Const:0") |
|
|
|
freeze_graph.freeze_graph( |
|
|
|
input_graph=self.model_path + '/raw_graph_def.pb', |
|
|
|
input_binary=True, |
|
|
|
input_checkpoint=ckpt.model_checkpoint_path, |
|
|
|
output_node_names=target_nodes, |
|
|
|
output_graph=(self.model_path + '/' + self.env_name + "_" |
|
|
|
+ self.run_id + '.bytes'), |
|
|
|
clear_devices=True, initializer_nodes="", input_saver="", |
|
|
|
restore_op_name="save/restore_all", |
|
|
|
filename_tensor_name="save/Const:0") |
|
|
|
|
|
|
|
def _initialize_trainers(self, trainer_config, sess): |
|
|
|
trainer_parameters_dict = {} |
|
|
|
|
|
|
trainer_parameters_dict[brain_name] = trainer_parameters.copy() |
|
|
|
for brain_name in self.env.external_brain_names: |
|
|
|
if trainer_parameters_dict[brain_name]['trainer'] == "imitation": |
|
|
|
self.trainers[brain_name] = BehavioralCloningTrainer(sess, self.env, brain_name, |
|
|
|
trainer_parameters_dict[brain_name], |
|
|
|
self.train_model, self.seed, self.run_id) |
|
|
|
self.trainers[brain_name] = BehavioralCloningTrainer( |
|
|
|
sess, self.env, brain_name, |
|
|
|
trainer_parameters_dict[brain_name], self.train_model, |
|
|
|
self.seed, self.run_id) |
|
|
|
self.trainers[brain_name] = PPOTrainer(sess, self.env, brain_name, trainer_parameters_dict[brain_name], |
|
|
|
self.train_model, self.seed, self.run_id) |
|
|
|
self.trainers[brain_name] = PPOTrainer( |
|
|
|
sess, self.env, brain_name, |
|
|
|
trainer_parameters_dict[brain_name], |
|
|
|
self.train_model, self.seed, self.run_id) |
|
|
|
raise UnityEnvironmentException("The trainer config contains an unknown trainer type for brain {}" |
|
|
|
raise UnityEnvironmentException('The trainer config contains ' |
|
|
|
'an unknown trainer type for ' |
|
|
|
'brain {}' |
|
|
|
.format(brain_name)) |
|
|
|
|
|
|
|
def _load_config(self): |
|
|
|
|
|
|
return trainer_config |
|
|
|
except IOError: |
|
|
|
raise UnityEnvironmentException("""Parameter file could not be found here {}. |
|
|
|
Will use default Hyper parameters""" |
|
|
|
raise UnityEnvironmentException('Parameter file could not be found ' |
|
|
|
'here {}. Will use default Hyper ' |
|
|
|
'parameters.' |
|
|
|
raise UnityEnvironmentException("There was an error decoding Trainer Config from this path : {}" |
|
|
|
raise UnityEnvironmentException('There was an error decoding ' |
|
|
|
'Trainer Config from this path : {}' |
|
|
|
.format(self.trainer_config_path)) |
|
|
|
|
|
|
|
@staticmethod |
|
|
|
|
|
|
os.makedirs(model_path) |
|
|
|
except Exception: |
|
|
|
raise UnityEnvironmentException("The folder {} containing the generated model could not be accessed." |
|
|
|
" Please make sure the permissions are set correctly." |
|
|
|
raise UnityEnvironmentException('The folder {} containing the ' |
|
|
|
'generated model could not be ' |
|
|
|
'accessed. Please make sure the ' |
|
|
|
'permissions are set correctly.' |
|
|
|
# TODO: Should be able to start learning at different lesson numbers for each curriculum. |
|
|
|
# TODO: Should be able to start learning at different lesson numbers |
|
|
|
# for each curriculum. |
|
|
|
if self.meta_curriculum is not None: |
|
|
|
self.meta_curriculum.set_all_curriculums_to_lesson_num(self.lesson) |
|
|
|
trainer_config = self._load_config() |
|
|
|
|
|
|
self.logger.info('Loading Model...') |
|
|
|
ckpt = tf.train.get_checkpoint_state(self.model_path) |
|
|
|
if ckpt is None: |
|
|
|
self.logger.info('The model {0} could not be found. Make sure you specified the right ' |
|
|
|
'--run-id'.format(self.model_path)) |
|
|
|
self.logger.info('The model {0} could not be found. Make ' |
|
|
|
'sure you specified the right ' |
|
|
|
'--run-id' |
|
|
|
.format(self.model_path)) |
|
|
|
saver.restore(sess, ckpt.model_checkpoint_path) |
|
|
|
else: |
|
|
|
sess.run(init) |
|
|
|
|
|
|
curr_info = self.env.reset(config=self.meta_curriculum.get_config(), train_mode=self.fast_simulation) |
|
|
|
curr_info = self.env.reset( |
|
|
|
config=self.meta_curriculum.get_config(), |
|
|
|
train_mode=self.fast_simulation) |
|
|
|
trainer.write_tensorboard_text('Hyperparameters', trainer.parameters) |
|
|
|
trainer.write_tensorboard_text('Hyperparameters', |
|
|
|
trainer.parameters) |
|
|
|
while any([t.get_step <= t.get_max_steps for k, t in self.trainers.items()]) or not self.train_model: |
|
|
|
while any([t.get_step <= t.get_max_steps \ |
|
|
|
for k, t in self.trainers.items()]) \ |
|
|
|
or not self.train_model: |
|
|
|
self.meta_curriculum.increment_lessons(self._get_progresses()) |
|
|
|
curr_info = self.env.reset(config=self.meta_curriculum.get_config(), train_mode=self.fast_simulation) |
|
|
|
self.meta_curriculum.increment_lessons( |
|
|
|
self._get_progresses()) |
|
|
|
curr_info = self.env.reset( |
|
|
|
config=self.meta_curriculum.get_config(), |
|
|
|
train_mode=self.fast_simulation) |
|
|
|
curr_info = self.env.reset(train_mode=self.fast_simulation) |
|
|
|
curr_info = self.env.reset( |
|
|
|
train_mode=self.fast_simulation) |
|
|
|
for brain_name, trainer in self.trainers.items(): |
|
|
|
trainer.end_episode() |
|
|
|
# Decide and take an action |
|
|
|
|
|
|
take_action_memories[brain_name], |
|
|
|
take_action_text[brain_name], |
|
|
|
take_action_value[brain_name], |
|
|
|
take_action_outputs[brain_name]) = trainer.take_action(curr_info) |
|
|
|
new_info = self.env.step(vector_action=take_action_vector, memory=take_action_memories, |
|
|
|
text_action=take_action_text, value=take_action_value) |
|
|
|
take_action_outputs[brain_name]) = \ |
|
|
|
trainer.take_action(curr_info) |
|
|
|
new_info = self.env.step(vector_action=take_action_vector, |
|
|
|
memory=take_action_memories, |
|
|
|
text_action=take_action_text, |
|
|
|
value=take_action_value) |
|
|
|
trainer.add_experiences(curr_info, new_info, take_action_outputs[brain_name]) |
|
|
|
trainer.add_experiences(curr_info, new_info, |
|
|
|
take_action_outputs[brain_name]) |
|
|
|
if trainer.is_ready_update() and self.train_model and trainer.get_step <= trainer.get_max_steps: |
|
|
|
if trainer.is_ready_update() and self.train_model \ |
|
|
|
and trainer.get_step <= trainer.get_max_steps: |
|
|
|
trainer.write_summary(lesson=self.meta_curriculum.brains_to_curriculums[brain_name].lesson_num) |
|
|
|
trainer.write_summary( |
|
|
|
lesson=self.meta_curriculum |
|
|
|
.brains_to_curriculums[brain_name] |
|
|
|
.lesson_num) |
|
|
|
if self.train_model and trainer.get_step <= trainer.get_max_steps: |
|
|
|
if self.train_model \ |
|
|
|
and trainer.get_step <= trainer.get_max_steps: |
|
|
|
if global_step % self.save_freq == 0 and global_step != 0 and self.train_model: |
|
|
|
if global_step % self.save_freq == 0 and global_step != 0 \ |
|
|
|
and self.train_model: |
|
|
|
# Save TensorFlow model |
|
|
|
self._save_model(sess, steps=global_step, saver=saver) |
|
|
|
curr_info = new_info |
|
|
|
|
|
|
except KeyboardInterrupt: |
|
|
|
print('--------------------------Now saving model-------------------------') |
|
|
|
print('--------------------------Now saving model--------------' |
|
|
|
'-----------') |
|
|
|
self.logger.info("Learning was interrupted. Please wait while the graph is generated.") |
|
|
|
self.logger.info('Learning was interrupted. Please wait ' |
|
|
|
'while the graph is generated.') |
|
|
|
self._save_model(sess, steps=global_step, saver=saver) |
|
|
|
pass |
|
|
|
self.env.close() |