        else:
            return None

    def _process_graph(self):
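        """
        Collects the graph node names that must be preserved when the graph
        is frozen for export to Unity.
        :return: A list of node names, prefixed with their graph scope.
        """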
        nodes = []
        scopes = []
        for brain_name in self.trainers.keys():
            if self.trainers[brain_name].policy.graph_scope is not None:
                scope = self.trainers[brain_name].policy.graph_scope + '/'
                if scope == '/':
                    scope = ''
                scopes += [scope]
                if self.trainers[brain_name].parameters['trainer'] \
                        == 'imitation':
                    nodes += [scope + x for x in ['action']]
                else:
                    nodes += [scope + x for x in ['action', 'value_estimate',
                                                  'action_probs']]
                if self.trainers[brain_name].parameters['use_recurrent']:
                    nodes += [scope + x for x in ['recurrent_out',
                                                  'memory_size']]
        if len(scopes) > 1:
            self.logger.info('List of available scopes:')
            for scope in scopes:
                self.logger.info('\t' + scope)
        self.logger.info('List of nodes to export:')
        for n in nodes:
            self.logger.info('\t' + n)
        return nodes

    def _save_model(self, sess, saver, steps=0):
        """
        Saves the current model to the checkpoint folder.
        :param sess: Current Tensorflow session.
        :param saver: Tensorflow saver for the session.
        :param steps: Current number of steps in the training process.
        """
        last_checkpoint = self.model_path + '/model-' + str(steps) + '.cptk'
        saver.save(sess, last_checkpoint)
        tf.train.write_graph(sess.graph_def, self.model_path,
                             'raw_graph_def.pb', as_text=False)

    def _export_graph(self):
        """
        Exports the latest saved model to .bytes format for Unity embedding.
        """
        target_nodes = ','.join(self._process_graph())
        ckpt = tf.train.get_checkpoint_state(self.model_path)
        freeze_graph.freeze_graph(
            input_graph=self.model_path + '/raw_graph_def.pb',
            input_binary=True,
            input_checkpoint=ckpt.model_checkpoint_path,
            output_node_names=target_nodes,
            output_graph=(self.model_path + '/' + self.env_name + '_'
                          + self.run_id + '.bytes'),
            clear_devices=True, initializer_nodes='', input_saver='',
            restore_op_name='save/restore_all',
            filename_tensor_name='save/Const:0')

    def _initialize_trainers(self, trainer_config, sess):
        """
        Initializes the trainers.
        :param trainer_config: The configurations of the trainers.
        :param sess: Current Tensorflow session.
        """
        trainer_parameters_dict = {}
        # TODO: This probably doesn't need to be reinitialized.
        self.trainers = {}
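        # Build one parameter dict per external brain, starting from the
        # 'default' section of the trainer config and applying any
        # brain-specific overrides.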
        for brain_name in self.env.external_brain_names:
            trainer_parameters = trainer_config['default'].copy()
            if len(self.env.external_brain_names) > 1:
                graph_scope = re.sub('[^0-9a-zA-Z]+', '-', brain_name)
                trainer_parameters['graph_scope'] = graph_scope
                trainer_parameters['summary_path'] = '{basedir}/{name}'.format(
                    basedir=self.summaries_dir,
                    name=str(self.run_id) + '_' + graph_scope)
            else:
                trainer_parameters['graph_scope'] = ''
                trainer_parameters['summary_path'] = '{basedir}/{name}'.format(
                    basedir=self.summaries_dir,
                    name=str(self.run_id))
            trainer_parameters['model_path'] = '{basedir}/{name}'.format(
                basedir=self.model_path,
                name=brain_name)
            trainer_parameters['keep_checkpoints'] = self.keep_checkpoints
            if brain_name in trainer_config:
                _brain_key = brain_name
                # A brain entry may be an alias (a string naming another
                # entry) rather than a dict of overrides; follow the chain.
                while not isinstance(trainer_config[_brain_key], dict):
                    _brain_key = trainer_config[_brain_key]
                for k in trainer_config[_brain_key]:
                    trainer_parameters[k] = trainer_config[_brain_key][k]
            trainer_parameters_dict[brain_name] = trainer_parameters.copy()
        for brain_name in self.env.external_brain_names:
            if trainer_parameters_dict[brain_name]['trainer'] == 'imitation':
                self.trainers[brain_name] = BehavioralCloningTrainer(
                    sess, self.env.brains[brain_name],
                    trainer_parameters_dict[brain_name],
                    self.train_model, self.seed, self.run_id)
            elif trainer_parameters_dict[brain_name]['trainer'] == 'ppo':
                self.trainers[brain_name] = PPOTrainer(
                    sess, self.env.brains[brain_name],
                    trainer_parameters_dict[brain_name],
                    self.train_model, self.seed, self.run_id)
            else:
                raise UnityEnvironmentException('The trainer config contains '
                                                'an unknown trainer type for '
                                                'brain {}'
                                                .format(brain_name))
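
    def start_learning(self):
        """
        Runs the main training/inference loop over all external brains.
        """
        # NOTE (assumption): the configuration-loading step is elided in this
        # excerpt; a _load_config() helper returning the parsed trainer
        # configuration (YAML) is assumed here.
        trainer_config = self._load_config()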
        tf.reset_default_graph()
        # Prevent a single session from taking all GPU memory.
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        with tf.Session(config=config) as sess:
            self._initialize_trainers(trainer_config, sess)
            for _, t in self.trainers.items():
                self.logger.info(t)
            init = tf.global_variables_initializer()
            saver = tf.train.Saver(max_to_keep=self.keep_checkpoints)
            # Instantiate model parameters
            if self.load_model:
                self.logger.info('Loading Model...')
                ckpt = tf.train.get_checkpoint_state(self.model_path)
                if ckpt is None:
                    raise UnityEnvironmentException(
                        'The model {0} could not be found. Make sure '
                        'you specified the right --run-id.'
                        .format(self.model_path))
                saver.restore(sess, ckpt.model_checkpoint_path)
            else:
                sess.run(init)
            global_step = 0  # This is only for saving the model
            curr_info = self._reset_env()
            if self.train_model:
                for brain_name, trainer in self.trainers.items():
                    trainer.write_tensorboard_text('Hyperparameters',
                                                   trainer.parameters)
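            # Step until every trainer has passed its max_steps; in inference
            # mode (train_model is False) the loop runs until interrupted.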
            try:
                while any([t.get_step <= t.get_max_steps
                           for k, t in self.trainers.items()]) \
                        or not self.train_model:
                    if self.meta_curriculum:
                        # Get the sizes of the reward buffers.
                        reward_buff_sizes = {k: len(t.reward_buffer)
                                             for (k, t) in self.trainers.items()}
                        # Attempt to increment the lessons of the brains who
                        # were ready.
                        lessons_incremented = \
                            self.meta_curriculum.increment_lessons(
                                self._get_measure_vals(),
                                reward_buff_sizes=reward_buff_sizes)
                    # If any lessons were incremented or the environment is
                    # ready to be reset
                    if (self.meta_curriculum
                            and any(lessons_incremented.values())):
                        curr_info = self._reset_env()
                        for brain_name, trainer in self.trainers.items():
                            trainer.end_episode()
                        for brain_name, changed in lessons_incremented.items():
                            if changed:
                                self.trainers[brain_name].reward_buffer.clear()
                    elif self.env.global_done:
                        curr_info = self._reset_env()
                        for brain_name, trainer in self.trainers.items():
                            trainer.end_episode()
                    # Decide and take an action
                    take_action_vector, \
                    take_action_memories, \
                    take_action_text, \
                    take_action_value, \
                    take_action_outputs \
                        = {}, {}, {}, {}, {}
                    for brain_name, trainer in self.trainers.items():
                        (take_action_vector[brain_name],
                         take_action_memories[brain_name],
                         take_action_text[brain_name],
                         take_action_value[brain_name],
                         take_action_outputs[brain_name]) = \
                            trainer.take_action(curr_info)
                    new_info = self.env.step(vector_action=take_action_vector,
                                             memory=take_action_memories,
                                             text_action=take_action_text,
                                             value=take_action_value)
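                    # Distribute the new experiences to every trainer; each
                    # trainer updates its policy once its buffer is ready.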
                    for brain_name, trainer in self.trainers.items():
                        trainer.add_experiences(curr_info, new_info,
                                                take_action_outputs[brain_name])
                        trainer.process_experiences(curr_info, new_info)
                        if trainer.is_ready_update() and self.train_model \
                                and trainer.get_step <= trainer.get_max_steps:
                            # Perform gradient descent with experience buffer
                            trainer.update_policy()
                        # Write training statistics to Tensorboard.
                        if self.meta_curriculum is not None:
                            trainer.write_summary(
                                global_step,
                                lesson_num=self.meta_curriculum
                                .brains_to_curriculums[brain_name]
                                .lesson_num)
                        else:
                            trainer.write_summary(global_step)
                        if self.train_model \
                                and trainer.get_step <= trainer.get_max_steps:
                            trainer.increment_step_and_update_last_reward()
                    global_step += 1
                    if global_step % self.save_freq == 0 and global_step != 0 \
                            and self.train_model:
                        # Save Tensorflow model
                        self._save_model(sess, steps=global_step, saver=saver)
                    curr_info = new_info
                # Final save Tensorflow model
                if global_step != 0 and self.train_model:
                    self._save_model(sess, steps=global_step, saver=saver)
            except KeyboardInterrupt:
                print('--------------------------Now saving model--------------'
                      '-----------')
                if self.train_model:
                    self.logger.info('Learning was interrupted. Please wait '
                                     'while the graph is generated.')
                    self._save_model(sess, steps=global_step, saver=saver)
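        # Shut down the environment, then export the frozen graph for Unity
        # if this was a training run.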
        self.env.close()
        if self.train_model:
            self._export_graph()