# # Unity ML-Agents Toolkit
import logging
import argparse
from multiprocessing import Process , Queue
import os
import yaml
from docopt import docopt
from typing import Any , Callable , Dict , Optional
from typing import Any , Callable , Dict , Optional , List , NamedTuple
from mlagents.trainers.trainer_controller import TrainerController
from mlagents.envs.subprocess_env_manager import SubprocessEnvManager
class CommandLineOptions ( NamedTuple ) :
debug : bool
num_runs : int
seed : int
env_path : str
run_id : str
load_model : bool
train_model : bool
save_freq : int
keep_checkpoints : int
base_port : int
num_envs : int
curriculum_folder : Optional [ str ]
lesson : int
slow : bool
no_graphics : bool
multi_gpu : bool # ?
trainer_config_path : str
sampler_file_path : Optional [ str ]
docker_target_name : Optional [ str ]
@property
def fast_simulation ( self ) - > bool :
return not self . slow
@staticmethod
def from_argparse ( args : Any ) - > " CommandLineOptions " :
return CommandLineOptions ( * * vars ( args ) )
def parse_command_line ( argv : Optional [ List [ str ] ] = None ) - > CommandLineOptions :
parser = argparse . ArgumentParser (
formatter_class = argparse . ArgumentDefaultsHelpFormatter
)
parser . add_argument ( " trainer_config_path " )
parser . add_argument (
" --env " , default = None , dest = " env_path " , help = " Name of the Unity executable "
)
parser . add_argument (
" --curriculum " ,
default = None ,
dest = " curriculum_folder " ,
help = " Curriculum json directory for environment " ,
)
parser . add_argument (
" --sampler " ,
default = None ,
dest = " sampler_file_path " ,
help = " Reset parameter yaml file for environment " ,
)
parser . add_argument (
" --keep-checkpoints " ,
default = 5 ,
type = int ,
help = " How many model checkpoints to keep " ,
)
parser . add_argument (
" --lesson " , default = 0 , type = int , help = " Start learning from this lesson "
)
parser . add_argument (
" --load " ,
default = False ,
dest = " load_model " ,
action = " store_true " ,
help = " Whether to load the model or randomly initialize " ,
)
parser . add_argument (
" --run-id " ,
default = " ppo " ,
help = " The directory name for model and summary statistics " ,
)
parser . add_argument (
" --num-runs " , default = 1 , type = int , help = " Number of concurrent training sessions "
)
parser . add_argument (
" --save-freq " , default = 50000 , type = int , help = " Frequency at which to save model "
)
parser . add_argument (
" --seed " , default = - 1 , type = int , help = " Random seed used for training "
)
parser . add_argument (
" --slow " , action = " store_true " , help = " Whether to run the game at training speed "
)
parser . add_argument (
" --train " ,
default = False ,
dest = " train_model " ,
action = " store_true " ,
help = " Whether to train model, or only run inference " ,
)
parser . add_argument (
" --base-port " ,
default = 5005 ,
type = int ,
help = " Base port for environment communication " ,
)
parser . add_argument (
" --num-envs " ,
default = 1 ,
type = int ,
help = " Number of parallel environments to use for training " ,
)
parser . add_argument (
" --docker-target-name " ,
default = None ,
dest = " docker_target_name " ,
help = " Docker volume to store training-specific files " ,
)
parser . add_argument (
" --no-graphics " ,
default = False ,
action = " store_true " ,
help = " Whether to run the environment in no-graphics mode " ,
)
parser . add_argument (
" --debug " ,
default = False ,
action = " store_true " ,
help = " Whether to run ML-Agents in debug mode with detailed logging " ,
)
parser . add_argument (
" --multi-gpu " ,
default = False ,
action = " store_true " ,
help = " Setting this flag enables the use of multiple GPU ' s (if available) during training " ,
)
args = parser . parse_args ( argv )
return CommandLineOptions . from_argparse ( args )
sub_id : int , run_seed : int , run_options : Dict [ str , Any ] , process_queue : Queue
sub_id : int , run_seed : int , options : CommandLineOptions , process_queue : Queue
: param options : parsed command line arguments
docker_target_name = (
run_options [ " --docker-target-name " ]
if run_options [ " --docker-target-name " ] != " None "
else None
)
# General parameters
env_path = run_options [ " --env " ] if run_options [ " --env " ] != " None " else None
run_id = run_options [ " --run-id " ]
load_model = run_options [ " --load " ]
train_model = run_options [ " --train " ]
save_freq = int ( run_options [ " --save-freq " ] )
keep_checkpoints = int ( run_options [ " --keep-checkpoints " ] )
base_port = int ( run_options [ " --base-port " ] )
num_envs = int ( run_options [ " --num-envs " ] )
curriculum_folder = (
run_options [ " --curriculum " ] if run_options [ " --curriculum " ] != " None " else None
)
lesson = int ( run_options [ " --lesson " ] )
fast_simulation = not bool ( run_options [ " --slow " ] )
no_graphics = run_options [ " --no-graphics " ]
multi_gpu = run_options [ " --multi-gpu " ]
trainer_config_path = run_options [ " <trainer-config-path> " ]
sampler_file_path = (
run_options [ " --sampler " ] if run_options [ " --sampler " ] != " None " else None
)
trainer_config_path = options . trainer_config_path
curriculum_folder = options . curriculum_folder
if not docker_target_name :
model_path = " ./models/{run_id}-{sub_id} " . format ( run_id = run_id , sub_id = sub_id )
if not options . docker_target_name :
model_path = " ./models/{run_id}-{sub_id} " . format (
run_id = options . run_id , sub_id = sub_id
)
docker_target_name = docker_target_name ,
docker_target_name = options . docker_target_name ,
docker_target_name = docker_target_name ,
docker_target_name = options . docker_target_name ,
docker_target_name = docker_target_name , run_id = run_id , sub_id = sub_id
docker_target_name = options . docker_target_name ,
run_id = options . run_id ,
sub_id = sub_id ,
docker_target_name = docker_target_name
docker_target_name = options . docker_target_name
env_path ,
docker_target_name ,
no_graphics ,
options . env_path ,
options . docker_target_name ,
options . no_graphics ,
base_port + ( sub_id * num_envs ) ,
options . base_port + ( sub_id * options . num_envs ) ,
env = SubprocessEnvManager ( env_factory , num_envs )
maybe_meta_curriculum = try_create_meta_curriculum ( curriculum_folder , env , lesson )
env = SubprocessEnvManager ( env_factory , options . num_envs )
maybe_meta_curriculum = try_create_meta_curriculum (
curriculum_folder , env , options . lesson
)
sampler_file_path , env . reset_parameters , run_seed
options . sampler_file_path , env . reset_parameters , run_seed
)
trainers = initialize_trainers (
run_id ,
options . run_id ,
keep_checkpoints ,
train_model ,
load_model ,
options . keep_checkpoints ,
options . train_model ,
options . load_model ,
multi_gpu ,
options . multi_gpu ,
)
# Create controller and begin training.
summaries_dir ,
run_id + " - " + str ( sub_id ) ,
save_freq ,
options . run_id + " - " + str ( sub_id ) ,
options . save_freq ,
train_model ,
options . train_model ,
fast_simulation ,
options . fast_simulation ,
sampler_manager ,
resampling_interval ,
)
def create_environment_factory (
env_path : str ,
docker_target_name : str ,
docker_target_name : Optional [ str ] ,
no_graphics : bool ,
seed : Optional [ int ] ,
start_port : int ,
except Exception :
print ( " \n \n \t Unity Technologies \n " )
_USAGE = """
Usage :
mlagents - learn < trainer - config - path > [ options ]
mlagents - learn - - help
Options :
- - env = < file > Name of the Unity executable [ default : None ] .
- - curriculum = < directory > Curriculum json directory for environment [ default : None ] .
- - sampler = < file > Reset parameter yaml file for environment [ default : None ] .
- - keep - checkpoints = < n > How many model checkpoints to keep [ default : 5 ] .
- - lesson = < n > Start learning from this lesson [ default : 0 ] .
- - load Whether to load the model or randomly initialize [ default : False ] .
- - run - id = < path > The directory name for model and summary statistics [ default : ppo ] .
- - num - runs = < n > Number of concurrent training sessions [ default : 1 ] .
- - save - freq = < n > Frequency at which to save model [ default : 50000 ] .
- - seed = < n > Random seed used for training [ default : - 1 ] .
- - slow Whether to run the game at training speed [ default : False ] .
- - train Whether to train model , or only run inference [ default : False ] .
- - base - port = < n > Base port for environment communication [ default : 5005 ] .
- - num - envs = < n > Number of parallel environments to use for training [ default : 1 ]
- - docker - target - name = < dt > Docker volume to store training - specific files [ default : None ] .
- - no - graphics Whether to run the environment in no - graphics mode [ default : False ] .
- - debug Whether to run ML - Agents in debug mode with detailed logging [ default : False ] .
- - multi - gpu Setting this flag enables the use of multiple GPU ' s (if available) during training
[ default : False ] .
"""
options = docopt ( _USAGE )
options = parse_command_line ( )
if options [ " --debug " ] :
if options . debug :
num_runs = int ( options [ " --num-runs " ] )
seed = int ( options [ " --seed " ] )
if options [ " --env " ] == " None " and num_runs > 1 :
if options . env_path is None and options . num_runs > 1 :
raise TrainerError (
" It is not possible to launch more than one concurrent training session "
" when training from the editor. "
run_seed = seed
run_seed = options . seed
if num_runs == 1 :
if seed == - 1 :
if options . num_runs == 1 :
if options . seed == - 1 :
for i in range ( num_runs ) :
if seed == - 1 :
for i in range ( options . num_runs ) :
if options . seed == - 1 :
run_seed = np . random . randint ( 0 , 10000 )
process_queue = Queue ( )
p = Process ( target = run_training , args = ( i , run_seed , options , process_queue ) )