# NOTE: import order is kept exactly as in the original file; reordering the
# torch / TensorFlow imports could change library load behaviour.
import json
import os
import torch
from mlagents.tf_utils import tf
import argparse
from mlagents.trainers.learn import run_cli, parse_command_line
from mlagents.trainers.settings import TestingConfiguration
from mlagents.trainers.stats import StatsReporter
from mlagents_envs.timers import _thread_timer_stacks


def _timer_children(node: dict, *names: str) -> dict:
    """Follow ``node["children"][name]`` for each name in turn and return the last node."""
    for child_name in names:
        node = node["children"][child_name]
    return node


def _extract_timings(timers_json: dict, use_torch: bool, algo: str) -> tuple:
    """Pull the seven timing figures reported by ``run_experiment`` out of a timers dict.

    Returns ``(total, tc_advance_total, tc_advance_count, update_total,
    update_count, evaluate_total, evaluate_count)`` with the values exactly as
    stored in ``timers_json`` (not yet converted to strings).

    Raises:
        KeyError: if an expected timer node is missing from ``timers_json``.
    """
    total = timers_json["total"]
    tc_advance = _timer_children(
        timers_json, "TrainerController.start_learning", "TrainerController.advance"
    )
    evaluate = _timer_children(
        tc_advance, "env_step", "SubprocessEnvManager._take_step"
    )["children"]
    update = _timer_children(tc_advance, "trainer_advance", "_update_policy")[
        "children"
    ]

    # The PPO optimizer timer is framework specific; every other algorithm
    # value reads the SAC trainer's timer regardless of framework.
    if algo == "ppo":
        update_key = "TorchPPOOptimizer.update" if use_torch else "TFPPOOptimizer.update"
    else:
        update_key = "SACTrainer._update_policy"
    evaluate_key = "TorchPolicy.evaluate" if use_torch else "NNPolicy.evaluate"

    update_timer = update[update_key]
    evaluate_timer = evaluate[evaluate_key]
    # TODO: report total / count (mean time per call) as well.
    return (
        total,
        tc_advance["total"],
        tc_advance["count"],
        update_timer["total"],
        update_timer["count"],
        evaluate_timer["total"],
        evaluate_timer["count"],
    )


def run_experiment(
    name: str,
    steps: int,
    use_torch: bool,
    algo: str,
    num_torch_threads: int,
    use_gpu: bool,
    num_envs: int = 1,
    config_name=None,
) -> tuple:
    """Run one training job through ``run_cli`` and return its timing figures.

    Args:
        name: Stored in ``TestingConfiguration.env_name``; also used in the run
            id and as the default config name.
        steps: Stored in ``TestingConfiguration.max_steps``.
        use_torch: Stored in ``TestingConfiguration.use_torch``; also selects
            which timer names are read back and the run id suffix.
        algo: Config sub-directory under ``config/``; ``"ppo"`` selects the PPO
            optimizer timer, any other value the SAC trainer timer.
        num_torch_threads: Passed to ``torch.set_num_threads`` when
            ``use_torch`` is true.
        use_gpu: Selects ``"cuda:0"`` instead of ``"cpu"`` for
            ``TestingConfiguration.device``.
        num_envs: Forwarded as the ``--num-envs`` command line option.
        config_name: Base name of the YAML config file; defaults to ``name``.

    Returns:
        A tuple of 14 strings: the seven run parameters followed by seven
        timing values. The timing values are all ``"na"`` when a GPU was
        requested but ``torch.cuda.is_available()`` is false.

    Raises:
        KeyError: if the written ``timers.json`` lacks an expected timer node.
    """
    TestingConfiguration.env_name = name
    TestingConfiguration.max_steps = steps
    TestingConfiguration.use_torch = use_torch
    TestingConfiguration.device = "cuda:0" if use_gpu else "cpu"
    # NOTE(review): the value returned by tf.device() is discarded rather than
    # entered with ``with``, so this presumably has no effect on device
    # placement. Kept as in the original - confirm the intent before changing.
    if use_gpu:
        tf.device("/GPU:0")
    else:
        tf.device("/device:CPU:0")

    run_parameters = (
        name,
        str(steps),
        str(use_torch),
        algo,
        str(num_torch_threads),
        str(num_envs),
        str(use_gpu),
    )

    # A GPU run cannot be measured without CUDA: report placeholders instead.
    if use_gpu and not torch.cuda.is_available():
        return run_parameters + ("na",) * 7

    if config_name is None:
        config_name = name
    run_options = parse_command_line(
        [f"config/{algo}/{config_name}.yaml", "--num-envs", f"{num_envs}"]
    )
    run_options.checkpoint_settings.run_id = (
        f"{name}_test_" + str(steps) + "_" + ("torch" if use_torch else "tf")
    )
    run_options.checkpoint_settings.force = True
    for trainer_settings in run_options.behaviors.values():
        trainer_settings.threaded = False

    timers_path = os.path.join(
        "results", run_options.checkpoint_settings.run_id, "run_logs", "timers.json"
    )
    if use_torch:
        torch.set_num_threads(num_torch_threads)
    run_cli(run_options)

    # Clear module-level reporter/timer state so the next experiment in this
    # process does not start with what this run left behind.
    StatsReporter.writers.clear()
    StatsReporter.stats_dict.clear()
    _thread_timer_stacks.clear()

    with open(timers_path) as timers_json_file:
        timers_json = json.load(timers_json_file)

    timings = _extract_timings(timers_json, use_torch, algo)
    return run_parameters + tuple(str(value) for value in timings)


def main():
    """Parse the command line, run every selected experiment and record the results.

    One space-separated row per experiment is written to a
    ``result_data_steps_..._thread_....txt`` file in the current directory, and
    all rows (header included) are printed once every experiment has finished.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--steps", default=25000, type=int, help="The number of steps")
    parser.add_argument("--num-envs", default=1, type=int, help="The number of envs")
    parser.add_argument(
        "--gpu", default=False, action="store_true", help="If true, will use the GPU"
    )
    parser.add_argument(
        "--threads",
        default=False,
        action="store_true",
        help="If true, will try both 1 and 8 threads for torch",
    )
    parser.add_argument(
        "--ball",
        default=False,
        action="store_true",
        help="If true, will only do 3dball",
    )
    parser.add_argument(
        "--sac",
        default=False,
        action="store_true",
        help="If true, will run sac instead of ppo",
    )
    args = parser.parse_args()

    os.environ["CUDA_VISIBLE_DEVICES"] = "0" if args.gpu else "-1"

    algo = "sac" if args.sac else "ppo"

    # (environment name, config file base name) pairs.
    envs_config_tuples = [
        ("3DBall", "3DBall"),
        ("GridWorld", "GridWorld"),
        ("PushBlock", "PushBlock"),
        ("CrawlerStaticTarget", "CrawlerStatic"),
    ]
    if algo == "ppo":
        envs_config_tuples += [
            ("Hallway", "Hallway"),
            ("VisualHallway", "VisualHallway"),
        ]
    if args.ball:
        envs_config_tuples = [("3DBall", "3DBall")]

    labels = (
        "name",
        "steps",
        "use_torch",
        "algorithm",
        "num_torch_threads",
        "num_envs",
        "use_gpu",
        "total",
        "tc_advance_total",
        "tc_advance_count",
        "update_total",
        "update_count",
        "evaluate_total",
        "evaluate_count",
    )

    # (use_torch, num_torch_threads) per run, in the order the rows are written:
    # torch with 1 thread, optionally torch with 8 threads, then TensorFlow.
    variants = [(True, 1)]
    if args.threads:
        variants.append((True, 8))
    variants.append((False, 1))

    results = [labels]
    output_path = (
        f"result_data_steps_{args.steps}_algo_{algo}_envs_{args.num_envs}"
        f"_gpu_{args.gpu}_thread_{args.threads}.txt"
    )
    # ``with`` guarantees the rows written so far are flushed and the handle is
    # closed even if one of the training runs raises.
    with open(output_path, "w") as f:
        f.write(" ".join(labels) + "\n")
        for env_name, config_name in envs_config_tuples:
            for use_torch, num_torch_threads in variants:
                data = run_experiment(
                    name=env_name,
                    steps=args.steps,
                    use_torch=use_torch,
                    algo=algo,
                    num_torch_threads=num_torch_threads,
                    use_gpu=args.gpu,
                    num_envs=args.num_envs,
                    config_name=config_name,
                )
                results.append(data)
                f.write(" ".join(data) + "\n")

    for r in results:
        print(*r)


if __name__ == "__main__":
    main()