您最多选择25个主题
主题必须以中文或者字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符
248 行
7.3 KiB
248 行
7.3 KiB
import json
|
|
import os
|
|
import torch
|
|
from mlagents.tf_utils import tf
|
|
import argparse
|
|
from mlagents.trainers.learn import run_cli, parse_command_line
|
|
from mlagents.trainers.settings import TestingConfiguration
|
|
from mlagents.trainers.stats import StatsReporter
|
|
from mlagents_envs.timers import _thread_timer_stacks
|
|
|
|
|
|
def run_experiment(
    name: str,
    steps: int,
    use_torch: bool,
    algo: str,
    num_torch_threads: int,
    use_gpu: bool,
    num_envs: int = 1,
    config_name=None,
):
    """Run one training job and return its timing counters as a row of strings.

    The run is configured through ``TestingConfiguration`` and through run
    options parsed from ``config/<algo>/<config_name>.yaml``. After
    ``run_cli`` returns, ``results/<run_id>/run_logs/timers.json`` is read and
    a handful of totals/counts are extracted from it.

    Args:
        name: Environment name; also used for the run id and, when
            ``config_name`` is None, for the config file name.
        steps: Value stored in ``TestingConfiguration.max_steps``.
        use_torch: Selects the torch timer keys (and the ``torch`` run-id
            suffix) when true, the TF ones otherwise.
        algo: ``"ppo"`` selects the PPO optimizer timer; any other value is
            treated as SAC. Also selects the config sub-directory.
        num_torch_threads: Passed to ``torch.set_num_threads`` when
            ``use_torch`` is true.
        use_gpu: Requests ``cuda:0`` instead of ``cpu``.
        num_envs: Forwarded on the parsed command line as ``--num-envs``.
        config_name: Config file stem; defaults to ``name``.

    Returns:
        A 14-tuple of strings: ``name``, ``steps``, ``use_torch``, ``algo``,
        ``num_torch_threads``, ``num_envs``, ``use_gpu``, followed by the
        overall total, the ``TrainerController.advance`` total and count, the
        policy-update total and count, and the policy-evaluate total and
        count. When the GPU is requested but ``torch.cuda.is_available()`` is
        false, no training is run and the last seven fields are ``"na"``.

    Raises:
        KeyError: If the timers file lacks one of the expected nodes.
    """
    backend = "torch" if use_torch else "tf"

    TestingConfiguration.env_name = name
    TestingConfiguration.max_steps = steps
    TestingConfiguration.use_torch = use_torch
    TestingConfiguration.device = "cuda:0" if use_gpu else "cpu"
    # NOTE(review): the value returned by tf.device() is discarded here --
    # confirm whether this call has any effect on device placement.
    tf.device("/GPU:0" if use_gpu else "/device:CPU:0")

    # The identifying columns are shared by the skipped and the measured row.
    run_description = (
        name,
        str(steps),
        str(use_torch),
        algo,
        str(num_torch_threads),
        str(num_envs),
        str(use_gpu),
    )

    cuda_missing = not torch.cuda.is_available()
    if cuda_missing and use_gpu:
        # Nothing to measure: emit a placeholder for each of the 7 counters.
        return run_description + ("na",) * 7

    if config_name is None:
        config_name = name

    # The environment count is supplied through the parsed command line.
    cli_args = [f"config/{algo}/{config_name}.yaml", "--num-envs", f"{num_envs}"]
    run_options = parse_command_line(cli_args)

    checkpoint = run_options.checkpoint_settings
    checkpoint.run_id = f"{name}_test_" + str(steps) + "_" + backend
    checkpoint.force = True

    for behavior_settings in run_options.behaviors.values():
        behavior_settings.threaded = False

    timers_path = os.path.join(
        "results", checkpoint.run_id, "run_logs", "timers.json"
    )

    if use_torch:
        torch.set_num_threads(num_torch_threads)
    run_cli(run_options)

    # Clear state held at class/module level so the next run starts clean.
    StatsReporter.writers.clear()
    StatsReporter.stats_dict.clear()
    _thread_timer_stacks.clear()

    with open(timers_path) as timers_json_file:
        timers_json = json.load(timers_json_file)

    total = timers_json["total"]

    # Walk the timer tree once and branch off to the two subtrees of interest.
    learning_children = timers_json["children"][
        "TrainerController.start_learning"
    ]["children"]
    tc_advance = learning_children["TrainerController.advance"]
    advance_children = tc_advance["children"]
    evaluate = advance_children["env_step"]["children"][
        "SubprocessEnvManager._take_step"
    ]["children"]
    update = advance_children["trainer_advance"]["children"]["_update_policy"][
        "children"
    ]

    tc_advance_total = tc_advance["total"]
    tc_advance_count = tc_advance["count"]

    # The timer node names depend on the algorithm and on the backend.
    if algo != "ppo":
        update_key = "SACTrainer._update_policy"
    elif use_torch:
        update_key = "TorchPPOOptimizer.update"
    else:
        update_key = "PPOOptimizer.update"
    evaluate_key = "TorchPolicy.evaluate" if use_torch else "NNPolicy.evaluate"

    update_timer = update[update_key]
    update_total = update_timer["total"]
    update_count = update_timer["count"]

    evaluate_timer = evaluate[evaluate_key]
    evaluate_total = evaluate_timer["total"]
    evaluate_count = evaluate_timer["count"]

    # TODO: report total / count instead of the raw pairs.
    measurements = (
        total,
        tc_advance_total,
        tc_advance_count,
        update_total,
        update_count,
        evaluate_total,
        evaluate_count,
    )
    return run_description + tuple(str(value) for value in measurements)
|
|
|
|
|
|
def main():
    """Benchmark the torch and TF trainers and write one row per run.

    Command-line flags select the step count, the number of environments,
    GPU use, whether torch is additionally run with 8 threads, whether only
    3DBall is run, and whether SAC replaces PPO.

    For every selected environment the runs happen in this order: torch with
    1 thread, torch with 8 threads (only with ``--threads``), then TF. Each
    row returned by ``run_experiment`` is written space-separated to
    ``result_data_steps_<steps>_algo_<algo>_envs_<n>_gpu_<bool>_thread_<bool>.txt``
    under a header line, and all rows are printed once every run has finished.

    The results file is managed by a ``with`` block so it is closed (and its
    buffered rows flushed) even when one of the runs raises.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--steps", default=25000, type=int, help="The number of steps")
    parser.add_argument("--num-envs", default=1, type=int, help="The number of envs")
    parser.add_argument(
        "--gpu", default=False, action="store_true", help="If true, will use the GPU"
    )
    parser.add_argument(
        "--threads",
        default=False,
        action="store_true",
        help="If true, will try both 1 and 8 threads for torch",
    )
    parser.add_argument(
        "--ball",
        default=False,
        action="store_true",
        help="If true, will only do 3dball",
    )
    parser.add_argument(
        "--sac",
        default=False,
        action="store_true",
        help="If true, will run sac instead of ppo",
    )
    args = parser.parse_args()

    # "-1" leaves no CUDA device visible, "0" exposes only the first one.
    os.environ["CUDA_VISIBLE_DEVICES"] = "0" if args.gpu else "-1"

    algo = "sac" if args.sac else "ppo"

    # Pairs of (environment name, config file stem).
    if args.ball:
        envs_config_tuples = [("3DBall", "3DBall")]
    else:
        envs_config_tuples = [
            ("3DBall", "3DBall"),
            ("GridWorld", "GridWorld"),
            ("PushBlock", "PushBlock"),
            ("CrawlerStaticTarget", "CrawlerStatic"),
        ]
        if algo == "ppo":
            # These two environments are only benchmarked with PPO.
            envs_config_tuples += [
                ("Hallway", "Hallway"),
                ("VisualHallway", "VisualHallway"),
            ]

    labels = (
        "name",
        "steps",
        "use_torch",
        "algorithm",
        "num_torch_threads",
        "num_envs",
        "use_gpu",
        "total",
        "tc_advance_total",
        "tc_advance_count",
        "update_total",
        "update_count",
        "evaluate_total",
        "evaluate_count",
    )

    # (use_torch, num_torch_threads) for each run of an environment, in order.
    variants = [(True, 1)]
    if args.threads:
        variants.append((True, 8))
    variants.append((False, 1))

    results = [labels]
    output_path = f"result_data_steps_{args.steps}_algo_{algo}_envs_{args.num_envs}_gpu_{args.gpu}_thread_{args.threads}.txt"
    with open(output_path, "w") as f:
        f.write(" ".join(labels) + "\n")
        for env_name, config_name in envs_config_tuples:
            for use_torch, num_torch_threads in variants:
                data = run_experiment(
                    name=env_name,
                    steps=args.steps,
                    use_torch=use_torch,
                    algo=algo,
                    num_torch_threads=num_torch_threads,
                    use_gpu=args.gpu,
                    num_envs=args.num_envs,
                    config_name=config_name,
                )
                results.append(data)
                f.write(" ".join(data) + "\n")

    for r in results:
        print(*r)
|
|
|
|
|
|
# Run the benchmark sweep only when this file is executed as a script.
if __name__ == "__main__":
    main()
|