浏览代码
Removing the experiment script from add fire (#4373)
Removing the experiment script from add fire (#4373)
* Removing the experiment script * Removing the script/develop/add-fire
GitHub
5 年前
当前提交
8985a040
共有 6 个文件被更改,包括 17 次插入 和 301 次删除
-
35ml-agents/mlagents/trainers/learn.py
-
9ml-agents/mlagents/trainers/policy/torch_policy.py
-
9ml-agents/mlagents/trainers/ppo/trainer.py
-
7ml-agents/mlagents/trainers/settings.py
-
10ml-agents/mlagents/trainers/trainer/rl_trainer.py
-
248experiment_torch.py
|
|||
import json |
|||
import os |
|||
import torch |
|||
from mlagents.tf_utils import tf |
|||
import argparse |
|||
from mlagents.trainers.learn import run_cli, parse_command_line |
|||
from mlagents.trainers.settings import TestingConfiguration |
|||
from mlagents.trainers.stats import StatsReporter |
|||
from mlagents_envs.timers import _thread_timer_stacks |
|||
|
|||
|
|||
def run_experiment(
    name: str,
    steps: int,
    use_torch: bool,
    algo: str,
    num_torch_threads: int,
    use_gpu: bool,
    num_envs: int = 1,
    config_name=None,
):
    """Run one training job and return its timing counters as a row of strings.

    The run is configured from ``config/{algo}/{config_name}.yaml`` and, once it
    finishes, the counters are read back from
    ``results/<run_id>/run_logs/timers.json``.

    Args:
        name: Environment name; stored on ``TestingConfiguration`` and used in
            the run id.
        steps: Stored as ``TestingConfiguration.max_steps`` and used in the
            run id.
        use_torch: Selects the torch timer keys (and the "torch" run-id suffix)
            when true, otherwise the TensorFlow ones (suffix "tf").
        algo: ``"ppo"`` selects the PPO optimizer timers; any other value reads
            the ``SACTrainer._update_policy`` timer.
        num_torch_threads: Passed to ``torch.set_num_threads`` when
            ``use_torch`` is true.
        use_gpu: Requests ``"cuda:0"`` instead of ``"cpu"``.
        num_envs: Forwarded as ``--num-envs`` on the parsed command line.
        config_name: Base name of the YAML config; falls back to ``name``.

    Returns:
        A 14-tuple of strings: the seven run parameters (name, steps,
        use_torch, algo, num_torch_threads, num_envs, use_gpu) followed by
        total, tc_advance total/count, update total/count and evaluate
        total/count. The last seven are ``"na"`` when a GPU was requested but
        CUDA is not available, in which case nothing is run.

    Raises:
        KeyError: If the timers file lacks one of the expected timer nodes.
    """
    # Publish the run parameters on the TestingConfiguration class attributes.
    TestingConfiguration.env_name = name
    TestingConfiguration.max_steps = steps
    TestingConfiguration.use_torch = use_torch
    TestingConfiguration.device = "cuda:0" if use_gpu else "cpu"

    # NOTE(review): the object returned by tf.device() is discarded. If it is a
    # context manager (as in TensorFlow's public API) this call has no lasting
    # effect -- confirm whether device pinning was intended here.
    tf.device("/GPU:0" if use_gpu else "/device:CPU:0")

    # The first seven columns are identical for skipped and completed runs.
    run_columns = (
        name,
        str(steps),
        str(use_torch),
        algo,
        str(num_torch_threads),
        str(num_envs),
        str(use_gpu),
    )

    cuda_missing = not torch.cuda.is_available()
    if cuda_missing and use_gpu:
        # Nothing can be measured: pad the seven metric columns with "na".
        return run_columns + ("na",) * 7

    config_name = name if config_name is None else config_name
    config_path = f"config/{algo}/{config_name}.yaml"
    run_options = parse_command_line([config_path, "--num-envs", f"{num_envs}"])

    backend_tag = "torch" if use_torch else "tf"
    run_id = f"{name}_test_" + str(steps) + "_" + backend_tag
    run_options.checkpoint_settings.run_id = run_id
    run_options.checkpoint_settings.force = True
    # run_options.env_settings.num_envs = num_envs
    for behavior_settings in run_options.behaviors.values():
        # Presumably keeps trainer work off background threads so the timers
        # are comparable -- verify against the trainer implementation.
        behavior_settings.threaded = False

    timers_path = os.path.join("results", run_id, "run_logs", "timers.json")

    if use_torch:
        torch.set_num_threads(num_torch_threads)
    run_cli(run_options)

    # Empty the shared reporter/timer containers after the run; presumably so
    # the next experiment in the same process starts clean -- TODO confirm.
    StatsReporter.writers.clear()
    StatsReporter.stats_dict.clear()
    _thread_timer_stacks.clear()

    with open(timers_path) as timers_file:
        timers = json.load(timers_file)

    # Walk the timer tree once; every nested lookup hangs off the advance node.
    total = timers["total"]
    learning_children = timers["children"]["TrainerController.start_learning"][
        "children"
    ]
    advance = learning_children["TrainerController.advance"]
    evaluate_timers = advance["children"]["env_step"]["children"][
        "SubprocessEnvManager._take_step"
    ]["children"]
    update_timers = advance["children"]["trainer_advance"]["children"][
        "_update_policy"
    ]["children"]

    tc_advance_total = advance["total"]
    tc_advance_count = advance["count"]

    # Timer node names differ per backend for PPO and for policy evaluation;
    # the SAC update timer has the same name for both backends.
    if algo == "ppo":
        update_key = "TorchPPOOptimizer.update" if use_torch else "PPOOptimizer.update"
    else:
        update_key = "SACTrainer._update_policy"
    evaluate_key = "TorchPolicy.evaluate" if use_torch else "NNPolicy.evaluate"

    update_stats = update_timers[update_key]
    update_total = update_stats["total"]
    update_count = update_stats["count"]
    evaluate_stats = evaluate_timers[evaluate_key]
    evaluate_total = evaluate_stats["total"]
    evaluate_count = evaluate_stats["count"]

    # todo: do total / count
    return run_columns + (
        str(total),
        str(tc_advance_total),
        str(tc_advance_count),
        str(update_total),
        str(update_count),
        str(evaluate_total),
        str(evaluate_count),
    )
|||
|
|||
|
|||
def main():
    """Benchmark torch against TensorFlow training and record the timings.

    Parses the command line, picks the algorithm and environment list, then
    calls ``run_experiment`` for each environment: torch with 1 thread, torch
    with 8 threads (only with ``--threads``), and finally TensorFlow. Each
    result row is written space-separated to a ``result_data_*.txt`` file in
    the current directory, and all rows are printed once every run is done.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--steps", default=25000, type=int, help="The number of steps")
    parser.add_argument("--num-envs", default=1, type=int, help="The number of envs")
    parser.add_argument(
        "--gpu", default=False, action="store_true", help="If true, will use the GPU"
    )
    parser.add_argument(
        "--threads",
        default=False,
        action="store_true",
        help="If true, will try both 1 and 8 threads for torch",
    )
    parser.add_argument(
        "--ball",
        default=False,
        action="store_true",
        help="If true, will only do 3dball",
    )
    parser.add_argument(
        "--sac",
        default=False,
        action="store_true",
        help="If true, will run sac instead of ppo",
    )
    args = parser.parse_args()

    # "-1" hides every CUDA device from the frameworks; "0" exposes the first.
    os.environ["CUDA_VISIBLE_DEVICES"] = "0" if args.gpu else "-1"

    algo = "sac" if args.sac else "ppo"

    # Pairs of (environment name, config file base name).
    if args.ball:
        envs_config_tuples = [("3DBall", "3DBall")]
    else:
        envs_config_tuples = [
            ("3DBall", "3DBall"),
            ("GridWorld", "GridWorld"),
            ("PushBlock", "PushBlock"),
            ("CrawlerStaticTarget", "CrawlerStatic"),
        ]
        if algo == "ppo":
            # The Hallway environments are only benchmarked with PPO.
            envs_config_tuples += [
                ("Hallway", "Hallway"),
                ("VisualHallway", "VisualHallway"),
            ]

    labels = (
        "name",
        "steps",
        "use_torch",
        "algorithm",
        "num_torch_threads",
        "num_envs",
        "use_gpu",
        "total",
        "tc_advance_total",
        "tc_advance_count",
        "update_total",
        "update_count",
        "evaluate_total",
        "evaluate_count",
    )

    # (use_torch, num_torch_threads) for every run of an environment, in
    # execution order. The list does not depend on the environment, so it is
    # built once outside the loop.
    variants = [(True, 1)]
    if args.threads:
        variants.append((True, 8))
    variants.append((False, 1))

    results = [labels]
    output_path = (
        f"result_data_steps_{args.steps}_algo_{algo}_envs_{args.num_envs}"
        f"_gpu_{args.gpu}_thread_{args.threads}.txt"
    )
    # The context manager closes the results file even when a run raises, so
    # the handle is released and the rows written so far are flushed to disk.
    with open(output_path, "w") as results_file:
        results_file.write(" ".join(labels) + "\n")
        for env_name, config_name in envs_config_tuples:
            for use_torch, num_torch_threads in variants:
                data = run_experiment(
                    name=env_name,
                    steps=args.steps,
                    use_torch=use_torch,
                    algo=algo,
                    num_torch_threads=num_torch_threads,
                    use_gpu=args.gpu,
                    num_envs=args.num_envs,
                    config_name=config_name,
                )
                results.append(data)
                results_file.write(" ".join(data) + "\n")

    for row in results:
        print(*row)


# Run the benchmark only when this file is executed as a script.
if __name__ == "__main__":
    main()
撰写
预览
正在加载...
取消
保存
Reference in new issue