浏览代码
Develop add fire exp framework (#4213)
Develop add fire exp framework (#4213)
* Experiment branch for comparing torch * Updates and merging ervin changes * improvements on experiment_torch.py * Better printing of results * preliminary gpu experiment * Testing gpu * Prepare to see a lot of commits, because I like my IDE and I am testing on a server and I am using git to sync the two * Prepare to see a lot of commits, because I like my IDE and I am testing on a server and I am using git to sync the two * _ * _ * _ * _ * _ * _ * _ * _ * Attempt at gpu on tf. Does not work * _ * _ * _ * _ * _ * _ * _ * _ * _ * _ * _ * Fixing learn.py/develop/add-fire
GitHub
5 年前
当前提交
05a11c96
共有 8 个文件被更改,包括 195 次插入 和 41 次删除
-
34ml-agents/mlagents/trainers/learn.py
-
5ml-agents/mlagents/trainers/models_torch.py
-
4ml-agents/mlagents/trainers/optimizer/torch_optimizer.py
-
2ml-agents/mlagents/trainers/policy/nn_policy.py
-
62ml-agents/mlagents/trainers/policy/torch_policy.py
-
4ml-agents/mlagents/trainers/ppo/optimizer_torch.py
-
14ml-agents/mlagents/trainers/ppo/trainer.py
-
111experiment_torch.py
|
|||
|
|||
import json |
|||
import os |
|||
import torch |
|||
import tensorflow as tf |
|||
import argparse |
|||
from mlagents.trainers.learn import run_cli, parse_command_line |
|||
from mlagents.trainers.settings import RunOptions |
|||
from mlagents.trainers.stats import StatsReporter |
|||
from mlagents.trainers.ppo.trainer import TestingConfiguration |
|||
from mlagents_envs.timers import _thread_timer_stacks |
|||
|
|||
|
|||
|
|||
|
|||
def run_experiment(
    name: str,
    steps: int,
    use_torch: bool,
    num_torch_threads: int,
    use_gpu: bool,
    num_envs: int = 1,
    config_name=None,
):
    """Run one PPO training benchmark and report its timer figures.

    The run is configured by setting attributes on ``TestingConfiguration``
    and by parsing ``config/ppo/<config_name>.yaml``; the timings are read
    back from the ``timers.json`` file found under the run's results folder.

    Args:
        name: Environment name; also used to build the run id.
        steps: Value stored in ``TestingConfiguration.max_steps``.
        use_torch: Selects the torch timer keys (and run-id suffix) when
            true, the TF ones otherwise.
        num_torch_threads: Passed to ``torch.set_num_threads`` when
            ``use_torch`` is true.
        use_gpu: Selects ``"cuda:0"`` instead of ``"cpu"`` as the device.
        num_envs: Forwarded on the parsed command line as ``--num-envs``.
        config_name: Stem of the YAML file under ``config/ppo``; falls back
            to ``name`` when ``None``.

    Returns:
        A 13-tuple: ``name`` followed by the stringified ``steps``,
        ``use_torch``, ``num_torch_threads``, ``num_envs``, ``use_gpu``, and
        then seven stringified measurements (overall total, advance
        total/count, update total/count, evaluate total/count). The seven
        measurements are all ``"na"`` when a GPU run is requested but CUDA
        is not available.
    """
    TestingConfiguration.env_name = name
    TestingConfiguration.max_steps = steps
    TestingConfiguration.use_torch = use_torch
    TestingConfiguration.device = "cuda:0" if use_gpu else "cpu"

    # NOTE(review): tf.device() is documented as a context manager; called
    # bare like this it presumably does not pin anything -- confirm.
    tf.device("/GPU:0" if use_gpu else "/device:CPU:0")

    # Leading fields shared by both the "na" row and the measured row.
    run_description = (
        name,
        str(steps),
        str(use_torch),
        str(num_torch_threads),
        str(num_envs),
        str(use_gpu),
    )

    cuda_missing = not torch.cuda.is_available()
    if cuda_missing and use_gpu:
        return run_description + ("na",) * 7

    yaml_stem = name if config_name is None else config_name
    run_options = parse_command_line(
        [f"config/ppo/{yaml_stem}.yaml", "--num-envs", f"{num_envs}"]
    )

    backend_tag = "torch" if use_torch else "tf"
    run_id = f"{name}_test_{steps}_{backend_tag}"
    run_options.checkpoint_settings.run_id = run_id
    run_options.checkpoint_settings.force = True
    # run_options.env_settings.num_envs = num_envs
    for behavior_settings in run_options.behaviors.values():
        behavior_settings.threaded = False

    timers_path = os.path.join("results", run_id, "run_logs", "timers.json")

    if use_torch:
        torch.set_num_threads(num_torch_threads)
    run_cli(run_options)

    # Empty the shared reporter/timer containers once the run has finished.
    StatsReporter.writers.clear()
    StatsReporter.stats_dict.clear()
    _thread_timer_stacks.clear()

    with open(timers_path) as timers_file:
        timer_root = json.load(timers_file)

    total = timer_root["total"]
    learning_children = timer_root["children"]["TrainerController.start_learning"]["children"]
    advance_node = learning_children["TrainerController.advance"]
    step_children = advance_node["children"]["env_step"]["children"]["SubprocessEnvManager._take_step"]["children"]
    update_children = advance_node["children"]["trainer_advance"]["children"]["_update_policy"]["children"]

    # The optimizer/policy timer names depend on the backend.
    if use_torch:
        optimizer_key, policy_key = "TorchPPOOptimizer.update", "TorchPolicy.evaluate"
    else:
        optimizer_key, policy_key = "TFPPOOptimizer.update", "NNPolicy.evaluate"
    update_node = update_children[optimizer_key]
    evaluate_node = step_children[policy_key]

    # todo: do total / count
    measurements = (
        total,
        advance_node["total"],
        advance_node["count"],
        update_node["total"],
        update_node["count"],
        evaluate_node["total"],
        evaluate_node["count"],
    )
    return run_description + tuple(str(value) for value in measurements)
|||
|
|||
|
|||
def main():
    """Benchmark torch against TF on a set of environments and save timings.

    Command-line flags:
        --steps: number of steps per run (default 25000).
        --num-envs: number of environments per run (default 1).
        --gpu: run on the GPU; also sets ``CUDA_VISIBLE_DEVICES`` to ``"0"``
            (``"-1"`` without the flag).
        --threads: additionally run torch with 8 threads.
        --ball: restrict the benchmark to 3DBall.

    For every environment the torch run (1 thread), the optional torch run
    (8 threads) and the TF run are executed in that order. Each result row is
    written, space-separated, to
    ``result_data_steps_<steps>_envs_<num_envs>_gpu_<gpu>_thread_<threads>.txt``
    in the current directory, and all rows are printed once every run is done.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--steps", default=25000, type=int, help="The number of steps")
    parser.add_argument("--num-envs", default=1, type=int, help="The number of envs")
    parser.add_argument("--gpu", default=False, action="store_true", help="If true, will use the GPU")
    parser.add_argument("--threads", default=False, action="store_true", help="If true, will try both 1 and 8 threads for torch")
    parser.add_argument("--ball", default=False, action="store_true", help="If true, will only do 3dball")
    args = parser.parse_args()

    os.environ["CUDA_VISIBLE_DEVICES"] = "0" if args.gpu else "-1"

    # (environment name, config file stem) pairs.
    if args.ball:
        envs_config_tuples = [("3DBall", "3DBall")]
    else:
        envs_config_tuples = [
            ("3DBall", "3DBall"),
            ("GridWorld", "GridWorld"),
            ("PushBlock", "PushBlock"),
            ("Hallway", "Hallway"),
            ("CrawlerStaticTarget", "CrawlerStatic"),
            ("VisualHallway", "VisualHallway"),
        ]

    # (use_torch, num_torch_threads) combinations, in execution order.
    variants = [(True, 1)]
    if args.threads:
        variants.append((True, 8))
    variants.append((False, 1))

    labels = (
        "name",
        "steps",
        "use_torch",
        "num_torch_threads",
        "num_envs",
        "use_gpu",
        "total",
        "tc_advance_total",
        "tc_advance_count",
        "update_total",
        "update_count",
        "evaluate_total",
        "evaluate_count",
    )
    results = [labels]

    output_path = f"result_data_steps_{args.steps}_envs_{args.num_envs}_gpu_{args.gpu}_thread_{args.threads}.txt"
    # The context manager closes the results file even when a run raises, so
    # the rows already written are not left behind in an open handle.
    with open(output_path, "w") as f:
        f.write(" ".join(labels) + "\n")
        for env_name, config_name in envs_config_tuples:
            for use_torch, num_torch_threads in variants:
                data = run_experiment(
                    name=env_name,
                    steps=args.steps,
                    use_torch=use_torch,
                    num_torch_threads=num_torch_threads,
                    use_gpu=args.gpu,
                    num_envs=args.num_envs,
                    config_name=config_name,
                )
                results.append(data)
                f.write(" ".join(data) + "\n")

    for r in results:
        print(*r)


if __name__ == "__main__":
    main()
|||
|
撰写
预览
正在加载...
取消
保存
Reference in new issue