Merge branch 'develop-add-fire' into develop-add-fire-memoryclass
Ervin Teng, 4 years ago
Commit a04e68a4
7 files changed, 195 insertions(+), 319 deletions(-)
  35  ml-agents/mlagents/trainers/learn.py
  55  ml-agents/mlagents/trainers/policy/torch_policy.py
   9  ml-agents/mlagents/trainers/ppo/trainer.py
   7  ml-agents/mlagents/trainers/settings.py
  10  ml-agents/mlagents/trainers/trainer/rl_trainer.py
 150  ml-agents/mlagents/trainers/tests/torch/test_policy.py
 248  experiment_torch.py
ml-agents/mlagents/trainers/tests/torch/test_policy.py:

import pytest

import torch
from mlagents.trainers.policy.torch_policy import TorchPolicy
from mlagents.trainers.tests import mock_brain as mb
from mlagents.trainers.settings import TrainerSettings, NetworkSettings
from mlagents.trainers.torch.utils import ModelUtils

VECTOR_ACTION_SPACE = 2
VECTOR_OBS_SPACE = 8
DISCRETE_ACTION_SPACE = [3, 3, 3, 2]
BUFFER_INIT_SAMPLES = 32
NUM_AGENTS = 12
EPSILON = 1e-7

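# Builds a TorchPolicy from a mock BehaviorSpec, toggling discrete vs.
# continuous actions, visual vs. vector observations, and recurrent memory.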
def create_policy_mock(
    dummy_config: TrainerSettings,
    use_rnn: bool = False,
    use_discrete: bool = True,
    use_visual: bool = False,
    seed: int = 0,
) -> TorchPolicy:
    mock_spec = mb.setup_test_behavior_specs(
        use_discrete,
        use_visual,
        vector_action_space=DISCRETE_ACTION_SPACE
        if use_discrete
        else VECTOR_ACTION_SPACE,
        vector_obs_space=VECTOR_OBS_SPACE,
    )

    trainer_settings = dummy_config
    trainer_settings.keep_checkpoints = 3
    trainer_settings.network_settings.memory = (
        NetworkSettings.MemorySettings() if use_rnn else None
    )
    policy = TorchPolicy(seed, mock_spec, trainer_settings)
    return policy


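# evaluate() should produce one action per agent, with width matching the
# number of action branches (discrete) or the action size (continuous).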
@pytest.mark.parametrize("discrete", [True, False], ids=["discrete", "continuous"])
@pytest.mark.parametrize("visual", [True, False], ids=["visual", "vector"])
@pytest.mark.parametrize("rnn", [True, False], ids=["rnn", "no_rnn"])
def test_policy_evaluate(rnn, visual, discrete):
    # Test evaluate
    policy = create_policy_mock(
        TrainerSettings(), use_rnn=rnn, use_discrete=discrete, use_visual=visual
    )
    decision_step, terminal_step = mb.create_steps_from_behavior_spec(
        policy.behavior_spec, num_agents=NUM_AGENTS
    )

    run_out = policy.evaluate(decision_step, list(decision_step.agent_id))
    if discrete:
        assert run_out["action"].shape == (NUM_AGENTS, len(DISCRETE_ACTION_SPACE))
    else:
        assert run_out["action"].shape == (NUM_AGENTS, VECTOR_ACTION_SPACE)


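# evaluate_actions() re-scores a simulated rollout: log-probs and entropies
# should come back per-action, and each value estimate per-timestep.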
@pytest.mark.parametrize("discrete", [True, False], ids=["discrete", "continuous"])
@pytest.mark.parametrize("visual", [True, False], ids=["visual", "vector"])
@pytest.mark.parametrize("rnn", [True, False], ids=["rnn", "no_rnn"])
def test_evaluate_actions(rnn, visual, discrete):
    policy = create_policy_mock(
        TrainerSettings(), use_rnn=rnn, use_discrete=discrete, use_visual=visual
    )
    buffer = mb.simulate_rollout(64, policy.behavior_spec, memory_size=policy.m_size)
    vec_obs = [ModelUtils.list_to_tensor(buffer["vector_obs"])]
    act_masks = ModelUtils.list_to_tensor(buffer["action_mask"])
    if policy.use_continuous_act:
        actions = ModelUtils.list_to_tensor(buffer["actions"]).unsqueeze(-1)
    else:
        actions = ModelUtils.list_to_tensor(buffer["actions"], dtype=torch.long)
    vis_obs = []
    for idx, _ in enumerate(policy.actor_critic.network_body.visual_encoders):
        vis_ob = ModelUtils.list_to_tensor(buffer["visual_obs%d" % idx])
        vis_obs.append(vis_ob)

    # Only the memory at the start of each sequence is fed back in; the
    # network unrolls the rest over sequence_length steps.
    memories = [
        ModelUtils.list_to_tensor(buffer["memory"][i])
        for i in range(0, len(buffer["memory"]), policy.sequence_length)
    ]
    if len(memories) > 0:
        memories = torch.stack(memories).unsqueeze(0)

    log_probs, entropy, values = policy.evaluate_actions(
        vec_obs,
        vis_obs,
        masks=act_masks,
        actions=actions,
        memories=memories,
        seq_len=policy.sequence_length,
    )
    assert log_probs.shape == (64, policy.behavior_spec.action_size)
    assert entropy.shape == (64, policy.behavior_spec.action_size)
    for val in values.values():
        assert val.shape == (64,)


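# sample_actions() draws fresh actions from the current policy; with
# all_log_probs=True, discrete policies return one log-prob per branch action.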
@pytest.mark.parametrize("discrete", [True, False], ids=["discrete", "continuous"])
@pytest.mark.parametrize("visual", [True, False], ids=["visual", "vector"])
@pytest.mark.parametrize("rnn", [True, False], ids=["rnn", "no_rnn"])
def test_sample_actions(rnn, visual, discrete):
    policy = create_policy_mock(
        TrainerSettings(), use_rnn=rnn, use_discrete=discrete, use_visual=visual
    )
    buffer = mb.simulate_rollout(64, policy.behavior_spec, memory_size=policy.m_size)
    vec_obs = [ModelUtils.list_to_tensor(buffer["vector_obs"])]
    act_masks = ModelUtils.list_to_tensor(buffer["action_mask"])

    vis_obs = []
    for idx, _ in enumerate(policy.actor_critic.network_body.visual_encoders):
        vis_ob = ModelUtils.list_to_tensor(buffer["visual_obs%d" % idx])
        vis_obs.append(vis_ob)

    memories = [
        ModelUtils.list_to_tensor(buffer["memory"][i])
        for i in range(0, len(buffer["memory"]), policy.sequence_length)
    ]
    if len(memories) > 0:
        memories = torch.stack(memories).unsqueeze(0)

    (
        sampled_actions,
        log_probs,
        entropies,
        sampled_values,
        memories,
    ) = policy.sample_actions(
        vec_obs,
        vis_obs,
        masks=act_masks,
        memories=memories,
        seq_len=policy.sequence_length,
        all_log_probs=not policy.use_continuous_act,
    )
    if discrete:
        assert log_probs.shape == (
            64,
            sum(policy.behavior_spec.discrete_action_branches),
        )
    else:
        assert log_probs.shape == (64, policy.behavior_spec.action_shape)
    assert entropies.shape == (64, policy.behavior_spec.action_size)
    for val in sampled_values.values():
        assert val.shape == (64,)

    if rnn:
        assert memories.shape == (1, 1, policy.m_size)

experiment_torch.py:

import json
import os
import torch
from mlagents.tf_utils import tf
import argparse
from mlagents.trainers.learn import run_cli, parse_command_line
from mlagents.trainers.settings import TestingConfiguration
from mlagents.trainers.stats import StatsReporter
from mlagents_envs.timers import _thread_timer_stacks

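# Runs one training session through run_cli, then pulls the timing breakdown
# from the run_logs/timers.json file it writes and returns one row of results.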
def run_experiment(
    name: str,
    steps: int,
    use_torch: bool,
    algo: str,
    num_torch_threads: int,
    use_gpu: bool,
    num_envs: int = 1,
    config_name=None,
):
    TestingConfiguration.env_name = name
    TestingConfiguration.max_steps = steps
    TestingConfiguration.use_torch = use_torch
    TestingConfiguration.device = "cuda:0" if use_gpu else "cpu"
    if use_gpu:
        tf.device("/GPU:0")
    else:
        tf.device("/device:CPU:0")
    if not torch.cuda.is_available() and use_gpu:
        # GPU was requested but is unavailable: report "na" for every metric.
        return (
            name,
            str(steps),
            str(use_torch),
            algo,
            str(num_torch_threads),
            str(num_envs),
            str(use_gpu),
            "na",
            "na",
            "na",
            "na",
            "na",
            "na",
            "na",
        )
    if config_name is None:
        config_name = name
    run_options = parse_command_line(
        [f"config/{algo}/{config_name}.yaml", "--num-envs", f"{num_envs}"]
    )
    run_options.checkpoint_settings.run_id = (
        f"{name}_test_" + str(steps) + "_" + ("torch" if use_torch else "tf")
    )
    run_options.checkpoint_settings.force = True
    # run_options.env_settings.num_envs = num_envs
    for trainer_settings in run_options.behaviors.values():
        trainer_settings.threaded = False
    timers_path = os.path.join(
        "results", run_options.checkpoint_settings.run_id, "run_logs", "timers.json"
    )
    if use_torch:
        torch.set_num_threads(num_torch_threads)
    run_cli(run_options)
    # Reset global state so consecutive experiments don't pollute each other.
    StatsReporter.writers.clear()
    StatsReporter.stats_dict.clear()
    _thread_timer_stacks.clear()
    with open(timers_path) as timers_json_file:
        timers_json = json.load(timers_json_file)
        total = timers_json["total"]
        start_learning = timers_json["children"]["TrainerController.start_learning"]
        tc_advance = start_learning["children"]["TrainerController.advance"]
        evaluate = tc_advance["children"]["env_step"]["children"][
            "SubprocessEnvManager._take_step"
        ]["children"]
        update = tc_advance["children"]["trainer_advance"]["children"][
            "_update_policy"
        ]["children"]
        tc_advance_total = tc_advance["total"]
        tc_advance_count = tc_advance["count"]
        if use_torch:
            if algo == "ppo":
                update_total = update["TorchPPOOptimizer.update"]["total"]
                update_count = update["TorchPPOOptimizer.update"]["count"]
            else:
                update_total = update["SACTrainer._update_policy"]["total"]
                update_count = update["SACTrainer._update_policy"]["count"]
            evaluate_total = evaluate["TorchPolicy.evaluate"]["total"]
            evaluate_count = evaluate["TorchPolicy.evaluate"]["count"]
        else:
            if algo == "ppo":
                update_total = update["PPOOptimizer.update"]["total"]
                update_count = update["PPOOptimizer.update"]["count"]
            else:
                update_total = update["SACTrainer._update_policy"]["total"]
                update_count = update["SACTrainer._update_policy"]["count"]
            evaluate_total = evaluate["NNPolicy.evaluate"]["total"]
            evaluate_count = evaluate["NNPolicy.evaluate"]["count"]
    # todo: do total / count
    return (
        name,
        str(steps),
        str(use_torch),
        algo,
        str(num_torch_threads),
        str(num_envs),
        str(use_gpu),
        str(total),
        str(tc_advance_total),
        str(tc_advance_count),
        str(update_total),
        str(update_count),
        str(evaluate_total),
        str(evaluate_count),
    )


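# Benchmarks each environment with PyTorch (optionally also with 8 torch
# threads) and with TensorFlow, appending one row of timing data per run.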
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--steps", default=25000, type=int, help="The number of steps")
    parser.add_argument("--num-envs", default=1, type=int, help="The number of envs")
    parser.add_argument(
        "--gpu", default=False, action="store_true", help="If true, will use the GPU"
    )
    parser.add_argument(
        "--threads",
        default=False,
        action="store_true",
        help="If true, will try both 1 and 8 threads for torch",
    )
    parser.add_argument(
        "--ball",
        default=False,
        action="store_true",
        help="If true, will only do 3dball",
    )
    parser.add_argument(
        "--sac",
        default=False,
        action="store_true",
        help="If true, will run sac instead of ppo",
    )
    args = parser.parse_args()

    if args.gpu:
        os.environ["CUDA_VISIBLE_DEVICES"] = "0"
    else:
        os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

    algo = "ppo"
    if args.sac:
        algo = "sac"

    # (environment name, config file name) pairs to benchmark.
    envs_config_tuples = [
        ("3DBall", "3DBall"),
        ("GridWorld", "GridWorld"),
        ("PushBlock", "PushBlock"),
        ("CrawlerStaticTarget", "CrawlerStatic"),
    ]
    if algo == "ppo":
        envs_config_tuples += [
            ("Hallway", "Hallway"),
            ("VisualHallway", "VisualHallway"),
        ]
    if args.ball:
        envs_config_tuples = [("3DBall", "3DBall")]

    labels = (
        "name",
        "steps",
        "use_torch",
        "algorithm",
        "num_torch_threads",
        "num_envs",
        "use_gpu",
        "total",
        "tc_advance_total",
        "tc_advance_count",
        "update_total",
        "update_count",
        "evaluate_total",
        "evaluate_count",
    )

    results = []
    results.append(labels)
    f = open(
        f"result_data_steps_{args.steps}_algo_{algo}_envs_{args.num_envs}_gpu_{args.gpu}_thread_{args.threads}.txt",
        "w",
    )
    f.write(" ".join(labels) + "\n")

    for env_config in envs_config_tuples:
        # PyTorch run with a single torch thread.
        data = run_experiment(
            name=env_config[0],
            steps=args.steps,
            use_torch=True,
            algo=algo,
            num_torch_threads=1,
            use_gpu=args.gpu,
            num_envs=args.num_envs,
            config_name=env_config[1],
        )
        results.append(data)
        f.write(" ".join(data) + "\n")

        if args.threads:
            # Optional PyTorch run with 8 torch threads.
            data = run_experiment(
                name=env_config[0],
                steps=args.steps,
                use_torch=True,
                algo=algo,
                num_torch_threads=8,
                use_gpu=args.gpu,
                num_envs=args.num_envs,
                config_name=env_config[1],
            )
            results.append(data)
            f.write(" ".join(data) + "\n")

        # TensorFlow baseline run.
        data = run_experiment(
            name=env_config[0],
            steps=args.steps,
            use_torch=False,
            algo=algo,
            num_torch_threads=1,
            use_gpu=args.gpu,
            num_envs=args.num_envs,
            config_name=env_config[1],
        )
        results.append(data)
        f.write(" ".join(data) + "\n")
    for r in results:
        print(*r)
    f.close()


if __name__ == "__main__":
    main()
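
For reference, run_experiment can also be called directly for a single spot-check. A minimal sketch with hypothetical arguments, assuming config/ppo/3DBall.yaml exists relative to the working directory:

# Hypothetical one-off run: short CPU PPO session on 3DBall, printing the row.
row = run_experiment(
    name="3DBall",
    steps=1000,
    use_torch=True,
    algo="ppo",
    num_torch_threads=1,
    use_gpu=False,
)
print(*row)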