浏览代码

test code

/develop/bisim-sac-transfer
yanchaosun 4 年前
当前提交
fb5c33c1
共有 2 个文件被更改,包括 185 次插入35 次删除
  1. 152
      ml-agents/mlagents/trainers/tests/reward_plot.ipynb
  2. 68
      ml-agents/mlagents/trainers/tests/test_simple_transfer.py

152
ml-agents/mlagents/trainers/tests/reward_plot.ipynb
文件差异内容过多而无法显示
查看文件

68
ml-agents/mlagents/trainers/tests/test_simple_transfer.py


batch_size=16,
buffer_size=64,
),
network_settings=NetworkSettings(num_layers=2, hidden_units=64),
network_settings=NetworkSettings(num_layers=2, hidden_units=32),
summary_freq=500,
max_steps=3000,
threaded=False,

trainer_type=TrainerType.PPO_Transfer,
hyperparameters=PPOTransferSettings(
learning_rate=5.0e-3,
learning_rate_schedule=ScheduleType.CONSTANT,
# learning_rate_schedule=ScheduleType.CONSTANT,
reuse_encoder=False,
in_epoch_alter=True,
# in_batch_alter=True,
use_op_buffer=True,
# policy_layers=0,
# value_layers=0,
# conv_thres=1e-4,
# predict_return=True
# separate_policy_train=True,
# separate_value_train=True
# separate_value_net=True,
),
network_settings=NetworkSettings(num_layers=1, hidden_units=32),
summary_freq=500,

config.hyperparameters, batch_size=1200, buffer_size=12000, learning_rate=5.0e-3
)
config = attr.evolve(
config, hyperparameters=new_hyperparams, max_steps=350000, summary_freq=5000
config, hyperparameters=new_hyperparams, max_steps=500000, summary_freq=5000
)
_check_environment_trains(
env, {BRAIN_NAME: config}, run_id=run_id + "_s" + str(seed), seed=seed

value_layers=2,
forward_layers=0,
encoder_layers=2,
action_layers=1,
# use_inverse_model=True
action_feature_size=4,
config, hyperparameters=new_hyperparams, max_steps=250000, summary_freq=5000
config, hyperparameters=new_hyperparams, max_steps=500000, summary_freq=5000
)
_check_environment_trains(
env, {BRAIN_NAME: config}, run_id=run_id + "_s" + str(seed), seed=seed

train_policy=True,
train_value=True,
train_model=False,
train_action=False,
action_feature_size=4,
load_action=True,
action_layers=1,
config, hyperparameters=new_hyperparams, max_steps=250000, summary_freq=5000
config, hyperparameters=new_hyperparams, max_steps=500000, summary_freq=5000
)
_check_environment_trains(
env, {BRAIN_NAME: config}, run_id=run_id + "_s" + str(seed), seed=seed

if __name__ == "__main__":
for seed in range(5, 10):
if seed > -1:
for obs in ["normal", "rich1", "rich2"]:
test_2d_model(seed=seed, obs_spec_type=obs, run_id="model_" + obs)
for seed in range(5):
# if seed > -1:
# for obs in ["normal", "long", "longpre"]:
# test_2d_model(seed=seed, obs_spec_type=obs, run_id="model_" + obs)
# test_2d_ppo(seed=seed, obs_spec_type=obs, run_id="ppo_" + obs)
for obs in ["long", "longpre"]:
test_2d_transfer(
seed=seed,
obs_spec_type=obs,
transfer_from="./transfer_results/model_normal_s" + str(seed) + "/Simple",
run_id="normal_transfer_linear_fix_to_" + obs,
)
# # test_2d_model(config=SAC_CONFIG, run_id="sac_rich2_hard", seed=0)
# for obs in ["normal", "rich2"]:

# run_id=obs + "transfer_to_rich2",
# )
for obs in ["longpre"]:
test_2d_model(seed=0, obs_spec_type=obs, run_id="model_" + obs)
test_2d_ppo(seed=0, obs_spec_type=obs, run_id="ppo_" + obs)
# for obs in ["longpre"]:
# test_2d_model(seed=0, obs_spec_type=obs, run_id="model_" + obs)
# test_2d_ppo(seed=0, obs_spec_type=obs, run_id="ppo_" + obs)
# test_2d_transfer(seed=0, obs_spec_type="longpre",
# transfer_from="./transfer_results/model_normal_s0/Simple",
# run_id="normal_transfer_to_longpre_reuse_trainmod")
# # test_2d_transfer(seed=0, obs_spec_type="longpre",
# # transfer_from="./transfer_results/model_normal_s0/Simple",
# # run_id="normal_transfer_to_longpre_reuse_trainmod")
# for s in range(1, 5):
# for obs in ["normal", "long", "longpre"]:
# test_2d_model(seed=s, obs_spec_type=obs, run_id="model_" + obs)
# test_2d_ppo(seed=s, obs_spec_type=obs, run_id="ppo_" + obs)
# for obs in ["long", "longpre"]:
# test_2d_transfer(seed=s, obs_spec_type=obs,
# transfer_from="./transfer_results/model_normal_s"+str(s)+"/Simple",
# run_id="normal_transfer_to_"+ obs)
正在加载...
取消
保存