浏览代码

new test

/develop/transfer-bisim
yanchaosun 4 年前
当前提交
447124f1
共有 1 个文件被更改,包括 80 次插入28 次删除
  1. 108
      ml-agents/mlagents/trainers/tests/test_simple_transfer.py

108
ml-agents/mlagents/trainers/tests/test_simple_transfer.py


in_epoch_alter=True,
# in_batch_alter=True,
use_op_buffer=True,
# policy_layers=0,
# value_layers=0,
# conv_thres=1e-4,
# predict_return=True
# separate_policy_train=True,
# separate_value_train=True
# separate_value_net=True,
),
network_settings=NetworkSettings(num_layers=1, hidden_units=32),
summary_freq=500,

# assert all(not math.isnan(reward) for reward in processed_rewards)
# assert all(reward > success_threshold for reward in processed_rewards)
def test_2d_ppo(config=PPO_CONFIG, obs_spec_type="rich1", run_id="ppo_rich1", seed=0):
    """Run the PPO baseline on the 2-D continuous SimpleTransferEnvironment.

    Args:
        config: Trainer settings to start from; its hyperparameters,
            ``max_steps`` and ``summary_freq`` are overridden below.
        obs_spec_type: Observation spec name forwarded to the environment.
        run_id: Prefix of the run identifier; ``"_s<seed>"`` is appended.
        seed: Seed forwarded to ``_check_environment_trains`` and embedded
            in the run identifier.
    """
    # Environment options are collected first so the constructor call stays short.
    env_options = dict(
        use_discrete=False,
        action_size=2,
        step_size=0.1,
        num_vector=2,
        obs_spec_type=obs_spec_type,
        goal_type="hard",
    )
    environment = SimpleTransferEnvironment([BRAIN_NAME], **env_options)

    # Override only the optimisation-related hyperparameters of the base config.
    tuned_hyperparams = attr.evolve(
        config.hyperparameters,
        batch_size=360,
        buffer_size=12000,
        learning_rate=5.0e-3,
        learning_rate_schedule=ScheduleType.LINEAR,
    )
    trainer_config = attr.evolve(
        config,
        hyperparameters=tuned_hyperparams,
        max_steps=300000,
        summary_freq=5000,
    )

    seeded_run_id = run_id + "_s" + str(seed)
    _check_environment_trains(
        environment,
        {BRAIN_NAME: trainer_config},
        run_id=seeded_run_id,
        seed=seed,
    )
def test_2d_model(config=Transfer_CONFIG, obs_spec_type="rich1", run_id="model_rich1", seed=0):
# Purpose: build a SimpleTransferEnvironment, override the hyperparameters of
# `config` with attr.evolve, and pass the result to _check_environment_trains
# under the run id run_id + "_s" + str(seed).
# NOTE(review): this span looks like a diff rendering with the pre- and
# post-change lines interleaved and the indentation stripped:
#   - the SimpleTransferEnvironment(...) arguments and closing parenthesis are
#     not visible;
#   - the attr.evolve(...) call contains two argument runs that both start with
#     `config.hyperparameters, batch_size=...` (120 and 1200) and has no visible
#     closing parenthesis;
#   - `config` is evolved twice, with max_steps=200000 and then max_steps=500000.
# Confirm against the real file which variant is current before relying on any
# of these values.
env = SimpleTransferEnvironment(

new_hyperparams = attr.evolve(
config.hyperparameters, batch_size=120, buffer_size=12000, learning_rate=5.0e-3,
use_bisim=True, predict_return=True,
# separate_value_train=True, separate_policy_train=True,
use_var_predict=True, with_prior=True, use_op_buffer=True, in_epoch_alter=False, in_batch_alter=True,
policy_layers=2, value_layers=2, encoder_layers=0, feature_size=2,
#use_inverse_model=True
config.hyperparameters, batch_size=1200, buffer_size=12000, learning_rate=5.0e-3,
learning_rate_schedule=ScheduleType.LINEAR,
use_bisim=True, predict_return=True, use_var_predict=True, with_prior=False,
separate_policy_train=False, separate_value_train=True,
use_op_buffer=False, in_epoch_alter=False, in_batch_alter=True,
policy_layers=0, value_layers=2, encoder_layers=2, feature_size=16,
forward_layers=2,
config = attr.evolve(config, hyperparameters=new_hyperparams, max_steps=200000, summary_freq=5000)
config = attr.evolve(config, hyperparameters=new_hyperparams, max_steps=500000, summary_freq=5000)
_check_environment_trains(env, {BRAIN_NAME: config}, run_id=run_id + "_s" + str(seed), seed=seed)
# Purpose: evolve `config` with use_transfer=True and transfer_path=transfer_from,
# then pass it to _check_environment_trains under run_id + "_s" + str(seed).
# NOTE(review): the signature is cut off after `obs_spec_type` (the
# `transfer_from`, `run_id` and `seed` names used below are not declared in the
# visible text) and the start of the environment construction is missing.
# NOTE(review): the attr.evolve(...) call shows two leading argument lines that
# differ only in batch_size (120 vs 1200); this looks like the old and new side
# of a diff printed together. Confirm which one is current.
def test_2d_transfer(config=Transfer_CONFIG, obs_spec_type="rich1",

num_vector=2, obs_spec_type=obs_spec_type, goal_type="hard"
)
new_hyperparams = attr.evolve(
config.hyperparameters, batch_size=120, buffer_size=12000, use_transfer=True,
config.hyperparameters, batch_size=1200, buffer_size=12000, use_transfer=True,
learning_rate_schedule=ScheduleType.LINEAR,
transfer_path=transfer_from,
use_bisim=True, predict_return=True, use_var_predict=True, with_prior=False,
separate_policy_train=False, separate_value_train=True,
use_op_buffer=False, in_epoch_alter=False, in_batch_alter=True,
policy_layers=0, value_layers=2, encoder_layers=2, forward_layers=2,
feature_size=16, train_model=False, load_policy=False, load_value=False,
)
config = attr.evolve(config, hyperparameters=new_hyperparams, max_steps=500000, summary_freq=5000)
_check_environment_trains(env, {BRAIN_NAME: config}, run_id=run_id + "_s" + str(seed), seed=seed)
def test_2d_transfer_dynamics(config=Transfer_CONFIG, obs_spec_type="rich1",
transfer_from="./transfer_results/model_rich2_f4_pv-l0_rew_bisim-op_s0/Simple",
run_id="transfer_f4_rich1_from-rich2-retrain-pv_rew_bisim-op", seed=1337):
# Purpose: build a SimpleTransferEnvironment with step_size=0.5, evolve `config`
# with use_transfer=True, and pass it to _check_environment_trains under
# run_id + "_s" + str(seed).
# NOTE(review): `transfer_from` is accepted as a parameter but is not referenced
# anywhere in the visible body (no transfer_path=... argument) - confirm whether
# that line is missing from this view.
# NOTE(review): the attr.evolve(...) call below repeats several keywords with
# different values (feature_size=2 / feature_size=4, use_op_buffer=False / True,
# in_epoch_alter, in_batch_alter, policy_layers, value_layers, encoder_layers,
# and others). Python rejects a call with a repeated keyword argument, so this
# is presumably two versions of the argument list merged by a diff rendering.
# Confirm which set is current.
env = SimpleTransferEnvironment(
[BRAIN_NAME], use_discrete=False, action_size=2, step_size=0.5,
num_vector=2, obs_spec_type=obs_spec_type, goal_type="hard"
)
new_hyperparams = attr.evolve(
config.hyperparameters, batch_size=360, buffer_size=12000, use_transfer=True,
learning_rate_schedule=ScheduleType.LINEAR,
use_op_buffer=False, in_epoch_alter=False, in_batch_alter=True, learning_rate=5.0e-3,
train_policy=True, train_value=True, train_model=False, feature_size=2,
use_var_predict=True, with_prior=True, policy_layers=2, load_policy=False,
load_value=False, predict_return=True, value_layers=2, encoder_layers=1,
use_bisim=False,
transfer_type="observation", train_encoder=False, feature_size=4,
use_op_buffer=True, in_epoch_alter=True, in_batch_alter=False, learning_rate=5.0e-3,
use_var_predict=True, with_prior=True, predict_return=True,
policy_layers=0, value_layers=1, encoder_layers=2
)
config = attr.evolve(config, hyperparameters=new_hyperparams, max_steps=300000, summary_freq=5000)
_check_environment_trains(env, {BRAIN_NAME: config}, run_id=run_id + "_s" + str(seed), seed=seed)

# Experiment driver: calls test_2d_model once per observation spec, then
# test_2d_transfer on "rich2" loading from the "normal" and "rich1" model runs.
# The remaining lines are earlier experiment sweeps kept as commented-out code.
# NOTE(review): indentation is missing, so it cannot be told from here whether
# these loops run at module level or under a guard - confirm.
# NOTE(review): two consecutive `for obs in ...` headers appear before the
# test_2d_model call, and the `for obs in ["normal", "rich2"]` loop shows only
# keyword-argument lines (two transfer_from/run_id pairs) without the call they
# belong to; this looks like old and new diff lines printed together.
# for obs in ["normal"]: # ["normal", "rich1", "rich2"]:
# test_2d_model(seed=0, obs_spec_type=obs, run_id="model_" + obs \
# + "_f2_pv-l2_linear-rew_ibalter_conlr_enc-l0-op4_bisim_suf1")
# test_2d_model(config=SAC_CONFIG, run_id="sac_rich2_hard", seed=0)
for obs in ["normal"]:
for obs in ["normal", "rich1", "rich2"]:
test_2d_model(seed=0, obs_spec_type=obs, run_id="model_" + obs \
+ "_f16_2-0-2-2")
for obs in ["normal", "rich2"]:
transfer_from="./transfer_results/model_"+ obs +"_f2_pv-l2_linear-rew_ibalter_conlr_enc-l0-op4_s0/Simple",
run_id="transfer_rich1_f2_pv-l2_ibalter_suf1_nobisim_from_" + obs)
transfer_from="./transfer_results/model_"+ obs +"_f16_2-0-2-2_s0/Simple",
run_id="transfer_rich1_f16_2-0-2-2_from_" + obs)
for obs in ["normal", "rich1"]:
test_2d_transfer(seed=10, obs_spec_type="rich2",
transfer_from="./transfer_results/model_"+ obs +"_f16_2-0-2-2_s0/Simple",
run_id="transfer_rich2_f16_2-0-2-2_from_" + obs)
# for s in [100, 101, 102]:
# for obs in ["normal", "rich1", "rich2"]:
# test_2d_model(seed=s, obs_spec_type=obs, run_id="model_" + obs \
# + "_f4_linear_360_iealter")
# # for obs in ["normal", "rich1", "rich2"]:
# # test_2d_ppo(config=PPO_CONFIG, obs_spec_type=obs, run_id="ppo_linear"+obs, seed=s)
# for obs in ["normal", "rich2"]:
# test_2d_transfer(seed=s, obs_spec_type="rich1",
# transfer_from="./transfer_results/model_"+ obs +"_f4_linear_360_iealter_s"+str(s)+"/Simple",
# run_id="transfer_rich1_f4_linear_iealter_360_from_" + obs)
# for obs in ["normal", "rich1"]:
# test_2d_transfer(seed=s, obs_spec_type="rich2",
# transfer_from="./transfer_results/model_"+ obs +"_f4_linear_360_iealter_s"+str(s)+"/Simple",
# run_id="transfer_rich2_f4_linear_iealter_360_from_" + obs)
# for obs in ["normal", "rich1", "rich2"]:
# test_2d_transfer_dynamics(seed=s, obs_spec_type=obs,
# transfer_from="./transfer_results/model_"+ obs +"_f4_linear_360_iealter_s"+str(s)+"/Simple",
# run_id="transfer_" + obs + "_ss5_f4_linear_iealter_360_from_ss1")
# for obs in ["normal"]:
# test_2d_transfer(seed=0, obs_spec_type="rich1",

正在加载...
取消
保存