|
|
|
|
|
|
in_epoch_alter=True, |
|
|
|
# in_batch_alter=True, |
|
|
|
use_op_buffer=True, |
|
|
|
# policy_layers=0, |
|
|
|
# value_layers=0, |
|
|
|
# conv_thres=1e-4, |
|
|
|
# predict_return=True |
|
|
|
# separate_policy_train=True, |
|
|
|
# separate_value_train=True |
|
|
|
# separate_value_net=True, |
|
|
|
), |
|
|
|
network_settings=NetworkSettings(num_layers=1, hidden_units=32), |
|
|
|
summary_freq=500, |
|
|
|
|
|
|
# assert all(not math.isnan(reward) for reward in processed_rewards) |
|
|
|
# assert all(reward > success_threshold for reward in processed_rewards) |
|
|
|
|
|
|
|
def test_2d_ppo(config=PPO_CONFIG, obs_spec_type="rich1", run_id="ppo_rich1", seed=0):
    """Run ``_check_environment_trains`` on a 2-action continuous transfer env.

    Args:
        config: Trainer settings used as a template (``PPO_CONFIG`` by
            default). Its hyperparameters, ``max_steps`` and ``summary_freq``
            are overridden here; the object passed in is not mutated because
            ``attr.evolve`` returns modified copies.
        obs_spec_type: Observation spec name forwarded to
            ``SimpleTransferEnvironment``.
        run_id: Prefix of the run identifier; ``"_s<seed>"`` is appended.
        seed: Seed forwarded to ``_check_environment_trains`` and encoded in
            the run identifier.
    """
    env = SimpleTransferEnvironment(
        [BRAIN_NAME],
        use_discrete=False,
        action_size=2,
        step_size=0.1,
        num_vector=2,
        obs_spec_type=obs_spec_type,
        goal_type="hard",
    )
    new_hyperparams = attr.evolve(
        config.hyperparameters,
        batch_size=360,
        buffer_size=12000,
        learning_rate=5.0e-3,
        learning_rate_schedule=ScheduleType.LINEAR,
    )
    config = attr.evolve(
        config,
        hyperparameters=new_hyperparams,
        max_steps=300000,
        summary_freq=5000,
    )
    _check_environment_trains(
        env,
        {BRAIN_NAME: config},
        run_id=run_id + "_s" + str(seed),
        seed=seed,
    )
|
|
|
|
|
|
|
def test_2d_model(config=Transfer_CONFIG, obs_spec_type="rich1", run_id="model_rich1", seed=0): |
|
|
|
env = SimpleTransferEnvironment( |
|
|
|
|
|
|
new_hyperparams = attr.evolve( |
|
|
|
config.hyperparameters, batch_size=120, buffer_size=12000, learning_rate=5.0e-3, |
|
|
|
use_bisim=True, predict_return=True, |
|
|
|
# separate_value_train=True, separate_policy_train=True, |
|
|
|
use_var_predict=True, with_prior=True, use_op_buffer=True, in_epoch_alter=False, in_batch_alter=True, |
|
|
|
policy_layers=2, value_layers=2, encoder_layers=0, feature_size=2, |
|
|
|
#use_inverse_model=True |
|
|
|
config.hyperparameters, batch_size=1200, buffer_size=12000, learning_rate=5.0e-3, |
|
|
|
learning_rate_schedule=ScheduleType.LINEAR, |
|
|
|
use_bisim=True, predict_return=True, use_var_predict=True, with_prior=False, |
|
|
|
separate_policy_train=False, separate_value_train=True, |
|
|
|
use_op_buffer=False, in_epoch_alter=False, in_batch_alter=True, |
|
|
|
policy_layers=0, value_layers=2, encoder_layers=2, feature_size=16, |
|
|
|
forward_layers=2, |
|
|
|
config = attr.evolve(config, hyperparameters=new_hyperparams, max_steps=200000, summary_freq=5000) |
|
|
|
config = attr.evolve(config, hyperparameters=new_hyperparams, max_steps=500000, summary_freq=5000) |
|
|
|
_check_environment_trains(env, {BRAIN_NAME: config}, run_id=run_id + "_s" + str(seed), seed=seed) |
|
|
|
|
|
|
|
def test_2d_transfer(config=Transfer_CONFIG, obs_spec_type="rich1", |
|
|
|
|
|
|
num_vector=2, obs_spec_type=obs_spec_type, goal_type="hard" |
|
|
|
) |
|
|
|
new_hyperparams = attr.evolve( |
|
|
|
config.hyperparameters, batch_size=120, buffer_size=12000, use_transfer=True, |
|
|
|
config.hyperparameters, batch_size=1200, buffer_size=12000, use_transfer=True, |
|
|
|
learning_rate_schedule=ScheduleType.LINEAR, |
|
|
|
transfer_path=transfer_from, |
|
|
|
use_bisim=True, predict_return=True, use_var_predict=True, with_prior=False, |
|
|
|
separate_policy_train=False, separate_value_train=True, |
|
|
|
use_op_buffer=False, in_epoch_alter=False, in_batch_alter=True, |
|
|
|
policy_layers=0, value_layers=2, encoder_layers=2, forward_layers=2, |
|
|
|
feature_size=16, train_model=False, load_policy=False, load_value=False, |
|
|
|
) |
|
|
|
config = attr.evolve(config, hyperparameters=new_hyperparams, max_steps=500000, summary_freq=5000) |
|
|
|
_check_environment_trains(env, {BRAIN_NAME: config}, run_id=run_id + "_s" + str(seed), seed=seed) |
|
|
|
|
|
|
|
def test_2d_transfer_dynamics(
    config=Transfer_CONFIG,
    obs_spec_type="rich1",
    transfer_from="./transfer_results/model_rich2_f4_pv-l0_rew_bisim-op_s0/Simple",
    run_id="transfer_f4_rich1_from-rich2-retrain-pv_rew_bisim-op",
    seed=1337,
):
    """Run a transfer experiment on an env built with ``step_size=0.5``.

    The environment is constructed like the one in ``test_2d_ppo`` except for
    the step size (0.5 here, 0.1 there).

    Args:
        config: Trainer settings used as a template (``Transfer_CONFIG`` by
            default); ``attr.evolve`` returns modified copies, so the object
            passed in is not mutated.
        obs_spec_type: Observation spec name forwarded to
            ``SimpleTransferEnvironment``.
        transfer_from: Path of the source run, passed to the hyperparameters
            as ``transfer_path``.
        run_id: Prefix of the run identifier; ``"_s<seed>"`` is appended.
        seed: Seed forwarded to ``_check_environment_trains`` and encoded in
            the run identifier.
    """
    env = SimpleTransferEnvironment(
        [BRAIN_NAME],
        use_discrete=False,
        action_size=2,
        step_size=0.5,
        num_vector=2,
        obs_spec_type=obs_spec_type,
        goal_type="hard",
    )
    # NOTE(review): the original call listed several keywords twice with
    # different values (feature_size 2/4, use_op_buffer False/True,
    # in_epoch_alter False/True, in_batch_alter True/False, policy_layers 2/0,
    # value_layers 2/1, encoder_layers 1/2), which Python rejects as a
    # repeated keyword argument. Each keyword is kept once with its later
    # value; those values agree with the "f4" / "pv-l0" / "op" tags in this
    # function's default run_id. Confirm against the intended experiment.
    new_hyperparams = attr.evolve(
        config.hyperparameters,
        batch_size=360,
        buffer_size=12000,
        learning_rate=5.0e-3,
        learning_rate_schedule=ScheduleType.LINEAR,
        # Transfer source: previously `transfer_from` was accepted but never
        # used; forward it the same way test_2d_transfer does.
        use_transfer=True,
        transfer_path=transfer_from,
        transfer_type="observation",
        # Which components are loaded from the source run / trained here.
        load_policy=False,
        load_value=False,
        train_policy=True,
        train_value=True,
        train_model=False,
        train_encoder=False,
        # Model options.
        use_bisim=False,
        use_var_predict=True,
        with_prior=True,
        predict_return=True,
        use_op_buffer=True,
        in_epoch_alter=True,
        in_batch_alter=False,
        # Network shape.
        feature_size=4,
        policy_layers=0,
        value_layers=1,
        encoder_layers=2,
    )
    config = attr.evolve(
        config,
        hyperparameters=new_hyperparams,
        max_steps=300000,
        summary_freq=5000,
    )
    _check_environment_trains(
        env,
        {BRAIN_NAME: config},
        run_id=run_id + "_s" + str(seed),
        seed=seed,
    )
|
|
|
|
|
|
# for obs in ["normal"]: # ["normal", "rich1", "rich2"]: |
|
|
|
# test_2d_model(seed=0, obs_spec_type=obs, run_id="model_" + obs \ |
|
|
|
# + "_f2_pv-l2_linear-rew_ibalter_conlr_enc-l0-op4_bisim_suf1") |
|
|
|
|
|
|
|
# test_2d_model(config=SAC_CONFIG, run_id="sac_rich2_hard", seed=0) |
|
|
|
# Train one model per observation spec. The transfer runs further down read
# from "./transfer_results/model_<obs>_f16_2-0-2-2_s0/Simple", which is
# presumably where these runs are written (run_id plus the "_s0" seed
# suffix) -- confirm against the trainer's output directory.
for obs in ["normal", "rich1", "rich2"]:
    test_2d_model(
        seed=0,
        obs_spec_type=obs,
        run_id="model_" + obs + "_f16_2-0-2-2",
    )
|
|
|
|
|
|
|
for obs in ["normal", "rich2"]: |
|
|
|
transfer_from="./transfer_results/model_"+ obs +"_f2_pv-l2_linear-rew_ibalter_conlr_enc-l0-op4_s0/Simple", |
|
|
|
run_id="transfer_rich1_f2_pv-l2_ibalter_suf1_nobisim_from_" + obs) |
|
|
|
transfer_from="./transfer_results/model_"+ obs +"_f16_2-0-2-2_s0/Simple", |
|
|
|
run_id="transfer_rich1_f16_2-0-2-2_from_" + obs) |
|
|
|
|
|
|
|
# Transfer onto the "rich2" observation spec, once from each of the other
# two source models.
for obs in ["normal", "rich1"]:
    test_2d_transfer(
        seed=10,
        obs_spec_type="rich2",
        transfer_from="./transfer_results/model_" + obs + "_f16_2-0-2-2_s0/Simple",
        run_id="transfer_rich2_f16_2-0-2-2_from_" + obs,
    )
|
|
|
|
|
|
|
# for s in [100, 101, 102]: |
|
|
|
# for obs in ["normal", "rich1", "rich2"]: |
|
|
|
# test_2d_model(seed=s, obs_spec_type=obs, run_id="model_" + obs \ |
|
|
|
# + "_f4_linear_360_iealter") |
|
|
|
|
|
|
|
# # for obs in ["normal", "rich1", "rich2"]: |
|
|
|
# # test_2d_ppo(config=PPO_CONFIG, obs_spec_type=obs, run_id="ppo_linear"+obs, seed=s) |
|
|
|
|
|
|
|
# for obs in ["normal", "rich2"]: |
|
|
|
# test_2d_transfer(seed=s, obs_spec_type="rich1", |
|
|
|
# transfer_from="./transfer_results/model_"+ obs +"_f4_linear_360_iealter_s"+str(s)+"/Simple", |
|
|
|
# run_id="transfer_rich1_f4_linear_iealter_360_from_" + obs) |
|
|
|
|
|
|
|
# for obs in ["normal", "rich1"]: |
|
|
|
# test_2d_transfer(seed=s, obs_spec_type="rich2", |
|
|
|
# transfer_from="./transfer_results/model_"+ obs +"_f4_linear_360_iealter_s"+str(s)+"/Simple", |
|
|
|
# run_id="transfer_rich2_f4_linear_iealter_360_from_" + obs) |
|
|
|
|
|
|
|
# for obs in ["normal", "rich1", "rich2"]: |
|
|
|
# test_2d_transfer_dynamics(seed=s, obs_spec_type=obs, |
|
|
|
# transfer_from="./transfer_results/model_"+ obs +"_f4_linear_360_iealter_s"+str(s)+"/Simple", |
|
|
|
# run_id="transfer_" + obs + "_ss5_f4_linear_iealter_360_from_ss1") |
|
|
|
|
|
|
|
# for obs in ["normal"]: |
|
|
|
# test_2d_transfer(seed=0, obs_spec_type="rich1", |
|
|
|