
minor update

/develop/bisim-review
yanchaosun, 4 years ago
Current commit: ec929746
3 files changed, with 13 insertions and 86 deletions
  1. ml-agents/mlagents/trainers/policy/transfer_policy.py (75 changed lines)
  2. ml-agents/mlagents/trainers/ppo_transfer/optimizer.py (6 changed lines)
  3. ml-agents/mlagents/trainers/tests/test_simple_transfer.py (18 changed lines)

ml-agents/mlagents/trainers/policy/transfer_policy.py (75 changed lines)


        self.next_visual_in: List[tf.Tensor] = []
        # if var_encoder:
        #     self.encoder, self.targ_encoder, self.encoder_distribution, _ = self.create_encoders(var_latent=True, reuse_encoder=reuse_encoder)
        # else:
        #     self.encoder, self.targ_encoder = self.create_encoders(reuse_encoder=reuse_encoder)
        # if not reuse_encoder:
        #     self.targ_encoder = tf.stop_gradient(self.targ_encoder)
        #     self._create_hard_copy()
        if var_encoder:
            self.encoder_distribution, self.encoder = self._create_var_encoder(
                self.visual_in,

        self.create_bisim_model(self.h_size, self.feature_size, encoder_layers,
                                self.vis_encode_type, forward_layers, var_predict, predict_return)
        # if var_predict:
        #     self.predict_distribution, self.predict = self._create_var_world_model(
        #         self.encoder,
        #         self.h_size,
        #         self.feature_size,
        #         self.num_layers,
        #         self.vis_encode_type,
        #         predict_return
        #     )
        # else:
        #     self.predict = self._create_world_model(
        #         self.encoder,
        #         self.h_size,
        #         self.feature_size,
        #         self.num_layers,
        #         self.vis_encode_type,
        #         predict_return
        #     )
        # if inverse_model:
        #     self._create_inverse_model(self.encoder, self.targ_encoder)
        if self.use_continuous_act:
            self._create_cc_actor(
                self.encoder,
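The create_bisim_model call above builds the separate encoder/forward-model branch this bisim-review commit is concerned with. For orientation only, the sketch below shows one generic form of a bisimulation-style objective on a pair of latent states: latent distance is regressed onto the reward difference plus a discounted Wasserstein distance between the predicted next-latent Gaussians. All names here are illustrative; this is not necessarily what create_bisim_model constructs.

import numpy as np

def w2_gaussian(mu_i, sigma_i, mu_j, sigma_j):
    # 2-Wasserstein distance between two diagonal Gaussians
    return np.sqrt(np.sum((mu_i - mu_j) ** 2) + np.sum((sigma_i - sigma_j) ** 2))

def bisim_loss(z_i, z_j, r_i, r_j, next_i, next_j, gamma=0.99):
    # next_i / next_j are (mu, sigma) of the predicted next-latent Gaussians
    latent_dist = np.sum(np.abs(z_i - z_j))                      # ||z_i - z_j||_1
    target = np.abs(r_i - r_j) + gamma * w2_gaussian(*next_i, *next_j)
    return (latent_dist - target) ** 2                           # regress distance onto target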

            partial_model_checkpoint = os.path.join(path, f"{net}.ckpt")
            partial_saver.restore(self.sess, partial_model_checkpoint)
            print("loaded net", net, "from path", path)
        # variables_to_restore = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "encoding/latent")
        # partial_saver = tf.train.Saver(variables_to_restore)
        # partial_model_checkpoint = os.path.join(path, f"latent.ckpt")
        # partial_saver.restore(self.sess, partial_model_checkpoint)
        # print("loaded net latent from path", path)
        if transfer_type == "observation":
            self.run_hard_copy()
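The deleted comments above spell out the partial-restore pattern this load path relies on: build a tf.train.Saver over only the variables in one scope and restore that scope from its own checkpoint file. A minimal sketch of that pattern in TF1; the function name and session argument are assumptions, not the policy's actual API.

import os
import tensorflow as tf

def restore_scope(sess, scope, path):
    # Only the trainable variables under `scope` are restored
    variables_to_restore = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope)
    partial_saver = tf.train.Saver(variables_to_restore)
    partial_model_checkpoint = os.path.join(path, f"{scope}.ckpt")
    partial_saver.restore(sess, partial_model_checkpoint)
    print("loaded net", scope, "from path", path)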

            )
        return predict

    def _create_var_world_model(
        self,
        encoder: tf.Tensor,
        h_size: int,
        feature_size: int,
        num_layers: int,
        vis_encode_type: EncoderType,
        predict_return: bool = False,
    ) -> tf.Tensor:
        """
        Builds the variational world model for state prediction.
        """
        with self.graph.as_default():
            with tf.variable_scope("predict"):
                hidden_stream = ModelUtils.create_vector_observation_encoder(
                    tf.concat([encoder, self.current_action], axis=1),
                    h_size,
                    ModelUtils.swish,
                    num_layers,
                    scope="main_graph",
                    reuse=False,
                )
            with tf.variable_scope("latent"):
                if predict_return:
                    predict_distribution = GaussianEncoderDistribution(
                        hidden_stream, feature_size + 1
                    )
                    # separate prediction of return
                else:
                    predict_distribution = GaussianEncoderDistribution(
                        hidden_stream, feature_size
                    )
                predict = predict_distribution.sample()
        return predict_distribution, predict

    @timed
    def evaluate(
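_create_var_world_model predicts a Gaussian over the next latent state (plus one extra dimension for the return when predict_return is set) from the current latent state and action, then draws a sample. Below is a self-contained TF1-style sketch of the same idea using plain dense layers instead of the repository's GaussianEncoderDistribution; the function name, layer choices, and reparameterized sampling are assumptions made for illustration.

import tensorflow as tf  # assumes TF 1.x graph mode, as in the file above

def var_world_model_head(encoder, current_action, h_size, feature_size,
                         num_layers, predict_return=False):
    out_size = feature_size + 1 if predict_return else feature_size
    hidden = tf.concat([encoder, current_action], axis=1)
    for i in range(num_layers):
        # MLP trunk over [latent state, action]
        hidden = tf.layers.dense(hidden, h_size, activation=tf.nn.swish, name=f"predict_{i}")
    mu = tf.layers.dense(hidden, out_size, name="predict_mu")
    log_sigma = tf.layers.dense(hidden, out_size, name="predict_log_sigma")
    # Reparameterized sample from N(mu, sigma^2)
    predict = mu + tf.exp(log_sigma) * tf.random.normal(tf.shape(mu))
    return (mu, log_sigma), predict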

ml-agents/mlagents/trainers/ppo_transfer/optimizer.py (6 changed lines)


            print("start update policy", self.num_updates)
        elif self.in_batch_alter:
            update_vals = self._execute_model(feed_dict, self.ppo_update_dict)
            update_vals = self._execute_model(feed_dict, self.model_update_dict)
            update_vals.update(self._execute_model(feed_dict, self.ppo_update_dict))
            batch.shuffle(sequence_length=10)
            batch.shuffle(sequence_length=1)
            update_vals.update(self._execute_model(feed_dict, self.model_update_dict))
        elif self.use_transfer and self.smart_transfer:
            if self.update_mode == "model":
                update_vals = self._execute_model(feed_dict, self.update_dict)
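The in_batch_alter branch above interleaves the PPO update and the model update inside a single batch, reshuffling in between; moving from sequence_length=10 to sequence_length=1 shuffles individual timesteps rather than 10-step sequences. A toy version of that flow, with placeholder callables rather than the optimizer's actual helpers:

import numpy as np

def in_batch_alter_update(batch, ppo_step, model_step, rng=np.random):
    stats = ppo_step(batch)                    # PPO losses on the original ordering
    order = rng.permutation(len(batch))        # sequence_length=1: shuffle single steps
    reshuffled = [batch[i] for i in order]
    stats.update(model_step(reshuffled))       # model losses on the reshuffled batch
    return stats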

ml-agents/mlagents/trainers/tests/test_simple_transfer.py (18 changed lines)


    new_hyperparams = attr.evolve(
        config.hyperparameters, batch_size=120, buffer_size=12000, use_transfer=True,
        transfer_path=transfer_from,  # separate_policy_train=True, separate_value_train=True,
        use_op_buffer=True, in_epoch_alter=False, in_batch_alter=True, learning_rate=5.0e-3,
        train_policy=False, train_value=False, train_model=False, feature_size=2,
        use_var_predict=True, with_prior=True, policy_layers=2, load_policy=True,
        load_value=True, predict_return=True, value_layers=2, encoder_layers=0,
        use_bisim=True,
        use_op_buffer=False, in_epoch_alter=False, in_batch_alter=True, learning_rate=5.0e-3,
        train_policy=True, train_value=True, train_model=False, feature_size=2,
        use_var_predict=True, with_prior=True, policy_layers=2, load_policy=False,
        load_value=False, predict_return=True, value_layers=2, encoder_layers=1,
        use_bisim=False,
    )
    config = attr.evolve(config, hyperparameters=new_hyperparams, max_steps=300000, summary_freq=5000)
    _check_environment_trains(env, {BRAIN_NAME: config}, run_id=run_id + "_s" + str(seed), seed=seed)
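The test builds its trainer settings with attr.evolve, which returns a copy of a (typically frozen) attrs instance with the named fields replaced, so the shared base config is never mutated between test cases. A minimal sketch with made-up field names:

import attr

@attr.s(auto_attribs=True)
class Hyperparameters:
    batch_size: int = 32
    buffer_size: int = 1024
    learning_rate: float = 3.0e-4
    use_transfer: bool = False

base = Hyperparameters()
transfer_params = attr.evolve(
    base, batch_size=120, buffer_size=12000, learning_rate=5.0e-3, use_transfer=True
)
assert base.use_transfer is False  # the original config is untouched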

# for obs in ["normal"]:  # ["normal", "rich1", "rich2"]:
#     test_2d_model(seed=0, obs_spec_type=obs, run_id="model_" + obs \
#         + "_f2_pv-l2_linear-rew_ibalter_conlr_enc-l0-op4_bisim")
#         + "_f2_pv-l2_linear-rew_ibalter_conlr_enc-l0-op4_bisim_suf1")
    test_2d_transfer(seed=0, obs_spec_type="normal",
        transfer_from="./transfer_results/model_" + obs + "_f2_pv-l2_linear-rew_ibalter_conlr_enc-l0-op4_bisim_s0/Simple",
        run_id="transfer_normal_f2_pv-l2_ibalter_fixbisim_from_" + obs)
    test_2d_transfer(seed=0, obs_spec_type="rich1",
        transfer_from="./transfer_results/model_" + obs + "_f2_pv-l2_linear-rew_ibalter_conlr_enc-l0-op4_s0/Simple",
        run_id="transfer_rich1_f2_pv-l2_ibalter_suf1_nobisim_from_" + obs)
# for obs in ["normal"]:
#     test_2d_transfer(seed=0, obs_spec_type="rich1",
