
1 layer everything

/develop/bisim-sac-transfer
yanchaosun, 4 years ago
Current commit
3ce88589
5 files changed: 27 insertions, 23 deletions
  1. config/sac_transfer/3DBallCloud.yaml (4 changes)
  2. config/sac_transfer/3DBallHardCloud.yaml (4 changes)
  3. config/sac_transfer/3DBallHardTransferCloud.yaml (6 changes)
  4. ml-agents/mlagents/trainers/policy/transfer_policy.py (34 changes)
  5. ml-agents/mlagents/trainers/settings.py (2 changes)

config/sac_transfer/3DBallCloud.yaml (4 changes)


  save_replay_buffer: false
  init_entcoef: 0.5
  reward_signal_steps_per_update: 10.0
- encoder_layers: 2
+ encoder_layers: 1
- forward_layers: 0
+ forward_layers: 1
  value_layers: 1
  feature_size: 16
  # separate_value_net: true
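For reference, a quick way to confirm that all three 3DBall configs touched by this commit ended up with single-layer encoder, forward, and value networks (a minimal sketch only; the YAML nesting is not visible in the diff, so the keys are searched recursively, and the glob pattern is an assumption about the repo layout):

import glob
import yaml

def find_key(node, key):
    # Yield every value stored under `key` anywhere in a nested YAML mapping.
    if isinstance(node, dict):
        for k, v in node.items():
            if k == key:
                yield v
            yield from find_key(v, key)

for path in glob.glob("config/sac_transfer/3DBall*Cloud.yaml"):
    with open(path) as f:
        cfg = yaml.safe_load(f)
    for key in ("encoder_layers", "forward_layers", "value_layers"):
        assert all(v == 1 for v in find_key(cfg, key)), (path, key)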

config/sac_transfer/3DBallHardCloud.yaml (4 changes)


  save_replay_buffer: false
  init_entcoef: 1.0
  reward_signal_steps_per_update: 10.0
- encoder_layers: 2
+ encoder_layers: 1
- forward_layers: 0
+ forward_layers: 1
  value_layers: 1
  feature_size: 16
  # separate_value_net: true

config/sac_transfer/3DBallHardTransferCloud.yaml (6 changes)


  save_replay_buffer: false
  init_entcoef: 1.0
  reward_signal_steps_per_update: 10.0
- encoder_layers: 2
+ encoder_layers: 1
- forward_layers: 0
+ forward_layers: 1
  value_layers: 1
  feature_size: 16
  # separate_value_net: true

  use_transfer: true
  load_model: true
  train_model: false
- transfer_path: "results/sac-ball-f16-p1f0/3DBall"
+ transfer_path: "results/sac-ball-f16-e1p1f1/3DBall"
  network_settings:
    normalize: true
    hidden_units: 128
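Since this file also flips the transfer source to the new 1-layer run, here is a small illustrative check (not part of the commit) of the transfer-related keys as they read after the change; the YAML fragment mirrors the lines above, and the path check simply guards against pointing use_transfer at a run directory that does not exist:

import os
import yaml

fragment = """
use_transfer: true
load_model: true
train_model: false
transfer_path: "results/sac-ball-f16-e1p1f1/3DBall"
"""

cfg = yaml.safe_load(fragment)
if cfg["use_transfer"] and not os.path.isdir(cfg["transfer_path"]):
    print("transfer_path not found:", cfg["transfer_path"])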

ml-agents/mlagents/trainers/policy/transfer_policy.py (34 changes)


          #     reuse_encoder,
          # )
-         # self.action_encoder = self._create_action_encoder(
-         #     self.current_action,
-         #     self.h_size,
-         #     self.action_feature_size,
-         #     action_layers
-         # )
+         self.action_encoder = self._create_action_encoder(
+             self.current_action,
+             self.h_size,
+             self.action_feature_size,
+             action_layers
+         )
          if self.inverse_model:
              with tf.variable_scope("inverse"):

          self.predict, self.predict_distribution = self.create_forward_model(
              self.encoder,
-             self.current_action,
+             self.action_encoder,
              forward_layers,
              var_predict=var_predict,
              separate_train=separate_model_train

              self.targ_encoder,
-             self.current_action,
+             self.action_encoder,
              forward_layers,
              var_predict=var_predict,
              reuse=True,

          if predict_return:
              with tf.variable_scope("reward"):
                  self.create_reward_model(
-                     self.encoder, self.current_action, forward_layers, separate_train=separate_model_train
+                     self.encoder, self.action_encoder, forward_layers, separate_train=separate_model_train
                  )
          if self.use_bisim:

          num_layers: int,
+         reuse: bool=False
      ) -> tf.Tensor:
+         if num_layers < 0:
+             return self.current_action
          hidden_stream = ModelUtils.create_vector_observation_encoder(
              action,
          encoding_checkpoint = os.path.join(self.model_path, f"encoding.ckpt")
          encoding_saver.save(self.sess, encoding_checkpoint)
-         # action_vars = tf.get_collection(
-         #     tf.GraphKeys.TRAINABLE_VARIABLES, "action_enc"
-         # )
-         # action_saver = tf.train.Saver(action_vars)
-         # action_checkpoint = os.path.join(self.model_path, f"action_enc.ckpt")
-         # action_saver.save(self.sess, action_checkpoint)
+         action_vars = tf.get_collection(
+             tf.GraphKeys.TRAINABLE_VARIABLES, "action_enc"
+         )
+         if len(action_vars) > 0:
+             action_saver = tf.train.Saver(action_vars)
+             action_checkpoint = os.path.join(self.model_path, f"action_enc.ckpt")
+             action_saver.save(self.sess, action_checkpoint)
          latent_vars = tf.get_collection(
              tf.GraphKeys.TRAINABLE_VARIABLES, "encoding/latent"
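The change above follows the standard TF1 pattern of checkpointing variables by scope, with a guard for the case where the action encoder created no variables: with action_layers = -1 the encoder is an identity, and tf.train.Saver raises an error when handed an empty var_list. A standalone sketch of that pattern, with an illustrative function name and path:

import os
import tensorflow as tf  # TF1-style API (tf.compat.v1 under TF2)

def save_action_encoder(sess, model_path):
    # Collect only the trainable variables created under the "action_enc" scope.
    action_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "action_enc")
    # Skip saving when the scope is empty, e.g. when action_layers < 0 and the
    # action encoder is a parameter-free pass-through.
    if len(action_vars) > 0:
        saver = tf.train.Saver(action_vars)
        saver.save(sess, os.path.join(model_path, "action_enc.ckpt"))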

ml-agents/mlagents/trainers/settings.py (2 changes)


      # Network
      encoder_layers: int = 1
-     action_layers: int = 1
+     action_layers: int = -1
      policy_layers: int = 1
      value_layers: int = 1
      forward_layers: int = 1
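In other words, the new default routes runs through the num_layers < 0 branch of _create_action_encoder, so the action encoder is a no-op unless a config opts in with action_layers >= 0. A minimal sketch of how the fields read in an attrs-style settings class (the class name is an assumption; only the fields and defaults shown in the diff are taken from the commit):

import attr

@attr.s(auto_attribs=True)
class TransferNetworkSettingsSketch:
    # Network
    encoder_layers: int = 1
    action_layers: int = -1  # -1: skip the action encoder, pass raw actions through
    policy_layers: int = 1
    value_layers: int = 1
    forward_layers: int = 1

settings = TransferNetworkSettingsSketch()
# With this default, _create_action_encoder(..., settings.action_layers) returns the
# raw current_action tensor, so the forward and reward models consume the unencoded
# action unless a config overrides action_layers.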
