浏览代码

target encoder

/develop/bisim-review
Andrew Cohen 5 年前
当前提交
0c7db26a
共有 3 个文件被更改,包括 17 次插入20 次删除
  1. 2
      config/ppo_transfer/CrawlerStatic.yaml
  2. 4
      config/ppo_transfer/OldCrawlerStatic.yaml
  3. 31
      ml-agents/mlagents/trainers/policy/transfer_policy.py

2
config/ppo_transfer/CrawlerStatic.yaml


encoder_layers: 2
action_layers: 2
policy_layers: 2
forward_layers: 1
forward_layers: 0
value_layers: 3
feature_size: 128
action_feature_size: 128

4
config/ppo_transfer/OldCrawlerStatic.yaml


encoder_layers: 2
action_layers: 2
policy_layers: 2
forward_layers: 1
forward_layers: 0
reuse_encoder: true
reuse_encoder: false
in_epoch_alter: false
use_op_buffer: false
use_var_predict: true

31
ml-agents/mlagents/trainers/policy/transfer_policy.py


self.current_action,
self.h_size,
self.action_feature_size,
action_layers
action_layers,
# if not reuse_encoder:
# self.targ_encoder = tf.stop_gradient(self.targ_encoder)
# self._create_hard_copy()
if not reuse_encoder:
self.targ_encoder = tf.stop_gradient(self.targ_encoder)
self._create_hard_copy()
# if self.inverse_model:
# with tf.variable_scope("inverse"):

load_policy=False,
load_value=False,
load_encoder=False,
load_action=False
load_action=False,
):
load_nets = []
if load_model:

activation=tf.tanh, # ModelUtils.swish,
kernel_initializer=tf.initializers.variance_scaling(1.0),
)
if not reuse_encoder:
latent_targ = tf.stop_gradient(latent_targ)
# return tf.stop_gradient(latent_targ)
def _create_encoder(
self,

return latent
def _create_action_encoder(
self,
action: tf.Tensor,
h_size: int,
action_feature_size: int,
num_layers: int,
self, action: tf.Tensor, h_size: int, action_feature_size: int, num_layers: int
action,
h_size,
action,
h_size,
num_layers,
num_layers,
reuse=False
reuse=False,
)
with tf.variable_scope("action_enc"):

name="latent",
activation=tf.tanh,
activation=tf.tanh,
kernel_initializer=tf.initializers.variance_scaling(1.0),
)
return latent

正在加载...
取消
保存