
/develop/bisim-sac-transfer
yanchaosun, 4 years ago
Current commit: ee48cca4
3 changed files with 35 additions and 8 deletions
  1. config/sac_transfer/CrawlerStatic.yaml (6 changes)
  2. ml-agents/mlagents/trainers/sac_transfer/network.py (31 changes)
  3. ml-agents/mlagents/trainers/sac_transfer/optimizer.py (6 changes)

config/sac_transfer/CrawlerStatic.yaml (6 changes)


  encoder_layers: 3
  policy_layers: 0
  forward_layers: 0
- value_layers: 2
- feature_size: 300
+ value_layers: 0
+ feature_size: 512
- action_feature_size: 128
+ action_feature_size: 256
  separate_policy_train: true
  separate_model_train: true
  # separate_value_net: true
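Note: the retuned keys above are consumed through the `hyperparameters` object in optimizer.py below. A minimal sketch, assuming a dataclass-style settings object like upstream ml-agents uses (the class name and defaults here are hypothetical, chosen to mirror the `hyperparameters.*` attributes that appear in this commit):

    from dataclasses import dataclass

    @dataclass
    class SACTransferSettings:
        # Depth/width knobs set in CrawlerStatic.yaml above.
        encoder_layers: int = 3
        policy_layers: int = 0
        forward_layers: int = 0
        value_layers: int = 0            # 2 -> 0 in this commit
        feature_size: int = 512          # 300 -> 512
        action_layers: int = -1          # assumed default, matching a_layers=-1 in network.py
        action_feature_size: int = 256   # 128 -> 256
        # Flags controlling which components train separately.
        separate_policy_train: bool = True
        separate_model_train: bool = True
        separate_value_train: bool = False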

ml-agents/mlagents/trainers/sac_transfer/network.py (31 changes)


      self.value_heads[name] = value
  self.value = tf.reduce_mean(list(self.value_heads.values()), 0)

- def _create_cc_critic(self, hidden_value, scope, create_qs=True):
+ def _create_cc_critic(self, hidden_value, scope, create_qs=True, a_layers=-1, a_features=16):
      """
      Creates just the critic network
      """

          name="external_action_in",
      )
      self.value_vars = self.get_vars(self.join_scopes(scope, "value"))
+     # external_action_encoder = self.policy._create_action_encoder(
+     #     self.external_action_in,
+     #     self.h_size,
+     #     a_features,
+     #     a_layers,
+     #     reuse=True
+     # )
+     # output_action_encoder = self.policy._create_action_encoder(
+     #     self.policy.output,
+     #     self.h_size,
+     #     a_features,
+     #     a_layers,
+     #     reuse=True
+     # )
+     # hidden_q = tf.concat([hidden_value, external_action_encoder], axis=-1)
+     # hidden_qp = tf.concat([hidden_value, output_action_encoder], axis=-1)

-         self.num_layers,
+         self.num_layers+2,
          self.h_size,
          self.join_scopes(scope, "q"),
      )

-         self.num_layers,
+         self.num_layers+2,
          self.h_size,
          self.join_scopes(scope, "q"),
          reuse=True,

  use_recurrent=False,
  encoder_layers=0,
  num_layers=2,
+ action_layers=-1,
+ action_features=16,
  stream_names=None,
  vis_encode_type=EncoderType.SIMPLE,
  separate_train=False,

  use_recurrent=False,
  encoder_layers=0,
  num_layers=2,
+ action_layers=-1,
+ action_features=16,
  stream_names=None,
  vis_encode_type=EncoderType.SIMPLE,
  separate_train=False,

      hidden_critic = tf.stop_gradient(hidden_critic)
  if self.policy.use_continuous_act:
-     self._create_cc_critic(hidden_critic, POLICY_SCOPE)
+     self._create_cc_critic(hidden_critic, POLICY_SCOPE, a_layers=action_layers, a_features=action_features)
  else:
      self._create_dc_critic(hidden_critic, POLICY_SCOPE)
      # self._create_dc_critic(hidden, POLICY_SCOPE)
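The commented-out block above shows the direction being explored: encode both the replay-buffer action (`external_action_in`) and the current policy's action (`policy.output`) with one shared action encoder, then concatenate each with the state features to feed the Q heads. A minimal TF1-style sketch of that idea, assuming `_create_action_encoder` is a small dense stack shared via `reuse=True` (the helper body, scope name, and shapes here are hypothetical, not the fork's code):

    import tensorflow.compat.v1 as tf
    tf.disable_v2_behavior()

    def create_action_encoder(action_in, h_size, a_features, a_layers, reuse=False):
        # Hypothetical stand-in for policy._create_action_encoder: a_layers
        # hidden dense layers (none when a_layers <= 0), then a projection
        # down to a_features units.
        hidden = action_in
        with tf.variable_scope("action_enc", reuse=reuse):
            for i in range(max(a_layers, 0)):
                hidden = tf.layers.dense(hidden, h_size, tf.nn.relu, name="hidden_%d" % i)
            return tf.layers.dense(hidden, a_features, tf.nn.relu, name="features")

    act_size = 20  # assumed continuous action dimension, for illustration only
    hidden_value = tf.placeholder(tf.float32, [None, 512])             # state features from the encoder
    external_action_in = tf.placeholder(tf.float32, [None, act_size])  # actions from the replay buffer
    policy_output = tf.placeholder(tf.float32, [None, act_size])       # actions sampled from the policy

    # Weights are shared between the two encoders, mirroring reuse=True above.
    ext_enc = create_action_encoder(external_action_in, 128, 16, -1)
    out_enc = create_action_encoder(policy_output, 128, 16, -1, reuse=True)

    # Q(s, a) consumes buffer actions; Q(s, pi(s)) consumes on-policy actions.
    hidden_q = tf.concat([hidden_value, ext_enc], axis=-1)
    hidden_qp = tf.concat([hidden_value, out_enc], axis=-1)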

ml-agents/mlagents/trainers/sac_transfer/optimizer.py (6 changes)


  use_recurrent=self.policy.use_recurrent,
  encoder_layers=hyperparameters.encoder_layers,
  num_layers=hyperparameters.value_layers,
+ action_layers=hyperparameters.action_layers,
+ action_features=hyperparameters.action_feature_size,
  stream_names=stream_names,
  vis_encode_type=vis_encode_type,
  separate_train=hyperparameters.separate_value_train,

  use_recurrent=self.policy.use_recurrent,
  encoder_layers=hyperparameters.encoder_layers,
  num_layers=hyperparameters.value_layers,
+ action_layers=hyperparameters.action_layers,
+ action_features=hyperparameters.action_feature_size,
  stream_names=stream_names,
  vis_encode_type=vis_encode_type,
  separate_train=hyperparameters.separate_value_train,

  policy_vars = self.policy.get_trainable_variables(
      train_encoder=not self.separate_policy_train,
-     train_action=self.train_action,
+     train_action=not self.separate_policy_train,
      train_model=False,
      train_policy=self.train_policy
  )
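For context: `get_trainable_variables` gates which parameter groups receive gradients, and with `separate_policy_train: true` this change now freezes the action encoder alongside the state encoder during policy updates. A minimal sketch, assuming the flags simply select TF variable scopes (the scope names below are illustrative, not the fork's actual ones):

    import tensorflow.compat.v1 as tf
    tf.disable_v2_behavior()

    def get_trainable_variables(train_encoder=True, train_action=True,
                                train_model=True, train_policy=True):
        # Map each flag to the variable scope it controls.
        gates = {
            "encoding": train_encoder,    # shared state encoder
            "action_enc": train_action,   # action encoder
            "predict": train_model,       # forward/transition model
            "policy": train_policy,       # actor head
        }
        selected = []
        for scope, enabled in gates.items():
            if enabled:
                selected += tf.get_collection(
                    tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope
                )
        return selected

    # With separate_policy_train: true, the call above reduces to
    # train_encoder=False, train_action=False, train_model=False,
    # train_policy=<self.train_policy>, i.e. only the actor is updated.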
