
update: separate model train as an option

/develop/bisim-sac-transfer
yanchaosun, 4 years ago
commit 6df774ed
7 changed files with 45 additions and 34 deletions
  1. config/sac_transfer/3DBall.yaml (20 changes)
  2. config/sac_transfer/3DBallHard.yaml (18 changes)
  3. config/sac_transfer/3DBallHardTransfer.yaml (20 changes)
  4. ml-agents/mlagents/trainers/policy/transfer_policy.py (15 changes)
  5. ml-agents/mlagents/trainers/ppo_transfer/optimizer.py (2 changes)
  6. ml-agents/mlagents/trainers/sac_transfer/optimizer.py (2 changes)
  7. ml-agents/mlagents/trainers/settings.py (2 changes)

config/sac_transfer/3DBall.yaml (20 changes)

  learning_rate_schedule: linear
  model_schedule: linear
  batch_size: 64
- buffer_size: 12000
+ buffer_size: 200000
  buffer_init_steps: 0
  tau: 0.005
  steps_per_update: 10.0

- encoder_layers: 1
- policy_layers: 1
- forward_layers: 1
- value_layers: 1
- feature_size: 16
- # separate_value_net: true
- separate_policy_train: true
- reuse_encoder: true
+ encoder_layers: 2
+ policy_layers: 0
+ forward_layers: 0
+ value_layers: 2
+ feature_size: 32
+ separate_value_net: true
+ # separate_policy_train: true
+ reuse_encoder: false
  in_epoch_alter: false
  in_batch_alter: true
  use_op_buffer: false

- use_bisim: true
+ use_bisim: false
  network_settings:
    normalize: true
    hidden_units: 64

config/sac_transfer/3DBallHard.yaml (18 changes)

  learning_rate: 0.0003
  learning_rate_schedule: linear
  batch_size: 256
- buffer_size: 50000
+ buffer_size: 500000
  buffer_init_steps: 0
  tau: 0.005
  steps_per_update: 10.0

- encoder_layers: 1
- policy_layers: 1
- forward_layers: 1
- value_layers: 1
- feature_size: 16
- # separate_value_net: true
- separate_policy_train: true
+ encoder_layers: 2
+ policy_layers: 0
+ forward_layers: 0
+ value_layers: 2
+ feature_size: 32
+ separate_value_net: true
+ # separate_policy_train: true
  reuse_encoder: false
  in_epoch_alter: false
  in_batch_alter: true

  predict_return: true
- use_bisim: true
+ use_bisim: false
  network_settings:
    normalize: true
    hidden_units: 64

config/sac_transfer/3DBallHardTransfer.yaml (20 changes)

  learning_rate: 0.0003
  learning_rate_schedule: linear
  batch_size: 256
- buffer_size: 50000
+ buffer_size: 500000
  buffer_init_steps: 0
  tau: 0.005
  steps_per_update: 10.0

- encoder_layers: 1
- policy_layers: 1
- forward_layers: 1
- value_layers: 1
- feature_size: 16
- # separate_value_net: true
- separate_policy_train: true
+ encoder_layers: 2
+ policy_layers: 0
+ forward_layers: 0
+ value_layers: 2
+ feature_size: 32
+ separate_value_net: true
+ # separate_policy_train: true
  reuse_encoder: false
  in_epoch_alter: false
  in_batch_alter: false

  predict_return: true
- use_bisim: true
+ use_bisim: false
- transfer_path: "results/sac_model_ball_bisim/3DBall"
+ transfer_path: "results/sac-ball-lintest/3DBall"
  network_settings:
    normalize: true
    hidden_units: 64

ml-agents/mlagents/trainers/policy/transfer_policy.py (15 changes)

  action_feature_size=16,
  transfer=False,
  separate_train=False,
+ separate_model_train=False,
  var_encoder=False,
  var_predict=True,
  predict_return=True,

  self.current_action,
  forward_layers,
  var_predict=var_predict,
+ separate_train=separate_model_train
  )
  self.targ_predict, self.targ_predict_distribution = self.create_forward_model(

  var_predict=var_predict,
- reuse=True
+ reuse=True,
+ separate_train=separate_model_train
  )
  self.create_forward_loss(self.reuse_encoder, self.transfer)

  self.create_reward_model(
- self.encoder, self.current_action, forward_layers
+ self.encoder, self.current_action, forward_layers, separate_train=separate_model_train
  )
  if self.use_bisim:

  forward_layers: int,
  var_predict: bool = False,
  reuse: bool = False,
+ separate_train: bool = False
  ) -> None:
  """
  Creates forward model TensorFlow ops for Curiosity module.

  combined_input = tf.concat([encoded_state, encoded_action], axis=1)
  hidden = combined_input
- if not self.transfer:
-     hidden = tf.stop_gradient(hidden)
+ if separate_train:
+     hidden = tf.stop_gradient(hidden)
  for i in range(forward_layers):
  hidden = tf.layers.dense(

  encoded_state: tf.Tensor,
  encoded_action: tf.Tensor,
  forward_layers: int,
+ separate_train: bool = False
- if not self.transfer:
+ if separate_train:
      hidden = tf.stop_gradient(hidden)
  for i in range(forward_layers):
  hidden = tf.layers.dense(
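The substantive change in this file is the condition on the gradient cut: it used to depend on `not self.transfer`, and is now governed by the dedicated `separate_model_train` flag threaded in as `separate_train`. A minimal TF1-style sketch of the pattern (layer width, output head, and names are illustrative assumptions, not the fork's exact code):

    import tensorflow as tf  # TF 1.x, as used by ml-agents at the time

    def forward_model(encoded_state, encoded_action, forward_layers,
                      feature_size, separate_train=False):
        # Concatenate encoded state and action, as in create_forward_model.
        hidden = tf.concat([encoded_state, encoded_action], axis=1)
        if separate_train:
            # Freeze the encoders from this head's point of view: the
            # layers below still train, but no gradient flows back into
            # the shared encoder.
            hidden = tf.stop_gradient(hidden)
        for i in range(forward_layers):
            hidden = tf.layers.dense(hidden, 128, activation=tf.nn.relu,
                                     name="hidden_{}".format(i))
        # Illustrative output head predicting the next encoded state.
        return tf.layers.dense(hidden, feature_size, name="next_state")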

ml-agents/mlagents/trainers/ppo_transfer/optimizer.py (2 changes)

  self.separate_value_train = hyperparameters.separate_value_train
  self.separate_policy_train = hyperparameters.separate_policy_train
+ self.separate_model_train = hyperparameters.separate_model_train
  self.use_var_encoder = hyperparameters.use_var_encoder
  self.use_var_predict = hyperparameters.use_var_predict
  self.with_prior = hyperparameters.with_prior

  hyperparameters.action_feature_size,
  self.use_transfer,
  self.separate_policy_train,
+ self.separate_model_train,
  self.use_var_encoder,
  self.use_var_predict,
  self.predict_return,
ml-agents/mlagents/trainers/sac_transfer/optimizer.py (2 changes)

  self.separate_value_train = hyperparameters.separate_value_train
  self.separate_policy_train = hyperparameters.separate_policy_train
+ self.separate_model_train = hyperparameters.separate_model_train
  self.use_var_encoder = hyperparameters.use_var_encoder
  self.use_var_predict = hyperparameters.use_var_predict
  self.with_prior = hyperparameters.with_prior

  hyperparameters.action_feature_size,
  self.use_transfer,
  self.separate_policy_train,
+ self.separate_model_train,
  self.use_var_encoder,
  self.use_var_predict,
  self.predict_return,
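Both optimizers wire the new option identically: read it off the trainer hyperparameters and forward it positionally into the policy's graph construction, so omitting it keeps the old behaviour. A stand-in sketch of that plumbing (class and method signatures here are illustrative, not the fork's API):

    from dataclasses import dataclass

    @dataclass
    class Hyperparameters:  # stand-in for the settings classes below
        separate_policy_train: bool = False
        separate_model_train: bool = False  # the flag this commit adds

    class PolicyStub:
        def create_tf_graph(self, separate_policy_train: bool,
                            separate_model_train: bool) -> None:
            # transfer_policy.py passes this on to create_forward_model
            # and create_reward_model as separate_train=separate_model_train.
            self.separate_model_train = separate_model_train

    hp = Hyperparameters(separate_model_train=True)
    PolicyStub().create_tf_graph(hp.separate_policy_train,
                                 hp.separate_model_train)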

ml-agents/mlagents/trainers/settings.py (2 changes)

  separate_value_train: bool = False
  separate_policy_train: bool = False
+ separate_model_train: bool = False
  separate_value_net: bool = False
  use_var_encoder: bool = False
  use_var_predict: bool = False

  separate_value_train: bool = False
  separate_policy_train: bool = False
+ separate_model_train: bool = False
  separate_value_net: bool = False
  use_var_encoder: bool = False
  use_var_predict: bool = False
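The same field is added twice because settings.py declares these toggles on the hyperparameter classes of both transfer trainers (ppo_transfer and sac_transfer, per the optimizer files above). A minimal sketch of the declaration, assuming ml-agents' attrs-based settings style and a stand-in class name:

    import attr

    @attr.s(auto_attribs=True)
    class SACTransferSettings:  # stand-in name for the fork's real class
        separate_value_train: bool = False
        separate_policy_train: bool = False
        separate_model_train: bool = False  # new; False keeps prior behaviour
        separate_value_net: bool = False
        use_var_encoder: bool = False
        use_var_predict: bool = False

Since the default is False, none of the configs touched in this commit need to set it; a run opts in by adding separate_model_train: true to its hyperparameters section.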
