
sac crawler config

/develop/bisim-sac-transfer
yanchaosun, 4 years ago
Current commit: b5e02978
9 files changed, 47 insertions, 39 deletions
  1. config/ppo_transfer/3DBall.yaml (9 changes)
  2. config/ppo_transfer/3DBallHard.yaml (15 changes)
  3. config/ppo_transfer/3DBallHardTransfer.yaml (7 changes)
  4. config/sac_transfer/CrawlerStatic.yaml (8 changes)
  5. config/sac_transfer/CrawlerStaticTransfer.yaml (10 changes)
  6. ml-agents/mlagents/trainers/ppo_transfer/optimizer.py (28 changes)
  7. ml-agents/mlagents/trainers/ppo_transfer/trainer.py (2 changes)
  8. ml-agents/mlagents/trainers/sac_transfer/optimizer.py (5 changes)
  9. ml-agents/mlagents/trainers/settings.py (2 changes)

config/ppo_transfer/3DBall.yaml (9 changes)

  learning_rate_schedule: linear
  model_schedule: constant
  separate_model_train: true
  # separate_value_train: true
- reuse_encoder: false
+ reuse_encoder: true
- in_batch_alter: true
- use_op_buffer: false
+ in_batch_alter: false
+ use_op_buffer: true
  use_var_predict: true
  with_prior: false
  predict_return: true

  gamma: 0.99
  strength: 1.0
  keep_checkpoints: 5
- max_steps: 500000
+ max_steps: 2000000
  time_horizon: 1000
  summary_freq: 12000
  threaded: true
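
For orientation, these keys sit under a behavior's hyperparameters block in the usual ML-Agents YAML layout. The snippet below is a minimal sketch, not part of this commit: it loads such a config with PyYAML and reads the toggles changed here; the behaviors/3DBall nesting and the trainer_type value are assumptions based on the standard config format.

    # Sketch only: parse a ppo_transfer-style behavior config and read the
    # flags touched in this file. Values mirror the updated 3DBall.yaml.
    import yaml  # PyYAML

    CONFIG = """
    behaviors:
      3DBall:
        trainer_type: ppo_transfer
        hyperparameters:
          learning_rate_schedule: linear
          model_schedule: constant
          reuse_encoder: true
          in_batch_alter: false
          use_op_buffer: true
          use_var_predict: true
          predict_return: true
        max_steps: 2000000
    """

    ball = yaml.safe_load(CONFIG)["behaviors"]["3DBall"]
    print(ball["hyperparameters"]["reuse_encoder"], ball["max_steps"])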

config/ppo_transfer/3DBallHard.yaml (15 changes)

  learning_rate_schedule: linear
  model_schedule: constant
  separate_model_train: true
- encoder_layers: 1
- policy_layers: 1
- forward_layers: 1
  separate_policy_train: true
+ encoder_layers: 2
+ policy_layers: 0
+ forward_layers: 0
- feature_size: 16
- reuse_encoder: false
+ feature_size: 64
+ reuse_encoder: true
- in_batch_alter: true
- use_op_buffer: false
+ in_batch_alter: false
+ use_op_buffer: true
  use_var_predict: true
  with_prior: false
  predict_return: true

config/ppo_transfer/3DBallHardTransfer.yaml (7 changes)

  num_epoch: 3
  learning_rate_schedule: linear
  model_schedule: linear
  # separate_value_train: true
- reuse_encoder: false
+ reuse_encoder: true
  in_epoch_alter: false
  in_batch_alter: false
  use_op_buffer: false

  use_bisim: false
  separate_value_net: false
  use_transfer: true
- transfer_path: "results/ppo-ball/3DBall"
+ transfer_path: "results/ppomodel_ball_l1/3DBall"
  # load_policy: true
  # load_value: true
  network_settings:
    normalize: true
    hidden_units: 128
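
The transfer variant points use_transfer at an earlier run through transfer_path. As a rough illustration of that contract, and not the fork's actual loading code, the sketch below checks that the referenced results directory contains a TF checkpoint index file before a transfer run starts; the directory layout and file name are assumptions.

    # Hypothetical pre-flight check: does transfer_path point at a finished
    # source run? The real restore logic lives in the ppo_transfer optimizer.
    import os

    def has_checkpoint(transfer_path: str) -> bool:
        # TF1-style savers leave a "checkpoint" index file next to the weights.
        return os.path.isfile(os.path.join(transfer_path, "checkpoint"))

    if __name__ == "__main__":
        path = "results/ppomodel_ball_l1/3DBall"  # value from the updated config
        status = "ok" if has_checkpoint(path) else "no checkpoint found"
        print(f"{path}: {status}")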

config/sac_transfer/CrawlerStatic.yaml (8 changes)

  init_entcoef: 1.0
  reward_signal_steps_per_update: 20.0
  encoder_layers: 2
- policy_layers: 1
- forward_layers: 1
+ policy_layers: 2
+ forward_layers: 0
- feature_size: 128
+ feature_size: 256
  action_layers: 2
  action_feature_size: 128
  separate_policy_train: true
  separate_model_train: true
  # separate_value_net: true

config/sac_transfer/CrawlerStaticTransfer.yaml (10 changes)

  init_entcoef: 1.0
  reward_signal_steps_per_update: 20.0
  encoder_layers: 2
- policy_layers: 1
- forward_layers: 1
+ policy_layers: 2
+ forward_layers: 0
- feature_size: 128
+ feature_size: 256
  action_layers: 2
  action_feature_size: 128
  separate_policy_train: true
  # separate_model_train: true
  # separate_value_net: true

  use_transfer: true
  load_model: true
  train_model: false
- transfer_path: "results/csold-l1/CrawlerStatic"
+ transfer_path: "results/oldcs/CrawlerStatic"
  network_settings:
    normalize: true
    hidden_units: 512
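
In this file load_model: true with train_model: false reads as: restore the transferred model from transfer_path but keep it frozen while SAC continues to train the policy and value heads. The sketch below shows that freeze pattern generically in TF1-style code with made-up scope names; it is not the repository's optimizer.

    # Generic "load but do not train" pattern: give the optimizer a var_list
    # that excludes the transferred scope, so those weights never move.
    import tensorflow.compat.v1 as tf

    tf.disable_v2_behavior()

    with tf.variable_scope("predict"):           # stands in for the transferred model
        w_model = tf.get_variable("w", initializer=tf.ones([4, 4]))
    with tf.variable_scope("policy"):            # keeps training
        w_policy = tf.get_variable("w", initializer=tf.ones([4, 4]))

    loss = tf.reduce_sum(tf.square(w_policy)) + tf.reduce_sum(tf.square(w_model))
    train_vars = [v for v in tf.trainable_variables()
                  if not v.name.startswith("predict/")]
    update_op = tf.train.AdamOptimizer(3e-4).minimize(loss, var_list=train_vars)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        before = sess.run(w_model)
        sess.run(update_op)
        assert (sess.run(w_model) == before).all()  # frozen scope untouched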

ml-agents/mlagents/trainers/ppo_transfer/optimizer.py (28 changes)

      max_step,
  )
  self._create_ppo_optimizer_ops()
  self._init_alter_update()
  self.update_dict.update(
      {

  if self.predict_return:
      self.update_dict.update({"reward_loss": self.policy.reward_loss})
  if (
      self.use_alter
      or self.smart_transfer
      or self.in_batch_alter
      or self.in_epoch_alter
      or self.op_buffer
  ):
      self._init_alter_update()
  self.policy.initialize_or_load()
  if self.use_transfer:

          hyperparameters.load_encoder,
          hyperparameters.load_action,
      )
-     self.policy.run_hard_copy()
+     if not self.reuse_encoder:
+         self.policy.run_hard_copy()
      # self.policy.get_encoder_weights()
      # self.policy.get_policy_weights()

- if self.update_mode == "policy":
-     update_vals = self._execute_model(feed_dict, self.ppo_update_dict)
- else:
-     update_vals = self._execute_model(feed_dict, self.update_dict)
+ if self.use_transfer:
+     update_vals = self._execute_model(feed_dict, self.update_dict)
+ else:
+     update_vals = self._execute_model(feed_dict, self.ppo_update_dict)
- self.policy.run_soft_copy()
+ if not self.reuse_encoder:
+     self.policy.run_soft_copy()
- # if update_name in update_vals.keys():
- update_stats[stat_name] = update_vals[update_name]
+ if update_name in update_vals.keys():
+     update_stats[stat_name] = update_vals[update_name]
  if self.in_batch_alter and self.use_bisim:
      update_stats.update(bisim_stats)

  update_vals = self._execute_model(feed_dict, self.model_only_update_dict)
  # update target encoder
- self.policy.run_soft_copy()
+ if not self.reuse_encoder:
+     self.policy.run_soft_copy()
  # print("copy")
  # self.policy.get_encoder_weights()
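
The recurring edit above wraps run_hard_copy / run_soft_copy in `if not self.reuse_encoder:`, i.e. the separate target encoder is only synced when the online encoder is not reused directly. For readers unfamiliar with the pattern, here is a generic hard-copy / Polyak soft-copy sketch in TF1 style; the variable names and tau value are illustrative, not taken from this optimizer.

    # Hard copy syncs target <- source once (e.g. after loading); soft copy
    # nudges the target toward the source by a factor tau after each update.
    import tensorflow.compat.v1 as tf

    tf.disable_v2_behavior()

    def copy_ops(source_vars, target_vars, tau=0.005):
        hard = tf.group(*[tf.assign(t, s) for s, t in zip(source_vars, target_vars)])
        soft = tf.group(*[tf.assign(t, tau * s + (1.0 - tau) * t)
                          for s, t in zip(source_vars, target_vars)])
        return hard, soft

    src = [tf.get_variable("enc_w", initializer=tf.ones([2, 2]))]
    tgt = [tf.get_variable("target_enc_w", initializer=tf.zeros([2, 2]))]
    hard_copy, soft_copy = copy_ops(src, tgt)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        sess.run(hard_copy)   # analogous to run_hard_copy() at initialization
        sess.run(soft_copy)   # analogous to run_soft_copy() after an update step
        print(sess.run(tgt[0]))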

ml-agents/mlagents/trainers/ppo_transfer/trainer.py (2 changes)

  update_stats = self.optimizer.update_part(
      buffer.sample_mini_batch(batch_size, self.policy.sequence_length),
      n_sequences,
-     "model",
+     "model_only",
  )
  # buffer.make_mini_batch(i, i + batch_size), n_sequences, "model"
  # )
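
The trainer-side change swaps the update mode passed to update_part from "model" to "model_only" for this call site. A stripped-down, purely illustrative stand-in for that dispatch (the class and values here are invented):

    # Toy stand-in: "model_only" selects a smaller update dict than a full update.
    from typing import Dict, List

    class ToyOptimizer:
        def __init__(self) -> None:
            self.update_dict = {"policy_loss": 0.1, "model_loss": 0.2}
            self.model_only_update_dict = {"model_loss": 0.2}

        def update_part(self, batch: List[float], n_sequences: int, part: str) -> Dict[str, float]:
            selected = self.model_only_update_dict if part == "model_only" else self.update_dict
            return dict(selected)

    stats = ToyOptimizer().update_part([0.0] * 8, n_sequences=2, part="model_only")
    print(stats)  # {'model_loss': 0.2}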

ml-agents/mlagents/trainers/sac_transfer/optimizer.py (5 changes)

          hyperparameters.load_action,
      )
      self.policy.run_hard_copy()
      # self.sess.run(self.target_init_op)
      self.num_updates = 0
      print("All variables in the graph:")

  encoding_vars = self.policy.encoding_variables
  if self.train_value:
-     critic_vars = self.policy_network.critic_vars + encoding_vars
+     critic_vars = self.policy_network.critic_vars + model_vars
-     critic_vars = encoding_vars
+     critic_vars = model_vars
  self.target_init_op = [
      tf.assign(target, source)
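
Two things are visible in this hunk: the critic's variable list is now assembled from model_vars instead of the raw encoding variables, and target_init_op assigns each target variable from its source one-to-one. The sketch below shows the variable-collection side generically with invented scope names; it is not the SAC transfer optimizer itself.

    # Collect variables by scope and concatenate them, mirroring
    # critic_vars = self.policy_network.critic_vars + model_vars.
    import tensorflow.compat.v1 as tf

    tf.disable_v2_behavior()

    with tf.variable_scope("critic"):
        tf.get_variable("w", shape=[3])
    with tf.variable_scope("model"):
        tf.get_variable("w", shape=[3])

    critic_scope_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="critic")
    model_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="model")
    critic_vars = critic_scope_vars + model_vars

    # Target networks would then be initialized pairwise, as in the hunk:
    # [tf.assign(target, source) for target, source in zip(target_vars, critic_vars)]
    print([v.name for v in critic_vars])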

ml-agents/mlagents/trainers/settings.py (2 changes)

  # Network
  encoder_layers: int = 1
- action_layers: int = 1
+ action_layers: int = -1
  policy_layers: int = 1
  value_layers: int = 1
  forward_layers: int = 1
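
The default for action_layers moves from 1 to -1. A negative default like this usually acts as an "unset" sentinel so the effective depth can be derived from another setting at runtime; the exact fallback rule is not visible in this hunk. A small sketch of the convention with a plain dataclass (the real settings module defines these fields on its own settings classes, and the fallback to encoder_layers below is only an assumption):

    from dataclasses import dataclass

    @dataclass
    class NetworkSizes:
        encoder_layers: int = 1
        action_layers: int = -1   # -1: no explicit value supplied
        policy_layers: int = 1
        value_layers: int = 1
        forward_layers: int = 1

        def effective_action_layers(self) -> int:
            # Illustrative fallback only; the real rule lives in the trainer code.
            return self.encoder_layers if self.action_layers < 0 else self.action_layers

    print(NetworkSizes().effective_action_layers())                 # 1 (falls back)
    print(NetworkSizes(action_layers=2).effective_action_layers())  # 2 (explicit)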
