yanchaosun
5 年前
当前提交
a1859fb8
共有 6 个文件被更改,包括 213 次插入 和 5 次删除
- config/sac/Reacher.yaml (8 lines changed)
- config/sac_transfer/ReacherTransfer.yaml (2 lines changed)
- config/sac_transfer/ReacherTransfer1.yaml (52 lines changed)
- config/sac_transfer/ReacherTransfer2.yaml (52 lines changed)
- config/sac_transfer/ReacherTransfer3.yaml (52 lines changed)
- config/sac_transfer/ReacherTransfer4.yaml (52 lines changed)
# config/sac_transfer/ReacherTransfer1.yaml (presumably — matches transfer_path s1; confirm against diff order)
behaviors:
  Reacher:
    trainer_type: sac_transfer
    hyperparameters:
      learning_rate: 0.0003
      learning_rate_schedule: constant
      model_schedule: constant
      batch_size: 256
      buffer_size: 6000000
      buffer_init_steps: 0
      tau: 0.005
      steps_per_update: 20.0
      save_replay_buffer: false
      init_entcoef: 1.0
      reward_signal_steps_per_update: 20.0
      encoder_layers: 1
      policy_layers: 2
      forward_layers: 0
      value_layers: 2
      action_layers: 1
      feature_size: 64
      action_feature_size: 16
      separate_policy_train: true
      separate_policy_net: true
      reuse_encoder: false
      in_epoch_alter: false
      in_batch_alter: true
      use_op_buffer: false
      use_var_predict: true
      with_prior: false
      predict_return: true
      use_bisim: false
      use_transfer: true
      load_model: true
      train_model: false
      load_action: true
      train_action: false
      transfer_path: "results/reacher-qr-s1/Reacher"
    network_settings:
      normalize: true
      hidden_units: 128
      num_layers: 2
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.99
        strength: 1.0
    keep_checkpoints: 5
    max_steps: 6000000
    time_horizon: 1000
    summary_freq: 60000
    threaded: true
# config/sac_transfer/ReacherTransfer2.yaml (presumably — matches transfer_path s2; confirm against diff order)
behaviors:
  Reacher:
    trainer_type: sac_transfer
    hyperparameters:
      learning_rate: 0.0003
      learning_rate_schedule: constant
      model_schedule: constant
      batch_size: 256
      buffer_size: 6000000
      buffer_init_steps: 0
      tau: 0.005
      steps_per_update: 20.0
      save_replay_buffer: false
      init_entcoef: 1.0
      reward_signal_steps_per_update: 20.0
      encoder_layers: 1
      policy_layers: 2
      forward_layers: 0
      value_layers: 2
      action_layers: 1
      feature_size: 64
      action_feature_size: 16
      separate_policy_train: true
      separate_policy_net: true
      reuse_encoder: false
      in_epoch_alter: false
      in_batch_alter: true
      use_op_buffer: false
      use_var_predict: true
      with_prior: false
      predict_return: true
      use_bisim: false
      use_transfer: true
      load_model: true
      train_model: false
      load_action: true
      train_action: false
      transfer_path: "results/reacher-qr-s2/Reacher"
    network_settings:
      normalize: true
      hidden_units: 128
      num_layers: 2
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.99
        strength: 1.0
    keep_checkpoints: 5
    max_steps: 6000000
    time_horizon: 1000
    summary_freq: 60000
    threaded: true
# config/sac_transfer/ReacherTransfer3.yaml (presumably — matches transfer_path s3; confirm against diff order)
behaviors:
  Reacher:
    trainer_type: sac_transfer
    hyperparameters:
      learning_rate: 0.0003
      learning_rate_schedule: constant
      model_schedule: constant
      batch_size: 256
      buffer_size: 6000000
      buffer_init_steps: 0
      tau: 0.005
      steps_per_update: 20.0
      save_replay_buffer: false
      init_entcoef: 1.0
      reward_signal_steps_per_update: 20.0
      encoder_layers: 1
      policy_layers: 2
      forward_layers: 0
      value_layers: 2
      action_layers: 1
      feature_size: 64
      action_feature_size: 16
      separate_policy_train: true
      separate_policy_net: true
      reuse_encoder: false
      in_epoch_alter: false
      in_batch_alter: true
      use_op_buffer: false
      use_var_predict: true
      with_prior: false
      predict_return: true
      use_bisim: false
      use_transfer: true
      load_model: true
      train_model: false
      load_action: true
      train_action: false
      transfer_path: "results/reacher-qr-s3/Reacher"
    network_settings:
      normalize: true
      hidden_units: 128
      num_layers: 2
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.99
        strength: 1.0
    keep_checkpoints: 5
    max_steps: 6000000
    time_horizon: 1000
    summary_freq: 60000
    threaded: true
# config/sac_transfer/ReacherTransfer4.yaml (presumably — matches transfer_path s4; confirm against diff order)
behaviors:
  Reacher:
    trainer_type: sac_transfer
    hyperparameters:
      learning_rate: 0.0003
      learning_rate_schedule: constant
      model_schedule: constant
      batch_size: 256
      buffer_size: 6000000
      buffer_init_steps: 0
      tau: 0.005
      steps_per_update: 20.0
      save_replay_buffer: false
      init_entcoef: 1.0
      reward_signal_steps_per_update: 20.0
      encoder_layers: 1
      policy_layers: 2
      forward_layers: 0
      value_layers: 2
      action_layers: 1
      feature_size: 64
      action_feature_size: 16
      separate_policy_train: true
      separate_policy_net: true
      reuse_encoder: false
      in_epoch_alter: false
      in_batch_alter: true
      use_op_buffer: false
      use_var_predict: true
      with_prior: false
      predict_return: true
      use_bisim: false
      use_transfer: true
      load_model: true
      train_model: false
      load_action: true
      train_action: false
      transfer_path: "results/reacher-qr-s4/Reacher"
    network_settings:
      normalize: true
      hidden_units: 128
      num_layers: 2
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.99
        strength: 1.0
    keep_checkpoints: 5
    max_steps: 6000000
    time_horizon: 1000
    summary_freq: 60000
    threaded: true
撰写
预览
正在加载...
取消
保存
Reference in new issue