# Trainer configuration: selects the `sac_transfer` trainer and sets its
# hyperparameter, transfer and network options.
#
# NOTE(review): every key in this file is at column 0, so `hyperparameters:`
# and `network_settings:` each parse as an empty (null) value and all other
# keys become top-level siblings. Presumably the settings were meant to be
# nested under those two keys - confirm against the consumer's schema before
# re-indenting.
#
# NOTE(review): the following keys are defined more than once with different
# values: learning_rate_schedule, buffer_size (4x), steps_per_update,
# forward_layers, action_layers (3x), feature_size, action_feature_size,
# encoder_layers, transfer_path (3x), normalize. Duplicate keys are invalid
# YAML; many parsers silently keep the last occurrence while strict loaders
# reject the file. Which value is intended cannot be determined from this
# file alone - TODO confirm and remove the stale entries.
trainer_type: sac_transfer
hyperparameters:
learning_rate: 0.0003
# NOTE(review): duplicate key - `constant` vs `linear`.
learning_rate_schedule: constant
learning_rate_schedule: linear
model_schedule: constant
# NOTE(review): duplicate key - set again further down as well.
buffer_size: 50000
buffer_size: 2000000
buffer_init_steps: 0
tau: 0.005
steps_per_update: 10.0
policy_layers: 2
forward_layers: 2
value_layers: 2
# NOTE(review): duplicate key - `-1` vs `2`, and set a third time below.
action_layers: -1
action_layers: 2
feature_size: 128
action_feature_size: 64
separate_policy_train: true
batch_size: 128
use_transfer: true
load_model: true
train_model: false
# The next two lines are commented-out copies of the two active settings
# that follow them (same keys, same values).
# load_action: true
# train_action: false
load_action: true
train_action: false
transfer_path: "results/block/PushBlock"
network_settings:
normalize: false
# NOTE(review): from here on, most keys repeat ones already set above with
# different values. On a parser that keeps the last occurrence, these later
# values are the ones in effect.
buffer_size: 500000
buffer_size: 6000000
steps_per_update: 20.0
forward_layers: 0
action_layers: 1
feature_size: 64
action_feature_size: 16
# NOTE(review): duplicate key - `2` vs `1`.
encoder_layers: 2
encoder_layers: 1
# NOTE(review): duplicate key - two different Reacher result paths, both
# overriding the PushBlock path set earlier (on a last-wins parser).
transfer_path: "results/reacher/Reacher"
transfer_path: "results/sacmod-reacher/Reacher"
# NOTE(review): contradicts `normalize: false` set directly under
# `network_settings:` above.
normalize: true
hidden_units: 128