# Model / transfer option flags and layer counts.
# NOTE(review): every key in this view sits at column 0 and several keys
# repeat. Duplicate mapping keys are invalid in YAML 1.2, and most parsers
# silently keep the last occurrence. Confirm which value of each repeated key
# is intended and delete the other.
lambd: 0.95
num_epoch: 3
learning_rate_schedule: linear
# NOTE(review): model_schedule is set twice in a row; a last-wins parser
# reads "constant".
model_schedule: linear
model_schedule: constant
encoder_layers: 2
# NOTE(review): policy_layers and forward_layers are both set again (to 1)
# right after the network_settings key further down. Presumably that later
# pair was nested under network_settings — TODO confirm.
policy_layers: 2
forward_layers: 2
in_epoch_alter: false
# NOTE(review): in_batch_alter is set twice in a row; a last-wins parser
# reads true.
in_batch_alter: false
in_batch_alter: true
use_op_buffer: false
use_var_predict: true
with_prior: false
use_transfer: true
load_model: true
train_model: false
# NOTE(review): transfer_path is set twice here and twice more near the end
# of this view (with 3DBall paths). At a single mapping level only the last
# one survives in a last-wins parser — confirm which path belongs here.
transfer_path: "results/csold/CrawlerStatic"
transfer_path: "results/csold-const/CrawlerStatic"
# Network shape settings.
# NOTE(review): network_settings has no indented children in this view, so as
# written its value is null and the keys below are its siblings. Presumably
# they were meant to be nested under it — confirm against the consuming
# trainer's schema and restore the 2-space indentation if so.
network_settings:
normalize: true
# NOTE(review): hidden_units is set again (to 128) on the last line of this
# view — confirm which value applies.
hidden_units: 512
# NOTE(review): policy_layers and forward_layers were already set (to 2)
# earlier in this view; at a single mapping level these later values win in a
# last-wins parser.
policy_layers: 1
forward_layers: 1
value_layers: 2
# NOTE(review): feature_size is set twice in a row; a last-wins parser
# reads 32.
feature_size: 16
feature_size: 32
reuse_encoder: false
# Scalar training settings. Their exact semantics are defined by the
# consuming trainer, which is not visible here.
learning_rate: 0.0003
batch_size: 256
# NOTE(review): buffer_size is set twice in a row. Duplicate keys are invalid
# in YAML 1.2; a last-wins parser reads 24000. Confirm and keep one.
buffer_size: 50000
buffer_size: 24000
buffer_init_steps: 0
tau: 0.005
steps_per_update: 10.0
gamma: 0.99
strength: 1.0
# Run-length, checkpointing and reporting settings.
keep_checkpoints: 5
# NOTE(review): max_steps is set twice in a row. Duplicate keys are invalid in
# YAML 1.2; a last-wins parser reads 1000000. Confirm and keep one.
max_steps: 500000
max_steps: 1000000
time_horizon: 1000
summary_freq: 12000
threaded: true
# NOTE(review): third and fourth occurrences of transfer_path in this view
# (the earlier two point at CrawlerStatic results). At a single mapping level
# a last-wins parser keeps only the final 3DBall path below. Presumably these
# belong to a different behavior/section — TODO confirm and re-nest or remove.
transfer_path: "results/sac_model_ball_sep_bisim/3DBall"
transfer_path: "results/sac_model_ball_sep_linear_f32/3DBall"
# NOTE(review): hidden_units was already set to 512 earlier in this view;
# confirm which value is intended.
hidden_units: 128