# NOTE(review): every key in this section sits at column 0, so as written it
# parses as one flat mapping. The section-style keys that have nothing after
# the colon (hyperparameters, network_settings, reward_signals, extrinsic)
# therefore load as null instead of acting as parents of the keys below them.
# Presumably the nesting indentation was lost -- TODO restore it from the
# original configuration before relying on this file.
#
# NOTE(review): several keys are repeated with different values. Duplicate
# keys are invalid in YAML 1.2; strict loaders reject them and lenient loaders
# commonly keep only the last value silently. Each repeat is flagged below --
# decide which value is intended and remove the others.
hyperparameters:
learning_rate: 0.0003
learning_rate_schedule: constant
# NOTE(review): batch_size appears three times (256, 1024, 128).
batch_size: 256
# NOTE(review): buffer_size appears four times (500000, 2000000, 50000, 200000).
buffer_size: 500000
batch_size: 1024
buffer_size: 2000000
buffer_init_steps: 0
tau: 0.005
# NOTE(review): steps_per_update appears three times (30.0, 10.0, 20.0).
steps_per_update: 30.0
# Bare key: null at this indentation (see the note at the top).
network_settings:
normalize: true
# NOTE(review): hidden_units appears twice (512, 256).
hidden_units: 512
# NOTE(review): num_layers appears twice (4, 3).
num_layers: 4
hidden_units: 256
num_layers: 3
vis_encode_type: simple
# Bare keys: both null at this indentation (see the note at the top).
reward_signals:
extrinsic:
keep_checkpoints: 5
# NOTE(review): max_steps appears twice (20000000, 15000000).
max_steps: 20000000
max_steps: 15000000
# NOTE(review): time_horizon appears twice (1000 here, 128 further down).
time_horizon: 1000
# NOTE(review): summary_freq appears twice (30000 here, 20000 further down).
summary_freq: 30000
threaded: true
# Third batch_size entry; last-value-wins loaders would use 128.
batch_size: 128
# Third and fourth buffer_size entries; last-value-wins loaders would use 200000.
buffer_size: 50000
buffer_size: 200000
# Second and third steps_per_update entries; last-value-wins loaders would use 20.0.
steps_per_update: 10.0
steps_per_update: 20.0
save_replay_buffer: false
init_entcoef: 0.1
reward_signal_steps_per_update: 10.0
# NOTE(review): strength is far from the reward_signals/extrinsic keys above;
# presumably it belongs under one of them -- confirm against the consumer's schema.
strength: 1.0
output_path: default
# Second time_horizon entry; last-value-wins loaders would use 128.
time_horizon: 128
# Second summary_freq entry; last-value-wins loaders would use 20000.
summary_freq: 20000