# NOTE(review): these settings start at column 0 with no parent key visible
# in this part of the file; the mapping header they belong to is presumably
# above this point — confirm the intended nesting.
# NOTE(review): `normalize`, `train_interval`, `steps_per_update`,
# `num_layers`, `time_horizon` and `max_steps` are all assigned again after
# the `Bouncer:` key at this same indentation. If that layout is real, these
# are duplicate keys (invalid per YAML 1.2; most parsers keep the last value).
memory_size: 128
normalize: false
num_update: 1
train_interval: 1
steps_per_update: 10
num_layers: 2
time_horizon: 64
sequence_length: 64
buffer_size: 500000
# NOTE(review): exponent form without a sign (`2.0e6`); some YAML 1.1 loaders
# may read this as a string rather than a float — verify that the consumer
# converts it to a number.
max_steps: 2.0e6
init_entcoef: 0.05
# Settings that follow the `Bouncer` key.
# NOTE(review): every line below sits at the same column as `Bouncer:`, so as
# written `Bouncer` parses as null and these keys are its siblings, not its
# children. Nesting under `Bouncer` was presumably intended — confirm and
# restore the indentation.
Bouncer:
normalize: true
time_horizon: 1000
batch_size: 256
train_interval: 2
steps_per_update: 20
buffer_init_steps: 2000
# NOTE(review): `max_steps` is assigned three times in this run (5e6, 1e7,
# 2e7) and `num_layers` twice (3, 4). Duplicate keys in one mapping are
# invalid per YAML 1.2, and most parsers silently keep only the last value.
# It looks like section header lines are missing between these groups —
# TODO confirm which section each group belongs to.
# NOTE(review): `5e6` / `1e7` / `2e7` have no decimal point; some YAML 1.1
# loaders may read them as strings rather than numbers — verify that the
# consumer converts them.
max_steps: 5e6
summary_freq: 30000
num_layers: 3
max_steps: 1e7
hidden_units: 512
max_steps: 2e7
num_layers: 4
reward_signals:
extrinsic: