# Scalar settings for this section. The key names suggest trainer
# hyperparameters; confirm their meaning against the consuming tool's schema.
hidden_units: 128
lambd: 0.95
learning_rate: 3.0e-4
memory_size: 256
normalize: false
num_epoch: 3
buffer_size: 12000
summary_freq: 12000
time_horizon: 1000
# max_steps was previously declared three times (5.0e4, 5.0e5, 5.0e6).
# Duplicate mapping keys are invalid YAML, and most parsers silently keep
# the last occurrence, so only that effective value is retained here.
# NOTE(review): confirm 5.0e6 is the intended value.
max_steps: 5.0e6
beta: 0.001
reward_signals:
extrinsic: