# Flat mapping of what appear to be training hyperparameters
# (inferred from the key names only; TODO confirm against the consumer).
#
# NOTE(review): five keys used to be defined twice in this mapping, which is
# invalid YAML and makes strict loaders reject the file. The last-listed value
# of each was kept, so the result is unchanged for loaders that let the last
# duplicate win. The removed (previously shadowed) values were:
#   learning_rate_schedule: constant
#   max_steps: 5.0e5
#   memory_size: 256
#   sequence_length: 32
#   num_layers: 2
# Confirm that the surviving values are the intended ones.
learning_rate: 3.0e-4
memory_size: 128
normalize: false
num_update: 1
train_interval: 1
hidden_units: 128
init_entcoef: 0.1
# NOTE(review): no sign in the exponent; YAML 1.1 loaders may read this value
# as a string rather than a float. Left as written; verify the consumer
# converts it before changing the spelling.
max_steps: 1.0e7
summary_freq: 1000
num_layers: 1
gamma: 0.99
batch_size: 64
buffer_size: 1024
learning_rate_schedule: linear
num_epoch: 3
sequence_length: 64
beta: 1.0e-2