# NOTE(review): these keys look like trainer hyperparameters/settings, judging
# by their names only — confirm against the consuming tool's schema.
epsilon: 0.2
lambd: 0.95
num_epoch: 3
# This key used to appear twice in a row (linear, then constant). Duplicate
# mapping keys are invalid YAML and most parsers silently keep the last one,
# so the effective value is retained. TODO confirm constant is intended.
learning_rate_schedule: constant
# NOTE(review): network_settings has no value, so it parses as null, and the
# normalize/hidden_units keys below are its siblings rather than its children.
# Presumably they were meant to be nested under it — confirm and re-indent.
network_settings:
normalize: false
hidden_units: 128
gamma: 0.99
strength: 1.0
keep_checkpoints: 5
# This key used to appear twice in a row (5000000, then 10000000). The last,
# effective value is retained. TODO confirm 10000000 is intended.
max_steps: 10000000
time_horizon: 256
summary_freq: 10000
threaded: true