# Flat scalar settings; presumably trainer hyperparameters, judging by the key
# names -- TODO confirm against the consuming tool's schema.
learning_rate_schedule: constant
batch_size: 2048
buffer_size: 20480
# NOTE(review): this key was previously declared twice (256, then 512).
# Duplicate mapping keys are invalid YAML; loaders that tolerate them keep the
# last occurrence, so 512 is retained to preserve the effective value.
# Confirm that 512 (not 256) is the intended setting.
hidden_units: 512
beta: 1.0e-2
time_horizon: 1000
self_play: