normalize: true
max_steps: 5.0e7
learning_rate_schedule: constant
# batch_size and buffer_size were each declared twice (1024 / 10240 first,
# then 2048 / 20480). Duplicate mapping keys are invalid YAML, and loaders
# that tolerate them keep the last occurrence, so only the later values ever
# took effect. The shadowed entries are removed; the effective values stay.
batch_size: 2048
buffer_size: 20480
beta: 1.0e-2
# NOTE(review): as written here, `window` and
# `play_against_latest_model_ratio` are at the same indentation as
# `self_play`, so `self_play` parses as null and the two keys below are its
# siblings, not its children. If they are meant to be self-play options they
# need to be indented under `self_play:` -- confirm against the consumer's
# schema before changing.
self_play:
window: 10
play_against_latest_model_ratio: 0.5