# Settings that follow the `Tennis:` header. Indentation is kept exactly as
# found in this file.
# NOTE(review): if these keys are meant to be nested under `Tennis:`, the
# nesting has been lost and must be restored together with `self_play`.
Tennis:
normalize: true
# NOTE(review): loaders that follow YAML 1.1 float rules (e.g. PyYAML) require
# a signed exponent, so this value may load as the string "5.0e7" rather than
# a number - confirm the consumer converts it before changing the spelling.
max_steps: 5.0e7
learning_rate: 1.0e-4
# `hidden_units` was previously declared twice (512, then 256). Duplicate
# mapping keys are invalid YAML; parsers that tolerate them keep the last
# value, so 256 is retained to preserve the effective setting.
# NOTE(review): confirm that 256, not 512, is the intended value.
hidden_units: 256
beta: 1.0e-2
time_horizon: 1000
self_play: