# NOTE(review): every key in this span starts at column 0, so a YAML parser
# reads it as one flat mapping. Tennis, self_play, Goalie, SoccerTwos and
# SmallCubeSoldier therefore have empty (null) values and do not own any of
# the settings written after them. If they were meant to be section headers,
# the nesting has to be restored from the original config - TODO confirm.
gamma: 0.995
Tennis:
# Duplicate keys are invalid within a single YAML mapping: strict loaders
# reject the document and lenient loaders silently keep the last occurrence.
# Each shadowed earlier occurrence is commented out below so the value that
# lenient loaders already used stays in effect and the file loads everywhere.
# max_steps: 5.0e7  # shadowed duplicate
# NOTE(review): the exponent has no sign; YAML 1.1 resolvers (e.g. PyYAML)
# load such a value as a string, not a float - confirm the consumer converts.
max_steps: 1.0e8
learning_rate_schedule: constant
batch_size: 2048
buffer_size: 20480
time_horizon: 1000
self_play:
# window: 10  # shadowed duplicate
window: 100
# swap_steps: 50000  # shadowed duplicate
swap_steps: 2000
# team_change: 100000  # shadowed duplicate (live value is set further down)
Goalie:
SoccerTwos:
normalize: false
team_change: 200000
SmallCubeSoldier: