```yaml
self_play:
  save_steps: 50000
  team_change: 200000
  swap_steps: 2000
  window: 10
  play_against_latest_model_ratio: 0.5
  initial_elo: 1200.0
```
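How `swap_steps` relates to `team_change` depends on team sizes in asymmetric games. Below is a minimal sketch of that calculation; the helper name `swap_steps_for` is mine, not part of ML-Agents, and the 2v1 numbers are illustrative:

```python
def swap_steps_for(num_agents: int, num_opponent_agents: int,
                   team_change: int, num_swaps: int) -> int:
    # To swap the opponent snapshot num_swaps times per team_change
    # period, scale by the ratio of agents on the two teams.
    return int((num_agents / num_opponent_agents) * (team_change / num_swaps))

# 2v1 scenario: the one-agent team swaps opponents 4 times over a
# team_change window of 200000 steps.
print(swap_steps_for(1, 2, 200000, 4))  # 25000
```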
The Tennis example configuration exercises these settings with the PPO trainer:

```yaml
behaviors:
  Tennis:
    trainer_type: ppo
    hyperparameters:
      batch_size: 1024
      buffer_size: 10240
      learning_rate: 0.0003
      beta: 0.005
      epsilon: 0.2
    self_play:
      team_change: 100000
```
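Since the trainer config is plain YAML, you can sanity-check the nesting by loading it yourself. A small sketch using PyYAML; the `config/ppo/Tennis.yaml` path assumes the standard ML-Agents repo layout:

```python
import yaml

# Path assumes the ML-Agents repo layout; adjust for your project.
with open("config/ppo/Tennis.yaml") as f:
    config = yaml.safe_load(f)

tennis = config["behaviors"]["Tennis"]
print(tennis["hyperparameters"]["batch_size"])  # 1024
print(tennis["self_play"]["team_change"])       # 100000
```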
These defaults are defined on `SelfPlaySettings` in ML-Agents (`mlagents/trainers/settings.py`):

```python
import attr

@attr.s(auto_attribs=True)
class SelfPlaySettings:
    save_steps: int = 20000
    team_change: int = attr.ib()

    @team_change.default
    def _team_change_default(self):
        # Assign team_change to about 4x save_steps
        return self.save_steps * 5

    swap_steps: int = 2000
    window: int = 10
    play_against_latest_model_ratio: float = 0.5
    initial_elo: float = 1200.0
```
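As a quick usage check, constructing the settings object shows how the `team_change` default is derived from `save_steps` (the printed values follow from the definitions above):

```python
settings = SelfPlaySettings()
print(settings.save_steps, settings.team_change)  # 20000 100000

# The derived default scales with save_steps when only that is overridden.
print(SelfPlaySettings(save_steps=50000).team_change)  # 250000
```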