void AgentAWins()
{
m_AgentA.SetReward(1 + m_AgentA.timePenalty);
m_AgentB.SetReward(-1);
m_AgentB.SetReward(-1 - m_AgentB.timePenalty);
m_AgentA.score += 1;
Reset();
m_AgentA.SetReward(-1);
m_AgentA.SetReward(-1 - m_AgentA.timePenalty);
m_AgentB.SetReward(1 + m_AgentB.timePenalty);
m_AgentB.score += 1;
# Trainer configuration fragment: a rollout horizon followed by self-play settings.
# NOTE(review): every key here sits at column 0, so the entries after `self_play:`
# are presumably meant to be nested beneath it and the indentation was lost — confirm
# against the original config before using this file.
time_horizon: 1000
self_play:
window: 10
# NOTE(review): `play_against_latest_model_ratio` is listed twice (0.1, then 0.5).
# YAML mapping keys must be unique; loaders that tolerate duplicates typically keep
# only the last value. This looks like an old/new pair left over from a diff —
# confirm which ratio is intended and remove the other.
play_against_latest_model_ratio: 0.1
play_against_latest_model_ratio: 0.5
save_steps: 50000
swap_steps: 50000
team_change: 100000