```yaml
self_play:
  save_steps: 50000
  team_change: 200000
  swap_steps: 2000
  window: 10
  play_against_latest_model_ratio: 0.5
  initial_elo: 1200.0
```
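How `swap_steps` relates to `team_change` depends on team sizes in asymmetric games. Below is a minimal sketch of that calculation; the helper name `swap_steps_for` is mine, not part of ML-Agents, and the 2v1 numbers are illustrative:

```python
def swap_steps_for(num_agents: int, num_opponent_agents: int,
                   team_change: int, num_swaps: int) -> int:
    # To swap the opponent snapshot num_swaps times per team_change
    # period, scale by the ratio of agents on the two teams.
    return int((num_agents / num_opponent_agents) * (team_change / num_swaps))

# 2v1 scenario: the one-agent team swaps opponents 4 times over a
# team_change window of 200000 steps.
print(swap_steps_for(1, 2, 200000, 4))  # 25000
```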
The Tennis example configuration exercises these settings with the PPO trainer:

```yaml
behaviors:
  Tennis:
    trainer_type: ppo
    hyperparameters:
      batch_size: 1024
      buffer_size: 10240
      learning_rate: 0.0003
      beta: 0.005
      epsilon: 0.2
    self_play:
      team_change: 100000
```
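Since the trainer config is plain YAML, you can sanity-check the nesting by loading it yourself. A small sketch using PyYAML; the `config/ppo/Tennis.yaml` path assumes the standard ML-Agents repo layout:

```python
import yaml

# Path assumes the ML-Agents repo layout; adjust for your project.
with open("config/ppo/Tennis.yaml") as f:
    config = yaml.safe_load(f)

tennis = config["behaviors"]["Tennis"]
print(tennis["hyperparameters"]["batch_size"])  # 1024
print(tennis["self_play"]["team_change"])       # 100000
```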
These defaults are defined on `SelfPlaySettings` in ML-Agents (`mlagents/trainers/settings.py`):

```python
import attr

@attr.s(auto_attribs=True)
class SelfPlaySettings:
    save_steps: int = 20000
    team_change: int = attr.ib()

    @team_change.default
    def _team_change_default(self):
        # Assign team_change to about 4x save_steps
        return self.save_steps * 5

    swap_steps: int = 2000
    window: int = 10
    play_against_latest_model_ratio: float = 0.5
    initial_elo: float = 1200.0
```
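As a quick usage check, constructing the settings object shows how the `team_change` default is derived from `save_steps` (the printed values follow from the definitions above):

```python
settings = SelfPlaySettings()
print(settings.save_steps, settings.team_change)  # 20000 100000

# The derived default scales with save_steps when only that is overridden.
print(SelfPlaySettings(save_steps=50000).team_change)  # 250000
```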