Rename self-play setting: play_against_current_best_ratio -> play_against_latest_model_ratio

5 年前 · 345fa382
--- a/config/trainer_config.yaml
+++ b/config/trainer_config.yaml
    time_horizon: 1000
    self_play:
        window: 10
-        play_against_current_best_ratio: 0.5
+        play_against_latest_model_ratio: 0.5
        save_steps: 50000
        swap_steps: 50000
        team_change: 100000
    num_layers: 2
    self_play:
        window: 10
-        play_against_current_best_ratio: 0.5
+        play_against_latest_model_ratio: 0.5
        save_steps: 50000
        swap_steps: 50000
        team_change: 100000
--- a/ml-agents/mlagents/trainers/ghost/trainer.py
+++ b/ml-agents/mlagents/trainers/ghost/trainer.py

        self_play_parameters = trainer_parameters["self_play"]
        self.window = self_play_parameters.get("window", 10)
-        self.play_against_current_best_ratio = self_play_parameters.get(
-            "play_against_current_best_ratio", 0.5
+        self.play_against_latest_model_ratio = self_play_parameters.get(
+            "play_against_latest_model_ratio", 0.5
        )
        self.steps_between_save = self_play_parameters.get("save_steps", 20000)
        self.steps_between_swap = self_play_parameters.get("swap_steps", 20000)
        for team_id in self._team_to_name_to_policy_queue:
            if team_id == self._learning_team:
                continue
-            elif np.random.uniform() < (1 - self.play_against_current_best_ratio):
+            elif np.random.uniform() < (1 - self.play_against_latest_model_ratio):
                x = np.random.randint(len(self.policy_snapshots))
                snapshot = self.policy_snapshots[x]
            else:
--- a/ml-agents/mlagents/trainers/tests/test_simple_rl.py
+++ b/ml-agents/mlagents/trainers/tests/test_simple_rl.py
    override_vals = {
        "max_steps": 2500,
        "self_play": {
-            "play_against_current_best_ratio": 1.0,
+            "play_against_latest_model_ratio": 1.0,
            "save_steps": 2000,
            "swap_steps": 2000,
        },
    override_vals = {
        "max_steps": 2500,
        "self_play": {
-            "play_against_current_best_ratio": 1.0,
+            "play_against_latest_model_ratio": 1.0,
            "save_steps": 2000,
            "swap_steps": 4000,
        },
    override_vals = {
        "max_steps": 2000,
        "self_play": {
-            "play_against_current_best_ratio": 1.0,
+            "play_against_latest_model_ratio": 1.0,
            "save_steps": 5000,
            "swap_steps": 5000,
            "team_change": 2000,
    override_vals = {
        "max_steps": 2000,
        "self_play": {
-            "play_against_current_best_ratio": 0.0,
+            "play_against_latest_model_ratio": 0.0,
            "save_steps": 5000,
            "swap_steps": 5000,
            "team_change": 2000,