Fix trainer controller and soccer config

4 years ago · 34420044
--- a/config/ppo/SoccerTwos.yaml
+++ b/config/ppo/SoccerTwos.yaml
    network_settings:
      normalize: false
      hidden_units: 512
-      num_layers: 2
+      num_layers: 3
      vis_encode_type: simple
    reward_signals:
      extrinsic:
-    max_steps: 50000000
+    max_steps: 100000000
    time_horizon: 1000
    summary_freq: 10000
    threaded: false
      window: 10
      play_against_latest_model_ratio: 0.5
      initial_elo: 1200.0
+environment_parameters:
+  ball_touch:
+    curriculum:
+      - name: Lesson0
+        completion_criteria:
+          measure: progress
+          behavior: SoccerTwos
+          signal_smoothing: true
+          min_lesson_length: 100
+          threshold: 0.0005
+        value: 1.0
+      - name: Lesson1
+        completion_criteria:
+          measure: progress
+          behavior: SoccerTwos
+          signal_smoothing: true
+          min_lesson_length: 100
+          threshold: 0.001
+        value: 0.5
+      - name: Lesson2
+        completion_criteria:
+          measure: progress
+          behavior: SoccerTwos
+          signal_smoothing: true
+          min_lesson_length: 100
+          threshold: 0.0015
+        value: 0.1
+      - name: Lesson3
+        completion_criteria:
+          measure: progress
+          behavior: SoccerTwos
+          signal_smoothing: true
+          min_lesson_length: 100
+          threshold: 0.002
+        value: 0.0
+env_settings:
+  num_envs: 2
--- a/ml-agents/mlagents/trainers/trainer_controller.py
+++ b/ml-agents/mlagents/trainers/trainer_controller.py
    def reset_env_if_ready(self, env: EnvManager) -> None:
        # Get the sizes of the reward buffers.
        reward_buff = {k: list(t.reward_buffer) for (k, t) in self.trainers.items()}
-        curr_step = {k: int(t.step) for (k, t) in self.trainers.items()}
+        curr_step = {k: int(t.get_step) for (k, t) in self.trainers.items()}
        max_step = {k: int(t.get_max_steps) for (k, t) in self.trainers.items()}
        # Attempt to increment the lessons of the brains who
        # were ready.