update sequencer env

4 年前 · bedf9886
--- a/Project/Assets/ML-Agents/Examples/Arena-Sequence/Prefabs/Area.prefab
+++ b/Project/Assets/ML-Agents/Examples/Arena-Sequence/Prefabs/Area.prefab
  hasUpgradedFromAgentParameters: 1
  MaxStep: 5000
  SelectNewTiles: 0
-  MaxNumberOfTiles: 5
+  MaxNumberOfTiles: 20
  SequenceTilesList:
  - {fileID: 3646228404148452640}
  - {fileID: 3646228403214730312}
  m_Script: {fileID: 11500000, guid: dd8012d5925524537b27131fef517017, type: 3}
  m_Name: 
  m_EditorClassIdentifier: 
-  ObservableSize: 7
+  ObservableSize: 22
  MaxNumObservables: 20
 --- !u!1 &3647816068884681782
 GameObject:
--- a/Project/Assets/ML-Agents/Examples/Arena-Sequence/Scripts/SequencerAgent.cs
+++ b/Project/Assets/ML-Agents/Examples/Arena-Sequence/Scripts/SequencerAgent.cs
    //     public int SpawnIndexPos;
    // }
    public bool SelectNewTiles;
+    EnvironmentParameters m_ResetParams;

    int m_NumberOfTilesToSpawn;
    public int MaxNumberOfTiles;
        m_PushBlockSettings = FindObjectOfType<PushBlockSettings>();
        m_AgentRb = GetComponent<Rigidbody>();
        m_StartingPos = transform.position;
+        m_ResetParams = Academy.Instance.EnvironmentParameters;
    }


    public override void OnEpisodeBegin()
    {

-        m_NumberOfTilesToSpawn = Random.Range(1, MaxNumberOfTiles);
+        MaxNumberOfTiles = (int)m_ResetParams.GetWithDefault("num_tiles", 5);
+        m_NumberOfTilesToSpawn = Random.Range(1, MaxNumberOfTiles + 1);
        SelectTilesToShow();
        SetTilePositions();

        foreach (var item in CurrentlyVisibleTilesList)
        {
       
-            float[] listObservation = new float[MaxNumberOfTiles + 2];
+            float[] listObservation = new float[20 + 2];
-            listObservation[MaxNumberOfTiles] = (item.transform.localRotation.eulerAngles.y / 360f);
-            listObservation[MaxNumberOfTiles + 1] = item.visited ? 1.0f : 0.0f; 
+            listObservation[20] = (item.transform.localRotation.eulerAngles.y / 360f);
+            listObservation[21] = item.visited ? 1.0f : 0.0f; 
            //Debug.Log(listObservation[20]);
            //Debug.Log(listObservation[21]);
            //Debug.Log(listObservation[22]);
        int numLeft = m_NumberOfTilesToSpawn;
        while (numLeft > 0)
        {
-            int rndInt = Random.Range(0, MaxNumberOfTiles);
+            int rndInt = Random.Range(0, SequenceTilesList.Count);// MaxNumberOfTiles);
            var tmp = SequenceTilesList[rndInt];
            if (!CurrentlyVisibleTilesList.Contains(tmp))
            {
--- a/config/ppo/Sorter.yaml
+++ b/config/ppo/Sorter.yaml
  Sorter:
    trainer_type: ppo
    hyperparameters:
-      batch_size: 256
-      buffer_size: 10240
+      batch_size: 512
+      buffer_size: 40960
      learning_rate: 0.0003
      beta: 0.005
      epsilon: 0.2
    network_settings:
      normalize: False
-      hidden_units: 128
+      hidden_units: 256
      num_layers: 2
      vis_encode_type: simple
    reward_signals:
    keep_checkpoints: 5
-    max_steps: 50000000
-    time_horizon: 512
+    max_steps: 100000000
+    time_horizon: 256
+environment_parameters:
+  num_tiles:
+    curriculum:
+      - name: Lesson9
+        value: 12.0
-  num_envs: 8
+  num_envs: 1
--- a/config/ppo/Sorter_curriculum.yaml
+++ b/config/ppo/Sorter_curriculum.yaml
+behaviors:
+  Sorter:
+    trainer_type: ppo
+    hyperparameters:
+      batch_size: 512
+      buffer_size: 40960
+      learning_rate: 0.0003
+      beta: 0.005
+      epsilon: 0.2
+      lambd: 0.95
+      num_epoch: 3
+      learning_rate_schedule: linear
+    network_settings:
+      normalize: False
+      hidden_units: 128
+      num_layers: 2
+      vis_encode_type: simple
+    reward_signals:
+      extrinsic:
+        gamma: 0.99
+        strength: 1.0
+    keep_checkpoints: 5
+    max_steps: 100000000
+    time_horizon: 256
+    summary_freq: 10000
+    threaded: true
+environment_parameters:
+  num_tiles:
+    curriculum:
+      - name: Lesson0 # The '-' is important as this is a list
+        completion_criteria:
+          measure: progress
+          behavior: Sorter
+          signal_smoothing: true
+          min_lesson_length: 100
+          threshold: 0.05
+        value: 2.0
+      - name: Lesson1 # The '-' is important as this is a list
+        completion_criteria:
+          measure: progress
+          behavior: Sorter
+          signal_smoothing: true
+          min_lesson_length: 100
+          threshold: 0.1
+        value: 4.0
+      - name: Lesson2 # Third lesson in the curriculum list
+        completion_criteria:
+          measure: progress
+          behavior: Sorter
+          signal_smoothing: true
+          min_lesson_length: 100
+          threshold: 0.15
+        value: 6.0
+      - name: Lesson3
+        completion_criteria:
+          measure: progress
+          behavior: Sorter
+          signal_smoothing: true
+          min_lesson_length: 100
+          threshold: 0.2
+        value: 8.0
+      - name: Lesson4 # The '-' is important as this is a list
+        completion_criteria:
+          measure: progress
+          behavior: Sorter
+          signal_smoothing: true
+          min_lesson_length: 100
+          threshold: 0.25
+        value: 10.0
+      - name: Lesson5 # Sixth lesson in the curriculum list
+        completion_criteria:
+          measure: progress
+          behavior: Sorter
+          signal_smoothing: true
+          min_lesson_length: 100
+          threshold: 0.3
+        value: 12.0
+      - name: Lesson6
+        completion_criteria:
+          measure: progress
+          behavior: Sorter
+          signal_smoothing: true
+          min_lesson_length: 100
+          threshold: 0.35
+        value: 14.0
+      - name: Lesson7 # Eighth lesson in the curriculum list
+        completion_criteria:
+          measure: progress
+          behavior: Sorter
+          signal_smoothing: true
+          min_lesson_length: 100
+          threshold: 0.4
+        value: 16.0
+      - name: Lesson8
+        completion_criteria:
+          measure: progress
+          behavior: Sorter
+          signal_smoothing: true
+          min_lesson_length: 100
+          threshold: 0.45
+        value: 18.0
+      - name: Lesson9
+        value: 20.0
+env_settings:
+  num_envs: 8