浏览代码

-

/bullet-hell-barracuda-test-1.3.1
vincentpierre 3 年前
当前提交
48bd37ee
共有 3 个文件被更改,包括 121 次插入和 38 次删除
  1. 26
      Project/Assets/ML-Agents/Examples/Arena-Sequence/Scripts/SequencerAgent.cs
  2. 105
      config/ppo/Sorter_curriculum.yaml
  3. 28
      config/ppo/Sorter.yaml

26
Project/Assets/ML-Agents/Examples/Arena-Sequence/Scripts/SequencerAgent.cs


public class SequencerAgent : Agent
{
private const int k_HighestTileValue = 20;
public bool SelectNewTiles;
int m_NumberOfTilesToSpawn;

private Vector3 m_StartingPos;
GameObject m_Area;
EnvironmentParameters m_ResetParams;
// private SequenceTile m_NextExpectedTile;

public override void Initialize()
{
m_Area = transform.parent.gameObject;
m_MaxNumberOfTiles = SequenceTilesList.Count;
m_MaxNumberOfTiles = k_HighestTileValue;
m_ResetParams = Academy.Instance.EnvironmentParameters;
m_BufferSensor = GetComponent<BufferSensorComponent>();
m_PushBlockSettings = FindObjectOfType<PushBlockSettings>();
m_AgentRb = GetComponent<Rigidbody>();

/// </summary>
public override void OnEpisodeBegin()
{
m_MaxNumberOfTiles = (int)m_ResetParams.GetWithDefault("num_tiles", 5);
m_NumberOfTilesToSpawn = Random.Range(1, m_MaxNumberOfTiles);
m_NumberOfTilesToSpawn = Random.Range(1, m_MaxNumberOfTiles + 1);
SelectTilesToShow();
SetTilePositions();

foreach (var item in CurrentlyVisibleTilesList)
{
float[] listObservation = new float[m_MaxNumberOfTiles + 2];
float[] listObservation = new float[k_HighestTileValue + 2];
listObservation[m_MaxNumberOfTiles] = (item.transform.localRotation.eulerAngles.y / 360f);
listObservation[m_MaxNumberOfTiles + 1] = item.visited ? 1.0f : 0.0f;
listObservation[k_HighestTileValue] = (item.transform.localRotation.eulerAngles.y / 360f);
listObservation[k_HighestTileValue + 1] = item.visited ? 1.0f : 0.0f;
//Debug.Log(listObservation[20]);
//Debug.Log(listObservation[21]);
//Debug.Log(listObservation[22]);

int rndPosIndx = 0;
while (!posChosen)
{
rndPosIndx = Random.Range(0, SequenceTilesList.Count);
rndPosIndx = Random.Range(0, k_HighestTileValue);
if (!m_UsedPositionsList.Contains(rndPosIndx))
{
m_UsedPositionsList.Add(rndPosIndx);

item.transform.localRotation = Quaternion.Euler(0, rndPosIndx * (360f / SequenceTilesList.Count), 0);
item.transform.localRotation = Quaternion.Euler(0, rndPosIndx * (360f / k_HighestTileValue), 0);
item.rend.sharedMaterial = TileMaterial;
item.gameObject.SetActive(true);
}

int numLeft = m_NumberOfTilesToSpawn;
while (numLeft > 0)
{
int rndInt = Random.Range(0, m_MaxNumberOfTiles);
int rndInt = Random.Range(0, k_HighestTileValue);
var tmp = SequenceTilesList[rndInt];
if (!CurrentlyVisibleTilesList.Contains(tmp))
{

m_AgentRb.AddForce(dirToGo * m_PushBlockSettings.agentRunSpeed,
ForceMode.VelocityChange);
}
}
/// <summary>
/// Called every step of the engine. Here the agent takes an action.

public override void Heuristic(in ActionBuffers actionsOut)
{
var discreteActionsOut = actionsOut.DiscreteActions;
var discreteActionsOut = actionsOut.DiscreteActions;
discreteActionsOut.Clear();
//forward
if (Input.GetKey(KeyCode.W))

105
config/ppo/Sorter_curriculum.yaml


# Trainer configuration for the Sorter environment with curriculum learning.
# Indentation restored to the standard ML-Agents config layout — the scraped
# source had all leading whitespace stripped, which makes the file invalid YAML.
behaviors:
  Sorter:
    trainer_type: ppo
    hyperparameters:
      batch_size: 512
      buffer_size: 40960
      learning_rate: 0.0003
      beta: 0.005
      epsilon: 0.2
      lambd: 0.95
      num_epoch: 3
      learning_rate_schedule: linear
    network_settings:
      normalize: False
      hidden_units: 128
      num_layers: 2
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.99
        strength: 1.0
    keep_checkpoints: 5
    max_steps: 100000000
    time_horizon: 256
    summary_freq: 10000
    threaded: true
environment_parameters:
  # Curriculum on the number of tiles: lessons advance on training progress
  # (fraction of max_steps completed), growing num_tiles from 2 up to 20.
  num_tiles:
    curriculum:
      - name: Lesson0 # The '-' is important as this is a list
        completion_criteria:
          measure: progress
          behavior: Sorter
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.05
        value: 2.0
      - name: Lesson1 # The '-' is important as this is a list
        completion_criteria:
          measure: progress
          behavior: Sorter
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.1
        value: 4.0
      - name: Lesson2 # This is the start of the second lesson
        completion_criteria:
          measure: progress
          behavior: Sorter
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.15
        value: 6.0
      - name: Lesson3
        completion_criteria:
          measure: progress
          behavior: Sorter
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.2
        value: 8.0
      - name: Lesson4 # The '-' is important as this is a list
        completion_criteria:
          measure: progress
          behavior: Sorter
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.25
        value: 10.0
      - name: Lesson5 # This is the start of the second lesson
        completion_criteria:
          measure: progress
          behavior: Sorter
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.3
        value: 12.0
      - name: Lesson6
        completion_criteria:
          measure: progress
          behavior: Sorter
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.35
        value: 14.0
      - name: Lesson7 # This is the start of the second lesson
        completion_criteria:
          measure: progress
          behavior: Sorter
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.4
        value: 16.0
      - name: Lesson8
        completion_criteria:
          measure: progress
          behavior: Sorter
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.45
        value: 18.0
      # Final lesson: no completion_criteria — it stays active once reached.
      - name: Lesson9
        value: 20.0
env_settings:
  num_envs: 8

28
config/ppo/Sorter.yaml


# Trainer configuration for the Sorter environment (no curriculum).
# Indentation restored to the standard ML-Agents config layout — the scraped
# source had all leading whitespace stripped, which makes the file invalid YAML.
behaviors:
  Sorter:
    trainer_type: ppo
    hyperparameters:
      batch_size: 256
      buffer_size: 10240
      learning_rate: 0.0003
      beta: 0.005
      epsilon: 0.2
      lambd: 0.95
      num_epoch: 3
      learning_rate_schedule: linear
    network_settings:
      normalize: False
      hidden_units: 128
      num_layers: 2
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.99
        strength: 1.0
    keep_checkpoints: 5
    max_steps: 50000000
    time_horizon: 512
    summary_freq: 10000
    threaded: true
env_settings:
  num_envs: 8
正在加载...
取消
保存