
fixed episode length modification issue.

/develop/parameterizedenvs
Scott, 3 years ago
Current commit: 130512b4
7 files changed, 85 insertions and 36 deletions
  1. Project/Assets/ML-Agents/Examples/3DBall/Prefabs/3DBallAgentArea.prefab (33 changes)
  2. Project/Assets/ML-Agents/Examples/3DBall/Prefabs/3DBallMulti.prefab (3 changes)
  3. Project/Assets/ML-Agents/Examples/3DBall/Scenes/3DBallMulti.unity (5 changes)
  4. Project/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DAgentArea.cs (3 changes)
  5. Project/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DMultiAgent.cs (16 changes)
  6. 3DBallMultiPower.yaml (30 changes)
  7. config/ppo/3DBallMulti_power.yaml (31 changes)

Project/Assets/ML-Agents/Examples/3DBall/Prefabs/3DBallAgentArea.prefab (33 changes)


m_Component:
- component: {fileID: 8706416217891658080}
- component: {fileID: 3027918195473112231}
- component: {fileID: 2318041762275472194}
m_Layer: 0
m_Name: 3DBallAgentArea
m_TagString: Untagged

actorObjs: []
prefab: {fileID: 1321468028730240, guid: c5e235d7c7cba4e5393f3e6b4c6bfe44, type: 3}
numberOfParallel: 18
-maxStep: 100
+maxStep: 10
--- !u!114 &2318041762275472194
MonoBehaviour:
  m_ObjectHideFlags: 0
  m_CorrespondingSourceObject: {fileID: 0}
  m_PrefabInstance: {fileID: 0}
  m_PrefabAsset: {fileID: 0}
  m_GameObject: {fileID: 8706416217891658087}
  m_Enabled: 1
  m_EditorHideFlags: 0
  m_Script: {fileID: 11500000, guid: 5d1c4e0b1822b495aa52bc52839ecb30, type: 3}
  m_Name:
  m_EditorClassIdentifier:
  m_BrainParameters:
    VectorObservationSize: 1
    NumStackedVectorObservations: 1
    m_ActionSpec:
      m_NumContinuousActions: 0
      BranchSizes: 01000000
    VectorActionSize: 01000000
    VectorActionDescriptions: []
    VectorActionSpaceType: 0
    hasUpgradedBrainParametersWithActionSpec: 1
  m_Model: {fileID: 0}
  m_InferenceDevice: 0
  m_BehaviorType: 0
  m_BehaviorName: My Behavior
  TeamId: 0
  m_UseChildSensors: 1
  m_UseChildActuators: 1
  m_ObservableAttributeHandling: 0

Project/Assets/ML-Agents/Examples/3DBall/Prefabs/3DBallMulti.prefab (3 changes)


m_RewardType: 2
goal: {fileID: 5497951568357209023}
epsilon: 0.25
stepvalue: 40
--- !u!114 &1306725529891448089
MonoBehaviour:
m_ObjectHideFlags: 0

m_GameObject: {fileID: 5497951568357209023}
m_Material: {fileID: 0}
m_IsTrigger: 0
-m_Enabled: 1
+m_Enabled: 0
serializedVersion: 2
m_Radius: 0.5
m_Center: {x: 0, y: 0, z: 0}

Project/Assets/ML-Agents/Examples/3DBall/Scenes/3DBallMulti.unity (5 changes)


m_Modification:
  m_TransformParent: {fileID: 0}
  m_Modifications:
  - target: {fileID: 3027918195473112231, guid: 7f24aa5e0e9d54a9b8bb72772633cee7,
      type: 3}
    propertyPath: maxStep
    value: 20
    objectReference: {fileID: 0}
  - target: {fileID: 8706416217891658080, guid: 7f24aa5e0e9d54a9b8bb72772633cee7,
      type: 3}
    propertyPath: m_RootOrder

Project/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DAgentArea.cs (3 changes)


{
Ball3DMultiAgent agent = actor.GetComponent<Ball3DMultiAgent>();
agent.m_RewardType = rewardType;
-agent.MaxStep = maxStep;
+agent.setMaxStep(maxStep);
}
}
public void AreaReset()

if (changed)
{
AreaReset();
update_agents();
}
}
}
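
For reference, the "changed" flag above implies the area polls the trainer-side environment parameters and pushes updates into its agents. A minimal sketch of that polling loop follows; Academy.Instance.EnvironmentParameters.GetWithDefault is the actual ML-Agents API for reading the environment_parameters values from the YAML configs further down, but the class and field names here are assumptions for illustration, not the repo's code.

using Unity.MLAgents;
using UnityEngine;

// Hypothetical poller sketching how Ball3DAgentArea could detect a changed
// maxStep parameter and forward it to its agents.
public class AreaParameterPoller : MonoBehaviour
{
    int m_LastMaxStep = -1; // last value we propagated (assumed field)

    void FixedUpdate()
    {
        // GetWithDefault reads a value sent under environment_parameters in
        // the trainer YAML; the default applies when no trainer is attached.
        var maxStep = (int)Academy.Instance.EnvironmentParameters
            .GetWithDefault("maxStep", 5000f);

        bool changed = maxStep != m_LastMaxStep;
        if (changed)
        {
            m_LastMaxStep = maxStep;
            // Mirror update_agents() above: push the new value into every agent.
            foreach (var agent in GetComponentsInChildren<Ball3DMultiAgent>())
            {
                agent.setMaxStep(maxStep);
            }
        }
    }
}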

Project/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DMultiAgent.cs (16 changes)


public GameObject goal;
[Tooltip("Specifies the radius of the goal region")]
public float epsilon=0.25f;
public int stepvalue=5000;
public override void Initialize()
{
m_BallRb = ball.GetComponent<Rigidbody>();

sensor.AddObservation(m_BallRb.velocity);
}
}
// public void FixedUpdate()
// {
// MaxStep = stepvalue;
// }
public override void OnActionReceived(ActionBuffers actionBuffers)
{

+ gameObject.transform.position;
//Reset the parameters when the Agent is reset.
SetResetParameters();
MaxStep = stepvalue;
}
public override void Heuristic(in ActionBuffers actionsOut)

float maxdist = 3.54f; // upper bound, assuming coordinates lie within [-2.5, 2.5] in each dimension (sqrt(2.5^2 + 2.5^2) ≈ 3.54)
float dist = Vector3.Distance(ball.transform.position, goal.transform.position);
// distance between the ball and the center of the goal region
-dist = Mathf.Clamp(epsilon - dist, 0, maxdist);
+dist = Mathf.Clamp(dist, 0, maxdist);
}
public void setMaxStep(int value)
{
stepvalue = value;
MaxStep = value;
}
}
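
Taken together, the change routes every episode-length update through setMaxStep and re-applies the cached stepvalue when an episode restarts, instead of mutating MaxStep from the now commented-out FixedUpdate. A condensed sketch of the resulting pattern, assuming OnEpisodeBegin is the reset hook that wraps the SetResetParameters call above:

using Unity.MLAgents;

// Condensed sketch of the episode-length handling after this commit;
// everything unrelated to MaxStep is elided.
public class EpisodeLengthSketch : Agent
{
    public int stepvalue = 5000; // cached episode length, survives resets

    public void setMaxStep(int value)
    {
        stepvalue = value; // remember for future episodes
        MaxStep = value;   // and take effect immediately
    }

    public override void OnEpisodeBegin()
    {
        // Re-applying the cached value keeps a reset from reverting MaxStep
        // to the serialized prefab/scene default.
        MaxStep = stepvalue;
    }
}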

3DBallMultiPower.yaml (30 changes)


behaviors:
  3DBall:
    trainer_type: ppo
    hyperparameters:
      batch_size: 64
      buffer_size: 12000
      learning_rate: 0.0003
      beta: 0.001
      epsilon: 0.2
      lambd: 0.99
      num_epoch: 3
      learning_rate_schedule: linear
    network_settings:
      normalize: true
      hidden_units: 128
      num_layers: 2
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.99
        strength: 1.0
    keep_checkpoints: 5
    max_steps: 500000
    time_horizon: 1000
    summary_freq: 12000

# Add this section
environment_parameters:
  maxStep: 1000

config/ppo/3DBallMulti_power.yaml (31 changes)


behaviors:
  3DBall:
    trainer_type: ppo
    hyperparameters:
      batch_size: 64
      buffer_size: 12000
      learning_rate: 0.0003
      beta: 0.001
      epsilon: 0.2
      lambd: 0.99
      num_epoch: 3
      learning_rate_schedule: constant
    network_settings:
      normalize: true
      hidden_units: 128
      num_layers: 2
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.999
        strength: 1.0
    keep_checkpoints: 5
    max_steps: 500000
    time_horizon: 5000
    summary_freq: 18000
environment_parameters:
  maxStep: 20
  rewardType: 2
  numParallel: 8
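
These keys must match the strings the C# side looks up at runtime. Below are illustrative lookups for the two keys not covered earlier; the key strings come from this YAML, while the class name and the defaults are assumptions (the 18 mirrors numberOfParallel in the prefab above).

using Unity.MLAgents;

// Hypothetical helper reading the remaining environment_parameters keys.
public static class PowerEnvParams
{
    // rewardType selects among the reward variants (cf. m_RewardType above).
    public static int RewardType =>
        (int)Academy.Instance.EnvironmentParameters.GetWithDefault("rewardType", 0f);

    // numParallel controls how many agent areas are spawned.
    public static int NumParallel =>
        (int)Academy.Instance.EnvironmentParameters.GetWithDefault("numParallel", 18f);
}

Training would then be launched in the usual way, e.g. mlagents-learn config/ppo/3DBallMulti_power.yaml --run-id=<run-id>.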