update obsv

4 年前 · 74478199
--- a/Project/Assets/ML-Agents/Examples/Walker/Prefabs/DynamicPlatformWalker.prefab
+++ b/Project/Assets/ML-Agents/Examples/Walker/Prefabs/DynamicPlatformWalker.prefab
    - target: {fileID: 895268871377934297, guid: 765582efd9dda46ed98564603316353f,
        type: 3}
      propertyPath: m_BrainParameters.VectorObservationSize
-      value: 237
+      value: 238
      objectReference: {fileID: 0}
    - target: {fileID: 895268871377934298, guid: 765582efd9dda46ed98564603316353f,
        type: 3}
        type: 3}
      propertyPath: jointDampen
      value: 5000
+      objectReference: {fileID: 0}
+    - target: {fileID: 895268871377934303, guid: 765582efd9dda46ed98564603316353f,
+        type: 3}
+      propertyPath: maxJointForceLimit
+      value: 15000
      objectReference: {fileID: 0}
    - target: {fileID: 895268871635378176, guid: 765582efd9dda46ed98564603316353f,
        type: 3}
--- a/Project/Assets/ML-Agents/Examples/Walker/Scripts/WalkerAgent.cs
+++ b/Project/Assets/ML-Agents/Examples/Walker/Scripts/WalkerAgent.cs
    /// </summary>
    public override void CollectObservations(VectorSensor sensor)
    {
+
+        var cubeForward = orientationCube.transform.forward;
+//        avgVelValue = GetVelocity();
+        //normalized measure of how closely the avg body velocity matches the goal velocity (cubeForward * walkingSpeed).
+        //value of 1 means we are matching velocity perfectly
+        //value of 0 means we are not matching velocity
+//        sensor.AddObservation(VelocityInverseLerp(cubeForward * walkingSpeed, avgVelValue));
+        sensor.AddObservation(VelocityInverseLerp( cubeForward * walkingSpeed));
+        
-        sensor.AddObservation(walkingSpeed);
-        sensor.AddObservation(Quaternion.FromToRotation(hips.forward, orientationCube.transform.forward));
-        sensor.AddObservation(Quaternion.FromToRotation(head.forward, orientationCube.transform.forward));
+        
+        
+        sensor.AddObservation(walkingSpeed/m_maxWalkingSpeed);
+        sensor.AddObservation(Quaternion.FromToRotation(hips.forward, cubeForward));
+        sensor.AddObservation(Quaternion.FromToRotation(head.forward, cubeForward));

        sensor.AddObservation(orientationCube.transform.InverseTransformPoint(target.transform.position));

        UpdateRewards();
    }

+    /// <summary>
+    /// Computes the average velocity across all body parts in m_JdController.bodyPartsList.
+    /// </summary>
+    /// <returns>
+    /// The sum of every body part's rb.velocity divided by the number of body parts,
+    /// or Vector3.zero when the list contains no body parts.
+    /// </returns>
+    Vector3 GetVelocity()
+    {
+        Vector3 velSum = Vector3.zero;
+        int counter = 0;
+
+        //ALL RBS (velocities are summed unclamped)
+        foreach (var item in m_JdController.bodyPartsList)
+        {
+            counter++;
+            velSum += item.rb.velocity;
+        }
+
+        //Dividing by zero would produce a NaN vector that propagates into the
+        //velocity-matching observation and the reward product.
+        if (counter == 0)
+        {
+            return Vector3.zero;
+        }
+
+        return velSum / counter;
+    }
+
    public float velInverseLerpVal;
    public float hipsVelMag;
    public float lookAtTargetReward; //reward for looking at the target
 //        rewardManager.UpdateReward("bpVel", bpVelPenaltyThisStep);


-        Vector3 velSum = Vector3.zero;
-        avgVelValue = Vector3.zero;
-        velSum += m_JdController.bodyPartsDict[hips].rb.velocity;
-        velSum += m_JdController.bodyPartsDict[spine].rb.velocity;
-        velSum += m_JdController.bodyPartsDict[chest].rb.velocity;
-        velSum += m_JdController.bodyPartsDict[head].rb.velocity;
-        avgVelValue = velSum/4;
-        velInverseLerpVal = VelocityInverseLerp(cubeForward * walkingSpeed, avgVelValue);
+//        Vector3 velSum = Vector3.zero;
+//        avgVelValue = Vector3.zero;
+//        velSum += m_JdController.bodyPartsDict[hips].rb.velocity;
+//        velSum += m_JdController.bodyPartsDict[spine].rb.velocity;
+//        velSum += m_JdController.bodyPartsDict[chest].rb.velocity;
+//        velSum += m_JdController.bodyPartsDict[head].rb.velocity;
+//        avgVelValue = velSum/4;
+//        velInverseLerpVal = VelocityInverseLerp(cubeForward * walkingSpeed, avgVelValue);
+        velInverseLerpVal = VelocityInverseLerp(cubeForward * walkingSpeed);
        rewardManager.UpdateReward("productOfAllRewards", velInverseLerpVal * lookAtTargetReward * headHeightOverFeetReward);
 //            velInverseLerpVal = VelocityInverseLerp(Vector3.zero, cubeForward * walkingSpeed, avgVelValue);

    }
    public Vector3 avgVelValue;
    public float velDeltaDistance; //distance between the goal and actual vel
-    public float VelocityInverseLerp(Vector3 velocityGoal, Vector3 currentVel)
+    public float VelocityInverseLerp(Vector3 velocityGoal)
-        velDeltaDistance = Vector3.Distance(currentVel, velocityGoal);
+        avgVelValue = GetVelocity();
+
+        velDeltaDistance = Vector3.Distance(avgVelValue, velocityGoal);
+    
+//    public float VelocityInverseLerp(Vector3 velocityGoal, Vector3 currentVel)
+//    {
+//        avgVelValue = GetVelocity();
+//
+//        velDeltaDistance = Vector3.Distance(currentVel, velocityGoal);
+////        float percent = Mathf.InverseLerp(m_maxWalkingSpeed, 0, velDeltaDistance);
+//        float percent = Mathf.InverseLerp(walkingSpeed, 0, velDeltaDistance);
+//        return percent;
+//    }
    
 //    public float VelocityInverseLerp(Vector3 a, Vector3 b, Vector3 value)
 //    {