fixed bp hierarchy

5 年前 · eca1dac5
--- a/Project/Assets/ML-Agents/Examples/Walker/Prefabs/WalkerRagdollScale1.prefab
+++ b/Project/Assets/ML-Agents/Examples/Walker/Prefabs/WalkerRagdollScale1.prefab
  m_PrefabAsset: {fileID: 0}
  m_GameObject: {fileID: 5756232957616717705}
  m_LocalRotation: {x: -0, y: -0, z: -0, w: 1}
-  m_LocalPosition: {x: 0, y: 2.1669998, z: 0}
+  m_LocalPosition: {x: 0, y: 1.3619995, z: 0}
-  m_Father: {fileID: 5756232958866523899}
-  m_RootOrder: 1
+  m_Father: {fileID: 5756232958925281413}
+  m_RootOrder: 3
  m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
 --- !u!54 &5756232957616717715
 Rigidbody:
  m_LocalScale: {x: 1, y: 1, z: 1}
  m_Children:
  - {fileID: 3284846713557781917}
-  - {fileID: 5756232957616717704}
  - {fileID: 5756232958925281413}
  m_Father: {fileID: 3284846714078240390}
  m_RootOrder: 3
  - {fileID: 3284846713634731969}
  - {fileID: 5756232958318828582}
  - {fileID: 5756232958734302656}
+  - {fileID: 5756232957616717704}
-  m_RootOrder: 2
+  m_RootOrder: 1
  m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
 --- !u!54 &5756232958925281468
 Rigidbody:
--- a/Project/Assets/ML-Agents/Examples/Walker/Scripts/WalkerAgentDynamic.cs
+++ b/Project/Assets/ML-Agents/Examples/Walker/Scripts/WalkerAgentDynamic.cs
            CollectObservationBodyPart(bodyPart, sensor);
        }

-//        print(m_OrientationCube.transform.rotation.eulerAngles);
-//        Debug.DrawRay(m_OrientationCube.transform.position, m_OrientationCube.transform.InverseTransformVector(m_JdController.bodyPartsDict[hips].rb.velocity), Color.green,Time.fixedDeltaTime * 5);
-  AddReward(
-//            runForwardTowardsTargetReward
-//            facingReward * velReward //max reward is moving towards while facing otherwise it is a penalty
-//            +0.02f * Vector3.Dot(m_WalkDir.normalized, m_JdController.bodyPartsDict[hips].rb.velocity)
-//            + 0.02f * Vector3.Dot(m_OrientationCube.transform.forward,Vector3.ClampMagnitude(m_JdController.bodyPartsDict[hips].rb.velocity,5))
-            +0.01f * Vector3.Dot(m_OrientationCube.transform.forward,
-                Vector3.ClampMagnitude(m_JdController.bodyPartsDict[hips].rb.velocity, 3))
-            + 0.01f * Vector3.Dot(m_OrientationCube.transform.forward, hips.forward)
-
-//            + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, chest.rotation) //reward looking at
-//            + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation) //reward looking at
-//            + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, head.rotation) //reward looking at
-//            + 0.015f * (Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation) - 1) *
-//            .5f //penalize not looking at
-//            + 0.015f * (Quaternion.Dot(m_OrientationCube.transform.rotation, head.rotation) - 1) *
-//            .5f //penalize not looking at
-
-            + 0.005f * (head.position.y - shinL.position.y)
-            + 0.005f * (head.position.y - shinR.position.y)
-//            + 0.01f * (head.position.y - shinL.position.y)
-//            + 0.01f * (head.position.y - shinR.position.y)
-//            - 0.005f * Mathf.Clamp(m_JdController.bodyPartsDict[handL].rb.velocity.magnitude,
-//                6, 9999)
-//            - 0.005f * Mathf.Clamp(m_JdController.bodyPartsDict[handR].rb.velocity.magnitude,
-//                6, 9999)
-//            + 0.02f * (head.position.y - hips.position.y)
-//            - 0.01f * Vector3.Distance(m_JdController.bodyPartsDict[head].rb.velocity,
-//                m_JdController.bodyPartsDict[hips].rb.velocity)
-        );
+////        print(m_OrientationCube.transform.rotation.eulerAngles);
+////        Debug.DrawRay(m_OrientationCube.transform.position, m_OrientationCube.transform.InverseTransformVector(m_JdController.bodyPartsDict[hips].rb.velocity), Color.green,Time.fixedDeltaTime * 5);
+//  AddReward(
+////            runForwardTowardsTargetReward
+////            facingReward * velReward //max reward is moving towards while facing otherwise it is a penalty
+////            +0.02f * Vector3.Dot(m_WalkDir.normalized, m_JdController.bodyPartsDict[hips].rb.velocity)
+////            + 0.02f * Vector3.Dot(m_OrientationCube.transform.forward,Vector3.ClampMagnitude(m_JdController.bodyPartsDict[hips].rb.velocity,5))
+//            +0.01f * Vector3.Dot(m_OrientationCube.transform.forward,
+//                Vector3.ClampMagnitude(m_JdController.bodyPartsDict[hips].rb.velocity, 3))
+//            + 0.01f * Vector3.Dot(m_OrientationCube.transform.forward, hips.forward)
+//
+////            + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, chest.rotation) //reward looking at
+////            + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation) //reward looking at
+////            + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, head.rotation) //reward looking at
+////            + 0.015f * (Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation) - 1) *
+////            .5f //penalize not looking at
+////            + 0.015f * (Quaternion.Dot(m_OrientationCube.transform.rotation, head.rotation) - 1) *
+////            .5f //penalize not looking at
+//
+//            + 0.005f * (head.position.y - shinL.position.y)
+//            + 0.005f * (head.position.y - shinR.position.y)
+////            + 0.01f * (head.position.y - shinL.position.y)
+////            + 0.01f * (head.position.y - shinR.position.y)
+////            - 0.005f * Mathf.Clamp(m_JdController.bodyPartsDict[handL].rb.velocity.magnitude,
+////                6, 9999)
+////            - 0.005f * Mathf.Clamp(m_JdController.bodyPartsDict[handR].rb.velocity.magnitude,
+////                6, 9999)
+////            + 0.02f * (head.position.y - hips.position.y)
+////            - 0.01f * Vector3.Distance(m_JdController.bodyPartsDict[head].rb.velocity,
+////                m_JdController.bodyPartsDict[hips].rb.velocity)
+//        );

    }

    }
    
    
-//    void FixedUpdate()
-//    {
-////        UpdateOrientationCube();
-//        //reward looking at
-////        float facingReward = + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation)
-////                             + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, head.rotation);
-//
-////        print($"FacingRewardDot {facingReward}");
-////        float velReward = +0.02f * Vector3.Dot(m_OrientationCube.transform.forward,m_OrientationCube.transform.InverseTransformVector(m_JdController.bodyPartsDict[hips].rb.velocity));
-////        print($"VelRewardDot {velReward}");
-////        float velReward = +0.02f * Vector3.Dot(m_WalkDir.normalized, m_JdController.bodyPartsDict[hips].rb.velocity);
-//
-//
-//
-//
-//
-////        //Multiplying these amplifies the reward.
-////        float facingReward = + 0.1f * Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation)
-////                             + 0.1f * Quaternion.Dot(m_OrientationCube.transform.rotation, head.rotation);
-////        float velReward = +0.2f * Vector3.Dot(m_OrientationCube.transform.forward,m_JdController.bodyPartsDict[hips].rb.velocity); //because we are observing in local space???
-////        float runForwardTowardsTargetReward = facingReward * Mathf.Clamp(velReward, 0, 15);
-//        
-////        print(Quaternion.Angle(hips.transform.rotation, thighL.transform.rotation));
-//
-//
-////        print($"Combined {runForwardTowardsTargetReward}");
-////        float runBackwardsTowardsTargetReward = facingReward * Mathf.Clamp(velReward, -1, 0);
-//        // Set reward for this step according to mixture of the following elements.
-//        // a. Velocity alignment with goal direction.
-//        // b. Rotation alignment with goal direction.
-//        // c. Encourage head height.
-//        // d. Discourage head movement.
+    void FixedUpdate()
+    {
+//        UpdateOrientationCube();
+        //reward looking at
+//        float facingReward = + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation)
+//                             + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, head.rotation);
+
+//        print($"FacingRewardDot {facingReward}");
+//        float velReward = +0.02f * Vector3.Dot(m_OrientationCube.transform.forward,m_OrientationCube.transform.InverseTransformVector(m_JdController.bodyPartsDict[hips].rb.velocity));
+//        print($"VelRewardDot {velReward}");
+//        float velReward = +0.02f * Vector3.Dot(m_WalkDir.normalized, m_JdController.bodyPartsDict[hips].rb.velocity);
+
+
+
+
+
+//        //Multiplying these amplifies the reward.
+//        float facingReward = + 0.1f * Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation)
+//                             + 0.1f * Quaternion.Dot(m_OrientationCube.transform.rotation, head.rotation);
+//        float velReward = +0.2f * Vector3.Dot(m_OrientationCube.transform.forward,m_JdController.bodyPartsDict[hips].rb.velocity); //because we are observing in local space???
+//        float runForwardTowardsTargetReward = facingReward * Mathf.Clamp(velReward, 0, 15);
+        
+//        print(Quaternion.Angle(hips.transform.rotation, thighL.transform.rotation));
+
+
+//        print($"Combined {runForwardTowardsTargetReward}");
+//        float runBackwardsTowardsTargetReward = facingReward * Mathf.Clamp(velReward, -1, 0);
+        // Set reward for this step according to mixture of the following elements.
+        // a. Velocity alignment with goal direction.
+        // b. Rotation alignment with goal direction.
+        // c. Encourage head height.
+        // d. Discourage head movement.
+        AddReward(
+//            runForwardTowardsTargetReward
+//            facingReward * velReward //max reward is moving towards while facing otherwise it is a penalty
+//            +0.02f * Vector3.Dot(m_WalkDir.normalized, m_JdController.bodyPartsDict[hips].rb.velocity)
+//            + 0.02f * Vector3.Dot(m_OrientationCube.transform.forward,Vector3.ClampMagnitude(m_JdController.bodyPartsDict[hips].rb.velocity,5))
+            +0.01f * Vector3.Dot(m_OrientationCube.transform.forward,
+                Vector3.ClampMagnitude(m_JdController.bodyPartsDict[hips].rb.velocity, 3))
+            + 0.01f * Vector3.Dot(m_OrientationCube.transform.forward, hips.forward)
+
+//            + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, chest.rotation) //reward looking at
+//            + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation) //reward looking at
+//            + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, head.rotation) //reward looking at
+//            + 0.015f * (Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation) - 1) *
+//            .5f //penalize not looking at
+//            + 0.015f * (Quaternion.Dot(m_OrientationCube.transform.rotation, head.rotation) - 1) *
+//            .5f //penalize not looking at
+
+            + 0.005f * (head.position.y - shinL.position.y)
+            + 0.005f * (head.position.y - shinR.position.y)
+//            + 0.01f * (head.position.y - shinL.position.y)
+//            + 0.01f * (head.position.y - shinR.position.y)
+//            - 0.005f * Mathf.Clamp(m_JdController.bodyPartsDict[handL].rb.velocity.magnitude,
+//                6, 9999)
+//            - 0.005f * Mathf.Clamp(m_JdController.bodyPartsDict[handR].rb.velocity.magnitude,
+//                6, 9999)
+//            + 0.02f * (head.position.y - hips.position.y)
+//            - 0.01f * Vector3.Distance(m_JdController.bodyPartsDict[head].rb.velocity,
+//                m_JdController.bodyPartsDict[hips].rb.velocity)
+        );
+//        var handLVel = m_JdController.bodyPartsDict[handL].rb.velocity.magnitude;
+//        var handRVel = m_JdController.bodyPartsDict[handR].rb.velocity.magnitude;
+//        if (handLVel > 6)
+//        {
+//            AddReward(-0.005f * handLVel);
+//        }
+//        if (handRVel > 6)
+//        {
+//            AddReward(-0.005f * handRVel);
+//        }
+        
+        
+//        //SUNDAY VERSION
-//            +0.01f * Vector3.Dot(m_OrientationCube.transform.forward,
-//                Vector3.ClampMagnitude(m_JdController.bodyPartsDict[hips].rb.velocity, 3))
-//            + 0.01f * Vector3.Dot(m_OrientationCube.transform.forward, hips.forward)
-//
-////            + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, chest.rotation) //reward looking at
+//            + 0.01f * Vector3.Dot(m_OrientationCube.transform.forward,Vector3.ClampMagnitude(m_JdController.bodyPartsDict[hips].rb.velocity,3))
-////            + 0.015f * (Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation) - 1) *
-////            .5f //penalize not looking at
-////            + 0.015f * (Quaternion.Dot(m_OrientationCube.transform.rotation, head.rotation) - 1) *
-////            .5f //penalize not looking at
+//            + 0.015f * (Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation) - 1) * .5f //penalize not looking at
+//            + 0.015f * (Quaternion.Dot(m_OrientationCube.transform.rotation, head.rotation) - 1) * .5f //penalize not looking at
-//            + 0.005f * (head.position.y - shinL.position.y)
-//            + 0.005f * (head.position.y - shinR.position.y)
-////            + 0.01f * (head.position.y - shinL.position.y)
-////            + 0.01f * (head.position.y - shinR.position.y)
-////            - 0.005f * Mathf.Clamp(m_JdController.bodyPartsDict[handL].rb.velocity.magnitude,
-////                6, 9999)
-////            - 0.005f * Mathf.Clamp(m_JdController.bodyPartsDict[handR].rb.velocity.magnitude,
-////                6, 9999)
+//            
+//            
-////        var handLVel = m_JdController.bodyPartsDict[handL].rb.velocity.magnitude;
-////        var handRVel = m_JdController.bodyPartsDict[handR].rb.velocity.magnitude;
-////        if (handLVel > 6)
-////        {
-////            AddReward(-0.005f * handLVel);
-////        }
-////        if (handRVel > 6)
-////        {
-////            AddReward(-0.005f * handRVel);
-////        }
-//        
-//        
-////        //SUNDAY VERSION
-////        AddReward(
-//////            runForwardTowardsTargetReward
-//////            facingReward * velReward //max reward is moving towards while facing otherwise it is a penalty
-//////            +0.02f * Vector3.Dot(m_WalkDir.normalized, m_JdController.bodyPartsDict[hips].rb.velocity)
-//////            + 0.02f * Vector3.Dot(m_OrientationCube.transform.forward,Vector3.ClampMagnitude(m_JdController.bodyPartsDict[hips].rb.velocity,5))
-////            + 0.01f * Vector3.Dot(m_OrientationCube.transform.forward,Vector3.ClampMagnitude(m_JdController.bodyPartsDict[hips].rb.velocity,3))
-//////            + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation) //reward looking at
-//////            + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, head.rotation) //reward looking at
-////            + 0.015f * (Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation) - 1) * .5f //penalize not looking at
-////            + 0.015f * (Quaternion.Dot(m_OrientationCube.transform.rotation, head.rotation) - 1) * .5f //penalize not looking at
-////
-////            
-////            
-//////            + 0.02f * (head.position.y - hips.position.y)
-//////            - 0.01f * Vector3.Distance(m_JdController.bodyPartsDict[head].rb.velocity,
-//////                m_JdController.bodyPartsDict[hips].rb.velocity)
-////        );
-//        
-//        
-//        
-//        
-////        // Set reward for this step according to mixture of the following elements.
-////        // a. Velocity alignment with goal direction.
-////        // b. Rotation alignment with goal direction.
-////        // c. Encourage head height.
-////        // d. Discourage head movement.
-////        m_WalkDir = target.position - m_OrientationCube.transform.position;
-////        AddReward(
-////            +0.03f * Vector3.Dot(m_WalkDir.normalized, m_JdController.bodyPartsDict[hips].rb.velocity)
-////            + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation)
-////            + 0.02f * (head.position.y - hips.position.y)
-////            - 0.01f * Vector3.Distance(m_JdController.bodyPartsDict[head].rb.velocity,
-////                m_JdController.bodyPartsDict[hips].rb.velocity)
-////        );
-////        m_WalkDir = target.position - m_JdController.bodyPartsDict[hips].rb.position;
-////        AddReward(
-////            +0.03f * Vector3.Dot(m_WalkDir.normalized, m_JdController.bodyPartsDict[hips].rb.velocity)
-////            + 0.01f * Vector3.Dot(m_WalkDir.normalized, hips.forward)
-////            + 0.02f * (head.position.y - hips.position.y)
-////            - 0.01f * Vector3.Distance(m_JdController.bodyPartsDict[head].rb.velocity,
-////                m_JdController.bodyPartsDict[hips].rb.velocity)
-////        );
-//    }
+        
+        
+        
+        
+//        // Set reward for this step according to mixture of the following elements.
+//        // a. Velocity alignment with goal direction.
+//        // b. Rotation alignment with goal direction.
+//        // c. Encourage head height.
+//        // d. Discourage head movement.
+//        m_WalkDir = target.position - m_OrientationCube.transform.position;
+//        AddReward(
+//            +0.03f * Vector3.Dot(m_WalkDir.normalized, m_JdController.bodyPartsDict[hips].rb.velocity)
+//            + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation)
+//            + 0.02f * (head.position.y - hips.position.y)
+//            - 0.01f * Vector3.Distance(m_JdController.bodyPartsDict[head].rb.velocity,
+//                m_JdController.bodyPartsDict[hips].rb.velocity)
+//        );
+//        m_WalkDir = target.position - m_JdController.bodyPartsDict[hips].rb.position;
+//        AddReward(
+//            +0.03f * Vector3.Dot(m_WalkDir.normalized, m_JdController.bodyPartsDict[hips].rb.velocity)
+//            + 0.01f * Vector3.Dot(m_WalkDir.normalized, hips.forward)
+//            + 0.02f * (head.position.y - hips.position.y)
+//            - 0.01f * Vector3.Distance(m_JdController.bodyPartsDict[head].rb.velocity,
+//                m_JdController.bodyPartsDict[hips].rb.velocity)
+//        );
+    }

    /// <summary>
    /// Loop over body parts and reset them to initial conditions.