reduced maxAngVel, enabled enhanced determinism, cont spec

5 年前 · f7e650a6
--- a/Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/JointDriveController.cs
+++ b/Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/JointDriveController.cs
                startingPos = t.position,
                startingRot = t.rotation
            };
-            bp.rb.maxAngularVelocity = 100;
+            bp.rb.maxAngularVelocity = 30;
+//            bp.rb.maxAngularVelocity = 100;

            // Add & setup the ground contact script
            bp.groundContact = t.GetComponent<GroundContact>();
--- a/Project/Assets/ML-Agents/Examples/Walker/Prefabs/WalkerPairDynamic.prefab
+++ b/Project/Assets/ML-Agents/Examples/Walker/Prefabs/WalkerPairDynamic.prefab
--- a/Project/Assets/ML-Agents/Examples/Walker/Scenes/WalkerDynamic.unity
+++ b/Project/Assets/ML-Agents/Examples/Walker/Scenes/WalkerDynamic.unity
--- a/Project/Assets/ML-Agents/Examples/Walker/Scripts/WalkerAgentDynamic.cs
+++ b/Project/Assets/ML-Agents/Examples/Walker/Scripts/WalkerAgentDynamic.cs
        Vector3 oCubePos = hips.position;
        oCubePos.y = -.45f;
        m_OrientationCube = Instantiate(Resources.Load<GameObject>("OrientationCube"), oCubePos, Quaternion.identity);
-        m_OrientationCube.transform.SetParent(transform);
-        
+        m_OrientationCube.transform.SetParent(transform.parent);
+        UpdateOrientationCube();
+
        m_JdController = GetComponent<JointDriveController>();
        m_JdController.SetupBodyPart(hips);
        m_JdController.SetupBodyPart(chest);
 //        var angularVelocityRelativeToLookRotationToTarget = m_worldPosMatrix.inverse.MultiplyVector(bp.rb.angularVelocity);
 //        sensor.AddObservation(angularVelocityRelativeToLookRotationToTarget);

-        //RELATIVE RB VELOCITIES
-        sensor.AddObservation(m_OrientationCube.transform.InverseTransformVector(bp.rb.velocity));
-        sensor.AddObservation(m_OrientationCube.transform.InverseTransformVector(bp.rb.angularVelocity));
+        //RELATIVE RB VELOCITIES --WAS
+//        sensor.AddObservation(m_OrientationCube.transform.InverseTransformVector(bp.rb.velocity));
+//        sensor.AddObservation(m_OrientationCube.transform.InverseTransformVector(bp.rb.angularVelocity));
+        sensor.AddObservation(m_OrientationCube.transform.InverseTransformDirection(bp.rb.velocity)); //best if cube fixed rot?
+        sensor.AddObservation(m_OrientationCube.transform.InverseTransformDirection(bp.rb.angularVelocity)); //best if cube fixed rot?
+//        sensor.AddObservation(bp.rb.velocity - m_JdController.bodyPartsDict[hips].rb.velocity);
+//        sensor.AddObservation(bp.rb.angularVelocity - m_JdController.bodyPartsDict[hips].rb.angularVelocity);
-//        sensor.AddObservation(rb.velocity);
-//        sensor.AddObservation(rb.angularVelocity);
+        
+        
+//        sensor.AddObservation(bp.rb.velocity);
+//        sensor.AddObservation(bp.rb.angularVelocity);
-        sensor.AddObservation(m_OrientationCube.transform.InverseTransformPointUnscaled(bp.rb.position));
+//        sensor.AddObservation(m_OrientationCube.transform.InverseTransformPointUnscaled(bp.rb.position));
+        sensor.AddObservation(m_OrientationCube.transform.InverseTransformDirection(bp.rb.position - hips.position));  //best
 //        sensor.AddObservation(hips.InverseTransformPointUnscaled(bp.rb.position));
        

        

        sensor.AddObservation(RagdollHelpers.GetRotationDelta(m_WalkDirLookRot, hips.rotation));
+//        sensor.AddObservation(RagdollHelpers.GetRotationDelta(m_WalkDirLookRot, chest.rotation));
        sensor.AddObservation(RagdollHelpers.GetRotationDelta(m_WalkDirLookRot, head.rotation));
 //        m_TargetDirMatrix = Matrix4x4.TRS(Vector3.zero, m_LookRotation, Vector3.one);


 //        print(m_OrientationCube.transform.rotation.eulerAngles);
 //        Debug.DrawRay(m_OrientationCube.transform.position, m_OrientationCube.transform.InverseTransformVector(m_JdController.bodyPartsDict[hips].rb.velocity), Color.green,Time.fixedDeltaTime * 5);
+  AddReward(
+//            runForwardTowardsTargetReward
+//            facingReward * velReward //max reward is moving towards while facing otherwise it is a penalty
+//            +0.02f * Vector3.Dot(m_WalkDir.normalized, m_JdController.bodyPartsDict[hips].rb.velocity)
+//            + 0.02f * Vector3.Dot(m_OrientationCube.transform.forward,Vector3.ClampMagnitude(m_JdController.bodyPartsDict[hips].rb.velocity,5))
+            +0.01f * Vector3.Dot(m_OrientationCube.transform.forward,
+                Vector3.ClampMagnitude(m_JdController.bodyPartsDict[hips].rb.velocity, 3))
+            + 0.01f * Vector3.Dot(m_OrientationCube.transform.forward, hips.forward)
+//            + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, chest.rotation) //reward looking at
+//            + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation) //reward looking at
+//            + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, head.rotation) //reward looking at
+//            + 0.015f * (Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation) - 1) *
+//            .5f //penalize not looking at
+//            + 0.015f * (Quaternion.Dot(m_OrientationCube.transform.rotation, head.rotation) - 1) *
+//            .5f //penalize not looking at
+
+            + 0.005f * (head.position.y - shinL.position.y)
+            + 0.005f * (head.position.y - shinR.position.y)
+//            + 0.01f * (head.position.y - shinL.position.y)
+//            + 0.01f * (head.position.y - shinR.position.y)
+//            - 0.005f * Mathf.Clamp(m_JdController.bodyPartsDict[handL].rb.velocity.magnitude,
+//                6, 9999)
+//            - 0.005f * Mathf.Clamp(m_JdController.bodyPartsDict[handR].rb.velocity.magnitude,
+//                6, 9999)
+//            + 0.02f * (head.position.y - hips.position.y)
+//            - 0.01f * Vector3.Distance(m_JdController.bodyPartsDict[head].rb.velocity,
+//                m_JdController.bodyPartsDict[hips].rb.velocity)
+        );

    }

        bpDict[forearmL].SetJointStrength(vectorAction[++i]);
        bpDict[armR].SetJointStrength(vectorAction[++i]);
        bpDict[forearmR].SetJointStrength(vectorAction[++i]);
+//        print(Vector3.Dot(m_OrientationCube.transform.forward,
+//            Vector3.ClampMagnitude(m_JdController.bodyPartsDict[hips].rb.velocity, 3)));
+//        print((Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation) - 1) * .5f);
+//        print(Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation));
+//        print(Vector3.Dot(m_OrientationCube.transform.forward, hips.transform.forward));
    }

    void UpdateOrientationCube()
        
    }
    
-    void FixedUpdate()
-    {
-        UpdateOrientationCube();
-        //reward looking at
-//        float facingReward = + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation)
-//                             + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, head.rotation);
-
-//        print($"FacingRewardDot {facingReward}");
-//        float velReward = +0.02f * Vector3.Dot(m_OrientationCube.transform.forward,m_OrientationCube.transform.InverseTransformVector(m_JdController.bodyPartsDict[hips].rb.velocity));
-//        print($"VelRewardDot {velReward}");
-//        float velReward = +0.02f * Vector3.Dot(m_WalkDir.normalized, m_JdController.bodyPartsDict[hips].rb.velocity);
-
-
-
-
-
-//        //Multiplying these amplifies the reward.
-//        float facingReward = + 0.1f * Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation)
-//                             + 0.1f * Quaternion.Dot(m_OrientationCube.transform.rotation, head.rotation);
-//        float velReward = +0.2f * Vector3.Dot(m_OrientationCube.transform.forward,m_JdController.bodyPartsDict[hips].rb.velocity); //because we are observing in local space???
-//        float runForwardTowardsTargetReward = facingReward * Mathf.Clamp(velReward, 0, 15);
-        
-//        print(Quaternion.Angle(hips.transform.rotation, thighL.transform.rotation));
-
-
-//        print($"Combined {runForwardTowardsTargetReward}");
-//        float runBackwardsTowardsTargetReward = facingReward * Mathf.Clamp(velReward, -1, 0);
-        // Set reward for this step according to mixture of the following elements.
-        // a. Velocity alignment with goal direction.
-        // b. Rotation alignment with goal direction.
-        // c. Encourage head height.
-        // d. Discourage head movement.
-        AddReward(
-//            runForwardTowardsTargetReward
-//            facingReward * velReward //max reward is moving towards while facing otherwise it is a penalty
-//            +0.02f * Vector3.Dot(m_WalkDir.normalized, m_JdController.bodyPartsDict[hips].rb.velocity)
-//            + 0.02f * Vector3.Dot(m_OrientationCube.transform.forward,Vector3.ClampMagnitude(m_JdController.bodyPartsDict[hips].rb.velocity,5))
-            + 0.01f * Vector3.Dot(m_OrientationCube.transform.forward,Vector3.ClampMagnitude(m_JdController.bodyPartsDict[hips].rb.velocity,3))
-//            + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation) //reward looking at
-//            + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, head.rotation) //reward looking at
-            + 0.015f * (Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation) - 1) * .5f //penalize not looking at
-            + 0.015f * (Quaternion.Dot(m_OrientationCube.transform.rotation, head.rotation) - 1) * .5f //penalize not looking at
-
-            
-            
-//            + 0.02f * (head.position.y - hips.position.y)
-//            - 0.01f * Vector3.Distance(m_JdController.bodyPartsDict[head].rb.velocity,
-//                m_JdController.bodyPartsDict[hips].rb.velocity)
-        );
-        
-        
-        
-        
+    
+//    void FixedUpdate()
+//    {
+////        UpdateOrientationCube();
+//        //reward looking at
+////        float facingReward = + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation)
+////                             + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, head.rotation);
+//
+////        print($"FacingRewardDot {facingReward}");
+////        float velReward = +0.02f * Vector3.Dot(m_OrientationCube.transform.forward,m_OrientationCube.transform.InverseTransformVector(m_JdController.bodyPartsDict[hips].rb.velocity));
+////        print($"VelRewardDot {velReward}");
+////        float velReward = +0.02f * Vector3.Dot(m_WalkDir.normalized, m_JdController.bodyPartsDict[hips].rb.velocity);
+//
+//
+//
+//
+//
+////        //Multiplying these amplifies the reward.
+////        float facingReward = + 0.1f * Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation)
+////                             + 0.1f * Quaternion.Dot(m_OrientationCube.transform.rotation, head.rotation);
+////        float velReward = +0.2f * Vector3.Dot(m_OrientationCube.transform.forward,m_JdController.bodyPartsDict[hips].rb.velocity); //because we are observing in local space???
+////        float runForwardTowardsTargetReward = facingReward * Mathf.Clamp(velReward, 0, 15);
+//        
+////        print(Quaternion.Angle(hips.transform.rotation, thighL.transform.rotation));
+//
+//
+////        print($"Combined {runForwardTowardsTargetReward}");
+////        float runBackwardsTowardsTargetReward = facingReward * Mathf.Clamp(velReward, -1, 0);
-//        m_WalkDir = target.position - m_OrientationCube.transform.position;
-//            +0.03f * Vector3.Dot(m_WalkDir.normalized, m_JdController.bodyPartsDict[hips].rb.velocity)
-//            + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation)
-//            + 0.02f * (head.position.y - hips.position.y)
-//            - 0.01f * Vector3.Distance(m_JdController.bodyPartsDict[head].rb.velocity,
-//                m_JdController.bodyPartsDict[hips].rb.velocity)
+////            runForwardTowardsTargetReward
+////            facingReward * velReward //max reward is moving towards while facing otherwise it is a penalty
+////            +0.02f * Vector3.Dot(m_WalkDir.normalized, m_JdController.bodyPartsDict[hips].rb.velocity)
+////            + 0.02f * Vector3.Dot(m_OrientationCube.transform.forward,Vector3.ClampMagnitude(m_JdController.bodyPartsDict[hips].rb.velocity,5))
+//            +0.01f * Vector3.Dot(m_OrientationCube.transform.forward,
+//                Vector3.ClampMagnitude(m_JdController.bodyPartsDict[hips].rb.velocity, 3))
+//            + 0.01f * Vector3.Dot(m_OrientationCube.transform.forward, hips.forward)
+//
+////            + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, chest.rotation) //reward looking at
+////            + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation) //reward looking at
+////            + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, head.rotation) //reward looking at
+////            + 0.015f * (Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation) - 1) *
+////            .5f //penalize not looking at
+////            + 0.015f * (Quaternion.Dot(m_OrientationCube.transform.rotation, head.rotation) - 1) *
+////            .5f //penalize not looking at
+//
+//            + 0.005f * (head.position.y - shinL.position.y)
+//            + 0.005f * (head.position.y - shinR.position.y)
+////            + 0.01f * (head.position.y - shinL.position.y)
+////            + 0.01f * (head.position.y - shinR.position.y)
+////            - 0.005f * Mathf.Clamp(m_JdController.bodyPartsDict[handL].rb.velocity.magnitude,
+////                6, 9999)
+////            - 0.005f * Mathf.Clamp(m_JdController.bodyPartsDict[handR].rb.velocity.magnitude,
+////                6, 9999)
+////            + 0.02f * (head.position.y - hips.position.y)
+////            - 0.01f * Vector3.Distance(m_JdController.bodyPartsDict[head].rb.velocity,
+////                m_JdController.bodyPartsDict[hips].rb.velocity)
-//        m_WalkDir = target.position - m_JdController.bodyPartsDict[hips].rb.position;
-//        AddReward(
-//            +0.03f * Vector3.Dot(m_WalkDir.normalized, m_JdController.bodyPartsDict[hips].rb.velocity)
-//            + 0.01f * Vector3.Dot(m_WalkDir.normalized, hips.forward)
-//            + 0.02f * (head.position.y - hips.position.y)
-//            - 0.01f * Vector3.Distance(m_JdController.bodyPartsDict[head].rb.velocity,
-//                m_JdController.bodyPartsDict[hips].rb.velocity)
-//        );
-    }
+////        var handLVel = m_JdController.bodyPartsDict[handL].rb.velocity.magnitude;
+////        var handRVel = m_JdController.bodyPartsDict[handR].rb.velocity.magnitude;
+////        if (handLVel > 6)
+////        {
+////            AddReward(-0.005f * handLVel);
+////        }
+////        if (handRVel > 6)
+////        {
+////            AddReward(-0.005f * handRVel);
+////        }
+//        
+//        
+////        //SUNDAY VERSION
+////        AddReward(
+//////            runForwardTowardsTargetReward
+//////            facingReward * velReward //max reward is moving towards while facing otherwise it is a penalty
+//////            +0.02f * Vector3.Dot(m_WalkDir.normalized, m_JdController.bodyPartsDict[hips].rb.velocity)
+//////            + 0.02f * Vector3.Dot(m_OrientationCube.transform.forward,Vector3.ClampMagnitude(m_JdController.bodyPartsDict[hips].rb.velocity,5))
+////            + 0.01f * Vector3.Dot(m_OrientationCube.transform.forward,Vector3.ClampMagnitude(m_JdController.bodyPartsDict[hips].rb.velocity,3))
+//////            + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation) //reward looking at
+//////            + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, head.rotation) //reward looking at
+////            + 0.015f * (Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation) - 1) * .5f //penalize not looking at
+////            + 0.015f * (Quaternion.Dot(m_OrientationCube.transform.rotation, head.rotation) - 1) * .5f //penalize not looking at
+////
+////            
+////            
+//////            + 0.02f * (head.position.y - hips.position.y)
+//////            - 0.01f * Vector3.Distance(m_JdController.bodyPartsDict[head].rb.velocity,
+//////                m_JdController.bodyPartsDict[hips].rb.velocity)
+////        );
+//        
+//        
+//        
+//        
+////        // Set reward for this step according to mixture of the following elements.
+////        // a. Velocity alignment with goal direction.
+////        // b. Rotation alignment with goal direction.
+////        // c. Encourage head height.
+////        // d. Discourage head movement.
+////        m_WalkDir = target.position - m_OrientationCube.transform.position;
+////        AddReward(
+////            +0.03f * Vector3.Dot(m_WalkDir.normalized, m_JdController.bodyPartsDict[hips].rb.velocity)
+////            + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation)
+////            + 0.02f * (head.position.y - hips.position.y)
+////            - 0.01f * Vector3.Distance(m_JdController.bodyPartsDict[head].rb.velocity,
+////                m_JdController.bodyPartsDict[hips].rb.velocity)
+////        );
+////        m_WalkDir = target.position - m_JdController.bodyPartsDict[hips].rb.position;
+////        AddReward(
+////            +0.03f * Vector3.Dot(m_WalkDir.normalized, m_JdController.bodyPartsDict[hips].rb.velocity)
+////            + 0.01f * Vector3.Dot(m_WalkDir.normalized, hips.forward)
+////            + 0.02f * (head.position.y - hips.position.y)
+////            - 0.01f * Vector3.Distance(m_JdController.bodyPartsDict[head].rb.velocity,
+////                m_JdController.bodyPartsDict[hips].rb.velocity)
+////        );
+//    }

    /// <summary>
    /// Loop over body parts and reset them to initial conditions.
 //            transform.rotation = Quaternion.LookRotation(m_WalkDir);
 //        }
        transform.rotation = Quaternion.Euler(0, Random.Range(0.0f, 360.0f), 0);
-        UpdateOrientationCube();
+//        UpdateOrientationCube();

 //        transform.Rotate(Vector3.up, Random.Range(0.0f, 360.0f));

--- a/Project/ProjectSettings/DynamicsManager.asset
+++ b/Project/ProjectSettings/DynamicsManager.asset
    m_Extent: {x: 250, y: 250, z: 250}
  m_WorldSubdivisions: 8
  m_FrictionType: 0
-  m_EnableEnhancedDeterminism: 0
+  m_EnableEnhancedDeterminism: 1
  m_EnableUnifiedHeightmaps: 1
--- a/config/trainer_config.yaml
+++ b/config/trainer_config.yaml
            strength: 1.0
            gamma: 0.995

+# WalkerDynamic:
+#     normalize: true
+#     num_epoch: 8
+#     time_horizon: 1000
+#     batch_size: 2048
+#     buffer_size: 20480
+#     # time_horizon: 128
+#     # batch_size: 512 #2048
+#     # buffer_size: 4096 #20480
+#     max_steps: 7.5e6
+#     summary_freq: 30000
+#     num_layers: 3
+#     hidden_units: 128
+#     beta: 1.0e-2
+#     learning_rate: 1.0e-3
+#     reward_signals:
+#         extrinsic:
+#             strength: 1.0
+#             gamma: 0.995
+#         curiosity:
+#             strength: 0.02
+#             gamma: 0.99
+#             encoding_size: 256
+
 WalkerDynamic:
    normalize: true
    num_epoch: 3
--- a/Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/MeshSkewFix.cs
+++ b/Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/MeshSkewFix.cs
+using System.Collections;
+using System.Collections.Generic;
+using UnityEngine;
+
+
+/// <summary>
+/// One-shot helper that walks this object's hierarchy and looks up the mesh
+/// components on every child that carries a <see cref="ConfigurableJoint"/>.
+/// Marked <c>[ExecuteAlways]</c>, so <c>Update</c> is also invoked outside play mode.
+/// NOTE(review): the actual skew correction is not implemented yet; the pass
+/// currently only gathers the components it would need.
+/// </summary>
+[ExecuteAlways]
+public class MeshSkewFix : MonoBehaviour
+{
+    /// <summary>
+    /// Trigger flag: set to true to run a single pass. It is reset to false
+    /// as soon as the pass starts, so the pass runs once per toggle.
+    /// </summary>
+    public bool fix;
+
+    /// <summary>
+    /// Not read anywhere in this script.
+    /// NOTE(review): presumably the root of the rig to process - confirm intent.
+    /// </summary>
+    public GameObject rootGameObject;
+
+    /// <summary>
+    /// Runs one pass over all child transforms when <see cref="fix"/> is set.
+    /// </summary>
+    void Update()
+    {
+        if (!fix)
+        {
+            return;
+        }
+
+        // Clear the trigger first so the pass is not repeated on the next update.
+        fix = false;
+
+        foreach (var t in GetComponentsInChildren<Transform>())
+        {
+            // Only jointed body parts are of interest.
+            if (!t.GetComponent<ConfigurableJoint>())
+            {
+                continue;
+            }
+
+            var meshFilter = t.GetComponent<MeshFilter>();
+            // Was GetComponent<MeshFilter>() (copy-paste of the line above),
+            // which never returned the renderer.
+            var meshRend = t.GetComponent<MeshRenderer>();
+
+            // TODO: apply the skew correction using meshFilter / meshRend.
+        }
+    }
+}
--- a/Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/MeshSkewFix.cs.meta
+++ b/Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/MeshSkewFix.cs.meta
+fileFormatVersion: 2
+guid: cdd5a22612a4949bbb5672f2dca17d3b
+MonoImporter:
+  externalObjects: {}
+  serializedVersion: 2
+  defaultReferences: []
+  executionOrder: 0
+  icon: {instanceID: 0}
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: