added dirIndicator and orientCubeGizmo

5 年前 · 81e976ac
--- a/Project/Assets/ML-Agents/Examples/SharedAssets/Resources/OrientationCube.prefab
+++ b/Project/Assets/ML-Agents/Examples/SharedAssets/Resources/OrientationCube.prefab
  m_Icon: {fileID: 0}
  m_NavMeshLayer: 0
  m_StaticEditorFlags: 0
-  m_IsActive: 1
+  m_IsActive: 0
 --- !u!4 &2591864625898824440
 Transform:
  m_ObjectHideFlags: 0
--- a/Project/Assets/ML-Agents/Examples/Walker/Prefabs/DynamicPlatformWalker.prefab
+++ b/Project/Assets/ML-Agents/Examples/Walker/Prefabs/DynamicPlatformWalker.prefab
--- a/Project/Assets/ML-Agents/Examples/Walker/Scenes/WalkerDynamic.unity
+++ b/Project/Assets/ML-Agents/Examples/Walker/Scenes/WalkerDynamic.unity
--- a/Project/Assets/ML-Agents/Examples/Walker/Scripts/WalkerAgentDynamic.cs
+++ b/Project/Assets/ML-Agents/Examples/Walker/Scripts/WalkerAgentDynamic.cs
 using Unity.MLAgents;
 using Unity.MLAgentsExamples;
 using Unity.MLAgents.Sensors;
+using UnityEditor;
+using BodyPart = Unity.MLAgentsExamples.BodyPart;
+    [Header("Walking Speed")]
+    [Space(10)]
+    public float maximumWalkingSpeed = 999; //The max walk velocity magnitude an agent will be rewarded for
+    
+    [Header("Orientation Cube")]
+    [Space(10)]
+    //Toggles the wireframe gizmo drawn for the orientation cube in OnDrawGizmosSelected.
+    //The Header/Space attributes sit on this field because Unity only draws decorators
+    //for serialized fields; the private cube reference below is not serialized.
+    public bool showOrientationCubeGizmo = true;
+    //Moved over the hips and rotated to the walk direction in UpdateOrientationCube.
+    public Transform directionIndicator;
+    //This will be used as a stable observation platform for the ragdoll to use.
+    GameObject m_OrientationCube;
+    //Walk direction, flattened on the y axis in UpdateOrientationCube.
+    Vector3 m_WalkDir;
+    //Look rotation computed from m_WalkDir in UpdateOrientationCube.
+    Quaternion m_WalkDirLookRot;
+    
+    public float targetSpawnRadius;
-    public float targetSpawnRadius;
-    [Header("Walk Direction Worldspace")] 
-//    public Vector3 walkDirWorldspace = Vector3.right;
-
-    //ORIENTATION
-    Vector3 m_WalkDir;
-    Quaternion m_WalkDirLookRot;
-    Matrix4x4 m_worldPosMatrix;
-    
+
+    [Header("Body Parts")]
+    [Space(10)]
    public Transform hips;
    public Transform chest;
    public Transform spine;
    Rigidbody m_SpineRb;

    EnvironmentParameters m_ResetParams;
+    
-    private GameObject m_OrientationCube;
-    public Quaternion cubeRotation;
-    public Quaternion hipsRotation;
+
+        //Spawn an orientation cube
-//        m_OrientationCube.transform.SetParent(transform.parent);
+        
        UpdateOrientationCube();

        m_JdController = GetComponent<JointDriveController>();
        //GROUND CHECK
        sensor.AddObservation(bp.groundContact.touchingGround ? 1 : 0); // Is this bp touching the ground
        
-//        //RELATIVE RB VELOCITY
-//        var velocityRelativeToLookRotationToTarget = m_worldPosMatrix.inverse.MultiplyVector(bp.rb.velocity);
-//        sensor.AddObservation(velocityRelativeToLookRotationToTarget);
-//
-//        //RELATIVE RB ANGULAR VELOCITY
-//        var angularVelocityRelativeToLookRotationToTarget = m_worldPosMatrix.inverse.MultiplyVector(bp.rb.angularVelocity);
-//        sensor.AddObservation(angularVelocityRelativeToLookRotationToTarget);
-
-        //RELATIVE RB VELOCITIES --WAS
-//        sensor.AddObservation(m_OrientationCube.transform.InverseTransformVector(bp.rb.velocity));
-//        sensor.AddObservation(m_OrientationCube.transform.InverseTransformVector(bp.rb.angularVelocity));
-        sensor.AddObservation(m_OrientationCube.transform.InverseTransformDirection(bp.rb.velocity)); //best if cube fixed rot?
-        sensor.AddObservation(m_OrientationCube.transform.InverseTransformDirection(bp.rb.angularVelocity)); //best if cube fixed rot?
-//        sensor.AddObservation(bp.rb.velocity - m_JdController.bodyPartsDict[hips].rb.velocity);
-//        sensor.AddObservation(bp.rb.angularVelocity - m_JdController.bodyPartsDict[hips].rb.angularVelocity);
-        
+        //Get velocities in the context of our orientation cube's space
+        //Note: You can get these velocities in world space as well but it may not train as well.
+        sensor.AddObservation(m_OrientationCube.transform.InverseTransformDirection(bp.rb.velocity));
+        sensor.AddObservation(m_OrientationCube.transform.InverseTransformDirection(bp.rb.angularVelocity));
-//        sensor.AddObservation(bp.rb.velocity);
-//        sensor.AddObservation(bp.rb.angularVelocity);
-//        var localPosRelToHips = hips.InverseTransformPoint(rb.position);
-//        sensor.AddObservation(localPosRelToHips);
-//        sensor.AddObservation(m_OrientationCube.transform.InverseTransformPointUnscaled(bp.rb.position));
-//        sensor.AddObservation(hips.InverseTransformPointUnscaled(bp.rb.position));
-        
-//        if (bp.rb.transform != hips && bp.rb.transform != handL && bp.rb.transform != handR &&
-//            bp.rb.transform != footL && bp.rb.transform != footR && bp.rb.transform != head)
-//        if (bp.rb.transform != hips && bp.rb.transform != handL && bp.rb.transform != handR &&
-//            bp.rb.transform != footL && bp.rb.transform != footR)
-//            sensor.AddObservation(RagdollHelpers.GetJointRotation(bp.joint));
-//            sensor.AddObservation(bp.currentXNormalizedRot);
-//            sensor.AddObservation(bp.currentYNormalizedRot);
-//            sensor.AddObservation(bp.currentZNormalizedRot);
-//    /// <summary>
-//    /// Add relevant information on each body part to observations.
-//    /// </summary>
-//    public void CollectObservationBodyPart(BodyPart bp, VectorSensor sensor)
-//    {
-//        var rb = bp.rb;
-//        sensor.AddObservation(bp.groundContact.touchingGround ? 1 : 0); // Is this bp touching the ground
-//        sensor.AddObservation(rb.velocity);
-//        sensor.AddObservation(rb.angularVelocity);
-//        var localPosRelToHips = hips.InverseTransformPoint(rb.position);
-//        sensor.AddObservation(localPosRelToHips);
-//
-//        if (bp.rb.transform != hips && bp.rb.transform != handL && bp.rb.transform != handR &&
-//            bp.rb.transform != footL && bp.rb.transform != footR && bp.rb.transform != head)
-//        {
-//            sensor.AddObservation(bp.currentXNormalizedRot);
-//            sensor.AddObservation(bp.currentYNormalizedRot);
-//            sensor.AddObservation(bp.currentZNormalizedRot);
-//            sensor.AddObservation(bp.currentStrength / m_JdController.maxJointForceLimit);
-//        }
-//    }
-
-//        m_JdController.GetCurrentJointForces();
-        
-        // Update pos to target
-//        m_WalkDir = target.position - hips.position;
-//        m_WalkDir = target.position - m_OrientationCube.transform.position;
-        
-//        print($"fromTo: {Quaternion.FromToRotation(m_OrientationCube.transform.forward, hips.forward)} rotDelta {RagdollHelpers.GetRotationDelta(m_WalkDirLookRot, hips.rotation)}");
-
-//        sensor.AddObservation(RagdollHelpers.GetRotationDelta(m_WalkDirLookRot, hips.rotation));
-//        sensor.AddObservation(Quaternion.FromToRotation(m_OrientationCube.transform.forward, hips.forward));
-//        sensor.AddObservation(Quaternion.FromToRotation(m_OrientationCube.transform.forward, head.forward));
-//        sensor.AddObservation(RagdollHelpers.GetRotationDelta(m_WalkDirLookRot, chest.rotation));
-//        sensor.AddObservation(RagdollHelpers.GetRotationDelta(m_WalkDirLookRot, head.rotation));
-//        m_TargetDirMatrix = Matrix4x4.TRS(Vector3.zero, m_LookRotation, Vector3.one);
-
-
-        
-//        //HIP RAYCAST FOR HEIGHT
-//        RaycastHit hit;
-//        if (Physics.Raycast(hips.position, Vector3.down, out hit, 10.0f))
-//        {
-//            sensor.AddObservation(hit.distance);
-//        }
-//        else
-//            sensor.AddObservation(10.0f);
-        
-//        //ORIENTATION MATRIX
-//        Vector3 worldPosMatrixPos = hips.position;
-//        worldPosMatrixPos.y = .5f;
-//        m_worldPosMatrix  = Matrix4x4.TRS(worldPosMatrixPos, Quaternion.identity, Vector3.one);
-        
-//        sensor.AddObservation(m_WalkDir.normalized);
-
-        //HIP POS REL TO MATRIX
-//        sensor.AddObservation(hips.position - worldPosMatrixPos);
-//        sensor.AddObservation(hips.position - m_OrientationCube.transform.position);
-//        sensor.AddObservation(m_JdController.bodyPartsDict[hips].rb.position);
-        
-//        sensor.AddObservation(hips.forward);
-//        sensor.AddObservation(hips.up);
-
-////        print(m_OrientationCube.transform.rotation.eulerAngles);
-////        Debug.DrawRay(m_OrientationCube.transform.position, m_OrientationCube.transform.InverseTransformVector(m_JdController.bodyPartsDict[hips].rb.velocity), Color.green,Time.fixedDeltaTime * 5);
-//  AddReward(
-////            runForwardTowardsTargetReward
-////            facingReward * velReward //max reward is moving towards while facing otherwise it is a penalty
-////            +0.02f * Vector3.Dot(m_WalkDir.normalized, m_JdController.bodyPartsDict[hips].rb.velocity)
-////            + 0.02f * Vector3.Dot(m_OrientationCube.transform.forward,Vector3.ClampMagnitude(m_JdController.bodyPartsDict[hips].rb.velocity,5))
-//            +0.01f * Vector3.Dot(m_OrientationCube.transform.forward,
-//                Vector3.ClampMagnitude(m_JdController.bodyPartsDict[hips].rb.velocity, 3))
-//            + 0.01f * Vector3.Dot(m_OrientationCube.transform.forward, hips.forward)
-//
-////            + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, chest.rotation) //reward looking at
-////            + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation) //reward looking at
-////            + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, head.rotation) //reward looking at
-////            + 0.015f * (Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation) - 1) *
-////            .5f //penalize not looking at
-////            + 0.015f * (Quaternion.Dot(m_OrientationCube.transform.rotation, head.rotation) - 1) *
-////            .5f //penalize not looking at
-//
-//            + 0.005f * (head.position.y - shinL.position.y)
-//            + 0.005f * (head.position.y - shinR.position.y)
-////            + 0.01f * (head.position.y - shinL.position.y)
-////            + 0.01f * (head.position.y - shinR.position.y)
-////            - 0.005f * Mathf.Clamp(m_JdController.bodyPartsDict[handL].rb.velocity.magnitude,
-////                6, 9999)
-////            - 0.005f * Mathf.Clamp(m_JdController.bodyPartsDict[handR].rb.velocity.magnitude,
-////                6, 9999)
-////            + 0.02f * (head.position.y - hips.position.y)
-////            - 0.01f * Vector3.Distance(m_JdController.bodyPartsDict[head].rb.velocity,
-////                m_JdController.bodyPartsDict[hips].rb.velocity)
-//        );
-
    }

    public override void OnActionReceived(float[] vectorAction)
        bpDict[forearmL].SetJointStrength(vectorAction[++i]);
        bpDict[armR].SetJointStrength(vectorAction[++i]);
        bpDict[forearmR].SetJointStrength(vectorAction[++i]);
-//        print(Vector3.Dot(m_OrientationCube.transform.forward,
-//            Vector3.ClampMagnitude(m_JdController.bodyPartsDict[hips].rb.velocity, 3)));
-//        print((Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation) - 1) * .5f);
-//        print(Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation));
-//        print(Vector3.Dot(m_OrientationCube.transform.forward, hips.transform.forward));
    }

    void UpdateOrientationCube()
-        m_WalkDir.y = 0;
-//        m_WalkDir = walkDirWorldspace;
-        m_WalkDirLookRot = Quaternion.LookRotation(m_WalkDir);
-        
+        m_WalkDir.y = 0; //flatten dir on the y
+        m_WalkDirLookRot = Quaternion.LookRotation(m_WalkDir); //get our look rot to the target
-        Vector3 oCubePos = hips.position;
-//        oCubePos.y = -.45f;
-        m_OrientationCube.transform.position = oCubePos;
+        m_OrientationCube.transform.position = hips.position;
-
-        cubeRotation = m_OrientationCube.transform.rotation;
-        hipsRotation = hips.rotation;
+        
+        directionIndicator.position = new Vector3(hips.position.x, directionIndicator.position.y, hips.position.z);
+        directionIndicator.rotation = m_WalkDirLookRot;
-    
    
    void FixedUpdate()
    {
                }
            }
        }
+        
-        //reward looking at
-//        float facingReward = + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation)
-//                             + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, head.rotation);
-
-//        print($"FacingRewardDot {facingReward}");
-//        float velReward = +0.02f * Vector3.Dot(m_OrientationCube.transform.forward,m_OrientationCube.transform.InverseTransformVector(m_JdController.bodyPartsDict[hips].rb.velocity));
-//        print($"VelRewardDot {velReward}");
-//        float velReward = +0.02f * Vector3.Dot(m_WalkDir.normalized, m_JdController.bodyPartsDict[hips].rb.velocity);
-
-
-
-
-
-//        //Multiplying these amplifies the reward.
-//        float facingReward = + 0.1f * Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation)
-//                             + 0.1f * Quaternion.Dot(m_OrientationCube.transform.rotation, head.rotation);
-//        float velReward = +0.2f * Vector3.Dot(m_OrientationCube.transform.forward,m_JdController.bodyPartsDict[hips].rb.velocity); //because we are observing in local space???
-//        float runForwardTowardsTargetReward = facingReward * Mathf.Clamp(velReward, 0, 15);
-//        print(Quaternion.Angle(hips.transform.rotation, thighL.transform.rotation));
-
-
-//        print($"Combined {runForwardTowardsTargetReward}");
-//        float runBackwardsTowardsTargetReward = facingReward * Mathf.Clamp(velReward, -1, 0);
-        // d. Discourage head movement.
-//            runForwardTowardsTargetReward
-//            facingReward * velReward //max reward is moving towards while facing otherwise it is a penalty
-//            +0.02f * Vector3.Dot(m_WalkDir.normalized, m_JdController.bodyPartsDict[hips].rb.velocity)
-//            + 0.02f * Vector3.Dot(m_OrientationCube.transform.forward,Vector3.ClampMagnitude(m_JdController.bodyPartsDict[hips].rb.velocity,5))
-                Vector3.ClampMagnitude(m_JdController.bodyPartsDict[hips].rb.velocity, 999))
-//            + 0.01f * Vector3.Dot(m_OrientationCube.transform.forward, hips.forward)
+                Vector3.ClampMagnitude(m_JdController.bodyPartsDict[hips].rb.velocity, maximumWalkingSpeed))
-
-//            + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, chest.rotation) //reward looking at
-//            + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation) //reward looking at
-//            + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, head.rotation) //reward looking at
-//            + 0.015f * (Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation) - 1) *
-//            .5f //penalize not looking at
-//            + 0.015f * (Quaternion.Dot(m_OrientationCube.transform.rotation, head.rotation) - 1) *
-//            .5f //penalize not looking at
-
-//            + 0.005f * (head.position.y - shinL.position.y)
-//            + 0.005f * (head.position.y - shinR.position.y)
-//            + 0.01f * (head.position.y - shinL.position.y)
-//            + 0.01f * (head.position.y - shinR.position.y)
-//            - 0.005f * Mathf.Clamp(m_JdController.bodyPartsDict[handL].rb.velocity.magnitude,
-//                6, 9999)
-//            - 0.005f * Mathf.Clamp(m_JdController.bodyPartsDict[handR].rb.velocity.magnitude,
-//                6, 9999)
-//            + 0.02f * (head.position.y - hips.position.y)
-//            - 0.01f * Vector3.Distance(m_JdController.bodyPartsDict[head].rb.velocity,
-//                m_JdController.bodyPartsDict[hips].rb.velocity)
-//        var handLVel = m_JdController.bodyPartsDict[handL].rb.velocity.magnitude;
-//        var handRVel = m_JdController.bodyPartsDict[handR].rb.velocity.magnitude;
-//        if (handLVel > 6)
-//        {
-//            AddReward(-0.005f * handLVel);
-//        }
-//        if (handRVel > 6)
-//        {
-//            AddReward(-0.005f * handRVel);
-//        }
-        
-        
-//        //SUNDAY VERSION
-//        AddReward(
-////            runForwardTowardsTargetReward
-////            facingReward * velReward //max reward is moving towards while facing otherwise it is a penalty
-////            +0.02f * Vector3.Dot(m_WalkDir.normalized, m_JdController.bodyPartsDict[hips].rb.velocity)
-////            + 0.02f * Vector3.Dot(m_OrientationCube.transform.forward,Vector3.ClampMagnitude(m_JdController.bodyPartsDict[hips].rb.velocity,5))
-//            + 0.01f * Vector3.Dot(m_OrientationCube.transform.forward,Vector3.ClampMagnitude(m_JdController.bodyPartsDict[hips].rb.velocity,3))
-////            + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation) //reward looking at
-////            + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, head.rotation) //reward looking at
-//            + 0.015f * (Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation) - 1) * .5f //penalize not looking at
-//            + 0.015f * (Quaternion.Dot(m_OrientationCube.transform.rotation, head.rotation) - 1) * .5f //penalize not looking at
-//
-//            
-//            
-////            + 0.02f * (head.position.y - hips.position.y)
-////            - 0.01f * Vector3.Distance(m_JdController.bodyPartsDict[head].rb.velocity,
-////                m_JdController.bodyPartsDict[hips].rb.velocity)
-//        );
-        
-        
-        
-        
-//        // Set reward for this step according to mixture of the following elements.
-//        // a. Velocity alignment with goal direction.
-//        // b. Rotation alignment with goal direction.
-//        // c. Encourage head height.
-//        // d. Discourage head movement.
-//        m_WalkDir = target.position - m_OrientationCube.transform.position;
-//        AddReward(
-//            +0.03f * Vector3.Dot(m_WalkDir.normalized, m_JdController.bodyPartsDict[hips].rb.velocity)
-//            + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation)
-//            + 0.02f * (head.position.y - hips.position.y)
-//            - 0.01f * Vector3.Distance(m_JdController.bodyPartsDict[head].rb.velocity,
-//                m_JdController.bodyPartsDict[hips].rb.velocity)
-//        );
-//        m_WalkDir = target.position - m_JdController.bodyPartsDict[hips].rb.position;
-//        AddReward(
-//            +0.03f * Vector3.Dot(m_WalkDir.normalized, m_JdController.bodyPartsDict[hips].rb.velocity)
-//            + 0.01f * Vector3.Dot(m_WalkDir.normalized, hips.forward)
-//            + 0.02f * (head.position.y - hips.position.y)
-//            - 0.01f * Vector3.Distance(m_JdController.bodyPartsDict[head].rb.velocity,
-//                m_JdController.bodyPartsDict[hips].rb.velocity)
-//        );
    }

    /// <summary>
        target.position = newTargetPos + ground.position;
    }

-    
-    
-//        print("OnEpisodeBegin");
-//        if (m_WalkDir != Vector3.zero)
-//        {
-//            transform.rotation = Quaternion.LookRotation(m_WalkDir);
-//        }
+        
+        //Random start rotation
+        
-
-//        transform.Rotate(Vector3.up, Random.Range(0.0f, 360.0f));

        if (detectTargets && !targetIsStatic)
        {
    public void SetResetParameters()
    {
        SetTorsoMass();
+    }
+
+    /// <summary>
+    /// Draws a green wireframe of the orientation cube and a ray along its forward
+    /// axis while the application is playing and
+    /// <see cref="showOrientationCubeGizmo"/> is enabled.
+    /// </summary>
+    private void OnDrawGizmosSelected()
+    {
+        //Nothing to draw when the gizmo is disabled, outside play mode, or when the cube is missing
+        if (!showOrientationCubeGizmo || !Application.isPlaying || m_OrientationCube == null)
+        {
+            return;
+        }
+
+        var cubeTransform = m_OrientationCube.transform;
+        var previousColor = Gizmos.color;
+        var previousMatrix = Gizmos.matrix;
+
+        Gizmos.color = Color.green;
+        //Draw in the cube's local space. localToWorldMatrix already applies the cube's scale,
+        //so a unit cube is drawn here; passing localScale as the size would apply the scale twice.
+        Gizmos.matrix = cubeTransform.localToWorldMatrix;
+        Gizmos.DrawWireCube(Vector3.zero, Vector3.one);
+        Gizmos.DrawRay(Vector3.zero, Vector3.forward);
+
+        //Restore the shared gizmo state so later gizmo drawing is unaffected
+        Gizmos.color = previousColor;
+        Gizmos.matrix = previousMatrix;
    }
 }
--- a/Project/Assets/ML-Agents/Examples/WallJump/Materials/TransparentWall.mat
+++ b/Project/Assets/ML-Agents/Examples/WallJump/Materials/TransparentWall.mat
 Material:
  serializedVersion: 6
  m_ObjectHideFlags: 0
-  m_PrefabParentObject: {fileID: 0}
-  m_PrefabInternal: {fileID: 0}
+  m_CorrespondingSourceObject: {fileID: 0}
+  m_PrefabInstance: {fileID: 0}
+  m_PrefabAsset: {fileID: 0}
  m_Name: TransparentWall
  m_Shader: {fileID: 46, guid: 0000000000000000f000000000000000, type: 0}
  m_ShaderKeywords: _ALPHABLEND_ON