Browse Source

more cleanup

/active-variablespeed
HH 4 years ago
Current commit 40b5cb96
1 changed file with 40 additions and 188 deletions
Project/Assets/ML-Agents/Examples/Walker/Scripts/WalkerAgent.cs (228 changed lines)

public class WalkerAgent : Agent
{
-    [Header("Walk Speed")]
-    [Range(0, 15)]
-    public float walkingSpeed = 15; //The walking speed to try and achieve
+    [Header("Walk Speed")] [Range(0, 15)] public float walkingSpeed = 15; //The walking speed to try and achieve
-    [Header("Target To Walk Towards")]
-    public TargetController target; //Target the agent will walk towards.
+    [Header("Target To Walk Towards")] public TargetController target; //Target the agent will walk towards.
    [Header("Body Parts")] public Transform hips;
    public Transform chest;

    JointDriveController m_JdController;
    EnvironmentParameters m_ResetParams;
-    // private WalkGroup walkGroup;
-    // walkGroup = FindObjectOfType<WalkGroup>();
    //Setup each body part
    m_JdController = GetComponent<JointDriveController>();
    m_JdController.SetupBodyPart(hips);

    orientationCube.UpdateOrientation(hips, target.transform);
    rewardManager.ResetEpisodeRewards();
-    walkingSpeed = randomizeWalkSpeedEachEpisode? Random.Range(0.0f, m_maxWalkingSpeed): walkingSpeed; //Random Walk Speed
+    walkingSpeed =
+        randomizeWalkSpeedEachEpisode ? Random.Range(0.0f, m_maxWalkingSpeed) : walkingSpeed; //Random Walk Speed
    SetResetParameters();
}

/// </summary>
public override void CollectObservations(VectorSensor sensor)
{
-    // avgVelValue = GetVelocity();
-    // sensor.AddObservation(VelocityInverseLerp(cubeForward * walkingSpeed, avgVelValue));
-    // sensor.AddObservation(VelocityInverseLerp( cubeForward * walkGroup.walkingSpeed)); //
-    sensor.AddObservation(VelocityInverseLerp( cubeForward * walkingSpeed));
+    sensor.AddObservation(GetMatchingVelocityInverseLerp(cubeForward * walkingSpeed, GetAvgVelocity()));
-    sensor.AddObservation(walkingSpeed/m_maxWalkingSpeed);
-    // sensor.AddObservation(walkGroup.walkingSpeed/walkGroup.m_maxWalkingSpeed);
+    sensor.AddObservation(walkingSpeed / m_maxWalkingSpeed);
    sensor.AddObservation(Quaternion.FromToRotation(hips.forward, cubeForward));
    sensor.AddObservation(Quaternion.FromToRotation(head.forward, cubeForward));

    UpdateRewards();
}
-Vector3 GetVelocity()
-{
+//Returns the average velocity of all the rigidbodies
+Vector3 GetAvgVelocity()
+{
-    Vector3 avg = Vector3.zero;
-    // velSum += m_JdController.bodyPartsDict[hips].rb.velocity;
-    // velSum += m_JdController.bodyPartsDict[spine].rb.velocity;
-    // velSum += m_JdController.bodyPartsDict[chest].rb.velocity;
-    // velSum += m_JdController.bodyPartsDict[head].rb.velocity;
-    // avg = velSum/4;
+    Vector3 avgVel = Vector3.zero;
-    int counter = 0;
+    int numOfRB = 0;
-    counter++;
+    numOfRB++;
-    avg = velSum/counter;
-    return avg;
-    // velInverseLerpVal = VelocityInverseLerp(cubeForward * walkingSpeed, avgVelValue);
+    avgVel = velSum / numOfRB;
+    return avgVel;
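
The diff viewer only shows fragments of the renamed helper. A minimal sketch of the full method, assuming the foreach loop over m_JdController.bodyPartsList that the commented-out code deleted further down in this diff iterates the same way:

//Returns the average velocity of all the rigidbodies
Vector3 GetAvgVelocity()
{
    Vector3 velSum = Vector3.zero;
    Vector3 avgVel = Vector3.zero;
    int numOfRB = 0;
    //assumed loop shape, mirroring the removed bodyPartsList loops below
    foreach (var item in m_JdController.bodyPartsList)
    {
        numOfRB++;
        velSum += item.rb.velocity;
    }
    avgVel = velSum / numOfRB;
    return avgVel;
}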
-public float velInverseLerpVal;
-public float hipsVelMag;
-public float lookAtTargetReward; //reward for looking at the target
-public float matchSpeedReward; //reward for matching the desired walking speed.
-public float headHeightOverFeetReward; //reward for standing up straight-ish
-public float hurryUpReward = -1; //don't waste time
-// public float headHeightOverFeetReward; //reward for standing up straight-ish
-public float bpVelPenaltyThisStep = 0;
-    //This reward will approach 1 if it matches and approach zero as it deviates
-    // matchSpeedReward =
-    //     Mathf.Exp(-0.1f * (cubeForward * walkingSpeed -
-    //         m_JdController.bodyPartsDict[hips].rb.velocity).sqrMagnitude);
-    hipsVelMag = m_JdController.bodyPartsDict[hips].rb.velocity.magnitude;
-    // velInverseLerpVal =
-    //     Mathf.InverseLerp(0, walkingSpeed, m_JdController.bodyPartsDict[hips].rb.velocity.magnitude);
+    //This reward will approach 1 if it matches perfectly and approach zero as it deviates
+    var matchSpeedReward = GetMatchingVelocityInverseLerp(cubeForward * walkingSpeed, GetAvgVelocity());
-    // var moveTowardsTargetReward = Vector3.Dot(cubeForward,
-    //     Vector3.ClampMagnitude(m_JdController.bodyPartsDict[hips].rb.velocity, maximumWalkingSpeed));
-    // b. Rotation alignment with goal direction.
-    // lookAtTargetReward = Vector3.Dot(cubeForward, head.forward);
-    lookAtTargetReward = (Vector3.Dot(cubeForward, head.forward) + 1) * .5F;
-    // lookAtTargetReward =
-    //     Mathf.Exp(-0.1f * (cubeForward * walkingSpeed -
-    //         m_JdController.bodyPartsDict[hips].rb.velocity).sqrMagnitude);
+    // b. Rotation alignment with target direction.
+    //This reward will approach 1 if it faces the target direction perfectly and approach zero as it deviates
+    var lookAtTargetReward = (Vector3.Dot(cubeForward, head.forward) + 1) * .5F;
-    headHeightOverFeetReward =
-        Mathf.Clamp01(((head.position.y - footL.position.y) + (head.position.y - footR.position.y))/ 10); //Should normalize to ~1
-    // AddReward(
-    //     +0.02f * moveTowardsTargetReward
-    //     + 0.01f * lookAtTargetReward
-    //     + 0.01f * headHeightOverFeetReward
-    // );
-    // rewardManager.UpdateReward("matchSpeed", matchSpeedReward);
-    // rewardManager.UpdateReward("lookAtTarget", lookAtTargetReward);
-    // rewardManager.UpdateReward("headHeightOverFeet", headHeightOverFeetReward);
-    // rewardManager.UpdateReward("hurryUp", hurryUpReward/MaxStep);
-    // //VELOCITY REWARDS
-    // bpVelPenaltyThisStep = 0;
-    // foreach (var item in m_JdController.bodyPartsList)
-    // {
-    //     var velDelta = Mathf.Clamp(item.rb.velocity.magnitude - walkingSpeed, 0, 1);
-    //     bpVelPenaltyThisStep += velDelta;
-    // }
-    // rewardManager.UpdateReward("bpVel", bpVelPenaltyThisStep);
-    // headHeightOverFeetReward =
-    //     Mathf.Clamp01(((head.position.y - footL.position.y) + (head.position.y - footR.position.y))/ 10); //Should normalize to ~1
-    // Vector3 velSum = Vector3.zero;
-    // avgVelValue = Vector3.zero;
-    // velSum += m_JdController.bodyPartsDict[hips].rb.velocity;
-    // velSum += m_JdController.bodyPartsDict[spine].rb.velocity;
-    // velSum += m_JdController.bodyPartsDict[chest].rb.velocity;
-    // velSum += m_JdController.bodyPartsDict[head].rb.velocity;
-    // avgVelValue = velSum/4;
-    // velInverseLerpVal = VelocityInverseLerp(cubeForward * walkingSpeed, avgVelValue);
-    velInverseLerpVal = VelocityInverseLerp(cubeForward * walkingSpeed);
-    rewardManager.rewardsDict["matchSpeed"].rewardThisStep = velInverseLerpVal;
+    rewardManager.rewardsDict["matchSpeed"].rewardThisStep = matchSpeedReward;
-    rewardManager.rewardsDict["headHeightOverFeet"].rewardThisStep = headHeightOverFeetReward;
-    // velInverseLerpVal = VelocityInverseLerp(cubeForward * walkGroup.walkingSpeed);
-    rewardManager.UpdateReward("productOfAllRewards", velInverseLerpVal * lookAtTargetReward);
-    // rewardManager.UpdateReward("productOfAllRewards", velInverseLerpVal * lookAtTargetReward * headHeightOverFeetReward);
-    // velInverseLerpVal = VelocityInverseLerp(Vector3.zero, cubeForward * walkingSpeed, avgVelValue);
-    // rewardManager.rewardsDict["headHeightOverFeet"].rewardThisStep = headHeightOverFeetReward;
+    rewardManager.UpdateReward("productOfAllRewards", matchSpeedReward * lookAtTargetReward);
}
-    //This reward will approach 1 if it matches and approach zero as it deviates
-    // velInverseLerpVal =
-    //     Mathf.InverseLerp(0, walkingSpeed, avgVelValue.magnitude);
-    // rewardManager.UpdateReward("productOfAllRewards", velInverseLerpVal * lookAtTargetReward * headHeightOverFeetReward);
-    // matchSpeedReward =
-    //     Mathf.Exp(-0.1f * (cubeForward * walkingSpeed -
-    //         avgVelValue).sqrMagnitude);
-    // matchSpeedReward =
-    //     Mathf.Exp(-0.01f * (cubeForward * walkingSpeed -
-    //         avgVelValue).sqrMagnitude);
-    // rewardManager.UpdateReward("productOfAllRewards", matchSpeedReward * lookAtTargetReward * headHeightOverFeetReward);
-    // Vector3 velSum = Vector3.zero;
-    //
-    // int counter = 0;
-    // avgVelValue = Vector3.zero;
-    // foreach (var item in m_JdController.bodyPartsList)
-    // {
-    //     counter++;
-    //     velSum += item.rb.velocity;
-    // }
-    // avgVelValue = velSum/counter;
-    // //This reward will approach 1 if it matches and approach zero as it deviates
-    // matchSpeedReward =
-    //     Mathf.Exp(-0.1f * (cubeForward * walkingSpeed -
-    //         avgVelValue).sqrMagnitude);
-    // rewardManager.UpdateReward("productOfAllRewards", matchSpeedReward * lookAtTargetReward * headHeightOverFeetReward);
}
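
Stitched together from the added lines above, the per-step reward after this commit plausibly reduces to the product of two [0, 1] terms. A sketch, assuming UpdateRewards is the method these hunks belong to and that cubeForward comes from the orientation cube as in the removed FixedUpdate further down:

void UpdateRewards()
{
    var cubeForward = orientationCube.transform.forward;
    //This reward will approach 1 if it matches perfectly and approach zero as it deviates
    var matchSpeedReward = GetMatchingVelocityInverseLerp(cubeForward * walkingSpeed, GetAvgVelocity());
    //This reward will approach 1 if it faces the target direction perfectly and approach zero as it deviates
    var lookAtTargetReward = (Vector3.Dot(cubeForward, head.forward) + 1) * .5F;
    rewardManager.rewardsDict["matchSpeed"].rewardThisStep = matchSpeedReward;
    rewardManager.UpdateReward("productOfAllRewards", matchSpeedReward * lookAtTargetReward);
}

Multiplying the two terms rather than summing them zeroes the step reward whenever either speed matching or target facing fails completely, which forces the agent to satisfy both objectives at once.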
-public Vector3 bodyVelocity;
-//value of 0 means we are matching velocity perfectly
-//value of 1 means we are not matching velocity
-public float velDeltaDistance; //distance between the goal and actual vel
-public float VelocityInverseLerp(Vector3 velocityGoal)
+public float GetMatchingVelocityInverseLerp(Vector3 velocityGoal, Vector3 actualVelocity)
-    bodyVelocity = GetVelocity();
+    //distance between our actual velocity and goal velocity
+    var velDeltaMagnitude = Mathf.Clamp(Vector3.Distance(actualVelocity, velocityGoal), 0, walkingSpeed);
-    // velDeltaDistance = Vector3.Distance(avgVelValue, velocityGoal);
-    // velDeltaDistance = Vector3.Distance(avgVelValue, velocityGoal);
-    // velDeltaDistance = Mathf.Clamp(Vector3.Distance(bodyVelocity, velocityGoal), 0, walkGroup.walkingSpeed);
-    velDeltaDistance = Mathf.Clamp(Vector3.Distance(bodyVelocity, velocityGoal), 0, walkingSpeed);
-    // float percent = Mathf.InverseLerp(walkingSpeed, 0, velDeltaDistance);
-    // float percent = Mathf.InverseLerp(velocityGoal.magnitude, 0, velDeltaDistance);
-    // float percent = Mathf.Pow(1 - Mathf.Pow(velDeltaDistance/walkGroup.walkingSpeed, 2), 2);
-    float percent = Mathf.Pow(1 - Mathf.Pow(velDeltaDistance/walkingSpeed, 2), 2);
-    return percent;
+    //get the value on a declining sigmoid shaped curve that decays from 1 to 0
+    return Mathf.Pow(1 - Mathf.Pow(velDeltaMagnitude / walkingSpeed, 2), 2);
-// public float VelocityInverseLerp(Vector3 velocityGoal)
-// {
-//     avgVelValue = GetVelocity();
-//
-//     velDeltaDistance = Vector3.Distance(avgVelValue, velocityGoal);
-////     float percent = Mathf.InverseLerp(walkingSpeed, 0, velDeltaDistance);
-//     float percent = Mathf.InverseLerp(velocityGoal.magnitude, 0, velDeltaDistance);
-//     return percent;
-// }
-// public float VelocityInverseLerp(Vector3 velocityGoal, Vector3 currentVel)
-// {
-//     avgVelValue = GetVelocity();
-//
-//     velDeltaDistance = Vector3.Distance(currentVel, velocityGoal);
-//     float percent = Mathf.InverseLerp(walkingSpeed, 0, velDeltaDistance);
-//     return percent;
-// }
-// public float VelocityInverseLerp(Vector3 a, Vector3 b, Vector3 value)
-// {
-//     Vector3 AB = b - a;
-//     Vector3 AV = value - a;
-//     return Vector3.Dot(AV, AB) / Vector3.Dot(AB, AB);
-// }
-// void FixedUpdate()
-// {
-//     var cubeForward = orientationCube.transform.forward;
-//     orientationCube.UpdateOrientation(hips, target.transform);
-//     // Set reward for this step according to mixture of the following elements.
-//     // a. Velocity alignment with goal direction.
-//     var moveTowardsTargetReward = Vector3.Dot(cubeForward,
-//         Vector3.ClampMagnitude(m_JdController.bodyPartsDict[hips].rb.velocity, maximumWalkingSpeed));
-//     // b. Rotation alignment with goal direction.
-//     var lookAtTargetReward = Vector3.Dot(cubeForward, head.forward);
-//     // c. Encourage head height. //Should normalize to ~1
-//     var headHeightOverFeetReward =
-//         ((head.position.y - footL.position.y) + (head.position.y - footR.position.y) / 10);
-//     AddReward(
-//         + 0.02f * moveTowardsTargetReward
-//         + 0.02f * lookAtTargetReward
-//         + 0.005f * headHeightOverFeetReward
-//     );
-// }
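
The new matching-speed curve clamps the velocity error to [0, walkingSpeed] and maps it through (1 - (error / walkingSpeed)^2)^2, the "declining sigmoid shaped curve" named in the comment above. A quick numeric check with a hypothetical standalone helper (walkingSpeed = 10 assumed for the sample values):

float MatchCurve(float velDelta, float walkingSpeed)
{
    //same clamp and curve as GetMatchingVelocityInverseLerp above
    velDelta = Mathf.Clamp(velDelta, 0, walkingSpeed);
    return Mathf.Pow(1 - Mathf.Pow(velDelta / walkingSpeed, 2), 2);
}
// MatchCurve(0f, 10f)  == 1f      (perfect match)
// MatchCurve(5f, 10f)  == 0.5625f ((1 - 0.25)^2)
// MatchCurve(10f, 10f) == 0f      (error at or beyond walkingSpeed)

Because the same scalar is also fed to the policy as an observation in CollectObservations, the agent directly observes the quantity it is being rewarded for.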
/// <summary>
/// Agent touched the target

{
    SetTorsoMass();
}
}
}