init

5 年前 · 691c06e8
--- a/Project/Assets/ML-Agents/Examples/Crawler/Prefabs/DynamicPlatform.prefab
+++ b/Project/Assets/ML-Agents/Examples/Crawler/Prefabs/DynamicPlatform.prefab
      propertyPath: targetToLookAt
      value: 
      objectReference: {fileID: 7802320107249901494}
+    - target: {fileID: 2864902974773876700, guid: 0456c89e8c9c243d595b039fe7aa0bf9,
+        type: 3}
+      propertyPath: updatedByAgent
+      value: 1
+      objectReference: {fileID: 0}
    - target: {fileID: 4845971000000621469, guid: 0456c89e8c9c243d595b039fe7aa0bf9,
        type: 3}
      propertyPath: m_ConnectedAnchor.x
      propertyPath: m_BehaviorName
      value: CrawlerDynamic
      objectReference: {fileID: 0}
+    - target: {fileID: 4845971001715176648, guid: 0456c89e8c9c243d595b039fe7aa0bf9,
+        type: 3}
+      propertyPath: m_BrainParameters.VectorObservationSize
+      value: 32
+      objectReference: {fileID: 0}
-      objectReference: {fileID: 7738248088303878723}
+      objectReference: {fileID: 7802320107249901494}
    - target: {fileID: 4845971001715176649, guid: 0456c89e8c9c243d595b039fe7aa0bf9,
        type: 3}
      propertyPath: ground
    type: 3}
  m_PrefabInstance: {fileID: 6413179990576818696}
  m_PrefabAsset: {fileID: 0}
--- !u!114 &7738248088303878723 stripped
-MonoBehaviour:
-  m_CorrespondingSourceObject: {fileID: 3631016866778687563, guid: 46734abd0de454192b407379c6a4ab8d,
-    type: 3}
-  m_PrefabInstance: {fileID: 6413179990576818696}
-  m_PrefabAsset: {fileID: 0}
-  m_GameObject: {fileID: 0}
-  m_Enabled: 1
-  m_EditorHideFlags: 0
-  m_Script: {fileID: 11500000, guid: 3c8f113a8b8d94967b1b1782c549be81, type: 3}
-  m_Name: 
-  m_EditorClassIdentifier: 
--- a/Project/Assets/ML-Agents/Examples/Crawler/Scenes/CrawlerDynamicTarget.unity
+++ b/Project/Assets/ML-Agents/Examples/Crawler/Scenes/CrawlerDynamicTarget.unity
    - target: {fileID: 6813981368972186340, guid: 0058b366f9d6d44a3ba35beb06b0174b,
        type: 3}
      propertyPath: m_IsActive
-      value: 1
+      value: 0
      objectReference: {fileID: 0}
    - target: {fileID: 6815147845698256993, guid: 0058b366f9d6d44a3ba35beb06b0174b,
        type: 3}
    - target: {fileID: 6813981368972186340, guid: 0058b366f9d6d44a3ba35beb06b0174b,
        type: 3}
      propertyPath: m_IsActive
-      value: 1
+      value: 0
      objectReference: {fileID: 0}
    - target: {fileID: 6815147845698256993, guid: 0058b366f9d6d44a3ba35beb06b0174b,
        type: 3}
    - target: {fileID: 6813981368972186340, guid: 0058b366f9d6d44a3ba35beb06b0174b,
        type: 3}
      propertyPath: m_IsActive
-      value: 1
+      value: 0
      objectReference: {fileID: 0}
    - target: {fileID: 6815147845698256993, guid: 0058b366f9d6d44a3ba35beb06b0174b,
        type: 3}
    - target: {fileID: 6813981368972186340, guid: 0058b366f9d6d44a3ba35beb06b0174b,
        type: 3}
      propertyPath: m_IsActive
-      value: 1
+      value: 0
      objectReference: {fileID: 0}
    - target: {fileID: 6815147845698256993, guid: 0058b366f9d6d44a3ba35beb06b0174b,
        type: 3}
    - target: {fileID: 6813981368972186340, guid: 0058b366f9d6d44a3ba35beb06b0174b,
        type: 3}
      propertyPath: m_IsActive
-      value: 1
+      value: 0
      objectReference: {fileID: 0}
    - target: {fileID: 6815147845698256993, guid: 0058b366f9d6d44a3ba35beb06b0174b,
        type: 3}
    - target: {fileID: 6813981368972186340, guid: 0058b366f9d6d44a3ba35beb06b0174b,
        type: 3}
      propertyPath: m_IsActive
-      value: 1
+      value: 0
      objectReference: {fileID: 0}
    - target: {fileID: 6815147845698256993, guid: 0058b366f9d6d44a3ba35beb06b0174b,
        type: 3}
    - target: {fileID: 6813981368972186340, guid: 0058b366f9d6d44a3ba35beb06b0174b,
        type: 3}
      propertyPath: m_IsActive
-      value: 1
+      value: 0
      objectReference: {fileID: 0}
    - target: {fileID: 6815147845698256993, guid: 0058b366f9d6d44a3ba35beb06b0174b,
        type: 3}
    - target: {fileID: 6813981368972186340, guid: 0058b366f9d6d44a3ba35beb06b0174b,
        type: 3}
      propertyPath: m_IsActive
-      value: 1
+      value: 0
      objectReference: {fileID: 0}
    - target: {fileID: 6815147845698256993, guid: 0058b366f9d6d44a3ba35beb06b0174b,
        type: 3}
    - target: {fileID: 6813981368972186340, guid: 0058b366f9d6d44a3ba35beb06b0174b,
        type: 3}
      propertyPath: m_IsActive
-      value: 1
+      value: 0
      objectReference: {fileID: 0}
    - target: {fileID: 6815147845698256993, guid: 0058b366f9d6d44a3ba35beb06b0174b,
        type: 3}
  m_Modification:
    m_TransformParent: {fileID: 0}
    m_Modifications:
+    - target: {fileID: 2375859054548711005, guid: 0058b366f9d6d44a3ba35beb06b0174b,
+        type: 3}
+      propertyPath: m_IsActive
+      value: 1
+      objectReference: {fileID: 0}
    - target: {fileID: 6810587057221831324, guid: 0058b366f9d6d44a3ba35beb06b0174b,
        type: 3}
      propertyPath: m_LocalPosition.x
--- a/Project/Assets/ML-Agents/Examples/Crawler/Scripts/CrawlerAgent.cs
+++ b/Project/Assets/ML-Agents/Examples/Crawler/Scripts/CrawlerAgent.cs
 [RequireComponent(typeof(JointDriveController))] // Required to set joint forces
 public class CrawlerAgent : Agent
 {
-    public float maximumWalkingSpeed = 999; //The max walk velocity magnitude an agent will be rewarded for
-    Vector3 m_WalkDir; //Direction to the target
-    Quaternion m_WalkDirLookRot; //Will hold the rotation to our target
+    [Header("Walk Speed")]
+    [Range(0.1f, 10)]
+    [SerializeField]
+    //The walking speed to try and achieve
+    private float m_TargetWalkingSpeed = 10;
-    [Header("Target To Walk Towards")] [Space(10)]
-    public TargetController target; //Target the agent will walk towards.
+    public float TargetWalkingSpeed // property
+    {
+        get { return m_TargetWalkingSpeed; }
+        set { m_TargetWalkingSpeed = Mathf.Clamp(value, .1f, m_maxWalkingSpeed); }
+    }
+
+    const float m_maxWalkingSpeed = 10; //The max walking speed
+
+    //Should the agent sample a new goal velocity each episode?
+    //If true, TargetWalkingSpeed will be randomly set between 0.1 and m_maxWalkingSpeed in OnEpisodeBegin()
+    //If false, TargetWalkingSpeed keeps its current value as the goal walking speed
+    public bool randomizeWalkSpeedEachEpisode;
+
+    //The direction an agent will walk during training.
+    private Vector3 m_WorldDirToWalk = Vector3.right;
+    [Header("Target To Walk Towards")] public Transform target; //Target the agent will walk towards during training.

    [Header("Body Parts")] [Space(10)] public Transform body;
    public Transform leg0Upper;
    public Transform leg3Lower;


-    [Header("Orientation")] [Space(10)]
-    public OrientationCubeController orientationCube;
+    public OrientationCubeController m_OrientationCube;
+    //The indicator graphic gameobject that points towards the target
+    public DirectionIndicator m_DirectionIndicator;
-
-    [Header("Reward Functions To Use")] [Space(10)]
-    public bool rewardMovingTowardsTarget; // Agent should move towards target
-
-    public bool rewardFacingTarget; // Agent should face the target
-    public bool rewardUseTimePenalty; // Hurry up

    [Header("Foot Grounded Visualization")] [Space(10)]
    public bool useFootGroundedVisualization;

    public override void Initialize()
    {
-        orientationCube.UpdateOrientation(body, target.transform);
-
+        m_OrientationCube = GetComponentInChildren<OrientationCubeController>();
+        m_DirectionIndicator = GetComponentInChildren<DirectionIndicator>();
        m_JdController = GetComponent<JointDriveController>();

        //Setup each body part
        }

        //Random start rotation to help generalize
-        transform.rotation = Quaternion.Euler(0, Random.Range(0.0f, 360.0f), 0);
+        body.rotation = Quaternion.Euler(0, Random.Range(0.0f, 360.0f), 0);
-        orientationCube.UpdateOrientation(body, target.transform);
+        UpdateOrientationObjects();
+
+        //Set our goal walking speed
+        TargetWalkingSpeed =
+            randomizeWalkSpeedEachEpisode ? Random.Range(0.1f, m_maxWalkingSpeed) : TargetWalkingSpeed;
+
    }

    /// <summary>
    /// </summary>
    public override void CollectObservations(VectorSensor sensor)
    {
+        var cubeForward = m_OrientationCube.transform.forward;
+
+        //velocity we want to match
+        var velGoal = cubeForward * TargetWalkingSpeed;
+        //ragdoll's avg vel
+        var avgVel = GetAvgVelocity();
+
+        //distance between the goal velocity and the ragdoll's average velocity
+        sensor.AddObservation(Vector3.Distance(velGoal, avgVel));
+        //avg body vel relative to cube
+        sensor.AddObservation(m_OrientationCube.transform.InverseTransformDirection(avgVel));
+        //vel goal relative to cube
+        sensor.AddObservation(m_OrientationCube.transform.InverseTransformDirection(velGoal));
+        //rotation delta
+        sensor.AddObservation(Quaternion.FromToRotation(body.forward, cubeForward));
+        
-        sensor.AddObservation(orientationCube.transform.InverseTransformPoint(target.transform.position));
+        sensor.AddObservation(m_OrientationCube.transform.InverseTransformPoint(target.transform.position));

        RaycastHit hit;
        float maxRaycastDist = 10;
            CollectObservationBodyPart(bodyPart, sensor);
        }
    }
-
-    /// <summary>
-    /// Agent touched the target
-    /// </summary>
-    public void TouchedTarget()
-    {
-        AddReward(1f);
-    }
-
+    
+    
+//    /// <summary>
+//    /// Loop over body parts to add them to observation.
+//    /// </summary>
+//    public override void CollectObservations(VectorSensor sensor)
+//    {
+//        //Add pos of target relative to orientation cube
+//        sensor.AddObservation(m_OrientationCube.transform.InverseTransformPoint(target.transform.position));
+//
+//        RaycastHit hit;
+//        float maxRaycastDist = 10;
+//        if (Physics.Raycast(body.position, Vector3.down, out hit, maxRaycastDist))
+//        {
+//            sensor.AddObservation(hit.distance / maxRaycastDist);
+//        }
+//        else
+//            sensor.AddObservation(1);
+//
+//        foreach (var bodyPart in m_JdController.bodyPartsList)
+//        {
+//            CollectObservationBodyPart(bodyPart, sensor);
+//        }
+//    }
+    
    public override void OnActionReceived(float[] vectorAction)
    {
        // The dictionary with all the body parts in it are in the jdController

    void FixedUpdate()
    {
-        orientationCube.UpdateOrientation(body, target.transform);
+        UpdateOrientationObjects();

        // If enabled the feet will light up green when the foot is grounded.
        // This is just a visualization and isn't necessary for function
                ? groundedMaterial
                : unGroundedMaterial;
        }
+        
+        var cubeForward = m_OrientationCube.transform.forward;
-        if (rewardMovingTowardsTarget)
+        // a. Match target speed
+        //This reward will approach 1 if it matches perfectly and approach zero as it deviates
+        var matchSpeedReward = GetMatchingVelocityReward(cubeForward * TargetWalkingSpeed, GetAvgVelocity());
+
+        //Check for NaNs
+        if (float.IsNaN(matchSpeedReward))
-            RewardFunctionMovingTowards();
+            throw new ArgumentException(
+                "NaN in moveTowardsTargetReward.\n" +
+                $" cubeForward: {cubeForward}\n" +
+                $" hips.velocity: {m_JdController.bodyPartsDict[body].rb.velocity}\n" +
+                $" maximumWalkingSpeed: {m_maxWalkingSpeed}"
+            );
-        if (rewardFacingTarget)
+        // b. Rotation alignment with target direction.
+        //This reward will approach 1 if it faces the target direction perfectly and approach zero as it deviates
+        var lookAtTargetReward = (Vector3.Dot(cubeForward, body.forward) + 1) * .5F;
+
+        //Check for NaNs
+        if (float.IsNaN(lookAtTargetReward))
-            RewardFunctionFacingTarget();
+            throw new ArgumentException(
+                "NaN in lookAtTargetReward.\n" +
+                $" cubeForward: {cubeForward}\n" +
+                $" body.forward: {body.forward}"
+            );
-        if (rewardUseTimePenalty)
+        AddReward(matchSpeedReward * lookAtTargetReward);
+
+    }
+
+    //Update OrientationCube and DirectionIndicator
+    void UpdateOrientationObjects()
+    {
+        m_WorldDirToWalk = target.position - body.position;
+        m_OrientationCube.UpdateOrientation(body, target);
+        if (m_DirectionIndicator)
-            RewardFunctionTimePenalty();
+            m_DirectionIndicator.MatchOrientation(m_OrientationCube.transform);
-    /// <summary>
-    /// Reward moving towards target & Penalize moving away from target.
-    /// </summary>
-    void RewardFunctionMovingTowards()
+    //Returns the average velocity of all of the body parts
+    //Using the velocity of the hips only has been shown to result in more erratic movement from the limbs, so...
+    //...using the average helps prevent this erratic movement
+    Vector3 GetAvgVelocity()
-        var movingTowardsDot = Vector3.Dot(orientationCube.transform.forward,
-            Vector3.ClampMagnitude(m_JdController.bodyPartsDict[body].rb.velocity, maximumWalkingSpeed));
-        if (float.IsNaN(movingTowardsDot))
+        Vector3 velSum = Vector3.zero;
+        Vector3 avgVel = Vector3.zero;
+
+        //ALL RBS
+        int numOfRB = 0;
+        foreach (var item in m_JdController.bodyPartsList)
-            throw new ArgumentException(
-                "NaN in movingTowardsDot.\n" +
-                $" orientationCube.transform.forward: {orientationCube.transform.forward}\n"+
-                $" body.velocity: {m_JdController.bodyPartsDict[body].rb.velocity}\n"+
-                $" maximumWalkingSpeed: {maximumWalkingSpeed}"
-            );
+            numOfRB++;
+            velSum += item.rb.velocity;
-        AddReward(0.03f * movingTowardsDot);
+
+        avgVel = velSum / numOfRB;
+        return avgVel;
-    /// <summary>
-    /// Reward facing target & Penalize facing away from target
-    /// </summary>
-    void RewardFunctionFacingTarget()
+    //normalized value of the difference in avg speed vs goal walking speed.
+    public float GetMatchingVelocityReward(Vector3 velocityGoal, Vector3 actualVelocity)
-        var facingReward = Vector3.Dot(orientationCube.transform.forward, body.forward);
-        if (float.IsNaN(facingReward))
-        {
-            throw new ArgumentException(
-                "NaN in movingTowardsDot.\n" +
-                $" orientationCube.transform.forward: {orientationCube.transform.forward}\n"+
-                $" body.forward: {body.forward}"
-            );
-        }
-        AddReward(0.01f * facingReward);
+        //distance between our actual velocity and goal velocity
+        var velDeltaMagnitude = Mathf.Clamp(Vector3.Distance(actualVelocity, velocityGoal), 0, TargetWalkingSpeed);
+
+        //return the value on a declining sigmoid shaped curve that decays from 1 to 0
+        //This reward will approach 1 if it matches perfectly and approach zero as it deviates
+        return Mathf.Pow(1 - Mathf.Pow(velDeltaMagnitude / TargetWalkingSpeed, 2), 2);
-    /// Existential penalty for time-contrained tasks.
+    /// Agent touched the target
-    void RewardFunctionTimePenalty()
+    public void TouchedTarget()
-        AddReward(-0.001f);
+        AddReward(1f);
    }
 }
--- a/Project/Assets/ML-Agents/Examples/Walker/Demos/ExpertWalkerDy.demo.meta
+++ b/Project/Assets/ML-Agents/Examples/Walker/Demos/ExpertWalkerDy.demo.meta
 guid: 9f87b3070a0fd4a1e838131a91399c2f
 ScriptedImporter:
  fileIDToRecycleName:
-    11400000: Assets/Demonstrations/ExpertWalkerDy.demo
+    11400002: Assets/ML-Agents/Examples/Walker/Demos/ExpertWalkerDy.demo
  externalObjects: {}
  userData: ' (Unity.MLAgents.Demonstrations.DemonstrationSummary)'
  assetBundleName: 
--- a/Project/Assets/ML-Agents/Examples/Walker/Demos/ExpertWalkerDyVS.demo.meta
+++ b/Project/Assets/ML-Agents/Examples/Walker/Demos/ExpertWalkerDyVS.demo.meta
 guid: a4b02e2c382c247919eb63ce72e90a3b
 ScriptedImporter:
  fileIDToRecycleName:
-    11400000: Assets/Demonstrations/ExpertWalkerDyVS.demo
+    11400002: Assets/ML-Agents/Examples/Walker/Demos/ExpertWalkerDyVS.demo
  externalObjects: {}
  userData: ' (Unity.MLAgents.Demonstrations.DemonstrationSummary)'
  assetBundleName: 
--- a/Project/Assets/ML-Agents/Examples/Walker/Demos/ExpertWalkerStVS.demo.meta
+++ b/Project/Assets/ML-Agents/Examples/Walker/Demos/ExpertWalkerStVS.demo.meta
 guid: edcbb505552464c5c829886a4a3817dd
 ScriptedImporter:
  fileIDToRecycleName:
-    11400000: Assets/Demonstrations/ExpertWalkerStVS.demo
+    11400002: Assets/ML-Agents/Examples/Walker/Demos/ExpertWalkerStVS.demo
  externalObjects: {}
  userData: ' (Unity.MLAgents.Demonstrations.DemonstrationSummary)'
  assetBundleName: 
--- a/Project/Assets/ML-Agents/Examples/Walker/Demos/ExpertWalkerSta.demo.meta
+++ b/Project/Assets/ML-Agents/Examples/Walker/Demos/ExpertWalkerSta.demo.meta
 guid: 1f3a5d62e6aea4b5eb053ac33f11b06d
 ScriptedImporter:
  fileIDToRecycleName:
-    11400000: Assets/Demonstrations/ExpertWalkerSta.demo
+    11400002: Assets/ML-Agents/Examples/Walker/Demos/ExpertWalkerSta.demo
  externalObjects: {}
  userData: ' (Unity.MLAgents.Demonstrations.DemonstrationSummary)'
  assetBundleName: 
--- a/Project/Packages/manifest.json
+++ b/Project/Packages/manifest.json
    "com.unity.ads": "2.0.8",
    "com.unity.analytics": "3.2.3",
    "com.unity.collab-proxy": "1.2.15",
-    "com.unity.ml-agents": "file:../../com.unity.ml-agents",
-    "com.unity.ml-agents.extensions": "file:../../com.unity.ml-agents.extensions",
+    "com.unity.ml-agents": "file:/Users/brandonh/unity_projects/ml-agents-master/ml-agents/com.unity.ml-agents",
    "com.unity.package-manager-ui": "2.0.8",
    "com.unity.purchasing": "2.0.3",
    "com.unity.textmeshpro": "1.4.1",