cleanup and update docs

5 年前 · 82b5af24
--- a/Project/Assets/ML-Agents/Examples/Crawler/Scripts/CrawlerAgent.cs
+++ b/Project/Assets/ML-Agents/Examples/Crawler/Scripts/CrawlerAgent.cs
    public float maximumWalkingSpeed = 999; //The max walk velocity magnitude an agent will be rewarded for
    Vector3 m_WalkDir; //Direction to the target
    Quaternion m_WalkDirLookRot; //Will hold the rotation to our target
-    
+
+
    public float targetSpawnRadius; //The radius in which a target can be randomly spawned.
    public bool detectTargets; //Should this agent detect targets
    public bool respawnTargetWhenTouched; //Should the target respawn to a different position when touched

    [Header("Reward Functions To Use")] [Space(10)]
    public bool rewardMovingTowardsTarget; // Agent should move towards target
+
    public bool rewardFacingTarget; // Agent should face the target
    public bool rewardUseTimePenalty; // Hurry up

    /// </summary>
    public void CollectObservationBodyPart(BodyPart bp, VectorSensor sensor)
    {
-        var rb = bp.rb;
        sensor.AddObservation(bp.groundContact.touchingGround ? 1 : 0); // Whether the bp touching the ground

        //Get velocities in the context of our orientation cube's space

        //Get position relative to hips in the context of our orientation cube's space
-//        sensor.AddObservation(m_OrientationCube.transform.InverseTransformPoint(bp.rb.position));
        sensor.AddObservation(orientationCube.transform.InverseTransformDirection(bp.rb.position - body.position));

        if (bp.rb.transform != body)
    /// </summary>
    void RewardFunctionMovingTowards()
    {
-        var movingTowardsDot =
-            Vector3.Dot(orientationCube.transform.forward, m_JdController.bodyPartsDict[body].rb.velocity);
+        var movingTowardsDot = Vector3.Dot(orientationCube.transform.forward,
+            Vector3.ClampMagnitude(m_JdController.bodyPartsDict[body].rb.velocity, maximumWalkingSpeed));
+        ;
        AddReward(0.03f * movingTowardsDot);
    }

--- a/docs/Learning-Environment-Examples.md
+++ b/docs/Learning-Environment-Examples.md
 - Goal: The agents must move their bodies toward the goal direction without falling.
  - `CrawlerStaticTarget` - Goal direction is always forward.
  - `CrawlerDynamicTarget` - Goal direction is randomized.
- Agents: The environment contains 3 agent with same Behavior Parameters.
+- Agents: The environment contains 10 agents with the same Behavior Parameters.
-  - Vector Observation space: 117 variables corresponding to position, rotation,
+  - Vector Observation space: 138 variables corresponding to position, rotation,
    velocity, and angular velocities of each limb plus the acceleration and
    angular acceleration of the body.
  - Vector Action space: (Continuous) Size of 20, corresponding to target
- Benchmark Mean Reward for `CrawlerStaticTarget`: 2000
- Benchmark Mean Reward for `CrawlerDynamicTarget`: 400
+- Benchmark Mean Reward for `CrawlerStaticTarget`: 1600
+- Benchmark Mean Reward for `CrawlerDynamicTarget`: 800

 ## Worm