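Hip facing reward: reward hips (instead of head) alignment with the orientation cube; reduce max_steps from 3e7 to 2e7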

5 years ago · 5bf43487
--- a/Project/Assets/ML-Agents/Examples/Walker/Scripts/WalkerAgentDynamic.cs
+++ b/Project/Assets/ML-Agents/Examples/Walker/Scripts/WalkerAgentDynamic.cs
            +0.02f * Vector3.Dot(m_OrientationCube.transform.forward,
                Vector3.ClampMagnitude(m_JdController.bodyPartsDict[hips].rb.velocity, 999))
 //            + 0.01f * Vector3.Dot(m_OrientationCube.transform.forward, hips.forward)
-            + 0.01f * Vector3.Dot(m_OrientationCube.transform.forward, head.forward)
+//            + 0.01f * Vector3.Dot(m_OrientationCube.transform.forward, head.forward)
-//            + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation) //reward looking at
+            + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation) //reward looking at
 //            + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, head.rotation) //reward looking at
 //            + 0.015f * (Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation) - 1) *
 //            .5f //penalize not looking at
--- a/config/trainer_config.yaml
+++ b/config/trainer_config.yaml
    time_horizon: 1000
    batch_size: 2048
    buffer_size: 20480
-    max_steps: 3e7
+    max_steps: 2e7
    summary_freq: 30000
    num_layers: 3
    hidden_units: 512