浏览代码

Increased max joint spring strength to 40k

/docs-update
Hunter-Unity 5 年前
当前提交
9d46b450
共有 2 个文件被更改,包括 25 次插入和 13 次删除
  1. 4
      Project/Assets/ML-Agents/Examples/Walker/Prefabs/WalkerPairDynamic.prefab
  2. 34
      Project/Assets/ML-Agents/Examples/Walker/Scripts/WalkerAgentDynamic.cs

4
Project/Assets/ML-Agents/Examples/Walker/Prefabs/WalkerPairDynamic.prefab


m_Script: {fileID: 11500000, guid: 1b29724baddfa457da6eeab446fa49ca, type: 3}
m_Name:
m_EditorClassIdentifier:
maxJointSpring: 10000
jointDampen: 500
maxJointSpring: 40000
jointDampen: 3000
maxJointForceLimit: 10000
bodyPartsList: []
--- !u!114 &8913670488868299308

34
Project/Assets/ML-Agents/Examples/Walker/Scripts/WalkerAgentDynamic.cs


//reward looking at
// float facingReward = + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation)
// + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, head.rotation);
float facingReward = + 0.1f * Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation)
+ 0.1f * Quaternion.Dot(m_OrientationCube.transform.rotation, head.rotation);
float velReward = +0.2f * Vector3.Dot(m_OrientationCube.transform.forward,m_JdController.bodyPartsDict[hips].rb.velocity); //because we are observing in local space???
//Multiplying these amplifies the reward.
float runForwardTowardsTargetReward = facingReward * Mathf.Clamp(velReward, 0, 15);
// //Multiplying these amplifies the reward.
// float facingReward = + 0.1f * Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation)
// + 0.1f * Quaternion.Dot(m_OrientationCube.transform.rotation, head.rotation);
// float velReward = +0.2f * Vector3.Dot(m_OrientationCube.transform.forward,m_JdController.bodyPartsDict[hips].rb.velocity); //because we are observing in local space???
// float runForwardTowardsTargetReward = facingReward * Mathf.Clamp(velReward, 0, 15);
// print(Quaternion.Angle(hips.transform.rotation, thighL.transform.rotation));
// print($"Combined {runForwardTowardsTargetReward}");
// float runBackwardsTowardsTargetReward = facingReward * Mathf.Clamp(velReward, -1, 0);
// Set reward for this step according to mixture of the following elements.

// d. Discourage head movement.
AddReward(
runForwardTowardsTargetReward
// runForwardTowardsTargetReward
// + 0.02f * Vector3.Dot(m_OrientationCube.transform.forward,Vector3.ClampMagnitude(m_JdController.bodyPartsDict[hips].rb.velocity,5))
+ 0.01f * Vector3.Dot(m_OrientationCube.transform.forward,Vector3.ClampMagnitude(m_JdController.bodyPartsDict[hips].rb.velocity,3))
// + 0.01f * (Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation) - 1) * .5f //penalize not looking at
// + 0.01f * (Quaternion.Dot(m_OrientationCube.transform.rotation, head.rotation) - 1) * .5f //penalize not looking at
+ 0.015f * (Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation) - 1) * .5f //penalize not looking at
+ 0.015f * (Quaternion.Dot(m_OrientationCube.transform.rotation, head.rotation) - 1) * .5f //penalize not looking at
+ 0.02f * (head.position.y - hips.position.y)
- 0.01f * Vector3.Distance(m_JdController.bodyPartsDict[head].rb.velocity,
m_JdController.bodyPartsDict[hips].rb.velocity)
// + 0.02f * (head.position.y - hips.position.y)
// - 0.01f * Vector3.Distance(m_JdController.bodyPartsDict[head].rb.velocity,
// m_JdController.bodyPartsDict[hips].rb.velocity)
);

正在加载...
取消
保存