浏览代码

fixed bp hierarchy

/hh-develop-ragdoll-testing
HH 5 年前
当前提交
eca1dac5
共有 2 个文件被更改,包括 141 次插入和 141 次删除
  1. 10
      Project/Assets/ML-Agents/Examples/Walker/Prefabs/WalkerRagdollScale1.prefab
  2. 272
      Project/Assets/ML-Agents/Examples/Walker/Scripts/WalkerAgentDynamic.cs

10
Project/Assets/ML-Agents/Examples/Walker/Prefabs/WalkerRagdollScale1.prefab


m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 5756232957616717705}
m_LocalRotation: {x: -0, y: -0, z: -0, w: 1}
m_LocalPosition: {x: 0, y: 2.1669998, z: 0}
m_LocalPosition: {x: 0, y: 1.3619995, z: 0}
m_Father: {fileID: 5756232958866523899}
m_RootOrder: 1
m_Father: {fileID: 5756232958925281413}
m_RootOrder: 3
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
--- !u!54 &5756232957616717715
Rigidbody:

m_LocalScale: {x: 1, y: 1, z: 1}
m_Children:
- {fileID: 3284846713557781917}
- {fileID: 5756232957616717704}
- {fileID: 5756232958925281413}
m_Father: {fileID: 3284846714078240390}
m_RootOrder: 3

- {fileID: 3284846713634731969}
- {fileID: 5756232958318828582}
- {fileID: 5756232958734302656}
- {fileID: 5756232957616717704}
m_RootOrder: 2
m_RootOrder: 1
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
--- !u!54 &5756232958925281468
Rigidbody:

272
Project/Assets/ML-Agents/Examples/Walker/Scripts/WalkerAgentDynamic.cs


CollectObservationBodyPart(bodyPart, sensor);
}
// print(m_OrientationCube.transform.rotation.eulerAngles);
// Debug.DrawRay(m_OrientationCube.transform.position, m_OrientationCube.transform.InverseTransformVector(m_JdController.bodyPartsDict[hips].rb.velocity), Color.green,Time.fixedDeltaTime * 5);
AddReward(
// runForwardTowardsTargetReward
// facingReward * velReward //max reward is moving towards while facing otherwise it is a penalty
// +0.02f * Vector3.Dot(m_WalkDir.normalized, m_JdController.bodyPartsDict[hips].rb.velocity)
// + 0.02f * Vector3.Dot(m_OrientationCube.transform.forward,Vector3.ClampMagnitude(m_JdController.bodyPartsDict[hips].rb.velocity,5))
+0.01f * Vector3.Dot(m_OrientationCube.transform.forward,
Vector3.ClampMagnitude(m_JdController.bodyPartsDict[hips].rb.velocity, 3))
+ 0.01f * Vector3.Dot(m_OrientationCube.transform.forward, hips.forward)
// + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, chest.rotation) //reward looking at
// + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation) //reward looking at
// + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, head.rotation) //reward looking at
// + 0.015f * (Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation) - 1) *
// .5f //penalize not looking at
// + 0.015f * (Quaternion.Dot(m_OrientationCube.transform.rotation, head.rotation) - 1) *
// .5f //penalize not looking at
+ 0.005f * (head.position.y - shinL.position.y)
+ 0.005f * (head.position.y - shinR.position.y)
// + 0.01f * (head.position.y - shinL.position.y)
// + 0.01f * (head.position.y - shinR.position.y)
// - 0.005f * Mathf.Clamp(m_JdController.bodyPartsDict[handL].rb.velocity.magnitude,
// 6, 9999)
// - 0.005f * Mathf.Clamp(m_JdController.bodyPartsDict[handR].rb.velocity.magnitude,
// 6, 9999)
// + 0.02f * (head.position.y - hips.position.y)
// - 0.01f * Vector3.Distance(m_JdController.bodyPartsDict[head].rb.velocity,
// m_JdController.bodyPartsDict[hips].rb.velocity)
);
//// print(m_OrientationCube.transform.rotation.eulerAngles);
//// Debug.DrawRay(m_OrientationCube.transform.position, m_OrientationCube.transform.InverseTransformVector(m_JdController.bodyPartsDict[hips].rb.velocity), Color.green,Time.fixedDeltaTime * 5);
// AddReward(
//// runForwardTowardsTargetReward
//// facingReward * velReward //max reward is moving towards while facing otherwise it is a penalty
//// +0.02f * Vector3.Dot(m_WalkDir.normalized, m_JdController.bodyPartsDict[hips].rb.velocity)
//// + 0.02f * Vector3.Dot(m_OrientationCube.transform.forward,Vector3.ClampMagnitude(m_JdController.bodyPartsDict[hips].rb.velocity,5))
// +0.01f * Vector3.Dot(m_OrientationCube.transform.forward,
// Vector3.ClampMagnitude(m_JdController.bodyPartsDict[hips].rb.velocity, 3))
// + 0.01f * Vector3.Dot(m_OrientationCube.transform.forward, hips.forward)
//
//// + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, chest.rotation) //reward looking at
//// + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation) //reward looking at
//// + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, head.rotation) //reward looking at
//// + 0.015f * (Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation) - 1) *
//// .5f //penalize not looking at
//// + 0.015f * (Quaternion.Dot(m_OrientationCube.transform.rotation, head.rotation) - 1) *
//// .5f //penalize not looking at
//
// + 0.005f * (head.position.y - shinL.position.y)
// + 0.005f * (head.position.y - shinR.position.y)
//// + 0.01f * (head.position.y - shinL.position.y)
//// + 0.01f * (head.position.y - shinR.position.y)
//// - 0.005f * Mathf.Clamp(m_JdController.bodyPartsDict[handL].rb.velocity.magnitude,
//// 6, 9999)
//// - 0.005f * Mathf.Clamp(m_JdController.bodyPartsDict[handR].rb.velocity.magnitude,
//// 6, 9999)
//// + 0.02f * (head.position.y - hips.position.y)
//// - 0.01f * Vector3.Distance(m_JdController.bodyPartsDict[head].rb.velocity,
//// m_JdController.bodyPartsDict[hips].rb.velocity)
// );
}

}
// void FixedUpdate()
// {
//// UpdateOrientationCube();
// //reward looking at
//// float facingReward = + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation)
//// + 0.01f * Quaternion.Dot(m_OrientationCube.transform.rotation, head.rotation);
//
//// print($"FacingRewardDot {facingReward}");
//// float velReward = +0.02f * Vector3.Dot(m_OrientationCube.transform.forward,m_OrientationCube.transform.InverseTransformVector(m_JdController.bodyPartsDict[hips].rb.velocity));
//// print($"VelRewardDot {velReward}");
//// float velReward = +0.02f * Vector3.Dot(m_WalkDir.normalized, m_JdController.bodyPartsDict[hips].rb.velocity);
//
//
//
//
//
//// //Multiplying these amplifies the reward.
//// float facingReward = + 0.1f * Quaternion.Dot(m_OrientationCube.transform.rotation, hips.rotation)
//// + 0.1f * Quaternion.Dot(m_OrientationCube.transform.rotation, head.rotation);
//// float velReward = +0.2f * Vector3.Dot(m_OrientationCube.transform.forward,m_JdController.bodyPartsDict[hips].rb.velocity); //because we are observing in local space???
//// float runForwardTowardsTargetReward = facingReward * Mathf.Clamp(velReward, 0, 15);
//
//// print(Quaternion.Angle(hips.transform.rotation, thighL.transform.rotation));
//
//
//// print($"Combined {runForwardTowardsTargetReward}");
//// float runBackwardsTowardsTargetReward = facingReward * Mathf.Clamp(velReward, -1, 0);
// // Set reward for this step according to mixture of the following elements.
// // a. Velocity alignment with goal direction.
// // b. Rotation alignment with goal direction.
// // c. Encourage head height.
// // d. Discourage head movement.
/// <summary>
/// Adds the shaped per-step reward for the walker.
/// The reward is the sum of four terms, added in this order:
///   1. 0.01 * dot(orientation-cube forward, hips velocity with magnitude clamped to 3)
///   2. 0.01 * dot(orientation-cube forward, hips.forward)
///   3. 0.005 * (head height - left shin height)
///   4. 0.005 * (head height - right shin height)
/// </summary>
/// <remarks>
/// NOTE(review): the orientation cube's forward axis is presumably the desired walking
/// direction; confirm against the code that updates m_OrientationCube.
/// Other terms tried during tuning (Quaternion.Dot facing rewards/penalties, hand-speed
/// penalties, a head-vs-hips velocity distance penalty) are intentionally not included.
/// </remarks>
void FixedUpdate()
{
    var cubeForward = m_OrientationCube.transform.forward;

    // Term 1: hips velocity along the cube's forward axis, capped so speed alone cannot dominate.
    var cappedHipsVelocity = Vector3.ClampMagnitude(m_JdController.bodyPartsDict[hips].rb.velocity, 3);
    float forwardVelocityTerm = 0.01f * Vector3.Dot(cubeForward, cappedHipsVelocity);

    // Term 2: how closely the hips face along the cube's forward axis.
    float hipsFacingTerm = 0.01f * Vector3.Dot(cubeForward, hips.forward);

    // Terms 3 and 4: vertical distance of the head above each shin.
    float headAboveLeftShinTerm = 0.005f * (head.position.y - shinL.position.y);
    float headAboveRightShinTerm = 0.005f * (head.position.y - shinR.position.y);

    // Sum left-to-right in the same order as the terms are listed above.
    AddReward(forwardVelocityTerm + hipsFacingTerm + headAboveLeftShinTerm + headAboveRightShinTerm);
}
/// <summary>
/// Loop over body parts and reset them to initial conditions.

正在加载...
取消
保存