浏览代码

try linear inverselerp for vel

/active-variablespeed
HH 5 年前
当前提交
13279c74
共有 2 个文件被更改,包括 116 次插入14 次删除
  1. 56
      Project/Assets/ML-Agents/Examples/Walker/Scenes/WalkerDynamic.unity
  2. 74
      Project/Assets/ML-Agents/Examples/Walker/Scripts/WalkerAgent.cs

56
Project/Assets/ML-Agents/Examples/Walker/Scenes/WalkerDynamic.unity


propertyPath: m_ConnectedAnchor.z
value: 0.00000023841858
objectReference: {fileID: 0}
- target: {fileID: 4507520074116686519, guid: f51e8260728fd4c8fa87bcda9d0e2027,
type: 3}
propertyPath: walkingSpeed
value: 15
objectReference: {fileID: 0}
- target: {fileID: 4712600297668500197, guid: f51e8260728fd4c8fa87bcda9d0e2027,
type: 3}
propertyPath: m_Name

type: 3}
propertyPath: m_ConnectedAnchor.z
value: 0.00000023841858
objectReference: {fileID: 0}
- target: {fileID: 4507520074116686519, guid: f51e8260728fd4c8fa87bcda9d0e2027,
type: 3}
propertyPath: walkingSpeed
value: 15
objectReference: {fileID: 0}
- target: {fileID: 4712600297668500197, guid: f51e8260728fd4c8fa87bcda9d0e2027,
type: 3}

propertyPath: m_ConnectedAnchor.z
value: 0.00000023841858
objectReference: {fileID: 0}
- target: {fileID: 4507520074116686519, guid: f51e8260728fd4c8fa87bcda9d0e2027,
type: 3}
propertyPath: walkingSpeed
value: 15
objectReference: {fileID: 0}
- target: {fileID: 4712600297668500197, guid: f51e8260728fd4c8fa87bcda9d0e2027,
type: 3}
propertyPath: m_Name

propertyPath: m_ConnectedAnchor.z
value: 0.00000023841858
objectReference: {fileID: 0}
- target: {fileID: 4507520074116686519, guid: f51e8260728fd4c8fa87bcda9d0e2027,
type: 3}
propertyPath: walkingSpeed
value: 15
objectReference: {fileID: 0}
- target: {fileID: 4712600297668500197, guid: f51e8260728fd4c8fa87bcda9d0e2027,
type: 3}
propertyPath: m_Name

propertyPath: m_ConnectedAnchor.z
value: 0.00000023841858
objectReference: {fileID: 0}
- target: {fileID: 4507520074116686519, guid: f51e8260728fd4c8fa87bcda9d0e2027,
type: 3}
propertyPath: walkingSpeed
value: 15
objectReference: {fileID: 0}
- target: {fileID: 4712600297668500197, guid: f51e8260728fd4c8fa87bcda9d0e2027,
type: 3}
propertyPath: m_Name

propertyPath: m_ConnectedAnchor.z
value: 0.00000023841858
objectReference: {fileID: 0}
- target: {fileID: 4507520074116686519, guid: f51e8260728fd4c8fa87bcda9d0e2027,
type: 3}
propertyPath: walkingSpeed
value: 15
objectReference: {fileID: 0}
- target: {fileID: 4712600297668500197, guid: f51e8260728fd4c8fa87bcda9d0e2027,
type: 3}
propertyPath: m_Name

type: 3}
propertyPath: m_ConnectedAnchor.x
value: 0.69999707
objectReference: {fileID: 0}
- target: {fileID: 4507520074116686519, guid: f51e8260728fd4c8fa87bcda9d0e2027,
type: 3}
propertyPath: walkingSpeed
value: 15
objectReference: {fileID: 0}
- target: {fileID: 4712600297668500197, guid: f51e8260728fd4c8fa87bcda9d0e2027,
type: 3}

propertyPath: m_ConnectedAnchor.z
value: 0.00000023841858
objectReference: {fileID: 0}
- target: {fileID: 4507520074116686519, guid: f51e8260728fd4c8fa87bcda9d0e2027,
type: 3}
propertyPath: walkingSpeed
value: 15
objectReference: {fileID: 0}
- target: {fileID: 4712600297668500197, guid: f51e8260728fd4c8fa87bcda9d0e2027,
type: 3}
propertyPath: m_Name

propertyPath: rewardManager
value:
objectReference: {fileID: 79411373}
- target: {fileID: 4507520074116686519, guid: f51e8260728fd4c8fa87bcda9d0e2027,
type: 3}
propertyPath: walkingSpeed
value: 15
objectReference: {fileID: 0}
- target: {fileID: 4712600297668500197, guid: f51e8260728fd4c8fa87bcda9d0e2027,
type: 3}
propertyPath: m_Name

m_Modification:
m_TransformParent: {fileID: 0}
m_Modifications:
- target: {fileID: 4507520074116686519, guid: f51e8260728fd4c8fa87bcda9d0e2027,
type: 3}
propertyPath: walkingSpeed
value: 15
objectReference: {fileID: 0}
- target: {fileID: 4712600297668500197, guid: f51e8260728fd4c8fa87bcda9d0e2027,
type: 3}
propertyPath: m_Name

propertyPath: m_LocalEulerAnglesHint.z
value: 0
objectReference: {fileID: 0}
- target: {fileID: 6065910098925129095, guid: f51e8260728fd4c8fa87bcda9d0e2027,
type: 3}
propertyPath: m_Model
value:
objectReference: {fileID: 11400000, guid: 9cdb96bd3846b477cbf9c5ad7ac2d87e,
type: 3}
m_RemovedComponents: []
m_SourcePrefab: {fileID: 100100000, guid: f51e8260728fd4c8fa87bcda9d0e2027, type: 3}
--- !u!4 &6065910099080495282 stripped

74
Project/Assets/ML-Agents/Examples/Walker/Scripts/WalkerAgent.cs


UpdateRewards();
}
public float velInverseLerpVal;
public float hipsVelMag;
public float lookAtTargetReward; //reward for looking at the target
public float matchSpeedReward; //reward for matching the desired walking speed.
public float headHeightOverFeetReward; //reward for standing up straight-ish

// matchSpeedReward =
// Mathf.Exp(-0.1f * (cubeForward * walkingSpeed -
// m_JdController.bodyPartsDict[hips].rb.velocity).sqrMagnitude);
hipsVelMag = m_JdController.bodyPartsDict[hips].rb.velocity.magnitude;
// velInverseLerpVal =
// Mathf.InverseLerp(0, walkingSpeed, m_JdController.bodyPartsDict[hips].rb.velocity.magnitude);
// var moveTowardsTargetReward = Vector3.Dot(cubeForward,
// Vector3.ClampMagnitude(m_JdController.bodyPartsDict[hips].rb.velocity, maximumWalkingSpeed));
// b. Rotation alignment with goal direction.

// bpVelPenaltyThisStep += velDelta;
// }
// rewardManager.UpdateReward("bpVel", bpVelPenaltyThisStep);
avgVelValue = Vector3.zero;
velSum += m_JdController.bodyPartsDict[hips].rb.velocity;
velSum += m_JdController.bodyPartsDict[spine].rb.velocity;
velSum += m_JdController.bodyPartsDict[chest].rb.velocity;
velSum += m_JdController.bodyPartsDict[head].rb.velocity;
avgVelValue = velSum/4;
velInverseLerpVal = VelocityInverseLerp(cubeForward * walkingSpeed, avgVelValue);
rewardManager.UpdateReward("productOfAllRewards", velInverseLerpVal * lookAtTargetReward * headHeightOverFeetReward);
// velInverseLerpVal = VelocityInverseLerp(Vector3.zero, cubeForward * walkingSpeed, avgVelValue);
int counter = 0;
avgVelValue = Vector3.zero;
foreach (var item in m_JdController.bodyPartsList)
{
counter++;
// velSum += item.rb.velocity;
// velSum += Mathf.Clamp(item.rb.velocity.magnitude, 0, m_maxWalkingSpeed);
velSum += Vector3.ClampMagnitude(item.rb.velocity, m_maxWalkingSpeed);
}
avgVelValue = velSum/counter;
matchSpeedReward =
Mathf.Exp(-0.1f * (cubeForward * walkingSpeed -
avgVelValue).sqrMagnitude);
rewardManager.UpdateReward("productOfAllRewards", matchSpeedReward * lookAtTargetReward * headHeightOverFeetReward);
// velInverseLerpVal =
// Mathf.InverseLerp(0, walkingSpeed, avgVelValue.magnitude);
// rewardManager.UpdateReward("productOfAllRewards", velInverseLerpVal * lookAtTargetReward * headHeightOverFeetReward);
// matchSpeedReward =
// Mathf.Exp(-0.1f * (cubeForward * walkingSpeed -
// avgVelValue).sqrMagnitude);
// matchSpeedReward =
// Mathf.Exp(-0.01f * (cubeForward * walkingSpeed -
// avgVelValue).sqrMagnitude);
// rewardManager.UpdateReward("productOfAllRewards", matchSpeedReward * lookAtTargetReward * headHeightOverFeetReward);
// Vector3 velSum = Vector3.zero;
//
// int counter = 0;
// avgVelValue = Vector3.zero;
// foreach (var item in m_JdController.bodyPartsList)
// {
// counter++;
// velSum += item.rb.velocity;
//// velSum += Mathf.Clamp(item.rb.velocity.magnitude, 0, m_maxWalkingSpeed);
//// velSum += Vector3.ClampMagnitude(item.rb.velocity, m_maxWalkingSpeed);
// }
// avgVelValue = velSum/counter;
// //This reward will approach 1 if it matches and approach zero as it deviates
// matchSpeedReward =
// Mathf.Exp(-0.1f * (cubeForward * walkingSpeed -
// avgVelValue).sqrMagnitude);
// rewardManager.UpdateReward("productOfAllRewards", matchSpeedReward * lookAtTargetReward * headHeightOverFeetReward);
/// <summary>
/// Scores how closely a velocity matches a goal velocity.
/// </summary>
/// <param name="velocityGoal">The velocity the agent is trying to reach.</param>
/// <param name="currentVel">The velocity being evaluated against the goal.</param>
/// <returns>
/// The result of <c>Mathf.InverseLerp</c> over the range
/// <c>m_maxWalkingSpeed</c> -> 0, evaluated at the distance between the two
/// velocities: 1 when they coincide, falling linearly as the distance grows
/// toward <c>m_maxWalkingSpeed</c>.
/// </returns>
public float VelocityInverseLerp(Vector3 velocityGoal, Vector3 currentVel)
{
    // Size of the error between the evaluated velocity and the goal velocity.
    var velocityError = Vector3.Distance(currentVel, velocityGoal);

    // The range is passed reversed (max -> 0) on purpose so that a smaller
    // error produces a larger score.
    return Mathf.InverseLerp(m_maxWalkingSpeed, 0, velocityError);
}
// public float VelocityInverseLerp(Vector3 a, Vector3 b, Vector3 value)
// {
// Vector3 AB = b - a;
// Vector3 AV = value - a;
// return Vector3.Dot(AV, AB) / Vector3.Dot(AB, AB);
// }
// void FixedUpdate()
// {
// var cubeForward = orientationCube.transform.forward;

正在加载...
取消
保存