浏览代码

Try average velocity of all body parts for reward

/active-variablespeed
HH 4 年前
当前提交
58102fdc
共有 4 个文件被更改,包括 1034 次插入5 次删除
  1. 2
      Project/Assets/ML-Agents/Examples/Walker/Prefabs/DynamicPlatformWalker.prefab
  2. 25
      Project/Assets/ML-Agents/Examples/Walker/Scripts/WalkerAgent.cs
  3. 1001
      Project/Assets/ML-Agents/Examples/Walker/TFModels/WalkerDynamic ProductOfAllRew.nn
  4. 11
      Project/Assets/ML-Agents/Examples/Walker/TFModels/WalkerDynamic ProductOfAllRew.nn.meta

2
Project/Assets/ML-Agents/Examples/Walker/Prefabs/DynamicPlatformWalker.prefab


type: 3}
propertyPath: m_Model
value:
objectReference: {fileID: 11400000, guid: 9cdb96bd3846b477cbf9c5ad7ac2d87e,
objectReference: {fileID: 11400000, guid: e785133c5b0ac461588106642550d1b3,
type: 3}
- target: {fileID: 895268871377934297, guid: 765582efd9dda46ed98564603316353f,
type: 3}

25
Project/Assets/ML-Agents/Examples/Walker/Scripts/WalkerAgent.cs


// Set the reward for this step according to a mixture of the following elements.
// a. Match target speed
// This reward approaches 1 as the velocity matches the target and approaches 0 as it deviates.
matchSpeedReward =
Mathf.Exp(-0.1f * (cubeForward * walkingSpeed -
m_JdController.bodyPartsDict[hips].rb.velocity).sqrMagnitude);
// matchSpeedReward =
// Mathf.Exp(-0.1f * (cubeForward * walkingSpeed -
// m_JdController.bodyPartsDict[hips].rb.velocity).sqrMagnitude);
// var moveTowardsTargetReward = Vector3.Dot(cubeForward,
// Vector3.ClampMagnitude(m_JdController.bodyPartsDict[hips].rb.velocity, maximumWalkingSpeed));
// b. Rotation alignment with goal direction.

// rewardManager.UpdateReward("lookAtTarget", lookAtTargetReward);
// rewardManager.UpdateReward("headHeightOverFeet", headHeightOverFeetReward);
// rewardManager.UpdateReward("hurryUp", hurryUpReward/MaxStep);
rewardManager.UpdateReward("productOfAllRewards", matchSpeedReward * lookAtTargetReward * headHeightOverFeetReward);
// //VELOCITY REWARDS
// bpVelPenaltyThisStep = 0;

// bpVelPenaltyThisStep += velDelta;
// }
// rewardManager.UpdateReward("bpVel", bpVelPenaltyThisStep);
Vector3 velSum = Vector3.zero;
int counter = 0;
avgVelValue = Vector3.zero;
foreach (var item in m_JdController.bodyPartsList)
{
counter++;
// velSum += item.rb.velocity;
// velSum += Mathf.Clamp(item.rb.velocity.magnitude, 0, m_maxWalkingSpeed);
velSum += Vector3.ClampMagnitude(item.rb.velocity, m_maxWalkingSpeed);
avgVelValue = velSum/counter;
}
// This reward approaches 1 as the average (magnitude-clamped) body-part velocity matches cubeForward * walkingSpeed, and approaches 0 as it deviates.
matchSpeedReward =
Mathf.Exp(-0.1f * (cubeForward * walkingSpeed -
avgVelValue).sqrMagnitude);
rewardManager.UpdateReward("productOfAllRewards", matchSpeedReward * lookAtTargetReward * headHeightOverFeetReward);
public Vector3 avgVelValue;
// void FixedUpdate()
// {

1001
Project/Assets/ML-Agents/Examples/Walker/TFModels/WalkerDynamic ProductOfAllRew.nn
文件差异内容过多而无法显示
查看文件

11
Project/Assets/ML-Agents/Examples/Walker/TFModels/WalkerDynamic ProductOfAllRew.nn.meta


fileFormatVersion: 2
guid: e785133c5b0ac461588106642550d1b3
ScriptedImporter:
fileIDToRecycleName:
11400000: main obj
11400002: model data
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:
script: {fileID: 11500000, guid: 19ed1486aa27d4903b34839f37b8f69f, type: 3}
正在加载...
取消
保存