浏览代码

try reward product

/active-variablespeed
HH 5 年前
当前提交
3c2ff715
共有 2 个文件被更改，包括 20 次插入和 11 次删除
  1. 7
      Project/Assets/ML-Agents/Examples/Walker/Prefabs/DynamicPlatformWalker.prefab
  2. 24
      Project/Assets/ML-Agents/Examples/Walker/Scripts/WalkerAgent.cs

7
Project/Assets/ML-Agents/Examples/Walker/Prefabs/DynamicPlatformWalker.prefab


cumulativeThisEpisode: 0
cumulativeThisSession: 0
lastNaNStep: 0
- rewardKey: productOfAllRewards
rawVal: 0
rewardScalar: 0.01
rewardThisStep: 0
cumulativeThisEpisode: 0
cumulativeThisSession: 0
lastNaNStep: 0
maxSteps: 0
--- !u!1001 &6359877978260855390
PrefabInstance:

24
Project/Assets/ML-Agents/Examples/Walker/Scripts/WalkerAgent.cs


// + 0.01f * headHeightOverFeetReward
// );
rewardManager.UpdateReward("matchSpeed", matchSpeedReward);
rewardManager.UpdateReward("lookAtTarget", lookAtTargetReward);
rewardManager.UpdateReward("headHeightOverFeet", headHeightOverFeetReward);
rewardManager.UpdateReward("hurryUp", hurryUpReward/MaxStep);
// rewardManager.UpdateReward("matchSpeed", matchSpeedReward);
// rewardManager.UpdateReward("lookAtTarget", lookAtTargetReward);
// rewardManager.UpdateReward("headHeightOverFeet", headHeightOverFeetReward);
// rewardManager.UpdateReward("hurryUp", hurryUpReward/MaxStep);
rewardManager.UpdateReward("productOfAllRewards", matchSpeedReward * lookAtTargetReward * headHeightOverFeetReward);
bpVelPenaltyThisStep = 0;
foreach (var item in m_JdController.bodyPartsList)
{
var velDelta = Mathf.Clamp(item.rb.velocity.magnitude - walkingSpeed, 0, 1);
bpVelPenaltyThisStep += velDelta;
}
rewardManager.UpdateReward("bpVel", bpVelPenaltyThisStep);
// //VELOCITY REWARDS
// bpVelPenaltyThisStep = 0;
// foreach (var item in m_JdController.bodyPartsList)
// {
// var velDelta = Mathf.Clamp(item.rb.velocity.magnitude - walkingSpeed, 0, 1);
// bpVelPenaltyThisStep += velDelta;
// }
// rewardManager.UpdateReward("bpVel", bpVelPenaltyThisStep);
}

正在加载...
取消
保存