浏览代码

Add a NaN check to the reward manager; start work on a body-part velocity penalty.

/active-variablespeed
HH 4 年前
当前提交
35235345
共有 3 个文件被更改,包括 44 次插入和 10 次删除
  1. 33
      Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/JointDriveController.cs
  2. 12
      Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/RewardManager.cs
  3. 9
      Project/Assets/ML-Agents/Examples/Walker/Scripts/WalkerAgent.cs

33
Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/JointDriveController.cs


/// <summary>
/// Sets the maximum force of this body part's slerp drive from a strength signal.
/// </summary>
/// <param name="strength">
/// Strength signal, remapped linearly as <c>(strength + 1) * 0.5 * maxJointForceLimit</c>,
/// so -1 yields zero force and +1 yields the controller's force limit. The value is not
/// clamped here.
/// </param>
public void SetJointStrength(float strength)
{
    var rawVal = (strength + 1f) * 0.5f * thisJdController.maxJointForceLimit;

    // Start from the joint's current drive so any settings not touched below are preserved.
    JointDrive jd = joint.slerpDrive;
    jd.positionSpring = thisJdController.maxJointSpring;
    jd.positionDamper = thisJdController.jointDampen;
    jd.maximumForce = rawVal;

    // JointDrive is a value type in Unity, so the edits above only changed a local copy;
    // assign it back for the joint to pick up the new settings.
    joint.slerpDrive = jd;
    currentStrength = jd.maximumForce;
}
public class JointDriveController : MonoBehaviour

public float jointDampen;
[Min(1)]
[HideInInspector] public List<BodyPart> bodyPartsList = new List<BodyPart>();
public List<BodyPart> bodyPartsList = new List<BodyPart>();
// [HideInInspector] public List<BodyPart> bodyPartsList = new List<BodyPart>();
const float k_MaxAngularVelocity = 50.0f;
/// <summary>

12
Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/RewardManager.cs


public float cumulativeThisEpisode;
public float cumulativeThisSession;
public float maxRewardThisSession;
// public float maxRewardThisSession;
public int lastNaNStep;
// public Reward(string k)
// public Reward()
// {

{
rewardsDict[key].rawVal = rawVal;
float scaledVal = rawVal * rewardsDict[key].rewardScalar;
rewardsDict[key].maxRewardThisSession = scaledVal * maxSteps;
//if we get a NaN, set the step
if (float.IsNaN(scaledVal))
rewardsDict[key].lastNaNStep = m_thisAgent.StepCount;
// rewardsDict[key].maxRewardThisSession = scaledVal * maxSteps;
m_thisAgent.AddReward(scaledVal);
}

9
Project/Assets/ML-Agents/Examples/Walker/Scripts/WalkerAgent.cs


public float headHeightOverFeetReward; //reward for standing up straight-ish
public float hurryUpReward = -1; //don't waste time
public RewardManager rewardManager;
public float bpVelPenaltyThisStep = 0;
/// <summary>
/// Submits the current reward terms to the reward manager and recomputes
/// <c>bpVelPenaltyThisStep</c> as the sum of the body parts' rigidbody speeds.
/// </summary>
void UpdateRewards()
{
    // NOTE(review): cubeForward is not used in the visible portion of this method;
    // confirm nothing else depends on it before removing.
    var cubeForward = orientationCube.transform.forward;

    rewardManager.UpdateReward("matchSpeed", matchSpeedReward);
    rewardManager.UpdateReward("lookAtTarget", lookAtTargetReward);
    rewardManager.UpdateReward("headHeightOverFeet", headHeightOverFeetReward);

    // Submit the time penalty exactly once per call; submitting it twice would
    // presumably apply the penalty twice (verify against RewardManager.UpdateReward).
    rewardManager.UpdateReward("hurryUp", hurryUpReward / MaxStep);

    // Accumulate the linear speed of every body part tracked by the joint drive controller.
    bpVelPenaltyThisStep = 0;
    foreach (var item in m_JdController.bodyPartsList)
    {
        bpVelPenaltyThisStep += item.rb.velocity.magnitude;
    }
}
// void FixedUpdate()

正在加载...
取消
保存