浏览代码

Turn off head height and hurry rew

/active-variablespeed
HH 4 年前
当前提交
1ea8ad6f
共有 8 个文件被更改,包括 1033 次插入18 次删除
  1. 14
      Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/RewardManager.cs
  2. 12
      Project/Assets/ML-Agents/Examples/Walker/Prefabs/DynamicPlatformWalker.prefab
  3. 6
      Project/Assets/ML-Agents/Examples/Walker/Scenes/WalkerDynamic.unity
  4. 7
      Project/Assets/ML-Agents/Examples/Walker/Scripts/WalkerAgent.cs
  5. 1001
      Project/Assets/ML-Agents/Examples/Walker/TFModels/WalkerDynamic Hurry Pen.nn
  6. 11
      Project/Assets/ML-Agents/Examples/Walker/TFModels/WalkerDynamic Hurry Pen.nn.meta
  7. 0
      /Project/Assets/ML-Agents/Examples/Walker/TFModels/WalkerDynamic Hurry Pen way too high.nn
  8. 0
      /Project/Assets/ML-Agents/Examples/Walker/TFModels/WalkerDynamic Hurry Pen way too high.nn.meta

14
Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/RewardManager.cs


{
public string rewardKey;
// [Range(.01f, .05f)]
public float rawVal;
public float rewardScalar = .01f;
// public float rewardScalar;
public float rewardThisStep;

//Add new rewards
/// <summary>
/// Records a new raw value for the reward identified by <paramref name="key"/>,
/// scales it by that reward's <c>rewardScalar</c>, updates the per-step and
/// cumulative bookkeeping, and forwards the scaled value to the agent.
/// </summary>
/// <param name="key">Key of the entry in <c>rewardsDict</c> to update.</param>
/// <param name="rawVal">Unscaled reward value for this step.</param>
public void UpdateReward(string key, float rawVal)
{
    // Look the entry up once; every update below targets the same entry.
    var entry = rewardsDict[key];

    // Keep the unscaled input alongside the scaled bookkeeping.
    entry.rawVal = rawVal;

    float scaledVal = rawVal * entry.rewardScalar;

    // Value this reward would total if the current scaled value were
    // received on each of maxSteps steps.
    entry.maxRewardThisSession = scaledVal * maxSteps;
    entry.rewardThisStep = scaledVal;

    // Accumulate exactly once per call. The previous body ran the scale /
    // accumulate / AddReward sequence twice, which double-counted both
    // cumulative totals and handed the agent twice the intended reward.
    entry.cumulativeThisEpisode += scaledVal;
    entry.cumulativeThisSession += scaledVal;
    m_thisAgent.AddReward(scaledVal);
}
// //Add new rewards

12
Project/Assets/ML-Agents/Examples/Walker/Prefabs/DynamicPlatformWalker.prefab


m_EditorClassIdentifier:
rewardsList:
- rewardKey: matchSpeed
rewardScalar: 0.01
rawVal: 0
rewardScalar: 0.03
rawVal: 0
rewardScalar: 0.01
rewardThisStep: 0
cumulativeThisEpisode: 0

rewardScalar: 0.005
rawVal: 0
rewardScalar: 0
rewardScalar: 1
rawVal: 0
rewardScalar: 0
rewardThisStep: 0
cumulativeThisEpisode: 0
cumulativeThisSession: 0

type: 3}
propertyPath: m_Model
value:
objectReference: {fileID: 11400000, guid: e785133c5b0ac461588106642550d1b3,
objectReference: {fileID: 11400000, guid: f2958ea7e93ca450096779f1293af0bf,
type: 3}
- target: {fileID: 895268871377934297, guid: 765582efd9dda46ed98564603316353f,
type: 3}

6
Project/Assets/ML-Agents/Examples/Walker/Scenes/WalkerDynamic.unity


propertyPath: m_LocalEulerAnglesHint.z
value: 0
objectReference: {fileID: 0}
- target: {fileID: 6065910098925129095, guid: f51e8260728fd4c8fa87bcda9d0e2027,
type: 3}
propertyPath: m_Model
value:
objectReference: {fileID: 11400000, guid: c85becd32a14a4c048895b4eafc718ce,
type: 3}
m_RemovedComponents: []
m_SourcePrefab: {fileID: 100100000, guid: f51e8260728fd4c8fa87bcda9d0e2027, type: 3}
--- !u!4 &6065910099080495282 stripped

7
Project/Assets/ML-Agents/Examples/Walker/Scripts/WalkerAgent.cs


// a. Match target speed
//This reward will approach 1 if it matches and approach zero as it deviates
matchSpeedReward =
Mathf.Exp(-0.1f * (orientationCube.transform.forward * walkingSpeed -
Mathf.Exp(-0.1f * (cubeForward * walkingSpeed -
// lookAtTargetReward =
// Mathf.Exp(-0.1f * (cubeForward * walkingSpeed -
// m_JdController.bodyPartsDict[hips].rb.velocity).sqrMagnitude);
((head.position.y - footL.position.y) + (head.position.y - footR.position.y) / 10); //Should normalize to ~1
(((head.position.y - footL.position.y) + (head.position.y - footR.position.y))/ 10); //Should normalize to ~1
// AddReward(
// +0.02f * moveTowardsTargetReward
// + 0.01f * lookAtTargetReward

1001
Project/Assets/ML-Agents/Examples/Walker/TFModels/WalkerDynamic Hurry Pen.nn
文件差异内容过多而无法显示
查看文件

11
Project/Assets/ML-Agents/Examples/Walker/TFModels/WalkerDynamic Hurry Pen.nn.meta


fileFormatVersion: 2
guid: f2958ea7e93ca450096779f1293af0bf
ScriptedImporter:
fileIDToRecycleName:
11400000: main obj
11400002: model data
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:
script: {fileID: 11500000, guid: 19ed1486aa27d4903b34839f37b8f69f, type: 3}

/Project/Assets/ML-Agents/Examples/Walker/TFModels/WalkerDynamic 1.nn → /Project/Assets/ML-Agents/Examples/Walker/TFModels/WalkerDynamic Hurry Pen way too high.nn

/Project/Assets/ML-Agents/Examples/Walker/TFModels/WalkerDynamic 1.nn.meta → /Project/Assets/ML-Agents/Examples/Walker/TFModels/WalkerDynamic Hurry Pen way too high.nn.meta

正在加载...
取消
保存