
cleanup

/active-variablespeed
HH committed 4 years ago
Commit 6c67bf4e
31 files changed, with 465 additions and 2947 deletions
  1. 46    Project/Assets/ML-Agents/Examples/Walker/Prefabs/Ragdoll/WalkerRagdollBase.prefab
  2. 161   Project/Assets/ML-Agents/Examples/Walker/Scripts/WalkerAgent.cs
  3. 1001  Project/Assets/ML-Agents/Examples/Walker/TFModels/WalkerDynamicVariableSpeed.nn
  4. 2     Project/Assets/ML-Agents/Examples/Walker/TFModels/WalkerDynamicVariableSpeed.nn.meta
  5. 1001  Project/Assets/ML-Agents/Examples/Walker/TFModels/WalkerStatic.nn
  6. 2     Project/Assets/ML-Agents/Examples/Walker/TFModels/WalkerStatic.nn.meta
  7. 2     Project/ProjectSettings/ProjectVersion.txt
  8. 11    Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/RewardManager.cs.meta
  9. 110   Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/RewardManager.cs
  10. 1001 Project/Assets/ML-Agents/Examples/Walker/TFModels/WalkerStaticApprovedWalkInWorldDir.nn
  11. 11   Project/Assets/ML-Agents/Examples/Walker/TFModels/WalkerStaticApprovedWalkInWorldDir.nn.meta
  12. 8    Project/Assets/ML-Agents/Examples/Walker/TFModels/New Folder.meta
  13. 8    Project/Assets/ML-Agents/Examples/Walker/TFModels/with hh.meta
  14. 8    Project/Assets/ML-Agents/Examples/Walker/TFModels/10k no hh.meta
  15. 8    Project/Assets/ML-Agents/Examples/Walker/TFModels/15kstrength with hh.meta
  16. 8    Project/Assets/ML-Agents/Examples/Walker/TFModels/20k no hh no rolling targ.meta
  17. 8    Project/Assets/ML-Agents/Examples/Walker/TFModels/20kDistToTargCloud.meta
  18. 8    Project/Assets/ML-Agents/Examples/Walker/TFModels/20kAvgVelAllBPMaxDist50.meta
  19. 8    Project/Assets/ML-Agents/Examples/Walker/TFModels/20kAllBPVelRelPosClampedTo100.meta
  20. 0    /Project/Assets/ML-Agents/Examples/Walker/TFModels/WalkerDynamic.nn
  21. 0    /Project/Assets/ML-Agents/Examples/Walker/TFModels/WalkerDynamic.nn.meta
  22. 0    /Project/Assets/ML-Agents/Examples/Walker/TFModels/WalkerStaticVariableSpeed.nn
  23. 0    /Project/Assets/ML-Agents/Examples/Walker/TFModels/WalkerStaticVariableSpeed.nn.meta

Project/Assets/ML-Agents/Examples/Walker/Prefabs/Ragdoll/WalkerRagdollBase.prefab (46 changes)


- component: {fileID: 895268871377934303}
- component: {fileID: 895268871377934302}
- component: {fileID: 895268871377934301}
- component: {fileID: 5891315090006581283}
m_Layer: 0
m_Name: WalkerRagdollBase
m_TagString: Untagged

randomizeWalkSpeedEachEpisode: 1
walkDirectionMethod: 0
worldDirToWalk: {x: 1, y: 0, z: 0}
posToWalkTo: {x: 0, y: 0, z: 0}
worldPosToWalkTo: {x: 0, y: 0, z: 0}
target: {fileID: 0}
hips: {fileID: 895268871264836332}
chest: {fileID: 7933235354845945071}

armR: {fileID: 7933235355057813930}
forearmR: {fileID: 7933235353195701980}
handR: {fileID: 7933235354616748502}
rewardManager: {fileID: 5891315090006581283}
--- !u!114 &895268871377934303
MonoBehaviour:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
debugCommandLineOverride:
--- !u!114 &5891315090006581283
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 895268871377934275}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 2d2b3caecf069467ebf3a650d8ee401e, type: 3}
m_Name:
m_EditorClassIdentifier:
rewardsList:
- rewardKey: matchSpeed
rawVal: 0
rewardScalar: 0.03
rewardThisStep: 0
cumulativeThisEpisode: 0
cumulativeThisSession: 0
lastNaNStep: 0
- rewardKey: lookAtTarget
rawVal: 0
rewardScalar: 0.01
rewardThisStep: 0
cumulativeThisEpisode: 0
cumulativeThisSession: 0
lastNaNStep: 0
- rewardKey: headHeightOverFeet
rawVal: 0
rewardScalar: 0.001
rewardThisStep: 0
cumulativeThisEpisode: 0
cumulativeThisSession: 0
lastNaNStep: 0
- rewardKey: productOfAllRewards
rawVal: 0
rewardScalar: 1
rewardThisStep: 0
cumulativeThisEpisode: 0
cumulativeThisSession: 0
lastNaNStep: 0
maxSteps: 0
--- !u!1 &895268871382313704
GameObject:
m_ObjectHideFlags: 0

Project/Assets/ML-Agents/Examples/Walker/Scripts/WalkerAgent.cs (161 changes)


//If false, the goal velocity will be walkingSpeed
public bool randomizeWalkSpeedEachEpisode;
public enum WalkDirectionMethod
{
UseWorldDirection,

OrientationCubeController m_OrientationCube;
DirectionIndicator m_DirectionIndicator;
JointDriveController m_JdController;
EnvironmentParameters m_ResetParams;
public override void Initialize()

/// </summary>
public override void OnEpisodeBegin()
{
// if (walkTowardsType == WalkTowardsType.UseTarget && !target)
// {
// Debug.LogError("Missing a reference toTarget");
// Instantiate(targetPrefab)
// }
//Random start rotation to help generalize
hips.rotation = Quaternion.Euler(0, Random.Range(0.0f, 360.0f), 0);

{
worldPosToWalkTo = hips.position + (worldDirToWalk * 1000);
}
rewardManager.ResetEpisodeRewards();
//Set our goal walking speed
walkingSpeed =

//current speed goal. normalized.
sensor.AddObservation(walkingSpeed / m_maxWalkingSpeed);
// sensor.AddObservation((int)walkDirectionMethod);
// //Dist To Target. Max 50 meters. Normalized;
// //If we're walking in world dir, always return 1;
// float distToTarget = walkDirectionMethod == WalkDirectionMethod.UseTarget
// ? Mathf.Clamp((target.position - hips.position).magnitude, 0, 50)/50
// : 1;
// sensor.AddObservation(distToTarget);
// worldPosToWalkTo = GetUpdatedTargetPosition();
//
// Vector3 relPos = Vector3.zero;
// if (walkDirectionMethod == WalkDirectionMethod.UseTarget)
// {
// relPos = Vector3.ClampMagnitude(m_OrientationCube.transform.InverseTransformPoint(target.transform.position), 100);
// }
// sensor.AddObservation(relPos);
// worldPosToWalkTo = walkDirectionMethod == WalkDirectionMethod.UseTarget
// ? target.transform.position
//// : hips.position + (dirToLook * 100);
//// : m_OrientationCube.transform.TransformDirection(dirToLook * 100);
// : m_OrientationCube.transform.position + (cubeForward * 100);
// targetPos.y = 0;
Vector3 relPos = Vector3.ClampMagnitude(m_OrientationCube.transform.InverseTransformPoint(worldPosToWalkTo), 100);
//Position of target position relative to cube
Vector3 relPos =
Vector3.ClampMagnitude(m_OrientationCube.transform.InverseTransformPoint(worldPosToWalkTo), 100);
// Debug.DrawRay(worldPosToWalkTo, Vector3.up, Color.green,1);
// Debug.DrawRay(relPos, Vector3.up, Color.green,1);
// Debug.DrawRay(m_OrientationCube.transform.InverseTransformPoint(worldPosToWalkTo), Vector3.up * 2, Color.red,5);
// worldPosToWalkTo = GetUpdatedWorldTargetPosition();
// Vector3 relPos = Vector3.ClampMagnitude(m_OrientationCube.transform.InverseTransformPoint(worldPosToWalkTo), 100);
// sensor.AddObservation(relPos);
// Debug.DrawRay(worldPosToWalkTo, Vector3.up, Color.green,1);
// Debug.DrawRay(m_OrientationCube.transform.InverseTransformPoint(worldPosToWalkTo), Vector3.up * 2, Color.red,5);
// Vector3 targetPos = walkDirectionMethod == WalkDirectionMethod.UseTarget
// ? target.transform.position
//// : hips.position + (dirToLook * 100);
//// : m_OrientationCube.transform.TransformDirection(dirToLook * 100);
// : m_OrientationCube.transform.position + (cubeForward * 100);
// targetPos.y = 0;
// Vector3 relPos = Vector3.ClampMagnitude(m_OrientationCube.transform.InverseTransformPoint(targetPos), 100);
// sensor.AddObservation(relPos);
// Debug.DrawRay(targetPos, Vector3.up, Color.green,1);
// Debug.DrawRay(m_OrientationCube.transform.InverseTransformPoint(targetPos), Vector3.up * 2, Color.red,5);
// Vector3 targetPos = walkDirectionMethod == WalkDirectionMethod.UseTarget
// ? target.transform.position
//// : hips.position + (dirToLook * 100);
//// : m_OrientationCube.transform.TransformDirection(dirToLook * 100);
// : m_OrientationCube.transform.position + (cubeForward * 100);
// targetPos.y = 0;
// Vector3 relPos = Vector3.ClampMagnitude(m_OrientationCube.transform.InverseTransformPoint(targetPos), 100);
// sensor.AddObservation(relPos);
// Debug.DrawRay(targetPos, Vector3.up, Color.green,1);
// Debug.DrawRay(m_OrientationCube.transform.InverseTransformPoint(targetPos), Vector3.up * 2, Color.red,5);
foreach (var bodyPart in m_JdController.bodyPartsList)
{

m_DirectionIndicator.MatchOrientation(m_OrientationCube.transform);
}
Vector3 GetUpdatedWorldTargetPosition()
{
if (walkDirectionMethod == WalkDirectionMethod.UseWorldDirection)
{
//Wait until we are within 10 units and then update the position
//This helps prevent direction drift
if (Vector3.Distance(worldPosToWalkTo, hips.position) < 10)
{
return hips.position + (worldDirToWalk * 100);
}
else
{
return worldPosToWalkTo;
}
}
else //use target
{
return target.position;
}
}
// if (walkDirectionMethod == WalkDirectionMethod.UseWorldDirection)
// {
// Vector3 targetPos
// if(targetPos hips.position)
// }
// if(m_currentWorldDirToWalk != worldDirToWalk)
// {
// worldPosToWalkTo = hips.position + (worldDirToWalk * 100);
// }
var cubeForward = m_OrientationCube.transform.forward;
// Set reward for this step according to mixture of the following elements.

//Check for NaNs
if (float.IsNaN(matchSpeedReward))
{
throw new ArgumentException(
"NaN in moveTowardsTargetReward.\n" +
$" cubeForward: {cubeForward}\n"+
$" hips.velocity: {m_JdController.bodyPartsDict[hips].rb.velocity}\n"+
$" maximumWalkingSpeed: {m_maxWalkingSpeed}"
);
}
// c. Encourage head height.
// var headHeightOverFeetReward =
// Mathf.Clamp01(((head.position.y - footL.position.y) + (head.position.y - footR.position.y))/ 10); //Should normalize to ~1
//Check for NaNs
if (float.IsNaN(lookAtTargetReward))
{
throw new ArgumentException(
"NaN in lookAtTargetReward.\n" +
$" cubeForward: {cubeForward}\n"+
$" head.forward: {head.forward}"
);
}
rewardManager.rewardsDict["matchSpeed"].rewardThisStep = matchSpeedReward;
rewardManager.rewardsDict["lookAtTarget"].rewardThisStep = lookAtTargetReward;
// rewardManager.rewardsDict["headHeightOverFeet"].rewardThisStep = headHeightOverFeetReward;
// rewardManager.UpdateReward("productOfAllRewards", matchSpeedReward * lookAtTargetReward * headHeightOverFeetReward);
rewardManager.UpdateReward("productOfAllRewards", matchSpeedReward * lookAtTargetReward);
AddReward(matchSpeedReward * lookAtTargetReward);
// //Returns the average velocity of all the rigidbodies
// Vector3 GetAvgVelocity()
// {
// Vector3 velSum = Vector3.zero;
// Vector3 avgVel = Vector3.zero;
// velSum += m_JdController.bodyPartsDict[head].rb.velocity;
// velSum += m_JdController.bodyPartsDict[chest].rb.velocity;
// velSum += m_JdController.bodyPartsDict[spine].rb.velocity;
// velSum += m_JdController.bodyPartsDict[hips].rb.velocity;
// avgVel = velSum / 4;
// return avgVel;
// }
//Returns the average velocity of all of the body parts
//Using the velocity of the hips only has been shown to result in more erratic movement from the limbs, so...
//...using the average helps prevent this erratic movement
Vector3 GetAvgVelocity()
{
Vector3 velSum = Vector3.zero;

return avgVel;
}
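The body of the summation is elided in this excerpt; as a rough sketch of the averaging the comments describe (hypothetical, not the committed implementation), the idea is to sum the rigidbody velocities of the tracked body parts and divide by their count:

// Hypothetical sketch only; assumes every entry in bodyPartsList exposes its Rigidbody as rb.
Vector3 GetAvgVelocitySketch()
{
    Vector3 velSum = Vector3.zero;
    int count = 0;
    foreach (var bodyPart in m_JdController.bodyPartsList)
    {
        velSum += bodyPart.rb.velocity;
        count++;
    }
    // Averaging over all body parts smooths the signal compared to reading the hips alone.
    return count > 0 ? velSum / count : Vector3.zero;
}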
// public float headHeightOverFeetReward; //reward for standing up straight-ish
public RewardManager rewardManager;
//normalized value of the difference in avg speed vs goal walking speed.
public float GetMatchingVelocityInverseLerp(Vector3 velocityGoal, Vector3 actualVelocity)
{

//get the value on a declining sigmoid shaped curve that decays from 1 to 0
//return the value on a declining sigmoid shaped curve that decays from 1 to 0
//This reward will approach 1 if it matches perfectly and approach zero as it deviates
return Mathf.Pow(1 - Mathf.Pow(velDeltaMagnitude / walkingSpeed, 2), 2);
}
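The expression above evaluates to 1 when the average velocity matches the goal and decays toward 0 as the error approaches the goal speed. A minimal standalone sketch of that curve (hypothetical class and parameter names; the clamp is an assumption added so the value stays in [0, 1] for large errors):

using UnityEngine;

public static class SpeedRewardCurveSketch
{
    // (1 - (delta / goal)^2)^2: 1 at zero velocity error, ~0 once the error reaches the goal speed.
    public static float Evaluate(Vector3 goalVelocity, Vector3 actualVelocity, float goalSpeed)
    {
        // Clamp the error so the curve never leaves [0, 1] (assumption, not from the diff).
        float velDelta = Mathf.Clamp(Vector3.Distance(actualVelocity, goalVelocity), 0f, goalSpeed);
        return Mathf.Pow(1f - Mathf.Pow(velDelta / goalSpeed, 2f), 2f);
    }
}

For example, an error of half the goal speed gives (1 - 0.25)^2 ≈ 0.56, so the term still rewards partial speed matching rather than dropping off sharply.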

Project/Assets/ML-Agents/Examples/Walker/TFModels/WalkerDynamicVariableSpeed.nn (1001 changes; file diff too large to display)

Project/Assets/ML-Agents/Examples/Walker/TFModels/WalkerDynamicVariableSpeed.nn.meta (2 changes)


fileFormatVersion: 2
guid: f598eaeeef9f94691989a2cfaaafb565
guid: 1a6e4a4e15a5d49a7acac1f78bc1f514
ScriptedImporter:
fileIDToRecycleName:
11400000: main obj

Project/Assets/ML-Agents/Examples/Walker/TFModels/WalkerStatic.nn (1001 changes; file diff too large to display)

Project/Assets/ML-Agents/Examples/Walker/TFModels/WalkerStatic.nn.meta (2 changes)


fileFormatVersion: 2
guid: 8dfd4337ed40e4d48872a4f86919c9da
guid: d8bebea7ecfd0470f87cbab469bd1411
ScriptedImporter:
fileIDToRecycleName:
11400000: main obj

Project/ProjectSettings/ProjectVersion.txt (2 changes)


m_EditorVersion: 2018.4.17f1
m_EditorVersion: 2018.4.18f1

Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/RewardManager.cs.meta (11 changes)


fileFormatVersion: 2
guid: 2d2b3caecf069467ebf3a650d8ee401e
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/RewardManager.cs (110 changes)


using System;
using System.Collections;
using System.Collections.Generic;
using Unity.MLAgents;
using UnityEngine;
public class RewardManager : MonoBehaviour
{
[Serializable]
public class Reward
{
public string rewardKey;
// [Range(.01f, .05f)]
public float rawVal;
public float rewardScalar = .01f;
// public float rewardScalar;
public float rewardThisStep;
public float cumulativeThisEpisode;
public float cumulativeThisSession;
// public float maxRewardThisSession;
public int lastNaNStep;
// public Reward(string k)
// public Reward()
// {
//// rewardKey = k;
// rewardScalar = .01f;
// }
}
private Agent m_thisAgent;
public List<Reward> rewardsList = new List<Reward>();
public Dictionary<string, Reward> rewardsDict = new Dictionary<string, Reward>();
public float maxSteps;
private void OnEnable()
// private void Awake()
{
m_thisAgent = GetComponent<Agent>();
maxSteps = m_thisAgent.MaxStep;
foreach (var item in rewardsList)
{
if (rewardsDict.ContainsKey(item.rewardKey)) continue; //already added, skip this entry
rewardsDict.Add(item.rewardKey, item);
}
}
// public void AddReward(Reward r)
// {
// if (rewardsDict.ContainsKey(r.rewardKey)) return; //don't need to add
// rewardsDict.Add(r.rewardKey, r);
// }
// public void AddReward(string rewardKey)
// {
// if (rewardsDict.ContainsKey(rewardKey)) return; //don't need to add
// Reward newReward = new Reward(rewardKey);
// rewardsDict.Add(rewardKey, newReward);
// rewardsList.Add(newReward);
// }
//Add new rewards
public void UpdateReward(string key, float rawVal)
{
rewardsDict[key].rawVal = rawVal;
float scaledVal = rawVal * rewardsDict[key].rewardScalar;
//if we get a NaN, set the step
if (float.IsNaN(scaledVal))
rewardsDict[key].lastNaNStep = m_thisAgent.StepCount;
// rewardsDict[key].maxRewardThisSession = scaledVal * maxSteps;
rewardsDict[key].rewardThisStep = scaledVal;
rewardsDict[key].cumulativeThisEpisode += scaledVal;
rewardsDict[key].cumulativeThisSession += scaledVal;
m_thisAgent.AddReward(scaledVal);
}
// //Add new rewards
// public void UpdateReward(string key, float val)
// {
// rewardsDict[key].rewardThisStep = val;
// rewardsDict[key].cumulativeThisEpisode += val;
// rewardsDict[key].cumulativeThisSession += val;
// m_thisAgent.AddReward(val);
// }
//Resets cumulative episode reward
public void ResetEpisodeRewards()
{
foreach (var item in rewardsDict)
{
item.Value.rewardThisStep = 0;
item.Value.cumulativeThisEpisode = 0;
}
}
// Start is called before the first frame update
void Start()
{
}
// Update is called once per frame
void Update()
{
}
}
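For context, a minimal usage sketch of this component (hypothetical ExampleAgent; the reward keys are the ones configured in rewardsList in the Inspector, matching the entries serialized in WalkerRagdollBase.prefab above):

using Unity.MLAgents;
using UnityEngine;

public class ExampleAgent : Agent
{
    public RewardManager rewardManager; // attached to the same GameObject, assigned in the Inspector

    public override void OnEpisodeBegin()
    {
        // Zeroes rewardThisStep and cumulativeThisEpisode for every entry.
        rewardManager.ResetEpisodeRewards();
    }

    void FixedUpdate()
    {
        // Hypothetical raw reward terms in [0, 1].
        float matchSpeed = 0.9f;
        float lookAtTarget = 0.5f;

        // UpdateReward scales each raw value by the entry's rewardScalar,
        // accumulates per-episode and per-session totals, records the step
        // of any NaN, and forwards the scaled value to Agent.AddReward().
        rewardManager.UpdateReward("matchSpeed", matchSpeed);
        rewardManager.UpdateReward("lookAtTarget", lookAtTarget);
    }
}

Because OnEnable builds rewardsDict from rewardsList, any key passed to UpdateReward must already exist as an entry in the Inspector list; otherwise the dictionary lookup throws.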

Project/Assets/ML-Agents/Examples/Walker/TFModels/WalkerStaticApprovedWalkInWorldDir.nn (1001 changes; file diff too large to display)

Project/Assets/ML-Agents/Examples/Walker/TFModels/WalkerStaticApprovedWalkInWorldDir.nn.meta (11 changes)


fileFormatVersion: 2
guid: b857c29923dc040d5bc2194435a73ebd
ScriptedImporter:
fileIDToRecycleName:
11400000: main obj
11400002: model data
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:
script: {fileID: 11500000, guid: 19ed1486aa27d4903b34839f37b8f69f, type: 3}

Project/Assets/ML-Agents/Examples/Walker/TFModels/New Folder.meta (8 changes)


fileFormatVersion: 2
guid: 4536394c7630c45d29363e1475a9e5cf
folderAsset: yes
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

Project/Assets/ML-Agents/Examples/Walker/TFModels/with hh.meta (8 changes)


fileFormatVersion: 2
guid: ee9fbb0156ff64d169b17780a398702a
folderAsset: yes
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

Project/Assets/ML-Agents/Examples/Walker/TFModels/10k no hh.meta (8 changes)


fileFormatVersion: 2
guid: 6ecd0cba4f4484921949a0db709fef3d
folderAsset: yes
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

Project/Assets/ML-Agents/Examples/Walker/TFModels/15kstrength with hh.meta (8 changes)


fileFormatVersion: 2
guid: 9bf8e7dfffdb34e61a70f66fc9a337a5
folderAsset: yes
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

Project/Assets/ML-Agents/Examples/Walker/TFModels/20k no hh no rolling targ.meta (8 changes)


fileFormatVersion: 2
guid: b3414503c573d4815b63252e966c2673
folderAsset: yes
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

Project/Assets/ML-Agents/Examples/Walker/TFModels/20kDistToTargCloud.meta (8 changes)


fileFormatVersion: 2
guid: 8d773733493b846e9ac5f499b49ee72c
folderAsset: yes
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

Project/Assets/ML-Agents/Examples/Walker/TFModels/20kAvgVelAllBPMaxDist50.meta (8 changes)


fileFormatVersion: 2
guid: edfd5cf7fe9674978b62b3df669c4e0e
folderAsset: yes
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

Project/Assets/ML-Agents/Examples/Walker/TFModels/20kAllBPVelRelPosClampedTo100.meta (8 changes)


fileFormatVersion: 2
guid: 2d241685fc042499b9de5f4cc6f7deb0
folderAsset: yes
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

/Project/Assets/ML-Agents/Examples/Walker/TFModels/20kAllBPVelRelPosClampedTo100/WalkerDynamic.nn → /Project/Assets/ML-Agents/Examples/Walker/TFModels/WalkerDynamic.nn

/Project/Assets/ML-Agents/Examples/Walker/TFModels/20kAllBPVelRelPosClampedTo100/WalkerDynamic.nn.meta → /Project/Assets/ML-Agents/Examples/Walker/TFModels/WalkerDynamic.nn.meta

/Project/Assets/ML-Agents/Examples/Walker/TFModels/20kAllBPVelRelPosClampedTo100/WalkerStaticVariableSpeed.nn → /Project/Assets/ML-Agents/Examples/Walker/TFModels/WalkerStaticVariableSpeed.nn

/Project/Assets/ML-Agents/Examples/Walker/TFModels/20kAllBPVelRelPosClampedTo100/WalkerStaticVariableSpeed.nn.meta → /Project/Assets/ML-Agents/Examples/Walker/TFModels/WalkerStaticVariableSpeed.nn.meta
