
Add reward manager and hurryUpReward

/active-variablespeed
HH 4 years ago
Current commit
decf9a0a
5 files changed, with 206 additions and 19 deletions
  1. Project/Assets/ML-Agents/Examples/Walker/Prefabs/DynamicPlatformWalker.prefab (56 changes)
  2. Project/Assets/ML-Agents/Examples/Walker/Scenes/WalkerDynamic.unity (14 changes)
  3. Project/Assets/ML-Agents/Examples/Walker/Scripts/WalkerAgent.cs (44 changes)
  4. Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/RewardManager.cs (100 changes)
  5. Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/RewardManager.cs.meta (11 changes)

Project/Assets/ML-Agents/Examples/Walker/Prefabs/DynamicPlatformWalker.prefab (56 changes)


  m_SortingLayerID: 0
  m_SortingLayer: 0
  m_SortingOrder: 0
--- !u!114 &758428434940870733
MonoBehaviour:
  m_ObjectHideFlags: 0
  m_CorrespondingSourceObject: {fileID: 0}
  m_PrefabInstance: {fileID: 0}
  m_PrefabAsset: {fileID: 0}
  m_GameObject: {fileID: 6065910098925129117}
  m_Enabled: 1
  m_EditorHideFlags: 0
  m_Script: {fileID: 11500000, guid: 2d2b3caecf069467ebf3a650d8ee401e, type: 3}
  m_Name:
  m_EditorClassIdentifier:
  rewardsList:
  - rewardKey: matchSpeed
    rewardScalar: 0.01
    rewardThisStep: 0
    cumulativeThisEpisode: 0
    cumulativeThisSession: 0
    maxRewardThisSession: 0
  - rewardKey: lookAtTarget
    rewardScalar: 0.01
    rewardThisStep: 0
    cumulativeThisEpisode: 0
    cumulativeThisSession: 0
    maxRewardThisSession: 0
  - rewardKey: headHeightOverFeet
    rewardScalar: 0.005
    rewardThisStep: 0
    cumulativeThisEpisode: 0
    cumulativeThisSession: 0
    maxRewardThisSession: 0
  - rewardKey: hurryUp
    rewardScalar: 1
    rewardThisStep: 0
    cumulativeThisEpisode: 0
    cumulativeThisSession: 0
    maxRewardThisSession: 0
  maxSteps: 0
--- !u!1001 &6359877978260855390
PrefabInstance:
  m_ObjectHideFlags: 0

      value:
      objectReference: {fileID: 11400000, guid: e785133c5b0ac461588106642550d1b3, type: 3}
    - target: {fileID: 895268871377934297, guid: 765582efd9dda46ed98564603316353f, type: 3}
      propertyPath: m_BrainParameters.VectorObservationSize
      value: 237
      objectReference: {fileID: 0}
    - target: {fileID: 895268871377934298, guid: 765582efd9dda46ed98564603316353f, type: 3}
      propertyPath: m_LocalPosition.x

      propertyPath: target
      value:
      objectReference: {fileID: 5064725739247198300}
    - target: {fileID: 7408209125961349353, guid: 765582efd9dda46ed98564603316353f, type: 3}
      propertyPath: rewardManager
      value:
      objectReference: {fileID: 758428434940870733}
--- !u!1 &6065910098925129117 stripped
GameObject:
  m_CorrespondingSourceObject: {fileID: 895268871377934275, guid: 765582efd9dda46ed98564603316353f, type: 3}
  m_PrefabInstance: {fileID: 6359877978260855390}
  m_PrefabAsset: {fileID: 0}
--- !u!4 &6065910098925129092 stripped
Transform:
  m_CorrespondingSourceObject: {fileID: 895268871377934298, guid: 765582efd9dda46ed98564603316353f, type: 3}
  m_PrefabInstance: {fileID: 6359877978260855390}
  m_PrefabAsset: {fileID: 0}
  m_GameObject: {fileID: 0}

  m_GameObject: {fileID: 6065910098925129117}
  m_Enabled: 1
  m_EditorHideFlags: 0
  m_Script: {fileID: 11500000, guid: ccb0f85f0009540d7ad997952e2aed7b, type: 3}
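For intuition about the scalars serialized above: the RewardManager component (added later in this diff) multiplies each raw reward by its entry's rewardScalar before passing it to Agent.AddReward. A rough per-step budget under these scalars, with hypothetical raw values chosen only to illustrate the arithmetic:

// Illustrative per-step contributions: rawValue * rewardScalar.
// Raw values are hypothetical; the scalars come from the prefab above.
public static class RewardScaleExample
{
    public const float MatchSpeed         =  0.8f * 0.01f;  //  0.008
    public const float LookAtTarget       =  0.9f * 0.01f;  //  0.009
    public const float HeadHeightOverFeet =  1.4f * 0.005f; //  0.007
    public const float HurryUp            = -1.0f * 1f;     // -1.0, a flat time penalty each step
}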

Project/Assets/ML-Agents/Examples/Walker/Scenes/WalkerDynamic.unity (14 changes)


    debug:
      m_Flags: 0
  m_NavMeshData: {fileID: 0}
--- !u!114 &79411373 stripped
MonoBehaviour:
  m_CorrespondingSourceObject: {fileID: 758428434940870733, guid: f51e8260728fd4c8fa87bcda9d0e2027, type: 3}
  m_PrefabInstance: {fileID: 1615064471}
  m_PrefabAsset: {fileID: 0}
  m_GameObject: {fileID: 0}
  m_Enabled: 1
  m_EditorHideFlags: 0
  m_Script: {fileID: 11500000, guid: 2d2b3caecf069467ebf3a650d8ee401e, type: 3}
  m_Name:
  m_EditorClassIdentifier:
--- !u!1001 &193531851
PrefabInstance:
  m_ObjectHideFlags: 0

      type: 3}
      propertyPath: rewardManager
      value:
-     objectReference: {fileID: 0}
+     objectReference: {fileID: 79411373}
    - target: {fileID: 4712600297668500197, guid: f51e8260728fd4c8fa87bcda9d0e2027, type: 3}
      propertyPath: m_Name

Project/Assets/ML-Agents/Examples/Walker/Scripts/WalkerAgent.cs (44 changes)


public class WalkerAgent : Agent
{
    public float maximumWalkingSpeed = 999; //The max walk velocity magnitude an agent will be rewarded for

    [Range(0, 10)]
    public float walkingSpeed = 10; //The target walking speed to try to match (randomized each episode)

    // Quaternion m_WalkDirLookRot; //Will hold the rotation to our target

    [Header("Target To Walk Towards")] [Space(10)]
    public TargetController target; //Target the agent will walk towards.

        orientationCube.UpdateOrientation(hips, target.transform);
        rewardManager.ResetEpisodeRewards();
        walkingSpeed = Random.Range(0.0f, 10.0f); //Random Walk Speed
        SetResetParameters();
    }

    /// </summary>
    public override void CollectObservations(VectorSensor sensor)
    {
        sensor.AddObservation(walkingSpeed); //observe the commanded speed so the policy can vary its gait
        sensor.AddObservation(Quaternion.FromToRotation(hips.forward, orientationCube.transform.forward));
        sensor.AddObservation(Quaternion.FromToRotation(head.forward, orientationCube.transform.forward));
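These additions line up with the prefab override earlier in this diff that sets m_BrainParameters.VectorObservationSize to 237: a quaternion observation writes 4 floats, so the hunk above accounts for 1 + 4 + 4 = 9 of the 237 values (the remainder comes from body-part observations not shown here). A minimal sketch of that tally using ML-Agents' VectorSensor directly, with placeholder values:

using Unity.MLAgents.Sensors;
using UnityEngine;

public static class ObservationTallyExample
{
    public static void Demo()
    {
        var sensor = new VectorSensor(237);         // sized to match the prefab override
        sensor.AddObservation(10f);                 // walkingSpeed: 1 float
        sensor.AddObservation(Quaternion.identity); // hips-to-cube rotation: 4 floats
        sensor.AddObservation(Quaternion.identity); // head-to-cube rotation: 4 floats
    }
}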

    void FixedUpdate()
    {
        UpdateRewards();

    public float headFacingDot;
    public float hipsFacingDot;
    public float lookAtTargetReward; //reward for looking at the target
    public float matchSpeedReward; //reward for matching the desired walking speed.
    public float headHeightOverFeetReward; //reward for standing up straight-ish
    public float hurryUpReward = -1; //constant per-step penalty; don't waste time

        headFacingDot = Vector3.Dot(cubeForward, head.forward);
        hipsFacingDot = Vector3.Dot(cubeForward, hips.forward);

        // a. Match target speed.
        // This reward will approach 1 if it matches and approach zero as it deviates.
        // (Replaces the old goal-direction reward, which used maximumWalkingSpeed:)
        // var moveTowardsTargetReward = Mathf.Exp(-0.1f * (orientationCube.transform.forward * maximumWalkingSpeed - m_JdController.bodyPartsDict[hips].rb.velocity).sqrMagnitude);
        matchSpeedReward =
            Mathf.Exp(-0.1f * (orientationCube.transform.forward * walkingSpeed -
                m_JdController.bodyPartsDict[hips].rb.velocity).sqrMagnitude);
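For intuition, this exponential shaping maps the squared velocity error into (0, 1]: a perfect match gives 1, and the reward decays smoothly as the hips' velocity deviates from the commanded speed. A standalone sketch with hypothetical velocities, using the same 0.1 constant:

using UnityEngine;

public static class MatchSpeedExample
{
    // Reproduces the shaping above: exp(-0.1 * |goalVel - actualVel|^2).
    static float Shape(Vector3 goalVel, Vector3 actualVel)
    {
        return Mathf.Exp(-0.1f * (goalVel - actualVel).sqrMagnitude);
    }

    public static void Demo()
    {
        var goal = Vector3.forward * 10f;              // walkingSpeed = 10 along the cube's forward
        Debug.Log(Shape(goal, Vector3.forward * 10f)); // 1.0 (perfect match)
        Debug.Log(Shape(goal, Vector3.forward * 7f));  // exp(-0.9) ~= 0.41
        Debug.Log(Shape(goal, Vector3.zero));          // exp(-10) ~= 0.00005 (standing still)
    }
}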
        // b. Look at target.
        lookAtTargetReward = Vector3.Dot(cubeForward, head.forward);

        // c. Head height over feet.
        headHeightOverFeetReward =
            ((head.position.y - footL.position.y) + (head.position.y - footR.position.y) / 10); //Should normalize to ~1
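A quick check of the "~1" comment, with a hypothetical head height of about 1.3 m above each foot; note that as written the / 10 divides only the right-foot term:

public static class HeadHeightExample
{
    // (1.3) + (1.3) / 10 = 1.3 + 0.13 = 1.43 -- order 1, as the comment intends.
    // Dividing the whole sum by 10 instead would give 0.26, so the parenthesization matters.
    public const float Reward = (1.3f - 0f) + (1.3f - 0f) / 10f; // ~= 1.43
}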
rewardManager.UpdateReward("moveTowardsTarget", moveTowardsTargetReward);
rewardManager.UpdateReward("matchSpeed", matchSpeedReward);
// rewardManager.UpdateReward("moveTowardsTargetReward", +0.02f * moveTowardsTargetReward);
// rewardManager.UpdateReward("lookAtTargetReward", +0.01f * lookAtTargetReward);
// rewardManager.UpdateReward("headHeightOverFeetReward", +0.01f * headHeightOverFeetReward);
rewardManager.UpdateReward("hurryUp", hurryUpReward);
        // void FixedUpdate()
        // {
        //     var cubeForward = orientationCube.transform.forward;

        {
            SetTorsoMass();
        }
    }
}

Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/RewardManager.cs (100 changes)


using System;
using System.Collections;
using System.Collections.Generic;
using Unity.MLAgents;
using UnityEngine;

public class RewardManager : MonoBehaviour
{
    [Serializable]
    public class Reward
    {
        public string rewardKey;
        // [Range(.01f, .05f)]
        public float rewardScalar = .01f;
        // public float rewardScalar;
        public float rewardThisStep;
        public float cumulativeThisEpisode;
        public float cumulativeThisSession;
        public float maxRewardThisSession;
        // public Reward(string k)
        // public Reward()
        // {
        ////     rewardKey = k;
        //     rewardScalar = .01f;
        // }
    }

    private Agent m_thisAgent;
    public List<Reward> rewardsList = new List<Reward>();
    public Dictionary<string, Reward> rewardsDict = new Dictionary<string, Reward>();
    public float maxSteps;

    private void OnEnable()
    // private void Awake()
    {
        m_thisAgent = GetComponent<Agent>();
        maxSteps = m_thisAgent.MaxStep;
        // Mirror the Inspector-configured list into a dictionary for key lookups.
        foreach (var item in rewardsList)
        {
            if (rewardsDict.ContainsKey(item.rewardKey)) continue; //already added; don't skip the rest
            rewardsDict.Add(item.rewardKey, item);
        }
    }
    // public void AddReward(Reward r)
    // {
    //     if (rewardsDict.ContainsKey(r.rewardKey)) return; //don't need to add
    //     rewardsDict.Add(r.rewardKey, r);
    // }

    // public void AddReward(string rewardKey)
    // {
    //     if (rewardsDict.ContainsKey(rewardKey)) return; //don't need to add
    //     Reward newReward = new Reward(rewardKey);
    //     rewardsDict.Add(rewardKey, newReward);
    //     rewardsList.Add(newReward);
    // }
    //Scale and apply a reward, and update its running tallies
    public void UpdateReward(string key, float rawVal)
    {
        float val = rawVal * rewardsDict[key].rewardScalar;
        rewardsDict[key].maxRewardThisSession = 1 / maxSteps; //NOTE: stores a constant per-step cap, not a running max
        rewardsDict[key].rewardThisStep = val;
        rewardsDict[key].cumulativeThisEpisode += val;
        rewardsDict[key].cumulativeThisSession += val;
        m_thisAgent.AddReward(val);
    }
    // //Add new rewards
    // public void UpdateReward(string key, float val)
    // {
    //     rewardsDict[key].rewardThisStep = val;
    //     rewardsDict[key].cumulativeThisEpisode += val;
    //     rewardsDict[key].cumulativeThisSession += val;
    //     m_thisAgent.AddReward(val);
    // }
    //Resets cumulative episode reward
    public void ResetEpisodeRewards()
    {
        foreach (var item in rewardsDict)
        {
            item.Value.rewardThisStep = 0;
            item.Value.cumulativeThisEpisode = 0;
        }
    }
}
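Taken together, a minimal usage sketch of the manager (hypothetical agent class; assumes the RewardManager sits on the same GameObject as the Agent, with "matchSpeed" and "hurryUp" entries configured in the Inspector as DynamicPlatformWalker.prefab does above):

using Unity.MLAgents;
using UnityEngine;

public class RewardManagerDemoAgent : Agent
{
    RewardManager m_RewardManager;

    public override void Initialize()
    {
        m_RewardManager = GetComponent<RewardManager>();
    }

    public override void OnEpisodeBegin()
    {
        m_RewardManager.ResetEpisodeRewards(); // zero the per-episode tallies
    }

    void FixedUpdate()
    {
        // Raw values are scaled by each entry's rewardScalar before reaching
        // Agent.AddReward: 0.41 * 0.01 for matchSpeed, -1 * 1 for hurryUp.
        m_RewardManager.UpdateReward("matchSpeed", 0.41f);
        m_RewardManager.UpdateReward("hurryUp", -1f);
    }
}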

Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/RewardManager.cs.meta (11 changes)


fileFormatVersion: 2
guid: 2d2b3caecf069467ebf3a650d8ee401e
MonoImporter:
  externalObjects: {}
  serializedVersion: 2
  defaultReferences: []
  executionOrder: 0
  icon: {instanceID: 0}
  userData:
  assetBundleName:
  assetBundleVariant: