浏览代码

block larger feature size; reacher fix and new reward

/develop/bisim-sac-transfer
yanchaosun 4 年前
当前提交
e39986ed
共有 10 个文件被更改,包括 70 次插入和 17 次删除
  1. 15
      Project/Assets/ML-Agents/Examples/Reacher/Prefabs/Agent.prefab
  2. 6
      Project/Assets/ML-Agents/Examples/Reacher/Prefabs/NewAgent.prefab
  3. 4
      Project/Assets/ML-Agents/Examples/Reacher/Scripts/NewReacherAgent.cs
  4. 6
      Project/Assets/ML-Agents/Examples/Reacher/Scripts/ReacherAgent.cs
  5. 2
      config/sac_transfer/PushBlock.yaml
  6. 4
      config/sac_transfer/PushBlockTransfer.yaml
  7. 2
      config/sac_transfer/Reacher.yaml
  8. 4
      config/sac_transfer/ReacherTransfer.yaml
  9. 32
      Project/Assets/ML-Agents/Examples/Reacher/Scripts/NewReacherGoal.cs
  10. 12
      Project/Assets/ML-Agents/Examples/Reacher/Scripts/NewReacherGoal.cs.meta

15
Project/Assets/ML-Agents/Examples/Reacher/Prefabs/Agent.prefab


m_Name:
m_EditorClassIdentifier:
m_BrainParameters:
vectorObservationSize: 33
numStackedVectorObservations: 1
vectorActionSize: 04000000
vectorActionDescriptions: []
vectorActionSpaceType: 1
VectorObservationSize: 33
NumStackedVectorObservations: 1
VectorActionSize: 04000000
VectorActionDescriptions: []
VectorActionSpaceType: 1
m_Model: {fileID: 11400000, guid: d7bdb6a78154f4cf99437d67e4a569a8, type: 3}
m_InferenceDevice: 0
m_BehaviorType: 0

m_ObservableAttributeHandling: 0
--- !u!114 &114955921823023820
MonoBehaviour:
m_ObjectHideFlags: 0

agentParameters:
maxStep: 0
hasUpgradedFromAgentParameters: 1
maxStep: 4000
MaxStep: 4000
pendulumA: {fileID: 1644872085946016}
pendulumB: {fileID: 1053261483945176}
hand: {fileID: 1654288206095398}

m_EditorClassIdentifier:
DecisionPeriod: 4
TakeActionsBetweenDecisions: 1
offsetStep: 0
--- !u!114 &7840105453417110232
MonoBehaviour:
m_ObjectHideFlags: 0

m_Script: {fileID: 11500000, guid: 3a6da8f78a394c6ab027688eab81e04d, type: 3}
m_Name:
m_EditorClassIdentifier:
debugCommandLineOverride:
--- !u!1 &1644872085946016
GameObject:
m_ObjectHideFlags: 0

6
Project/Assets/ML-Agents/Examples/Reacher/Prefabs/NewAgent.prefab


- component: {fileID: 33192810276213476}
- component: {fileID: 135746602902751552}
- component: {fileID: 23595512991530936}
- component: {fileID: 114928491800121992}
- component: {fileID: 2476992091532970082}
m_Layer: 0
m_Name: Goal
m_TagString: Untagged

m_SortingLayerID: 0
m_SortingLayer: 0
m_SortingOrder: 0
--- !u!114 &114928491800121992
--- !u!114 &2476992091532970082
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}

m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: a8c5005c8e6b84f1089c132cb87b44c4, type: 3}
m_Script: {fileID: 11500000, guid: becbcd118702d41089479cafb60baa32, type: 3}
m_Name:
m_EditorClassIdentifier:
agent: {fileID: 1395682910799436}

4
Project/Assets/ML-Agents/Examples/Reacher/Scripts/NewReacherAgent.cs


var torqueZ = Mathf.Clamp(vectorAction[1], -1f, 1f) * 150f;
m_RbA.AddTorque(new Vector3(torqueX, 0f, torqueZ));
AddReward( - (0.05f * torqueX * torqueX + 0.05f * torqueZ * torqueZ));
AddReward( - (0.05f * torqueX * torqueX + 0.05f * torqueZ * torqueZ));
}
/// <summary>

6
Project/Assets/ML-Agents/Examples/Reacher/Scripts/ReacherAgent.cs


sensor.AddObservation(goal.transform.localPosition);
sensor.AddObservation(hand.transform.localPosition);
sensor.AddObservation(m_GoalSpeed);
}

var torqueZ = Mathf.Clamp(vectorAction[1], -1f, 1f) * 150f;
m_RbA.AddTorque(new Vector3(torqueX, 0f, torqueZ));
AddReward( - (0.05f * torqueX * torqueX + 0.05f * torqueZ * torqueZ));
AddReward( - (0.05f * torqueX * torqueX + 0.05f * torqueZ * torqueZ));
}
/// <summary>

2
config/sac_transfer/PushBlock.yaml


forward_layers: 2
value_layers: 2
action_layers: 2
feature_size: 128
feature_size: 256
action_feature_size: 64
separate_policy_train: true
separate_policy_net: true

4
config/sac_transfer/PushBlockTransfer.yaml


forward_layers: 2
value_layers: 2
action_layers: 2
feature_size: 128
feature_size: 256
action_feature_size: 64
separate_policy_train: true
separate_policy_net: true

train_model: false
load_action: true
train_action: false
transfer_path: "results/block/PushBlock"
transfer_path: "results/block-f256/PushBlock"
network_settings:
normalize: false
hidden_units: 256

2
config/sac_transfer/Reacher.yaml


learning_rate: 0.0003
learning_rate_schedule: linear
model_schedule: constant
batch_size: 128
batch_size: 256
buffer_size: 6000000
buffer_init_steps: 0
tau: 0.005

4
config/sac_transfer/ReacherTransfer.yaml


learning_rate: 0.0003
learning_rate_schedule: constant
model_schedule: constant
batch_size: 128
batch_size: 256
buffer_size: 6000000
buffer_init_steps: 0
tau: 0.005

train_model: false
load_action: true
train_action: false
transfer_path: "results/sacmod-reacher-e1/Reacher"
transfer_path: "results/sacmod_reacher-qr/Reacher"
network_settings:
normalize: true
hidden_units: 128

32
Project/Assets/ML-Agents/Examples/Reacher/Scripts/NewReacherGoal.cs


using UnityEngine;
/// <summary>
/// Goal-zone trigger for the "new" Reacher task. While the agent's hand is
/// inside the goal's trigger collider, the agent accrues a small positive
/// reward each physics step, and a visual indicator is toggled on/off as the
/// hand enters and leaves.
/// </summary>
public class NewReacherGoal : MonoBehaviour
{
    // Agent that owns this goal; it is rewarded while the hand stays inside the trigger.
    public GameObject agent;
    // The hand object that must touch this goal.
    public GameObject hand;
    // Visual indicator shown (scaled to 1) while the hand is inside the goal.
    public GameObject goalOn;

    // Cached agent component. OnTriggerStay fires every physics tick while the
    // hand remains in the trigger, so calling GetComponent there each time is
    // wasteful; resolve it once and reuse it.
    NewReacherAgent m_AgentComponent;

    void OnTriggerEnter(Collider other)
    {
        if (other.gameObject == hand)
        {
            // Show the indicator at full size.
            goalOn.transform.localScale = new Vector3(1f, 1f, 1f);
        }
    }

    void OnTriggerExit(Collider other)
    {
        if (other.gameObject == hand)
        {
            // Hide the indicator by collapsing it to zero scale.
            goalOn.transform.localScale = new Vector3(0f, 0f, 0f);
        }
    }

    void OnTriggerStay(Collider other)
    {
        if (other.gameObject == hand)
        {
            // Lazily cache the component the first time the hand touches the
            // goal instead of calling GetComponent on every physics step.
            if (m_AgentComponent == null)
            {
                m_AgentComponent = agent.GetComponent<NewReacherAgent>();
            }
            m_AgentComponent.AddReward(0.01f);
        }
    }
}

12
Project/Assets/ML-Agents/Examples/Reacher/Scripts/NewReacherGoal.cs.meta


fileFormatVersion: 2
guid: becbcd118702d41089479cafb60baa32
timeCreated: 1508689729
licenseType: Pro
MonoImporter:
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:
正在加载...
取消
保存