浏览代码

new reacher reward

/develop/bisim-sac-transfer
yanchaosun 4 年前
当前提交
27dffa4d
共有 7 个文件被更改,包括 13 次插入和 13 次删除
  1. 2
      Project/Assets/ML-Agents/Examples/Reacher/Prefabs/Agent.prefab
  2. 2
      Project/Assets/ML-Agents/Examples/Reacher/Scripts/NewReacherGoal.cs
  3. 12
      Project/Assets/ML-Agents/Examples/Reacher/Scripts/ReacherAgent.cs
  4. 2
      Project/Assets/ML-Agents/Examples/Reacher/Scripts/ReacherGoal.cs
  5. 2
      config/sac/Reacher.yaml
  6. 2
      config/sac_transfer/Reacher.yaml
  7. 4
      config/sac_transfer/ReacherTransfer.yaml

2
Project/Assets/ML-Agents/Examples/Reacher/Prefabs/Agent.prefab


VectorActionSize: 04000000
VectorActionDescriptions: []
VectorActionSpaceType: 1
m_Model: {fileID: 11400000, guid: d7bdb6a78154f4cf99437d67e4a569a8, type: 3}
m_Model: {fileID: 11400000, guid: e12acd64209f9468c899b9708b2702c3, type: 3}
m_InferenceDevice: 0
m_BehaviorType: 0
m_BehaviorName: Reacher

2
Project/Assets/ML-Agents/Examples/Reacher/Scripts/NewReacherGoal.cs


{
if (other.gameObject == hand)
{
agent.GetComponent<NewReacherAgent>().AddReward(0.01f);
agent.GetComponent<NewReacherAgent>().AddReward(0.05f);
}
}
}

12
Project/Assets/ML-Agents/Examples/Reacher/Scripts/ReacherAgent.cs


var torqueZ = Mathf.Clamp(vectorAction[1], -1f, 1f) * 150f;
m_RbA.AddTorque(new Vector3(torqueX, 0f, torqueZ));
+ vectorAction[1] * vectorAction[1]
+ vectorAction[2] * vectorAction[2]
+ vectorAction[3] * vectorAction[3]
));
+ vectorAction[1] * vectorAction[1]
+ vectorAction[2] * vectorAction[2]
+ vectorAction[3] * vectorAction[3]
));
}
/// <summary>

{
AddReward( - 0.001f * (goal.transform.position - hand.transform.position).magnitude);
// Debug.Log( - 0.001f * (goal.transform.position - hand.transform.position).magnitude);
var radians = m_GoalDegree * Mathf.PI / 180f;
var goalX = 8f * Mathf.Cos(radians);
var goalY = 8f * Mathf.Sin(radians);

2
Project/Assets/ML-Agents/Examples/Reacher/Scripts/ReacherGoal.cs


{
if (other.gameObject == hand)
{
agent.GetComponent<ReacherAgent>().AddReward(0.01f);
agent.GetComponent<ReacherAgent>().AddReward(0.05f);
}
}
}

2
config/sac/Reacher.yaml


gamma: 0.99
strength: 1.0
keep_checkpoints: 5
max_steps: 6000000
max_steps: 3000000
time_horizon: 1000
summary_freq: 60000
threaded: true

2
config/sac_transfer/Reacher.yaml


gamma: 0.99
strength: 1.0
keep_checkpoints: 5
max_steps: 6000000
max_steps: 3000000
time_horizon: 1000
summary_freq: 60000
threaded: true

4
config/sac_transfer/ReacherTransfer.yaml


train_model: false
load_action: true
train_action: false
transfer_path: "results/sacmod_reacher-qr/Reacher"
transfer_path: "results/reacher/Reacher"
network_settings:
normalize: true
hidden_units: 128

gamma: 0.99
strength: 1.0
keep_checkpoints: 5
max_steps: 6000000
max_steps: 3000000
time_horizon: 1000
summary_freq: 60000
threaded: true
正在加载...
取消
保存