浏览代码

change reward function: stress less on action

/develop/bisim-sac-transfer
yanchaosun 4 年前
当前提交
7bc457f8
共有 2 个文件被更改,包括 5 次插入和 4 次删除
  1. 5
      Project/Assets/ML-Agents/Examples/Reacher/Scripts/NewReacherAgent.cs
  2. 4
      Project/Assets/ML-Agents/Examples/Reacher/Scripts/ReacherAgent.cs

5
Project/Assets/ML-Agents/Examples/Reacher/Scripts/NewReacherAgent.cs


torqueZ = Mathf.Clamp(vectorAction[3], -1f, 1f) * 150f;
m_RbB.AddTorque(new Vector3(torqueX, 0f, torqueZ));
AddReward( - 0.005f * (vectorAction[0] * vectorAction[0]
AddReward( - 0.001f * (vectorAction[0] * vectorAction[0]
+ vectorAction[1] * vectorAction[1]
+ vectorAction[2] * vectorAction[2]
+ vectorAction[3] * vectorAction[3]

{
if ((goal.transform.position - hand.transform.position).magnitude > 3.5f)
{
AddReward(-0.001f);
AddReward(-0.002f);
// Debug.Log((goal.transform.position - hand.transform.position).magnitude);
var radians = m_GoalDegree * Mathf.PI / 180f;
var goalX = 8f * Mathf.Cos(radians);
var goalY = 8f * Mathf.Sin(radians);

4
Project/Assets/ML-Agents/Examples/Reacher/Scripts/ReacherAgent.cs


torqueZ = Mathf.Clamp(vectorAction[3], -1f, 1f) * 150f;
m_RbB.AddTorque(new Vector3(torqueX, 0f, torqueZ));
AddReward( - 0.005f * (vectorAction[0] * vectorAction[0]
AddReward( - 0.001f * (vectorAction[0] * vectorAction[0]
+ vectorAction[1] * vectorAction[1]
+ vectorAction[2] * vectorAction[2]
+ vectorAction[3] * vectorAction[3]

{
if ((goal.transform.position - hand.transform.position).magnitude > 3.5f)
{
AddReward(-0.001f);
AddReward(-0.002f);
}
// AddReward( - 0.001f * (goal.transform.position - hand.transform.position).magnitude);
// Debug.Log((goal.transform.position - hand.transform.position).magnitude);

正在加载...
取消
保存