浏览代码

tennis reward fix

/asymm-envs
Andrew Cohen 4 年前
当前提交
8431ecb5
共有 4 个文件被更改，包括 11 次插入和 4 次删除
  1. 2
      Project/Assets/ML-Agents/Examples/Tennis/Prefabs/TennisArea.prefab
  2. 4
      Project/Assets/ML-Agents/Examples/Tennis/Scripts/HitWall.cs
  3. 7
      Project/Assets/ML-Agents/Examples/Tennis/Scripts/TennisAgent.cs
  4. 2
      config/trainer_config.yaml

2
Project/Assets/ML-Agents/Examples/Tennis/Prefabs/TennisArea.prefab


myArea: {fileID: 1541947554534326}
angle: 0
scale: 0
timePenalty: 0
--- !u!114 &2449890524009497851
MonoBehaviour:
m_ObjectHideFlags: 0

myArea: {fileID: 1541947554534326}
angle: 0
scale: 0
timePenalty: 0
--- !u!114 &6598495797138489682
MonoBehaviour:
m_ObjectHideFlags: 0

4
Project/Assets/ML-Agents/Examples/Tennis/Scripts/HitWall.cs


void AgentAWins()
{
- m_AgentA.SetReward(1);
+ m_AgentA.SetReward(1 + m_AgentA.timePenalty);
m_AgentB.SetReward(-1);
m_AgentA.score += 1;
Reset();

void AgentBWins()
{
m_AgentA.SetReward(-1);
- m_AgentB.SetReward(1);
+ m_AgentB.SetReward(1 + m_AgentB.timePenalty);
m_AgentB.score += 1;
Reset();

7
Project/Assets/ML-Agents/Examples/Tennis/Scripts/TennisAgent.cs


public float angle;
public float scale;
[HideInInspector]
public float timePenalty = 0;
Text m_TextComponent;
Rigidbody m_AgentRb;
Rigidbody m_BallRb;

transform.position.y,
transform.position.z);
}
- AddReward(-1f / 3000f);
+ // AddReward(-1f / 3000f);
+ timePenalty += -1f / 3000f;
m_TextComponent.text = score.ToString();
}

public override void OnEpisodeBegin()
{
timePenalty = 0;
m_InvertMult = invertX ? -1f : 1f;
transform.position = new Vector3(-m_InvertMult * Random.Range(6f, 8f), -1.5f, -1.8f) + transform.parent.transform.position;

2
config/trainer_config.yaml


time_horizon: 1000
self_play:
window: 10
- play_against_latest_model_ratio: 0.2
+ play_against_latest_model_ratio: 0.5
save_steps: 50000
swap_steps: 50000
team_change: 100000

正在加载...
取消
保存