浏览代码

energy usage penalty to prevent superstition on serve

/asymm-envs
Andrew Cohen 5 年前
当前提交
d77f2566
共有 2 个文件被更改,包括 10 次插入和 7 次删除
  1. 8
      Project/Assets/ML-Agents/Examples/Tennis/Scripts/HitWall.cs
  2. 9
      Project/Assets/ML-Agents/Examples/Tennis/Scripts/TennisAgent.cs

8
Project/Assets/ML-Agents/Examples/Tennis/Scripts/HitWall.cs


void AgentAWins()
{
m_AgentA.SetReward(1f);
m_AgentB.SetReward(-1f);// - m_AgentB.timePenalty);
m_AgentA.SetReward(1f + m_AgentA.energyPenalty);
m_AgentB.SetReward(-1f);
m_AgentA.score += 1;
Reset();

{
m_AgentA.SetReward(-1f);// - m_AgentA.timePenalty);
m_AgentB.SetReward(1f);
m_AgentA.SetReward(-1f);
m_AgentB.SetReward(1f + m_AgentB.energyPenalty);
m_AgentB.score += 1;
Reset();

9
Project/Assets/ML-Agents/Examples/Tennis/Scripts/TennisAgent.cs


public float scale;
// Time-penalty accumulator; reset to 0 in OnEpisodeBegin.
// NOTE(review): the only visible increment of this field is commented out,
// so it appears to stay at 0 -- confirm whether it is still needed.
[HideInInspector]
public float timePenalty = 0;
// Running sum of the energy usage penalty for the current episode.
// The agent's movement code adds -0.0001f * (Mathf.Abs(moveX) + upward) to it,
// and OnEpisodeBegin resets it to 0. HitWall adds the scoring agent's value to
// that agent's 1f win reward (see the commit note: "energy usage penalty to
// prevent superstition on serve").
public float energyPenalty = 0;
Text m_TextComponent;
Rigidbody m_AgentRb;

}
var rgV = m_AgentRb.velocity;
m_AgentRb.velocity = new Vector3(Mathf.Clamp(rgV.x, -35f, 35f), Mathf.Min(rgV.y, 15f), rgV.z);
//timePenalty += -1f / 3000f;
// energy usage penalty cumulant
energyPenalty += -0.0001f * (Mathf.Abs(moveX) + upward);
m_TextComponent.text = score.ToString();
}

public override void OnEpisodeBegin()
{
timePenalty = 0;
energyPenalty = 0;
m_BallTouch = SideChannelUtils.GetSideChannel<FloatPropertiesChannel>().GetPropertyWithDefault("ball_touch", 0);
m_InvertMult = invertX ? -1f : 1f;
if (m_InvertMult == 1f)

正在加载...
取消
保存