浏览代码

normalize by hand

/asymm-envs
Andrew Cohen 5 年前
当前提交
fd7ee405
共有 3 个文件被更改,包括 17 次插入17 次删除
  1. 4
      Project/Assets/ML-Agents/Examples/Tennis/Prefabs/TennisArea.prefab
  2. 27
      Project/Assets/ML-Agents/Examples/Tennis/Scripts/TennisAgent.cs
  3. 3
      config/trainer_config.yaml

4
Project/Assets/ML-Agents/Examples/Tennis/Prefabs/TennisArea.prefab


m_Name:
m_EditorClassIdentifier:
m_BrainParameters:
vectorObservationSize: 12
vectorObservationSize: 14
numStackedVectorObservations: 3
vectorActionSize: 03000000
vectorActionDescriptions: []

m_Name:
m_EditorClassIdentifier:
m_BrainParameters:
vectorObservationSize: 12
vectorObservationSize: 14
numStackedVectorObservations: 3
vectorActionSize: 03000000
vectorActionDescriptions: []

27
Project/Assets/ML-Agents/Examples/Tennis/Scripts/TennisAgent.cs


/// <summary>
/// Appends this agent's vector observations to <paramref name="sensor"/>: the agent's own
/// position and velocity, the ball's position and velocity, the opponent's position and
/// velocity, the agent's z rotation, and a 0/1 flag derived from the ball's last floor hit.
/// </summary>
/// <param name="sensor">Vector sensor that receives each value via AddObservation.</param>
// NOTE(review): each group below appears twice - once with raw values and once divided by
// fixed constants. This listing looks like a diff view shown without +/- markers, so the raw
// calls are presumably the removed version and the scaled calls the replacement; confirm
// against the actual file before relying on the number or order of observations.
// NOTE(review): the scaled calls plus the final flag add up to 14 values - confirm this
// matches the configured vector observation size.
public override void CollectObservations(VectorSensor sensor)
{
// Agent position relative to myArea and agent velocity, unscaled.
// Every x component is multiplied by m_InvertMult (presumably +1/-1 to mirror the court
// for one side - confirm where the field is assigned).
sensor.AddObservation(m_InvertMult * (transform.position.x - myArea.transform.position.x));
sensor.AddObservation(transform.position.y - myArea.transform.position.y);
sensor.AddObservation(m_InvertMult * m_AgentRb.velocity.x);
sensor.AddObservation(m_AgentRb.velocity.y);
// Same agent quantities divided by hand-picked constants. The position divisors are
// negative (-25, -7), so the sign of those two values is flipped as well as scaled.
sensor.AddObservation(m_InvertMult * (transform.position.x - myArea.transform.position.x) / -25f);
sensor.AddObservation((transform.position.y - myArea.transform.position.y) / -7f);
sensor.AddObservation(m_InvertMult * m_AgentRb.velocity.x / 20f);
sensor.AddObservation(m_AgentRb.velocity.y / 20f);
// Ball position relative to myArea and ball velocity, unscaled.
sensor.AddObservation(m_InvertMult * (ball.transform.position.x - myArea.transform.position.x));
sensor.AddObservation(ball.transform.position.y - myArea.transform.position.y);
sensor.AddObservation(m_InvertMult * m_BallRb.velocity.x);
sensor.AddObservation(m_BallRb.velocity.y);
// Same ball quantities divided by fixed constants; unlike the agent and opponent
// positions, the ball's position divisors are positive (25, 20).
sensor.AddObservation(m_InvertMult * (ball.transform.position.x - myArea.transform.position.x) / 25f);
sensor.AddObservation((ball.transform.position.y - myArea.transform.position.y) / 20f);
sensor.AddObservation(m_InvertMult * m_BallRb.velocity.x / 40f);
sensor.AddObservation(m_BallRb.velocity.y / 60f);
// Opponent position relative to myArea, unscaled; the unscaled opponent velocity calls
// below are commented out.
sensor.AddObservation(m_InvertMult * (opponent.transform.position.x - myArea.transform.position.x));
sensor.AddObservation(opponent.transform.position.y - myArea.transform.position.y);
//sensor.AddObservation(m_InvertMult * m_OpponentRb.velocity.x);
//sensor.AddObservation(m_OpponentRb.velocity.y);
// Opponent position and velocity divided by the same constants used for the agent
// (-25, -7 for position; 20 for both velocity components).
sensor.AddObservation(m_InvertMult * (opponent.transform.position.x - myArea.transform.position.x) / -25f);
sensor.AddObservation((opponent.transform.position.y - myArea.transform.position.y) / -7f);
sensor.AddObservation(m_InvertMult * m_OpponentRb.velocity.x / 20f);
sensor.AddObservation(m_OpponentRb.velocity.y / 20f);
// Raw z component of the rotation quaternion (not an angle).
sensor.AddObservation(m_InvertMult * gameObject.transform.rotation.z);
//sensor.AddObservation(m_InvertMult * gameObject.transform.rotation.z);
// Z Euler angle, offset by (1 - m_InvertMult) * 180 (0 when m_InvertMult is 1, 360 when
// it is -1), multiplied by m_InvertMult, then shifted by 35 and divided by 125.
// NOTE(review): 35 and 125 are presumably chosen from the racket's allowed tilt range - confirm.
sensor.AddObservation((m_InvertMult * (gameObject.transform.rotation.eulerAngles.z - (1f - m_InvertMult) * 180f) - 35f) / 125f);
// 1 when the ball script's lastFloorHit equals FloorHitUnset, otherwise 0.
sensor.AddObservation(System.Convert.ToInt32(m_BallScript.lastFloorHit == HitWall.FloorHit.FloorHitUnset));
}

3
config/trainer_config.yaml


gamma: 0.995
Tennis:
normalize: true
hidden_units: 256
hidden_units: 512
beta: 1.0e-2
threaded: false
time_horizon: 1000

正在加载...
取消
保存