浏览代码

opponent observations

/asymm-envs
Andrew Cohen 4 年前
当前提交
8ef0b3a8
共有 3 个文件被更改,包括 17 次插入和 9 次删除
  1. 14
      Project/Assets/ML-Agents/Examples/Tennis/Prefabs/TennisArea.prefab
  2. 9
      Project/Assets/ML-Agents/Examples/Tennis/Scripts/TennisAgent.cs
  3. 3
      config/trainer_config.yaml

14
Project/Assets/ML-Agents/Examples/Tennis/Prefabs/TennisArea.prefab


m_Name:
m_EditorClassIdentifier:
m_BrainParameters:
- vectorObservationSize: 10
+ vectorObservationSize: 14
- m_Model: {fileID: 11400000, guid: d6c5e749e4ceb4cf79640a5955706d3d, type: 3}
+ m_Model: {fileID: 11400000, guid: 13ca629afc16c42afb2d681a3d81ea6c, type: 3}
m_InferenceDevice: 0
m_BehaviorType: 0
m_BehaviorName: Tennis

hasUpgradedFromAgentParameters: 1
maxStep: 3000
ball: {fileID: 1273406647218856}
opponent: {fileID: 1882383181950958}
timePenalty: 0
energyPenalty: 0
--- !u!114 &2449890524009497851
MonoBehaviour:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
m_BrainParameters:
- vectorObservationSize: 10
+ vectorObservationSize: 14
- m_Model: {fileID: 11400000, guid: d6c5e749e4ceb4cf79640a5955706d3d, type: 3}
+ m_Model: {fileID: 11400000, guid: 13ca629afc16c42afb2d681a3d81ea6c, type: 3}
m_InferenceDevice: 0
m_BehaviorType: 0
m_BehaviorName: Tennis

hasUpgradedFromAgentParameters: 1
maxStep: 3000
ball: {fileID: 1273406647218856}
opponent: {fileID: 1170495812642400}
timePenalty: 0
energyPenalty: 0
--- !u!114 &6598495797138489682
MonoBehaviour:
m_ObjectHideFlags: 0

9
Project/Assets/ML-Agents/Examples/Tennis/Scripts/TennisAgent.cs


{
[Header("Specific to Tennis")]
public GameObject ball;
public GameObject opponent;
public bool invertX;
public int score;
public GameObject myArea;

Text m_TextComponent;
Rigidbody m_AgentRb;
Rigidbody m_BallRb;
Rigidbody m_OpponentRb;
HitWall m_BallScript;
TennisArea m_Area;
float m_InvertMult;

{
m_AgentRb = GetComponent<Rigidbody>();
m_BallRb = ball.GetComponent<Rigidbody>();
m_OpponentRb = opponent.GetComponent<Rigidbody>();
m_BallScript = ball.GetComponent<HitWall>();
m_Area = myArea.GetComponent<TennisArea>();
var canvas = GameObject.Find(k_CanvasName);

sensor.AddObservation(m_InvertMult * m_BallRb.velocity.x);
sensor.AddObservation(m_BallRb.velocity.y);
sensor.AddObservation(m_InvertMult * (opponent.transform.position.x - myArea.transform.position.x));
sensor.AddObservation(opponent.transform.position.y - myArea.transform.position.y);
sensor.AddObservation(m_InvertMult * m_OpponentRb.velocity.x);
sensor.AddObservation(m_OpponentRb.velocity.y);
AddReward(m_BallTouch * (1f / Vector3.Distance(ball.transform.position, transform.position)));
}
public override void OnActionReceived(float[] vectorAction)

3
config/trainer_config.yaml


Tennis:
normalize: true
max_steps: 5.0e7
learning_rate: 3.0e-4
- hidden_units: 256
+ hidden_units: 512
beta: 1.0e-2
time_horizon: 1000
self_play:

正在加载...
取消
保存