
Soccer refactor (#3331)

/asymm-envs
GitHub 5 years ago
Current commit
ae97ab3a
14 changed files with 1317 additions and 1960 deletions
  1. Project/Assets/ML-Agents/Examples/Soccer/Prefabs/SoccerFieldTwos.prefab (884)
  2. Project/Assets/ML-Agents/Examples/Soccer/Scripts/AgentSoccer.cs (171)
  3. Project/Assets/ML-Agents/Examples/Soccer/Scripts/SoccerBallController.cs (6)
  4. Project/Assets/ML-Agents/Examples/Soccer/Scripts/SoccerFieldArea.cs (79)
  5. Project/Assets/ML-Agents/Examples/Soccer/Scripts/SoccerSettings.cs (6)
  6. Project/ProjectSettings/TagManager.asset (10)
  7. com.unity.ml-agents/Runtime/Policy/BehaviorParameters.cs (2)
  8. config/trainer_config.yaml (46)
  9. docs/Learning-Environment-Examples.md (54)
  10. docs/images/tennis.png (999)
  11. Project/Assets/ML-Agents/Examples/Soccer/TFModels.meta (8)
  12. Project/Assets/ML-Agents/Examples/Soccer/TFModels/Soccer.nn (1001)
  13. Project/Assets/ML-Agents/Examples/Soccer/TFModels/Soccer.nn.meta (11)

884
Project/Assets/ML-Agents/Examples/Soccer/Prefabs/SoccerFieldTwos.prefab
File diff suppressed because it is too large
View File

171
Project/Assets/ML-Agents/Examples/Soccer/Scripts/AgentSoccer.cs


 using System;
 using UnityEngine;
 using MLAgents;

 // * opposing player
 public enum Team
 {
-    Purple,
-    Blue
+    Blue = 0,
+    Purple = 1
 }
-public enum AgentRole
-{
-    Striker,
-    Goalie
-}
-[HideInInspector]
-public AgentRole agentRole;
 float m_KickPower;
 int m_PlayerIndex;
 public SoccerFieldArea area;

 SoccerSettings m_SoccerSettings;
 Renderer m_AgentRenderer;
+BehaviorParameters m_BP;
+Vector3 m_Transform;

-public void ChooseRandomTeam()
-{
-    team = (Team)Random.Range(0, 2);
-    if (team == Team.Purple)
-    {
-        JoinPurpleTeam(agentRole);
-    }
-    else
-    {
-        JoinBlueTeam(agentRole);
-    }
-}
-public void JoinPurpleTeam(AgentRole role)
-{
-    agentRole = role;
-    team = Team.Purple;
-    m_AgentRenderer.material = m_SoccerSettings.purpleMaterial;
-    tag = "purpleAgent";
-}
-public void JoinBlueTeam(AgentRole role)
-{
-    agentRole = role;
-    team = Team.Blue;
-    m_AgentRenderer.material = m_SoccerSettings.blueMaterial;
-    tag = "blueAgent";
-}
 public override void InitializeAgent()
 {
     base.InitializeAgent();
+    m_BP = gameObject.GetComponent<BehaviorParameters>();
+    if (m_BP.m_TeamID == (int)Team.Blue)
+    {
+        team = Team.Blue;
+        m_Transform = new Vector3(transform.position.x - 4f, .5f, transform.position.z);
+    }
+    else
+    {
+        team = Team.Purple;
+        m_Transform = new Vector3(transform.position.x + 4f, .5f, transform.position.z);
+    }
     m_AgentRenderer = GetComponentInChildren<Renderer>();
     m_SoccerSettings = FindObjectOfType<SoccerSettings>();
     agentRb = GetComponent<Rigidbody>();

-    var action = Mathf.FloorToInt(act[0]);
-    // Goalies and Strikers have slightly different action spaces.
-    if (agentRole == AgentRole.Goalie)
-    {
-        m_KickPower = 0f;
-        switch (action)
-        {
-            case 1:
-                dirToGo = transform.forward * 1f;
-                m_KickPower = 1f;
-                break;
-            case 2:
-                dirToGo = transform.forward * -1f;
-                break;
-            case 4:
-                dirToGo = transform.right * -1f;
-                break;
-            case 3:
-                dirToGo = transform.right * 1f;
-                break;
-        }
-    }
-    else
-    {
-        m_KickPower = 0f;
-        switch (action)
-        {
-            case 1:
-                dirToGo = transform.forward * 1f;
-                m_KickPower = 1f;
-                break;
-            case 2:
-                dirToGo = transform.forward * -1f;
-                break;
-            case 3:
-                rotateDir = transform.up * 1f;
-                break;
-            case 4:
-                rotateDir = transform.up * -1f;
-                break;
-            case 5:
-                dirToGo = transform.right * -0.75f;
-                break;
-            case 6:
-                dirToGo = transform.right * 0.75f;
-                break;
-        }
-    }
+    m_KickPower = 0f;
+    var forwardAxis = (int)act[0];
+    var rightAxis = (int)act[1];
+    var rotateAxis = (int)act[2];
+    switch (forwardAxis)
+    {
+        case 1:
+            dirToGo = transform.forward * 1f;
+            m_KickPower = 1f;
+            break;
+        case 2:
+            dirToGo = transform.forward * -1f;
+            break;
+    }
+    switch (rightAxis)
+    {
+        case 1:
+            dirToGo = transform.right * 0.3f;
+            break;
+        case 2:
+            dirToGo = transform.right * -0.3f;
+            break;
+    }
+    switch (rotateAxis)
+    {
+        case 1:
+            rotateDir = transform.up * -1f;
+            break;
+        case 2:
+            rotateDir = transform.up * 1f;
+            break;
+    }
     transform.Rotate(rotateDir, Time.deltaTime * 100f);
     agentRb.AddForce(dirToGo * m_SoccerSettings.agentRunSpeed,
         ForceMode.VelocityChange);

 {
-    // Existential penalty for strikers.
-    if (agentRole == AgentRole.Striker)
-    {
-        AddReward(-1f / 3000f);
-    }
-    // Existential bonus for goalies.
-    if (agentRole == AgentRole.Goalie)
-    {
-        AddReward(1f / 3000f);
-    }
+    AddReward(-1f / 3000f);
     MoveAgent(vectorAction);
 }
+public override float[] Heuristic()
+{
+    var action = new float[3];
+    //forward
+    if (Input.GetKey(KeyCode.W))
+    {
+        action[0] = 1f;
+    }
+    if (Input.GetKey(KeyCode.S))
+    {
+        action[0] = 2f;
+    }
+    //rotate
+    if (Input.GetKey(KeyCode.A))
+    {
+        action[2] = 1f;
+    }
+    if (Input.GetKey(KeyCode.D))
+    {
+        action[2] = 2f;
+    }
+    //right
+    if (Input.GetKey(KeyCode.E))
+    {
+        action[1] = 1f;
+    }
+    if (Input.GetKey(KeyCode.Q))
+    {
+        action[1] = 2f;
+    }
+    return action;
+}
 /// <summary>
 /// Used to provide a "kick" to the ball.
 /// </summary>

 public override void AgentReset()
 {
-    if (m_SoccerSettings.randomizePlayersTeamForTraining)
-    {
-        ChooseRandomTeam();
-    }
-    JoinPurpleTeam(agentRole);
-    JoinBlueTeam(agentRole);
-    transform.position = area.GetRandomSpawnPos(agentRole, team);
+    transform.position = m_Transform;
     agentRb.velocity = Vector3.zero;
     agentRb.angularVelocity = Vector3.zero;
     SetResetParameters();
 }
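Taken together, the AgentSoccer.cs changes replace the two role-specific, single-branch action spaces with one three-branch discrete action space shared by all four agents. Below is a minimal consolidated sketch of the new mapping; the wrapper class, `Act` entry point, and `runSpeed` field are illustrative assumptions, while the branch values and multipliers are copied from the added lines above.

```csharp
using UnityEngine;

// Consolidated sketch of the post-refactor control scheme (not part of the
// commit). Branch 0 drives forward/backward, branch 1 strafes, branch 2 turns.
public class SoccerMoveSketch : MonoBehaviour
{
    public float runSpeed = 2f; // hypothetical stand-in for SoccerSettings.agentRunSpeed
    float m_KickPower;          // read elsewhere when the agent contacts the ball

    public void Act(int forwardAxis, int rightAxis, int rotateAxis)
    {
        var dirToGo = Vector3.zero;
        var rotateDir = Vector3.zero;
        m_KickPower = 0f;

        // Branch 0: running forward also arms a full-power kick.
        if (forwardAxis == 1) { dirToGo = transform.forward; m_KickPower = 1f; }
        if (forwardAxis == 2) { dirToGo = -transform.forward; }

        // Branch 1: strafing is deliberately slower (0.3x) than running.
        if (rightAxis == 1) { dirToGo = transform.right * 0.3f; }
        if (rightAxis == 2) { dirToGo = transform.right * -0.3f; }

        // Branch 2: turning is applied as rotation, not force.
        if (rotateAxis == 1) { rotateDir = -transform.up; }
        if (rotateAxis == 2) { rotateDir = transform.up; }

        transform.Rotate(rotateDir, Time.deltaTime * 100f);
        GetComponent<Rigidbody>().AddForce(dirToGo * runSpeed, ForceMode.VelocityChange);
    }
}
```

Note that, as in the diff, a nonzero strafe overwrites the forward direction rather than adding to it, so only one translation branch takes effect per step.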

6
Project/Assets/ML-Agents/Examples/Soccer/Scripts/SoccerBallController.cs


 {
     [HideInInspector]
     public SoccerFieldArea area;
     public AgentSoccer lastTouchedBy; //who was the last to touch the ball
     public string agentTag; //will be used to check if collided with a agent
-    public string purpleGoalTag; //will be used to check if collided with red goal
+    public string purpleGoalTag; //will be used to check if collided with purple goal

-    if (col.gameObject.CompareTag(purpleGoalTag)) //ball touched red goal
+    if (col.gameObject.CompareTag(purpleGoalTag)) //ball touched purple goal
     {
         area.GoalTouched(AgentSoccer.Team.Blue);
     }
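Only the two renamed comment lines change above; for orientation, here is a hedged sketch of how the surrounding collision handler plausibly fits together. The `OnCollisionEnter` wrapper and the `blueGoalTag` counterpart are assumptions, not part of this hunk.

```csharp
using UnityEngine;

// Illustrative ball controller: report a goal to the field area based on the
// tag of the object the ball collided with. Field names match the diff; the
// method body is an assumption.
public class BallGoalSketch : MonoBehaviour
{
    public SoccerFieldArea area;
    public string purpleGoalTag; // the goal the blue team scores into
    public string blueGoalTag;   // assumed counterpart for the purple team

    void OnCollisionEnter(Collision col)
    {
        if (col.gameObject.CompareTag(purpleGoalTag)) // ball touched purple goal
        {
            area.GoalTouched(AgentSoccer.Team.Blue);
        }
        else if (col.gameObject.CompareTag(blueGoalTag)) // ball touched blue goal
        {
            area.GoalTouched(AgentSoccer.Team.Purple);
        }
    }
}
```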

79
Project/Assets/ML-Agents/Examples/Soccer/Scripts/SoccerFieldArea.cs


 SoccerSettings m_SoccerSettings;

 public IEnumerator GoalScoredSwapGroundMaterial(Material mat, float time)
 {
     m_GroundRenderer.material = mat;
     yield return new WaitForSeconds(time);
     m_GroundRenderer.material = m_GroundMaterial;
 }
 void Awake()
 {
     m_SoccerSettings = FindObjectOfType<SoccerSettings>();
     if (goalTextUI) goalTextUI.SetActive(false);
 }
+public void AllPlayersDone(float reward)
+{
+    foreach (var ps in playerStates)
+    {
+        if (ps.agentScript.gameObject.activeInHierarchy)
+        {
+            if (reward != 0)
+            {
+                ps.agentScript.AddReward(reward);
+            }
+            ps.agentScript.Done();
+        }
+    }
+}
 public void GoalTouched(AgentSoccer.Team scoredTeam)
 {
     foreach (var ps in playerStates)
     {
         if (ps.agentScript.team == scoredTeam)
         {
-            RewardOrPunishPlayer(ps, m_SoccerSettings.strikerReward, m_SoccerSettings.goalieReward);
+            ps.agentScript.AddReward(1);
         }
         else
         {
-            RewardOrPunishPlayer(ps, m_SoccerSettings.strikerPunish, m_SoccerSettings.goaliePunish);
+            ps.agentScript.AddReward(-1);
         }
-        if (m_SoccerSettings.randomizePlayersTeamForTraining)
-        {
-            ps.agentScript.ChooseRandomTeam();
-        }
+        ps.agentScript.Done(); //all agents need to be reset
     }
     if (scoredTeam == AgentSoccer.Team.Purple)
     {
         StartCoroutine(GoalScoredSwapGroundMaterial(m_SoccerSettings.purpleMaterial, 1));
     }
     else
     {
         StartCoroutine(GoalScoredSwapGroundMaterial(m_SoccerSettings.blueMaterial, 1));
     }
     if (goalTextUI)
     {
         StartCoroutine(ShowGoalUI());

-public void RewardOrPunishPlayer(PlayerState ps, float striker, float goalie)
-{
-    if (ps.agentScript.agentRole == AgentSoccer.AgentRole.Striker)
-    {
-        ps.agentScript.AddReward(striker);
-    }
-    if (ps.agentScript.agentRole == AgentSoccer.AgentRole.Goalie)
-    {
-        ps.agentScript.AddReward(goalie);
-    }
-    ps.agentScript.Done(); //all agents need to be reset
-}
-public Vector3 GetRandomSpawnPos(AgentSoccer.AgentRole role, AgentSoccer.Team team)
-{
-    var xOffset = 0f;
-    if (role == AgentSoccer.AgentRole.Goalie)
-    {
-        xOffset = 13f;
-    }
-    if (role == AgentSoccer.AgentRole.Striker)
-    {
-        xOffset = 7f;
-    }
-    if (team == AgentSoccer.Team.Blue)
-    {
-        xOffset = xOffset * -1f;
-    }
-    var randomSpawnPos = ground.transform.position +
-        new Vector3(xOffset, 0f, 0f)
-        + (Random.insideUnitSphere * 2);
-    randomSpawnPos.y = ground.transform.position.y + 2;
-    return randomSpawnPos;
-}

     var randomSpawnPos = ground.transform.position +
-        new Vector3(0f, 0f, 0f)
-        + (Random.insideUnitSphere * 2);
-    randomSpawnPos.y = ground.transform.position.y + 2;
+        new Vector3(0f, 0f, 0f);
+    randomSpawnPos.y = ground.transform.position.y + .5f;
     return randomSpawnPos;
 }
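The refactor collapses the striker/goalie reward tables into a symmetric +1/-1 on goals, and the new AllPlayersDone helper gives the area a way to end an episode for every active agent at once. One plausible use, sketched below, is ending a drawn episode when a step budget runs out; the countdown itself is an assumption, only AllPlayersDone appears in the diff.

```csharp
// Hypothetical caller inside SoccerFieldArea: finish the episode as a draw
// after a fixed number of physics steps with no goal.
int m_StepsRemaining = 3000; // assumed budget, not from the commit

void FixedUpdate()
{
    m_StepsRemaining -= 1;
    if (m_StepsRemaining <= 0)
    {
        m_StepsRemaining = 3000;
        AllPlayersDone(0f); // reward of 0: neither team is credited for the draw
    }
}
```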

6
Project/Assets/ML-Agents/Examples/Soccer/Scripts/SoccerSettings.cs


 public Material purpleMaterial;
 public Material blueMaterial;
 public bool randomizePlayersTeamForTraining = true;
-public float strikerPunish; //if opponents scores, the striker gets this neg reward (-1)
-public float strikerReward; //if team scores a goal they get a reward (+1)
-public float goaliePunish; //if opponents score, goalie gets this neg reward (-1)
-public float goalieReward; //if team scores, goalie gets this reward (currently 0...no reward. can play with this later)
 }

10
Project/ProjectSettings/TagManager.asset


-
-
- invisible
- ball
- goalieWall
- goalie
- strikerWall
- striker
-
-
-
-
-
-
-
-

2
com.unity.ml-agents/Runtime/Policy/BehaviorParameters.cs


 string m_BehaviorName = "My Behavior";
 [HideInInspector]
 [SerializeField]
-int m_TeamID = 0;
+public int m_TeamID = 0;
 [HideInInspector]
 [SerializeField]
 [Tooltip("Use all Sensor components attached to child GameObjects of this Agent.")]
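Making m_TeamID public (and serialized) is what lets the Soccer scene mark which of the four agents belong to which self-play team. A hypothetical wiring script is sketched below; only the m_TeamID field itself comes from this commit.

```csharp
using UnityEngine;
using MLAgents;

// Hypothetical setup helper: assign team IDs so the trainer can pair the two
// teams for self-play. Matches the Team enum in AgentSoccer.cs (Blue = 0,
// Purple = 1); the class and array are illustrative.
public class TeamAssigner : MonoBehaviour
{
    public BehaviorParameters[] agents; // expected order: two blue, then two purple

    void Awake()
    {
        for (var i = 0; i < agents.Length; i++)
        {
            agents[i].m_TeamID = i < agents.Length / 2 ? 0 : 1;
        }
    }
}
```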

46
config/trainer_config.yaml


     num_layers: 2
     normalize: false

-Striker:
-    max_steps: 5.0e6
-    learning_rate: 1e-3
-    batch_size: 128
-    num_epoch: 3
-    buffer_size: 2000
-    beta: 1.0e-2
-    hidden_units: 256
-    summary_freq: 20000
-    time_horizon: 128
-    num_layers: 2
-    normalize: false
-
-Goalie:
-    max_steps: 5.0e6
-    learning_rate: 1e-3
-    batch_size: 320
-    num_epoch: 3
-    buffer_size: 2000
-    beta: 1.0e-2
-    hidden_units: 256
-    summary_freq: 20000
-    time_horizon: 128
-    num_layers: 2
-    normalize: false
-
 Pyramids:
     summary_freq: 30000
     time_horizon: 128

 Tennis:
     normalize: true
-    max_steps: 2e7
+    max_steps: 5.0e7
     batch_size: 1024
     buffer_size: 10240
     time_horizon: 1000
+    self_play:
+        window: 10
+        play_against_current_self_ratio: 0.5
+        save_steps: 50000
+        swap_steps: 50000
+
+Soccer:
+    normalize: false
+    max_steps: 5.0e7
+    learning_rate_schedule: constant
+    batch_size: 2048
+    buffer_size: 20480
+    hidden_units: 512
+    time_horizon: 1000
+    num_layers: 2
+    self_play:
+        window: 10
+        play_against_current_self_ratio: 0.5
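For readers new to the self_play block: per the ML-Agents self-play documentation of this era, these keys control how opponent snapshots are taken and sampled. A commented reading of the new Soccer entry follows (values copied from the diff; the Soccer block's save_steps/swap_steps are truncated above, so they are omitted here rather than guessed).

```yaml
Soccer:
    self_play:
        window: 10                            # number of past policy snapshots kept as potential opponents
        play_against_current_self_ratio: 0.5  # probability of facing the latest policy instead of a snapshot
        # save_steps / swap_steps (snapshot frequency and opponent-swap frequency)
        # are shown in the Tennis entry above; their Soccer values are cut off
        # in this view.
```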

54
docs/Learning-Environment-Examples.md


 ![Tennis](images/tennis.png)

-* Set-up: Two-player game where agents control rackets to bounce ball over a
+* Set-up: Two-player game where agents control rackets to hit a ball over the
-* Goal: The agents must bounce ball between one another while not dropping or
-  sending ball out of bounds.
+* Goal: The agents must hit the ball so that the opponent cannot hit a valid
+  return.
-  * +0.1 To agent when hitting ball over net.
-  * -0.1 To agent who let ball hit their ground, or hit ball out of bounds.
+  * +1.0 To the agent that wins the point. An agent wins a point by preventing
+    the opponent from hitting a valid return.
+  * -1.0 To the agent who loses the point.
-* Vector Observation space: 8 variables corresponding to position and velocity
-  of ball and racket.
-* Vector Action space: (Continuous) Size of 2, corresponding to movement
-  toward net or away from net, and jumping.
+* Vector Observation space: 9 variables corresponding to position, velocity
+  and orientation of ball and racket.
+* Vector Action space: (Continuous) Size of 3, corresponding to movement
+  toward net or away from net, jumping and rotation.
   * angle: Angle of the racket from the vertical (Y) axis.
     * Default: 55
     * Recommended Minimum: 35
     * Recommended Maximum: 65
-    * Default: 1
+    * Default: .5
 * Benchmark Mean Reward: 2.5

 ## [Push Block](https://youtu.be/jKdw216ZgoE)

 * Set-up: Environment where four agents compete in a 2 vs 2 toy soccer game.
 * Goal:
-  * Striker: Get the ball into the opponent's goal.
-  * Goalie: Prevent the ball from entering its own goal.
-* Agents: The environment contains four agents, with two different sets of
-  Behavior Parameters : Striker and Goalie.
+  * Get the ball into the opponent's goal while preventing
+    the ball from entering own goal.
+* Agents: The environment contains four agents, with the same
+  Behavior Parameters : Soccer.
 * Agent Reward Function (dependent):
-  * Striker:
-    * -0.1 When ball enters own team's goal.
-    * -0.001 Existential penalty.
-  * Goalie:
-    * +0.1 When ball enters opponents goal.
-    * +0.001 Existential bonus.
+  * -0.001 Existential penalty.
-* Vector Observation space: 112 corresponding to local 14 ray casts, each
-  detecting 7 possible object types, along with the object's distance.
-  Perception is in 180 degree view from front of agent.
-* Vector Action space: (Discrete) One Branch
-  * Striker: 6 actions corresponding to forward, backward, sideways movement,
-  * Goalie: 4 actions corresponding to forward, backward, sideways movement.
+* Vector Observation space: 336 corresponding to 11 ray-casts forward distributed over 120 degrees (264)
+  and 3 ray-casts backward distributed over 90 degrees each detecting 6 possible object types, along with the object's distance.
+  The forward ray-casts contribute 264 state dimensions and backward 72 state dimensions (see the note after this list).
+* Vector Action space: (Discrete) Three branched actions corresponding to forward, backward, sideways movement,
+  as well as rotation.
 * Visual Observations: None
 * Float Properties: Two
   * ball_scale: Specifies the scale of the ball in the 3 dimensions (equal across the three dimensions)
   * gravity: Magnitude of gravity
     * Default: 9.81
     * Recommended minimum: 6
     * Recommended maximum: 20
-* Benchmark Mean Reward (Striker & Goalie): 0 (the means will be inverse
-  of each other and criss crosses during training) __Note that our trainer is currently unable to consistently train this environment__
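A plausible accounting of the new 336-dimensional observation, assuming the standard ML-Agents ray encoding of (detectable tags + 2) floats per ray (one-hot tag, hit flag, normalized distance) and 3 stacked observations (the stacking count is an inference, not stated in the diff): 11 forward rays × (6 + 2) = 88 floats, × 3 stacks = 264; 3 backward rays × 8 = 24 floats, × 3 stacks = 72; and 264 + 72 = 336.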
## Walker

999
docs/images/tennis.png
File diff suppressed because it is too large
View File

8
Project/Assets/ML-Agents/Examples/Soccer/TFModels.meta


fileFormatVersion: 2
guid: f1322978b914041748ae9def364eeef1
folderAsset: yes
DefaultImporter:
  externalObjects: {}
  userData:
  assetBundleName:
  assetBundleVariant:

1001
Project/Assets/ML-Agents/Examples/Soccer/TFModels/Soccer.nn
File diff suppressed because it is too large
View File

11
Project/Assets/ML-Agents/Examples/Soccer/TFModels/Soccer.nn.meta


fileFormatVersion: 2
guid: 9d26b71f04a2d4680a68d8de4f6b62e9
ScriptedImporter:
  fileIDToRecycleName:
    11400000: main obj
    11400002: model data
  externalObjects: {}
  userData:
  assetBundleName:
  assetBundleVariant:
  script: {fileID: 11500000, guid: 19ed1486aa27d4903b34839f37b8f69f, type: 3}