
Soccer refactor (#3331)

/asymm-envs
GitHub 5 years ago
Current commit
ae97ab3a
14 changed files with 1317 additions and 1960 deletions
  1. Project/Assets/ML-Agents/Examples/Soccer/Prefabs/SoccerFieldTwos.prefab (884)
  2. Project/Assets/ML-Agents/Examples/Soccer/Scripts/AgentSoccer.cs (171)
  3. Project/Assets/ML-Agents/Examples/Soccer/Scripts/SoccerBallController.cs (6)
  4. Project/Assets/ML-Agents/Examples/Soccer/Scripts/SoccerFieldArea.cs (79)
  5. Project/Assets/ML-Agents/Examples/Soccer/Scripts/SoccerSettings.cs (6)
  6. Project/ProjectSettings/TagManager.asset (10)
  7. com.unity.ml-agents/Runtime/Policy/BehaviorParameters.cs (2)
  8. config/trainer_config.yaml (46)
  9. docs/Learning-Environment-Examples.md (54)
  10. docs/images/tennis.png (999)
  11. Project/Assets/ML-Agents/Examples/Soccer/TFModels.meta (8)
  12. Project/Assets/ML-Agents/Examples/Soccer/TFModels/Soccer.nn (1001)
  13. Project/Assets/ML-Agents/Examples/Soccer/TFModels/Soccer.nn.meta (11)

884
Project/Assets/ML-Agents/Examples/Soccer/Prefabs/SoccerFieldTwos.prefab
File diff suppressed because it is too large
View File

171
Project/Assets/ML-Agents/Examples/Soccer/Scripts/AgentSoccer.cs


 using System;
 using UnityEngine;
 using MLAgents;

 // * opposing player
 public enum Team
 {
-    Purple,
-    Blue
+    Blue = 0,
+    Purple = 1
 }
-public enum AgentRole
-{
-    Striker,
-    Goalie
-}
-[HideInInspector]
-public AgentRole agentRole;
 float m_KickPower;
 int m_PlayerIndex;
 public SoccerFieldArea area;

 SoccerSettings m_SoccerSettings;
 Renderer m_AgentRenderer;
+BehaviorParameters m_BP;
+Vector3 m_Transform;

-public void ChooseRandomTeam()
-{
-    team = (Team)Random.Range(0, 2);
-    if (team == Team.Purple)
-    {
-        JoinPurpleTeam(agentRole);
-    }
-    else
-    {
-        JoinBlueTeam(agentRole);
-    }
-}
-public void JoinPurpleTeam(AgentRole role)
-{
-    agentRole = role;
-    team = Team.Purple;
-    m_AgentRenderer.material = m_SoccerSettings.purpleMaterial;
-    tag = "purpleAgent";
-}
-public void JoinBlueTeam(AgentRole role)
-{
-    agentRole = role;
-    team = Team.Blue;
-    m_AgentRenderer.material = m_SoccerSettings.blueMaterial;
-    tag = "blueAgent";
-}
 public override void InitializeAgent()
 {
     base.InitializeAgent();
+    m_BP = gameObject.GetComponent<BehaviorParameters>();
+    if (m_BP.m_TeamID == (int)Team.Blue)
+    {
+        team = Team.Blue;
+        m_Transform = new Vector3(transform.position.x - 4f, .5f, transform.position.z);
+    }
+    else
+    {
+        team = Team.Purple;
+        m_Transform = new Vector3(transform.position.x + 4f, .5f, transform.position.z);
+    }
     m_AgentRenderer = GetComponentInChildren<Renderer>();
     m_SoccerSettings = FindObjectOfType<SoccerSettings>();
     agentRb = GetComponent<Rigidbody>();

-    var action = Mathf.FloorToInt(act[0]);
-    // Goalies and Strikers have slightly different action spaces.
-    if (agentRole == AgentRole.Goalie)
-    {
-        m_KickPower = 0f;
-        switch (action)
-        {
-            case 1:
-                dirToGo = transform.forward * 1f;
-                m_KickPower = 1f;
-                break;
-            case 2:
-                dirToGo = transform.forward * -1f;
-                break;
-            case 4:
-                dirToGo = transform.right * -1f;
-                break;
-            case 3:
-                dirToGo = transform.right * 1f;
-                break;
-        }
-    }
-    else
-    {
-        m_KickPower = 0f;
-        switch (action)
-        {
-            case 1:
-                dirToGo = transform.forward * 1f;
-                m_KickPower = 1f;
-                break;
-            case 2:
-                dirToGo = transform.forward * -1f;
-                break;
-            case 3:
-                rotateDir = transform.up * 1f;
-                break;
-            case 4:
-                rotateDir = transform.up * -1f;
-                break;
-            case 5:
-                dirToGo = transform.right * -0.75f;
-                break;
-            case 6:
-                dirToGo = transform.right * 0.75f;
-                break;
-        }
-    }
+    m_KickPower = 0f;
+    var forwardAxis = (int)act[0];
+    var rightAxis = (int)act[1];
+    var rotateAxis = (int)act[2];
+    switch (forwardAxis)
+    {
+        case 1:
+            dirToGo = transform.forward * 1f;
+            m_KickPower = 1f;
+            break;
+        case 2:
+            dirToGo = transform.forward * -1f;
+            break;
+    }
+    switch (rightAxis)
+    {
+        case 1:
+            dirToGo = transform.right * 0.3f;
+            break;
+        case 2:
+            dirToGo = transform.right * -0.3f;
+            break;
+    }
+    switch (rotateAxis)
+    {
+        case 1:
+            rotateDir = transform.up * -1f;
+            break;
+        case 2:
+            rotateDir = transform.up * 1f;
+            break;
+    }
     transform.Rotate(rotateDir, Time.deltaTime * 100f);
     agentRb.AddForce(dirToGo * m_SoccerSettings.agentRunSpeed,
         ForceMode.VelocityChange);

 {
-    // Existential penalty for strikers.
-    if (agentRole == AgentRole.Striker)
-    {
-        AddReward(-1f / 3000f);
-    }
-    // Existential bonus for goalies.
-    if (agentRole == AgentRole.Goalie)
-    {
-        AddReward(1f / 3000f);
-    }
+    AddReward(-1f / 3000f);
     MoveAgent(vectorAction);
 }
+public override float[] Heuristic()
+{
+    var action = new float[3];
+    //forward
+    if (Input.GetKey(KeyCode.W))
+    {
+        action[0] = 1f;
+    }
+    if (Input.GetKey(KeyCode.S))
+    {
+        action[0] = 2f;
+    }
+    //rotate
+    if (Input.GetKey(KeyCode.A))
+    {
+        action[2] = 1f;
+    }
+    if (Input.GetKey(KeyCode.D))
+    {
+        action[2] = 2f;
+    }
+    //right
+    if (Input.GetKey(KeyCode.E))
+    {
+        action[1] = 1f;
+    }
+    if (Input.GetKey(KeyCode.Q))
+    {
+        action[1] = 2f;
+    }
+    return action;
+}
 /// <summary>
 /// Used to provide a "kick" to the ball.
 /// </summary>

 public override void AgentReset()
 {
-    if (m_SoccerSettings.randomizePlayersTeamForTraining)
-    {
-        ChooseRandomTeam();
-    }
-    JoinPurpleTeam(agentRole);
-    JoinBlueTeam(agentRole);
-    transform.position = area.GetRandomSpawnPos(agentRole, team);
+    transform.position = m_Transform;
     agentRb.velocity = Vector3.zero;
     agentRb.angularVelocity = Vector3.zero;
     SetResetParameters();
 }
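Taken together, the AgentSoccer.cs changes replace the two role-specific, single-branch action spaces with one three-branch discrete action space shared by all four agents. Below is a minimal consolidated sketch of the new mapping; the wrapper class, `Act` entry point, and `runSpeed` field are illustrative assumptions, while the branch values and multipliers are copied from the added lines above.

```csharp
using UnityEngine;

// Consolidated sketch of the post-refactor control scheme (not part of the
// commit). Branch 0 drives forward/backward, branch 1 strafes, branch 2 turns.
public class SoccerMoveSketch : MonoBehaviour
{
    public float runSpeed = 2f; // hypothetical stand-in for SoccerSettings.agentRunSpeed
    float m_KickPower;          // read elsewhere when the agent contacts the ball

    public void Act(int forwardAxis, int rightAxis, int rotateAxis)
    {
        var dirToGo = Vector3.zero;
        var rotateDir = Vector3.zero;
        m_KickPower = 0f;

        // Branch 0: running forward also arms a full-power kick.
        if (forwardAxis == 1) { dirToGo = transform.forward; m_KickPower = 1f; }
        if (forwardAxis == 2) { dirToGo = -transform.forward; }

        // Branch 1: strafing is deliberately slower (0.3x) than running.
        if (rightAxis == 1) { dirToGo = transform.right * 0.3f; }
        if (rightAxis == 2) { dirToGo = transform.right * -0.3f; }

        // Branch 2: turning is applied as rotation, not force.
        if (rotateAxis == 1) { rotateDir = -transform.up; }
        if (rotateAxis == 2) { rotateDir = transform.up; }

        transform.Rotate(rotateDir, Time.deltaTime * 100f);
        GetComponent<Rigidbody>().AddForce(dirToGo * runSpeed, ForceMode.VelocityChange);
    }
}
```

Note that, as in the diff, a nonzero strafe overwrites the forward direction rather than adding to it, so only one translation branch takes effect per step.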

6
Project/Assets/ML-Agents/Examples/Soccer/Scripts/SoccerBallController.cs


 {
     [HideInInspector]
     public SoccerFieldArea area;
     public AgentSoccer lastTouchedBy; //who was the last to touch the ball
     public string agentTag; //will be used to check if collided with a agent
-    public string purpleGoalTag; //will be used to check if collided with red goal
+    public string purpleGoalTag; //will be used to check if collided with purple goal

-    if (col.gameObject.CompareTag(purpleGoalTag)) //ball touched red goal
+    if (col.gameObject.CompareTag(purpleGoalTag)) //ball touched purple goal
     {
         area.GoalTouched(AgentSoccer.Team.Blue);
     }
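Only the two renamed comment lines change above; for orientation, here is a hedged sketch of how the surrounding collision handler plausibly fits together. The `OnCollisionEnter` wrapper and the `blueGoalTag` counterpart are assumptions, not part of this hunk.

```csharp
using UnityEngine;

// Illustrative ball controller: report a goal to the field area based on the
// tag of the object the ball collided with. Field names match the diff; the
// method body is an assumption.
public class BallGoalSketch : MonoBehaviour
{
    public SoccerFieldArea area;
    public string purpleGoalTag; // the goal the blue team scores into
    public string blueGoalTag;   // assumed counterpart for the purple team

    void OnCollisionEnter(Collision col)
    {
        if (col.gameObject.CompareTag(purpleGoalTag)) // ball touched purple goal
        {
            area.GoalTouched(AgentSoccer.Team.Blue);
        }
        else if (col.gameObject.CompareTag(blueGoalTag)) // ball touched blue goal
        {
            area.GoalTouched(AgentSoccer.Team.Purple);
        }
    }
}
```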

79
Project/Assets/ML-Agents/Examples/Soccer/Scripts/SoccerFieldArea.cs


 SoccerSettings m_SoccerSettings;

 public IEnumerator GoalScoredSwapGroundMaterial(Material mat, float time)
 {
     m_GroundRenderer.material = mat;
     yield return new WaitForSeconds(time);
     m_GroundRenderer.material = m_GroundMaterial;
 }
 void Awake()
 {
     m_SoccerSettings = FindObjectOfType<SoccerSettings>();
     if (goalTextUI) goalTextUI.SetActive(false);
 }
+public void AllPlayersDone(float reward)
+{
+    foreach (var ps in playerStates)
+    {
+        if (ps.agentScript.gameObject.activeInHierarchy)
+        {
+            if (reward != 0)
+            {
+                ps.agentScript.AddReward(reward);
+            }
+            ps.agentScript.Done();
+        }
+    }
+}
 public void GoalTouched(AgentSoccer.Team scoredTeam)
 {
     foreach (var ps in playerStates)
     {
         if (ps.agentScript.team == scoredTeam)
         {
-            RewardOrPunishPlayer(ps, m_SoccerSettings.strikerReward, m_SoccerSettings.goalieReward);
+            ps.agentScript.AddReward(1);
         }
         else
         {
-            RewardOrPunishPlayer(ps, m_SoccerSettings.strikerPunish, m_SoccerSettings.goaliePunish);
+            ps.agentScript.AddReward(-1);
         }
-        if (m_SoccerSettings.randomizePlayersTeamForTraining)
-        {
-            ps.agentScript.ChooseRandomTeam();
-        }
+        ps.agentScript.Done(); //all agents need to be reset
     }
     if (scoredTeam == AgentSoccer.Team.Purple)
     {
         StartCoroutine(GoalScoredSwapGroundMaterial(m_SoccerSettings.purpleMaterial, 1));
     }
     else
     {
         StartCoroutine(GoalScoredSwapGroundMaterial(m_SoccerSettings.blueMaterial, 1));
     }
     if (goalTextUI)
     {
         StartCoroutine(ShowGoalUI());

-public void RewardOrPunishPlayer(PlayerState ps, float striker, float goalie)
-{
-    if (ps.agentScript.agentRole == AgentSoccer.AgentRole.Striker)
-    {
-        ps.agentScript.AddReward(striker);
-    }
-    if (ps.agentScript.agentRole == AgentSoccer.AgentRole.Goalie)
-    {
-        ps.agentScript.AddReward(goalie);
-    }
-    ps.agentScript.Done(); //all agents need to be reset
-}
-public Vector3 GetRandomSpawnPos(AgentSoccer.AgentRole role, AgentSoccer.Team team)
-{
-    var xOffset = 0f;
-    if (role == AgentSoccer.AgentRole.Goalie)
-    {
-        xOffset = 13f;
-    }
-    if (role == AgentSoccer.AgentRole.Striker)
-    {
-        xOffset = 7f;
-    }
-    if (team == AgentSoccer.Team.Blue)
-    {
-        xOffset = xOffset * -1f;
-    }
-    var randomSpawnPos = ground.transform.position +
-        new Vector3(xOffset, 0f, 0f)
-        + (Random.insideUnitSphere * 2);
-    randomSpawnPos.y = ground.transform.position.y + 2;
-    return randomSpawnPos;
-}

     var randomSpawnPos = ground.transform.position +
-        new Vector3(0f, 0f, 0f)
-        + (Random.insideUnitSphere * 2);
-    randomSpawnPos.y = ground.transform.position.y + 2;
+        new Vector3(0f, 0f, 0f);
+    randomSpawnPos.y = ground.transform.position.y + .5f;
     return randomSpawnPos;
 }
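The refactor collapses the striker/goalie reward tables into a symmetric +1/-1 on goals, and the new AllPlayersDone helper gives the area a way to end an episode for every active agent at once. One plausible use, sketched below, is ending a drawn episode when a step budget runs out; the countdown itself is an assumption, only AllPlayersDone appears in the diff.

```csharp
// Hypothetical caller inside SoccerFieldArea: finish the episode as a draw
// after a fixed number of physics steps with no goal.
int m_StepsRemaining = 3000; // assumed budget, not from the commit

void FixedUpdate()
{
    m_StepsRemaining -= 1;
    if (m_StepsRemaining <= 0)
    {
        m_StepsRemaining = 3000;
        AllPlayersDone(0f); // reward of 0: neither team is credited for the draw
    }
}
```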

6
Project/Assets/ML-Agents/Examples/Soccer/Scripts/SoccerSettings.cs


 public Material purpleMaterial;
 public Material blueMaterial;
 public bool randomizePlayersTeamForTraining = true;
-public float strikerPunish; //if opponents scores, the striker gets this neg reward (-1)
-public float strikerReward; //if team scores a goal they get a reward (+1)
-public float goaliePunish; //if opponents score, goalie gets this neg reward (-1)
-public float goalieReward; //if team scores, goalie gets this reward (currently 0...no reward. can play with this later)
 }

10
Project/ProjectSettings/TagManager.asset


-
-
- invisible
- ball
- goalieWall
- goalie
- strikerWall
- striker
-
-
-
-
-
-
-
-

2
com.unity.ml-agents/Runtime/Policy/BehaviorParameters.cs


 string m_BehaviorName = "My Behavior";
 [HideInInspector]
 [SerializeField]
-int m_TeamID = 0;
+public int m_TeamID = 0;
 [HideInInspector]
 [SerializeField]
 [Tooltip("Use all Sensor components attached to child GameObjects of this Agent.")]
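Making m_TeamID public (and serialized) is what lets the Soccer scene mark which of the four agents belong to which self-play team. A hypothetical wiring script is sketched below; only the m_TeamID field itself comes from this commit.

```csharp
using UnityEngine;
using MLAgents;

// Hypothetical setup helper: assign team IDs so the trainer can pair the two
// teams for self-play. Matches the Team enum in AgentSoccer.cs (Blue = 0,
// Purple = 1); the class and array are illustrative.
public class TeamAssigner : MonoBehaviour
{
    public BehaviorParameters[] agents; // expected order: two blue, then two purple

    void Awake()
    {
        for (var i = 0; i < agents.Length; i++)
        {
            agents[i].m_TeamID = i < agents.Length / 2 ? 0 : 1;
        }
    }
}
```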

46
config/trainer_config.yaml


     num_layers: 2
     normalize: false

-Striker:
-    max_steps: 5.0e6
-    learning_rate: 1e-3
-    batch_size: 128
-    num_epoch: 3
-    buffer_size: 2000
-    beta: 1.0e-2
-    hidden_units: 256
-    summary_freq: 20000
-    time_horizon: 128
-    num_layers: 2
-    normalize: false
-
-Goalie:
-    max_steps: 5.0e6
-    learning_rate: 1e-3
-    batch_size: 320
-    num_epoch: 3
-    buffer_size: 2000
-    beta: 1.0e-2
-    hidden_units: 256
-    summary_freq: 20000
-    time_horizon: 128
-    num_layers: 2
-    normalize: false
-
 Pyramids:
     summary_freq: 30000
     time_horizon: 128

 Tennis:
     normalize: true
-    max_steps: 2e7
+    max_steps: 5.0e7
     batch_size: 1024
     buffer_size: 10240
     time_horizon: 1000
+    self_play:
+        window: 10
+        play_against_current_self_ratio: 0.5
+        save_steps: 50000
+        swap_steps: 50000
+
+Soccer:
+    normalize: false
+    max_steps: 5.0e7
+    learning_rate_schedule: constant
+    batch_size: 2048
+    buffer_size: 20480
+    hidden_units: 512
+    time_horizon: 1000
+    num_layers: 2
+    self_play:
+        window: 10
+        play_against_current_self_ratio: 0.5
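For readers new to the self_play block: per the ML-Agents self-play documentation of this era, these keys control how opponent snapshots are taken and sampled. A commented reading of the new Soccer entry follows (values copied from the diff; the Soccer block's save_steps/swap_steps are truncated above, so they are omitted here rather than guessed).

```yaml
Soccer:
    self_play:
        window: 10                            # number of past policy snapshots kept as potential opponents
        play_against_current_self_ratio: 0.5  # probability of facing the latest policy instead of a snapshot
        # save_steps / swap_steps (snapshot frequency and opponent-swap frequency)
        # are shown in the Tennis entry above; their Soccer values are cut off
        # in this view.
```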

54
docs/Learning-Environment-Examples.md


 ![Tennis](images/tennis.png)

-* Set-up: Two-player game where agents control rackets to bounce ball over a
+* Set-up: Two-player game where agents control rackets to hit a ball over the
-* Goal: The agents must bounce ball between one another while not dropping or
-  sending ball out of bounds.
+* Goal: The agents must hit the ball so that the opponent cannot hit a valid
+  return.
-  * +0.1 To agent when hitting ball over net.
-  * -0.1 To agent who let ball hit their ground, or hit ball out of bounds.
+  * +1.0 To the agent that wins the point. An agent wins a point by preventing
+    the opponent from hitting a valid return.
+  * -1.0 To the agent who loses the point.
-* Vector Observation space: 8 variables corresponding to position and velocity
-  of ball and racket.
-* Vector Action space: (Continuous) Size of 2, corresponding to movement
-  toward net or away from net, and jumping.
+* Vector Observation space: 9 variables corresponding to position, velocity
+  and orientation of ball and racket.
+* Vector Action space: (Continuous) Size of 3, corresponding to movement
+  toward net or away from net, jumping and rotation.
   * angle: Angle of the racket from the vertical (Y) axis.
     * Default: 55
     * Recommended Minimum: 35
     * Recommended Maximum: 65
-    * Default: 1
+    * Default: .5
 * Benchmark Mean Reward: 2.5

 ## [Push Block](https://youtu.be/jKdw216ZgoE)

 * Set-up: Environment where four agents compete in a 2 vs 2 toy soccer game.
 * Goal:
-  * Striker: Get the ball into the opponent's goal.
-  * Goalie: Prevent the ball from entering its own goal.
-* Agents: The environment contains four agents, with two different sets of
-  Behavior Parameters : Striker and Goalie.
+  * Get the ball into the opponent's goal while preventing
+    the ball from entering own goal.
+* Agents: The environment contains four agents, with the same
+  Behavior Parameters : Soccer.
 * Agent Reward Function (dependent):
-  * Striker:
-    * -0.1 When ball enters own team's goal.
-    * -0.001 Existential penalty.
-  * Goalie:
-    * +0.1 When ball enters opponents goal.
-    * +0.001 Existential bonus.
+  * -0.001 Existential penalty.
-* Vector Observation space: 112 corresponding to local 14 ray casts, each
-  detecting 7 possible object types, along with the object's distance.
-  Perception is in 180 degree view from front of agent.
-* Vector Action space: (Discrete) One Branch
-  * Striker: 6 actions corresponding to forward, backward, sideways movement,
-  * Goalie: 4 actions corresponding to forward, backward, sideways movement.
+* Vector Observation space: 336 corresponding to 11 ray-casts forward distributed over 120 degrees (264)
+  and 3 ray-casts backward distributed over 90 degrees each detecting 6 possible object types, along with the object's distance.
+  The forward ray-casts contribute 264 state dimensions and backward 72 state dimensions (see the note after this list).
+* Vector Action space: (Discrete) Three branched actions corresponding to forward, backward, sideways movement,
+  as well as rotation.
 * Visual Observations: None
 * Float Properties: Two
   * ball_scale: Specifies the scale of the ball in the 3 dimensions (equal across the three dimensions)
   * gravity: Magnitude of gravity
     * Default: 9.81
     * Recommended minimum: 6
     * Recommended maximum: 20
-* Benchmark Mean Reward (Striker & Goalie): 0 (the means will be inverse
-  of each other and criss crosses during training) __Note that our trainer is currently unable to consistently train this environment__
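A plausible accounting of the new 336-dimensional observation, assuming the standard ML-Agents ray encoding of (detectable tags + 2) floats per ray (one-hot tag, hit flag, normalized distance) and 3 stacked observations (the stacking count is an inference, not stated in the diff): 11 forward rays × (6 + 2) = 88 floats, × 3 stacks = 264; 3 backward rays × 8 = 24 floats, × 3 stacks = 72; and 264 + 72 = 336.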
## Walker

999
docs/images/tennis.png
File diff suppressed because it is too large
View File

8
Project/Assets/ML-Agents/Examples/Soccer/TFModels.meta


fileFormatVersion: 2
guid: f1322978b914041748ae9def364eeef1
folderAsset: yes
DefaultImporter:
  externalObjects: {}
  userData:
  assetBundleName:
  assetBundleVariant:

1001
Project/Assets/ML-Agents/Examples/Soccer/TFModels/Soccer.nn
File diff suppressed because it is too large
View File

11
Project/Assets/ML-Agents/Examples/Soccer/TFModels/Soccer.nn.meta


fileFormatVersion: 2
guid: 9d26b71f04a2d4680a68d8de4f6b62e9
ScriptedImporter:
  fileIDToRecycleName:
    11400000: main obj
    11400002: model data
  externalObjects: {}
  userData:
  assetBundleName:
  assetBundleVariant:
  script: {fileID: 11500000, guid: 19ed1486aa27d4903b34839f37b8f69f, type: 3}