
Agent.Heuristic takes a float[] (#3765)

/develop/add-fire
GitHub · 5 years ago
Current commit dd6aa7e2
21 files changed, 93 insertions and 106 deletions
  1. Project/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DAgent.cs (9 changes)
  2. Project/Assets/ML-Agents/Examples/Bouncer/Scripts/BouncerAgent.cs (11 changes)
  3. Project/Assets/ML-Agents/Examples/FoodCollector/Scripts/FoodCollectorAgent.cs (14 changes)
  4. Project/Assets/ML-Agents/Examples/GridWorld/Scripts/GridAgent.cs (12 changes)
  5. Project/Assets/ML-Agents/Examples/Hallway/Scripts/HallwayAgent.cs (18 changes)
  6. Project/Assets/ML-Agents/Examples/PushBlock/Scripts/PushAgentBasic.cs (18 changes)
  7. Project/Assets/ML-Agents/Examples/Pyramids/Scripts/PyramidAgent.cs (18 changes)
  8. Project/Assets/ML-Agents/Examples/Soccer/Scripts/AgentSoccer.cs (16 changes)
  9. Project/Assets/ML-Agents/Examples/Tennis/Scripts/TennisAgent.cs (11 changes)
  10. Project/Assets/ML-Agents/Examples/WallJump/Scripts/WallJumpAgent.cs (14 changes)
  11. com.unity.ml-agents/CHANGELOG.md (1 change)
  12. com.unity.ml-agents/Runtime/Agent.cs (6 changes)
  13. com.unity.ml-agents/Runtime/Policies/BehaviorParameters.cs (8 changes)
  14. com.unity.ml-agents/Runtime/Policies/HeuristicPolicy.cs (11 changes)
  15. com.unity.ml-agents/Tests/Editor/BehaviorParameterTests.cs (4 changes)
  16. com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs (3 changes)
  17. com.unity.ml-agents/Tests/Runtime/RuntimeAPITest.cs (8 changes)
  18. docs/Getting-Started.md (2 changes)
  19. docs/Learning-Environment-Create-New.md (8 changes)
  20. docs/Learning-Environment-Design-Agents.md (3 changes)
  21. docs/Migrating.md (4 changes)

Project/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DAgent.cs (9 changes)

         SetResetParameters();
     }

-    public override float[] Heuristic()
+    public override void Heuristic(float[] actionsOut)
     {
-        var action = new float[2];
-        action[0] = -Input.GetAxis("Horizontal");
-        action[1] = Input.GetAxis("Vertical");
-        return action;
+        actionsOut[0] = -Input.GetAxis("Horizontal");
+        actionsOut[1] = Input.GetAxis("Vertical");
     }

     public void SetBall()

Project/Assets/ML-Agents/Examples/Bouncer/Scripts/BouncerAgent.cs (11 changes)

         }
     }

-    public override float[] Heuristic()
+    public override void Heuristic(float[] actionsOut)
     {
-        var action = new float[3];
-        action[0] = Input.GetAxis("Horizontal");
-        action[1] = Input.GetKey(KeyCode.Space) ? 1.0f : 0.0f;
-        action[2] = Input.GetAxis("Vertical");
-        return action;
+        actionsOut[0] = Input.GetAxis("Horizontal");
+        actionsOut[1] = Input.GetKey(KeyCode.Space) ? 1.0f : 0.0f;
+        actionsOut[2] = Input.GetAxis("Vertical");
     }

     void Update()

Project/Assets/ML-Agents/Examples/FoodCollector/Scripts/FoodCollectorAgent.cs (14 changes)

         MoveAgent(vectorAction);
     }

-    public override float[] Heuristic()
+    public override void Heuristic(float[] actionsOut)
-        var action = new float[4];
-            action[2] = 2f;
+            actionsOut[2] = 2f;
-            action[0] = 1f;
+            actionsOut[0] = 1f;
-            action[2] = 1f;
+            actionsOut[2] = 1f;
-            action[0] = 2f;
+            actionsOut[0] = 2f;
-        action[3] = Input.GetKey(KeyCode.Space) ? 1.0f : 0.0f;
-        return action;
+        actionsOut[3] = Input.GetKey(KeyCode.Space) ? 1.0f : 0.0f;
     }

     public override void OnEpisodeBegin()

Project/Assets/ML-Agents/Examples/GridWorld/Scripts/GridAgent.cs (12 changes)

         }
     }

-    public override float[] Heuristic()
+    public override void Heuristic(float[] actionsOut)
+        actionsOut[0] = k_NoAction;
-            return new float[] { k_Right };
+            actionsOut[0] = k_Right;
-            return new float[] { k_Up };
+            actionsOut[0] = k_Up;
-            return new float[] { k_Left };
+            actionsOut[0] = k_Left;
-            return new float[] { k_Down };
+            actionsOut[0] = k_Down;
-        return new float[] { k_NoAction };
     }

     // to be implemented by the developer

Project/Assets/ML-Agents/Examples/Hallway/Scripts/HallwayAgent.cs (18 changes)

         }
     }

-    public override float[] Heuristic()
+    public override void Heuristic(float[] actionsOut)
+        actionsOut[0] = 0;
-            return new float[] { 3 };
+            actionsOut[0] = 3;
-        if (Input.GetKey(KeyCode.W))
+        else if (Input.GetKey(KeyCode.W))
-            return new float[] { 1 };
+            actionsOut[0] = 1;
-        if (Input.GetKey(KeyCode.A))
+        else if (Input.GetKey(KeyCode.A))
-            return new float[] { 4 };
+            actionsOut[0] = 4;
-        if (Input.GetKey(KeyCode.S))
+        else if (Input.GetKey(KeyCode.S))
-            return new float[] { 2 };
+            actionsOut[0] = 2;
-        return new float[] { 0 };
     }

     public override void OnEpisodeBegin()

Project/Assets/ML-Agents/Examples/PushBlock/Scripts/PushAgentBasic.cs (18 changes)

         AddReward(-1f / maxStep);
     }

-    public override float[] Heuristic()
+    public override void Heuristic(float[] actionsOut)
+        actionsOut[0] = 0;
-            return new float[] { 3 };
+            actionsOut[0] = 3;
-        if (Input.GetKey(KeyCode.W))
+        else if (Input.GetKey(KeyCode.W))
-            return new float[] { 1 };
+            actionsOut[0] = 1;
-        if (Input.GetKey(KeyCode.A))
+        else if (Input.GetKey(KeyCode.A))
-            return new float[] { 4 };
+            actionsOut[0] = 4;
-        if (Input.GetKey(KeyCode.S))
+        else if (Input.GetKey(KeyCode.S))
-            return new float[] { 2 };
+            actionsOut[0] = 2;
-        return new float[] { 0 };
     }

     /// <summary>

Project/Assets/ML-Agents/Examples/Pyramids/Scripts/PyramidAgent.cs (18 changes)

         MoveAgent(vectorAction);
     }

-    public override float[] Heuristic()
+    public override void Heuristic(float[] actionsOut)
+        actionsOut[0] = 0;
-            return new float[] { 3 };
+            actionsOut[0] = 3;
-        if (Input.GetKey(KeyCode.W))
+        else if (Input.GetKey(KeyCode.W))
-            return new float[] { 1 };
+            actionsOut[0] = 1;
-        if (Input.GetKey(KeyCode.A))
+        else if (Input.GetKey(KeyCode.A))
-            return new float[] { 4 };
+            actionsOut[0] = 4;
-        if (Input.GetKey(KeyCode.S))
+        else if (Input.GetKey(KeyCode.S))
-            return new float[] { 2 };
+            actionsOut[0] = 2;
-        return new float[] { 0 };
     }

     public override void OnEpisodeBegin()

Project/Assets/ML-Agents/Examples/Soccer/Scripts/AgentSoccer.cs (16 changes)

         MoveAgent(vectorAction);
     }

-    public override float[] Heuristic()
+    public override void Heuristic(float[] actionsOut)
-        var action = new float[3];
-            action[0] = 1f;
+            actionsOut[0] = 1f;
-            action[0] = 2f;
+            actionsOut[0] = 2f;
-            action[2] = 1f;
+            actionsOut[2] = 1f;
-            action[2] = 2f;
+            actionsOut[2] = 2f;
-            action[1] = 1f;
+            actionsOut[1] = 1f;
-            action[1] = 2f;
+            actionsOut[1] = 2f;
-        return action;
     }

     /// <summary>
     /// Used to provide a "kick" to the ball.

Project/Assets/ML-Agents/Examples/Tennis/Scripts/TennisAgent.cs (11 changes)

         m_TextComponent.text = score.ToString();
     }

-    public override float[] Heuristic()
+    public override void Heuristic(float[] actionsOut)
     {
-        var action = new float[3];
-        action[0] = Input.GetAxis("Horizontal"); // Racket Movement
-        action[1] = Input.GetKey(KeyCode.Space) ? 1f : 0f; // Racket Jumping
-        action[2] = Input.GetAxis("Vertical"); // Racket Rotation
-        return action;
+        actionsOut[0] = Input.GetAxis("Horizontal"); // Racket Movement
+        actionsOut[1] = Input.GetKey(KeyCode.Space) ? 1f : 0f; // Racket Jumping
+        actionsOut[2] = Input.GetAxis("Vertical"); // Racket Rotation
     }

     public override void OnEpisodeBegin()

Project/Assets/ML-Agents/Examples/WallJump/Scripts/WallJumpAgent.cs (14 changes)

         }
     }

-    public override float[] Heuristic()
+    public override void Heuristic(float[] actionsOut)
-        var action = new float[4];
-            action[1] = 2f;
+            actionsOut[1] = 2f;
-            action[0] = 1f;
+            actionsOut[0] = 1f;
-            action[1] = 1f;
+            actionsOut[1] = 1f;
-            action[0] = 2f;
+            actionsOut[0] = 2f;
-        action[3] = Input.GetKey(KeyCode.Space) ? 1.0f : 0.0f;
-        return action;
+        actionsOut[3] = Input.GetKey(KeyCode.Space) ? 1.0f : 0.0f;
     }

     // Detect when the agent hits the goal

com.unity.ml-agents/CHANGELOG.md (1 change)

 - Added ability to start training (initialize model weights) from a previous run ID. (#3710)
 - The internal event `Academy.AgentSetStatus` was renamed to `Academy.AgentPreStep` and made public.
 - The offset logic was removed from DecisionRequester.
+- The signature of `Agent.Heuristic()` was changed to take a `float[]` as a parameter, instead of returning the array. This was done to prevent a common source of error where users would return arrays of the wrong size.
 - The communication API version has been bumped up to 1.0.0 and will use [Semantic Versioning](https://semver.org/) to do compatibility checks for communication between Unity and the Python process.

 ### Minor Changes

com.unity.ml-agents/Runtime/Agent.cs (6 changes)

     /// </summary>
-    /// <returns> A float array corresponding to the next action of the Agent
-    /// </returns>
-    public virtual float[] Heuristic()
+    public virtual void Heuristic(float[] actionsOut)
     {
-        var param = m_PolicyFactory.brainParameters;
-        return new float[param.numActions];
+        Array.Clear(actionsOut, 0, actionsOut.Length);
     }

     /// <summary>

com.unity.ml-agents/Runtime/Policies/BehaviorParameters.cs (8 changes)

         get { return m_BehaviorName + "?team=" + TeamId; }
     }

-    internal IPolicy GeneratePolicy(Func<float[]> heuristic)
+    internal IPolicy GeneratePolicy(HeuristicPolicy.ActionGenerator heuristic)
-            return new HeuristicPolicy(heuristic);
+            return new HeuristicPolicy(heuristic, m_BrainParameters.numActions);
         case BehaviorType.InferenceOnly:
         {
             if (m_Model == null)

             }
             else
             {
-                return new HeuristicPolicy(heuristic);
+                return new HeuristicPolicy(heuristic, m_BrainParameters.numActions);
-            return new HeuristicPolicy(heuristic);
+            return new HeuristicPolicy(heuristic, m_BrainParameters.numActions);
         }
     }

com.unity.ml-agents/Runtime/Policies/HeuristicPolicy.cs (11 changes)

     /// </summary>
     internal class HeuristicPolicy : IPolicy
     {
-        Func<float[]> m_Heuristic;
+        public delegate void ActionGenerator(float[] actionsOut);
+        ActionGenerator m_Heuristic;
+        int m_numActions;
         WriteAdapter m_WriteAdapter = new WriteAdapter();
         NullList m_NullList = new NullList();

-        public HeuristicPolicy(Func<float[]> heuristic)
+        public HeuristicPolicy(ActionGenerator heuristic, int numActions)
+            m_numActions = numActions;
         }

         /// <inheritdoc />

             if (!info.done)
             {
-                m_LastDecision = m_Heuristic.Invoke();
+                // Reset m_LastDecision each time.
+                m_LastDecision = new float[m_numActions];
+                m_Heuristic.Invoke(m_LastDecision);
             }
         }
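For readers skimming the diff, here is a minimal, self-contained sketch of the fill-a-buffer delegate pattern this file introduces. It is an illustration only, not package source; the `FillActions` method, its values, and the action count of 2 are hypothetical:

```csharp
using System;

static class ActionGeneratorSketch
{
    // Mirrors HeuristicPolicy.ActionGenerator: the caller passes in the
    // buffer, and the heuristic writes into it instead of returning a
    // freshly allocated array.
    delegate void ActionGenerator(float[] actionsOut);

    // Hypothetical heuristic; indices and values are illustrative only.
    static void FillActions(float[] actionsOut)
    {
        actionsOut[0] = 1f;
        actionsOut[1] = -0.5f;
    }

    static void Main()
    {
        ActionGenerator heuristic = FillActions;
        // As in the patched policy: allocate numActions floats, then invoke.
        var lastDecision = new float[2];
        heuristic.Invoke(lastDecision);
        Console.WriteLine(string.Join(", ", lastDecision)); // prints: 1, -0.5
    }
}
```

Because the policy now owns the buffer and its size, a heuristic can no longer hand back an array whose length disagrees with the configured action count.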

com.unity.ml-agents/Tests/Editor/BehaviorParameterTests.cs (4 changes)

 [TestFixture]
 public class BehaviorParameterTests
 {
-    static float[] DummyHeuristic()
+    static void DummyHeuristic(float[] actionsOut)
     {
-        return null;
+        // No-op
     }

     [Test]

com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs (3 changes)

         agentActionCallsForEpisode = 0;
     }

-    public override float[] Heuristic()
+    public override void Heuristic(float[] actionsOut)
     {
-        return new float[0];
     }
 }

com.unity.ml-agents/Tests/Runtime/RuntimeAPITest.cs (8 changes)

 {
     public int numHeuristicCalls;

-    public override float[] Heuristic()
+    public override void Heuristic(float[] actionsOut)
     {
-        return base.Heuristic();
+        base.Heuristic(actionsOut);
     }
 }

 // Simple SensorComponent that sets up a StackingSensor
 public class StackingComponent : SensorComponent
 {
     public SensorComponent wrappedComponent;

docs/Getting-Started.md (2 changes)

   step.
 * `Agent.Heuristic()` - When the `Behavior Type` is set to `Heuristic Only` in the Behavior
   Parameters of the Agent, the Agent will use the `Heuristic()` method to generate
-  the actions of the Agent. As such, the `Heuristic()` method returns an array of
+  the actions of the Agent. As such, the `Heuristic()` method takes an array of
   floats. In the case of the Ball 3D Agent, the `Heuristic()` method converts the
   keyboard inputs into actions.

docs/Learning-Environment-Create-New.md (8 changes)

 The `Heuristic()` method will look like this:

 ```csharp
-public override float[] Heuristic()
+public override void Heuristic(float[] actionsOut)
 {
-    var action = new float[2];
-    action[0] = Input.GetAxis("Horizontal");
-    action[1] = Input.GetAxis("Vertical");
-    return action;
+    actionsOut[0] = Input.GetAxis("Horizontal");
+    actionsOut[1] = Input.GetAxis("Vertical");
 }
 ```

docs/Learning-Environment-Design-Agents.md (3 changes)

 Note that when you are programming actions for an agent, it is often helpful to
 test your action logic using the `Heuristic()` method of the Agent,
-which lets you map keyboard
-commands to actions.
+which lets you map keyboard commands to actions.

 The [3DBall](Learning-Environment-Examples.md#3dball-3d-balance-ball) and
 [Area](Learning-Environment-Examples.md#push-block) example environments are set

docs/Migrating.md (4 changes)

 * The `play_against_current_self_ratio` self-play trainer hyperparameter has been renamed to `play_against_latest_model_ratio`
 * Removed the multi-agent gym option from the gym wrapper. For multi-agent scenarios, use the [Low Level Python API](Python-API.md).
 * The low level Python API has changed. You can look at the document [Low Level Python API documentation](Python-API.md) for more information. If you use `mlagents-learn` for training, this should be a transparent change.
+* The signature of `Agent.Heuristic()` was changed to take a `float[]` as a parameter, instead of returning the array. This was done to prevent a common source of error where users would return arrays of the wrong size.

 ### Steps to Migrate
 * Replace the `--load` flag with `--resume` when calling `mlagents-learn`, and don't use the `--train` flag as training

 * `Academy.FloatProperties` was removed.
 * `Academy.RegisterSideChannel` and `Academy.UnregisterSideChannel` were removed.

 ### Steps to Migrate
+* If your Agent class overrides `Heuristic()`, change the signature to `public override void Heuristic(float[] actionsOut)` and assign values to `actionsOut` instead of returning an array.

 ## Migrating from 0.14 to 0.15
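To make that migration step concrete, here is a minimal before/after sketch for a hypothetical agent with two continuous actions; the axis mapping mirrors the 3DBall example earlier in this diff:

```csharp
// Before: build and return a new array. Returning an array of the
// wrong size was the common bug this change is meant to prevent.
public override float[] Heuristic()
{
    var action = new float[2];
    action[0] = Input.GetAxis("Horizontal");
    action[1] = Input.GetAxis("Vertical");
    return action;
}

// After: write into the buffer the policy supplies. Its length is
// already sized to the action count in the Behavior Parameters.
public override void Heuristic(float[] actionsOut)
{
    actionsOut[0] = Input.GetAxis("Horizontal");
    actionsOut[1] = Input.GetAxis("Vertical");
}
```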
