Initial implementation using IHeuristicProvider. (#4849)

- Actuators can now optionally implement IHeuristicProvider to generate heuristic actions for agents. Co-authored-by: Chris Elion <chris.elion@unity3d.com>
3 年前 · 399f99e7
--- a/.yamato/gym-interface-test.yml
+++ b/.yamato/gym-interface-test.yml
    - |
      sudo apt-get update && sudo apt-get install -y python3-venv
      python3 -m venv venv && source venv/bin/activate
+      python -m pip install wheel --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
      python -m pip install pyyaml --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
      python -u -m ml-agents.tests.yamato.setup_venv
      python ml-agents/tests/yamato/scripts/run_gym.py --env=artifacts/testPlayer-Basic
--- a/Project/Assets/ML-Agents/Examples/Basic/Scripts/BasicActuatorComponent.cs
+++ b/Project/Assets/ML-Agents/Examples/Basic/Scripts/BasicActuatorComponent.cs
 using System;
 using Unity.MLAgents.Actuators;
+using UnityEngine;

 namespace Unity.MLAgentsExamples
 {
    /// <summary>
    /// Simple actuator that converts the action into a {-1, 0, 1} direction
    /// </summary>
-    public class BasicActuator : IActuator
+    public class BasicActuator : IActuator, IHeuristicProvider
    {
        public BasicController basicController;
        ActionSpec m_ActionSpec;
            }

            basicController.MoveDirection(direction);
+        }
+
+        /// <summary>
+        /// Fills the first discrete action from the "Horizontal" input axis: 0 when the axis is
+        /// approximately zero, 1 when it is negative, and 2 when it is positive.
+        /// </summary>
+        /// <param name="actionBuffersOut">Buffers whose first discrete action is overwritten.</param>
+        public void Heuristic(in ActionBuffers actionBuffersOut)
+        {
+            var direction = Input.GetAxis("Horizontal");
+            var discreteActions = actionBuffersOut.DiscreteActions;
+            // Treat a near-zero axis reading as "no input".
+            if (Mathf.Approximately(direction, 0.0f))
+            {
+                discreteActions[0] = 0;
+                return;
+            }
+            var sign = Math.Sign(direction);
+            // NOTE(review): presumably 1 and 2 are the action values this actuator maps to the
+            // -1 and +1 directions; that mapping is not visible in this hunk - confirm.
+            discreteActions[0] = sign < 0 ? 1 : 2;
        }

        public void WriteDiscreteActionMask(IDiscreteActionMask actionMask)
--- a/Project/Assets/ML-Agents/Examples/Match3/Prefabs/Match3Heuristic.prefab
+++ b/Project/Assets/ML-Agents/Examples/Match3/Prefabs/Match3Heuristic.prefab
  - component: {fileID: 3508723250470608012}
  - component: {fileID: 3508723250470608011}
  - component: {fileID: 3508723250470608009}
-  - component: {fileID: 3508723250470608013}
+  - component: {fileID: 2112317463290853299}
  m_Layer: 0
  m_Name: Match3 Agent
  m_TagString: Untagged
  m_BrainParameters:
    VectorObservationSize: 0
    NumStackedVectorObservations: 1
+    m_ActionSpec:
+      m_NumContinuousActions: 0
+      BranchSizes: 
+    hasUpgradedBrainParametersWithActionSpec: 1
  m_Model: {fileID: 11400000, guid: c34da50737a3c4a50918002b20b2b927, type: 3}
  m_InferenceDevice: 0
  m_BehaviorType: 0
  Board: {fileID: 0}
  MoveTime: 0.25
  MaxMoves: 500
-  HeuristicQuality: 0
 --- !u!114 &3508723250470608011
 MonoBehaviour:
  m_ObjectHideFlags: 0
  m_EditorClassIdentifier: 
  DebugMoveIndex: -1
  CubeSpacing: 1.25
-  Board: {fileID: 0}
  TilePrefab: {fileID: 4007900521885639951, guid: faee4e805953b49e688bd00b45c55f2e,
    type: 3}
 --- !u!114 &3508723250470608009
  BasicCellPoints: 1
  SpecialCell1Points: 2
  SpecialCell2Points: 3
--- !u!114 &3508723250470608013
+--- !u!114 &3508723250470608014
 MonoBehaviour:
  m_ObjectHideFlags: 0
  m_CorrespondingSourceObject: {fileID: 0}
  m_Enabled: 1
  m_EditorHideFlags: 0
-  m_Script: {fileID: 11500000, guid: 08e4b0da54cb4d56bfcbae22dd49ab8d, type: 3}
+  m_Script: {fileID: 11500000, guid: 530d2f105aa145bd8a00e021bdd925fd, type: 3}
-  ActuatorName: Match3 Actuator
-  ForceHeuristic: 1
--- !u!114 &3508723250470608014
+  SensorName: Match3 Sensor
+  ObservationType: 0
+--- !u!114 &2112317463290853299
 MonoBehaviour:
  m_ObjectHideFlags: 0
  m_CorrespondingSourceObject: {fileID: 0}
  m_Enabled: 1
  m_EditorHideFlags: 0
-  m_Script: {fileID: 11500000, guid: 530d2f105aa145bd8a00e021bdd925fd, type: 3}
+  m_Script: {fileID: 11500000, guid: b17adcc6c9b241da903aa134f2dac930, type: 3}
-  SensorName: Match3 Sensor
-  ObservationType: 0
+  ActuatorName: Match3 Actuator
+  ForceHeuristic: 1
+  HeuristicQuality: 0
 --- !u!1 &3508723250774301855
 GameObject:
  m_ObjectHideFlags: 0
--- a/Project/Assets/ML-Agents/Examples/Match3/Prefabs/Match3VectorObs.prefab
+++ b/Project/Assets/ML-Agents/Examples/Match3/Prefabs/Match3VectorObs.prefab
  - component: {fileID: 2118285884327540682}
  - component: {fileID: 2118285884327540685}
  - component: {fileID: 2118285884327540687}
-  - component: {fileID: 2118285884327540683}
+  - component: {fileID: 3357012711826686276}
  m_Layer: 0
  m_Name: Match3 Agent
  m_TagString: Untagged
  m_BrainParameters:
    VectorObservationSize: 0
    NumStackedVectorObservations: 1
+    m_ActionSpec:
+      m_NumContinuousActions: 0
+      BranchSizes: 
+    hasUpgradedBrainParametersWithActionSpec: 1
  m_Model: {fileID: 11400000, guid: 9e89b8e81974148d3b7213530d00589d, type: 3}
  m_InferenceDevice: 0
  m_BehaviorType: 0
  Board: {fileID: 0}
  MoveTime: 0.25
  MaxMoves: 500
-  HeuristicQuality: 0
 --- !u!114 &2118285884327540685
 MonoBehaviour:
  m_ObjectHideFlags: 0
  m_EditorClassIdentifier: 
  DebugMoveIndex: -1
  CubeSpacing: 1.25
-  Board: {fileID: 0}
  TilePrefab: {fileID: 4007900521885639951, guid: faee4e805953b49e688bd00b45c55f2e,
    type: 3}
 --- !u!114 &2118285884327540687
  BasicCellPoints: 1
  SpecialCell1Points: 2
  SpecialCell2Points: 3
--- !u!114 &2118285884327540683
+--- !u!114 &2118285884327540680
 MonoBehaviour:
  m_ObjectHideFlags: 0
  m_CorrespondingSourceObject: {fileID: 0}
  m_Enabled: 1
  m_EditorHideFlags: 0
-  m_Script: {fileID: 11500000, guid: 08e4b0da54cb4d56bfcbae22dd49ab8d, type: 3}
+  m_Script: {fileID: 11500000, guid: 530d2f105aa145bd8a00e021bdd925fd, type: 3}
-  ActuatorName: Match3 Actuator
-  ForceHeuristic: 0
--- !u!114 &2118285884327540680
+  SensorName: Match3 Sensor
+  ObservationType: 0
+--- !u!114 &3357012711826686276
 MonoBehaviour:
  m_ObjectHideFlags: 0
  m_CorrespondingSourceObject: {fileID: 0}
  m_Enabled: 1
  m_EditorHideFlags: 0
-  m_Script: {fileID: 11500000, guid: 530d2f105aa145bd8a00e021bdd925fd, type: 3}
+  m_Script: {fileID: 11500000, guid: b17adcc6c9b241da903aa134f2dac930, type: 3}
-  SensorName: Match3 Sensor
-  ObservationType: 0
+  ActuatorName: Match3 Actuator
+  ForceHeuristic: 0
+  HeuristicQuality: 0
--- a/Project/Assets/ML-Agents/Examples/Match3/Prefabs/Match3VisualObs.prefab
+++ b/Project/Assets/ML-Agents/Examples/Match3/Prefabs/Match3VisualObs.prefab
  - component: {fileID: 3019509692332007781}
  - component: {fileID: 3019509692332007778}
  - component: {fileID: 3019509692332007776}
-  - component: {fileID: 3019509692332007780}
+  - component: {fileID: 8270768986451624427}
  m_Layer: 0
  m_Name: Match3 Agent
  m_TagString: Untagged
  m_BrainParameters:
    VectorObservationSize: 0
    NumStackedVectorObservations: 1
+    m_ActionSpec:
+      m_NumContinuousActions: 0
+      BranchSizes: 
+    hasUpgradedBrainParametersWithActionSpec: 1
  m_Model: {fileID: 11400000, guid: 48d14da88fea74d0693c691c6e3f2e34, type: 3}
  m_InferenceDevice: 0
  m_BehaviorType: 0
  Board: {fileID: 0}
  MoveTime: 0.25
  MaxMoves: 500
-  HeuristicQuality: 0
 --- !u!114 &3019509692332007778
 MonoBehaviour:
  m_ObjectHideFlags: 0
  m_EditorClassIdentifier: 
  DebugMoveIndex: -1
  CubeSpacing: 1.25
-  Board: {fileID: 0}
  TilePrefab: {fileID: 4007900521885639951, guid: faee4e805953b49e688bd00b45c55f2e,
    type: 3}
 --- !u!114 &3019509692332007776
  BasicCellPoints: 1
  SpecialCell1Points: 2
  SpecialCell2Points: 3
--- !u!114 &3019509692332007780
+--- !u!114 &3019509692332007783
 MonoBehaviour:
  m_ObjectHideFlags: 0
  m_CorrespondingSourceObject: {fileID: 0}
  m_Enabled: 1
  m_EditorHideFlags: 0
-  m_Script: {fileID: 11500000, guid: 08e4b0da54cb4d56bfcbae22dd49ab8d, type: 3}
+  m_Script: {fileID: 11500000, guid: 530d2f105aa145bd8a00e021bdd925fd, type: 3}
-  ActuatorName: Match3 Actuator
-  ForceHeuristic: 0
--- !u!114 &3019509692332007783
+  SensorName: Match3 Sensor
+  ObservationType: 2
+--- !u!114 &8270768986451624427
 MonoBehaviour:
  m_ObjectHideFlags: 0
  m_CorrespondingSourceObject: {fileID: 0}
  m_Enabled: 1
  m_EditorHideFlags: 0
-  m_Script: {fileID: 11500000, guid: 530d2f105aa145bd8a00e021bdd925fd, type: 3}
+  m_Script: {fileID: 11500000, guid: b17adcc6c9b241da903aa134f2dac930, type: 3}
-  SensorName: Match3 Sensor
-  ObservationType: 2
+  ActuatorName: Match3 Actuator
+  ForceHeuristic: 0
+  HeuristicQuality: 0
--- a/Project/Assets/ML-Agents/Examples/Match3/Scenes/Match3.unity
+++ b/Project/Assets/ML-Agents/Examples/Match3/Scenes/Match3.unity
  m_Modification:
    m_TransformParent: {fileID: 0}
    m_Modifications:
+    - target: {fileID: 2112317463290853299, guid: 2fafdcd0587684641b03b11f04454f1b,
+        type: 3}
+      propertyPath: HeuristicQuality
+      value: 1
+      objectReference: {fileID: 0}
    - target: {fileID: 3508723250470608011, guid: 2fafdcd0587684641b03b11f04454f1b,
        type: 3}
      propertyPath: cubeSpacing
  m_Modification:
    m_TransformParent: {fileID: 0}
    m_Modifications:
+    - target: {fileID: 2112317463290853299, guid: 2fafdcd0587684641b03b11f04454f1b,
+        type: 3}
+      propertyPath: HeuristicQuality
+      value: 1
+      objectReference: {fileID: 0}
    - target: {fileID: 3508723250470608011, guid: 2fafdcd0587684641b03b11f04454f1b,
        type: 3}
      propertyPath: cubeSpacing
--- a/Project/Assets/ML-Agents/Examples/Match3/Scripts/Match3Agent.cs
+++ b/Project/Assets/ML-Agents/Examples/Match3/Scripts/Match3Agent.cs
        WaitForMove = 4,
    }

-    public enum HeuristicQuality
-    {
-        /// <summary>
-        /// The heuristic will pick any valid move at random.
-        /// </summary>
-        RandomValidMove,
-
-        /// <summary>
-        /// The heuristic will pick the move that scores the most points.
-        /// This only looks at the immediate move, and doesn't consider where cells will fall.
-        /// </summary>
-        Greedy
-    }
-
    public class Match3Agent : Agent
    {
        [HideInInspector]
        public int MaxMoves = 500;


-        public HeuristicQuality HeuristicQuality = HeuristicQuality.RandomValidMove;
-
-        private System.Random m_Random;
-
-            var seed = Board.RandomSeed == -1 ? gameObject.GetInstanceID() : Board.RandomSeed + 1;
-            m_Random = new System.Random(seed);
        }

        public override void OnEpisodeBegin()
            return false;
        }

-        public override void Heuristic(in ActionBuffers actionsOut)
-        {
-            var discreteActions = actionsOut.DiscreteActions;
-            discreteActions[0] = GreedyMove();
-        }
-
-        int GreedyMove()
-        {
-            var pointsByType = new[] { Board.BasicCellPoints, Board.SpecialCell1Points, Board.SpecialCell2Points };
-
-            var bestMoveIndex = 0;
-            var bestMovePoints = -1;
-            var numMovesAtCurrentScore = 0;
-
-            foreach (var move in Board.ValidMoves())
-            {
-                var movePoints = HeuristicQuality == HeuristicQuality.Greedy ? EvalMovePoints(move, pointsByType) : 1;
-                if (movePoints < bestMovePoints)
-                {
-                    // Worse, skip
-                    continue;
-                }
-
-                if (movePoints > bestMovePoints)
-                {
-                    // Better, keep
-                    bestMovePoints = movePoints;
-                    bestMoveIndex = move.MoveIndex;
-                    numMovesAtCurrentScore = 1;
-                }
-                else
-                {
-                    // Tied for best - use reservoir sampling to make sure we select from equal moves uniformly.
-                    // See https://en.wikipedia.org/wiki/Reservoir_sampling#Simple_algorithm
-                    numMovesAtCurrentScore++;
-                    var randVal = m_Random.Next(0, numMovesAtCurrentScore);
-                    if (randVal == 0)
-                    {
-                        // Keep the new one
-                        bestMoveIndex = move.MoveIndex;
-                    }
-                }
-            }
-
-            return bestMoveIndex;
-        }
-
-        int EvalMovePoints(Move move, int[] pointsByType)
-        {
-            // Counts the expected points for making the move.
-            var moveVal = Board.GetCellType(move.Row, move.Column);
-            var moveSpecial = Board.GetSpecialType(move.Row, move.Column);
-            var (otherRow, otherCol) = move.OtherCell();
-            var oppositeVal = Board.GetCellType(otherRow, otherCol);
-            var oppositeSpecial = Board.GetSpecialType(otherRow, otherCol);
-
-
-            int movePoints = EvalHalfMove(
-                otherRow, otherCol, moveVal, moveSpecial, move.Direction, pointsByType
-            );
-            int otherPoints = EvalHalfMove(
-                move.Row, move.Column, oppositeVal, oppositeSpecial, move.OtherDirection(), pointsByType
-            );
-            return movePoints + otherPoints;
-        }
-
-        int EvalHalfMove(int newRow, int newCol, int newValue, int newSpecial, Direction incomingDirection, int[] pointsByType)
-        {
-            // This is a essentially a duplicate of AbstractBoard.CheckHalfMove but also counts the points for the move.
-            int matchedLeft = 0, matchedRight = 0, matchedUp = 0, matchedDown = 0;
-            int scoreLeft = 0, scoreRight = 0, scoreUp = 0, scoreDown = 0;
-
-            if (incomingDirection != Direction.Right)
-            {
-                for (var c = newCol - 1; c >= 0; c--)
-                {
-                    if (Board.GetCellType(newRow, c) == newValue)
-                    {
-                        matchedLeft++;
-                        scoreLeft += pointsByType[Board.GetSpecialType(newRow, c)];
-                    }
-                    else
-                        break;
-                }
-            }
-
-            if (incomingDirection != Direction.Left)
-            {
-                for (var c = newCol + 1; c < Board.Columns; c++)
-                {
-                    if (Board.GetCellType(newRow, c) == newValue)
-                    {
-                        matchedRight++;
-                        scoreRight += pointsByType[Board.GetSpecialType(newRow, c)];
-                    }
-                    else
-                        break;
-                }
-            }
-
-            if (incomingDirection != Direction.Down)
-            {
-                for (var r = newRow + 1; r < Board.Rows; r++)
-                {
-                    if (Board.GetCellType(r, newCol) == newValue)
-                    {
-                        matchedUp++;
-                        scoreUp += pointsByType[Board.GetSpecialType(r, newCol)];
-                    }
-                    else
-                        break;
-                }
-            }
-
-            if (incomingDirection != Direction.Up)
-            {
-                for (var r = newRow - 1; r >= 0; r--)
-                {
-                    if (Board.GetCellType(r, newCol) == newValue)
-                    {
-                        matchedDown++;
-                        scoreDown += pointsByType[Board.GetSpecialType(r, newCol)];
-                    }
-                    else
-                        break;
-                }
-            }
-
-            if ((matchedUp + matchedDown >= 2) || (matchedLeft + matchedRight >= 2))
-            {
-                // It's a match. Start from counting the piece being moved
-                var totalScore = pointsByType[newSpecial];
-                if (matchedUp + matchedDown >= 2)
-                {
-                    totalScore += scoreUp + scoreDown;
-                }
-
-                if (matchedLeft + matchedRight >= 2)
-                {
-                    totalScore += scoreLeft + scoreRight;
-                }
-                return totalScore;
-            }
-
-            return 0;
-        }
    }

 }
--- a/Project/Assets/ML-Agents/Examples/Match3/Scripts/Match3Board.cs
+++ b/Project/Assets/ML-Agents/Examples/Match3/Scripts/Match3Board.cs
+using System;
 using Unity.MLAgents.Extensions.Match3;
 using UnityEngine;


    public class Match3Board : AbstractBoard
    {
-        public int RandomSeed = -1;
-
        public const int k_EmptyCell = -1;
        [Tooltip("Points earned for clearing a basic cell (cube)")]
        public int BasicCellPoints = 1;
        [Tooltip("Points earned for clearing an extra special cell (plus)")]
        public int SpecialCell2Points = 3;

+        /// <summary>
+        /// Seed to initialize the <see cref="System.Random"/> object.
+        /// </summary>
+        public int RandomSeed;
+
        (int, int)[,] m_Cells;
        bool[,] m_Matched;

            m_Cells = new (int, int)[Columns, Rows];
            m_Matched = new bool[Columns, Rows];

+        }
+
+        void Start()
+        {
-
            InitRandom();
        }

--- a/com.unity.ml-agents.extensions/Runtime/Match3/Match3Actuator.cs
+++ b/com.unity.ml-agents.extensions/Runtime/Match3/Match3Actuator.cs
    /// Actuator for a Match3 game. It translates valid moves (defined by AbstractBoard.IsMoveValid())
    /// in action masks, and applies the action to the board via AbstractBoard.MakeMove().
    /// </summary>
-    public class Match3Actuator : IActuator
+    public class Match3Actuator : IActuator, IHeuristicProvider
-        private AbstractBoard m_Board;
+        protected AbstractBoard m_Board;
+        protected System.Random m_Random;
-        private System.Random m_Random;
        private Agent m_Agent;

        private int m_Rows;
        /// <param name="board"></param>
        /// <param name="forceHeuristic">Whether the inference action should be ignored and this actuator's Heuristic
        /// should be called. This should only be used for generating comparison stats of the Heuristic.</param>
+        /// <param name="seed">The seed used to initialize <see cref="System.Random"/>.</param>
-        public Match3Actuator(AbstractBoard board, bool forceHeuristic, Agent agent, string name)
+        public Match3Actuator(AbstractBoard board,
+            bool forceHeuristic,
+            int seed,
+            Agent agent,
+            string name)
        {
            m_Board = board;
            m_Rows = board.Rows;

            var numMoves = Move.NumPotentialMoves(m_Board.Rows, m_Board.Columns);
            m_ActionSpec = ActionSpec.MakeDiscrete(numMoves);
+            m_Random = new System.Random(seed);
        }

        /// <inheritdoc/>
        {
            if (m_ForceHeuristic)
            {
-                m_Agent.Heuristic(actions);
+                Heuristic(actions);
            }
            var moveIndex = actions.DiscreteActions[0];

                yield return move.MoveIndex;
            }
        }
+
+        /// <summary>
+        /// Writes the move index returned by <see cref="GreedyMove"/> into the first discrete action.
+        /// </summary>
+        /// <param name="actionsOut">Buffers whose first discrete action is overwritten.</param>
+        public void Heuristic(in ActionBuffers actionsOut)
+        {
+            var discreteActions = actionsOut.DiscreteActions;
+            discreteActions[0] = GreedyMove();
+        }
+
+
+        /// <summary>
+        /// Picks the valid move with the highest <see cref="EvalMovePoints"/> score, choosing
+        /// uniformly at random (using m_Random) among moves that tie for the best score.
+        /// </summary>
+        /// <returns>The MoveIndex of the chosen move, or 0 if the board yields no valid moves.</returns>
+        protected int GreedyMove()
+        {
+
+            var bestMoveIndex = 0;
+            // -1 so that a first move scoring 0 or more always replaces the default index.
+            // NOTE(review): a move scoring below -1 is never selected; presumably scores are
+            // expected to be non-negative - confirm.
+            var bestMovePoints = -1;
+            var numMovesAtCurrentScore = 0;
+
+            foreach (var move in m_Board.ValidMoves())
+            {
+                var movePoints = EvalMovePoints(move);
+                if (movePoints < bestMovePoints)
+                {
+                    // Worse, skip
+                    continue;
+                }
+
+                if (movePoints > bestMovePoints)
+                {
+                    // Better, keep
+                    bestMovePoints = movePoints;
+                    bestMoveIndex = move.MoveIndex;
+                    // Restart the tie count for the new best score.
+                    numMovesAtCurrentScore = 1;
+                }
+                else
+                {
+                    // Tied for best - use reservoir sampling to make sure we select from equal moves uniformly.
+                    // See https://en.wikipedia.org/wiki/Reservoir_sampling#Simple_algorithm
+                    numMovesAtCurrentScore++;
+                    var randVal = m_Random.Next(0, numMovesAtCurrentScore);
+                    if (randVal == 0)
+                    {
+                        // Keep the new one
+                        bestMoveIndex = move.MoveIndex;
+                    }
+                }
+            }
+
+            return bestMoveIndex;
+        }
+
+        /// <summary>
+        /// Method to be overridden when evaluating how many points a specific move will generate.
+        /// The default implementation scores every move as 1, so all valid moves tie and
+        /// <see cref="GreedyMove"/> selects among them at random.
+        /// </summary>
+        /// <param name="move">The move to evaluate.</param>
+        /// <returns>The number of points the move generates.</returns>
+        protected virtual int EvalMovePoints(Move move)
+        {
+            return 1;
+        }
+
    }
 }
--- a/com.unity.ml-agents.extensions/Runtime/Match3/Match3ActuatorComponent.cs
+++ b/com.unity.ml-agents.extensions/Runtime/Match3/Match3ActuatorComponent.cs
 namespace Unity.MLAgents.Extensions.Match3
 {
    /// <summary>
-    /// Actuator component for a Match 3 game. Generates a Match3Actuator at runtime.
+    /// Actuator component for a Match3 game. Generates a Match3Actuator at runtime.
    /// </summary>
    public class Match3ActuatorComponent : ActuatorComponent
    {
        public string ActuatorName = "Match3 Actuator";

        /// <summary>
+        /// A random seed used to generate a board, if needed.
+        /// </summary>
+        public int RandomSeed = -1;
+
+        /// <summary>
        /// Force using the actuator's Heuristic() method to decide the action. This should only be used in testing.
        /// </summary>
        [FormerlySerializedAs("ForceRandom")]
        {
            var board = GetComponent<AbstractBoard>();
            var agent = GetComponentInParent<Agent>();
-            return new Match3Actuator(board, ForceHeuristic, agent, ActuatorName);
+            var seed = RandomSeed == -1 ? gameObject.GetInstanceID() : RandomSeed + 1;
+            return new Match3Actuator(board, ForceHeuristic, seed, agent, ActuatorName);
        }

        /// <inheritdoc/>
--- a/com.unity.ml-agents/CHANGELOG.md
+++ b/com.unity.ml-agents/CHANGELOG.md
 TensorBoard. Thanks to @brccabral for the contribution! (#4816)
 - The upper limit for the time scale (by setting the `--time-scale` parameter in mlagents-learn) was
 removed when training with a player. The Editor still requires it to be clamped to 100. (#4867)
+- Added the IHeuristicProvider interface to allow IActuators, as well as Agents, to implement the Heuristic function to generate actions.
+  Updated the Basic example and the Match3 Example to use Actuators.
+  Changed the namespace and file names of classes in com.unity.ml-agents.extensions. (#4849)
+

 #### ml-agents / ml-agents-envs / gym-unity (Python)

--- a/com.unity.ml-agents/Runtime/Actuators/ActuatorManager.cs
+++ b/com.unity.ml-agents/Runtime/Actuators/ActuatorManager.cs

        /// <summary>
        /// Iterates through all of the IActuators in this list and calls their
+        /// <see cref="IHeuristicProvider.Heuristic"/> method on them, if implemented, with the appropriate
+        /// <see cref="ActionSegment{T}"/>s depending on their <see cref="ActionSpec"/>.
+        /// </summary>
+        /// <param name="actionBuffersOut">Combined buffers for all actuators; each actuator is handed
+        /// <see cref="ActionSegment{T}"/>s built over the same underlying arrays.</param>
+        public void ApplyHeuristic(in ActionBuffers actionBuffersOut)
+        {
+            // Running offsets of the current actuator's segment within the combined arrays.
+            var continuousStart = 0;
+            var discreteStart = 0;
+            for (var i = 0; i < m_Actuators.Count; i++)
+            {
+                var actuator = m_Actuators[i];
+                var numContinuousActions = actuator.ActionSpec.NumContinuousActions;
+                var numDiscreteActions = actuator.ActionSpec.NumDiscreteActions;
+
+                // An actuator with no actions has nothing to fill and does not move the offsets.
+                if (numContinuousActions == 0 && numDiscreteActions == 0)
+                {
+                    continue;
+                }
+
+                var continuousActions = ActionSegment<float>.Empty;
+                if (numContinuousActions > 0)
+                {
+                    continuousActions = new ActionSegment<float>(actionBuffersOut.ContinuousActions.Array,
+                        continuousStart,
+                        numContinuousActions);
+                }
+
+                var discreteActions = ActionSegment<int>.Empty;
+                if (numDiscreteActions > 0)
+                {
+                    discreteActions = new ActionSegment<int>(actionBuffersOut.DiscreteActions.Array,
+                        discreteStart,
+                        numDiscreteActions);
+                }
+
+                // Actuators that do not implement IHeuristicProvider are left untouched here,
+                // but the offsets below still advance past their segment.
+                var heuristic = actuator as IHeuristicProvider;
+                heuristic?.Heuristic(new ActionBuffers(continuousActions, discreteActions));
+                continuousStart += numContinuousActions;
+                discreteStart += numDiscreteActions;
+            }
+        }
+
+        /// <summary>
+        /// Iterates through all of the IActuators in this list and calls their
        /// <see cref="IActionReceiver.OnActionReceived"/> method on them with the appropriate
        /// <see cref="ActionSegment{T}"/>s depending on their <see cref="ActionSpec"/>.
        /// </summary>
--- a/com.unity.ml-agents/Runtime/Actuators/VectorActuator.cs
+++ b/com.unity.ml-agents/Runtime/Actuators/VectorActuator.cs
 namespace Unity.MLAgents.Actuators
 {
    /// <summary>
-    /// IActuator implementation that forwards to an <see cref="IActionReceiver"/>.
+    /// IActuator implementation that forwards calls to an <see cref="IActionReceiver"/> and an <see cref="IHeuristicProvider"/>.
-    internal class VectorActuator : IActuator
+    internal class VectorActuator : IActuator, IHeuristicProvider
+        IHeuristicProvider m_HeuristicProvider;

        ActionBuffers m_ActionBuffers;
        internal ActionBuffers ActionBuffers
        /// <summary>
        /// Create a VectorActuator that forwards to the provided IActionReceiver.
        /// </summary>
+        /// <param name="actionReceiver">The <see cref="IActionReceiver"/> used for OnActionReceived and WriteDiscreteActionMask.
+        /// If this parameter also implements <see cref="IHeuristicProvider"/> it will be cast and used to forward calls to
+        /// <see cref="IHeuristicProvider.Heuristic"/>.</param>
+        /// <param name="actionSpec"></param>
+        /// <param name="name"></param>
+        public VectorActuator(IActionReceiver actionReceiver,
+                              ActionSpec actionSpec,
+                              string name = "VectorActuator")
+            // `as` yields null when actionReceiver does not implement IHeuristicProvider.
+            : this(actionReceiver, actionReceiver as IHeuristicProvider, actionSpec, name) { }
+
+        /// <summary>
+        /// Create a VectorActuator that forwards to the provided IActionReceiver.
+        /// </summary>
+        /// <param name="heuristicProvider">The <see cref="IHeuristicProvider"/> used to fill the <see cref="ActionBuffers"/>
+        /// for Heuristic Policies.</param>
+                              IHeuristicProvider heuristicProvider,
+            m_HeuristicProvider = heuristicProvider;
            ActionSpec = actionSpec;
            string suffix;
            if (actionSpec.NumContinuousActions == 0)
        {
            ActionBuffers = actionBuffers;
            m_ActionReceiver.OnActionReceived(ActionBuffers);
+        }
+
+        /// <summary>
+        /// Forwards the call to the wrapped <see cref="IHeuristicProvider"/>, if there is one;
+        /// otherwise does nothing.
+        /// </summary>
+        /// <param name="actionBuffersOut">Buffers passed through to the heuristic provider.</param>
+        public void Heuristic(in ActionBuffers actionBuffersOut)
+        {
+            m_HeuristicProvider?.Heuristic(actionBuffersOut);
        }

        /// <inheritdoc />
--- a/com.unity.ml-agents/Runtime/Agent.cs
+++ b/com.unity.ml-agents/Runtime/Agent.cs
        "docs/Learning-Environment-Design-Agents.md")]
    [Serializable]
    [RequireComponent(typeof(BehaviorParameters))]
-    public partial class Agent : MonoBehaviour, ISerializationCallbackReceiver, IActionReceiver
+    public partial class Agent : MonoBehaviour, ISerializationCallbackReceiver, IActionReceiver, IHeuristicProvider
    {
        IPolicy m_Brain;
        BehaviorParameters m_PolicyFactory;
        float[] m_LegacyActionCache;

        /// <summary>
+        /// This is used to avoid allocation of a float array during legacy calls to Heuristic.
+        /// </summary>
+        float[] m_LegacyHeuristicCache;
+
+        /// <summary>
        /// Called when the attached [GameObject] becomes enabled and active.
        /// [GameObject]: https://docs.unity3d.com/Manual/GameObjects.html
        /// </summary>
                InitializeActuators();
            }

-            m_Brain = m_PolicyFactory.GeneratePolicy(m_ActuatorManager.GetCombinedActionSpec(), Heuristic);
+            m_Brain = m_PolicyFactory.GeneratePolicy(m_ActuatorManager.GetCombinedActionSpec(), m_ActuatorManager);
            ResetData();
            Initialize();

                return;
            }
            m_Brain?.Dispose();
-            m_Brain = m_PolicyFactory.GeneratePolicy(m_ActuatorManager.GetCombinedActionSpec(), Heuristic);
+            m_Brain = m_PolicyFactory.GeneratePolicy(m_ActuatorManager.GetCombinedActionSpec(), m_ActuatorManager);
        }

        /// <summary>
        public virtual void Initialize() { }

        /// <summary>
-        /// Implement `Heuristic()` to choose an action for this agent using a custom heuristic.
+        /// Implement <see cref="Heuristic"/> to choose an action for this agent using a custom heuristic.
-        /// control of an agent using keyboard, mouse, or game controller input.
+        /// control of an agent using keyboard, mouse, game controller input, or a script.
        ///
        /// Your heuristic implementation can use any decision making logic you specify. Assign decision
        /// values to the <see cref="ActionBuffers.ContinuousActions"/>  and <see cref="ActionBuffers.DiscreteActions"/>
            switch (m_PolicyFactory.BrainParameters.VectorActionSpaceType)
            {
                case SpaceType.Continuous:
-                    Heuristic(actionsOut.ContinuousActions.Array);
+                    Heuristic(m_LegacyHeuristicCache);
+                    Array.Copy(m_LegacyHeuristicCache, actionsOut.ContinuousActions.Array, m_LegacyActionCache.Length);
-                    var convertedOut = Array.ConvertAll(actionsOut.DiscreteActions.Array, x => (float)x);
-                    Heuristic(convertedOut);
+                    Heuristic(m_LegacyHeuristicCache);
-                        discreteActionSegment[i] = (int)convertedOut[i];
+                        discreteActionSegment[i] = (int)m_LegacyHeuristicCache[i];
-
        }

        /// <summary>
            // Support legacy OnActionReceived
            // TODO don't set this up if the sizes are 0?
            var param = m_PolicyFactory.BrainParameters;
-            m_VectorActuator = new VectorActuator(this, param.ActionSpec);
+            m_VectorActuator = new VectorActuator(this, this, param.ActionSpec);
+            m_LegacyHeuristicCache = new float[m_VectorActuator.TotalNumberOfActions()];

            m_ActuatorManager.Add(m_VectorActuator);

        /// three values in ActionBuffers.ContinuousActions array to use as the force components.
        /// During training, the agent's  policy learns to set those particular elements of
        /// the array to maximize the training rewards the agent receives. (Of course,
-        /// if you implement a <seealso cref="Heuristic(in ActionBuffers)"/> function, it must use the same
+        /// if you implement a <seealso cref="Agent.Heuristic(in ActionBuffers)"/> function, it must use the same
        /// elements of the action array for the same purpose since there is no learning
        /// involved.)
        ///

            if (!actions.ContinuousActions.IsEmpty())
            {
-                m_LegacyActionCache = actions.ContinuousActions.Array;
+                Array.Copy(actions.ContinuousActions.Array,
+                    m_LegacyActionCache,
+                    actionSpec.NumContinuousActions);
-                m_LegacyActionCache = Array.ConvertAll(actions.DiscreteActions.Array, x => (float)x);
+                for (var i = 0; i < m_LegacyActionCache.Length; i++)
+                {
+                    m_LegacyActionCache[i] = (float)actions.DiscreteActions[i];
+                }
            }
            // Disable deprecation warnings so we can call the legacy overload.
 #pragma warning disable CS0618
--- a/com.unity.ml-agents/Runtime/Policies/BehaviorParameters.cs
+++ b/com.unity.ml-agents/Runtime/Policies/BehaviorParameters.cs
            get { return m_BehaviorName + "?team=" + TeamId; }
        }

-        internal IPolicy GeneratePolicy(ActionSpec actionSpec, HeuristicPolicy.ActionGenerator heuristic)
+        internal IPolicy GeneratePolicy(ActionSpec actionSpec, ActuatorManager actuatorManager)
-                    return new HeuristicPolicy(heuristic, actionSpec);
+                    return new HeuristicPolicy(actuatorManager, actionSpec);
                case BehaviorType.InferenceOnly:
                    {
                        if (m_Model == null)
                    }
                    else
                    {
-                        return new HeuristicPolicy(heuristic, actionSpec);
+                        return new HeuristicPolicy(actuatorManager, actionSpec);
-                    return new HeuristicPolicy(heuristic, actionSpec);
+                    return new HeuristicPolicy(actuatorManager, actionSpec);
            }
        }

            }
            agent.ReloadPolicy();
        }
-
    }
 }
--- a/com.unity.ml-agents/Runtime/Policies/HeuristicPolicy.cs
+++ b/com.unity.ml-agents/Runtime/Policies/HeuristicPolicy.cs
 namespace Unity.MLAgents.Policies
 {
    /// <summary>
-    /// The Heuristic Policy uses a hards coded Heuristic method
+    /// The Heuristic Policy uses a hard-coded Heuristic method
-        public delegate void ActionGenerator(in ActionBuffers actionBuffers);
-        ActionGenerator m_Heuristic;
+        ActuatorManager m_ActuatorManager;
        ActionBuffers m_ActionBuffers;
        bool m_Done;
        bool m_DecisionRequested;


        /// <inheritdoc />
-        public HeuristicPolicy(ActionGenerator heuristic, ActionSpec actionSpec)
+        public HeuristicPolicy(ActuatorManager actuatorManager, ActionSpec actionSpec)
-            m_Heuristic = heuristic;
+            m_ActuatorManager = actuatorManager;
            var numContinuousActions = actionSpec.NumContinuousActions;
            var numDiscreteActions = actionSpec.NumDiscreteActions;
            var continuousDecision = new ActionSegment<float>(new float[numContinuousActions], 0, numContinuousActions);
        {
            if (!m_Done && m_DecisionRequested)
            {
-                m_Heuristic.Invoke(m_ActionBuffers);
+                m_ActuatorManager.ApplyHeuristic(m_ActionBuffers);
            }
            m_DecisionRequested = false;
            return ref m_ActionBuffers;
--- a/com.unity.ml-agents/Tests/Editor/Actuators/ActuatorManagerTests.cs
+++ b/com.unity.ml-agents/Tests/Editor/Actuators/ActuatorManagerTests.cs
            manager.WriteActionMask();
            Assert.IsTrue(groundTruthMask.SequenceEqual(manager.DiscreteActionMask.GetMask()));
        }
+
+        // Two actuators with 3 and 4 discrete branches are driven from one 7-entry discrete buffer.
+        // ApplyHeuristic must invoke each actuator's Heuristic, and each actuator must see a discrete
+        // buffer whose length equals its own number of branches.
+        [Test]
+        public void TestHeuristic()
+        {
+            var manager = new ActuatorManager(2);
+            var va1 = new TestActuator(ActionSpec.MakeDiscrete(1, 2, 3), "name");
+            var va2 = new TestActuator(ActionSpec.MakeDiscrete(3, 2, 1, 8), "name1");
+            manager.Add(va1);
+            manager.Add(va2);
+
+            var actionBuf = new ActionBuffers(Array.Empty<float>(), new[] { 0, 0, 0, 0, 0, 0, 0 });
+            manager.ApplyHeuristic(actionBuf);
+
+            // Assert.AreEqual takes (expected, actual); keeping that order makes failure messages accurate.
+            Assert.IsTrue(va1.m_HeuristicCalled);
+            Assert.AreEqual(3, va1.m_DiscreteBufferSize);
+            Assert.IsTrue(va2.m_HeuristicCalled);
+            Assert.AreEqual(4, va2.m_DiscreteBufferSize);
+        }
    }
 }
--- a/com.unity.ml-agents/Tests/Editor/Actuators/TestActuator.cs
+++ b/com.unity.ml-agents/Tests/Editor/Actuators/TestActuator.cs
 using Unity.MLAgents.Actuators;
 namespace Unity.MLAgents.Tests.Actuators
 {
-    internal class TestActuator : IActuator
+    internal class TestActuator : IActuator, IHeuristicProvider
+        public bool m_HeuristicCalled;
+        public int m_DiscreteBufferSize;
+
        public TestActuator(ActionSpec actuatorSpace, string name)
        {
            ActionSpec = actuatorSpace;

        public void ResetData()
        {
+        }
+
+        /// <summary>
+        /// Test spy: records that the heuristic was invoked and the length of the discrete action
+        /// segment it was handed, so a test can inspect both afterwards.
+        /// </summary>
+        /// <param name="actionBuffersOut">Buffers supplied by the caller; only the discrete length is read.</param>
+        public void Heuristic(in ActionBuffers actionBuffersOut)
+        {
+            m_HeuristicCalled = true;
+            m_DiscreteBufferSize = actionBuffersOut.DiscreteActions.Length;
        }
    }
 }
--- a/com.unity.ml-agents/Tests/Editor/Actuators/VectorActuatorTests.cs
+++ b/com.unity.ml-agents/Tests/Editor/Actuators/VectorActuatorTests.cs
+using System;
 using System.Collections.Generic;
 using System.Linq;
 using NUnit.Framework;
    [TestFixture]
    public class VectorActuatorTests
    {
-        class TestActionReceiver : IActionReceiver
+        class TestActionReceiver : IActionReceiver, IHeuristicProvider
+            public bool HeuristicCalled;

            public void OnActionReceived(ActionBuffers actionBuffers)
            {
            public void WriteDiscreteActionMask(IDiscreteActionMask actionMask)
            {
                actionMask.WriteMask(Branch, Mask);
+            }
+
+            /// <summary>
+            /// Test spy: only flags that the heuristic was invoked; the buffers are left untouched.
+            /// </summary>
+            /// <param name="actionBuffersOut">Buffers supplied by the caller; not read or written here.</param>
+            public void Heuristic(in ActionBuffers actionBuffersOut)
+            {
+                HeuristicCalled = true;
            }
        }

            va.WriteDiscreteActionMask(bdam);

            Assert.IsTrue(groundTruthMask.SequenceEqual(bdam.GetMask()));
+        }
+
+        // Calling Heuristic on the VectorActuator must reach the receiver it was constructed with.
+        [Test]
+        public void TestHeuristic()
+        {
+            var ar = new TestActionReceiver();
+            var va = new VectorActuator(ar, ActionSpec.MakeDiscrete(1, 2, 3), "name");
+
+            // Hand the heuristic its own output buffer (sized by NumDiscreteActions) instead of the spec's
+            // BranchSizes array, so a heuristic that writes actions can never modify the ActionSpec.
+            var discreteOut = new int[va.ActionSpec.NumDiscreteActions];
+            va.Heuristic(new ActionBuffers(Array.Empty<float>(), discreteOut));
+            Assert.IsTrue(ar.HeuristicCalled);
        }
    }
 }
--- a/com.unity.ml-agents/Tests/Editor/BehaviorParameterTests.cs
+++ b/com.unity.ml-agents/Tests/Editor/BehaviorParameterTests.cs
 namespace Unity.MLAgents.Tests
 {
    [TestFixture]
-    public class BehaviorParameterTests
+    public class BehaviorParameterTests : IHeuristicProvider
-        static void DummyHeuristic(in ActionBuffers actionsOut)
+        public void Heuristic(in ActionBuffers actionsOut)
        {
            // No-op
        }

            Assert.Throws<UnityAgentsException>(() =>
            {
-                bp.GeneratePolicy(actionSpec, DummyHeuristic);
+                bp.GeneratePolicy(actionSpec, new ActuatorManager());
            });
        }
    }
--- a/docs/Migrating.md
+++ b/docs/Migrating.md
 - `UnityEnvironment.API_VERSION` in environment.py
  ([example](https://github.com/Unity-Technologies/ml-agents/blob/b255661084cb8f701c716b040693069a3fb9a257/ml-agents-envs/mlagents/envs/environment.py#L45))

+
+# Migrating
+## Migrating to Release 13
+### Implementing IHeuristicProvider in your IActuator implementations
+ - If you have any custom actuators, you can now implement the `IHeuristicProvider` interface to have your actuator
+handle the generation of actions when an Agent is running in heuristic mode.
+
+
 # Migrating
 ## Migrating to Release 11
 ### Agent virtual method deprecation
--- a/Project/Assets/ML-Agents/Examples/Match3/Scripts/Match3ExampleActuator.cs
+++ b/Project/Assets/ML-Agents/Examples/Match3/Scripts/Match3ExampleActuator.cs
+using Unity.MLAgents;
+using Unity.MLAgents.Extensions.Match3;
+
+namespace Unity.MLAgentsExamples
+{
+    /// <summary>
+    /// Match3 actuator used by the example scene. It overrides <see cref="EvalMovePoints"/> so that a
+    /// candidate move is scored with the point values read from the example <see cref="Match3Board"/>.
+    /// </summary>
+    public class Match3ExampleActuator : Match3Actuator
+    {
+        // m_Board is inherited from the base class; the cast exposes the point values read in EvalMovePoints.
+        Match3Board Board => (Match3Board)m_Board;
+
+        /// <summary>
+        /// Creates the actuator. All arguments are forwarded to the <see cref="Match3Actuator"/> constructor.
+        /// </summary>
+        /// <param name="board">Board passed to the base constructor.</param>
+        /// <param name="forceHeuristic">Flag passed to the base constructor.</param>
+        /// <param name="agent">Agent passed to the base constructor.</param>
+        /// <param name="name">Actuator name passed to the base constructor.</param>
+        /// <param name="seed">Seed passed to the base constructor.</param>
+        public Match3ExampleActuator(Match3Board board,
+            bool forceHeuristic,
+            Agent agent,
+            string name,
+            int seed
+            )
+            : base(board, forceHeuristic, seed, agent, name) { }
+
+        /// <summary>
+        /// Counts the expected points for making <paramref name="move"/>. The piece at the move's cell is
+        /// scored at the other cell's position and vice versa, and the two results are summed.
+        /// </summary>
+        /// <param name="move">The move to evaluate.</param>
+        /// <returns>The sum of the points of both half moves.</returns>
+        protected override int EvalMovePoints(Move move)
+        {
+            // Looked up by the value returned from GetSpecialType.
+            var pointsByType = new[] { Board.BasicCellPoints, Board.SpecialCell1Points, Board.SpecialCell2Points };
+
+            var moveVal = m_Board.GetCellType(move.Row, move.Column);
+            var moveSpecial = m_Board.GetSpecialType(move.Row, move.Column);
+            var (otherRow, otherCol) = move.OtherCell();
+            var oppositeVal = m_Board.GetCellType(otherRow, otherCol);
+            var oppositeSpecial = m_Board.GetSpecialType(otherRow, otherCol);
+
+            // Each piece is evaluated at the cell currently occupied by the other piece.
+            var movePoints = EvalHalfMove(
+                otherRow, otherCol, moveVal, moveSpecial, move.Direction, pointsByType
+            );
+            var otherPoints = EvalHalfMove(
+                move.Row, move.Column, oppositeVal, oppositeSpecial, move.OtherDirection(), pointsByType
+            );
+            return movePoints + otherPoints;
+        }
+
+        /// <summary>
+        /// Scores a piece of type <paramref name="newValue"/> placed at
+        /// (<paramref name="newRow"/>, <paramref name="newCol"/>). This is essentially a duplicate of
+        /// AbstractBoard.CheckHalfMove, but it also counts the points for the move.
+        /// </summary>
+        /// <param name="newRow">Row the piece is evaluated at.</param>
+        /// <param name="newCol">Column the piece is evaluated at.</param>
+        /// <param name="newValue">Cell type that neighbouring cells must equal to extend a run.</param>
+        /// <param name="newSpecial">Special type of the piece itself; indexes <paramref name="pointsByType"/>.</param>
+        /// <param name="incomingDirection">Direction of the half move; it disables exactly one of the four scans.</param>
+        /// <param name="pointsByType">Point values indexed by special type.</param>
+        /// <returns>
+        /// 0 when neither axis has at least two matching neighbours; otherwise the piece's own points plus
+        /// the points of the matching neighbours on every axis that has at least two of them.
+        /// </returns>
+        int EvalHalfMove(int newRow, int newCol, int newValue, int newSpecial, Direction incomingDirection, int[] pointsByType)
+        {
+            int matchedLeft = 0, matchedRight = 0, matchedUp = 0, matchedDown = 0;
+            int scoreLeft = 0, scoreRight = 0, scoreUp = 0, scoreDown = 0;
+
+            // A Right half move skips the leftward scan, Left skips rightward,
+            // Down skips the increasing-row scan and Up skips the decreasing-row scan.
+            if (incomingDirection != Direction.Right)
+            {
+                (matchedLeft, scoreLeft) = ScanRun(newRow, newCol, 0, -1, newValue, pointsByType);
+            }
+
+            if (incomingDirection != Direction.Left)
+            {
+                (matchedRight, scoreRight) = ScanRun(newRow, newCol, 0, 1, newValue, pointsByType);
+            }
+
+            if (incomingDirection != Direction.Down)
+            {
+                (matchedUp, scoreUp) = ScanRun(newRow, newCol, 1, 0, newValue, pointsByType);
+            }
+
+            if (incomingDirection != Direction.Up)
+            {
+                (matchedDown, scoreDown) = ScanRun(newRow, newCol, -1, 0, newValue, pointsByType);
+            }
+
+            var verticalMatch = matchedUp + matchedDown >= 2;
+            var horizontalMatch = matchedLeft + matchedRight >= 2;
+            if (!verticalMatch && !horizontalMatch)
+            {
+                return 0;
+            }
+
+            // It's a match. Start from counting the piece being moved.
+            var totalScore = pointsByType[newSpecial];
+            if (verticalMatch)
+            {
+                totalScore += scoreUp + scoreDown;
+            }
+
+            if (horizontalMatch)
+            {
+                totalScore += scoreLeft + scoreRight;
+            }
+
+            return totalScore;
+        }
+
+        /// <summary>
+        /// Walks from the cell next to (<paramref name="row"/>, <paramref name="col"/>) in steps of
+        /// (<paramref name="rowStep"/>, <paramref name="colStep"/>) while the cells have type
+        /// <paramref name="value"/>, stopping at the first different cell or at the edge of the board.
+        /// </summary>
+        /// <returns>The number of matching cells and the sum of their points.</returns>
+        (int matched, int score) ScanRun(int row, int col, int rowStep, int colStep, int value, int[] pointsByType)
+        {
+            var matched = 0;
+            var score = 0;
+            for (int r = row + rowStep, c = col + colStep;
+                 r >= 0 && r < m_Board.Rows && c >= 0 && c < m_Board.Columns;
+                 r += rowStep, c += colStep)
+            {
+                if (m_Board.GetCellType(r, c) != value)
+                {
+                    break;
+                }
+
+                matched++;
+                score += pointsByType[m_Board.GetSpecialType(r, c)];
+            }
+
+            return (matched, score);
+        }
+    }
+}
--- a/Project/Assets/ML-Agents/Examples/Match3/Scripts/Match3ExampleActuator.cs.meta
+++ b/Project/Assets/ML-Agents/Examples/Match3/Scripts/Match3ExampleActuator.cs.meta
+fileFormatVersion: 2
+guid: 9e6fe1a020a04421ab828be4543a655c
+timeCreated: 1610665874
--- a/Project/Assets/ML-Agents/Examples/Match3/Scripts/Match3ExampleActuatorComponent.cs
+++ b/Project/Assets/ML-Agents/Examples/Match3/Scripts/Match3ExampleActuatorComponent.cs
+using Unity.MLAgents;
+using Unity.MLAgents.Actuators;
+using Unity.MLAgents.Extensions.Match3;
+
+namespace Unity.MLAgentsExamples
+{
+    /// <summary>
+    /// Actuator component for the Match3 example; it creates a <see cref="Match3ExampleActuator"/>.
+    /// </summary>
+    public class Match3ExampleActuatorComponent : Match3ActuatorComponent
+    {
+        /// <inheritdoc/>
+        public override IActuator CreateActuator()
+        {
+            // A RandomSeed of -1 selects the GameObject's instance ID; any other value is offset by one.
+            int actuatorSeed;
+            if (RandomSeed == -1)
+            {
+                actuatorSeed = gameObject.GetInstanceID();
+            }
+            else
+            {
+                actuatorSeed = RandomSeed + 1;
+            }
+
+            return new Match3ExampleActuator(
+                GetComponent<Match3Board>(),
+                ForceHeuristic,
+                GetComponentInParent<Agent>(),
+                ActuatorName,
+                actuatorSeed);
+        }
+    }
+}
--- a/Project/Assets/ML-Agents/Examples/Match3/Scripts/Match3ExampleActuatorComponent.cs.meta
+++ b/Project/Assets/ML-Agents/Examples/Match3/Scripts/Match3ExampleActuatorComponent.cs.meta
+fileFormatVersion: 2
+guid: b17adcc6c9b241da903aa134f2dac930
+timeCreated: 1610665885
--- a/com.unity.ml-agents/Runtime/Actuators/IHeuristicProvider.cs
+++ b/com.unity.ml-agents/Runtime/Actuators/IHeuristicProvider.cs
+namespace Unity.MLAgents.Actuators
+{
+    /// <summary>
+    /// Interface that allows objects to fill out an <see cref="ActionBuffers"/> data structure for controlling
+    /// behavior of Agents or Actuators.
+    /// </summary>
+    public interface IHeuristicProvider
+    {
+        /// <summary>
+        /// Method called on objects which are expected to fill out the <see cref="ActionBuffers"/> data structure.
+        /// Objects that implement this interface should be careful to be consistent in the placement of their actions
+        /// in the <see cref="ActionBuffers"/> data structure.
+        /// </summary>
+        /// <param name="actionBuffersOut">The <see cref="ActionBuffers"/> data structure to be filled by the
+        /// object implementing this interface.</param>
+        void Heuristic(in ActionBuffers actionBuffersOut);
+    }
+}
--- a/com.unity.ml-agents/Runtime/Actuators/IHeuristicProvider.cs.meta
+++ b/com.unity.ml-agents/Runtime/Actuators/IHeuristicProvider.cs.meta
+fileFormatVersion: 2
+guid: be90ffb28f39444a8fb02dfd4a82870c
+timeCreated: 1610057456