Merging master

3 年前 · 396bc43c
--- a/.yamato/com.unity.ml-agents-performance.yml
+++ b/.yamato/com.unity.ml-agents-performance.yml
  commands:
    - python3 -m pip install unity-downloader-cli --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple --upgrade
    - unity-downloader-cli -u {{ editor.version }} -c editor --wait --fast
-    - curl -s https://artifactory.internal.unity3d.com/core-automation/tools/utr-standalone/utr --output utr
+    - curl -s https://artifactory.prd.it.unity3d.com/artifactory/unity-tools-local/utr-standalone/utr --output utr
    - chmod +x ./utr
    - ./utr --suite=editor --platform=StandaloneOSX --editor-location=.Editor --testproject=DevProject --artifacts_path=build/test-results --report-performance-data --performance-project-id=com.unity.ml-agents --zero-tests-are-ok=1
  triggers:
--- a/.yamato/gym-interface-test.yml
+++ b/.yamato/gym-interface-test.yml
    - |
      sudo apt-get update && sudo apt-get install -y python3-venv
      python3 -m venv venv && source venv/bin/activate
+      python -m pip install wheel --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
      python -m pip install pyyaml --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
      python -u -m ml-agents.tests.yamato.setup_venv
      python ml-agents/tests/yamato/scripts/run_gym.py --env=artifacts/testPlayer-Basic
--- a/Project/Assets/ML-Agents/Examples/Basic/Scripts/BasicActuatorComponent.cs
+++ b/Project/Assets/ML-Agents/Examples/Basic/Scripts/BasicActuatorComponent.cs
 using System;
 using Unity.MLAgents.Actuators;
+using UnityEngine;

 namespace Unity.MLAgentsExamples
 {
    /// <summary>
    /// Simple actuator that converts the action into a {-1, 0, 1} direction
    /// </summary>
-    public class BasicActuator : IActuator
+    public class BasicActuator : IActuator, IHeuristicProvider
    {
        public BasicController basicController;
        ActionSpec m_ActionSpec;
            }

            basicController.MoveDirection(direction);
+        }
+
+        public void Heuristic(in ActionBuffers actionBuffersOut) // IHeuristicProvider: derive the discrete action from player input
+        {
+            var direction = Input.GetAxis("Horizontal"); // current value of Unity's "Horizontal" input axis
+            var discreteActions = actionBuffersOut.DiscreteActions;
+            if (Mathf.Approximately(direction, 0.0f)) // no horizontal input: write action 0 and stop
+            {
+                discreteActions[0] = 0;
+                return;
+            }
+            var sign = Math.Sign(direction);
+            discreteActions[0] = sign < 0 ? 1 : 2; // negative input maps to action 1, positive input to action 2
        }

        public void WriteDiscreteActionMask(IDiscreteActionMask actionMask)
--- a/Project/Assets/ML-Agents/Examples/Match3/Prefabs/Match3Heuristic.prefab
+++ b/Project/Assets/ML-Agents/Examples/Match3/Prefabs/Match3Heuristic.prefab
  - component: {fileID: 3508723250470608012}
  - component: {fileID: 3508723250470608011}
  - component: {fileID: 3508723250470608009}
-  - component: {fileID: 3508723250470608013}
+  - component: {fileID: 2112317463290853299}
  m_Layer: 0
  m_Name: Match3 Agent
  m_TagString: Untagged
  m_BrainParameters:
    VectorObservationSize: 0
    NumStackedVectorObservations: 1
+    m_ActionSpec:
+      m_NumContinuousActions: 0
+      BranchSizes: 
+    hasUpgradedBrainParametersWithActionSpec: 1
  m_Model: {fileID: 11400000, guid: c34da50737a3c4a50918002b20b2b927, type: 3}
  m_InferenceDevice: 0
  m_BehaviorType: 0
  Board: {fileID: 0}
  MoveTime: 0.25
  MaxMoves: 500
-  HeuristicQuality: 0
 --- !u!114 &3508723250470608011
 MonoBehaviour:
  m_ObjectHideFlags: 0
  m_EditorClassIdentifier: 
  DebugMoveIndex: -1
  CubeSpacing: 1.25
-  Board: {fileID: 0}
  TilePrefab: {fileID: 4007900521885639951, guid: faee4e805953b49e688bd00b45c55f2e,
    type: 3}
 --- !u!114 &3508723250470608009
  BasicCellPoints: 1
  SpecialCell1Points: 2
  SpecialCell2Points: 3
--- !u!114 &3508723250470608013
+--- !u!114 &3508723250470608014
 MonoBehaviour:
  m_ObjectHideFlags: 0
  m_CorrespondingSourceObject: {fileID: 0}
  m_Enabled: 1
  m_EditorHideFlags: 0
-  m_Script: {fileID: 11500000, guid: 08e4b0da54cb4d56bfcbae22dd49ab8d, type: 3}
+  m_Script: {fileID: 11500000, guid: 530d2f105aa145bd8a00e021bdd925fd, type: 3}
-  ActuatorName: Match3 Actuator
-  ForceHeuristic: 1
--- !u!114 &3508723250470608014
+  SensorName: Match3 Sensor
+  ObservationType: 0
+--- !u!114 &2112317463290853299
 MonoBehaviour:
  m_ObjectHideFlags: 0
  m_CorrespondingSourceObject: {fileID: 0}
  m_Enabled: 1
  m_EditorHideFlags: 0
-  m_Script: {fileID: 11500000, guid: 530d2f105aa145bd8a00e021bdd925fd, type: 3}
+  m_Script: {fileID: 11500000, guid: b17adcc6c9b241da903aa134f2dac930, type: 3}
-  SensorName: Match3 Sensor
-  ObservationType: 0
+  ActuatorName: Match3 Actuator
+  ForceHeuristic: 1
+  HeuristicQuality: 0
 --- !u!1 &3508723250774301855
 GameObject:
  m_ObjectHideFlags: 0
--- a/Project/Assets/ML-Agents/Examples/Match3/Prefabs/Match3VectorObs.prefab
+++ b/Project/Assets/ML-Agents/Examples/Match3/Prefabs/Match3VectorObs.prefab
  - component: {fileID: 2118285884327540682}
  - component: {fileID: 2118285884327540685}
  - component: {fileID: 2118285884327540687}
-  - component: {fileID: 2118285884327540683}
+  - component: {fileID: 3357012711826686276}
  m_Layer: 0
  m_Name: Match3 Agent
  m_TagString: Untagged
  m_BrainParameters:
    VectorObservationSize: 0
    NumStackedVectorObservations: 1
+    m_ActionSpec:
+      m_NumContinuousActions: 0
+      BranchSizes: 
+    hasUpgradedBrainParametersWithActionSpec: 1
  m_Model: {fileID: 11400000, guid: 9e89b8e81974148d3b7213530d00589d, type: 3}
  m_InferenceDevice: 0
  m_BehaviorType: 0
  Board: {fileID: 0}
  MoveTime: 0.25
  MaxMoves: 500
-  HeuristicQuality: 0
 --- !u!114 &2118285884327540685
 MonoBehaviour:
  m_ObjectHideFlags: 0
  m_EditorClassIdentifier: 
  DebugMoveIndex: -1
  CubeSpacing: 1.25
-  Board: {fileID: 0}
  TilePrefab: {fileID: 4007900521885639951, guid: faee4e805953b49e688bd00b45c55f2e,
    type: 3}
 --- !u!114 &2118285884327540687
  BasicCellPoints: 1
  SpecialCell1Points: 2
  SpecialCell2Points: 3
--- !u!114 &2118285884327540683
+--- !u!114 &2118285884327540680
 MonoBehaviour:
  m_ObjectHideFlags: 0
  m_CorrespondingSourceObject: {fileID: 0}
  m_Enabled: 1
  m_EditorHideFlags: 0
-  m_Script: {fileID: 11500000, guid: 08e4b0da54cb4d56bfcbae22dd49ab8d, type: 3}
+  m_Script: {fileID: 11500000, guid: 530d2f105aa145bd8a00e021bdd925fd, type: 3}
-  ActuatorName: Match3 Actuator
-  ForceHeuristic: 0
--- !u!114 &2118285884327540680
+  SensorName: Match3 Sensor
+  ObservationType: 0
+--- !u!114 &3357012711826686276
 MonoBehaviour:
  m_ObjectHideFlags: 0
  m_CorrespondingSourceObject: {fileID: 0}
  m_Enabled: 1
  m_EditorHideFlags: 0
-  m_Script: {fileID: 11500000, guid: 530d2f105aa145bd8a00e021bdd925fd, type: 3}
+  m_Script: {fileID: 11500000, guid: b17adcc6c9b241da903aa134f2dac930, type: 3}
-  SensorName: Match3 Sensor
-  ObservationType: 0
+  ActuatorName: Match3 Actuator
+  ForceHeuristic: 0
+  HeuristicQuality: 0
--- a/Project/Assets/ML-Agents/Examples/Match3/Prefabs/Match3VisualObs.prefab
+++ b/Project/Assets/ML-Agents/Examples/Match3/Prefabs/Match3VisualObs.prefab
  - component: {fileID: 3019509692332007781}
  - component: {fileID: 3019509692332007778}
  - component: {fileID: 3019509692332007776}
-  - component: {fileID: 3019509692332007780}
+  - component: {fileID: 8270768986451624427}
  m_Layer: 0
  m_Name: Match3 Agent
  m_TagString: Untagged
  m_BrainParameters:
    VectorObservationSize: 0
    NumStackedVectorObservations: 1
+    m_ActionSpec:
+      m_NumContinuousActions: 0
+      BranchSizes: 
+    hasUpgradedBrainParametersWithActionSpec: 1
  m_Model: {fileID: 11400000, guid: 48d14da88fea74d0693c691c6e3f2e34, type: 3}
  m_InferenceDevice: 0
  m_BehaviorType: 0
  Board: {fileID: 0}
  MoveTime: 0.25
  MaxMoves: 500
-  HeuristicQuality: 0
 --- !u!114 &3019509692332007778
 MonoBehaviour:
  m_ObjectHideFlags: 0
  m_EditorClassIdentifier: 
  DebugMoveIndex: -1
  CubeSpacing: 1.25
-  Board: {fileID: 0}
  TilePrefab: {fileID: 4007900521885639951, guid: faee4e805953b49e688bd00b45c55f2e,
    type: 3}
 --- !u!114 &3019509692332007776
  BasicCellPoints: 1
  SpecialCell1Points: 2
  SpecialCell2Points: 3
--- !u!114 &3019509692332007780
+--- !u!114 &3019509692332007783
 MonoBehaviour:
  m_ObjectHideFlags: 0
  m_CorrespondingSourceObject: {fileID: 0}
  m_Enabled: 1
  m_EditorHideFlags: 0
-  m_Script: {fileID: 11500000, guid: 08e4b0da54cb4d56bfcbae22dd49ab8d, type: 3}
+  m_Script: {fileID: 11500000, guid: 530d2f105aa145bd8a00e021bdd925fd, type: 3}
-  ActuatorName: Match3 Actuator
-  ForceHeuristic: 0
--- !u!114 &3019509692332007783
+  SensorName: Match3 Sensor
+  ObservationType: 2
+--- !u!114 &8270768986451624427
 MonoBehaviour:
  m_ObjectHideFlags: 0
  m_CorrespondingSourceObject: {fileID: 0}
  m_Enabled: 1
  m_EditorHideFlags: 0
-  m_Script: {fileID: 11500000, guid: 530d2f105aa145bd8a00e021bdd925fd, type: 3}
+  m_Script: {fileID: 11500000, guid: b17adcc6c9b241da903aa134f2dac930, type: 3}
-  SensorName: Match3 Sensor
-  ObservationType: 2
+  ActuatorName: Match3 Actuator
+  ForceHeuristic: 0
+  HeuristicQuality: 0
--- a/Project/Assets/ML-Agents/Examples/Match3/Scenes/Match3.unity
+++ b/Project/Assets/ML-Agents/Examples/Match3/Scenes/Match3.unity
  m_Modification:
    m_TransformParent: {fileID: 0}
    m_Modifications:
+    - target: {fileID: 2112317463290853299, guid: 2fafdcd0587684641b03b11f04454f1b,
+        type: 3}
+      propertyPath: HeuristicQuality
+      value: 1
+      objectReference: {fileID: 0}
    - target: {fileID: 3508723250470608011, guid: 2fafdcd0587684641b03b11f04454f1b,
        type: 3}
      propertyPath: cubeSpacing
  m_Modification:
    m_TransformParent: {fileID: 0}
    m_Modifications:
+    - target: {fileID: 2112317463290853299, guid: 2fafdcd0587684641b03b11f04454f1b,
+        type: 3}
+      propertyPath: HeuristicQuality
+      value: 1
+      objectReference: {fileID: 0}
    - target: {fileID: 3508723250470608011, guid: 2fafdcd0587684641b03b11f04454f1b,
        type: 3}
      propertyPath: cubeSpacing
--- a/Project/Assets/ML-Agents/Examples/Match3/Scripts/Match3Agent.cs
+++ b/Project/Assets/ML-Agents/Examples/Match3/Scripts/Match3Agent.cs
        WaitForMove = 4,
    }

-    public enum HeuristicQuality
-    {
-        /// <summary>
-        /// The heuristic will pick any valid move at random.
-        /// </summary>
-        RandomValidMove,
-
-        /// <summary>
-        /// The heuristic will pick the move that scores the most points.
-        /// This only looks at the immediate move, and doesn't consider where cells will fall.
-        /// </summary>
-        Greedy
-    }
-
    public class Match3Agent : Agent
    {
        [HideInInspector]
        public int MaxMoves = 500;


-        public HeuristicQuality HeuristicQuality = HeuristicQuality.RandomValidMove;
-
-        private System.Random m_Random;
-
-            var seed = Board.RandomSeed == -1 ? gameObject.GetInstanceID() : Board.RandomSeed + 1;
-            m_Random = new System.Random(seed);
        }

        public override void OnEpisodeBegin()
            return false;
        }

-        public override void Heuristic(in ActionBuffers actionsOut)
-        {
-            var discreteActions = actionsOut.DiscreteActions;
-            discreteActions[0] = GreedyMove();
-        }
-
-        int GreedyMove()
-        {
-            var pointsByType = new[] { Board.BasicCellPoints, Board.SpecialCell1Points, Board.SpecialCell2Points };
-
-            var bestMoveIndex = 0;
-            var bestMovePoints = -1;
-            var numMovesAtCurrentScore = 0;
-
-            foreach (var move in Board.ValidMoves())
-            {
-                var movePoints = HeuristicQuality == HeuristicQuality.Greedy ? EvalMovePoints(move, pointsByType) : 1;
-                if (movePoints < bestMovePoints)
-                {
-                    // Worse, skip
-                    continue;
-                }
-
-                if (movePoints > bestMovePoints)
-                {
-                    // Better, keep
-                    bestMovePoints = movePoints;
-                    bestMoveIndex = move.MoveIndex;
-                    numMovesAtCurrentScore = 1;
-                }
-                else
-                {
-                    // Tied for best - use reservoir sampling to make sure we select from equal moves uniformly.
-                    // See https://en.wikipedia.org/wiki/Reservoir_sampling#Simple_algorithm
-                    numMovesAtCurrentScore++;
-                    var randVal = m_Random.Next(0, numMovesAtCurrentScore);
-                    if (randVal == 0)
-                    {
-                        // Keep the new one
-                        bestMoveIndex = move.MoveIndex;
-                    }
-                }
-            }
-
-            return bestMoveIndex;
-        }
-
-        int EvalMovePoints(Move move, int[] pointsByType)
-        {
-            // Counts the expected points for making the move.
-            var moveVal = Board.GetCellType(move.Row, move.Column);
-            var moveSpecial = Board.GetSpecialType(move.Row, move.Column);
-            var (otherRow, otherCol) = move.OtherCell();
-            var oppositeVal = Board.GetCellType(otherRow, otherCol);
-            var oppositeSpecial = Board.GetSpecialType(otherRow, otherCol);
-
-
-            int movePoints = EvalHalfMove(
-                otherRow, otherCol, moveVal, moveSpecial, move.Direction, pointsByType
-            );
-            int otherPoints = EvalHalfMove(
-                move.Row, move.Column, oppositeVal, oppositeSpecial, move.OtherDirection(), pointsByType
-            );
-            return movePoints + otherPoints;
-        }
-
-        int EvalHalfMove(int newRow, int newCol, int newValue, int newSpecial, Direction incomingDirection, int[] pointsByType)
-        {
-            // This is a essentially a duplicate of AbstractBoard.CheckHalfMove but also counts the points for the move.
-            int matchedLeft = 0, matchedRight = 0, matchedUp = 0, matchedDown = 0;
-            int scoreLeft = 0, scoreRight = 0, scoreUp = 0, scoreDown = 0;
-
-            if (incomingDirection != Direction.Right)
-            {
-                for (var c = newCol - 1; c >= 0; c--)
-                {
-                    if (Board.GetCellType(newRow, c) == newValue)
-                    {
-                        matchedLeft++;
-                        scoreLeft += pointsByType[Board.GetSpecialType(newRow, c)];
-                    }
-                    else
-                        break;
-                }
-            }
-
-            if (incomingDirection != Direction.Left)
-            {
-                for (var c = newCol + 1; c < Board.Columns; c++)
-                {
-                    if (Board.GetCellType(newRow, c) == newValue)
-                    {
-                        matchedRight++;
-                        scoreRight += pointsByType[Board.GetSpecialType(newRow, c)];
-                    }
-                    else
-                        break;
-                }
-            }
-
-            if (incomingDirection != Direction.Down)
-            {
-                for (var r = newRow + 1; r < Board.Rows; r++)
-                {
-                    if (Board.GetCellType(r, newCol) == newValue)
-                    {
-                        matchedUp++;
-                        scoreUp += pointsByType[Board.GetSpecialType(r, newCol)];
-                    }
-                    else
-                        break;
-                }
-            }
-
-            if (incomingDirection != Direction.Up)
-            {
-                for (var r = newRow - 1; r >= 0; r--)
-                {
-                    if (Board.GetCellType(r, newCol) == newValue)
-                    {
-                        matchedDown++;
-                        scoreDown += pointsByType[Board.GetSpecialType(r, newCol)];
-                    }
-                    else
-                        break;
-                }
-            }
-
-            if ((matchedUp + matchedDown >= 2) || (matchedLeft + matchedRight >= 2))
-            {
-                // It's a match. Start from counting the piece being moved
-                var totalScore = pointsByType[newSpecial];
-                if (matchedUp + matchedDown >= 2)
-                {
-                    totalScore += scoreUp + scoreDown;
-                }
-
-                if (matchedLeft + matchedRight >= 2)
-                {
-                    totalScore += scoreLeft + scoreRight;
-                }
-                return totalScore;
-            }
-
-            return 0;
-        }
    }

 }
--- a/Project/Assets/ML-Agents/Examples/Match3/Scripts/Match3Board.cs
+++ b/Project/Assets/ML-Agents/Examples/Match3/Scripts/Match3Board.cs
+using System;
 using Unity.MLAgents.Extensions.Match3;
 using UnityEngine;


    public class Match3Board : AbstractBoard
    {
-        public int RandomSeed = -1;
-
        public const int k_EmptyCell = -1;
        [Tooltip("Points earned for clearing a basic cell (cube)")]
        public int BasicCellPoints = 1;
        [Tooltip("Points earned for clearing an extra special cell (plus)")]
        public int SpecialCell2Points = 3;

+        /// <summary>
+        /// Seed to initialize the <see cref="System.Random"/> object.
+        /// </summary>
+        public int RandomSeed;
+
        (int, int)[,] m_Cells;
        bool[,] m_Matched;

            m_Cells = new (int, int)[Columns, Rows];
            m_Matched = new bool[Columns, Rows];

+        }
+
+        void Start()
+        {
-
            InitRandom();
        }

--- a/README.md
+++ b/README.md

 ## Additional Resources

-We have published a series of blog posts that are relevant for ML-Agents:
+We have a Unity Learn course,
+[ML-Agents: Hummingbirds](https://learn.unity.com/course/ml-agents-hummingbirds),
+that provides a gentle introduction to Unity and the ML-Agents Toolkit.
+
+We've also partnered with
+[CodeMonkeyUnity](https://www.youtube.com/c/CodeMonkeyUnity) to create a
+[series of tutorial videos](https://www.youtube.com/playlist?list=PLzDRvYVwl53vehwiN_odYJkPBzcqFw110)
+on how to implement and use the ML-Agents Toolkit.
+
+We have also published a series of blog posts that are relevant for ML-Agents:
+- (December 28, 2020)
+  [Happy holidays from the Unity ML-Agents team!](https://blogs.unity3d.com/2020/12/28/happy-holidays-from-the-unity-ml-agents-team/)
+- (November 20, 2020)
+  [How Eidos-Montréal created Grid Sensors to improve observations for training agents](https://blogs.unity3d.com/2020/11/20/how-eidos-montreal-created-grid-sensors-to-improve-observations-for-training-agents/)
+- (November 11, 2020)
+  [2020 AI@Unity interns shoutout](https://blogs.unity3d.com/2020/11/11/2020-aiunity-interns-shoutout/)
 - (May 12, 2020)
  [Announcing ML-Agents Unity Package v1.0!](https://blogs.unity3d.com/2020/05/12/announcing-ml-agents-unity-package-v1-0/)
 - (February 28, 2020)
  ([multi-armed bandit](https://blogs.unity3d.com/2017/06/26/unity-ai-themed-blog-entries/)
  and
  [Q-learning](https://blogs.unity3d.com/2017/08/22/unity-ai-reinforcement-learning-with-q-learning/))
-
-In addition to our own documentation, here are some additional, relevant
-articles:
-
- [A Game Developer Learns Machine Learning](https://mikecann.co.uk/posts/a-game-developer-learns-machine-learning-intent)
- [Explore Unity Technologies ML-Agents Exclusively on Intel Architecture](https://software.intel.com/en-us/articles/explore-unity-technologies-ml-agents-exclusively-on-intel-architecture)
- [ML-Agents Penguins tutorial](https://learn.unity.com/project/ml-agents-penguins)

 ## Community and Feedback

--- a/com.unity.ml-agents.extensions/Runtime/Match3/Match3Actuator.cs
+++ b/com.unity.ml-agents.extensions/Runtime/Match3/Match3Actuator.cs
    /// Actuator for a Match3 game. It translates valid moves (defined by AbstractBoard.IsMoveValid())
    /// in action masks, and applies the action to the board via AbstractBoard.MakeMove().
    /// </summary>
-    public class Match3Actuator : IActuator
+    public class Match3Actuator : IActuator, IHeuristicProvider
-        private AbstractBoard m_Board;
+        protected AbstractBoard m_Board;
+        protected System.Random m_Random;
-        private System.Random m_Random;
        private Agent m_Agent;

        private int m_Rows;
        /// <param name="board"></param>
        /// <param name="forceHeuristic">Whether the inference action should be ignored and the Agent's Heuristic
        /// should be called. This should only be used for generating comparison stats of the Heuristic.</param>
+        /// <param name="seed">The seed used to initialize <see cref="System.Random"/>.</param>
-        public Match3Actuator(AbstractBoard board, bool forceHeuristic, Agent agent, string name)
+        public Match3Actuator(AbstractBoard board,
+            bool forceHeuristic,
+            int seed,
+            Agent agent,
+            string name)
        {
            m_Board = board;
            m_Rows = board.Rows;

            var numMoves = Move.NumPotentialMoves(m_Board.Rows, m_Board.Columns);
            m_ActionSpec = ActionSpec.MakeDiscrete(numMoves);
+            m_Random = new System.Random(seed);
        }

        /// <inheritdoc/>
        {
            if (m_ForceHeuristic)
            {
-                m_Agent.Heuristic(actions);
+                Heuristic(actions);
            }
            var moveIndex = actions.DiscreteActions[0];

                yield return move.MoveIndex;
            }
        }
+
+        public void Heuristic(in ActionBuffers actionsOut) // IHeuristicProvider: pick a move without running inference
+        {
+            var discreteActions = actionsOut.DiscreteActions;
+            discreteActions[0] = GreedyMove(); // only the first discrete action is written; it holds the chosen move index
+        }
+
+
+        protected int GreedyMove()
+        {
+            // Scan every valid move and keep the index of the best-scoring one; index 0 is returned if there are none.
+            var bestMoveIndex = 0;
+            var bestMovePoints = -1; // starts below zero so any move scoring >= 0 replaces the default
+            var numMovesAtCurrentScore = 0;
+
+            foreach (var move in m_Board.ValidMoves())
+            {
+                var movePoints = EvalMovePoints(move);
+                if (movePoints < bestMovePoints)
+                {
+                    // Worse, skip
+                    continue;
+                }
+
+                if (movePoints > bestMovePoints)
+                {
+                    // Better, keep
+                    bestMovePoints = movePoints;
+                    bestMoveIndex = move.MoveIndex;
+                    numMovesAtCurrentScore = 1;
+                }
+                else
+                {
+                    // Tied for best - use reservoir sampling to make sure we select from equal moves uniformly.
+                    // See https://en.wikipedia.org/wiki/Reservoir_sampling#Simple_algorithm
+                    numMovesAtCurrentScore++;
+                    var randVal = m_Random.Next(0, numMovesAtCurrentScore); // upper bound is exclusive: 0 .. numMovesAtCurrentScore - 1
+                    if (randVal == 0)
+                    {
+                        // Keep the new one
+                        bestMoveIndex = move.MoveIndex;
+                    }
+                }
+            }
+
+            return bestMoveIndex;
+        }
+
+        /// <summary>
+        /// Method to be overridden when evaluating how many points a specific move will generate. GreedyMove calls it once per valid move.
+        /// </summary>
+        /// <param name="move">The move to evaluate.</param>
+        /// <returns>The number of points the move generates. This base implementation returns 1 for every move.</returns>
+        protected virtual int EvalMovePoints(Move move)
+        {
+            return 1; // constant score: every valid move ties, so GreedyMove chooses among them at random
+        }
+
    }
 }
--- a/com.unity.ml-agents.extensions/Runtime/Match3/Match3ActuatorComponent.cs
+++ b/com.unity.ml-agents.extensions/Runtime/Match3/Match3ActuatorComponent.cs
 namespace Unity.MLAgents.Extensions.Match3
 {
    /// <summary>
-    /// Actuator component for a Match 3 game. Generates a Match3Actuator at runtime.
+    /// Actuator component for a Match3 game. Generates a Match3Actuator at runtime.
    /// </summary>
    public class Match3ActuatorComponent : ActuatorComponent
    {
        public string ActuatorName = "Match3 Actuator";

        /// <summary>
+        /// Seed for the <see cref="System.Random"/> used by the actuator's heuristic. A value of -1 derives the seed from the GameObject's instance ID.
+        /// </summary>
+        public int RandomSeed = -1;
+
+        /// <summary>
        /// Force using the Agent's Heuristic() method to decide the action. This should only be used in testing.
        /// </summary>
        [FormerlySerializedAs("ForceRandom")]
        {
            var board = GetComponent<AbstractBoard>();
            var agent = GetComponentInParent<Agent>();
-            return new Match3Actuator(board, ForceHeuristic, agent, ActuatorName);
+            var seed = RandomSeed == -1 ? gameObject.GetInstanceID() : RandomSeed + 1;
+            return new Match3Actuator(board, ForceHeuristic, seed, agent, ActuatorName);
        }

        /// <inheritdoc/>
--- a/com.unity.ml-agents/CHANGELOG.md
+++ b/com.unity.ml-agents/CHANGELOG.md
 - `StatAggregationMethod.Sum` can now be passed to `StatsRecorder.Add()`. This
 will result in the values being summed (instead of averaged) when written to
 TensorBoard. Thanks to @brccabral for the contribution! (#4816)
+- The upper limit for the time scale (by setting the `--time-scale` parameter in mlagents-learn) was
+removed when training with a player. The Editor still requires it to be clamped to 100. (#4867)
+- Added the IHeuristicProvider interface to allow IActuators as well as Agents to implement the Heuristic function to generate actions.
+  Updated the Basic example and the Match3 Example to use Actuators.
+  Changed the namespace and file names of classes in com.unity.ml-agents.extensions. (#4849)
+

 #### ml-agents / ml-agents-envs / gym-unity (Python)

--- a/com.unity.ml-agents/Runtime/Actuators/ActuatorManager.cs
+++ b/com.unity.ml-agents/Runtime/Actuators/ActuatorManager.cs

        /// <summary>
        /// Iterates through all of the IActuators in this list and calls their
+        /// <see cref="IHeuristicProvider.Heuristic"/> method on them, if implemented, with the appropriate
+        /// <see cref="ActionSegment{T}"/>s depending on their <see cref="ActionSpec"/>.
+        /// </summary>
+        public void ApplyHeuristic(in ActionBuffers actionBuffersOut)
+        {
+            var continuousStart = 0; // running offset into the shared continuous action array
+            var discreteStart = 0; // running offset into the shared discrete action array
+            for (var i = 0; i < m_Actuators.Count; i++)
+            {
+                var actuator = m_Actuators[i];
+                var numContinuousActions = actuator.ActionSpec.NumContinuousActions;
+                var numDiscreteActions = actuator.ActionSpec.NumDiscreteActions;
+
+                if (numContinuousActions == 0 && numDiscreteActions == 0)
+                {
+                    continue; // this actuator owns no actions, so there is nothing to fill
+                }
+
+                var continuousActions = ActionSegment<float>.Empty; // stays Empty unless the actuator has continuous actions
+                if (numContinuousActions > 0)
+                {
+                    continuousActions = new ActionSegment<float>(actionBuffersOut.ContinuousActions.Array,
+                        continuousStart,
+                        numContinuousActions);
+                }
+
+                var discreteActions = ActionSegment<int>.Empty; // stays Empty unless the actuator has discrete actions
+                if (numDiscreteActions > 0)
+                {
+                    discreteActions = new ActionSegment<int>(actionBuffersOut.DiscreteActions.Array,
+                        discreteStart,
+                        numDiscreteActions);
+                }
+
+                var heuristic = actuator as IHeuristicProvider;
+                heuristic?.Heuristic(new ActionBuffers(continuousActions, discreteActions)); // does nothing when the actuator does not implement IHeuristicProvider
+                continuousStart += numContinuousActions; // offsets advance even when no heuristic ran, so later actuators keep their own slice
+                discreteStart += numDiscreteActions;
+            }
+        }
+
+        /// <summary>
+        /// Iterates through all of the IActuators in this list and calls their
        /// <see cref="IActionReceiver.OnActionReceived"/> method on them with the appropriate
        /// <see cref="ActionSegment{T}"/>s depending on their <see cref="ActionSpec"/>.
        /// </summary>
--- a/com.unity.ml-agents/Runtime/Actuators/VectorActuator.cs
+++ b/com.unity.ml-agents/Runtime/Actuators/VectorActuator.cs
 namespace Unity.MLAgents.Actuators
 {
    /// <summary>
-    /// IActuator implementation that forwards to an <see cref="IActionReceiver"/>.
+    /// IActuator implementation that forwards calls to an <see cref="IActionReceiver"/> and an <see cref="IHeuristicProvider"/>.
-    internal class VectorActuator : IActuator
+    internal class VectorActuator : IActuator, IHeuristicProvider
+        IHeuristicProvider m_HeuristicProvider;

        ActionBuffers m_ActionBuffers;
        internal ActionBuffers ActionBuffers
        /// <summary>
        /// Create a VectorActuator that forwards to the provided IActionReceiver.
        /// </summary>
+        /// <param name="actionReceiver">The <see cref="IActionReceiver"/> used for OnActionReceived and WriteDiscreteActionMask.
+        /// If this parameter also implements <see cref="IHeuristicProvider"/> it will be cast and used to forward calls to
+        /// <see cref="IHeuristicProvider.Heuristic"/>.</param>
+        /// <param name="actionSpec"></param>
+        /// <param name="name"></param>
+        public VectorActuator(IActionReceiver actionReceiver,
+                              ActionSpec actionSpec,
+                              string name = "VectorActuator")
+            : this(actionReceiver, actionReceiver as IHeuristicProvider, actionSpec, name) { }
+
+        /// <summary>
+        /// Create a VectorActuator that forwards to the provided IActionReceiver.
+        /// </summary>
+        /// <param name="heuristicProvider">The <see cref="IHeuristicProvider"/> used to fill the <see cref="ActionBuffers"/>
+        /// for Heuristic Policies.</param>
+                              IHeuristicProvider heuristicProvider,
+            m_HeuristicProvider = heuristicProvider;
            ActionSpec = actionSpec;
            string suffix;
            if (actionSpec.NumContinuousActions == 0)
        {
            ActionBuffers = actionBuffers;
            m_ActionReceiver.OnActionReceived(ActionBuffers);
+        }
+
+        public void Heuristic(in ActionBuffers actionBuffersOut) // IHeuristicProvider: forward to the provider given at construction
+        {
+            m_HeuristicProvider?.Heuristic(actionBuffersOut); // no-op when no heuristic provider was supplied (null)
        }

        /// <inheritdoc />
--- a/com.unity.ml-agents/Runtime/Agent.cs
+++ b/com.unity.ml-agents/Runtime/Agent.cs
        "docs/Learning-Environment-Design-Agents.md")]
    [Serializable]
    [RequireComponent(typeof(BehaviorParameters))]
-    public partial class Agent : MonoBehaviour, ISerializationCallbackReceiver, IActionReceiver
+    public partial class Agent : MonoBehaviour, ISerializationCallbackReceiver, IActionReceiver, IHeuristicProvider
    {
        IPolicy m_Brain;
        BehaviorParameters m_PolicyFactory;
        float[] m_LegacyActionCache;

        /// <summary>
+        /// This is used to avoid allocation of a float array during legacy calls to Heuristic.
+        /// </summary>
+        float[] m_LegacyHeuristicCache;
+
+        /// <summary>
        /// Called when the attached [GameObject] becomes enabled and active.
        /// [GameObject]: https://docs.unity3d.com/Manual/GameObjects.html
        /// </summary>
                InitializeActuators();
            }

-            m_Brain = m_PolicyFactory.GeneratePolicy(m_ActuatorManager.GetCombinedActionSpec(), Heuristic);
+            m_Brain = m_PolicyFactory.GeneratePolicy(m_ActuatorManager.GetCombinedActionSpec(), m_ActuatorManager);
            ResetData();
            Initialize();

                return;
            }
            m_Brain?.Dispose();
-            m_Brain = m_PolicyFactory.GeneratePolicy(m_ActuatorManager.GetCombinedActionSpec(), Heuristic);
+            m_Brain = m_PolicyFactory.GeneratePolicy(m_ActuatorManager.GetCombinedActionSpec(), m_ActuatorManager);
        }

        /// <summary>
        public virtual void Initialize() { }

        /// <summary>
-        /// Implement `Heuristic()` to choose an action for this agent using a custom heuristic.
+        /// Implement <see cref="Heuristic"/> to choose an action for this agent using a custom heuristic.
-        /// control of an agent using keyboard, mouse, or game controller input.
+        /// control of an agent using keyboard, mouse, game controller input, or a script.
        ///
        /// Your heuristic implementation can use any decision making logic you specify. Assign decision
        /// values to the <see cref="ActionBuffers.ContinuousActions"/>  and <see cref="ActionBuffers.DiscreteActions"/>
            switch (m_PolicyFactory.BrainParameters.VectorActionSpaceType)
            {
                case SpaceType.Continuous:
-                    Heuristic(actionsOut.ContinuousActions.Array);
+                    Heuristic(m_LegacyHeuristicCache);
+                    Array.Copy(m_LegacyHeuristicCache, actionsOut.ContinuousActions.Array, m_LegacyActionCache.Length);
-                    var convertedOut = Array.ConvertAll(actionsOut.DiscreteActions.Array, x => (float)x);
-                    Heuristic(convertedOut);
+                    Heuristic(m_LegacyHeuristicCache);
-                        discreteActionSegment[i] = (int)convertedOut[i];
+                        discreteActionSegment[i] = (int)m_LegacyHeuristicCache[i];
-
        }

        /// <summary>
            // Support legacy OnActionReceived
            // TODO don't set this up if the sizes are 0?
            var param = m_PolicyFactory.BrainParameters;
-            m_VectorActuator = new VectorActuator(this, param.ActionSpec);
+            m_VectorActuator = new VectorActuator(this, this, param.ActionSpec);
+            m_LegacyHeuristicCache = new float[m_VectorActuator.TotalNumberOfActions()];

            m_ActuatorManager.Add(m_VectorActuator);

        /// three values in ActionBuffers.ContinuousActions array to use as the force components.
        /// During training, the agent's  policy learns to set those particular elements of
        /// the array to maximize the training rewards the agent receives. (Of course,
-        /// if you implement a <seealso cref="Heuristic(in ActionBuffers)"/> function, it must use the same
+        /// if you implement a <seealso cref="Agent.Heuristic(in ActionBuffers)"/> function, it must use the same
        /// elements of the action array for the same purpose since there is no learning
        /// involved.)
        ///

            if (!actions.ContinuousActions.IsEmpty())
            {
-                m_LegacyActionCache = actions.ContinuousActions.Array;
+                Array.Copy(actions.ContinuousActions.Array,
+                    m_LegacyActionCache,
+                    actionSpec.NumContinuousActions);
-                m_LegacyActionCache = Array.ConvertAll(actions.DiscreteActions.Array, x => (float)x);
+                for (var i = 0; i < m_LegacyActionCache.Length; i++)
+                {
+                    m_LegacyActionCache[i] = (float)actions.DiscreteActions[i];
+                }
            }
            // Disable deprecation warnings so we can call the legacy overload.
 #pragma warning disable CS0618
--- a/com.unity.ml-agents/Runtime/Inference/BarracudaModelParamLoader.cs
+++ b/com.unity.ml-agents/Runtime/Inference/BarracudaModelParamLoader.cs
            for (var sensorIndex = 0; sensorIndex < sensorComponents.Length; sensorIndex++)
            {
                var sensor = sensorComponents[sensorIndex];
-                if (!sensor.IsVisual())
+                if (sensor.GetObservationShape().Length == 3)
-                    continue;
+                    if (!tensorsNames.Contains(
+                        TensorNames.VisualObservationPlaceholderPrefix + visObsIndex))
+                    {
+                        failedModelChecks.Add(
+                            "The model does not contain a Visual Observation Placeholder Input " +
+                            $"for sensor component {visObsIndex} ({sensor.GetType().Name}).");
+                    }
+                    visObsIndex++;
-                if (!tensorsNames.Contains(
-                    TensorNames.VisualObservationPlaceholderPrefix + visObsIndex))
+                if (sensor.GetObservationShape().Length == 2)
-                    failedModelChecks.Add(
-                        "The model does not contain a Visual Observation Placeholder Input " +
-                        $"for sensor component {visObsIndex} ({sensor.GetType().Name}).");
+                    if (!tensorsNames.Contains(
+                        TensorNames.ObservationPlaceholderPrefix + sensorIndex))
+                    {
+                        failedModelChecks.Add(
+                            "The model does not contain an Observation Placeholder Input " +
+                            $"for sensor component {sensorIndex} ({sensor.GetType().Name}).");
+                    }
-                visObsIndex++;
            }

            var expectedVisualObs = model.GetNumVisualInputs();
        }

        /// <summary>
+        /// Checks that the shape of the rank 2 observation input placeholder is the same as the corresponding sensor.
+        /// </summary>
+        /// <param name="tensorProxy">The tensor that is expected by the model</param>
+        /// <param name="sensorComponent">The sensor that produces the rank 2 observation.</param>
+        /// <returns>
+        /// If the Check failed, returns a string containing information about why the
+        /// check failed. If the check passed, returns null.
+        /// </returns>
+        static string CheckRankTwoObsShape(
+            TensorProxy tensorProxy, SensorComponent sensorComponent)
+        {
+            var shape = sensorComponent.GetObservationShape();
+            var dim1Bp = shape[0];
+            var dim2Bp = shape[1];
+            var dim1T = tensorProxy.Channels;
+            var dim2T = tensorProxy.Width;
+            if ((dim1Bp != dim1T) || (dim2Bp != dim2T))
+            {
+                return $"An Observation of the model does not match. " +
+                    $"Received TensorProxy of shape [?x{dim1Bp}x{dim2Bp}] but " +
+                    $"was expecting [?x{dim1T}x{dim2T}].";
+            }
+            return null;
+        }
+
+        /// <summary>
        /// Generates failed checks that correspond to inputs shapes incompatibilities between
        /// the model and the BrainParameters.
        /// </summary>
            for (var sensorIndex = 0; sensorIndex < sensorComponents.Length; sensorIndex++)
            {
                var sensorComponent = sensorComponents[sensorIndex];
-                if (!sensorComponent.IsVisual())
+                if (sensorComponent.GetObservationShape().Length == 3)
+                {
+
+                    tensorTester[TensorNames.VisualObservationPlaceholderPrefix + visObsIndex] =
+                        (bp, tensor, scs, i) => CheckVisualObsShape(tensor, sensorComponent);
+                    visObsIndex++;
+                }
+                if (sensorComponent.GetObservationShape().Length == 2)
-                    continue;
+                    tensorTester[TensorNames.ObservationPlaceholderPrefix + sensorIndex] =
+                        (bp, tensor, scs, i) => CheckRankTwoObsShape(tensor, sensorComponent);
-                tensorTester[TensorNames.VisualObservationPlaceholderPrefix + visObsIndex] =
-                    (bp, tensor, scs, i) => CheckVisualObsShape(tensor, sensorComponent);
-                visObsIndex++;
            }

            // If the model expects an input but it is not in this list
            var totalVectorSensorSize = 0;
            foreach (var sensorComp in sensorComponents)
            {
-                if (sensorComp.IsVector())
+                if (sensorComp.GetObservationShape().Length == 1)
                {
                    totalVectorSensorSize += sensorComp.GetObservationShape()[0];
                }
                var sensorSizes = "";
                foreach (var sensorComp in sensorComponents)
                {
-                    if (sensorComp.IsVector())
+                    if (sensorComp.GetObservationShape().Length == 1)
                    {
                        var vecSize = sensorComp.GetObservationShape()[0];
                        if (sensorSizes.Length == 0)
--- a/com.unity.ml-agents/Runtime/Inference/GeneratorImpl.cs
+++ b/com.unity.ml-agents/Runtime/Inference/GeneratorImpl.cs
    }

    /// <summary>
-    /// Generates the Tensor corresponding to the VectorObservation input : Will be a two
-    /// dimensional float array of dimension [batchSize x vectorObservationSize].
-    /// It will use the Vector Observation data contained in the agentInfo to fill the data
-    /// of the tensor.
-    /// </summary>
-    internal class VectorObservationGenerator : TensorGenerator.IGenerator
-    {
-        readonly ITensorAllocator m_Allocator;
-        List<int> m_SensorIndices = new List<int>();
-        ObservationWriter m_ObservationWriter = new ObservationWriter();
-
-        public VectorObservationGenerator(ITensorAllocator allocator)
-        {
-            m_Allocator = allocator;
-        }
-
-        public void AddSensorIndex(int sensorIndex)
-        {
-            m_SensorIndices.Add(sensorIndex);
-        }
-
-        public void Generate(TensorProxy tensorProxy, int batchSize, IEnumerable<AgentInfoSensorsPair> infos)
-        {
-            TensorUtils.ResizeTensor(tensorProxy, batchSize, m_Allocator);
-            var vecObsSizeT = tensorProxy.shape[tensorProxy.shape.Length - 1];
-            var agentIndex = 0;
-            foreach (var info in infos)
-            {
-                if (info.agentInfo.done)
-                {
-                    // If the agent is done, we might have a stale reference to the sensors
-                    // e.g. a dependent object might have been disposed.
-                    // To avoid this, just fill observation with zeroes instead of calling sensor.Write.
-                    TensorUtils.FillTensorBatch(tensorProxy, agentIndex, 0.0f);
-                }
-                else
-                {
-                    var tensorOffset = 0;
-                    // Write each sensor consecutively to the tensor
-                    foreach (var sensorIndex in m_SensorIndices)
-                    {
-                        var sensor = info.sensors[sensorIndex];
-                        m_ObservationWriter.SetTarget(tensorProxy, agentIndex, tensorOffset);
-                        var numWritten = sensor.Write(m_ObservationWriter);
-                        tensorOffset += numWritten;
-                    }
-                    Debug.AssertFormat(
-                        tensorOffset == vecObsSizeT,
-                        "mismatch between vector observation size ({0}) and number of observations written ({1})",
-                        vecObsSizeT, tensorOffset
-                    );
-                }
-
-                agentIndex++;
-            }
-        }
-    }
-
-    /// <summary>
    /// Generates the Tensor corresponding to the Recurrent input : Will be a two
    /// dimensional float array of dimension [batchSize x memorySize].
    /// It will use the Memory data contained in the agentInfo to fill the data
    }

    /// <summary>
-    /// Generates the Tensor corresponding to the Visual Observation input : Will be a 4
-    /// dimensional float array of dimension [batchSize x width x height x numChannels].
-    /// It will use the Texture input data contained in the agentInfo to fill the data
+    /// Generates the Tensor corresponding to the Observation input : Will be a multi
+    /// dimensional float array.
+    /// It will use the Observation data contained in the sensors to fill the data
-    internal class VisualObservationInputGenerator : TensorGenerator.IGenerator
+    internal class ObservationGenerator : TensorGenerator.IGenerator
-        readonly int m_SensorIndex;
+        List<int> m_SensorIndices = new List<int>();
-        public VisualObservationInputGenerator(
-            int sensorIndex, ITensorAllocator allocator)
+        public ObservationGenerator(ITensorAllocator allocator)
-            m_SensorIndex = sensorIndex;
+        public void AddSensorIndex(int sensorIndex)
+        {
+            m_SensorIndices.Add(sensorIndex);
+        }
+
-            foreach (var infoSensorPair in infos)
+            foreach (var info in infos)
-                var sensor = infoSensorPair.sensors[m_SensorIndex];
-                if (infoSensorPair.agentInfo.done)
+                if (info.agentInfo.done)
                {
                    // If the agent is done, we might have a stale reference to the sensors
                    // e.g. a dependent object might have been disposed.
                else
                {
-                    m_ObservationWriter.SetTarget(tensorProxy, agentIndex, 0);
-                    sensor.Write(m_ObservationWriter);
+                    var tensorOffset = 0;
+                    // Write each sensor consecutively to the tensor
+                    foreach (var sensorIndex in m_SensorIndices)
+                    {
+                        var sensor = info.sensors[sensorIndex];
+                        m_ObservationWriter.SetTarget(tensorProxy, agentIndex, tensorOffset);
+                        var numWritten = sensor.Write(m_ObservationWriter);
+                        tensorOffset += numWritten;
+                    }
                }
                agentIndex++;
            }
--- a/com.unity.ml-agents/Runtime/Inference/TensorGenerator.cs
+++ b/com.unity.ml-agents/Runtime/Inference/TensorGenerator.cs
        public void InitializeObservations(List<ISensor> sensors, ITensorAllocator allocator)
        {
            // Loop through the sensors on a representative agent.
-            // For vector observations, add the index to the (single) VectorObservationGenerator
-            // For visual observations, make a VisualObservationInputGenerator
+            // All vector observations use a shared ObservationGenerator since they are concatenated.
+            // All other observations each use their own ObservationGenerator.
-            VectorObservationGenerator vecObsGen = null;
+            ObservationGenerator vecObsGen = null;
-                // TODO generalize - we currently only have vector or visual, but can't handle "2D" observations
-                var isVectorSensor = (shape.Length == 1);
-                if (isVectorSensor)
-                {
-                    if (vecObsGen == null)
-                    {
-                        vecObsGen = new VectorObservationGenerator(allocator);
-                    }
-
-                    vecObsGen.AddSensorIndex(sensorIndex);
-                }
-                else
+                var rank = shape.Length;
+                ObservationGenerator obsGen = null;
+                string obsGenName = null;
+                switch (rank)
-                    m_Dict[TensorNames.VisualObservationPlaceholderPrefix + visIndex] =
-                        new VisualObservationInputGenerator(sensorIndex, allocator);
-                    visIndex++;
+                    case 1:
+                        if (vecObsGen == null)
+                        {
+                            vecObsGen = new ObservationGenerator(allocator);
+                        }
+                        obsGen = vecObsGen;
+                        obsGenName = TensorNames.VectorObservationPlaceholder;
+                        break;
+                    case 2:
+                        // If the tensor is of rank 2, we use the index of the sensor
+                        // to create the name
+                        obsGen = new ObservationGenerator(allocator);
+                        obsGenName = TensorNames.ObservationPlaceholderPrefix + sensorIndex;
+                        break;
+                    case 3:
+                        // If the tensor is of rank 3, we use the "visual observation
+                        // index", which only counts the rank 3 sensors
+                        obsGen = new ObservationGenerator(allocator);
+                        obsGenName = TensorNames.VisualObservationPlaceholderPrefix + visIndex;
+                        visIndex++;
+                        break;
+                    default:
+                        throw new UnityAgentsException(
+                            $"Sensor {sensor.GetName()} has an invalid rank {rank}");
-            }
-
-            if (vecObsGen != null)
-            {
-                m_Dict[TensorNames.VectorObservationPlaceholder] = vecObsGen;
+                obsGen.AddSensorIndex(sensorIndex);
+                m_Dict[obsGenName] = obsGen;
            }
        }

--- a/com.unity.ml-agents/Runtime/Inference/TensorNames.cs
+++ b/com.unity.ml-agents/Runtime/Inference/TensorNames.cs
        public const string recurrentInPlaceholderH = "recurrent_in_h";
        public const string recurrentInPlaceholderC = "recurrent_in_c";
        public const string VisualObservationPlaceholderPrefix = "visual_observation_";
+        public const string ObservationPlaceholderPrefix = "obs_";
        public const string PreviousActionPlaceholder = "prev_action";
        public const string ActionMaskPlaceholder = "action_masks";
        public const string RandomNormalEpsilonPlaceholder = "epsilon";
--- a/com.unity.ml-agents/Runtime/Policies/BehaviorParameters.cs
+++ b/com.unity.ml-agents/Runtime/Policies/BehaviorParameters.cs
            get { return m_BehaviorName + "?team=" + TeamId; }
        }

-        internal IPolicy GeneratePolicy(ActionSpec actionSpec, HeuristicPolicy.ActionGenerator heuristic)
+        internal IPolicy GeneratePolicy(ActionSpec actionSpec, ActuatorManager actuatorManager)
-                    return new HeuristicPolicy(heuristic, actionSpec);
+                    return new HeuristicPolicy(actuatorManager, actionSpec);
                case BehaviorType.InferenceOnly:
                    {
                        if (m_Model == null)
                    }
                    else
                    {
-                        return new HeuristicPolicy(heuristic, actionSpec);
+                        return new HeuristicPolicy(actuatorManager, actionSpec);
-                    return new HeuristicPolicy(heuristic, actionSpec);
+                    return new HeuristicPolicy(actuatorManager, actionSpec);
            }
        }

            }
            agent.ReloadPolicy();
        }
-
    }
 }
--- a/com.unity.ml-agents/Runtime/Policies/HeuristicPolicy.cs
+++ b/com.unity.ml-agents/Runtime/Policies/HeuristicPolicy.cs
 namespace Unity.MLAgents.Policies
 {
    /// <summary>
-    /// The Heuristic Policy uses a hards coded Heuristic method
+    /// The Heuristic Policy uses a hard-coded Heuristic method
-        public delegate void ActionGenerator(in ActionBuffers actionBuffers);
-        ActionGenerator m_Heuristic;
+        ActuatorManager m_ActuatorManager;
        ActionBuffers m_ActionBuffers;
        bool m_Done;
        bool m_DecisionRequested;


        /// <inheritdoc />
-        public HeuristicPolicy(ActionGenerator heuristic, ActionSpec actionSpec)
+        public HeuristicPolicy(ActuatorManager actuatorManager, ActionSpec actionSpec)
-            m_Heuristic = heuristic;
+            m_ActuatorManager = actuatorManager;
            var numContinuousActions = actionSpec.NumContinuousActions;
            var numDiscreteActions = actionSpec.NumDiscreteActions;
            var continuousDecision = new ActionSegment<float>(new float[numContinuousActions], 0, numContinuousActions);
        {
            if (!m_Done && m_DecisionRequested)
            {
-                m_Heuristic.Invoke(m_ActionBuffers);
+                m_ActuatorManager.ApplyHeuristic(m_ActionBuffers);
            }
            m_DecisionRequested = false;
            return ref m_ActionBuffers;
--- a/com.unity.ml-agents/Runtime/Sensors/SensorComponent.cs
+++ b/com.unity.ml-agents/Runtime/Sensors/SensorComponent.cs
 using UnityEngine;
+using System;

 namespace Unity.MLAgents.Sensors
 {
        /// Whether the observation is visual or not.
        /// </summary>
        /// <returns>True if the observation is visual, false otherwise.</returns>
+        [Obsolete("IsVisual is deprecated, please use GetObservationShape() instead.")]
        public virtual bool IsVisual()
        {
            var shape = GetObservationShape();
        /// Whether the observation is vector or not.
        /// </summary>
        /// <returns>True if the observation is vector, false otherwise.</returns>
+        [Obsolete("IsVector is deprecated, please use GetObservationShape() instead.")]
        public virtual bool IsVector()
        {
            var shape = GetObservationShape();
--- a/com.unity.ml-agents/Runtime/SideChannels/EngineConfigurationChannel.cs
+++ b/com.unity.ml-agents/Runtime/SideChannels/EngineConfigurationChannel.cs
    /// </summary>
    internal class EngineConfigurationChannel : SideChannel
    {
-        enum ConfigurationType : int
+        internal enum ConfigurationType : int
        {
            ScreenResolution = 0,
            QualityLevel = 1,
                    break;
                case ConfigurationType.TimeScale:
                    var timeScale = msg.ReadFloat32();
-                    timeScale = Mathf.Clamp(timeScale, 1, 100);
+
+                    // There's an upper limit for the timeScale in the editor (but not in the player).
+                    // In both cases, always ensure that timeScale >= 1.
+#if UNITY_EDITOR
+                    const float maxTimeScale = 100f;
+#else
+                    const float maxTimeScale = float.PositiveInfinity;
+#endif
+                    timeScale = Mathf.Clamp(timeScale, 1, maxTimeScale);
                    Time.timeScale = timeScale;
                    break;
                case ConfigurationType.TargetFrameRate:
--- a/com.unity.ml-agents/Tests/Editor/Actuators/ActuatorManagerTests.cs
+++ b/com.unity.ml-agents/Tests/Editor/Actuators/ActuatorManagerTests.cs
            manager.WriteActionMask();
            Assert.IsTrue(groundTruthMask.SequenceEqual(manager.DiscreteActionMask.GetMask()));
        }
+
+        [Test]
+        public void TestHeuristic()
+        {
+            var manager = new ActuatorManager(2);
+            var va1 = new TestActuator(ActionSpec.MakeDiscrete(1, 2, 3), "name");
+            var va2 = new TestActuator(ActionSpec.MakeDiscrete(3, 2, 1, 8), "name1");
+            manager.Add(va1);
+            manager.Add(va2);
+
+            var actionBuf = new ActionBuffers(Array.Empty<float>(), new[] { 0, 0, 0, 0, 0, 0, 0 });
+            manager.ApplyHeuristic(actionBuf);
+
+            Assert.IsTrue(va1.m_HeuristicCalled);
+            Assert.AreEqual(va1.m_DiscreteBufferSize, 3);
+            Assert.IsTrue(va2.m_HeuristicCalled);
+            Assert.AreEqual(va2.m_DiscreteBufferSize, 4);
+        }
    }
 }
--- a/com.unity.ml-agents/Tests/Editor/Actuators/TestActuator.cs
+++ b/com.unity.ml-agents/Tests/Editor/Actuators/TestActuator.cs
 using Unity.MLAgents.Actuators;
 namespace Unity.MLAgents.Tests.Actuators
 {
-    internal class TestActuator : IActuator
+    internal class TestActuator : IActuator, IHeuristicProvider
+        public bool m_HeuristicCalled;
+        public int m_DiscreteBufferSize;
+
        public TestActuator(ActionSpec actuatorSpace, string name)
        {
            ActionSpec = actuatorSpace;

        public void ResetData()
        {
+        }
+
+        public void Heuristic(in ActionBuffers actionBuffersOut)
+        {
+            m_HeuristicCalled = true;
+            m_DiscreteBufferSize = actionBuffersOut.DiscreteActions.Length;
        }
    }
 }
--- a/com.unity.ml-agents/Tests/Editor/Actuators/VectorActuatorTests.cs
+++ b/com.unity.ml-agents/Tests/Editor/Actuators/VectorActuatorTests.cs
+using System;
 using System.Collections.Generic;
 using System.Linq;
 using NUnit.Framework;
    [TestFixture]
    public class VectorActuatorTests
    {
-        class TestActionReceiver : IActionReceiver
+        class TestActionReceiver : IActionReceiver, IHeuristicProvider
+            public bool HeuristicCalled;

            public void OnActionReceived(ActionBuffers actionBuffers)
            {
            public void WriteDiscreteActionMask(IDiscreteActionMask actionMask)
            {
                actionMask.WriteMask(Branch, Mask);
+            }
+
+            public void Heuristic(in ActionBuffers actionBuffersOut)
+            {
+                HeuristicCalled = true;
            }
        }

            va.WriteDiscreteActionMask(bdam);

            Assert.IsTrue(groundTruthMask.SequenceEqual(bdam.GetMask()));
+        }
+
+        [Test]
+        public void TestHeuristic()
+        {
+            var ar = new TestActionReceiver();
+            var va = new VectorActuator(ar, ActionSpec.MakeDiscrete(1, 2, 3), "name");
+
+            va.Heuristic(new ActionBuffers(Array.Empty<float>(), va.ActionSpec.BranchSizes));
+            Assert.IsTrue(ar.HeuristicCalled);
        }
    }
 }
--- a/com.unity.ml-agents/Tests/Editor/BehaviorParameterTests.cs
+++ b/com.unity.ml-agents/Tests/Editor/BehaviorParameterTests.cs
 namespace Unity.MLAgents.Tests
 {
    [TestFixture]
-    public class BehaviorParameterTests
+    public class BehaviorParameterTests : IHeuristicProvider
-        static void DummyHeuristic(in ActionBuffers actionsOut)
+        public void Heuristic(in ActionBuffers actionsOut)
        {
            // No-op
        }

            Assert.Throws<UnityAgentsException>(() =>
            {
-                bp.GeneratePolicy(actionSpec, DummyHeuristic);
+                bp.GeneratePolicy(actionSpec, new ActuatorManager());
            });
        }
    }
--- a/com.unity.ml-agents/Tests/Editor/EditModeTestInternalBrainTensorGenerator.cs
+++ b/com.unity.ml-agents/Tests/Editor/EditModeTestInternalBrainTensorGenerator.cs
            const int batchSize = 4;
            var agentInfos = GetFakeAgents(ObservableAttributeOptions.ExamineAll);
            var alloc = new TensorCachingAllocator();
-            var generator = new VectorObservationGenerator(alloc);
+            var generator = new ObservationGenerator(alloc);
            generator.AddSensorIndex(0); // ObservableAttribute (size 1)
            generator.AddSensorIndex(1); // TestSensor (size 0)
            generator.AddSensorIndex(2); // TestSensor (size 0)
--- a/com.unity.ml-agents/Tests/Editor/Sensor/CameraSensorComponentTest.cs
+++ b/com.unity.ml-agents/Tests/Editor/Sensor/CameraSensorComponentTest.cs

                    var expectedShape = new[] { height, width, grayscale ? 1 : 3 };
                    Assert.AreEqual(expectedShape, cameraComponent.GetObservationShape());
-                    Assert.IsTrue(cameraComponent.IsVisual());
-                    Assert.IsFalse(cameraComponent.IsVector());

                    var sensor = cameraComponent.CreateSensor();
                    Assert.AreEqual(expectedShape, sensor.GetObservationShape());
--- a/com.unity.ml-agents/Tests/Editor/Sensor/RenderTextureSensorComponentTests.cs
+++ b/com.unity.ml-agents/Tests/Editor/Sensor/RenderTextureSensorComponentTests.cs

                    var expectedShape = new[] { height, width, grayscale ? 1 : 3 };
                    Assert.AreEqual(expectedShape, renderTexComponent.GetObservationShape());
-                    Assert.IsTrue(renderTexComponent.IsVisual());
-                    Assert.IsFalse(renderTexComponent.IsVector());

                    var sensor = renderTexComponent.CreateSensor();
                    Assert.AreEqual(expectedShape, sensor.GetObservationShape());
--- a/docs/Migrating.md
+++ b/docs/Migrating.md
 - `UnityEnvironment.API_VERSION` in environment.py
  ([example](https://github.com/Unity-Technologies/ml-agents/blob/b255661084cb8f701c716b040693069a3fb9a257/ml-agents-envs/mlagents/envs/environment.py#L45))

+
+# Migrating
+## Migrating to Release 13
+### Implementing IHeuristicProvider in your IActuator implementations
+ - If you have any custom actuators, you can now implement the `IHeuristicProvider` interface to have your actuator
+handle the generation of actions when an Agent is running in heuristic mode.
+
+
 # Migrating
 ## Migrating to Release 11
 ### Agent virtual method deprecation
--- a/ml-agents/mlagents/trainers/tests/test_subprocess_env_manager.py
+++ b/ml-agents/mlagents/trainers/tests/test_subprocess_env_manager.py
        # Sleep momentarily to allow time for the EnvManager to be waiting for the
        # subprocess response.  We won't be able to capture failures from the subprocess
        # that cause it to close the pipe before we can send the first message.
-        time.sleep(0.1)
+        time.sleep(0.5)
        raise UnityEnvironmentException()

    env_manager = SubprocessEnvManager(
--- a/ml-agents/mlagents/trainers/tests/torch/test_attention.py
+++ b/ml-agents/mlagents/trainers/tests/torch/test_attention.py
 from mlagents.torch_utils import torch
 import numpy as np

-from mlagents.trainers.torch.layers import linear_layer
+from mlagents.trainers.torch.utils import ModelUtils
+from mlagents.trainers.torch.layers import linear_layer, LinearEncoder
+    get_zero_entities_mask,
 )


    input_1 = generate_input_helper(masking_pattern_1)
    input_2 = generate_input_helper(masking_pattern_2)

-    masks = ResidualSelfAttention.get_masks([input_1, input_2])
+    masks = get_zero_entities_mask([input_1, input_2])
    assert len(masks) == 2
    masks_1 = masks[0]
    masks_2 = masks[1]
        assert masks_2[0, 1] == 0 if i % 2 == 0 else 1


-def test_simple_transformer_training():
+def test_predict_closest_training():
    np.random.seed(1336)
    torch.manual_seed(1336)
    size, n_k, = 3, 5
    l_layer = linear_layer(embedding_size, size)
    optimizer = torch.optim.Adam(
-        list(transformer.parameters()) + list(l_layer.parameters()), lr=0.001
+        list(entity_embeddings.parameters())
+        + list(transformer.parameters())
+        + list(l_layer.parameters()),
+        lr=0.001,
+        weight_decay=1e-6,
    )
    batch_size = 200
    for _ in range(200):
            target = target.detach()

        embeddings = entity_embeddings(center, key)
-        masks = ResidualSelfAttention.get_masks([key])
+        masks = get_zero_entities_mask([key])
        prediction = transformer.forward(embeddings, masks)
        prediction = l_layer(prediction)
        prediction = prediction.reshape((batch_size, size))
        error.backward()
        optimizer.step()
    assert error.item() < 0.02
+
+
+def test_predict_minimum_training():
+    # of 5 numbers, predict index of min
+    np.random.seed(1336)
+    torch.manual_seed(1336)
+    n_k = 5
+    size = n_k + 1
+    embedding_size = 64
+    entity_embedding = EntityEmbedding(
+        size, size, n_k, embedding_size, concat_self=False
+    )
+    transformer = ResidualSelfAttention(embedding_size)
+    l_layer = LinearEncoder(embedding_size, 2, n_k)
+    loss = torch.nn.CrossEntropyLoss()
+    optimizer = torch.optim.Adam(
+        list(entity_embedding.parameters())
+        + list(transformer.parameters())
+        + list(l_layer.parameters()),
+        lr=0.001,
+        weight_decay=1e-6,
+    )
+
+    batch_size = 200
+    onehots = ModelUtils.actions_to_onehot(torch.range(0, n_k - 1).unsqueeze(1), [n_k])[
+        0
+    ]
+    onehots = onehots.expand((batch_size, -1, -1))
+    losses = []
+    for _ in range(400):
+        num = np.random.randint(0, n_k)
+        inp = torch.rand((batch_size, num + 1, 1))
+        with torch.no_grad():
+            # create the target : The minimum
+            argmin = torch.argmin(inp, dim=1)
+            argmin = argmin.squeeze()
+            argmin = argmin.detach()
+        sliced_oh = onehots[:, : num + 1]
+        inp = torch.cat([inp, sliced_oh], dim=2)
+
+        embeddings = entity_embedding(inp, inp)
+        masks = get_zero_entities_mask([inp])
+        prediction = transformer(embeddings, masks)
+        prediction = l_layer(prediction)
+        ce = loss(prediction, argmin)
+        losses.append(ce.item())
+        print(ce.item())
+        optimizer.zero_grad()
+        ce.backward()
+        optimizer.step()
+    assert np.array(losses[-20:]).mean() < 0.1
--- a/ml-agents/mlagents/trainers/torch/attention.py
+++ b/ml-agents/mlagents/trainers/torch/attention.py
 from mlagents.trainers.exception import UnityTrainerException


+def get_zero_entities_mask(observations: List[torch.Tensor]) -> List[torch.Tensor]:
+    """
+    Takes a List of Tensors and returns a List of mask Tensor with 1 if the input was
+    all zeros (on dimension 2) and 0 otherwise. This is used in the Attention
+    layer to mask the padding observations.
+    """
+    with torch.no_grad():
+        # Generate the masking tensors for each entities tensor (mask only if all zeros)
+        key_masks: List[torch.Tensor] = [
+            (torch.sum(ent ** 2, axis=2) < 0.01).float() for ent in observations
+        ]
+    return key_masks
+
+
 class MultiHeadAttention(torch.nn.Module):

    NEG_INF = -1e6
        concat_self: bool = True,
    ):
        """
-        Constructs an EntityEmbeddings module.
+        Constructs an EntityEmbedding module.
        :param x_self_size: Size of "self" entity.
        :param entity_size: Size of other entitiy.
        :param entity_num_max_elements: Maximum elements for a given entity, None for unrestricted.
 class ResidualSelfAttention(torch.nn.Module):
    """
    Residual self attentioninspired from https://arxiv.org/pdf/1909.07528.pdf. Can be used
-    with an EntityEmbeddings module, to apply multi head self attention to encode information
+    with an EntityEmbedding module, to apply multi head self attention to encode information
    about a "Self" and a list of relevant "Entities".
    """

        denominator = torch.sum(1 - mask, dim=1, keepdim=True) + self.EPSILON
        output = numerator / denominator
        return output
-
-    @staticmethod
-    def get_masks(observations: List[torch.Tensor]) -> List[torch.Tensor]:
-        """
-        Takes a List of Tensors and returns a List of mask Tensor with 1 if the input was
-        all zeros (on dimension 2) and 0 otherwise. This is used in the Attention
-        layer to mask the padding observations.
-        """
-        with torch.no_grad():
-            # Generate the masking tensors for each entities tensor (mask only if all zeros)
-            key_masks: List[torch.Tensor] = [
-                (torch.sum(ent ** 2, axis=2) < 0.01).type(torch.FloatTensor)
-                for ent in observations
-            ]
-        return key_masks
--- a/ml-agents/mlagents/trainers/torch/encoders.py
+++ b/ml-agents/mlagents/trainers/torch/encoders.py
        if not exporting_to_onnx.is_exporting():
            visual_obs = visual_obs.permute([0, 3, 1, 2])
        hidden = self.conv_layers(visual_obs)
-        hidden = hidden.view([-1, self.final_flat])
+        hidden = hidden.reshape([-1, self.final_flat])
        return self.dense(hidden)


        if not exporting_to_onnx.is_exporting():
            visual_obs = visual_obs.permute([0, 3, 1, 2])
        batch_size = visual_obs.shape[0]
-        hidden = self.sequential(visual_obs).contiguous()
-        before_out = hidden.view(batch_size, -1)
+        hidden = self.sequential(visual_obs)
+        before_out = hidden.reshape(batch_size, -1)
        return torch.relu(self.dense(before_out))
--- a/ml-agents/mlagents/trainers/torch/networks.py
+++ b/ml-agents/mlagents/trainers/torch/networks.py
 from mlagents.trainers.torch.encoders import VectorInput
 from mlagents.trainers.buffer import AgentBuffer
 from mlagents.trainers.trajectory import ObsUtil
-from mlagents.trainers.torch.attention import ResidualSelfAttention
+from mlagents.trainers.torch.attention import (
+    ResidualSelfAttention,
+    get_zero_entities_mask,
+)


 ActivationFunction = Callable[[torch.Tensor], torch.Tensor]
        encoded_self = torch.cat(encodes, dim=1)
        if len(var_len_inputs) > 0:
            # Some inputs need to be processed with a variable length encoder
-            masks = ResidualSelfAttention.get_masks(var_len_inputs)
+            masks = get_zero_entities_mask(var_len_inputs)
            embeddings: List[torch.Tensor] = []
            for var_len_input, var_len_processor in zip(
                var_len_inputs, self.var_processors
        if self.use_lstm and memories is not None:
            # Use only the back half of memories for critic and actor
            actor_mem, critic_mem = torch.split(memories, self.memory_size // 2, dim=-1)
+            actor_mem, critic_mem = actor_mem.contiguous(), critic_mem.contiguous()
        else:
            critic_mem = None
            actor_mem = None
--- a/ml-agents/mlagents/trainers/torch/utils.py
+++ b/ml-agents/mlagents/trainers/torch/utils.py
    VectorInput,
 )
 from mlagents.trainers.settings import EncoderType, ScheduleType
-from mlagents.trainers.attention import EntityEmbedding
+from mlagents.trainers.torch.attention import EntityEmbedding
 from mlagents.trainers.exception import UnityTrainerException
 from mlagents_envs.base_env import ObservationSpec, DimensionProperty

--- a/.yamato/training-backcompat-tests.yml
+++ b/.yamato/training-backcompat-tests.yml
+
+# CI job: builds a standalone mac player and runs the training integration tests
+# against an older C# package version and an older Python package version.
+test_mac_backcompat_2020.1:
+  {% capture editor_version %}2020.1{% endcapture %}
+  {% capture csharp_backcompat_version %}1.0.0{% endcapture %}
+  # This test has to run on mac because it requires the custom build of tensorflow without AVX
+  # Test against 2020.1 because 2020.2 has to run against package version 1.2.0
+  name: Test Mac Backcompat Training {{ editor_version }}
+  agent:
+    type: Unity::VM::osx
+    image: ml-agents/ml-agents-bokken-mac:0.1.4-492264
+    flavor: b1.small
+  variables:
+    UNITY_VERSION: {{ editor_version }}
+  commands:
+    # First pass: new virtualenv, editor download, standalone build, then the
+    # training integration test pinned to the C# version captured above.
+    - |
+      python3 -m venv venv && source venv/bin/activate
+      python -m pip install pyyaml --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
+      python -m pip install unity-downloader-cli --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple --upgrade
+      unity-downloader-cli -u {{ editor_version }} -c editor --wait --fast
+      # Backwards-compatibility tests.
+      # If we make a breaking change to the communication protocol, these will need
+      # to be disabled until the next release.
+      python -u -m ml-agents.tests.yamato.standalone_build_tests --build-target=mac
+      python -u -m ml-agents.tests.yamato.training_int_tests --csharp {{ csharp_backcompat_version }}
+    # Second pass: separate virtualenv (venv_old) for the run that passes --python 0.16.0.
+    - |
+      python3 -m venv venv_old && source venv_old/bin/activate
+      python -m pip install pyyaml --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
+      python -u -m ml-agents.tests.yamato.training_int_tests --python 0.16.0
+  # Scheduled daily on master.
+  triggers:
+    cancel_old_ci: true
+    recurring:
+      - branch: master
+        frequency: daily
+  # Files kept after the run: build/inference logs and the built player plus models.
+  artifacts:
+    logs:
+      paths:
+        - "artifacts/standalone_build.txt"
+        - "artifacts/inference.nn.txt"
+        - "artifacts/inference.onnx.txt"
+        - "artifacts/*.log"
+    standalonebuild:
+      paths:
+        - "artifacts/testPlayer*/**"
+        - "artifacts/models/**"
--- a/Project/Assets/ML-Agents/Examples/Match3/Scripts/Match3ExampleActuator.cs
+++ b/Project/Assets/ML-Agents/Examples/Match3/Scripts/Match3ExampleActuator.cs
+using Unity.MLAgents;
+using Unity.MLAgents.Extensions.Match3;
+
+namespace Unity.MLAgentsExamples
+{
+    /// <summary>
+    /// Match3 actuator for the example game. Overrides <see cref="EvalMovePoints"/> so that a
+    /// move is valued by the points of the cells it would match on the example board.
+    /// </summary>
+    public class Match3ExampleActuator : Match3Actuator
+    {
+        /// <summary>The actuator's board, cast to the example's concrete board type.</summary>
+        Match3Board Board => (Match3Board)m_Board;
+
+        /// <summary>
+        /// Creates the actuator. All arguments are forwarded to the base class constructor.
+        /// </summary>
+        public Match3ExampleActuator(Match3Board board,
+            bool forceHeuristic,
+            Agent agent,
+            string name,
+            int seed
+            )
+            : base(board, forceHeuristic, seed, agent, name) { }
+
+        /// <summary>
+        /// Counts the expected points for making the move: the points scored by the moved cell
+        /// at its destination plus the points scored by the cell it trades places with.
+        /// </summary>
+        /// <param name="move">The move to evaluate.</param>
+        /// <returns>Total points for both halves of the swap; 0 when neither half matches.</returns>
+        protected override int EvalMovePoints(Move move)
+        {
+            // Indexed by special type: basic cell, special cell 1, special cell 2.
+            var pointsByType = new[] { Board.BasicCellPoints, Board.SpecialCell1Points, Board.SpecialCell2Points };
+            var moveVal = m_Board.GetCellType(move.Row, move.Column);
+            var moveSpecial = m_Board.GetSpecialType(move.Row, move.Column);
+            var (otherRow, otherCol) = move.OtherCell();
+            var oppositeVal = m_Board.GetCellType(otherRow, otherCol);
+            var oppositeSpecial = m_Board.GetSpecialType(otherRow, otherCol);
+
+            // Each piece is evaluated at the position it would occupy after the swap.
+            int movePoints = EvalHalfMove(
+                otherRow, otherCol, moveVal, moveSpecial, move.Direction, pointsByType
+            );
+            int otherPoints = EvalHalfMove(
+                move.Row, move.Column, oppositeVal, oppositeSpecial, move.OtherDirection(), pointsByType
+            );
+            return movePoints + otherPoints;
+        }
+
+        /// <summary>
+        /// Scores one half of a swap: a piece of type <paramref name="newValue"/> arriving at
+        /// (<paramref name="newRow"/>, <paramref name="newCol"/>).
+        /// </summary>
+        /// <returns>
+        /// Points of the arriving piece plus the points of every matched neighbour, or 0 when
+        /// neither the vertical nor the horizontal line contains at least two matching neighbours.
+        /// </returns>
+        int EvalHalfMove(int newRow, int newCol, int newValue, int newSpecial, Direction incomingDirection, int[] pointsByType)
+        {
+            // This is essentially a duplicate of AbstractBoard.CheckHalfMove but also counts the points for the move.
+            int matchedLeft = 0, matchedRight = 0, matchedUp = 0, matchedDown = 0;
+            int scoreLeft = 0, scoreRight = 0, scoreUp = 0, scoreDown = 0;
+
+            // The side the piece came from is not scanned: it holds the other swapped piece.
+            if (incomingDirection != Direction.Right)
+            {
+                matchedLeft = CountMatchingRun(newRow, newCol, 0, -1, newValue, pointsByType, out scoreLeft);
+            }
+
+            if (incomingDirection != Direction.Left)
+            {
+                matchedRight = CountMatchingRun(newRow, newCol, 0, 1, newValue, pointsByType, out scoreRight);
+            }
+
+            if (incomingDirection != Direction.Down)
+            {
+                matchedUp = CountMatchingRun(newRow, newCol, 1, 0, newValue, pointsByType, out scoreUp);
+            }
+
+            if (incomingDirection != Direction.Up)
+            {
+                matchedDown = CountMatchingRun(newRow, newCol, -1, 0, newValue, pointsByType, out scoreDown);
+            }
+
+            var verticalMatch = matchedUp + matchedDown >= 2;
+            var horizontalMatch = matchedLeft + matchedRight >= 2;
+            if (!verticalMatch && !horizontalMatch)
+            {
+                return 0;
+            }
+
+            // It's a match. Start from counting the piece being moved
+            var totalScore = pointsByType[newSpecial];
+            if (verticalMatch)
+            {
+                totalScore += scoreUp + scoreDown;
+            }
+
+            if (horizontalMatch)
+            {
+                totalScore += scoreLeft + scoreRight;
+            }
+
+            return totalScore;
+        }
+
+        /// <summary>
+        /// Walks away from (<paramref name="row"/>, <paramref name="col"/>) in steps of
+        /// (<paramref name="rowStep"/>, <paramref name="colStep"/>) while the cells have type
+        /// <paramref name="value"/>, stopping at the first different cell or the board edge.
+        /// </summary>
+        /// <param name="score">Sum of the points of the matched cells.</param>
+        /// <returns>Number of consecutive matching cells found.</returns>
+        int CountMatchingRun(int row, int col, int rowStep, int colStep, int value, int[] pointsByType, out int score)
+        {
+            var matched = 0;
+            score = 0;
+            var r = row + rowStep;
+            var c = col + colStep;
+            while (r >= 0 && r < m_Board.Rows && c >= 0 && c < m_Board.Columns)
+            {
+                if (m_Board.GetCellType(r, c) != value)
+                {
+                    break;
+                }
+
+                matched++;
+                score += pointsByType[m_Board.GetSpecialType(r, c)];
+                r += rowStep;
+                c += colStep;
+            }
+
+            return matched;
+        }
+    }
+}
--- a/Project/Assets/ML-Agents/Examples/Match3/Scripts/Match3ExampleActuator.cs.meta
+++ b/Project/Assets/ML-Agents/Examples/Match3/Scripts/Match3ExampleActuator.cs.meta
+fileFormatVersion: 2
+guid: 9e6fe1a020a04421ab828be4543a655c
+timeCreated: 1610665874
--- a/Project/Assets/ML-Agents/Examples/Match3/Scripts/Match3ExampleActuatorComponent.cs
+++ b/Project/Assets/ML-Agents/Examples/Match3/Scripts/Match3ExampleActuatorComponent.cs
+using Unity.MLAgents;
+using Unity.MLAgents.Actuators;
+using Unity.MLAgents.Extensions.Match3;
+
+namespace Unity.MLAgentsExamples
+{
+    /// <summary>
+    /// Actuator component that supplies a <see cref="Match3ExampleActuator"/>.
+    /// </summary>
+    public class Match3ExampleActuatorComponent : Match3ActuatorComponent
+    {
+        /// <inheritdoc/>
+        public override IActuator CreateActuator()
+        {
+            // A RandomSeed of -1 means "no seed given": use the GameObject's instance ID instead.
+            var seed = RandomSeed != -1 ? RandomSeed + 1 : gameObject.GetInstanceID();
+            return new Match3ExampleActuator(
+                GetComponent<Match3Board>(),
+                ForceHeuristic,
+                GetComponentInParent<Agent>(),
+                ActuatorName,
+                seed
+            );
+        }
+    }
+}
--- a/Project/Assets/ML-Agents/Examples/Match3/Scripts/Match3ExampleActuatorComponent.cs.meta
+++ b/Project/Assets/ML-Agents/Examples/Match3/Scripts/Match3ExampleActuatorComponent.cs.meta
+fileFormatVersion: 2
+guid: b17adcc6c9b241da903aa134f2dac930
+timeCreated: 1610665885
--- a/com.unity.ml-agents/Runtime/Actuators/IHeuristicProvider.cs
+++ b/com.unity.ml-agents/Runtime/Actuators/IHeuristicProvider.cs
+namespace Unity.MLAgents.Actuators
+{
+    /// <summary>
+    /// Interface that allows objects to fill out an <see cref="ActionBuffers"/> data structure for controlling
+    /// the behavior of Agents or Actuators.
+    /// </summary>
+    public interface IHeuristicProvider
+    {
+        /// <summary>
+        /// Method called on objects which are expected to fill out the <see cref="ActionBuffers"/> data structure.
+        /// Objects that implement this interface should be careful to be consistent in the placement of their
+        /// actions in the <see cref="ActionBuffers"/> data structure.
+        /// </summary>
+        /// <param name="actionBuffersOut">The <see cref="ActionBuffers"/> data structure to be filled by the
+        /// object implementing this interface. It is passed with the <c>in</c> modifier.</param>
+        void Heuristic(in ActionBuffers actionBuffersOut);
+    }
+}
--- a/com.unity.ml-agents/Runtime/Actuators/IHeuristicProvider.cs.meta
+++ b/com.unity.ml-agents/Runtime/Actuators/IHeuristicProvider.cs.meta
+fileFormatVersion: 2
+guid: be90ffb28f39444a8fb02dfd4a82870c
+timeCreated: 1610057456
--- a/com.unity.ml-agents/Tests/Editor/Sensor/BufferSensorTest.cs
+++ b/com.unity.ml-agents/Tests/Editor/Sensor/BufferSensorTest.cs
+using System;
+using NUnit.Framework;
+using UnityEngine;
+using Unity.MLAgents.Sensors;
+
+namespace Unity.MLAgents.Tests
+{
+    /// <summary>
+    /// Editor tests for <see cref="BufferSensor"/> and <see cref="BufferSensorComponent"/>.
+    /// Both tests use a buffer of 20 observables of size 4, append two observations and
+    /// check that the written data holds those values followed by zeros.
+    /// </summary>
+    [TestFixture]
+    public class BufferSensorTest
+    {
+        /// <summary>
+        /// Checks shape, dimension properties and written data of a directly constructed sensor.
+        /// </summary>
+        [Test]
+        public void TestBufferSensor()
+        {
+            var bufferSensor = new BufferSensor(20, 4);
+            var shape = bufferSensor.GetObservationShape();
+            var dimProp = bufferSensor.GetDimensionProperties();
+            // NUnit's Assert.AreEqual takes (expected, actual); keeping that order makes
+            // failure messages report the right values.
+            Assert.AreEqual(20, shape[0]);
+            Assert.AreEqual(4, shape[1]);
+            Assert.AreEqual(2, shape.Length);
+            Assert.AreEqual(DimensionProperty.VariableSize, dimProp[0]);
+            Assert.AreEqual(DimensionProperty.None, dimProp[1]);
+            Assert.AreEqual(2, dimProp.Length);
+
+            bufferSensor.AppendObservation(new float[] { 1, 2, 3, 4 });
+            bufferSensor.AppendObservation(new float[] { 5, 6, 7, 8 });
+
+            var obsWriter = new ObservationWriter();
+            var obs = bufferSensor.GetObservationProto(obsWriter);
+
+            Assert.AreEqual(shape, obs.Shape);
+            Assert.AreEqual(2, obs.DimensionProperties.Count);
+            Assert.AreEqual((int)dimProp[0], obs.DimensionProperties[0]);
+            Assert.AreEqual((int)dimProp[1], obs.DimensionProperties[1]);
+
+            // The two appended observations occupy the first 8 floats...
+            for (var i = 0; i < 8; i++)
+            {
+                Assert.AreEqual(i + 1, obs.FloatData.Data[i]);
+            }
+            // ...and the remaining 72 of the 20 * 4 floats are zero.
+            for (var i = 8; i < 80; i++)
+            {
+                Assert.AreEqual(0, obs.FloatData.Data[i]);
+            }
+        }
+
+        /// <summary>
+        /// Checks shape and written data of a sensor created through the component.
+        /// </summary>
+        [Test]
+        public void TestBufferSensorComponent()
+        {
+            var agentGameObj = new GameObject("agent");
+            try
+            {
+                var bufferComponent = agentGameObj.AddComponent<BufferSensorComponent>();
+                bufferComponent.MaxNumObservables = 20;
+                bufferComponent.ObservableSize = 4;
+
+                var sensor = bufferComponent.CreateSensor();
+                var shape = bufferComponent.GetObservationShape();
+
+                Assert.AreEqual(20, shape[0]);
+                Assert.AreEqual(4, shape[1]);
+                Assert.AreEqual(2, shape.Length);
+
+                bufferComponent.AppendObservation(new float[] { 1, 2, 3, 4 });
+                bufferComponent.AppendObservation(new float[] { 5, 6, 7, 8 });
+
+                var obsWriter = new ObservationWriter();
+                var obs = sensor.GetObservationProto(obsWriter);
+
+                Assert.AreEqual(shape, obs.Shape);
+                Assert.AreEqual(2, obs.DimensionProperties.Count);
+
+                for (var i = 0; i < 8; i++)
+                {
+                    Assert.AreEqual(i + 1, obs.FloatData.Data[i]);
+                }
+                for (var i = 8; i < 80; i++)
+                {
+                    Assert.AreEqual(0, obs.FloatData.Data[i]);
+                }
+            }
+            finally
+            {
+                // Remove the test GameObject so it does not linger after the test.
+                // Fully qualified because both System and UnityEngine declare an Object type.
+                UnityEngine.Object.DestroyImmediate(agentGameObj);
+            }
+        }
+    }
+}
--- a/com.unity.ml-agents/Tests/Editor/Sensor/BufferSensorTest.cs.meta
+++ b/com.unity.ml-agents/Tests/Editor/Sensor/BufferSensorTest.cs.meta
+fileFormatVersion: 2
+guid: 5267572aa66d34b49bbc65940674b2a6
+MonoImporter:
+  externalObjects: {}
+  serializedVersion: 2
+  defaultReferences: []
+  executionOrder: 0
+  icon: {instanceID: 0}
+  userData: 
+  assetBundleName: 
+  assetBundleVariant: 
--- a/com.unity.ml-agents/Tests/Editor/SideChannels.meta
+++ b/com.unity.ml-agents/Tests/Editor/SideChannels.meta
+fileFormatVersion: 2
+guid: 1228f198ceee45a38c7d9ff50425b65d
+timeCreated: 1610760867
--- a/com.unity.ml-agents/Tests/Editor/SideChannels/EngineConfigurationChannelTests.cs
+++ b/com.unity.ml-agents/Tests/Editor/SideChannels/EngineConfigurationChannelTests.cs
+using NUnit.Framework;
+using Unity.MLAgents.SideChannels;
+using UnityEngine;
+
+namespace Unity.MLAgents.Tests
+{
+    /// <summary>
+    /// Tests for how <see cref="EngineConfigurationChannel"/> applies incoming messages.
+    /// </summary>
+    public class EngineConfigurationChannelTests
+    {
+        // Time scale captured before each test so TearDown can put it back.
+        float m_OldTimeScale = 1.0f;
+
+        [SetUp]
+        public void Setup()
+        {
+            m_OldTimeScale = Time.timeScale;
+        }
+
+        [TearDown]
+        public void TearDown()
+        {
+            Time.timeScale = m_OldTimeScale;
+        }
+
+        /// <summary>
+        /// Sends a TimeScale message carrying 1000 and checks the resulting Time.timeScale.
+        /// </summary>
+        [Test]
+        public void TestTimeScaleClamping()
+        {
+            var message = new OutgoingMessage();
+            message.WriteInt32((int)EngineConfigurationChannel.ConfigurationType.TimeScale);
+            message.WriteFloat32(1000f);
+
+            var channel = new EngineConfigurationChannel();
+            channel.ProcessMessage(message.ToByteArray());
+
+#if UNITY_EDITOR
+            // Should be clamped
+            const float expectedTimeScale = 100.0f;
+#else
+            // Not sure we can run this test from a player, but just in case, shouldn't clamp.
+            const float expectedTimeScale = 1000.0f;
+#endif
+            Assert.AreEqual(expectedTimeScale, Time.timeScale);
+        }
+    }
+}
--- a/com.unity.ml-agents/Tests/Editor/SideChannels/EngineConfigurationChannelTests.cs.meta
+++ b/com.unity.ml-agents/Tests/Editor/SideChannels/EngineConfigurationChannelTests.cs.meta
+fileFormatVersion: 2
+guid: 71aa620295f74ca5875e8e4782f08768
+timeCreated: 1610760906
--- a//com.unity.ml-agents/Tests/Editor/SideChannels/SideChannelTests.cs.meta
+++ b//com.unity.ml-agents/Tests/Editor/SideChannels/SideChannelTests.cs.meta
--- a//com.unity.ml-agents/Tests/Editor/SideChannels/SideChannelTests.cs
+++ b//com.unity.ml-agents/Tests/Editor/SideChannels/SideChannelTests.cs