浏览代码

Merging master

/develop/singular-embeddings
vincentpierre 4 年前
当前提交
396bc43c
共有 53 个文件被更改,包括 907 次插入和 397 次删除
  1. 2
      .yamato/com.unity.ml-agents-performance.yml
  2. 1
      .yamato/gym-interface-test.yml
  3. 16
      Project/Assets/ML-Agents/Examples/Basic/Scripts/BasicActuatorComponent.cs
  4. 25
      Project/Assets/ML-Agents/Examples/Match3/Prefabs/Match3Heuristic.prefab
  5. 25
      Project/Assets/ML-Agents/Examples/Match3/Prefabs/Match3VectorObs.prefab
  6. 25
      Project/Assets/ML-Agents/Examples/Match3/Prefabs/Match3VisualObs.prefab
  7. 10
      Project/Assets/ML-Agents/Examples/Match3/Scenes/Match3.unity
  8. 166
      Project/Assets/ML-Agents/Examples/Match3/Scripts/Match3Agent.cs
  9. 13
      Project/Assets/ML-Agents/Examples/Match3/Scripts/Match3Board.cs
  10. 24
      README.md
  11. 74
      com.unity.ml-agents.extensions/Runtime/Match3/Match3Actuator.cs
  12. 10
      com.unity.ml-agents.extensions/Runtime/Match3/Match3ActuatorComponent.cs
  13. 6
      com.unity.ml-agents/CHANGELOG.md
  14. 43
      com.unity.ml-agents/Runtime/Actuators/ActuatorManager.cs
  15. 27
      com.unity.ml-agents/Runtime/Actuators/VectorActuator.cs
  16. 38
      com.unity.ml-agents/Runtime/Agent.cs
  17. 70
      com.unity.ml-agents/Runtime/Inference/BarracudaModelParamLoader.cs
  18. 94
      com.unity.ml-agents/Runtime/Inference/GeneratorImpl.cs
  19. 56
      com.unity.ml-agents/Runtime/Inference/TensorGenerator.cs
  20. 1
      com.unity.ml-agents/Runtime/Inference/TensorNames.cs
  21. 9
      com.unity.ml-agents/Runtime/Policies/BehaviorParameters.cs
  22. 11
      com.unity.ml-agents/Runtime/Policies/HeuristicPolicy.cs
  23. 3
      com.unity.ml-agents/Runtime/Sensors/SensorComponent.cs
  24. 12
      com.unity.ml-agents/Runtime/SideChannels/EngineConfigurationChannel.cs
  25. 18
      com.unity.ml-agents/Tests/Editor/Actuators/ActuatorManagerTests.cs
  26. 11
      com.unity.ml-agents/Tests/Editor/Actuators/TestActuator.cs
  27. 19
      com.unity.ml-agents/Tests/Editor/Actuators/VectorActuatorTests.cs
  28. 6
      com.unity.ml-agents/Tests/Editor/BehaviorParameterTests.cs
  29. 2
      com.unity.ml-agents/Tests/Editor/EditModeTestInternalBrainTensorGenerator.cs
  30. 2
      com.unity.ml-agents/Tests/Editor/Sensor/CameraSensorComponentTest.cs
  31. 2
      com.unity.ml-agents/Tests/Editor/Sensor/RenderTextureSensorComponentTests.cs
  32. 8
      docs/Migrating.md
  33. 2
      ml-agents/mlagents/trainers/tests/test_subprocess_env_manager.py
  34. 67
      ml-agents/mlagents/trainers/tests/torch/test_attention.py
  35. 33
      ml-agents/mlagents/trainers/torch/attention.py
  36. 6
      ml-agents/mlagents/trainers/torch/encoders.py
  37. 8
      ml-agents/mlagents/trainers/torch/networks.py
  38. 2
      ml-agents/mlagents/trainers/torch/utils.py
  39. 44
      .yamato/training-backcompat-tests.yml
  40. 121
      Project/Assets/ML-Agents/Examples/Match3/Scripts/Match3ExampleActuator.cs
  41. 3
      Project/Assets/ML-Agents/Examples/Match3/Scripts/Match3ExampleActuator.cs.meta
  42. 18
      Project/Assets/ML-Agents/Examples/Match3/Scripts/Match3ExampleActuatorComponent.cs
  43. 3
      Project/Assets/ML-Agents/Examples/Match3/Scripts/Match3ExampleActuatorComponent.cs.meta
  44. 18
      com.unity.ml-agents/Runtime/Actuators/IHeuristicProvider.cs
  45. 3
      com.unity.ml-agents/Runtime/Actuators/IHeuristicProvider.cs.meta
  46. 86
      com.unity.ml-agents/Tests/Editor/Sensor/BufferSensorTest.cs
  47. 11
      com.unity.ml-agents/Tests/Editor/Sensor/BufferSensorTest.cs.meta
  48. 3
      com.unity.ml-agents/Tests/Editor/SideChannels.meta
  49. 44
      com.unity.ml-agents/Tests/Editor/SideChannels/EngineConfigurationChannelTests.cs
  50. 3
      com.unity.ml-agents/Tests/Editor/SideChannels/EngineConfigurationChannelTests.cs.meta
  51. 0
      /com.unity.ml-agents/Tests/Editor/SideChannels/SideChannelTests.cs.meta
  52. 0
      /com.unity.ml-agents/Tests/Editor/SideChannels/SideChannelTests.cs

2
.yamato/com.unity.ml-agents-performance.yml


commands:
- python3 -m pip install unity-downloader-cli --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple --upgrade
- unity-downloader-cli -u {{ editor.version }} -c editor --wait --fast
- curl -s https://artifactory.internal.unity3d.com/core-automation/tools/utr-standalone/utr --output utr
- curl -s https://artifactory.prd.it.unity3d.com/artifactory/unity-tools-local/utr-standalone/utr --output utr
- chmod +x ./utr
- ./utr --suite=editor --platform=StandaloneOSX --editor-location=.Editor --testproject=DevProject --artifacts_path=build/test-results --report-performance-data --performance-project-id=com.unity.ml-agents --zero-tests-are-ok=1
triggers:

1
.yamato/gym-interface-test.yml


- |
sudo apt-get update && sudo apt-get install -y python3-venv
python3 -m venv venv && source venv/bin/activate
python -m pip install wheel --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
python -m pip install pyyaml --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
python -u -m ml-agents.tests.yamato.setup_venv
python ml-agents/tests/yamato/scripts/run_gym.py --env=artifacts/testPlayer-Basic

16
Project/Assets/ML-Agents/Examples/Basic/Scripts/BasicActuatorComponent.cs


using System;
using Unity.MLAgents.Actuators;
using UnityEngine;
namespace Unity.MLAgentsExamples
{

/// <summary>
/// Simple actuator that converts the action into a {-1, 0, 1} direction
/// </summary>
public class BasicActuator : IActuator
public class BasicActuator : IActuator, IHeuristicProvider
{
public BasicController basicController;
ActionSpec m_ActionSpec;

}
basicController.MoveDirection(direction);
}
/// <summary>
/// Fills the discrete action buffer from the keyboard "Horizontal" axis:
/// 0 = no movement, 1 = left (negative axis), 2 = right (positive axis).
/// </summary>
/// <param name="actionBuffersOut">Buffer whose first discrete action is written.</param>
public void Heuristic(in ActionBuffers actionBuffersOut)
{
    var horizontal = Input.GetAxis("Horizontal");
    var actions = actionBuffersOut.DiscreteActions;
    if (!Mathf.Approximately(horizontal, 0.0f))
    {
        // Negative axis input maps to action 1, positive to action 2.
        actions[0] = Math.Sign(horizontal) < 0 ? 1 : 2;
    }
    else
    {
        actions[0] = 0;
    }
}
public void WriteDiscreteActionMask(IDiscreteActionMask actionMask)

25
Project/Assets/ML-Agents/Examples/Match3/Prefabs/Match3Heuristic.prefab


- component: {fileID: 3508723250470608012}
- component: {fileID: 3508723250470608011}
- component: {fileID: 3508723250470608009}
- component: {fileID: 3508723250470608013}
- component: {fileID: 2112317463290853299}
m_Layer: 0
m_Name: Match3 Agent
m_TagString: Untagged

m_BrainParameters:
VectorObservationSize: 0
NumStackedVectorObservations: 1
m_ActionSpec:
m_NumContinuousActions: 0
BranchSizes:
hasUpgradedBrainParametersWithActionSpec: 1
m_Model: {fileID: 11400000, guid: c34da50737a3c4a50918002b20b2b927, type: 3}
m_InferenceDevice: 0
m_BehaviorType: 0

Board: {fileID: 0}
MoveTime: 0.25
MaxMoves: 500
HeuristicQuality: 0
--- !u!114 &3508723250470608011
MonoBehaviour:
m_ObjectHideFlags: 0

m_EditorClassIdentifier:
DebugMoveIndex: -1
CubeSpacing: 1.25
Board: {fileID: 0}
TilePrefab: {fileID: 4007900521885639951, guid: faee4e805953b49e688bd00b45c55f2e,
type: 3}
--- !u!114 &3508723250470608009

BasicCellPoints: 1
SpecialCell1Points: 2
SpecialCell2Points: 3
--- !u!114 &3508723250470608013
--- !u!114 &3508723250470608014
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}

m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 08e4b0da54cb4d56bfcbae22dd49ab8d, type: 3}
m_Script: {fileID: 11500000, guid: 530d2f105aa145bd8a00e021bdd925fd, type: 3}
ActuatorName: Match3 Actuator
ForceHeuristic: 1
--- !u!114 &3508723250470608014
SensorName: Match3 Sensor
ObservationType: 0
--- !u!114 &2112317463290853299
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}

m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 530d2f105aa145bd8a00e021bdd925fd, type: 3}
m_Script: {fileID: 11500000, guid: b17adcc6c9b241da903aa134f2dac930, type: 3}
SensorName: Match3 Sensor
ObservationType: 0
ActuatorName: Match3 Actuator
ForceHeuristic: 1
HeuristicQuality: 0
--- !u!1 &3508723250774301855
GameObject:
m_ObjectHideFlags: 0

25
Project/Assets/ML-Agents/Examples/Match3/Prefabs/Match3VectorObs.prefab


- component: {fileID: 2118285884327540682}
- component: {fileID: 2118285884327540685}
- component: {fileID: 2118285884327540687}
- component: {fileID: 2118285884327540683}
- component: {fileID: 3357012711826686276}
m_Layer: 0
m_Name: Match3 Agent
m_TagString: Untagged

m_BrainParameters:
VectorObservationSize: 0
NumStackedVectorObservations: 1
m_ActionSpec:
m_NumContinuousActions: 0
BranchSizes:
hasUpgradedBrainParametersWithActionSpec: 1
m_Model: {fileID: 11400000, guid: 9e89b8e81974148d3b7213530d00589d, type: 3}
m_InferenceDevice: 0
m_BehaviorType: 0

Board: {fileID: 0}
MoveTime: 0.25
MaxMoves: 500
HeuristicQuality: 0
--- !u!114 &2118285884327540685
MonoBehaviour:
m_ObjectHideFlags: 0

m_EditorClassIdentifier:
DebugMoveIndex: -1
CubeSpacing: 1.25
Board: {fileID: 0}
TilePrefab: {fileID: 4007900521885639951, guid: faee4e805953b49e688bd00b45c55f2e,
type: 3}
--- !u!114 &2118285884327540687

BasicCellPoints: 1
SpecialCell1Points: 2
SpecialCell2Points: 3
--- !u!114 &2118285884327540683
--- !u!114 &2118285884327540680
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}

m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 08e4b0da54cb4d56bfcbae22dd49ab8d, type: 3}
m_Script: {fileID: 11500000, guid: 530d2f105aa145bd8a00e021bdd925fd, type: 3}
ActuatorName: Match3 Actuator
ForceHeuristic: 0
--- !u!114 &2118285884327540680
SensorName: Match3 Sensor
ObservationType: 0
--- !u!114 &3357012711826686276
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}

m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 530d2f105aa145bd8a00e021bdd925fd, type: 3}
m_Script: {fileID: 11500000, guid: b17adcc6c9b241da903aa134f2dac930, type: 3}
SensorName: Match3 Sensor
ObservationType: 0
ActuatorName: Match3 Actuator
ForceHeuristic: 0
HeuristicQuality: 0

25
Project/Assets/ML-Agents/Examples/Match3/Prefabs/Match3VisualObs.prefab


- component: {fileID: 3019509692332007781}
- component: {fileID: 3019509692332007778}
- component: {fileID: 3019509692332007776}
- component: {fileID: 3019509692332007780}
- component: {fileID: 8270768986451624427}
m_Layer: 0
m_Name: Match3 Agent
m_TagString: Untagged

m_BrainParameters:
VectorObservationSize: 0
NumStackedVectorObservations: 1
m_ActionSpec:
m_NumContinuousActions: 0
BranchSizes:
hasUpgradedBrainParametersWithActionSpec: 1
m_Model: {fileID: 11400000, guid: 48d14da88fea74d0693c691c6e3f2e34, type: 3}
m_InferenceDevice: 0
m_BehaviorType: 0

Board: {fileID: 0}
MoveTime: 0.25
MaxMoves: 500
HeuristicQuality: 0
--- !u!114 &3019509692332007778
MonoBehaviour:
m_ObjectHideFlags: 0

m_EditorClassIdentifier:
DebugMoveIndex: -1
CubeSpacing: 1.25
Board: {fileID: 0}
TilePrefab: {fileID: 4007900521885639951, guid: faee4e805953b49e688bd00b45c55f2e,
type: 3}
--- !u!114 &3019509692332007776

BasicCellPoints: 1
SpecialCell1Points: 2
SpecialCell2Points: 3
--- !u!114 &3019509692332007780
--- !u!114 &3019509692332007783
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}

m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 08e4b0da54cb4d56bfcbae22dd49ab8d, type: 3}
m_Script: {fileID: 11500000, guid: 530d2f105aa145bd8a00e021bdd925fd, type: 3}
ActuatorName: Match3 Actuator
ForceHeuristic: 0
--- !u!114 &3019509692332007783
SensorName: Match3 Sensor
ObservationType: 2
--- !u!114 &8270768986451624427
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}

m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 530d2f105aa145bd8a00e021bdd925fd, type: 3}
m_Script: {fileID: 11500000, guid: b17adcc6c9b241da903aa134f2dac930, type: 3}
SensorName: Match3 Sensor
ObservationType: 2
ActuatorName: Match3 Actuator
ForceHeuristic: 0
HeuristicQuality: 0

10
Project/Assets/ML-Agents/Examples/Match3/Scenes/Match3.unity


m_Modification:
m_TransformParent: {fileID: 0}
m_Modifications:
- target: {fileID: 2112317463290853299, guid: 2fafdcd0587684641b03b11f04454f1b,
type: 3}
propertyPath: HeuristicQuality
value: 1
objectReference: {fileID: 0}
- target: {fileID: 3508723250470608011, guid: 2fafdcd0587684641b03b11f04454f1b,
type: 3}
propertyPath: cubeSpacing

m_Modification:
m_TransformParent: {fileID: 0}
m_Modifications:
- target: {fileID: 2112317463290853299, guid: 2fafdcd0587684641b03b11f04454f1b,
type: 3}
propertyPath: HeuristicQuality
value: 1
objectReference: {fileID: 0}
- target: {fileID: 3508723250470608011, guid: 2fafdcd0587684641b03b11f04454f1b,
type: 3}
propertyPath: cubeSpacing

166
Project/Assets/ML-Agents/Examples/Match3/Scripts/Match3Agent.cs


WaitForMove = 4,
}
public enum HeuristicQuality
{
/// <summary>
/// The heuristic will pick any valid move at random.
/// </summary>
RandomValidMove,
/// <summary>
/// The heuristic will pick the move that scores the most points.
/// This only looks at the immediate move, and doesn't consider where cells will fall.
/// </summary>
Greedy
}
public class Match3Agent : Agent
{
[HideInInspector]

public int MaxMoves = 500;
public HeuristicQuality HeuristicQuality = HeuristicQuality.RandomValidMove;
private System.Random m_Random;
var seed = Board.RandomSeed == -1 ? gameObject.GetInstanceID() : Board.RandomSeed + 1;
m_Random = new System.Random(seed);
}
public override void OnEpisodeBegin()

return false;
}
/// <summary>
/// Heuristic policy for the agent: writes the move index chosen by
/// <see cref="GreedyMove"/> into the first discrete action.
/// </summary>
/// <param name="actionsOut">Action buffers whose first discrete action is set.</param>
public override void Heuristic(in ActionBuffers actionsOut)
{
    actionsOut.DiscreteActions[0] = GreedyMove();
}
// Selects a move index for the heuristic. In HeuristicQuality.Greedy mode the
// highest-scoring move is picked (ties broken uniformly at random); otherwise
// every valid move is scored 1, so the pick is a uniformly random valid move.
int GreedyMove()
{
    // Score lookup indexed by the cell's special type (0 = basic cube).
    var pointsByType = new[] { Board.BasicCellPoints, Board.SpecialCell1Points, Board.SpecialCell2Points };
    var bestMoveIndex = 0;
    var bestMovePoints = -1;
    var numMovesAtCurrentScore = 0;
    foreach (var move in Board.ValidMoves())
    {
        // In RandomValidMove mode every move ties at 1, which turns the
        // reservoir sampling below into a uniform random choice.
        var movePoints = HeuristicQuality == HeuristicQuality.Greedy ? EvalMovePoints(move, pointsByType) : 1;
        if (movePoints < bestMovePoints)
        {
            // Worse, skip
            continue;
        }
        if (movePoints > bestMovePoints)
        {
            // Better, keep
            bestMovePoints = movePoints;
            bestMoveIndex = move.MoveIndex;
            numMovesAtCurrentScore = 1;
        }
        else
        {
            // Tied for best - use reservoir sampling to make sure we select from equal moves uniformly.
            // See https://en.wikipedia.org/wiki/Reservoir_sampling#Simple_algorithm
            numMovesAtCurrentScore++;
            var randVal = m_Random.Next(0, numMovesAtCurrentScore);
            if (randVal == 0)
            {
                // Keep the new one
                bestMoveIndex = move.MoveIndex;
            }
        }
    }
    // NOTE(review): returns 0 if the board exposes no valid moves — presumably
    // the board guarantees at least one valid move; confirm against AbstractBoard.
    return bestMoveIndex;
}
/// <summary>
/// Estimates the points earned by making the move. Each cell of the swap is
/// scored as if it had landed at its new position; cascades from falling
/// cells after the match are not considered.
/// </summary>
/// <param name="move">The swap to evaluate.</param>
/// <param name="pointsByType">Points per cell, indexed by special type.</param>
/// <returns>Combined expected points for both halves of the swap.</returns>
int EvalMovePoints(Move move, int[] pointsByType)
{
    // The cell being dragged, read at its current position.
    var draggedType = Board.GetCellType(move.Row, move.Column);
    var draggedSpecial = Board.GetSpecialType(move.Row, move.Column);

    // The cell it trades places with.
    var (destRow, destCol) = move.OtherCell();
    var swappedType = Board.GetCellType(destRow, destCol);
    var swappedSpecial = Board.GetSpecialType(destRow, destCol);

    // Score the dragged cell at its destination, and the swapped cell at the
    // position the dragged cell vacated.
    var draggedPoints = EvalHalfMove(
        destRow, destCol, draggedType, draggedSpecial, move.Direction, pointsByType
    );
    var swappedPoints = EvalHalfMove(
        move.Row, move.Column, swappedType, swappedSpecial, move.OtherDirection(), pointsByType
    );
    return draggedPoints + swappedPoints;
}
// Scores one half of a swap: assumes a cell of type newValue (special type
// newSpecial) has landed at (newRow, newCol) after arriving from
// incomingDirection, then counts the matched runs in the other directions.
// Returns the points for the resulting match, or 0 if no match forms.
// assumes pointsByType is indexed by the board's special-type values — TODO confirm
int EvalHalfMove(int newRow, int newCol, int newValue, int newSpecial, Direction incomingDirection, int[] pointsByType)
{
    // This is essentially a duplicate of AbstractBoard.CheckHalfMove but also counts the points for the move.
    int matchedLeft = 0, matchedRight = 0, matchedUp = 0, matchedDown = 0;
    int scoreLeft = 0, scoreRight = 0, scoreUp = 0, scoreDown = 0;

    // Walk left while the run of same-typed cells continues. The direction the
    // cell came from is excluded: that neighbor is being swapped away.
    if (incomingDirection != Direction.Right)
    {
        for (var c = newCol - 1; c >= 0; c--)
        {
            if (Board.GetCellType(newRow, c) == newValue)
            {
                matchedLeft++;
                scoreLeft += pointsByType[Board.GetSpecialType(newRow, c)];
            }
            else
                break;
        }
    }

    // Walk right.
    if (incomingDirection != Direction.Left)
    {
        for (var c = newCol + 1; c < Board.Columns; c++)
        {
            if (Board.GetCellType(newRow, c) == newValue)
            {
                matchedRight++;
                scoreRight += pointsByType[Board.GetSpecialType(newRow, c)];
            }
            else
                break;
        }
    }

    // Walk up (increasing row index).
    if (incomingDirection != Direction.Down)
    {
        for (var r = newRow + 1; r < Board.Rows; r++)
        {
            if (Board.GetCellType(r, newCol) == newValue)
            {
                matchedUp++;
                scoreUp += pointsByType[Board.GetSpecialType(r, newCol)];
            }
            else
                break;
        }
    }

    // Walk down (decreasing row index).
    if (incomingDirection != Direction.Up)
    {
        for (var r = newRow - 1; r >= 0; r--)
        {
            if (Board.GetCellType(r, newCol) == newValue)
            {
                matchedDown++;
                scoreDown += pointsByType[Board.GetSpecialType(r, newCol)];
            }
            else
                break;
        }
    }

    // A match needs 3 in a row: the moved cell plus at least 2 neighbors in a line.
    if ((matchedUp + matchedDown >= 2) || (matchedLeft + matchedRight >= 2))
    {
        // It's a match. Start from counting the piece being moved
        var totalScore = pointsByType[newSpecial];
        if (matchedUp + matchedDown >= 2)
        {
            totalScore += scoreUp + scoreDown;
        }
        if (matchedLeft + matchedRight >= 2)
        {
            totalScore += scoreLeft + scoreRight;
        }
        return totalScore;
    }
    return 0;
}
}
}

13
Project/Assets/ML-Agents/Examples/Match3/Scripts/Match3Board.cs


using System;
using Unity.MLAgents.Extensions.Match3;
using UnityEngine;

public class Match3Board : AbstractBoard
{
public int RandomSeed = -1;
public const int k_EmptyCell = -1;
[Tooltip("Points earned for clearing a basic cell (cube)")]
public int BasicCellPoints = 1;

[Tooltip("Points earned for clearing an extra special cell (plus)")]
public int SpecialCell2Points = 3;
/// <summary>
/// Seed to initialize the <see cref="System.Random"/> object.
/// </summary>
public int RandomSeed;
(int, int)[,] m_Cells;
bool[,] m_Matched;

m_Cells = new (int, int)[Columns, Rows];
m_Matched = new bool[Columns, Rows];
}
void Start()
{
InitRandom();
}

24
README.md


## Additional Resources
We have published a series of blog posts that are relevant for ML-Agents:
We have a Unity Learn course,
[ML-Agents: Hummingbirds](https://learn.unity.com/course/ml-agents-hummingbirds),
that provides a gentle introduction to Unity and the ML-Agents Toolkit.
We've also partnered with
[CodeMonkeyUnity](https://www.youtube.com/c/CodeMonkeyUnity) to create a
[series of tutorial videos](https://www.youtube.com/playlist?list=PLzDRvYVwl53vehwiN_odYJkPBzcqFw110)
on how to implement and use the ML-Agents Toolkit.
We have also published a series of blog posts that are relevant for ML-Agents:
- (December 28, 2020)
[Happy holidays from the Unity ML-Agents team!](https://blogs.unity3d.com/2020/12/28/happy-holidays-from-the-unity-ml-agents-team/)
- (November 20, 2020)
[How Eidos-Montréal created Grid Sensors to improve observations for training agents](https://blogs.unity3d.com/2020/11/20/how-eidos-montreal-created-grid-sensors-to-improve-observations-for-training-agents/)
- (November 11, 2020)
[2020 AI@Unity interns shoutout](https://blogs.unity3d.com/2020/11/11/2020-aiunity-interns-shoutout/)
- (May 12, 2020)
[Announcing ML-Agents Unity Package v1.0!](https://blogs.unity3d.com/2020/05/12/announcing-ml-agents-unity-package-v1-0/)
- (February 28, 2020)

([multi-armed bandit](https://blogs.unity3d.com/2017/06/26/unity-ai-themed-blog-entries/)
and
[Q-learning](https://blogs.unity3d.com/2017/08/22/unity-ai-reinforcement-learning-with-q-learning/))
In addition to our own documentation, here are some additional, relevant
articles:
- [A Game Developer Learns Machine Learning](https://mikecann.co.uk/posts/a-game-developer-learns-machine-learning-intent)
- [Explore Unity Technologies ML-Agents Exclusively on Intel Architecture](https://software.intel.com/en-us/articles/explore-unity-technologies-ml-agents-exclusively-on-intel-architecture)
- [ML-Agents Penguins tutorial](https://learn.unity.com/project/ml-agents-penguins)
## Community and Feedback

74
com.unity.ml-agents.extensions/Runtime/Match3/Match3Actuator.cs


/// Actuator for a Match3 game. It translates valid moves (defined by AbstractBoard.IsMoveValid())
/// in action masks, and applies the action to the board via AbstractBoard.MakeMove().
/// </summary>
public class Match3Actuator : IActuator
public class Match3Actuator : IActuator, IHeuristicProvider
private AbstractBoard m_Board;
protected AbstractBoard m_Board;
protected System.Random m_Random;
private System.Random m_Random;
private Agent m_Agent;
private int m_Rows;

/// <param name="board"></param>
/// <param name="forceHeuristic">Whether the inference action should be ignored and the Agent's Heuristic
/// should be called. This should only be used for generating comparison stats of the Heuristic.</param>
/// <param name="seed">The seed used to initialize <see cref="System.Random"/>.</param>
public Match3Actuator(AbstractBoard board, bool forceHeuristic, Agent agent, string name)
public Match3Actuator(AbstractBoard board,
bool forceHeuristic,
int seed,
Agent agent,
string name)
{
m_Board = board;
m_Rows = board.Rows;

var numMoves = Move.NumPotentialMoves(m_Board.Rows, m_Board.Columns);
m_ActionSpec = ActionSpec.MakeDiscrete(numMoves);
m_Random = new System.Random(seed);
}
/// <inheritdoc/>

{
if (m_ForceHeuristic)
{
m_Agent.Heuristic(actions);
Heuristic(actions);
}
var moveIndex = actions.DiscreteActions[0];

yield return move.MoveIndex;
}
}
/// <summary>
/// Heuristic for the actuator: writes the move index chosen by
/// <see cref="GreedyMove"/> into the first discrete action.
/// </summary>
/// <param name="actionsOut">Action buffers whose first discrete action is set.</param>
public void Heuristic(in ActionBuffers actionsOut)
{
    actionsOut.DiscreteActions[0] = GreedyMove();
}
// Picks the valid move with the highest EvalMovePoints score, breaking ties
// uniformly at random. With the default EvalMovePoints (every move worth 1)
// this degenerates to a uniformly random valid move.
protected int GreedyMove()
{
    var bestMoveIndex = 0;
    var bestMovePoints = -1;
    var numMovesAtCurrentScore = 0;
    foreach (var move in m_Board.ValidMoves())
    {
        var movePoints = EvalMovePoints(move);
        if (movePoints < bestMovePoints)
        {
            // Worse, skip
            continue;
        }
        if (movePoints > bestMovePoints)
        {
            // Better, keep
            bestMovePoints = movePoints;
            bestMoveIndex = move.MoveIndex;
            numMovesAtCurrentScore = 1;
        }
        else
        {
            // Tied for best - use reservoir sampling to make sure we select from equal moves uniformly.
            // See https://en.wikipedia.org/wiki/Reservoir_sampling#Simple_algorithm
            numMovesAtCurrentScore++;
            var randVal = m_Random.Next(0, numMovesAtCurrentScore);
            if (randVal == 0)
            {
                // Keep the new one
                bestMoveIndex = move.MoveIndex;
            }
        }
    }
    // NOTE(review): returns 0 when ValidMoves() yields nothing — presumably the
    // board always has at least one valid move; confirm against AbstractBoard.
    return bestMoveIndex;
}
/// <summary>
/// Evaluates how many points a specific move will generate. The base
/// implementation treats every valid move as worth one point; override to
/// provide game-specific scoring for the greedy heuristic.
/// </summary>
/// <param name="move">The move to evaluate.</param>
/// <returns>The number of points the move generates.</returns>
protected virtual int EvalMovePoints(Move move) => 1;
}
}

10
com.unity.ml-agents.extensions/Runtime/Match3/Match3ActuatorComponent.cs


namespace Unity.MLAgents.Extensions.Match3
{
/// <summary>
/// Actuator component for a Match 3 game. Generates a Match3Actuator at runtime.
/// Actuator component for a Match3 game. Generates a Match3Actuator at runtime.
/// </summary>
public class Match3ActuatorComponent : ActuatorComponent
{

public string ActuatorName = "Match3 Actuator";
/// <summary>
/// A random seed used to generate a board, if needed.
/// </summary>
public int RandomSeed = -1;
/// <summary>
/// Force using the Agent's Heuristic() method to decide the action. This should only be used in testing.
/// </summary>
[FormerlySerializedAs("ForceRandom")]

{
var board = GetComponent<AbstractBoard>();
var agent = GetComponentInParent<Agent>();
return new Match3Actuator(board, ForceHeuristic, agent, ActuatorName);
var seed = RandomSeed == -1 ? gameObject.GetInstanceID() : RandomSeed + 1;
return new Match3Actuator(board, ForceHeuristic, seed, agent, ActuatorName);
}
/// <inheritdoc/>

6
com.unity.ml-agents/CHANGELOG.md


- `StatAggregationMethod.Sum` can now be passed to `StatsRecorder.Add()`. This
will result in the values being summed (instead of averaged) when written to
TensorBoard. Thanks to @brccabral for the contribution! (#4816)
- The upper limit for the time scale (by setting the `--time-scale` parameter in mlagents-learn) was
removed when training with a player. The Editor still requires it to be clamped to 100. (#4867)
- Added the IHeuristicProvider interface to allow IActuators as well as Agents to implement the Heuristic function to generate actions.
Updated the Basic example and the Match3 Example to use Actuators.
Changed the namespace and file names of classes in com.unity.ml-agents.extensions. (#4849)
#### ml-agents / ml-agents-envs / gym-unity (Python)

43
com.unity.ml-agents/Runtime/Actuators/ActuatorManager.cs


/// <summary>
/// Iterates through all of the IActuators in this list and calls their
/// <see cref="IHeuristicProvider.Heuristic"/> method on them, if implemented, with the appropriate
/// <see cref="ActionSegment{T}"/>s depending on their <see cref="ActionSpec"/>.
/// </summary>
public void ApplyHeuristic(in ActionBuffers actionBuffersOut)
{
    // Running offsets into the shared continuous/discrete action arrays.
    var continuousOffset = 0;
    var discreteOffset = 0;
    foreach (var actuator in m_Actuators)
    {
        var continuousCount = actuator.ActionSpec.NumContinuousActions;
        var discreteCount = actuator.ActionSpec.NumDiscreteActions;
        if (continuousCount == 0 && discreteCount == 0)
        {
            // Nothing for this actuator to write, and no offsets to advance.
            continue;
        }

        // Carve out this actuator's slice of each action array.
        var continuousSegment = continuousCount > 0
            ? new ActionSegment<float>(actionBuffersOut.ContinuousActions.Array,
                continuousOffset,
                continuousCount)
            : ActionSegment<float>.Empty;
        var discreteSegment = discreteCount > 0
            ? new ActionSegment<int>(actionBuffersOut.DiscreteActions.Array,
                discreteOffset,
                discreteCount)
            : ActionSegment<int>.Empty;

        // Only actuators that opt into IHeuristicProvider produce heuristic actions.
        if (actuator is IHeuristicProvider heuristicProvider)
        {
            heuristicProvider.Heuristic(new ActionBuffers(continuousSegment, discreteSegment));
        }

        continuousOffset += continuousCount;
        discreteOffset += discreteCount;
    }
}
/// <summary>
/// Iterates through all of the IActuators in this list and calls their
/// <see cref="IActionReceiver.OnActionReceived"/> method on them with the appropriate
/// <see cref="ActionSegment{T}"/>s depending on their <see cref="ActionSpec"/>.
/// </summary>

27
com.unity.ml-agents/Runtime/Actuators/VectorActuator.cs


namespace Unity.MLAgents.Actuators
{
/// <summary>
/// IActuator implementation that forwards to an <see cref="IActionReceiver"/>.
/// IActuator implementation that forwards calls to an <see cref="IActionReceiver"/> and an <see cref="IHeuristicProvider"/>.
internal class VectorActuator : IActuator
internal class VectorActuator : IActuator, IHeuristicProvider
IHeuristicProvider m_HeuristicProvider;
ActionBuffers m_ActionBuffers;
internal ActionBuffers ActionBuffers

/// <summary>
/// Create a VectorActuator that forwards to the provided IActionReceiver.
/// </summary>
/// <param name="actionReceiver">The <see cref="IActionReceiver"/> used for OnActionReceived and WriteDiscreteActionMask.
/// If this parameter also implements <see cref="IHeuristicProvider"/> it will be cast and used to forward calls to
/// <see cref="IHeuristicProvider.Heuristic"/>.</param>
/// <param name="actionSpec"></param>
/// <param name="name"></param>
public VectorActuator(IActionReceiver actionReceiver,
ActionSpec actionSpec,
string name = "VectorActuator")
: this(actionReceiver, actionReceiver as IHeuristicProvider, actionSpec, name) { }
/// <summary>
/// Create a VectorActuator that forwards to the provided IActionReceiver.
/// </summary>
/// <param name="heuristicProvider">The <see cref="IHeuristicProvider"/> used to fill the <see cref="ActionBuffers"/>
/// for Heuristic Policies.</param>
IHeuristicProvider heuristicProvider,
m_HeuristicProvider = heuristicProvider;
ActionSpec = actionSpec;
string suffix;
if (actionSpec.NumContinuousActions == 0)

{
ActionBuffers = actionBuffers;
m_ActionReceiver.OnActionReceived(ActionBuffers);
}
/// <summary>
/// Forwards the heuristic request to the wrapped <see cref="IHeuristicProvider"/>.
/// Leaves the buffers untouched when no provider was supplied at construction.
/// </summary>
/// <param name="actionBuffersOut">Buffers for the provider to fill.</param>
public void Heuristic(in ActionBuffers actionBuffersOut)
{
    if (m_HeuristicProvider != null)
    {
        m_HeuristicProvider.Heuristic(actionBuffersOut);
    }
}
/// <inheritdoc />

38
com.unity.ml-agents/Runtime/Agent.cs


"docs/Learning-Environment-Design-Agents.md")]
[Serializable]
[RequireComponent(typeof(BehaviorParameters))]
public partial class Agent : MonoBehaviour, ISerializationCallbackReceiver, IActionReceiver
public partial class Agent : MonoBehaviour, ISerializationCallbackReceiver, IActionReceiver, IHeuristicProvider
{
IPolicy m_Brain;
BehaviorParameters m_PolicyFactory;

float[] m_LegacyActionCache;
/// <summary>
/// This is used to avoid allocation of a float array during legacy calls to Heuristic.
/// </summary>
float[] m_LegacyHeuristicCache;
/// <summary>
/// Called when the attached [GameObject] becomes enabled and active.
/// [GameObject]: https://docs.unity3d.com/Manual/GameObjects.html
/// </summary>

InitializeActuators();
}
m_Brain = m_PolicyFactory.GeneratePolicy(m_ActuatorManager.GetCombinedActionSpec(), Heuristic);
m_Brain = m_PolicyFactory.GeneratePolicy(m_ActuatorManager.GetCombinedActionSpec(), m_ActuatorManager);
ResetData();
Initialize();

return;
}
m_Brain?.Dispose();
m_Brain = m_PolicyFactory.GeneratePolicy(m_ActuatorManager.GetCombinedActionSpec(), Heuristic);
m_Brain = m_PolicyFactory.GeneratePolicy(m_ActuatorManager.GetCombinedActionSpec(), m_ActuatorManager);
}
/// <summary>

public virtual void Initialize() { }
/// <summary>
/// Implement `Heuristic()` to choose an action for this agent using a custom heuristic.
/// Implement <see cref="Heuristic"/> to choose an action for this agent using a custom heuristic.
/// control of an agent using keyboard, mouse, or game controller input.
/// control of an agent using keyboard, mouse, game controller input, or a script.
///
/// Your heuristic implementation can use any decision making logic you specify. Assign decision
/// values to the <see cref="ActionBuffers.ContinuousActions"/> and <see cref="ActionBuffers.DiscreteActions"/>

switch (m_PolicyFactory.BrainParameters.VectorActionSpaceType)
{
case SpaceType.Continuous:
Heuristic(actionsOut.ContinuousActions.Array);
Heuristic(m_LegacyHeuristicCache);
Array.Copy(m_LegacyHeuristicCache, actionsOut.ContinuousActions.Array, m_LegacyActionCache.Length);
var convertedOut = Array.ConvertAll(actionsOut.DiscreteActions.Array, x => (float)x);
Heuristic(convertedOut);
Heuristic(m_LegacyHeuristicCache);
discreteActionSegment[i] = (int)convertedOut[i];
discreteActionSegment[i] = (int)m_LegacyHeuristicCache[i];
}
/// <summary>

// Support legacy OnActionReceived
// TODO don't set this up if the sizes are 0?
var param = m_PolicyFactory.BrainParameters;
m_VectorActuator = new VectorActuator(this, param.ActionSpec);
m_VectorActuator = new VectorActuator(this, this, param.ActionSpec);
m_LegacyHeuristicCache = new float[m_VectorActuator.TotalNumberOfActions()];
m_ActuatorManager.Add(m_VectorActuator);

/// three values in ActionBuffers.ContinuousActions array to use as the force components.
/// During training, the agent's policy learns to set those particular elements of
/// the array to maximize the training rewards the agent receives. (Of course,
/// if you implement a <seealso cref="Heuristic(in ActionBuffers)"/> function, it must use the same
/// if you implement a <seealso cref="Agent.Heuristic(in ActionBuffers)"/> function, it must use the same
/// elements of the action array for the same purpose since there is no learning
/// involved.)
///

if (!actions.ContinuousActions.IsEmpty())
{
m_LegacyActionCache = actions.ContinuousActions.Array;
Array.Copy(actions.ContinuousActions.Array,
m_LegacyActionCache,
actionSpec.NumContinuousActions);
m_LegacyActionCache = Array.ConvertAll(actions.DiscreteActions.Array, x => (float)x);
for (var i = 0; i < m_LegacyActionCache.Length; i++)
{
m_LegacyActionCache[i] = (float)actions.DiscreteActions[i];
}
}
// Disable deprecation warnings so we can call the legacy overload.
#pragma warning disable CS0618

70
com.unity.ml-agents/Runtime/Inference/BarracudaModelParamLoader.cs


for (var sensorIndex = 0; sensorIndex < sensorComponents.Length; sensorIndex++)
{
var sensor = sensorComponents[sensorIndex];
if (!sensor.IsVisual())
if (sensor.GetObservationShape().Length == 3)
continue;
if (!tensorsNames.Contains(
TensorNames.VisualObservationPlaceholderPrefix + visObsIndex))
{
failedModelChecks.Add(
"The model does not contain a Visual Observation Placeholder Input " +
$"for sensor component {visObsIndex} ({sensor.GetType().Name}).");
}
visObsIndex++;
if (!tensorsNames.Contains(
TensorNames.VisualObservationPlaceholderPrefix + visObsIndex))
if (sensor.GetObservationShape().Length == 2)
failedModelChecks.Add(
"The model does not contain a Visual Observation Placeholder Input " +
$"for sensor component {visObsIndex} ({sensor.GetType().Name}).");
if (!tensorsNames.Contains(
TensorNames.ObservationPlaceholderPrefix + sensorIndex))
{
failedModelChecks.Add(
"The model does not contain an Observation Placeholder Input " +
$"for sensor component {sensorIndex} ({sensor.GetType().Name}).");
}
visObsIndex++;
}
var expectedVisualObs = model.GetNumVisualInputs();

}
/// <summary>
/// Checks that the shape of the rank 2 observation input placeholder is the same as the
/// shape reported by the corresponding sensor.
/// </summary>
/// <param name="tensorProxy">The tensor that is expected by the model.</param>
/// <param name="sensorComponent">The sensor that produces the rank 2 observation.</param>
/// <returns>
/// If the Check failed, returns a string containing information about why the
/// check failed. If the check passed, returns null.
/// </returns>
static string CheckRankTwoObsShape(
    TensorProxy tensorProxy, SensorComponent sensorComponent)
{
    // Shape the sensor will actually produce at runtime.
    var shape = sensorComponent.GetObservationShape();
    var dim1Bp = shape[0];
    var dim2Bp = shape[1];
    // Shape the model's input placeholder was exported with.
    var dim1T = tensorProxy.Channels;
    var dim2T = tensorProxy.Width;
    if ((dim1Bp != dim1T) || (dim2Bp != dim2T))
    {
        // Fix: the original message had the two shapes swapped — the TensorProxy
        // carries the model's shape, while the sensor supplies the expected one.
        return $"An Observation of the model does not match. " +
            $"Received TensorProxy of shape [?x{dim1T}x{dim2T}] but " +
            $"was expecting [?x{dim1Bp}x{dim2Bp}].";
    }
    return null;
}
/// <summary>
/// Generates failed checks that correspond to inputs shapes incompatibilities between
/// the model and the BrainParameters.
/// </summary>

for (var sensorIndex = 0; sensorIndex < sensorComponents.Length; sensorIndex++)
{
var sensorComponent = sensorComponents[sensorIndex];
if (!sensorComponent.IsVisual())
if (sensorComponent.GetObservationShape().Length == 3)
{
tensorTester[TensorNames.VisualObservationPlaceholderPrefix + visObsIndex] =
(bp, tensor, scs, i) => CheckVisualObsShape(tensor, sensorComponent);
visObsIndex++;
}
if (sensorComponent.GetObservationShape().Length == 2)
continue;
tensorTester[TensorNames.ObservationPlaceholderPrefix + sensorIndex] =
(bp, tensor, scs, i) => CheckRankTwoObsShape(tensor, sensorComponent);
tensorTester[TensorNames.VisualObservationPlaceholderPrefix + visObsIndex] =
(bp, tensor, scs, i) => CheckVisualObsShape(tensor, sensorComponent);
visObsIndex++;
}
// If the model expects an input but it is not in this list

var totalVectorSensorSize = 0;
foreach (var sensorComp in sensorComponents)
{
if (sensorComp.IsVector())
if (sensorComp.GetObservationShape().Length == 1)
{
totalVectorSensorSize += sensorComp.GetObservationShape()[0];
}

var sensorSizes = "";
foreach (var sensorComp in sensorComponents)
{
if (sensorComp.IsVector())
if (sensorComp.GetObservationShape().Length == 1)
{
var vecSize = sensorComp.GetObservationShape()[0];
if (sensorSizes.Length == 0)

94
com.unity.ml-agents/Runtime/Inference/GeneratorImpl.cs


}
/// <summary>
/// Generates the Tensor corresponding to the VectorObservation input: a two
/// dimensional float array of dimension [batchSize x vectorObservationSize].
/// The observations of every registered sensor are written consecutively into
/// the tensor for each agent.
/// </summary>
internal class VectorObservationGenerator : TensorGenerator.IGenerator
{
    readonly ITensorAllocator m_Allocator;
    List<int> m_SensorIndices = new List<int>();
    ObservationWriter m_ObservationWriter = new ObservationWriter();

    public VectorObservationGenerator(ITensorAllocator allocator)
    {
        m_Allocator = allocator;
    }

    /// <summary>
    /// Registers a sensor (by its index in the agent's sensor list) whose
    /// observations should be concatenated into the generated tensor.
    /// </summary>
    public void AddSensorIndex(int sensorIndex)
    {
        m_SensorIndices.Add(sensorIndex);
    }

    public void Generate(TensorProxy tensorProxy, int batchSize, IEnumerable<AgentInfoSensorsPair> infos)
    {
        TensorUtils.ResizeTensor(tensorProxy, batchSize, m_Allocator);
        // Last dimension of the tensor is the total vector observation size.
        var expectedObsSize = tensorProxy.shape[tensorProxy.shape.Length - 1];
        var batchIndex = 0;
        foreach (var pair in infos)
        {
            if (pair.agentInfo.done)
            {
                // A done agent may hold stale sensor references (e.g. a dependent
                // object might have been disposed), so fill with zeroes instead
                // of calling sensor.Write.
                TensorUtils.FillTensorBatch(tensorProxy, batchIndex, 0.0f);
            }
            else
            {
                // Write each registered sensor consecutively into the tensor.
                var offset = 0;
                foreach (var index in m_SensorIndices)
                {
                    m_ObservationWriter.SetTarget(tensorProxy, batchIndex, offset);
                    offset += pair.sensors[index].Write(m_ObservationWriter);
                }
                Debug.AssertFormat(
                    offset == expectedObsSize,
                    "mismatch between vector observation size ({0}) and number of observations written ({1})",
                    expectedObsSize, offset
                );
            }
            batchIndex++;
        }
    }
}
/// <summary>
/// Generates the Tensor corresponding to the Recurrent input : Will be a two
/// dimensional float array of dimension [batchSize x memorySize].
/// It will use the Memory data contained in the agentInfo to fill the data

}
/// <summary>
/// Generates the Tensor corresponding to the Visual Observation input : Will be a 4
/// dimensional float array of dimension [batchSize x width x height x numChannels].
/// It will use the Texture input data contained in the agentInfo to fill the data
/// Generates the Tensor corresponding to the Observation input : Will be a multi
/// dimensional float array.
/// It will use the Observation data contained in the sensors to fill the data
internal class VisualObservationInputGenerator : TensorGenerator.IGenerator
internal class ObservationGenerator : TensorGenerator.IGenerator
readonly int m_SensorIndex;
List<int> m_SensorIndices = new List<int>();
public VisualObservationInputGenerator(
int sensorIndex, ITensorAllocator allocator)
public ObservationGenerator(ITensorAllocator allocator)
m_SensorIndex = sensorIndex;
public void AddSensorIndex(int sensorIndex)
{
m_SensorIndices.Add(sensorIndex);
}
foreach (var infoSensorPair in infos)
foreach (var info in infos)
var sensor = infoSensorPair.sensors[m_SensorIndex];
if (infoSensorPair.agentInfo.done)
if (info.agentInfo.done)
{
// If the agent is done, we might have a stale reference to the sensors
// e.g. a dependent object might have been disposed.

else
{
m_ObservationWriter.SetTarget(tensorProxy, agentIndex, 0);
sensor.Write(m_ObservationWriter);
var tensorOffset = 0;
// Write each sensor consecutively to the tensor
foreach (var sensorIndex in m_SensorIndices)
{
var sensor = info.sensors[sensorIndex];
m_ObservationWriter.SetTarget(tensorProxy, agentIndex, tensorOffset);
var numWritten = sensor.Write(m_ObservationWriter);
tensorOffset += numWritten;
}
}
agentIndex++;
}

56
com.unity.ml-agents/Runtime/Inference/TensorGenerator.cs


public void InitializeObservations(List<ISensor> sensors, ITensorAllocator allocator)
{
// Loop through the sensors on a representative agent.
// For vector observations, add the index to the (single) VectorObservationGenerator
// For visual observations, make a VisualObservationInputGenerator
// All vector observations use a shared ObservationGenerator since they are concatenated.
// All other observations use a unique ObservationInputGenerator
VectorObservationGenerator vecObsGen = null;
ObservationGenerator vecObsGen = null;
// TODO generalize - we currently only have vector or visual, but can't handle "2D" observations
var isVectorSensor = (shape.Length == 1);
if (isVectorSensor)
{
if (vecObsGen == null)
{
vecObsGen = new VectorObservationGenerator(allocator);
}
vecObsGen.AddSensorIndex(sensorIndex);
}
else
var rank = shape.Length;
ObservationGenerator obsGen = null;
string obsGenName = null;
switch (rank)
m_Dict[TensorNames.VisualObservationPlaceholderPrefix + visIndex] =
new VisualObservationInputGenerator(sensorIndex, allocator);
visIndex++;
case 1:
if (vecObsGen == null)
{
vecObsGen = new ObservationGenerator(allocator);
}
obsGen = vecObsGen;
obsGenName = TensorNames.VectorObservationPlaceholder;
break;
case 2:
// If the tensor is of rank 2, we use the index of the sensor
// to create the name
obsGen = new ObservationGenerator(allocator);
obsGenName = TensorNames.ObservationPlaceholderPrefix + sensorIndex;
break;
case 3:
// If the tensor is of rank 3, we use the "visual observation
// index", which only counts the rank 3 sensors
obsGen = new ObservationGenerator(allocator);
obsGenName = TensorNames.VisualObservationPlaceholderPrefix + visIndex;
visIndex++;
break;
default:
throw new UnityAgentsException(
$"Sensor {sensor.GetName()} have an invalid rank {rank}");
}
if (vecObsGen != null)
{
m_Dict[TensorNames.VectorObservationPlaceholder] = vecObsGen;
obsGen.AddSensorIndex(sensorIndex);
m_Dict[obsGenName] = obsGen;
}
}

1
com.unity.ml-agents/Runtime/Inference/TensorNames.cs


public const string recurrentInPlaceholderH = "recurrent_in_h";
public const string recurrentInPlaceholderC = "recurrent_in_c";
public const string VisualObservationPlaceholderPrefix = "visual_observation_";
public const string ObservationPlaceholderPrefix = "obs_";
public const string PreviousActionPlaceholder = "prev_action";
public const string ActionMaskPlaceholder = "action_masks";
public const string RandomNormalEpsilonPlaceholder = "epsilon";

9
com.unity.ml-agents/Runtime/Policies/BehaviorParameters.cs


get { return m_BehaviorName + "?team=" + TeamId; }
}
internal IPolicy GeneratePolicy(ActionSpec actionSpec, HeuristicPolicy.ActionGenerator heuristic)
internal IPolicy GeneratePolicy(ActionSpec actionSpec, ActuatorManager actuatorManager)
return new HeuristicPolicy(heuristic, actionSpec);
return new HeuristicPolicy(actuatorManager, actionSpec);
case BehaviorType.InferenceOnly:
{
if (m_Model == null)

}
else
{
return new HeuristicPolicy(heuristic, actionSpec);
return new HeuristicPolicy(actuatorManager, actionSpec);
return new HeuristicPolicy(heuristic, actionSpec);
return new HeuristicPolicy(actuatorManager, actionSpec);
}
}

}
agent.ReloadPolicy();
}
}
}

11
com.unity.ml-agents/Runtime/Policies/HeuristicPolicy.cs


namespace Unity.MLAgents.Policies
{
/// <summary>
/// The Heuristic Policy uses a hards coded Heuristic method
/// The Heuristic Policy uses a hard-coded Heuristic method
public delegate void ActionGenerator(in ActionBuffers actionBuffers);
ActionGenerator m_Heuristic;
ActuatorManager m_ActuatorManager;
ActionBuffers m_ActionBuffers;
bool m_Done;
bool m_DecisionRequested;

/// <inheritdoc />
public HeuristicPolicy(ActionGenerator heuristic, ActionSpec actionSpec)
public HeuristicPolicy(ActuatorManager actuatorManager, ActionSpec actionSpec)
m_Heuristic = heuristic;
m_ActuatorManager = actuatorManager;
var numContinuousActions = actionSpec.NumContinuousActions;
var numDiscreteActions = actionSpec.NumDiscreteActions;
var continuousDecision = new ActionSegment<float>(new float[numContinuousActions], 0, numContinuousActions);

{
if (!m_Done && m_DecisionRequested)
{
m_Heuristic.Invoke(m_ActionBuffers);
m_ActuatorManager.ApplyHeuristic(m_ActionBuffers);
}
m_DecisionRequested = false;
return ref m_ActionBuffers;

3
com.unity.ml-agents/Runtime/Sensors/SensorComponent.cs


using UnityEngine;
using System;
namespace Unity.MLAgents.Sensors
{

/// Whether the observation is visual or not.
/// </summary>
/// <returns>True if the observation is visual, false otherwise.</returns>
[Obsolete("IsVisual is deprecated, please use GetObservationShape() instead.")]
public virtual bool IsVisual()
{
var shape = GetObservationShape();

/// Whether the observation is vector or not.
/// </summary>
/// <returns>True if the observation is vector, false otherwise.</returns>
[Obsolete("IsVisual is deprecated, please use GetObservationShape() instead.")]
public virtual bool IsVector()
{
var shape = GetObservationShape();

12
com.unity.ml-agents/Runtime/SideChannels/EngineConfigurationChannel.cs


/// </summary>
internal class EngineConfigurationChannel : SideChannel
{
enum ConfigurationType : int
internal enum ConfigurationType : int
{
ScreenResolution = 0,
QualityLevel = 1,

break;
case ConfigurationType.TimeScale:
var timeScale = msg.ReadFloat32();
timeScale = Mathf.Clamp(timeScale, 1, 100);
// There's an upper limit for the timeScale in the editor (but not in the player)
// Always ensure that timeScale >= 1 also,
#if UNITY_EDITOR
const float maxTimeScale = 100f;
#else
const float maxTimeScale = float.PositiveInfinity;
#endif
timeScale = Mathf.Clamp(timeScale, 1, maxTimeScale);
Time.timeScale = timeScale;
break;
case ConfigurationType.TargetFrameRate:

18
com.unity.ml-agents/Tests/Editor/Actuators/ActuatorManagerTests.cs


manager.WriteActionMask();
Assert.IsTrue(groundTruthMask.SequenceEqual(manager.DiscreteActionMask.GetMask()));
}
[Test]
public void TestHeuristic()
{
    // Two actuators with different discrete branch counts (3 and 4).
    var firstActuator = new TestActuator(ActionSpec.MakeDiscrete(1, 2, 3), "name");
    var secondActuator = new TestActuator(ActionSpec.MakeDiscrete(3, 2, 1, 8), "name1");
    var manager = new ActuatorManager(2);
    manager.Add(firstActuator);
    manager.Add(secondActuator);

    // 7 discrete actions total; each actuator should see only its own segment.
    var buffers = new ActionBuffers(Array.Empty<float>(), new int[7]);
    manager.ApplyHeuristic(buffers);

    Assert.IsTrue(firstActuator.m_HeuristicCalled);
    Assert.AreEqual(3, firstActuator.m_DiscreteBufferSize);
    Assert.IsTrue(secondActuator.m_HeuristicCalled);
    Assert.AreEqual(4, secondActuator.m_DiscreteBufferSize);
}
}
}

11
com.unity.ml-agents/Tests/Editor/Actuators/TestActuator.cs


using Unity.MLAgents.Actuators;
namespace Unity.MLAgents.Tests.Actuators
{
internal class TestActuator : IActuator
internal class TestActuator : IActuator, IHeuristicProvider
public bool m_HeuristicCalled;
public int m_DiscreteBufferSize;
public TestActuator(ActionSpec actuatorSpace, string name)
{
ActionSpec = actuatorSpace;

public void ResetData()
{
}
public void Heuristic(in ActionBuffers actionBuffersOut)
{
m_HeuristicCalled = true;
m_DiscreteBufferSize = actionBuffersOut.DiscreteActions.Length;
}
}
}

19
com.unity.ml-agents/Tests/Editor/Actuators/VectorActuatorTests.cs


using System;
using System.Collections.Generic;
using System.Linq;
using NUnit.Framework;

[TestFixture]
public class VectorActuatorTests
{
class TestActionReceiver : IActionReceiver
class TestActionReceiver : IActionReceiver, IHeuristicProvider
public bool HeuristicCalled;
public void OnActionReceived(ActionBuffers actionBuffers)
{

public void WriteDiscreteActionMask(IDiscreteActionMask actionMask)
{
actionMask.WriteMask(Branch, Mask);
}
public void Heuristic(in ActionBuffers actionBuffersOut)
{
HeuristicCalled = true;
}
}

va.WriteDiscreteActionMask(bdam);
Assert.IsTrue(groundTruthMask.SequenceEqual(bdam.GetMask()));
}
[Test]
public void TestHeuristic()
{
    // The VectorActuator should forward Heuristic calls to its IHeuristicProvider receiver.
    var receiver = new TestActionReceiver();
    var actuator = new VectorActuator(receiver, ActionSpec.MakeDiscrete(1, 2, 3), "name");
    var buffers = new ActionBuffers(Array.Empty<float>(), actuator.ActionSpec.BranchSizes);

    actuator.Heuristic(buffers);

    Assert.IsTrue(receiver.HeuristicCalled);
}
}
}

6
com.unity.ml-agents/Tests/Editor/BehaviorParameterTests.cs


namespace Unity.MLAgents.Tests
{
[TestFixture]
public class BehaviorParameterTests
public class BehaviorParameterTests : IHeuristicProvider
static void DummyHeuristic(in ActionBuffers actionsOut)
public void Heuristic(in ActionBuffers actionsOut)
{
// No-op
}

Assert.Throws<UnityAgentsException>(() =>
{
bp.GeneratePolicy(actionSpec, DummyHeuristic);
bp.GeneratePolicy(actionSpec, new ActuatorManager());
});
}
}

2
com.unity.ml-agents/Tests/Editor/EditModeTestInternalBrainTensorGenerator.cs


const int batchSize = 4;
var agentInfos = GetFakeAgents(ObservableAttributeOptions.ExamineAll);
var alloc = new TensorCachingAllocator();
var generator = new VectorObservationGenerator(alloc);
var generator = new ObservationGenerator(alloc);
generator.AddSensorIndex(0); // ObservableAttribute (size 1)
generator.AddSensorIndex(1); // TestSensor (size 0)
generator.AddSensorIndex(2); // TestSensor (size 0)

2
com.unity.ml-agents/Tests/Editor/Sensor/CameraSensorComponentTest.cs


var expectedShape = new[] { height, width, grayscale ? 1 : 3 };
Assert.AreEqual(expectedShape, cameraComponent.GetObservationShape());
Assert.IsTrue(cameraComponent.IsVisual());
Assert.IsFalse(cameraComponent.IsVector());
var sensor = cameraComponent.CreateSensor();
Assert.AreEqual(expectedShape, sensor.GetObservationShape());

2
com.unity.ml-agents/Tests/Editor/Sensor/RenderTextureSensorComponentTests.cs


var expectedShape = new[] { height, width, grayscale ? 1 : 3 };
Assert.AreEqual(expectedShape, renderTexComponent.GetObservationShape());
Assert.IsTrue(renderTexComponent.IsVisual());
Assert.IsFalse(renderTexComponent.IsVector());
var sensor = renderTexComponent.CreateSensor();
Assert.AreEqual(expectedShape, sensor.GetObservationShape());

8
docs/Migrating.md


- `UnityEnvironment.API_VERSION` in environment.py
([example](https://github.com/Unity-Technologies/ml-agents/blob/b255661084cb8f701c716b040693069a3fb9a257/ml-agents-envs/mlagents/envs/environment.py#L45))
# Migrating
## Migrating to Release 13
### Implementing IHeuristicProvider in your IActuator implementations
- If you have any custom actuators, you can now implement the `IHeuristicProvider` interface to have your actuator
handle the generation of actions when an Agent is running in heuristic mode.
# Migrating
## Migrating to Release 11
### Agent virtual method deprecation

2
ml-agents/mlagents/trainers/tests/test_subprocess_env_manager.py


# Sleep momentarily to allow time for the EnvManager to be waiting for the
# subprocess response. We won't be able to capture failures from the subprocess
# that cause it to close the pipe before we can send the first message.
time.sleep(0.1)
time.sleep(0.5)
raise UnityEnvironmentException()
env_manager = SubprocessEnvManager(

67
ml-agents/mlagents/trainers/tests/torch/test_attention.py


from mlagents.torch_utils import torch
import numpy as np
from mlagents.trainers.torch.layers import linear_layer
from mlagents.trainers.torch.utils import ModelUtils
from mlagents.trainers.torch.layers import linear_layer, LinearEncoder
get_zero_entities_mask,
)

input_1 = generate_input_helper(masking_pattern_1)
input_2 = generate_input_helper(masking_pattern_2)
masks = ResidualSelfAttention.get_masks([input_1, input_2])
masks = get_zero_entities_mask([input_1, input_2])
assert len(masks) == 2
masks_1 = masks[0]
masks_2 = masks[1]

assert masks_2[0, 1] == 0 if i % 2 == 0 else 1
def test_simple_transformer_training():
def test_predict_closest_training():
np.random.seed(1336)
torch.manual_seed(1336)
size, n_k, = 3, 5

l_layer = linear_layer(embedding_size, size)
optimizer = torch.optim.Adam(
list(transformer.parameters()) + list(l_layer.parameters()), lr=0.001
list(entity_embeddings.parameters())
+ list(transformer.parameters())
+ list(l_layer.parameters()),
lr=0.001,
weight_decay=1e-6,
)
batch_size = 200
for _ in range(200):

target = target.detach()
embeddings = entity_embeddings(center, key)
masks = ResidualSelfAttention.get_masks([key])
masks = get_zero_entities_mask([key])
prediction = transformer.forward(embeddings, masks)
prediction = l_layer(prediction)
prediction = prediction.reshape((batch_size, size))

error.backward()
optimizer.step()
assert error.item() < 0.02
def test_predict_minimum_training():
    """
    Trains an EntityEmbedding + ResidualSelfAttention + LinearEncoder stack to
    predict the index of the minimum of a variable-length list of numbers
    (each entity is a value concatenated with a one-hot positional encoding)
    and asserts that the cross-entropy loss converges.
    """
    # of 5 numbers, predict index of min
    np.random.seed(1336)
    torch.manual_seed(1336)
    n_k = 5
    size = n_k + 1
    embedding_size = 64
    entity_embedding = EntityEmbedding(
        size, size, n_k, embedding_size, concat_self=False
    )
    transformer = ResidualSelfAttention(embedding_size)
    l_layer = LinearEncoder(embedding_size, 2, n_k)
    loss = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(
        list(entity_embedding.parameters())
        + list(transformer.parameters())
        + list(l_layer.parameters()),
        lr=0.001,
        weight_decay=1e-6,
    )
    batch_size = 200
    # One-hot positional encodings for the n_k slots.
    # Fix: torch.range is deprecated (and end-inclusive); torch.arange(0, n_k)
    # produces the same indices 0..n_k-1.
    onehots = ModelUtils.actions_to_onehot(torch.arange(0, n_k).unsqueeze(1), [n_k])[
        0
    ]
    onehots = onehots.expand((batch_size, -1, -1))
    losses = []
    for _ in range(400):
        num = np.random.randint(0, n_k)
        inp = torch.rand((batch_size, num + 1, 1))
        with torch.no_grad():
            # create the target : The minimum
            argmin = torch.argmin(inp, dim=1)
            argmin = argmin.squeeze()
            argmin = argmin.detach()
        # Append the positional one-hots to each entity's value.
        sliced_oh = onehots[:, : num + 1]
        inp = torch.cat([inp, sliced_oh], dim=2)
        embeddings = entity_embedding(inp, inp)
        masks = get_zero_entities_mask([inp])
        prediction = transformer(embeddings, masks)
        prediction = l_layer(prediction)
        ce = loss(prediction, argmin)
        losses.append(ce.item())
        # (removed a stray debug print of the per-step loss)
        optimizer.zero_grad()
        ce.backward()
        optimizer.step()
    assert np.array(losses[-20:]).mean() < 0.1

33
ml-agents/mlagents/trainers/torch/attention.py


from mlagents.trainers.exception import UnityTrainerException
def get_zero_entities_mask(observations: List[torch.Tensor]) -> List[torch.Tensor]:
    """
    Takes a List of Tensors and returns a List of mask Tensor with 1 if the input was
    all zeros (on dimension 2) and 0 otherwise. This is used in the Attention
    layer to mask the padding observations.
    """
    with torch.no_grad():
        # An entity counts as padding when its squared norm along dim 2 is
        # (numerically) zero; the mask is 1.0 for padding entities, 0.0 otherwise.
        key_masks: List[torch.Tensor] = []
        for entities in observations:
            squared_norm = torch.sum(entities ** 2, axis=2)
            key_masks.append((squared_norm < 0.01).float())
    return key_masks
class MultiHeadAttention(torch.nn.Module):
NEG_INF = -1e6

concat_self: bool = True,
):
"""
Constructs an EntityEmbeddings module.
Constructs an EntityEmbedding module.
:param x_self_size: Size of "self" entity.
:param entity_size: Size of other entity.
:param entity_num_max_elements: Maximum elements for a given entity, None for unrestricted.

class ResidualSelfAttention(torch.nn.Module):
"""
Residual self attention inspired from https://arxiv.org/pdf/1909.07528.pdf. Can be used
with an EntityEmbeddings module, to apply multi head self attention to encode information
with an EntityEmbedding module, to apply multi head self attention to encode information
about a "Self" and a list of relevant "Entities".
"""

denominator = torch.sum(1 - mask, dim=1, keepdim=True) + self.EPSILON
output = numerator / denominator
return output
@staticmethod
def get_masks(observations: List[torch.Tensor]) -> List[torch.Tensor]:
    """
    Takes a List of Tensors and returns a List of mask Tensor with 1 if the input was
    all zeros (on dimension 2) and 0 otherwise. This is used in the Attention
    layer to mask the padding observations.
    """
    with torch.no_grad():
        # Mask an entity (1.0) only when it is entirely zero along dimension 2.
        key_masks: List[torch.Tensor] = []
        for ent in observations:
            key_masks.append(
                (torch.sum(ent ** 2, axis=2) < 0.01).type(torch.FloatTensor)
            )
    return key_masks

6
ml-agents/mlagents/trainers/torch/encoders.py


if not exporting_to_onnx.is_exporting():
visual_obs = visual_obs.permute([0, 3, 1, 2])
hidden = self.conv_layers(visual_obs)
hidden = hidden.view([-1, self.final_flat])
hidden = hidden.reshape([-1, self.final_flat])
return self.dense(hidden)

if not exporting_to_onnx.is_exporting():
visual_obs = visual_obs.permute([0, 3, 1, 2])
batch_size = visual_obs.shape[0]
hidden = self.sequential(visual_obs).contiguous()
before_out = hidden.view(batch_size, -1)
hidden = self.sequential(visual_obs)
before_out = hidden.reshape(batch_size, -1)
return torch.relu(self.dense(before_out))

8
ml-agents/mlagents/trainers/torch/networks.py


from mlagents.trainers.torch.encoders import VectorInput
from mlagents.trainers.buffer import AgentBuffer
from mlagents.trainers.trajectory import ObsUtil
from mlagents.trainers.torch.attention import ResidualSelfAttention
from mlagents.trainers.torch.attention import (
ResidualSelfAttention,
get_zero_entities_mask,
)
ActivationFunction = Callable[[torch.Tensor], torch.Tensor]

encoded_self = torch.cat(encodes, dim=1)
if len(var_len_inputs) > 0:
# Some inputs need to be processed with a variable length encoder
masks = ResidualSelfAttention.get_masks(var_len_inputs)
masks = get_zero_entities_mask(var_len_inputs)
embeddings: List[torch.Tensor] = []
for var_len_input, var_len_processor in zip(
var_len_inputs, self.var_processors

if self.use_lstm and memories is not None:
# Use only the back half of memories for critic and actor
actor_mem, critic_mem = torch.split(memories, self.memory_size // 2, dim=-1)
actor_mem, critic_mem = actor_mem.contiguous(), critic_mem.contiguous()
else:
critic_mem = None
actor_mem = None

2
ml-agents/mlagents/trainers/torch/utils.py


VectorInput,
)
from mlagents.trainers.settings import EncoderType, ScheduleType
from mlagents.trainers.attention import EntityEmbedding
from mlagents.trainers.torch.attention import EntityEmbedding
from mlagents.trainers.exception import UnityTrainerException
from mlagents_envs.base_env import ObservationSpec, DimensionProperty

44
.yamato/training-backcompat-tests.yml


test_mac_backcompat_2020.1:
{% capture editor_version %}2020.1{% endcapture %}
{% capture csharp_backcompat_version %}1.0.0{% endcapture %}
# This test has to run on mac because it requires the custom build of tensorflow without AVX
# Test against 2020.1 because 2020.2 has to run against package version 1.2.0
name: Test Mac Backcompat Training {{ editor_version }}
agent:
type: Unity::VM::osx
image: ml-agents/ml-agents-bokken-mac:0.1.4-492264
flavor: b1.small
variables:
UNITY_VERSION: {{ editor_version }}
commands:
- |
python3 -m venv venv && source venv/bin/activate
python -m pip install pyyaml --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
python -m pip install unity-downloader-cli --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple --upgrade
unity-downloader-cli -u {{ editor_version }} -c editor --wait --fast
# Backwards-compatibility tests.
# If we make a breaking change to the communication protocol, these will need
# to be disabled until the next release.
python -u -m ml-agents.tests.yamato.standalone_build_tests --build-target=mac
python -u -m ml-agents.tests.yamato.training_int_tests --csharp {{ csharp_backcompat_version }}
- |
python3 -m venv venv_old && source venv_old/bin/activate
python -m pip install pyyaml --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
python -u -m ml-agents.tests.yamato.training_int_tests --python 0.16.0
triggers:
cancel_old_ci: true
recurring:
- branch: master
frequency: daily
artifacts:
logs:
paths:
- "artifacts/standalone_build.txt"
- "artifacts/inference.nn.txt"
- "artifacts/inference.onnx.txt"
- "artifacts/*.log"
standalonebuild:
paths:
- "artifacts/testPlayer*/**"
- "artifacts/models/**"

121
Project/Assets/ML-Agents/Examples/Match3/Scripts/Match3ExampleActuator.cs


using Unity.MLAgents;
using Unity.MLAgents.Extensions.Match3;
namespace Unity.MLAgentsExamples
{
public class Match3ExampleActuator : Match3Actuator
{
    Match3Board Board => (Match3Board)m_Board;

    public Match3ExampleActuator(Match3Board board,
        bool forceHeuristic,
        Agent agent,
        string name,
        int seed
    )
        : base(board, forceHeuristic, seed, agent, name) { }

    /// <summary>
    /// Counts the expected points for making the move by evaluating each half
    /// of the swap at its destination cell.
    /// </summary>
    protected override int EvalMovePoints(Move move)
    {
        var pointsByType = new[] { Board.BasicCellPoints, Board.SpecialCell1Points, Board.SpecialCell2Points };
        var moveVal = m_Board.GetCellType(move.Row, move.Column);
        var moveSpecial = m_Board.GetSpecialType(move.Row, move.Column);
        var (otherRow, otherCol) = move.OtherCell();
        var oppositeVal = m_Board.GetCellType(otherRow, otherCol);
        var oppositeSpecial = m_Board.GetSpecialType(otherRow, otherCol);

        // The moved piece is evaluated at the *other* cell, and vice versa.
        int movePoints = EvalHalfMove(
            otherRow, otherCol, moveVal, moveSpecial, move.Direction, pointsByType
        );
        int otherPoints = EvalHalfMove(
            move.Row, move.Column, oppositeVal, oppositeSpecial, move.OtherDirection(), pointsByType
        );
        return movePoints + otherPoints;
    }

    /// <summary>
    /// Evaluates one half of a swap: a piece of type <paramref name="newValue"/>
    /// arriving at (newRow, newCol) from <paramref name="incomingDirection"/>.
    /// This is essentially a duplicate of AbstractBoard.CheckHalfMove but also
    /// counts the points for the move. Returns 0 when the half-move makes no match.
    /// </summary>
    int EvalHalfMove(int newRow, int newCol, int newValue, int newSpecial, Direction incomingDirection, int[] pointsByType)
    {
        int matchedLeft = 0, scoreLeft = 0;
        int matchedRight = 0, scoreRight = 0;
        int matchedUp = 0, scoreUp = 0;
        int matchedDown = 0, scoreDown = 0;

        // Never scan back toward the cell the piece came from.
        if (incomingDirection != Direction.Right)
        {
            (matchedLeft, scoreLeft) = CountMatches(newRow, newCol, 0, -1, newValue, pointsByType);
        }
        if (incomingDirection != Direction.Left)
        {
            (matchedRight, scoreRight) = CountMatches(newRow, newCol, 0, 1, newValue, pointsByType);
        }
        if (incomingDirection != Direction.Down)
        {
            (matchedUp, scoreUp) = CountMatches(newRow, newCol, 1, 0, newValue, pointsByType);
        }
        if (incomingDirection != Direction.Up)
        {
            (matchedDown, scoreDown) = CountMatches(newRow, newCol, -1, 0, newValue, pointsByType);
        }

        if ((matchedUp + matchedDown >= 2) || (matchedLeft + matchedRight >= 2))
        {
            // It's a match. Start from counting the piece being moved.
            var totalScore = pointsByType[newSpecial];
            if (matchedUp + matchedDown >= 2)
            {
                totalScore += scoreUp + scoreDown;
            }
            if (matchedLeft + matchedRight >= 2)
            {
                totalScore += scoreLeft + scoreRight;
            }
            return totalScore;
        }
        return 0;
    }

    /// <summary>
    /// Walks from (row, col) in the direction (rowStep, colStep), counting
    /// consecutive cells whose type equals <paramref name="value"/> and summing
    /// their special-type points. Stops at the first non-matching cell or the
    /// board edge. Replaces the four near-identical scan loops the original
    /// EvalHalfMove duplicated per direction.
    /// </summary>
    (int matched, int score) CountMatches(int row, int col, int rowStep, int colStep, int value, int[] pointsByType)
    {
        var matched = 0;
        var score = 0;
        var r = row + rowStep;
        var c = col + colStep;
        while (r >= 0 && r < m_Board.Rows && c >= 0 && c < m_Board.Columns
               && m_Board.GetCellType(r, c) == value)
        {
            matched++;
            score += pointsByType[m_Board.GetSpecialType(r, c)];
            r += rowStep;
            c += colStep;
        }
        return (matched, score);
    }
}
}

3
Project/Assets/ML-Agents/Examples/Match3/Scripts/Match3ExampleActuator.cs.meta


fileFormatVersion: 2
guid: 9e6fe1a020a04421ab828be4543a655c
timeCreated: 1610665874

18
Project/Assets/ML-Agents/Examples/Match3/Scripts/Match3ExampleActuatorComponent.cs


using Unity.MLAgents;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Extensions.Match3;
namespace Unity.MLAgentsExamples
{
public class Match3ExampleActuatorComponent : Match3ActuatorComponent
{
    /// <inheritdoc/>
    public override IActuator CreateActuator()
    {
        var agent = GetComponentInParent<Agent>();
        var board = GetComponent<Match3Board>();
        // Derive a seed from the GameObject when none was configured (-1).
        int seed;
        if (RandomSeed == -1)
        {
            seed = gameObject.GetInstanceID();
        }
        else
        {
            seed = RandomSeed + 1;
        }
        return new Match3ExampleActuator(board, ForceHeuristic, agent, ActuatorName, seed);
    }
}
}

3
Project/Assets/ML-Agents/Examples/Match3/Scripts/Match3ExampleActuatorComponent.cs.meta


fileFormatVersion: 2
guid: b17adcc6c9b241da903aa134f2dac930
timeCreated: 1610665885

18
com.unity.ml-agents/Runtime/Actuators/IHeuristicProvider.cs


namespace Unity.MLAgents.Actuators
{
    /// <summary>
    /// Interface that allows objects to fill out an <see cref="ActionBuffers"/> data structure for controlling
    /// behavior of Agents or Actuators.
    /// </summary>
    public interface IHeuristicProvider
    {
        /// <summary>
        /// Method called on objects which are expected to fill out the <see cref="ActionBuffers"/> data structure.
        /// Objects that implement this interface should be careful to be consistent in the placement of their actions
        /// in the <see cref="ActionBuffers"/> data structure.
        /// </summary>
        /// <param name="actionBuffersOut">The <see cref="ActionBuffers"/> data structure to be filled by the
        /// object implementing this interface.</param>
        void Heuristic(in ActionBuffers actionBuffersOut);
    }
}

3
com.unity.ml-agents/Runtime/Actuators/IHeuristicProvider.cs.meta


fileFormatVersion: 2
guid: be90ffb28f39444a8fb02dfd4a82870c
timeCreated: 1610057456

86
com.unity.ml-agents/Tests/Editor/Sensor/BufferSensorTest.cs


using System;
using NUnit.Framework;
using UnityEngine;
using Unity.MLAgents.Sensors;
namespace Unity.MLAgents.Tests
{
[TestFixture]
public class BufferSensorTest
{
    [Test]
    public void TestBufferSensor()
    {
        // 20 observables max, 4 floats each.
        var sensor = new BufferSensor(20, 4);

        var obsShape = sensor.GetObservationShape();
        Assert.AreEqual(2, obsShape.Length);
        Assert.AreEqual(20, obsShape[0]);
        Assert.AreEqual(4, obsShape[1]);

        var dimProp = sensor.GetDimensionProperties();
        Assert.AreEqual(2, dimProp.Length);
        Assert.AreEqual(DimensionProperty.VariableSize, dimProp[0]);
        Assert.AreEqual(DimensionProperty.None, dimProp[1]);

        sensor.AppendObservation(new float[] { 1, 2, 3, 4 });
        sensor.AppendObservation(new float[] { 5, 6, 7, 8 });

        var writer = new ObservationWriter();
        var proto = sensor.GetObservationProto(writer);
        Assert.AreEqual(obsShape, proto.Shape);
        Assert.AreEqual(2, proto.DimensionProperties.Count);
        Assert.AreEqual((int)dimProp[0], proto.DimensionProperties[0]);
        Assert.AreEqual((int)dimProp[1], proto.DimensionProperties[1]);

        // The two appended observations fill the first 8 slots; the rest is zero-padded.
        for (var i = 0; i < 80; i++)
        {
            var expected = i < 8 ? i + 1 : 0;
            Assert.AreEqual(expected, proto.FloatData.Data[i]);
        }
    }

    [Test]
    public void TestBufferSensorComponent()
    {
        var agentGameObj = new GameObject("agent");
        var component = agentGameObj.AddComponent<BufferSensorComponent>();
        component.MaxNumObservables = 20;
        component.ObservableSize = 4;
        var sensor = component.CreateSensor();

        var obsShape = component.GetObservationShape();
        Assert.AreEqual(2, obsShape.Length);
        Assert.AreEqual(20, obsShape[0]);
        Assert.AreEqual(4, obsShape[1]);

        component.AppendObservation(new float[] { 1, 2, 3, 4 });
        component.AppendObservation(new float[] { 5, 6, 7, 8 });

        var writer = new ObservationWriter();
        var proto = sensor.GetObservationProto(writer);
        Assert.AreEqual(obsShape, proto.Shape);
        Assert.AreEqual(2, proto.DimensionProperties.Count);

        // Same padding contract as the raw sensor: 8 written values, then zeroes.
        for (var i = 0; i < 80; i++)
        {
            var expected = i < 8 ? i + 1 : 0;
            Assert.AreEqual(expected, proto.FloatData.Data[i]);
        }
    }
}
}

11
com.unity.ml-agents/Tests/Editor/Sensor/BufferSensorTest.cs.meta


fileFormatVersion: 2
guid: 5267572aa66d34b49bbc65940674b2a6
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

3
com.unity.ml-agents/Tests/Editor/SideChannels.meta


fileFormatVersion: 2
guid: 1228f198ceee45a38c7d9ff50425b65d
timeCreated: 1610760867

44
com.unity.ml-agents/Tests/Editor/SideChannels/EngineConfigurationChannelTests.cs


using NUnit.Framework;
using Unity.MLAgents.SideChannels;
using UnityEngine;
namespace Unity.MLAgents.Tests
{
    /// <summary>
    /// Tests for <see cref="EngineConfigurationChannel"/> — in particular, that a
    /// time-scale value received over the side channel is clamped in the Editor.
    /// </summary>
    public class EngineConfigurationChannelTests
    {
        // Cached so the test can restore whatever time scale was active before it ran.
        float m_OldTimeScale = 1.0f;

        [SetUp]
        public void Setup()
        {
            m_OldTimeScale = Time.timeScale;
        }

        [TearDown]
        public void TearDown()
        {
            Time.timeScale = m_OldTimeScale;
        }

        [Test]
        public void TestTimeScaleClamping()
        {
            // Build a side-channel message requesting an absurdly large time scale.
            var msg = new OutgoingMessage();
            msg.WriteInt32((int)EngineConfigurationChannel.ConfigurationType.TimeScale);
            msg.WriteFloat32(1000f);

            var channel = new EngineConfigurationChannel();
            channel.ProcessMessage(msg.ToByteArray());

#if UNITY_EDITOR
            // Should be clamped
            Assert.AreEqual(100.0f, Time.timeScale);
#else
            // Not sure we can run this test from a player, but just in case, shouldn't clamp.
            Assert.AreEqual(1000.0f, Time.timeScale);
#endif
        }
    }
}

3
com.unity.ml-agents/Tests/Editor/SideChannels/EngineConfigurationChannelTests.cs.meta


fileFormatVersion: 2
guid: 71aa620295f74ca5875e8e4782f08768
timeCreated: 1610760906

/com.unity.ml-agents/Tests/Editor/SideChannelTests.cs.meta → /com.unity.ml-agents/Tests/Editor/SideChannels/SideChannelTests.cs.meta

/com.unity.ml-agents/Tests/Editor/SideChannelTests.cs → /com.unity.ml-agents/Tests/Editor/SideChannels/SideChannelTests.cs

Loading...
Cancel
Save