
Merge branch 'develop-base-teammanager' into develop-agentprocessor-teammanager

/develop/coma2/samenet
Ervin Teng, 4 years ago
Current commit
b6f88d6d
191 files changed, with 1509 additions and 687 deletions
  1. .yamato/com.unity.ml-agents-performance.yml (2 changes)
  2. .yamato/gym-interface-test.yml (1 change)
  3. Project/Assets/ML-Agents/Examples/Basic/Scripts/BasicActuatorComponent.cs (16 changes)
  4. Project/Assets/ML-Agents/Examples/GridWorld/Scripts/GridAgent.cs (3 changes)
  5. Project/Assets/ML-Agents/Examples/Match3/Prefabs/Match3Heuristic.prefab (25 changes)
  6. Project/Assets/ML-Agents/Examples/Match3/Prefabs/Match3VectorObs.prefab (25 changes)
  7. Project/Assets/ML-Agents/Examples/Match3/Prefabs/Match3VisualObs.prefab (25 changes)
  8. Project/Assets/ML-Agents/Examples/Match3/Scenes/Match3.unity (10 changes)
  9. Project/Assets/ML-Agents/Examples/Match3/Scripts/Match3Agent.cs (166 changes)
  10. Project/Assets/ML-Agents/Examples/Match3/Scripts/Match3Board.cs (13 changes)
  11. Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/SensorBase.cs (2 changes)
  12. Project/ProjectSettings/UnityConnectSettings.asset (2 changes)
  13. README.md (51 changes)
  14. com.unity.ml-agents.extensions/Runtime/Match3/Match3Actuator.cs (74 changes)
  15. com.unity.ml-agents.extensions/Runtime/Match3/Match3ActuatorComponent.cs (10 changes)
  16. com.unity.ml-agents.extensions/Runtime/Match3/Match3Sensor.cs (8 changes)
  17. com.unity.ml-agents.extensions/Runtime/Sensors/GridSensor.cs (9 changes)
  18. com.unity.ml-agents.extensions/Runtime/Sensors/PhysicsBodySensor.cs (9 changes)
  19. com.unity.ml-agents.extensions/Runtime/Teams/BaseTeamManager.cs (25 changes)
  20. com.unity.ml-agents/CHANGELOG.md (34 changes)
  21. com.unity.ml-agents/Runtime/Academy.cs (7 changes)
  22. com.unity.ml-agents/Runtime/Actuators/ActionSpec.cs (13 changes)
  23. com.unity.ml-agents/Runtime/Actuators/ActuatorManager.cs (50 changes)
  24. com.unity.ml-agents/Runtime/Actuators/VectorActuator.cs (27 changes)
  25. com.unity.ml-agents/Runtime/Agent.cs (50 changes)
  26. com.unity.ml-agents/Runtime/Analytics/Events.cs (68 changes)
  27. com.unity.ml-agents/Runtime/Analytics/InferenceAnalytics.cs (14 changes)
  28. com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs (64 changes)
  29. com.unity.ml-agents/Runtime/Communicator/RpcCommunicator.cs (5 changes)
  30. com.unity.ml-agents/Runtime/Communicator/UnityRLCapabilities.cs (5 changes)
  31. com.unity.ml-agents/Runtime/Grpc/CommunicatorObjects/AgentInfo.cs (26 changes)
  32. com.unity.ml-agents/Runtime/Grpc/CommunicatorObjects/Capabilities.cs (39 changes)
  33. com.unity.ml-agents/Runtime/Grpc/CommunicatorObjects/Observation.cs (52 changes)
  34. com.unity.ml-agents/Runtime/Inference/ApplierImpl.cs (21 changes)
  35. com.unity.ml-agents/Runtime/Inference/BarracudaModelParamLoader.cs (70 changes)
  36. com.unity.ml-agents/Runtime/Inference/GeneratorImpl.cs (126 changes)
  37. com.unity.ml-agents/Runtime/Inference/ModelRunner.cs (46 changes)
  38. com.unity.ml-agents/Runtime/Inference/TensorApplier.cs (7 changes)
  39. com.unity.ml-agents/Runtime/Inference/TensorGenerator.cs (63 changes)
  40. com.unity.ml-agents/Runtime/Inference/TensorNames.cs (1 change)
  41. com.unity.ml-agents/Runtime/Policies/BehaviorParameters.cs (9 changes)
  42. com.unity.ml-agents/Runtime/Policies/HeuristicPolicy.cs (11 changes)
  43. com.unity.ml-agents/Runtime/Policies/RemotePolicy.cs (14 changes)
  44. com.unity.ml-agents/Runtime/Sensors/BufferSensor.cs (8 changes)
  45. com.unity.ml-agents/Runtime/Sensors/CameraSensor.cs (15 changes)
  46. com.unity.ml-agents/Runtime/Sensors/ObservationWriter.cs (24 changes)
  47. com.unity.ml-agents/Runtime/Sensors/RayPerceptionSensor.cs (10 changes)
  48. com.unity.ml-agents/Runtime/Sensors/Reflection/ReflectionSensorBase.cs (9 changes)
  49. com.unity.ml-agents/Runtime/Sensors/RenderTextureSensor.cs (9 changes)
  50. com.unity.ml-agents/Runtime/Sensors/SensorComponent.cs (3 changes)
  51. com.unity.ml-agents/Runtime/Sensors/SensorShapeValidator.cs (7 changes)
  52. com.unity.ml-agents/Runtime/Sensors/StackingSensor.cs (11 changes)
  53. com.unity.ml-agents/Runtime/Sensors/VectorSensor.cs (24 changes)
  54. com.unity.ml-agents/Runtime/SideChannels/EngineConfigurationChannel.cs (12 changes)
  55. com.unity.ml-agents/Runtime/SideChannels/SideChannelManager.cs (26 changes)
  56. com.unity.ml-agents/Tests/Editor/Actuators/ActuatorManagerTests.cs (18 changes)
  57. com.unity.ml-agents/Tests/Editor/Actuators/TestActuator.cs (11 changes)
  58. com.unity.ml-agents/Tests/Editor/Actuators/VectorActuatorTests.cs (19 changes)
  59. com.unity.ml-agents/Tests/Editor/Analytics/InferenceAnalyticsTests.cs (19 changes)
  60. com.unity.ml-agents/Tests/Editor/BehaviorParameterTests.cs (6 changes)
  61. com.unity.ml-agents/Tests/Editor/Communicator/GrpcExtensionsTests.cs (32 changes)
  62. com.unity.ml-agents/Tests/Editor/EditModeTestInternalBrainTensorGenerator.cs (2 changes)
  63. com.unity.ml-agents/Tests/Editor/ParameterLoaderTest.cs (9 changes)
  64. com.unity.ml-agents/Tests/Editor/Sensor/CameraSensorComponentTest.cs (2 changes)
  65. com.unity.ml-agents/Tests/Editor/Sensor/ObservationWriterTests.cs (12 changes)
  66. com.unity.ml-agents/Tests/Editor/Sensor/RenderTextureSensorComponentTests.cs (2 changes)
  67. com.unity.ml-agents/Tests/Editor/Sensor/StackingSensorTests.cs (15 changes)
  68. com.unity.ml-agents/Tests/Runtime/RuntimeAPITest.cs (4 changes)
  69. docs/Background-Unity.md (2 changes)
  70. docs/Migrating.md (11 changes)
  71. docs/Python-API.md (9 changes)
  72. docs/Training-ML-Agents.md (10 changes)
  73. gym-unity/gym_unity/envs/__init__.py (16 changes)
  74. gym-unity/gym_unity/tests/test_gym.py (6 changes)
  75. ml-agents-envs/mlagents_envs/base_env.py (52 changes)
  76. ml-agents-envs/mlagents_envs/communicator.py (17 changes)
  77. ml-agents-envs/mlagents_envs/communicator_objects/agent_info_pb2.py (6 changes)
  78. ml-agents-envs/mlagents_envs/communicator_objects/agent_info_pb2.pyi (4 changes)
  79. ml-agents-envs/mlagents_envs/communicator_objects/capabilities_pb2.py (11 changes)
  80. ml-agents-envs/mlagents_envs/communicator_objects/capabilities_pb2.pyi (6 changes)
  81. ml-agents-envs/mlagents_envs/communicator_objects/observation_pb2.py (56 changes)
  82. ml-agents-envs/mlagents_envs/communicator_objects/observation_pb2.pyi (27 changes)
  83. ml-agents-envs/mlagents_envs/env_utils.py (8 changes)
  84. ml-agents-envs/mlagents_envs/environment.py (55 changes)
  85. ml-agents-envs/mlagents_envs/mock_communicator.py (12 changes)
  86. ml-agents-envs/mlagents_envs/rpc_communicator.py (47 changes)
  87. ml-agents-envs/mlagents_envs/rpc_utils.py (44 changes)
  88. ml-agents-envs/mlagents_envs/side_channel/engine_configuration_channel.py (2 changes)
  89. ml-agents-envs/mlagents_envs/side_channel/environment_parameters_channel.py (2 changes)
  90. ml-agents-envs/mlagents_envs/tests/test_envs.py (14 changes)
  91. ml-agents-envs/mlagents_envs/tests/test_rpc_communicator.py (54 changes)
  92. ml-agents-envs/mlagents_envs/tests/test_rpc_utils.py (19 changes)
  93. ml-agents-envs/mlagents_envs/tests/test_steps.py (6 changes)
  94. ml-agents/mlagents/torch_utils/__init__.py (1 change)
  95. ml-agents/mlagents/torch_utils/torch.py (37 changes)
  96. ml-agents/mlagents/trainers/cli_utils.py (15 changes)
  97. ml-agents/mlagents/trainers/demo_loader.py (9 changes)
  98. ml-agents/mlagents/trainers/env_manager.py (12 changes)
  99. ml-agents/mlagents/trainers/learn.py (15 changes)
  100. ml-agents/mlagents/trainers/optimizer/torch_optimizer.py (4 changes)

.yamato/com.unity.ml-agents-performance.yml (2 changes)


commands:
- python3 -m pip install unity-downloader-cli --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple --upgrade
- unity-downloader-cli -u {{ editor.version }} -c editor --wait --fast
- curl -s https://artifactory.internal.unity3d.com/core-automation/tools/utr-standalone/utr --output utr
- curl -s https://artifactory.prd.it.unity3d.com/artifactory/unity-tools-local/utr-standalone/utr --output utr
- chmod +x ./utr
- ./utr --suite=editor --platform=StandaloneOSX --editor-location=.Editor --testproject=DevProject --artifacts_path=build/test-results --report-performance-data --performance-project-id=com.unity.ml-agents --zero-tests-are-ok=1
triggers:

.yamato/gym-interface-test.yml (1 change)


- |
sudo apt-get update && sudo apt-get install -y python3-venv
python3 -m venv venv && source venv/bin/activate
python -m pip install wheel --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
python -m pip install pyyaml --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
python -u -m ml-agents.tests.yamato.setup_venv
python ml-agents/tests/yamato/scripts/run_gym.py --env=artifacts/testPlayer-Basic

Project/Assets/ML-Agents/Examples/Basic/Scripts/BasicActuatorComponent.cs (16 changes)


using System;
using Unity.MLAgents.Actuators;
using UnityEngine;
namespace Unity.MLAgentsExamples
{

/// <summary>
/// Simple actuator that converts the action into a {-1, 0, 1} direction
/// </summary>
public class BasicActuator : IActuator
public class BasicActuator : IActuator, IHeuristicProvider
{
public BasicController basicController;
ActionSpec m_ActionSpec;

}
basicController.MoveDirection(direction);
}
public void Heuristic(in ActionBuffers actionBuffersOut)
{
var direction = Input.GetAxis("Horizontal");
var discreteActions = actionBuffersOut.DiscreteActions;
if (Mathf.Approximately(direction, 0.0f))
{
discreteActions[0] = 0;
return;
}
var sign = Math.Sign(direction);
discreteActions[0] = sign < 0 ? 1 : 2;
}
public void WriteDiscreteActionMask(IDiscreteActionMask actionMask)

Project/Assets/ML-Agents/Examples/GridWorld/Scripts/GridAgent.cs (3 changes)


using System.Linq;
using Unity.MLAgents;
using Unity.MLAgents.Actuators;
using UnityEngine.Rendering;
using UnityEngine.Serialization;
public class GridAgent : Agent

void WaitTimeInference()
{
if (renderCamera != null)
if (renderCamera != null && SystemInfo.graphicsDeviceType != GraphicsDeviceType.Null)
{
renderCamera.Render();
}

Project/Assets/ML-Agents/Examples/Match3/Prefabs/Match3Heuristic.prefab (25 changes)


- component: {fileID: 3508723250470608012}
- component: {fileID: 3508723250470608011}
- component: {fileID: 3508723250470608009}
- component: {fileID: 3508723250470608013}
- component: {fileID: 2112317463290853299}
m_Layer: 0
m_Name: Match3 Agent
m_TagString: Untagged

m_BrainParameters:
VectorObservationSize: 0
NumStackedVectorObservations: 1
m_ActionSpec:
m_NumContinuousActions: 0
BranchSizes:
hasUpgradedBrainParametersWithActionSpec: 1
m_Model: {fileID: 11400000, guid: c34da50737a3c4a50918002b20b2b927, type: 3}
m_InferenceDevice: 0
m_BehaviorType: 0

Board: {fileID: 0}
MoveTime: 0.25
MaxMoves: 500
HeuristicQuality: 0
--- !u!114 &3508723250470608011
MonoBehaviour:
m_ObjectHideFlags: 0

m_EditorClassIdentifier:
DebugMoveIndex: -1
CubeSpacing: 1.25
Board: {fileID: 0}
TilePrefab: {fileID: 4007900521885639951, guid: faee4e805953b49e688bd00b45c55f2e,
type: 3}
--- !u!114 &3508723250470608009

BasicCellPoints: 1
SpecialCell1Points: 2
SpecialCell2Points: 3
--- !u!114 &3508723250470608013
--- !u!114 &3508723250470608014
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}

m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 08e4b0da54cb4d56bfcbae22dd49ab8d, type: 3}
m_Script: {fileID: 11500000, guid: 530d2f105aa145bd8a00e021bdd925fd, type: 3}
ActuatorName: Match3 Actuator
ForceHeuristic: 1
--- !u!114 &3508723250470608014
SensorName: Match3 Sensor
ObservationType: 0
--- !u!114 &2112317463290853299
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}

m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 530d2f105aa145bd8a00e021bdd925fd, type: 3}
m_Script: {fileID: 11500000, guid: b17adcc6c9b241da903aa134f2dac930, type: 3}
SensorName: Match3 Sensor
ObservationType: 0
ActuatorName: Match3 Actuator
ForceHeuristic: 1
HeuristicQuality: 0
--- !u!1 &3508723250774301855
GameObject:
m_ObjectHideFlags: 0

Project/Assets/ML-Agents/Examples/Match3/Prefabs/Match3VectorObs.prefab (25 changes)


- component: {fileID: 2118285884327540682}
- component: {fileID: 2118285884327540685}
- component: {fileID: 2118285884327540687}
- component: {fileID: 2118285884327540683}
- component: {fileID: 3357012711826686276}
m_Layer: 0
m_Name: Match3 Agent
m_TagString: Untagged

m_BrainParameters:
VectorObservationSize: 0
NumStackedVectorObservations: 1
m_ActionSpec:
m_NumContinuousActions: 0
BranchSizes:
hasUpgradedBrainParametersWithActionSpec: 1
m_Model: {fileID: 11400000, guid: 9e89b8e81974148d3b7213530d00589d, type: 3}
m_InferenceDevice: 0
m_BehaviorType: 0

Board: {fileID: 0}
MoveTime: 0.25
MaxMoves: 500
HeuristicQuality: 0
--- !u!114 &2118285884327540685
MonoBehaviour:
m_ObjectHideFlags: 0

m_EditorClassIdentifier:
DebugMoveIndex: -1
CubeSpacing: 1.25
Board: {fileID: 0}
TilePrefab: {fileID: 4007900521885639951, guid: faee4e805953b49e688bd00b45c55f2e,
type: 3}
--- !u!114 &2118285884327540687

BasicCellPoints: 1
SpecialCell1Points: 2
SpecialCell2Points: 3
--- !u!114 &2118285884327540683
--- !u!114 &2118285884327540680
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}

m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 08e4b0da54cb4d56bfcbae22dd49ab8d, type: 3}
m_Script: {fileID: 11500000, guid: 530d2f105aa145bd8a00e021bdd925fd, type: 3}
ActuatorName: Match3 Actuator
ForceHeuristic: 0
--- !u!114 &2118285884327540680
SensorName: Match3 Sensor
ObservationType: 0
--- !u!114 &3357012711826686276
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}

m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 530d2f105aa145bd8a00e021bdd925fd, type: 3}
m_Script: {fileID: 11500000, guid: b17adcc6c9b241da903aa134f2dac930, type: 3}
SensorName: Match3 Sensor
ObservationType: 0
ActuatorName: Match3 Actuator
ForceHeuristic: 0
HeuristicQuality: 0

Project/Assets/ML-Agents/Examples/Match3/Prefabs/Match3VisualObs.prefab (25 changes)


- component: {fileID: 3019509692332007781}
- component: {fileID: 3019509692332007778}
- component: {fileID: 3019509692332007776}
- component: {fileID: 3019509692332007780}
- component: {fileID: 8270768986451624427}
m_Layer: 0
m_Name: Match3 Agent
m_TagString: Untagged

m_BrainParameters:
VectorObservationSize: 0
NumStackedVectorObservations: 1
m_ActionSpec:
m_NumContinuousActions: 0
BranchSizes:
hasUpgradedBrainParametersWithActionSpec: 1
m_Model: {fileID: 11400000, guid: 48d14da88fea74d0693c691c6e3f2e34, type: 3}
m_InferenceDevice: 0
m_BehaviorType: 0

Board: {fileID: 0}
MoveTime: 0.25
MaxMoves: 500
HeuristicQuality: 0
--- !u!114 &3019509692332007778
MonoBehaviour:
m_ObjectHideFlags: 0

m_EditorClassIdentifier:
DebugMoveIndex: -1
CubeSpacing: 1.25
Board: {fileID: 0}
TilePrefab: {fileID: 4007900521885639951, guid: faee4e805953b49e688bd00b45c55f2e,
type: 3}
--- !u!114 &3019509692332007776

BasicCellPoints: 1
SpecialCell1Points: 2
SpecialCell2Points: 3
--- !u!114 &3019509692332007780
--- !u!114 &3019509692332007783
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}

m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 08e4b0da54cb4d56bfcbae22dd49ab8d, type: 3}
m_Script: {fileID: 11500000, guid: 530d2f105aa145bd8a00e021bdd925fd, type: 3}
ActuatorName: Match3 Actuator
ForceHeuristic: 0
--- !u!114 &3019509692332007783
SensorName: Match3 Sensor
ObservationType: 2
--- !u!114 &8270768986451624427
MonoBehaviour:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}

m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 530d2f105aa145bd8a00e021bdd925fd, type: 3}
m_Script: {fileID: 11500000, guid: b17adcc6c9b241da903aa134f2dac930, type: 3}
SensorName: Match3 Sensor
ObservationType: 2
ActuatorName: Match3 Actuator
ForceHeuristic: 0
HeuristicQuality: 0

Project/Assets/ML-Agents/Examples/Match3/Scenes/Match3.unity (10 changes)


m_Modification:
m_TransformParent: {fileID: 0}
m_Modifications:
- target: {fileID: 2112317463290853299, guid: 2fafdcd0587684641b03b11f04454f1b,
type: 3}
propertyPath: HeuristicQuality
value: 1
objectReference: {fileID: 0}
- target: {fileID: 3508723250470608011, guid: 2fafdcd0587684641b03b11f04454f1b,
type: 3}
propertyPath: cubeSpacing

m_Modification:
m_TransformParent: {fileID: 0}
m_Modifications:
- target: {fileID: 2112317463290853299, guid: 2fafdcd0587684641b03b11f04454f1b,
type: 3}
propertyPath: HeuristicQuality
value: 1
objectReference: {fileID: 0}
- target: {fileID: 3508723250470608011, guid: 2fafdcd0587684641b03b11f04454f1b,
type: 3}
propertyPath: cubeSpacing

Project/Assets/ML-Agents/Examples/Match3/Scripts/Match3Agent.cs (166 changes)


WaitForMove = 4,
}
public enum HeuristicQuality
{
/// <summary>
/// The heuristic will pick any valid move at random.
/// </summary>
RandomValidMove,
/// <summary>
/// The heuristic will pick the move that scores the most points.
/// This only looks at the immediate move, and doesn't consider where cells will fall.
/// </summary>
Greedy
}
public class Match3Agent : Agent
{
[HideInInspector]

public int MaxMoves = 500;
public HeuristicQuality HeuristicQuality = HeuristicQuality.RandomValidMove;
private System.Random m_Random;
var seed = Board.RandomSeed == -1 ? gameObject.GetInstanceID() : Board.RandomSeed + 1;
m_Random = new System.Random(seed);
}
public override void OnEpisodeBegin()

return false;
}
public override void Heuristic(in ActionBuffers actionsOut)
{
var discreteActions = actionsOut.DiscreteActions;
discreteActions[0] = GreedyMove();
}
int GreedyMove()
{
var pointsByType = new[] { Board.BasicCellPoints, Board.SpecialCell1Points, Board.SpecialCell2Points };
var bestMoveIndex = 0;
var bestMovePoints = -1;
var numMovesAtCurrentScore = 0;
foreach (var move in Board.ValidMoves())
{
var movePoints = HeuristicQuality == HeuristicQuality.Greedy ? EvalMovePoints(move, pointsByType) : 1;
if (movePoints < bestMovePoints)
{
// Worse, skip
continue;
}
if (movePoints > bestMovePoints)
{
// Better, keep
bestMovePoints = movePoints;
bestMoveIndex = move.MoveIndex;
numMovesAtCurrentScore = 1;
}
else
{
// Tied for best - use reservoir sampling to make sure we select from equal moves uniformly.
// See https://en.wikipedia.org/wiki/Reservoir_sampling#Simple_algorithm
numMovesAtCurrentScore++;
var randVal = m_Random.Next(0, numMovesAtCurrentScore);
if (randVal == 0)
{
// Keep the new one
bestMoveIndex = move.MoveIndex;
}
}
}
return bestMoveIndex;
}
int EvalMovePoints(Move move, int[] pointsByType)
{
// Counts the expected points for making the move.
var moveVal = Board.GetCellType(move.Row, move.Column);
var moveSpecial = Board.GetSpecialType(move.Row, move.Column);
var (otherRow, otherCol) = move.OtherCell();
var oppositeVal = Board.GetCellType(otherRow, otherCol);
var oppositeSpecial = Board.GetSpecialType(otherRow, otherCol);
int movePoints = EvalHalfMove(
otherRow, otherCol, moveVal, moveSpecial, move.Direction, pointsByType
);
int otherPoints = EvalHalfMove(
move.Row, move.Column, oppositeVal, oppositeSpecial, move.OtherDirection(), pointsByType
);
return movePoints + otherPoints;
}
int EvalHalfMove(int newRow, int newCol, int newValue, int newSpecial, Direction incomingDirection, int[] pointsByType)
{
// This is essentially a duplicate of AbstractBoard.CheckHalfMove, but it also counts the points for the move.
int matchedLeft = 0, matchedRight = 0, matchedUp = 0, matchedDown = 0;
int scoreLeft = 0, scoreRight = 0, scoreUp = 0, scoreDown = 0;
if (incomingDirection != Direction.Right)
{
for (var c = newCol - 1; c >= 0; c--)
{
if (Board.GetCellType(newRow, c) == newValue)
{
matchedLeft++;
scoreLeft += pointsByType[Board.GetSpecialType(newRow, c)];
}
else
break;
}
}
if (incomingDirection != Direction.Left)
{
for (var c = newCol + 1; c < Board.Columns; c++)
{
if (Board.GetCellType(newRow, c) == newValue)
{
matchedRight++;
scoreRight += pointsByType[Board.GetSpecialType(newRow, c)];
}
else
break;
}
}
if (incomingDirection != Direction.Down)
{
for (var r = newRow + 1; r < Board.Rows; r++)
{
if (Board.GetCellType(r, newCol) == newValue)
{
matchedUp++;
scoreUp += pointsByType[Board.GetSpecialType(r, newCol)];
}
else
break;
}
}
if (incomingDirection != Direction.Up)
{
for (var r = newRow - 1; r >= 0; r--)
{
if (Board.GetCellType(r, newCol) == newValue)
{
matchedDown++;
scoreDown += pointsByType[Board.GetSpecialType(r, newCol)];
}
else
break;
}
}
if ((matchedUp + matchedDown >= 2) || (matchedLeft + matchedRight >= 2))
{
// It's a match. Start from counting the piece being moved
var totalScore = pointsByType[newSpecial];
if (matchedUp + matchedDown >= 2)
{
totalScore += scoreUp + scoreDown;
}
if (matchedLeft + matchedRight >= 2)
{
totalScore += scoreLeft + scoreRight;
}
return totalScore;
}
return 0;
}
}
}

Project/Assets/ML-Agents/Examples/Match3/Scripts/Match3Board.cs (13 changes)


using System;
using Unity.MLAgents.Extensions.Match3;
using UnityEngine;

public class Match3Board : AbstractBoard
{
public int RandomSeed = -1;
public const int k_EmptyCell = -1;
[Tooltip("Points earned for clearing a basic cell (cube)")]
public int BasicCellPoints = 1;

[Tooltip("Points earned for clearing an extra special cell (plus)")]
public int SpecialCell2Points = 3;
/// <summary>
/// Seed to initialize the <see cref="System.Random"/> object.
/// </summary>
public int RandomSeed;
(int, int)[,] m_Cells;
bool[,] m_Matched;

m_Cells = new (int, int)[Columns, Rows];
m_Matched = new bool[Columns, Rows];
}
void Start()
{
InitRandom();
}

Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/SensorBase.cs (2 changes)


float[] buffer = new float[numFloats];
WriteObservation(buffer);
writer.AddRange(buffer);
writer.AddList(buffer);
return numFloats;
}

Project/ProjectSettings/UnityConnectSettings.asset (2 changes)


UnityConnectSettings:
m_ObjectHideFlags: 0
serializedVersion: 1
m_Enabled: 1
m_Enabled: 0
m_TestMode: 0
m_EventOldUrl: https://api.uca.cloud.unity3d.com/v1/events
m_EventUrl: https://cdp.cloud.unity3d.com/v1/events

README.md (51 changes)


- The **Documentation** links in the table below include installation and usage
instructions specific to each release. Remember to always use the
documentation that corresponds to the release version you're using.
| **Version** | **Release Date** | **Source** | **Documentation** | **Download** |
|:-------:|:------:|:-------------:|:-------:|:------------:|
| **master (unstable)** | -- | [source](https://github.com/Unity-Technologies/ml-agents/tree/master) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/master/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/master.zip) |
| **Release 12** | **December 22, 2020** | **[source](https://github.com/Unity-Technologies/ml-agents/tree/release_12)** | **[docs](https://github.com/Unity-Technologies/ml-agents/tree/release_12_docs/docs/Readme.md)** | **[download](https://github.com/Unity-Technologies/ml-agents/archive/release_12.zip)** |
| **Release 11** | December 21, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/release_11) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/release_11_docs/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/release_11.zip) |
| **Release 10** | November 18, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/release_10) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/release_10_docs/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/release_10.zip) |
| **Release 9** | November 4, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/release_9) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/release_9_docs/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/release_9.zip) |
| **Release 8** | October 14, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/release_8) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/release_8_docs/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/release_8.zip) |
| **Release 7** | September 16, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/release_7) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/release_7_docs/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/release_7.zip) |
| **Release 6** | August 12, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/release_6) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/release_6_docs/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/release_6.zip) |
| **Release 5** | July 31, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/release_5) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/release_5_docs/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/release_5.zip) |
- The `com.unity.ml-agents` package is [verified](https://docs.unity3d.com/2020.1/Documentation/Manual/pack-safe.html)
for Unity 2020.1 and later. Verified package releases are numbered 1.0.x.
## Citation
| **Version** | **Release Date** | **Source** | **Documentation** | **Download** | **Python Package** | **Unity Package** |
|:-------:|:------:|:-------------:|:-------:|:------------:|:------------:|:------------:|
| **master (unstable)** | -- | [source](https://github.com/Unity-Technologies/ml-agents/tree/master) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/master/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/master.zip) | -- | -- |
| **Release 12** | **December 22, 2020** | **[source](https://github.com/Unity-Technologies/ml-agents/tree/release_12)** | **[docs](https://github.com/Unity-Technologies/ml-agents/tree/release_12_docs/docs/Readme.md)** | **[download](https://github.com/Unity-Technologies/ml-agents/archive/release_12.zip)** | **[0.23.0](https://pypi.org/project/mlagents/0.23.0/)** | **[1.7.2](https://docs.unity3d.com/Packages/com.unity.ml-agents@1.7/manual/index.html)** |
| **Release 11** | December 21, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/release_11) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/release_11_docs/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/release_11.zip) | [0.23.0](https://pypi.org/project/mlagents/0.23.0/) | [1.7.0](https://docs.unity3d.com/Packages/com.unity.ml-agents@1.7/manual/index.html) |
| **Release 10** | November 18, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/release_10) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/release_10_docs/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/release_10.zip) | [0.22.0](https://pypi.org/project/mlagents/0.22.0/) | [1.6.0](https://docs.unity3d.com/Packages/com.unity.ml-agents@1.6/manual/index.html) |
| **Verified Package 1.0.6** | **November 16, 2020** | **[source](https://github.com/Unity-Technologies/ml-agents/tree/com.unity.ml-agents_1.0.6)** | **[docs](https://github.com/Unity-Technologies/ml-agents/blob/release_2_verified_docs/docs/Readme.md)** | **[download](https://github.com/Unity-Technologies/ml-agents/archive/com.unity.ml-agents_1.0.6.zip)** | **[0.16.1](https://pypi.org/project/mlagents/0.16.1/)** | **[1.0.6](https://docs.unity3d.com/Packages/com.unity.ml-agents@1.0/manual/index.html)** |
| **Release 9** | November 4, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/release_9) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/release_9_docs/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/release_9.zip) | [0.21.1](https://pypi.org/project/mlagents/0.21.1/) | [1.5.0](https://docs.unity3d.com/Packages/com.unity.ml-agents@1.5/manual/index.html) |
| **Release 8** | October 14, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/release_8) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/release_8_docs/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/release_8.zip) | [0.21.0](https://pypi.org/project/mlagents/0.21.0/) | [1.5.0](https://docs.unity3d.com/Packages/com.unity.ml-agents@1.5/manual/index.html) |
| **Verified Package 1.0.5** | September 23, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/com.unity.ml-agents_1.0.5) | [docs](https://github.com/Unity-Technologies/ml-agents/blob/release_2_verified_docs/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/com.unity.ml-agents_1.0.5.zip) | [0.16.1](https://pypi.org/project/mlagents/0.16.1/) | [1.0.5](https://docs.unity3d.com/Packages/com.unity.ml-agents@1.0/manual/index.html) |
| **Release 7** | September 16, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/release_7) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/release_7_docs/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/release_7.zip) | [0.20.0](https://pypi.org/project/mlagents/0.20.0/) | [1.4.0](https://docs.unity3d.com/Packages/com.unity.ml-agents@1.4/manual/index.html) |
If you are a researcher interested in a discussion of Unity as an AI platform,
see a pre-print of our
[reference paper on Unity and the ML-Agents Toolkit](https://arxiv.org/abs/1809.02627).

## Additional Resources
We have published a series of blog posts that are relevant for ML-Agents:
We have a Unity Learn course,
[ML-Agents: Hummingbirds](https://learn.unity.com/course/ml-agents-hummingbirds),
that provides a gentle introduction to Unity and the ML-Agents Toolkit.
We've also partnered with
[CodeMonkeyUnity](https://www.youtube.com/c/CodeMonkeyUnity) to create a
[series of tutorial videos](https://www.youtube.com/playlist?list=PLzDRvYVwl53vehwiN_odYJkPBzcqFw110)
on how to implement and use the ML-Agents Toolkit.
We have also published a series of blog posts that are relevant for ML-Agents:
- (December 28, 2020)
[Happy holidays from the Unity ML-Agents team!](https://blogs.unity3d.com/2020/12/28/happy-holidays-from-the-unity-ml-agents-team/)
- (November 20, 2020)
[How Eidos-Montréal created Grid Sensors to improve observations for training agents](https://blogs.unity3d.com/2020/11/20/how-eidos-montreal-created-grid-sensors-to-improve-observations-for-training-agents/)
- (November 11, 2020)
[2020 AI@Unity interns shoutout](https://blogs.unity3d.com/2020/11/11/2020-aiunity-interns-shoutout/)
- (May 12, 2020)
[Announcing ML-Agents Unity Package v1.0!](https://blogs.unity3d.com/2020/05/12/announcing-ml-agents-unity-package-v1-0/)
- (February 28, 2020)

([multi-armed bandit](https://blogs.unity3d.com/2017/06/26/unity-ai-themed-blog-entries/)
and
[Q-learning](https://blogs.unity3d.com/2017/08/22/unity-ai-reinforcement-learning-with-q-learning/))
In addition to our own documentation, here are some additional, relevant
articles:
- [A Game Developer Learns Machine Learning](https://mikecann.co.uk/posts/a-game-developer-learns-machine-learning-intent)
- [Explore Unity Technologies ML-Agents Exclusively on Intel Architecture](https://software.intel.com/en-us/articles/explore-unity-technologies-ml-agents-exclusively-on-intel-architecture)
- [ML-Agents Penguins tutorial](https://learn.unity.com/project/ml-agents-penguins)
## Community and Feedback

com.unity.ml-agents.extensions/Runtime/Match3/Match3Actuator.cs (74 changes)


/// Actuator for a Match3 game. It translates valid moves (defined by AbstractBoard.IsMoveValid())
/// into action masks, and applies the action to the board via AbstractBoard.MakeMove().
/// </summary>
public class Match3Actuator : IActuator
public class Match3Actuator : IActuator, IHeuristicProvider
private AbstractBoard m_Board;
protected AbstractBoard m_Board;
protected System.Random m_Random;
private System.Random m_Random;
private Agent m_Agent;
private int m_Rows;

/// <param name="board"></param>
/// <param name="forceHeuristic">Whether the inference action should be ignored and the Agent's Heuristic
/// should be called. This should only be used for generating comparison stats of the Heuristic.</param>
/// <param name="seed">The seed used to initialize <see cref="System.Random"/>.</param>
public Match3Actuator(AbstractBoard board, bool forceHeuristic, Agent agent, string name)
public Match3Actuator(AbstractBoard board,
bool forceHeuristic,
int seed,
Agent agent,
string name)
{
m_Board = board;
m_Rows = board.Rows;

var numMoves = Move.NumPotentialMoves(m_Board.Rows, m_Board.Columns);
m_ActionSpec = ActionSpec.MakeDiscrete(numMoves);
m_Random = new System.Random(seed);
}
/// <inheritdoc/>

{
if (m_ForceHeuristic)
{
m_Agent.Heuristic(actions);
Heuristic(actions);
}
var moveIndex = actions.DiscreteActions[0];

yield return move.MoveIndex;
}
}
public void Heuristic(in ActionBuffers actionsOut)
{
var discreteActions = actionsOut.DiscreteActions;
discreteActions[0] = GreedyMove();
}
protected int GreedyMove()
{
var bestMoveIndex = 0;
var bestMovePoints = -1;
var numMovesAtCurrentScore = 0;
foreach (var move in m_Board.ValidMoves())
{
var movePoints = EvalMovePoints(move);
if (movePoints < bestMovePoints)
{
// Worse, skip
continue;
}
if (movePoints > bestMovePoints)
{
// Better, keep
bestMovePoints = movePoints;
bestMoveIndex = move.MoveIndex;
numMovesAtCurrentScore = 1;
}
else
{
// Tied for best - use reservoir sampling to make sure we select from equal moves uniformly.
// See https://en.wikipedia.org/wiki/Reservoir_sampling#Simple_algorithm
numMovesAtCurrentScore++;
var randVal = m_Random.Next(0, numMovesAtCurrentScore);
if (randVal == 0)
{
// Keep the new one
bestMoveIndex = move.MoveIndex;
}
}
}
return bestMoveIndex;
}
/// <summary>
/// Method to be overridden when evaluating how many points a specific move will generate.
/// </summary>
/// <param name="move">The move to evaluate.</param>
/// <returns>The number of points the move generates.</returns>
protected virtual int EvalMovePoints(Move move)
{
return 1;
}
}
}
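
For context, a minimal sketch of how the virtual EvalMovePoints hook above might be used. The subclass name and scoring rule are hypothetical; the constructor signature and the protected m_Board member are taken from the changes shown in this file.

using Unity.MLAgents;
using Unity.MLAgents.Extensions.Match3;

public class ScoringMatch3Actuator : Match3Actuator
{
    public ScoringMatch3Actuator(AbstractBoard board, bool forceHeuristic, int seed, Agent agent, string name)
        : base(board, forceHeuristic, seed, agent, name) { }

    protected override int EvalMovePoints(Move move)
    {
        // Hypothetical scoring: weight each move by the special type of the moved cell,
        // instead of the base implementation's constant 1.
        return 1 + m_Board.GetSpecialType(move.Row, move.Column);
    }
}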

com.unity.ml-agents.extensions/Runtime/Match3/Match3ActuatorComponent.cs (10 changes)


namespace Unity.MLAgents.Extensions.Match3
{
/// <summary>
/// Actuator component for a Match 3 game. Generates a Match3Actuator at runtime.
/// Actuator component for a Match3 game. Generates a Match3Actuator at runtime.
/// </summary>
public class Match3ActuatorComponent : ActuatorComponent
{

public string ActuatorName = "Match3 Actuator";
/// <summary>
/// A random seed used to generate a board, if needed.
/// </summary>
public int RandomSeed = -1;
/// <summary>
/// Force using the Agent's Heuristic() method to decide the action. This should only be used in testing.
/// </summary>
[FormerlySerializedAs("ForceRandom")]

{
var board = GetComponent<AbstractBoard>();
var agent = GetComponentInParent<Agent>();
return new Match3Actuator(board, ForceHeuristic, agent, ActuatorName);
var seed = RandomSeed == -1 ? gameObject.GetInstanceID() : RandomSeed + 1;
return new Match3Actuator(board, ForceHeuristic, seed, agent, ActuatorName);
}
/// <inheritdoc/>

com.unity.ml-agents.extensions/Runtime/Match3/Match3Sensor.cs (8 changes)


/// or uncompressed visual observations. Uses AbstractBoard.GetCellType()
/// and AbstractBoard.GetSpecialType() to determine the observation values.
/// </summary>
public class Match3Sensor : ISparseChannelSensor
public class Match3Sensor : ISparseChannelSensor, IBuiltInSensor
{
private Match3ObservationType m_ObservationType;
private AbstractBoard m_Board;

public int[] GetCompressedChannelMapping()
{
return m_SparseChannelMapping;
}
/// <inheritdoc/>
public BuiltInSensorType GetBuiltInSensorType()
{
return BuiltInSensorType.Match3Sensor;
}
static void DestroyTexture(Texture2D texture)

com.unity.ml-agents.extensions/Runtime/Sensors/GridSensor.cs (9 changes)


/// <summary>
/// Grid-based sensor.
/// </summary>
public class GridSensor : SensorComponent, ISensor
public class GridSensor : SensorComponent, ISensor, IBuiltInSensor
{
/// <summary>
/// Name of this grid sensor.

{
return CompressionType;
}
/// <inheritdoc/>
public BuiltInSensorType GetBuiltInSensorType()
{
return BuiltInSensorType.GridSensor;
}
/// <summary>
/// GetCompressedObservation - Calls Perceive then puts the data stored on the perception buffer

com.unity.ml-agents.extensions/Runtime/Sensors/PhysicsBodySensor.cs (9 changes)


/// <summary>
/// ISensor implementation that generates observations for a group of Rigidbodies or ArticulationBodies.
/// </summary>
public class PhysicsBodySensor : ISensor
public class PhysicsBodySensor : ISensor, IBuiltInSensor
{
int[] m_Shape;
string m_SensorName;

{
return m_SensorName;
}
/// <inheritdoc/>
public BuiltInSensorType GetBuiltInSensorType()
{
return BuiltInSensorType.PhysicsBodySensor;
}
}
}

com.unity.ml-agents.extensions/Runtime/Teams/BaseTeamManager.cs (25 changes)


using System.Collections.Generic;
using Unity.MLAgents;
using Unity.MLAgents.Sensors;
private readonly string m_Id = System.Guid.NewGuid().ToString();
readonly int m_Id = TeamManagerIdCounter.GetTeamManagerId();
public virtual void RegisterAgent(Agent agent)
{
}
public virtual void RegisterAgent(Agent agent) { }
public virtual void OnAgentDone(Agent agent, Agent.DoneReason doneReason, List<ISensor> sensors)
{
// Possible implementation - save reference to Agent's IPolicy so that we can repeatedly
// call IPolicy.RequestDecision on behalf of the Agent after it's dead
// If so, we'll need dummy sensor impls with the same shape as the originals.
agent.SendDoneToTrainer();
}
public virtual void AddTeamReward(float reward)
{
}
public string GetId()
public int GetId()
{
return m_Id;
}
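
For context, a minimal sketch of a concrete team manager built on the virtual methods above. The class name and reward bookkeeping are hypothetical; only the members visible in this diff (RegisterAgent, AddTeamReward) are assumed, and an Agent would attach to it via the SetTeamManager call shown later in the Agent.cs changes.

using System.Collections.Generic;
using Unity.MLAgents;

// Assumes BaseTeamManager is in scope; its namespace is not shown in this diff.
public class ScoringTeamManager : BaseTeamManager
{
    readonly List<Agent> m_TeamAgents = new List<Agent>();
    float m_TeamReward;

    public override void RegisterAgent(Agent agent)
    {
        // Track the agents on this team so rewards can be attributed to them.
        m_TeamAgents.Add(agent);
    }

    public override void AddTeamReward(float reward)
    {
        // The base implementation is a no-op in this diff; accumulate here instead.
        m_TeamReward += reward;
    }
}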

com.unity.ml-agents/CHANGELOG.md (34 changes)


### Minor Changes
#### com.unity.ml-agents / com.unity.ml-agents.extensions (C#)
- The `ActionSpec` constructor is now public. Previously, it was not possible to create an
ActionSpec with both continuous and discrete actions from code. (#4896)
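As a sketch of what the now-public constructor enables; the signature, shown in the ActionSpec.cs changes below, is ActionSpec(int numContinuousActions = 0, int[] discreteBranchSizes = null), and the sizes here are illustrative.

using Unity.MLAgents.Actuators;

static class ActionSpecExample
{
    static ActionSpec MakeHybridSpec()
    {
        // Two continuous actions plus two discrete branches of sizes 3 and 2.
        return new ActionSpec(2, new[] { 3, 2 });
    }
}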
will result in the values being summed (instead of averaged) when written to
TensorBoard. Thanks to @brccabral for the contribution! (#4816)
- The upper limit for the time scale (by setting the `--time-scale` parameter in mlagents-learn) was
removed when training with a player. The Editor still requires it to be clamped to 100. (#4867)
- Added the IHeuristicProvider interface to allow IActuators, as well as Agents, to implement the Heuristic function to generate actions.
Updated the Basic example and the Match3 example to use Actuators.
Changed the namespace and file names of classes in com.unity.ml-agents.extensions. (#4849)
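A minimal sketch of the new interface in isolation; the class and input mapping are illustrative, and the method signature matches the BasicActuator and Match3Actuator implementations shown elsewhere in this diff.

using Unity.MLAgents.Actuators;
using UnityEngine;

public class SimpleHeuristicProvider : IHeuristicProvider
{
    public void Heuristic(in ActionBuffers actionBuffersOut)
    {
        // Map the horizontal input axis onto the first discrete branch,
        // mirroring the BasicActuator heuristic above.
        var direction = Input.GetAxis("Horizontal");
        var discreteActions = actionBuffersOut.DiscreteActions;
        discreteActions[0] = Mathf.Approximately(direction, 0f) ? 0 : (direction < 0 ? 1 : 2);
    }
}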
- Added `VectorSensor.AddObservation(IList<float>)`. `VectorSensor.AddObservation(IEnumerable<float>)`
is deprecated. The `IList` version is recommended, as it does not generate any
additional memory allocations. (#4887)
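A sketch of the allocation-free pattern this entry recommends; the sensor size and values are illustrative, and the VectorSensor constructor arguments are assumed from its public API.

using System.Collections.Generic;
using Unity.MLAgents.Sensors;

static class VectorSensorExample
{
    static void Demo()
    {
        var sensor = new VectorSensor(3, "ExampleSensor");
        // Reusing a List<float> avoids the allocations incurred by the
        // deprecated IEnumerable<float> overload.
        var observations = new List<float> { 0.1f, 0.2f, 0.3f };
        sensor.AddObservation(observations);
    }
}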
- Added `ObservationWriter.AddList()` and deprecated `ObservationWriter.AddRange()`.
`AddList()` is recommended, as it does not generate any additional memory allocations. (#4887)
- Added a `--torch-device` commandline option to `mlagents-learn`, which sets the default
[`torch.device`](https://pytorch.org/docs/stable/tensor_attributes.html#torch.torch.device) used for training. (#4888)
- The `--cpu` commandline option had no effect and was removed. Use `--torch-device=cpu` to force CPU training. (#4888)
- CameraSensor now logs an error if the GraphicsDevice is null. (#4880)
- Removed unnecessary memory allocations in `ActuatorManager.UpdateActionArray()` (#4877)
- Removed unnecessary memory allocations in `SensorShapeValidator.ValidateSensors()` (#4879)
- Removed unnecessary memory allocations in `SideChannelManager.GetSideChannelMessage()` (#4886)
- Removed several memory allocations that happened during inference. On a test scene, this
reduced the amount of memory allocated by approximately 25%. (#4887)
- Fixed a bug that could cause a crash if a new behavior appeared mid-run during multi-environment training. (#4872)
- Fixed the computation of entropy for continuous actions. (#4869)
- Fixed a bug that would cause `UnityEnvironment` to wait the full timeout
period and report a misleading error message if the executable crashed
without closing the connection. It now periodically checks the process status
while waiting for a connection, and raises a better error message if it crashes. (#4880)
- Passing a `-logfile` option in the `--env-args` option to `mlagents-learn` is
no longer overwritten. (#4880)
## [1.7.2-preview] - 2020-12-22

com.unity.ml-agents/Runtime/Academy.cs (7 changes)


/// <term>1.3.0</term>
/// <description>Support both continuous and discrete actions.</description>
/// </item>
/// <item>
/// <term>1.4.0</term>
/// <description>Support training analytics sent from python trainer to the editor.</description>
/// </item>
const string k_ApiVersion = "1.3.0";
const string k_ApiVersion = "1.4.0";
/// <summary>
/// Unity package version of com.unity.ml-agents.

EnableAutomaticStepping();
SideChannelManager.RegisterSideChannel(new EngineConfigurationChannel());
SideChannelManager.RegisterSideChannel(new TrainingAnalyticsSideChannel());
m_EnvironmentParameters = new EnvironmentParameters();
m_StatsRecorder = new StatsRecorder();

com.unity.ml-agents/Runtime/Actuators/ActionSpec.cs (13 changes)


/// <summary>
/// Creates a Continuous <see cref="ActionSpec"/> with the number of actions available.
/// </summary>
/// <param name="numActions">The number of actions available.</param>
/// <param name="numActions">The number of continuous actions available.</param>
/// <returns>An Continuous ActionSpec initialized with the number of actions available.</returns>
public static ActionSpec MakeContinuous(int numActions)
{

return actuatorSpace;
}
internal ActionSpec(int numContinuousActions, int[] branchSizes = null)
/// <summary>
/// Create an ActionSpec initialized with the specified action sizes.
/// </summary>
/// <param name="numContinuousActions">The number of continuous actions available.</param>
/// <param name="discreteBranchSizes">The array of branch sizes for the discrete actions. Each index
/// contains the number of actions available for that branch.</param>
/// <returns>An ActionSpec initialized with the specified action sizes.</returns>
public ActionSpec(int numContinuousActions = 0, int[] discreteBranchSizes = null)
BranchSizes = branchSizes;
BranchSizes = discreteBranchSizes;
}
/// <summary>

com.unity.ml-agents/Runtime/Actuators/ActuatorManager.cs (50 changes)


}
else
{
Debug.Assert(sourceActionBuffer.Length == destination.Length,
$"sourceActionBuffer:{sourceActionBuffer.Length} is a different" +
$" size than destination: {destination.Length}.");
Debug.AssertFormat(sourceActionBuffer.Length == destination.Length,
"sourceActionBuffer: {0} is a different size than destination: {1}.",
sourceActionBuffer.Length,
destination.Length);
Array.Copy(sourceActionBuffer.Array,
sourceActionBuffer.Offset,

actuator.WriteDiscreteActionMask(m_DiscreteActionMask);
offset += actuator.ActionSpec.NumDiscreteActions;
}
}
}
/// <summary>
/// Iterates through all of the IActuators in this list and calls their
/// <see cref="IHeuristicProvider.Heuristic"/> method on them, if implemented, with the appropriate
/// <see cref="ActionSegment{T}"/>s depending on their <see cref="ActionSpec"/>.
/// </summary>
public void ApplyHeuristic(in ActionBuffers actionBuffersOut)
{
var continuousStart = 0;
var discreteStart = 0;
for (var i = 0; i < m_Actuators.Count; i++)
{
var actuator = m_Actuators[i];
var numContinuousActions = actuator.ActionSpec.NumContinuousActions;
var numDiscreteActions = actuator.ActionSpec.NumDiscreteActions;
if (numContinuousActions == 0 && numDiscreteActions == 0)
{
continue;
}
var continuousActions = ActionSegment<float>.Empty;
if (numContinuousActions > 0)
{
continuousActions = new ActionSegment<float>(actionBuffersOut.ContinuousActions.Array,
continuousStart,
numContinuousActions);
}
var discreteActions = ActionSegment<int>.Empty;
if (numDiscreteActions > 0)
{
discreteActions = new ActionSegment<int>(actionBuffersOut.DiscreteActions.Array,
discreteStart,
numDiscreteActions);
}
var heuristic = actuator as IHeuristicProvider;
heuristic?.Heuristic(new ActionBuffers(continuousActions, discreteActions));
continuousStart += numContinuousActions;
discreteStart += numDiscreteActions;
}
}

com.unity.ml-agents/Runtime/Actuators/VectorActuator.cs (27 changes)


namespace Unity.MLAgents.Actuators
{
/// <summary>
/// IActuator implementation that forwards to an <see cref="IActionReceiver"/>.
/// IActuator implementation that forwards calls to an <see cref="IActionReceiver"/> and an <see cref="IHeuristicProvider"/>.
internal class VectorActuator : IActuator
internal class VectorActuator : IActuator, IHeuristicProvider
IHeuristicProvider m_HeuristicProvider;
ActionBuffers m_ActionBuffers;
internal ActionBuffers ActionBuffers

/// <summary>
/// Create a VectorActuator that forwards to the provided IActionReceiver.
/// </summary>
/// <param name="actionReceiver">The <see cref="IActionReceiver"/> used for OnActionReceived and WriteDiscreteActionMask.
/// If this parameter also implements <see cref="IHeuristicProvider"/> it will be cast and used to forward calls to
/// <see cref="IHeuristicProvider.Heuristic"/>.</param>
/// <param name="actionSpec"></param>
/// <param name="name"></param>
public VectorActuator(IActionReceiver actionReceiver,
ActionSpec actionSpec,
string name = "VectorActuator")
: this(actionReceiver, actionReceiver as IHeuristicProvider, actionSpec, name) { }
/// <summary>
/// Create a VectorActuator that forwards to the provided IActionReceiver.
/// </summary>
/// <param name="heuristicProvider">The <see cref="IHeuristicProvider"/> used to fill the <see cref="ActionBuffers"/>
/// for Heuristic Policies.</param>
IHeuristicProvider heuristicProvider,
m_HeuristicProvider = heuristicProvider;
ActionSpec = actionSpec;
string suffix;
if (actionSpec.NumContinuousActions == 0)

{
ActionBuffers = actionBuffers;
m_ActionReceiver.OnActionReceived(ActionBuffers);
}
public void Heuristic(in ActionBuffers actionBuffersOut)
{
m_HeuristicProvider?.Heuristic(actionBuffersOut);
}
/// <inheritdoc />

com.unity.ml-agents/Runtime/Agent.cs (50 changes)


/// <summary>
/// Team Manager identifier.
/// </summary>
public string teamManagerId;
public int teamManagerId;
public void ClearActions()
{

"docs/Learning-Environment-Design-Agents.md")]
[Serializable]
[RequireComponent(typeof(BehaviorParameters))]
public partial class Agent : MonoBehaviour, ISerializationCallbackReceiver, IActionReceiver
public partial class Agent : MonoBehaviour, ISerializationCallbackReceiver, IActionReceiver, IHeuristicProvider
{
IPolicy m_Brain;
BehaviorParameters m_PolicyFactory;

private ITeamManager m_TeamManager;
/// <summary>
/// This is used to avoid allocation of a float array during legacy calls to Heuristic.
/// </summary>
float[] m_LegacyHeuristicCache;
ITeamManager m_TeamManager;
/// <summary>
/// Called when the attached [GameObject] becomes enabled and active.
/// [GameObject]: https://docs.unity3d.com/Manual/GameObjects.html
/// </summary>

InitializeActuators();
}
m_Brain = m_PolicyFactory.GeneratePolicy(m_ActuatorManager.GetCombinedActionSpec(), Heuristic);
m_Brain = m_PolicyFactory.GeneratePolicy(m_ActuatorManager.GetCombinedActionSpec(), m_ActuatorManager);
ResetData();
Initialize();

new int[m_ActuatorManager.NumDiscreteActions]
);
if (m_TeamManager != null)
{
m_Info.teamManagerId = m_TeamManager.GetId();
}
m_Info.teamManagerId = m_TeamManager == null ? -1 : m_TeamManager.GetId();
// The first time the Academy resets, all Agents in the scene will be
// forced to reset through the <see cref="AgentForceReset"/> event.

m_Info.reward = m_Reward;
m_Info.done = true;
m_Info.maxStepReached = doneReason == DoneReason.MaxStepReached;
m_Info.teamManagerId = m_TeamManager == null ? -1 : m_TeamManager.GetId();
if (collectObservationsSensor != null)
{
// Make sure the latest observations are being passed to training.

return;
}
m_Brain?.Dispose();
m_Brain = m_PolicyFactory.GeneratePolicy(m_ActuatorManager.GetCombinedActionSpec(), Heuristic);
m_Brain = m_PolicyFactory.GeneratePolicy(m_ActuatorManager.GetCombinedActionSpec(), m_ActuatorManager);
}
/// <summary>

public virtual void Initialize() { }
/// <summary>
/// Implement `Heuristic()` to choose an action for this agent using a custom heuristic.
/// Implement <see cref="Heuristic"/> to choose an action for this agent using a custom heuristic.
/// control of an agent using keyboard, mouse, or game controller input.
/// control of an agent using keyboard, mouse, game controller input, or a script.
///
/// Your heuristic implementation can use any decision making logic you specify. Assign decision
/// values to the <see cref="ActionBuffers.ContinuousActions"/> and <see cref="ActionBuffers.DiscreteActions"/>
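
A minimal sketch of such an override; the agent class and control scheme are illustrative, and the signature matches the Match3Agent.Heuristic override earlier in this diff.

using Unity.MLAgents;
using Unity.MLAgents.Actuators;
using UnityEngine;

public class KeyboardAgent : Agent
{
    public override void Heuristic(in ActionBuffers actionsOut)
    {
        // Drive the first continuous action from the horizontal input axis.
        var continuousActions = actionsOut.ContinuousActions;
        continuousActions[0] = Input.GetAxis("Horizontal");
    }
}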

switch (m_PolicyFactory.BrainParameters.VectorActionSpaceType)
{
case SpaceType.Continuous:
Heuristic(actionsOut.ContinuousActions.Array);
Heuristic(m_LegacyHeuristicCache);
Array.Copy(m_LegacyHeuristicCache, actionsOut.ContinuousActions.Array, m_LegacyActionCache.Length);
var convertedOut = Array.ConvertAll(actionsOut.DiscreteActions.Array, x => (float)x);
Heuristic(convertedOut);
Heuristic(m_LegacyHeuristicCache);
discreteActionSegment[i] = (int)convertedOut[i];
discreteActionSegment[i] = (int)m_LegacyHeuristicCache[i];
}
/// <summary>

// Support legacy OnActionReceived
// TODO don't set this up if the sizes are 0?
var param = m_PolicyFactory.BrainParameters;
m_VectorActuator = new VectorActuator(this, param.ActionSpec);
m_VectorActuator = new VectorActuator(this, this, param.ActionSpec);
m_LegacyHeuristicCache = new float[m_VectorActuator.TotalNumberOfActions()];
m_ActuatorManager.Add(m_VectorActuator);

m_Info.done = false;
m_Info.maxStepReached = false;
m_Info.episodeId = m_EpisodeId;
m_Info.teamManagerId = m_TeamManager == null ? -1 : m_TeamManager.GetId();
using (TimerStack.Instance.Scoped("RequestDecision"))
{

/// three values in ActionBuffers.ContinuousActions array to use as the force components.
/// During training, the agent's policy learns to set those particular elements of
/// the array to maximize the training rewards the agent receives. (Of course,
/// if you implement a <seealso cref="Heuristic(in ActionBuffers)"/> function, it must use the same
/// if you implement a <seealso cref="Agent.Heuristic(in ActionBuffers)"/> function, it must use the same
/// elements of the action array for the same purpose since there is no learning
/// involved.)
///

if (!actions.ContinuousActions.IsEmpty())
{
m_LegacyActionCache = actions.ContinuousActions.Array;
Array.Copy(actions.ContinuousActions.Array,
m_LegacyActionCache,
actionSpec.NumContinuousActions);
m_LegacyActionCache = Array.ConvertAll(actions.DiscreteActions.Array, x => (float)x);
for (var i = 0; i < m_LegacyActionCache.Length; i++)
{
m_LegacyActionCache[i] = (float)actions.DiscreteActions[i];
}
}
// Disable deprecation warnings so we can call the legacy overload.
#pragma warning disable CS0618

public void SetTeamManager(ITeamManager teamManager)
{
m_TeamManager = teamManager;
m_Info.teamManagerId = teamManager?.GetId();
teamManager?.RegisterAgent(this);
}
}

com.unity.ml-agents/Runtime/Analytics/Events.cs (68 changes)


{
public string SensorName;
public string CompressionType;
public int BuiltInSensorType;
public EventObservationDimensionInfo[] DimensionInfos;
public static EventObservationSpec FromSensor(ISensor sensor)

// TODO copy flags when we have them
}
var builtInSensorType =
(sensor as IBuiltInSensor)?.GetBuiltInSensorType() ?? Sensors.BuiltInSensorType.Unknown;
BuiltInSensorType = (int)builtInSensorType,
}
internal struct RemotePolicyInitializedEvent
{
public string TrainingSessionGuid;
/// <summary>
/// Hash of the BehaviorName.
/// </summary>
public string BehaviorName;
public List<EventObservationSpec> ObservationSpecs;
public EventActionSpec ActionSpec;
/// <summary>
/// This will be the same as TrainingEnvironmentInitializedEvent if available, but
/// TrainingEnvironmentInitializedEvent may not always be available with older trainers.
/// </summary>
public string MLAgentsEnvsVersion;
public string TrainerCommunicationVersion;
}
internal struct TrainingEnvironmentInitializedEvent
{
public string TrainingSessionGuid;
public string TrainerPythonVersion;
public string MLAgentsVersion;
public string MLAgentsEnvsVersion;
public string TorchVersion;
public string TorchDeviceType;
public int NumEnvironments;
public int NumEnvironmentParameters;
}
[Flags]
internal enum RewardSignals
{
Extrinsic = 1 << 0,
Gail = 1 << 1,
Curiosity = 1 << 2,
Rnd = 1 << 3,
}
[Flags]
internal enum TrainingFeatures
{
BehavioralCloning = 1 << 0,
Recurrent = 1 << 1,
Threaded = 1 << 2,
SelfPlay = 1 << 3,
Curriculum = 1 << 4,
}
internal struct TrainingBehaviorInitializedEvent
{
public string TrainingSessionGuid;
public string BehaviorName;
public string TrainerType;
public RewardSignals RewardSignalFlags;
public TrainingFeatures TrainingFeatureFlags;
public string VisualEncoder;
public int NumNetworkLayers;
public int NumNetworkHiddenUnits;
}
}

com.unity.ml-agents/Runtime/Analytics/InferenceAnalytics.cs (14 changes)


{
const string k_VendorKey = "unity.ml-agents";
const string k_EventName = "ml_agents_inferencemodelset";
const int k_EventVersion = 1;
/// <summary>
/// Whether or not we've registered this particular event yet

/// </summary>
const int k_MaxNumberOfElements = 1000;
/// <summary>
/// Models that we've already sent events for.
/// </summary>

}
#if UNITY_EDITOR
AnalyticsResult result = EditorAnalytics.RegisterEventWithLimit(k_EventName, k_MaxEventsPerHour, k_MaxNumberOfElements, k_VendorKey);
AnalyticsResult result = EditorAnalytics.RegisterEventWithLimit(k_EventName, k_MaxEventsPerHour, k_MaxNumberOfElements, k_VendorKey, k_EventVersion);
#else
AnalyticsResult result = AnalyticsResult.UnsupportedPlatform;
#endif

var data = GetEventForModel(nnModel, behaviorName, inferenceDevice, sensors, actionSpec);
// Note - to debug, use JsonUtility.ToJson on the event.
// Debug.Log(JsonUtility.ToJson(data, true));
//Debug.Log(JsonUtility.ToJson(data, true));
EditorAnalytics.SendEventWithLimit(k_EventName, data);
if (AnalyticsUtils.s_SendEditorAnalytics)
{
EditorAnalytics.SendEventWithLimit(k_EventName, data, k_EventVersion);
}
#else
return;