
Merge branch 'develop' into release-0.11.0

/develop-gpu-test
Jonathan Harper, 5 years ago
Current commit: 8550679d
123 files changed, with 763 insertions and 1,008 deletions.
  1. .circleci/config.yml (19 changes)
  2. .pre-commit-config.yaml (23 changes)
  3. .yamato/csharp-tests.yml (2 changes)
  4. .yamato/standalone-build-test.yml (4 changes)
  5. UnitySDK/Assets/ML-Agents/Editor/AgentEditor.cs (1 change)
  6. UnitySDK/Assets/ML-Agents/Editor/BehaviorParametersEditor.cs (24 changes)
  7. UnitySDK/Assets/ML-Agents/Editor/BrainParametersDrawer.cs (26 changes)
  8. UnitySDK/Assets/ML-Agents/Editor/DemonstrationImporter.cs (2 changes)
  9. UnitySDK/Assets/ML-Agents/Editor/ResetParameterDrawer.cs (16 changes)
  10. UnitySDK/Assets/ML-Agents/Editor/Tests/DemonstrationTests.cs (17 changes)
  11. UnitySDK/Assets/ML-Agents/Editor/Tests/EditModeTestInternalBrainTensorApplier.cs (55 changes)
  12. UnitySDK/Assets/ML-Agents/Editor/Tests/EditModeTestInternalBrainTensorGenerator.cs (73 changes)
  13. UnitySDK/Assets/ML-Agents/Editor/Tests/MLAgentsEditModeTest.cs (21 changes)
  14. UnitySDK/Assets/ML-Agents/Editor/Tests/RandomNormalTest.cs (6 changes)
  15. UnitySDK/Assets/ML-Agents/Editor/Tests/RayPerceptionTests.cs (4 changes)
  16. UnitySDK/Assets/ML-Agents/Editor/Tests/StandaloneBuildTest.cs (6 changes)
  17. UnitySDK/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DAgent.cs (4 changes)
  18. UnitySDK/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DHardAgent.cs (4 changes)
  19. UnitySDK/Assets/ML-Agents/Examples/Basic/Scripts/BasicAgent.cs (6 changes)
  20. UnitySDK/Assets/ML-Agents/Examples/Bouncer/Scripts/BouncerAgent.cs (4 changes)
  21. UnitySDK/Assets/ML-Agents/Examples/Bouncer/Scripts/BouncerTarget.cs (2 changes)
  22. UnitySDK/Assets/ML-Agents/Examples/FoodCollector/Scripts/FoodCollectorAgent.cs (8 changes)
  23. UnitySDK/Assets/ML-Agents/Examples/GridWorld/Scripts/GridAgent.cs (18 changes)
  24. UnitySDK/Assets/ML-Agents/Examples/GridWorld/Scripts/GridArea.cs (4 changes)
  25. UnitySDK/Assets/ML-Agents/Examples/Hallway/Scripts/HallwayAgent.cs (2 changes)
  26. UnitySDK/Assets/ML-Agents/Examples/PushBlock/Scripts/PushAgentBasic.cs (4 changes)
  27. UnitySDK/Assets/ML-Agents/Examples/Pyramids/Scripts/PyramidAgent.cs (12 changes)
  28. UnitySDK/Assets/ML-Agents/Examples/Pyramids/Scripts/PyramidArea.cs (2 changes)
  29. UnitySDK/Assets/ML-Agents/Examples/Pyramids/Scripts/PyramidSwitch.cs (12 changes)
  30. UnitySDK/Assets/ML-Agents/Examples/Reacher/Scripts/ReacherAgent.cs (14 changes)
  31. UnitySDK/Assets/ML-Agents/Examples/Reacher/Scripts/ReacherGoal.cs (6 changes)
  32. UnitySDK/Assets/ML-Agents/Examples/SharedAssets/Scripts/FlyCamera.cs (6 changes)
  33. UnitySDK/Assets/ML-Agents/Examples/SharedAssets/Scripts/GroundContact.cs (2 changes)
  34. UnitySDK/Assets/ML-Agents/Examples/SharedAssets/Scripts/RayPerception3D.cs (21 changes)
  35. UnitySDK/Assets/ML-Agents/Examples/SharedAssets/Scripts/TargetContact.cs (2 changes)
  36. UnitySDK/Assets/ML-Agents/Examples/Soccer/Scripts/AgentSoccer.cs (4 changes)
  37. UnitySDK/Assets/ML-Agents/Examples/Tennis/Scripts/HitWall.cs (10 changes)
  38. UnitySDK/Assets/ML-Agents/Examples/Tennis/Scripts/TennisAgent.cs (16 changes)
  39. UnitySDK/Assets/ML-Agents/Examples/Tennis/Scripts/TennisArea.cs (2 changes)
  40. UnitySDK/Assets/ML-Agents/Examples/Walker/Scripts/WalkerAgent.cs (8 changes)
  41. UnitySDK/Assets/ML-Agents/Examples/WallJump/Scripts/WallJumpAgent.cs (6 changes)
  42. UnitySDK/Assets/ML-Agents/Plugins/Barracuda.Core/Barracuda/Plugins/Editor/BarracudaEditor/NNModelImporter.cs (6 changes)
  43. UnitySDK/Assets/ML-Agents/Scripts/Academy.cs (20 changes)
  44. UnitySDK/Assets/ML-Agents/Scripts/ActionMasker.cs (10 changes)
  45. UnitySDK/Assets/ML-Agents/Scripts/Agent.cs (183 changes)
  46. UnitySDK/Assets/ML-Agents/Scripts/DemonstrationRecorder.cs (14 changes)
  47. UnitySDK/Assets/ML-Agents/Scripts/DemonstrationStore.cs (24 changes)
  48. UnitySDK/Assets/ML-Agents/Scripts/Grpc/CommunicatorObjects/AgentAction.cs (33 changes)
  49. UnitySDK/Assets/ML-Agents/Scripts/Grpc/CommunicatorObjects/AgentInfo.cs (41 changes)
  50. UnitySDK/Assets/ML-Agents/Scripts/Grpc/GrpcExtensions.cs (9 changes)
  51. UnitySDK/Assets/ML-Agents/Scripts/Grpc/RpcCommunicator.cs (41 changes)
  52. UnitySDK/Assets/ML-Agents/Scripts/ICommunicator.cs (22 changes)
  53. UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/ApplierImpl.cs (54 changes)
  54. UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/BarracudaModelParamLoader.cs (25 changes)
  55. UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/GeneratorImpl.cs (141 changes)
  56. UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/ModelRunner.cs (45 changes)
  57. UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/TensorApplier.cs (14 changes)
  58. UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/TensorGenerator.cs (58 changes)
  59. UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/TensorProxy.cs (2 changes)
  60. UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/Utils/Multinomial.cs (2 changes)
  61. UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/Utils/RandomNormal.cs (10 changes)
  62. UnitySDK/Assets/ML-Agents/Scripts/Policy/BarracudaPolicy.cs (27 changes)
  63. UnitySDK/Assets/ML-Agents/Scripts/Policy/BehaviorParameters.cs (16 changes)
  64. UnitySDK/Assets/ML-Agents/Scripts/Policy/HeuristicPolicy.cs (6 changes)
  65. UnitySDK/Assets/ML-Agents/Scripts/Policy/IPolicy.cs (1 change)
  66. UnitySDK/Assets/ML-Agents/Scripts/Policy/RemotePolicy.cs (29 changes)
  67. UnitySDK/Assets/ML-Agents/Scripts/ResetParameters.cs (5 changes)
  68. UnitySDK/Assets/ML-Agents/Scripts/Sensor/CameraSensor.cs (18 changes)
  69. UnitySDK/Assets/ML-Agents/Scripts/Sensor/CameraSensorComponent.cs (2 changes)
  70. UnitySDK/Assets/ML-Agents/Scripts/Sensor/ISensor.cs (12 changes)
  71. UnitySDK/Assets/ML-Agents/Scripts/Sensor/RenderTextureSensor.cs (21 changes)
  72. UnitySDK/Assets/ML-Agents/Scripts/Sensor/RenderTextureSensorComponent.cs (2 changes)
  73. UnitySDK/Assets/ML-Agents/Scripts/Sensor/SensorBase.cs (17 changes)
  74. UnitySDK/Assets/ML-Agents/Scripts/Startup.cs (6 changes)
  75. UnitySDK/Assets/ML-Agents/Scripts/Timer.cs (15 changes)
  76. UnitySDK/Assets/ML-Agents/Scripts/Utilities.cs (61 changes)
  77. UnitySDK/UnitySDK.sln.DotSettings (1 change)
  78. docs/Migrating.md (9 changes)
  79. gym-unity/gym_unity/envs/__init__.py (2 changes)
  80. ml-agents-envs/mlagents/envs/action_info.py (1 change)
  81. ml-agents-envs/mlagents/envs/brain.py (43 changes)
  82. ml-agents-envs/mlagents/envs/communicator_objects/agent_action_pb2.py (15 changes)
  83. ml-agents-envs/mlagents/envs/communicator_objects/agent_action_pb2.pyi (6 changes)
  84. ml-agents-envs/mlagents/envs/communicator_objects/agent_info_pb2.py (25 changes)
  85. ml-agents-envs/mlagents/envs/communicator_objects/agent_info_pb2.pyi (6 changes)
  86. ml-agents-envs/mlagents/envs/environment.py (73 changes)
  87. ml-agents-envs/mlagents/envs/mock_communicator.py (2 changes)
  88. ml-agents-envs/mlagents/envs/rpc_communicator.py (12 changes)
  89. ml-agents-envs/mlagents/envs/simple_env_manager.py (4 changes)
  90. ml-agents-envs/mlagents/envs/subprocess_env_manager.py (8 changes)
  91. ml-agents-envs/mlagents/envs/timers.py (13 changes)
  92. ml-agents/mlagents/trainers/barracuda.py (1 change)
  93. ml-agents/mlagents/trainers/bc/policy.py (4 changes)
  94. ml-agents/mlagents/trainers/components/reward_signals/extrinsic/signal.py (18 changes)
  95. ml-agents/mlagents/trainers/learn.py (12 changes)
  96. ml-agents/mlagents/trainers/ppo/multi_gpu_policy.py (11 changes)
  97. ml-agents/mlagents/trainers/ppo/policy.py (8 changes)
  98. ml-agents/mlagents/trainers/rl_trainer.py (17 changes)
  99. ml-agents/mlagents/trainers/sac/models.py (6 changes)
  100. ml-agents/mlagents/trainers/sac/policy.py (4 changes)

19
.circleci/config.yml


markdown_link_check:
parameters:
precommit_command:
type: string
description: precommit hook to run
default: markdown-link-check
docker:
- image: circleci/node:12.6.0
working_directory: ~/repo

name: Run markdown-link-check via precommit
command: |
. venv/bin/activate
pre-commit run --hook-stage manual markdown-link-check --all-files
pre-commit run --hook-stage manual << parameters.precommit_command >> --all-files
protobuf_generation_check:
docker:

only: /[0-9]+(\.[0-9]+)*(\.dev[0-9]+)*/
branches:
ignore: /.*/
nightly:
triggers:
- schedule:
cron: "0 0 * * *"
filters:
branches:
only:
- develop
jobs:
- markdown_link_check:
name: markdown-link-check full
precommit_command: markdown-link-check-full

23
.pre-commit-config.yaml


hooks:
- id: python-check-mock-methods
- repo: https://github.com/pre-commit/mirrors-pylint
rev: v2.4.3
hooks:
- id: pylint
exclude: >
(?x)^(
.*_pb2.py|
.*_pb2_grpc.py|
.*/tests/.*
)$
require_serial: true
# "Local" hooks, see https://pre-commit.com/#repository-local-hooks
- repo: local
hooks:

# Note that you must install the package separately via npm. For example:
# brew install npm; npm install -g markdown-link-check
entry: bash -xc 'echo "$@" | xargs -n1 -t markdown-link-check -c markdown-link-check.config.json' --
entry: bash -xc 'echo "$@" | xargs -n1 -t markdown-link-check -c markdown-link-check.fast.json' --
stages: [manual]
- id: markdown-link-check-full
name: markdown-link-check-full
entry: bash -xc 'echo "$@" | xargs -n1 -t markdown-link-check -c markdown-link-check.full.json' --
language: system
types: [markdown]
exclude: ".*localized.*"
stages: [manual]
- id: validate-versions
name: validate library versions

2
.yamato/csharp-tests.yml


name: Test Mac EditMode {{ editor.version }}
agent:
type: Unity::VM::osx
image: ml-agents/ml-agents-bokken-mac:release
image: ml-agents/ml-agents-bokken-mac:v0.1.2-440635
flavor: i1.small
variables:
UNITY_VERSION: {{ editor.version }}

4
.yamato/standalone-build-test.yml


name: Test Mac Standalone {{ editor.version }}
agent:
type: Unity::VM::osx
image: ml-agents/ml-agents-bokken-mac:release
image: ml-agents/ml-agents-bokken-mac:v0.1.2-440635
flavor: i1.small
variables:
UNITY_VERSION: {{ editor.version }}

- "master"
- "/release-.*/"
- "/hotfix-.*/"
{% endfor %}
{% endfor %}

1
UnitySDK/Assets/ML-Agents/Editor/AgentEditor.cs


using UnityEngine;
using UnityEditor;
using Barracuda;
namespace MLAgents
{

24
UnitySDK/Assets/ML-Agents/Editor/BehaviorParametersEditor.cs


[CanEditMultipleObjects]
public class BehaviorParametersEditor : Editor
{
private const float k_TimeBetweenModelReloads = 2f;
const float k_TimeBetweenModelReloads = 2f;
private float m_TimeSinceModelReload;
float m_TimeSinceModelReload;
private bool m_RequireReload;
bool m_RequireReload;
var serializedObject = base.serializedObject;
serializedObject.Update();
var so = serializedObject;
so.Update();
EditorGUILayout.PropertyField(serializedObject.FindProperty("m_BehaviorName"));
EditorGUILayout.PropertyField(serializedObject.FindProperty("m_BrainParameters"), true);
EditorGUILayout.PropertyField(serializedObject.FindProperty("m_Model"), true);
EditorGUILayout.PropertyField(so.FindProperty("m_BehaviorName"));
EditorGUILayout.PropertyField(so.FindProperty("m_BrainParameters"), true);
EditorGUILayout.PropertyField(so.FindProperty("m_Model"), true);
EditorGUILayout.PropertyField(serializedObject.FindProperty("m_InferenceDevice"), true);
EditorGUILayout.PropertyField(so.FindProperty("m_InferenceDevice"), true);
EditorGUILayout.PropertyField(serializedObject.FindProperty("m_UseHeuristic"));
EditorGUILayout.PropertyField(so.FindProperty("m_UseHeuristic"));
// EditorGUILayout.PropertyField(serializedObject.FindProperty("m_Heuristic"), true);
EditorGUI.indentLevel--;
if (EditorGUI.EndChangeCheck())

DisplayFailedModelChecks();
serializedObject.ApplyModifiedProperties();
so.ApplyModifiedProperties();
private void DisplayFailedModelChecks()
void DisplayFailedModelChecks()
{
if (m_RequireReload && m_TimeSinceModelReload > k_TimeBetweenModelReloads)
{

26
UnitySDK/Assets/ML-Agents/Editor/BrainParametersDrawer.cs


public class BrainParametersDrawer : PropertyDrawer
{
// The height of a line in the Unity Inspectors
private const float k_LineHeight = 17f;
private const int k_VecObsNumLine = 3;
private const string k_ActionSizePropName = "vectorActionSize";
private const string k_ActionTypePropName = "vectorActionSpaceType";
private const string k_ActionDescriptionPropName = "vectorActionDescriptions";
private const string k_VecObsPropName = "vectorObservationSize";
private const string k_NumVecObsPropName = "numStackedVectorObservations";
const float k_LineHeight = 17f;
const int k_VecObsNumLine = 3;
const string k_ActionSizePropName = "vectorActionSize";
const string k_ActionTypePropName = "vectorActionSpaceType";
const string k_ActionDescriptionPropName = "vectorActionDescriptions";
const string k_VecObsPropName = "vectorObservationSize";
const string k_NumVecObsPropName = "numStackedVectorObservations";
/// <inheritdoc />
public override float GetPropertyHeight(SerializedProperty property, GUIContent label)

/// <param name="position">Rectangle on the screen to use for the property GUI.</param>
/// <param name="property">The SerializedProperty of the BrainParameters
/// to make the custom GUI for.</param>
private static void DrawVectorObservation(Rect position, SerializedProperty property)
static void DrawVectorObservation(Rect position, SerializedProperty property)
{
EditorGUI.LabelField(position, "Vector Observation");
position.y += k_LineHeight;

/// The Height required to draw the Vector Observations paramaters
/// </summary>
/// <returns>The height of the drawer of the Vector Observations </returns>
private static float GetHeightDrawVectorObservation()
static float GetHeightDrawVectorObservation()
{
return k_VecObsNumLine * k_LineHeight;
}

/// <param name="position">Rectangle on the screen to use for the property GUI.</param>
/// <param name="property">The SerializedProperty of the BrainParameters
/// to make the custom GUI for.</param>
private static void DrawVectorAction(Rect position, SerializedProperty property)
static void DrawVectorAction(Rect position, SerializedProperty property)
{
EditorGUI.LabelField(position, "Vector Action");
position.y += k_LineHeight;

/// <param name="position">Rectangle on the screen to use for the property GUI.</param>
/// <param name="property">The SerializedProperty of the BrainParameters
/// to make the custom GUI for.</param>
private static void DrawContinuousVectorAction(Rect position, SerializedProperty property)
static void DrawContinuousVectorAction(Rect position, SerializedProperty property)
{
var vecActionSize = property.FindPropertyRelative(k_ActionSizePropName);
vecActionSize.arraySize = 1;

/// <param name="position">Rectangle on the screen to use for the property GUI.</param>
/// <param name="property">The SerializedProperty of the BrainParameters
/// to make the custom GUI for.</param>
private static void DrawDiscreteVectorAction(Rect position, SerializedProperty property)
static void DrawDiscreteVectorAction(Rect position, SerializedProperty property)
{
var vecActionSize = property.FindPropertyRelative(k_ActionSizePropName);
vecActionSize.arraySize = EditorGUI.IntField(

/// The Height required to draw the Vector Action parameters
/// </summary>
/// <returns>The height of the drawer of the Vector Action </returns>
private static float GetHeightDrawVectorAction(SerializedProperty property)
static float GetHeightDrawVectorAction(SerializedProperty property)
{
var actionSize = 2 + property.FindPropertyRelative(k_ActionSizePropName).arraySize;
if (property.FindPropertyRelative(k_ActionTypePropName).enumValueIndex == 0)

2
UnitySDK/Assets/ML-Agents/Editor/DemonstrationImporter.cs


[ScriptedImporter(1, new[] {"demo"})]
public class DemonstrationImporter : ScriptedImporter
{
private const string k_IconPath = "Assets/ML-Agents/Resources/DemoIcon.png";
const string k_IconPath = "Assets/ML-Agents/Resources/DemoIcon.png";
public override void OnImportAsset(AssetImportContext ctx)
{

16
UnitySDK/Assets/ML-Agents/Editor/ResetParameterDrawer.cs


[CustomPropertyDrawer(typeof(ResetParameters))]
public class ResetParameterDrawer : PropertyDrawer
{
private ResetParameters m_Parameters;
ResetParameters m_Parameters;
private const float k_LineHeight = 17f;
const float k_LineHeight = 17f;
private const string k_NewKeyPrefix = "Param-";
const string k_NewKeyPrefix = "Param-";
/// <summary>
/// Computes the height of the Drawer depending on the property it is showing

/// </summary>
/// <param name="addRect">The rectangle for the Add New button.</param>
/// <param name="removeRect">The rectangle for the Remove Last button.</param>
private void DrawAddRemoveButtons(Rect addRect, Rect removeRect)
void DrawAddRemoveButtons(Rect addRect, Rect removeRect)
{
// This is the Add button
if (m_Parameters.Count == 0)

/// Signals that the property has been modified and requires the scene to be saved for
/// the changes to persist. Only works when the Editor is not playing.
/// </summary>
private static void MarkSceneAsDirty()
static void MarkSceneAsDirty()
{
if (!EditorApplication.isPlaying)
{

/// </summary>
/// <param name="property">The SerializedProperty of the ResetParameters
/// to make the custom GUI for.</param>
private void LazyInitializeParameters(SerializedProperty property)
void LazyInitializeParameters(SerializedProperty property)
{
if (m_Parameters != null)
{

/// <summary>
/// Removes the last ResetParameter from the ResetParameters
/// </summary>
private void RemoveLastParameter()
void RemoveLastParameter()
{
if (m_Parameters.Count > 0)
{

/// <summary>
/// Adds a new ResetParameter to the ResetParameters with a default name.
/// </summary>
private void AddParameter()
void AddParameter()
{
var key = k_NewKeyPrefix + m_Parameters.Count;
var value = default(float);

17
UnitySDK/Assets/ML-Agents/Editor/Tests/DemonstrationTests.cs


{
public class DemonstrationTests : MonoBehaviour
{
private const string k_DemoDirecory = "Assets/Demonstrations/";
private const string k_ExtensionType = ".demo";
private const string k_DemoName = "Test";
const string k_DemoDirecory = "Assets/Demonstrations/";
const string k_ExtensionType = ".demo";
const string k_DemoName = "Test";
[Test]
public void TestSanitization()

{
vectorObservationSize = 3,
numStackedVectorObservations = 2,
vectorActionDescriptions = new[] {"TestActionA", "TestActionB"},
vectorActionSize = new[] {2, 2},
vectorActionDescriptions = new[] { "TestActionA", "TestActionB" },
vectorActionSize = new[] { 2, 2 },
vectorActionSpaceType = SpaceType.Discrete
};

var agentInfo = new AgentInfo
{
reward = 1f,
actionMasks = new[] {false, true},
actionMasks = new[] { false, true },
memories = new List<float>(),
stackedVectorObservation = new List<float>() {1f, 1f, 1f},
floatObservations = new List<float>() { 1f, 1f, 1f },
storedVectorActions = new[] {0f, 1f},
storedVectorActions = new[] { 0f, 1f },
textObservation = "TestAction",
};

55
UnitySDK/Assets/ML-Agents/Editor/Tests/EditModeTestInternalBrainTensorApplier.cs


{
public class EditModeTestInternalBrainTensorApplier
{
private class TestAgent : Agent
class TestAgent : Agent
var f = typeof(Agent).GetField(
var f = typeof(Agent).GetField(
private List<Agent> GetFakeAgentInfos()
List<Agent> GetFakeAgentInfos()
{
var goA = new GameObject("goA");
var agentA = goA.AddComponent<TestAgent>();

return new List<Agent> {agentA, agentB};
return new List<Agent> { agentA, agentB };
}
[Test]

var alloc = new TensorCachingAllocator();
var tensorGenerator = new TensorApplier(bp, 0, alloc);
var mem = new Dictionary<int, List<float>>();
var tensorGenerator = new TensorApplier(bp, 0, alloc, mem);
Assert.IsNotNull(tensorGenerator);
alloc.Dispose();
}

{
var inputTensor = new TensorProxy()
{
shape = new long[] {2, 3},
data = new Tensor(2, 3, new float[] {1, 2, 3, 4, 5, 6})
shape = new long[] { 2, 3 },
data = new Tensor(2, 3, new float[] { 1, 2, 3, 4, 5, 6 })
};
var agentInfos = GetFakeAgentInfos();

{
var inputTensor = new TensorProxy()
{
shape = new long[] {2, 5},
shape = new long[] { 2, 5 },
new[] {0.5f, 22.5f, 0.1f, 5f, 1f, 4f, 5f, 6f, 7f, 8f})
new[] { 0.5f, 22.5f, 0.1f, 5f, 1f, 4f, 5f, 6f, 7f, 8f })
var applier = new DiscreteActionOutputApplier(new[] {2, 3}, 0, alloc);
var applier = new DiscreteActionOutputApplier(new[] { 2, 3 }, 0, alloc);
applier.Apply(inputTensor, agentInfos);
var agents = agentInfos;

}
[Test]
public void ApplyMemoryOutput()
{
var inputTensor = new TensorProxy()
{
shape = new long[] {2, 5},
data = new Tensor(
2,
5,
new[] {0.5f, 22.5f, 0.1f, 5f, 1f, 4f, 5f, 6f, 7f, 8f})
};
var agentInfos = GetFakeAgentInfos();
var applier = new MemoryOutputApplier();
applier.Apply(inputTensor, agentInfos);
var agents = agentInfos;
var agent = agents[0] as TestAgent;
Assert.NotNull(agent);
var action = agent.GetAction();
Assert.AreEqual(action.memories[0], 0.5f);
Assert.AreEqual(action.memories[1], 22.5f);
agent = agents[1] as TestAgent;
Assert.NotNull(agent);
action = agent.GetAction();
Assert.AreEqual(action.memories[2], 6);
Assert.AreEqual(action.memories[3], 7);
}
[Test]
shape = new long[] {2, 1},
data = new Tensor(2, 1, new[] {0.5f, 8f})
shape = new long[] { 2, 1 },
data = new Tensor(2, 1, new[] { 0.5f, 8f })
};
var agentInfos = GetFakeAgentInfos();

73
UnitySDK/Assets/ML-Agents/Editor/Tests/EditModeTestInternalBrainTensorGenerator.cs


using System.Collections.Generic;
using System.Linq;
using System.Reflection;
private static IEnumerable<Agent> GetFakeAgentInfos()
static IEnumerable<Agent> GetFakeAgents()
var acaGo = new GameObject("TestAcademy");
acaGo.AddComponent<TestAcademy>();
var aca = acaGo.GetComponent<TestAcademy>();
aca.resetParameters = new ResetParameters();
var bpA = goA.AddComponent<BehaviorParameters>();
bpA.brainParameters.vectorObservationSize = 3;
bpA.brainParameters.numStackedVectorObservations = 1;
var goB = new GameObject("goB");
var bpB = goB.AddComponent<BehaviorParameters>();
bpB.brainParameters.vectorObservationSize = 3;
bpB.brainParameters.numStackedVectorObservations = 1;
var agentB = goB.AddComponent<TestAgent>();
var agents = new List<Agent> { agentA, agentB };
foreach (var agent in agents)
{
var agentEnableMethod = typeof(Agent).GetMethod("OnEnableHelper",
BindingFlags.Instance | BindingFlags.NonPublic);
agentEnableMethod?.Invoke(agent, new object[] { aca });
}
agentA.collectObservationsSensor.AddObservation(new Vector3(1, 2, 3));
agentB.collectObservationsSensor.AddObservation(new Vector3(4, 5, 6));
stackedVectorObservation = new[] { 1f, 2f, 3f }.ToList(),
memories = null,
var goB = new GameObject("goB");
var agentB = goB.AddComponent<TestAgent>();
stackedVectorObservation = new[] { 4f, 5f, 6f }.ToList(),
memories = new[] { 1f, 1f, 1f }.ToList(),
return new List<Agent> { agentA, agentB };
return agents;
var bp = new BrainParameters();
var tensorGenerator = new TensorGenerator(bp, 0, alloc);
var mem = new Dictionary<int, List<float>>();
var tensorGenerator = new TensorGenerator(0, alloc, mem);
Assert.IsNotNull(tensorGenerator);
alloc.Dispose();
}

shape = new long[] { 2, 3 }
};
const int batchSize = 4;
var agentInfos = GetFakeAgentInfos();
var agentInfos = GetFakeAgents();
generator.AddSensorIndex(0);
generator.AddSensorIndex(1);
generator.AddSensorIndex(2);
generator.Generate(inputTensor, batchSize, agentInfos);
Assert.IsNotNull(inputTensor.data);
Assert.AreEqual(inputTensor.data[0, 0], 1);

}
[Test]
public void GenerateRecurrentInput()
{
var inputTensor = new TensorProxy
{
shape = new long[] { 2, 5 }
};
const int batchSize = 4;
var agentInfos = GetFakeAgentInfos();
var alloc = new TensorCachingAllocator();
var generator = new RecurrentInputGenerator(alloc);
generator.Generate(inputTensor, batchSize, agentInfos);
Assert.IsNotNull(inputTensor.data);
Assert.AreEqual(inputTensor.data[0, 0], 0);
Assert.AreEqual(inputTensor.data[0, 4], 0);
Assert.AreEqual(inputTensor.data[1, 0], 1);
Assert.AreEqual(inputTensor.data[1, 4], 0);
alloc.Dispose();
}
[Test]
public void GeneratePreviousActionInput()
{
var inputTensor = new TensorProxy

};
const int batchSize = 4;
var agentInfos = GetFakeAgentInfos();
var agentInfos = GetFakeAgents();
var alloc = new TensorCachingAllocator();
var generator = new PreviousActionInputGenerator(alloc);

valueType = TensorProxy.TensorType.FloatingPoint
};
const int batchSize = 4;
var agentInfos = GetFakeAgentInfos();
var agentInfos = GetFakeAgents();
var alloc = new TensorCachingAllocator();
var generator = new ActionMaskInputGenerator(alloc);
generator.Generate(inputTensor, batchSize, agentInfos);

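A hedged reading of the constructor changes visible in the two tensor test files above (not text from the commit itself): TensorApplier and TensorGenerator now take a Dictionary<int, List<float>> of memories alongside the allocator, and TensorGenerator no longer receives the BrainParameters. A minimal construction sketch using only the calls shown in these tests; the wrapper class is illustrative:

using System.Collections.Generic;
using Barracuda;
using MLAgents;
using MLAgents.InferenceBrain;

class TensorPipelineSketch  // illustrative wrapper, not part of the commit
{
    void Construct()
    {
        var bp = new BrainParameters();
        var alloc = new TensorCachingAllocator();
        var memories = new Dictionary<int, List<float>>();  // new shared memories argument

        var applier = new TensorApplier(bp, 0, alloc, memories);    // was: new TensorApplier(bp, 0, alloc)
        var generator = new TensorGenerator(0, alloc, memories);    // was: new TensorGenerator(bp, 0, alloc)

        alloc.Dispose();
    }
}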
21
UnitySDK/Assets/ML-Agents/Editor/Tests/MLAgentsEditModeTest.cs


using NUnit.Framework;
using System.Reflection;
using MLAgents.Sensor;
using MLAgents.InferenceBrain;
namespace MLAgents.Tests
{

{
initializeAgentCalls += 1;
// Add in some custom sensors so we can confirm they get sorted as expected.
// Add in some custom Sensors so we can confirm they get sorted as expected.
m_Sensors.Add(sensor2);
m_Sensors.Add(sensor1);
sensors.Add(sensor2);
sensors.Add(sensor1);
}
public override void CollectObservations()

public int[] GetFloatObservationShape()
{
return new[] { 1 };
return new[] { 0 };
public void WriteToTensor(TensorProxy tensorProxy, int agentIndex) { }
public int Write(WriteAdapter adapter)
{
// No-op
return 0;
}
public byte[] GetCompressedObservation()
{

Assert.AreEqual(0, agent1.agentActionCalls);
Assert.AreEqual(0, agent2.agentActionCalls);
// Make sure the sensors were sorted
Assert.AreEqual(agent1.m_Sensors[0].GetName(), "testsensor1");
Assert.AreEqual(agent1.m_Sensors[1].GetName(), "testsensor2");
// Make sure the Sensors were sorted
Assert.AreEqual(agent1.sensors[0].GetName(), "testsensor1");
Assert.AreEqual(agent1.sensors[1].GetName(), "testsensor2");
}
}

6
UnitySDK/Assets/ML-Agents/Editor/Tests/RandomNormalTest.cs


{
public class RandomNormalTest
{
private const float k_FirstValue = -1.19580f;
private const float k_SecondValue = -0.97345f;
private const double k_Epsilon = 0.0001;
const float k_FirstValue = -1.19580f;
const float k_SecondValue = -0.97345f;
const double k_Epsilon = 0.0001;
[Test]
public void RandomNormalTestTwoDouble()

4
UnitySDK/Assets/ML-Agents/Editor/Tests/RayPerceptionTests.cs


var go = new GameObject("MyGameObject");
var rayPer3D = go.AddComponent<RayPerception3D>();
var result = rayPer3D.Perceive(1f, angles ,
tags, 0f, 0f);
Debug.Log(result.Count);
var result = rayPer3D.Perceive(1f, angles, tags);
Assert.IsTrue(result.Count == angles.Length * (tags.Length + 2));
}

6
UnitySDK/Assets/ML-Agents/Editor/Tests/StandaloneBuildTest.cs


string[] scenes = { "Assets/ML-Agents/Examples/3DBall/Scenes/3DBall.unity" };
var buildResult = BuildPipeline.BuildPlayer(scenes, "testPlayer", BuildTarget.StandaloneOSX, BuildOptions.None);
#if UNITY_2018_1_OR_NEWER
var isOK = buildResult.summary.result == BuildResult.Succeeded;
var isOk = buildResult.summary.result == BuildResult.Succeeded;
var error = "";
foreach (var stepInfo in buildResult.steps)
{

}
#else
var error = buildResult;
var isOK = string.IsNullOrEmpty(error);
var isOk = string.IsNullOrEmpty(error);
if (isOK)
if (isOk)
{
EditorApplication.Exit(0);
}

4
UnitySDK/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DAgent.cs


{
[Header("Specific to Ball3D")]
public GameObject ball;
private Rigidbody m_BallRb;
private ResetParameters m_ResetParams;
Rigidbody m_BallRb;
ResetParameters m_ResetParams;
public override void InitializeAgent()
{

4
UnitySDK/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DHardAgent.cs


{
[Header("Specific to Ball3DHard")]
public GameObject ball;
private Rigidbody m_BallRb;
private ResetParameters m_ResetParams;
Rigidbody m_BallRb;
ResetParameters m_ResetParams;
public override void InitializeAgent()
{

6
UnitySDK/Assets/ML-Agents/Examples/Basic/Scripts/BasicAgent.cs


public class BasicAgent : Agent
{
[Header("Specific to Basic")]
private BasicAcademy m_Academy;
BasicAcademy m_Academy;
private float m_TimeSinceDecision;
float m_TimeSinceDecision;
int m_Position;
int m_SmallGoalPosition;
int m_LargeGoalPosition;

WaitTimeInference();
}
private void WaitTimeInference()
void WaitTimeInference()
{
if (!m_Academy.GetIsInference())
{

4
UnitySDK/Assets/ML-Agents/Examples/Bouncer/Scripts/BouncerAgent.cs


{
}
private void FixedUpdate()
void FixedUpdate()
{
if (Physics.Raycast(transform.position, new Vector3(0f, -1f, 0f), 0.51f) && m_JumpCooldown <= 0f)
{

return action;
}
private void Update()
void Update()
{
if (m_LookDir.magnitude > float.Epsilon)
{

2
UnitySDK/Assets/ML-Agents/Examples/Bouncer/Scripts/BouncerTarget.cs


gameObject.transform.Rotate(new Vector3(1, 0, 0), 0.5f);
}
private void OnTriggerEnter(Collider collision)
void OnTriggerEnter(Collider collision)
{
var agent = collision.gameObject.GetComponent<Agent>();
if (agent != null)

8
UnitySDK/Assets/ML-Agents/Examples/FoodCollector/Scripts/FoodCollectorAgent.cs


public class FoodCollectorAgent : Agent
{
private FoodCollectorAcademy m_MyAcademy;
FoodCollectorAcademy m_MyAcademy;
public GameObject area;
FoodCollectorArea m_MyArea;
bool m_Frozen;

float m_FrozenTime;
float m_EffectTime;
Rigidbody m_AgentRb;
private float m_LaserLength;
float m_LaserLength;
// Speed of agent rotation.
public float turnSpeed = 300;

public Material frozenMaterial;
public GameObject myLaser;
public bool contribute;
private RayPerception3D m_RayPer;
RayPerception3D m_RayPer;
public bool useVectorObs;

const float rayDistance = 50f;
float[] rayAngles = { 20f, 90f, 160f, 45f, 135f, 70f, 110f };
string[] detectableObjects = { "food", "agent", "wall", "badFood", "frozenAgent" };
AddVectorObs(m_RayPer.Perceive(rayDistance, rayAngles, detectableObjects, 0f, 0f));
AddVectorObs(m_RayPer.Perceive(rayDistance, rayAngles, detectableObjects));
var localVelocity = transform.InverseTransformDirection(m_AgentRb.velocity);
AddVectorObs(localVelocity.x);
AddVectorObs(localVelocity.z);

18
UnitySDK/Assets/ML-Agents/Examples/GridWorld/Scripts/GridAgent.cs


public class GridAgent : Agent
{
private Academy m_Academy;
Academy m_Academy;
private float m_TimeSinceDecision;
float m_TimeSinceDecision;
[Tooltip("Because we want an observation right before making a decision, we can force " +
"a camera to render before making a decision. Place the agentCam here if using " +

"masking turned on may not behave optimally when action masking is turned off.")]
public bool maskActions = true;
private const int k_NoAction = 0; // do nothing!
private const int k_Up = 1;
private const int k_Down = 2;
private const int k_Left = 3;
private const int k_Right = 4;
const int k_NoAction = 0; // do nothing!
const int k_Up = 1;
const int k_Down = 2;
const int k_Left = 3;
const int k_Right = 4;
public override void InitializeAgent()
{

/// <summary>
/// Applies the mask for the agents action to disallow unnecessary actions.
/// </summary>
private void SetMask()
void SetMask()
{
// Prevents the agent from picking an action that would make it collide with a wall
var positionX = (int)transform.position.x;

WaitTimeInference();
}
private void WaitTimeInference()
void WaitTimeInference()
{
if (renderCamera != null)
{

4
UnitySDK/Assets/ML-Agents/Examples/GridWorld/Scripts/GridArea.cs


public GameObject trueAgent;
private ResetParameters m_ResetParameters;
ResetParameters m_ResetParameters;
Camera m_AgentCam;

GameObject m_Se;
GameObject m_Sw;
private Vector3 m_InitialPosition;
Vector3 m_InitialPosition;
public void Awake()
{

2
UnitySDK/Assets/ML-Agents/Examples/Hallway/Scripts/HallwayAgent.cs


float[] rayAngles = { 20f, 60f, 90f, 120f, 160f };
string[] detectableObjects = { "symbol_O_Goal", "symbol_X_Goal", "symbol_O", "symbol_X", "wall" };
AddVectorObs(GetStepCount() / (float)agentParameters.maxStep);
AddVectorObs(m_RayPer.Perceive(rayDistance, rayAngles, detectableObjects, 0f, 0f));
AddVectorObs(m_RayPer.Perceive(rayDistance, rayAngles, detectableObjects));
}
}

4
UnitySDK/Assets/ML-Agents/Examples/PushBlock/Scripts/PushAgentBasic.cs


{
var rayDistance = 12f;
AddVectorObs(m_RayPer.Perceive(rayDistance, m_RayAngles, m_DetectableObjects, 0f, 0f));
AddVectorObs(m_RayPer.Perceive(rayDistance, m_RayAngles, m_DetectableObjects, 1.5f, 0f));
AddVectorObs(m_RayPer.Perceive(rayDistance, m_RayAngles, m_DetectableObjects));
AddVectorObs(m_RayPer.Perceive(rayDistance, m_RayAngles, m_DetectableObjects, 1.5f, 1.5f));
}
}

12
UnitySDK/Assets/ML-Agents/Examples/Pyramids/Scripts/PyramidAgent.cs


public class PyramidAgent : Agent
{
public GameObject area;
private PyramidArea m_MyArea;
private Rigidbody m_AgentRb;
private RayPerception m_RayPer;
private PyramidSwitch m_SwitchLogic;
PyramidArea m_MyArea;
Rigidbody m_AgentRb;
RayPerception m_RayPer;
PyramidSwitch m_SwitchLogic;
public GameObject areaSwitch;
public bool useVectorObs;

float[] rayAngles2 = { 15f, 85f, 155f, 40f, 130f, 65f, 105f };
string[] detectableObjects = { "block", "wall", "goal", "switchOff", "switchOn", "stone" };
AddVectorObs(m_RayPer.Perceive(rayDistance, rayAngles, detectableObjects, 0f, 0f));
AddVectorObs(m_RayPer.Perceive(rayDistance, rayAngles, detectableObjects));
AddVectorObs(m_RayPer.Perceive(rayDistance, rayAngles1, detectableObjects, 0f, 5f));
AddVectorObs(m_RayPer.Perceive(rayDistance, rayAngles2, detectableObjects, 0f, 10f));
AddVectorObs(m_SwitchLogic.GetState());

m_MyArea.CreateStonePyramid(1, items[8]);
}
private void OnCollisionEnter(Collision collision)
void OnCollisionEnter(Collision collision)
{
if (collision.gameObject.CompareTag("goal"))
{

2
UnitySDK/Assets/ML-Agents/Examples/Pyramids/Scripts/PyramidArea.cs


CreateObject(numObjects, stonePyramid, spawnAreaIndex);
}
private void CreateObject(int numObjects, GameObject desiredObject, int spawnAreaIndex)
void CreateObject(int numObjects, GameObject desiredObject, int spawnAreaIndex)
{
for (var i = 0; i < numObjects; i++)
{

12
UnitySDK/Assets/ML-Agents/Examples/Pyramids/Scripts/PyramidSwitch.cs


public Material onMaterial;
public Material offMaterial;
public GameObject myButton;
private bool m_State;
private GameObject m_Area;
private PyramidArea m_AreaComponent;
private int m_PyramidIndex;
bool m_State;
GameObject m_Area;
PyramidArea m_AreaComponent;
int m_PyramidIndex;
public bool GetState()
{

private void Start()
void Start()
{
m_Area = gameObject.transform.parent.gameObject;
m_AreaComponent = m_Area.GetComponent<PyramidArea>();

myButton.GetComponent<Renderer>().material = offMaterial;
}
private void OnCollisionEnter(Collision other)
void OnCollisionEnter(Collision other)
{
if (other.gameObject.CompareTag("agent") && m_State == false)
{

14
UnitySDK/Assets/ML-Agents/Examples/Reacher/Scripts/ReacherAgent.cs


public GameObject pendulumB;
public GameObject hand;
public GameObject goal;
private ReacherAcademy m_MyAcademy;
ReacherAcademy m_MyAcademy;
private Rigidbody m_RbA;
private Rigidbody m_RbB;
Rigidbody m_RbA;
Rigidbody m_RbB;
private float m_GoalSpeed;
float m_GoalSpeed;
private float m_GoalSize;
float m_GoalSize;
private float m_Deviation;
float m_Deviation;
private float m_DeviationFreq;
float m_DeviationFreq;
/// <summary>
/// Collect the rigidbodies of the reacher in order to resue them for

6
UnitySDK/Assets/ML-Agents/Examples/Reacher/Scripts/ReacherGoal.cs


public GameObject hand;
public GameObject goalOn;
private void OnTriggerEnter(Collider other)
void OnTriggerEnter(Collider other)
{
if (other.gameObject == hand)
{

private void OnTriggerExit(Collider other)
void OnTriggerExit(Collider other)
{
if (other.gameObject == hand)
{

private void OnTriggerStay(Collider other)
void OnTriggerStay(Collider other)
{
if (other.gameObject == hand)
{

6
UnitySDK/Assets/ML-Agents/Examples/SharedAssets/Scripts/FlyCamera.cs


public bool rotateOnlyIfMousedown = true;
public bool movementStaysFlat = true;
private Vector3
Vector3
private float m_TotalRun = 1.0f;
float m_TotalRun = 1.0f;
void Awake()
{

}
}
private Vector3 GetBaseInput()
Vector3 GetBaseInput()
{
// returns the basic values, if it's 0 than it's not active.
var pVelocity = new Vector3();

2
UnitySDK/Assets/ML-Agents/Examples/SharedAssets/Scripts/GroundContact.cs


public bool penalizeGroundContact; // Whether to penalize on contact.
public float groundContactPenalty; // Penalty amount (ex: -1).
public bool touchingGround;
private const string k_Ground = "ground"; // Tag of ground object.
const string k_Ground = "ground"; // Tag of ground object.
/// <summary>
/// Check for collision with ground, and optionally penalize agent.

21
UnitySDK/Assets/ML-Agents/Examples/SharedAssets/Scripts/RayPerception3D.cs


/// </summary>
public class RayPerception3D : RayPerception
{
Vector3 m_EndPosition;
private float[] m_SubList;
float[] m_SubList;
/// <summary>
/// Creates perception vector to be used as part of an observation of an agent.

// along with object distance.
foreach (var angle in rayAngles)
{
m_EndPosition = transform.TransformDirection(
PolarToCartesian(rayDistance, angle));
m_EndPosition.y = endOffset;
Vector3 startPositionLocal = new Vector3(0, startOffset, 0);
Vector3 endPositionLocal = PolarToCartesian(rayDistance, angle);
endPositionLocal.y += endOffset;
var startPositionWorld = transform.TransformPoint(startPositionLocal);
var endPositionWorld = transform.TransformPoint(endPositionLocal);
var rayDirection = endPositionWorld - startPositionWorld;
Debug.DrawRay(transform.position + new Vector3(0f, startOffset, 0f),
m_EndPosition, Color.black, 0.01f, true);
Debug.DrawRay(startPositionWorld,rayDirection, Color.black, 0.01f, true);
if (Physics.SphereCast(transform.position +
new Vector3(0f, startOffset, 0f), 0.5f,
m_EndPosition, out m_Hit, rayDistance))
if (Physics.SphereCast(startPositionWorld, 0.5f, rayDirection, out m_Hit, rayDistance))
{
for (var i = 0; i < detectableObjects.Length; i++)
{

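A hedged summary of the RayPerception3D change above, together with the matching call-site edits in the example agents (FoodCollector, Hallway, PushBlock, Soccer, WallJump): rays are now built from local-space start and end points mapped with TransformPoint rather than by offsetting a TransformDirection result, and the updated three-argument calls suggest the start/end offsets now default to zero. An illustrative caller under those assumptions; the class and field names are hypothetical, not from the commit:

using MLAgents;
using UnityEngine;

public class RaycastObservingAgent : Agent  // hypothetical example agent
{
    RayPerception3D m_RayPer;
    const float k_RayDistance = 20f;
    readonly float[] m_RayAngles = { 0f, 45f, 90f, 135f, 180f };
    readonly string[] m_DetectableObjects = { "wall", "goal" };

    public override void InitializeAgent()
    {
        m_RayPer = GetComponent<RayPerception3D>();
    }

    public override void CollectObservations()
    {
        // Offsets omitted: assumed zero defaults, matching the updated example scenes above.
        AddVectorObs(m_RayPer.Perceive(k_RayDistance, m_RayAngles, m_DetectableObjects));
        // Both offsets are interpreted in the agent's local frame, so 2.5f/2.5f casts a ray
        // parallel to the agent's local horizontal plane, 2.5 units above its origin.
        AddVectorObs(m_RayPer.Perceive(k_RayDistance, m_RayAngles, m_DetectableObjects, 2.5f, 2.5f));
    }
}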
2
UnitySDK/Assets/ML-Agents/Examples/SharedAssets/Scripts/TargetContact.cs


public class TargetContact : MonoBehaviour
{
[Header("Detect Targets")] public bool touchingTarget;
private const string k_Target = "target"; // Tag on target object.
const string k_Target = "target"; // Tag on target object.
/// <summary>
/// Check for collision with a target.

4
UnitySDK/Assets/ML-Agents/Examples/Soccer/Scripts/AgentSoccer.cs


{
detectableObjects = m_DetectableObjectsBlue;
}
AddVectorObs(m_RayPer.Perceive(rayDistance, m_RayAngles, detectableObjects, 0f, 0f));
AddVectorObs(m_RayPer.Perceive(rayDistance, m_RayAngles, detectableObjects, 1f, 0f));
AddVectorObs(m_RayPer.Perceive(rayDistance, m_RayAngles, detectableObjects));
AddVectorObs(m_RayPer.Perceive(rayDistance, m_RayAngles, detectableObjects, 1f, 1f));
}
public void MoveAgent(float[] act)

10
UnitySDK/Assets/ML-Agents/Examples/Tennis/Scripts/HitWall.cs


public GameObject areaObject;
public int lastAgentHit;
private TennisArea m_Area;
private TennisAgent m_AgentA;
private TennisAgent m_AgentB;
TennisArea m_Area;
TennisAgent m_AgentA;
TennisAgent m_AgentB;
// Use this for initialization
void Start()

m_AgentB = m_Area.agentB.GetComponent<TennisAgent>();
}
private void OnTriggerExit(Collider other)
void OnTriggerExit(Collider other)
{
if (other.name == "over")
{

}
}
private void OnCollisionEnter(Collision collision)
void OnCollisionEnter(Collision collision)
{
if (collision.gameObject.CompareTag("iWall"))
{

16
UnitySDK/Assets/ML-Agents/Examples/Tennis/Scripts/TennisAgent.cs


public float angle;
public float scale;
private Text m_TextComponent;
private Rigidbody m_AgentRb;
private Rigidbody m_BallRb;
private float m_InvertMult;
private ResetParameters m_ResetParams;
Text m_TextComponent;
Rigidbody m_AgentRb;
Rigidbody m_BallRb;
float m_InvertMult;
ResetParameters m_ResetParams;
private const string k_CanvasName = "Canvas";
private const string k_ScoreBoardAName = "ScoreA";
private const string k_ScoreBoardBName = "ScoreB";
const string k_CanvasName = "Canvas";
const string k_ScoreBoardAName = "ScoreA";
const string k_ScoreBoardBName = "ScoreB";
public override void InitializeAgent()
{

2
UnitySDK/Assets/ML-Agents/Examples/Tennis/Scripts/TennisArea.cs


public GameObject ball;
public GameObject agentA;
public GameObject agentB;
private Rigidbody m_BallRb;
Rigidbody m_BallRb;
// Use this for initialization
void Start()

8
UnitySDK/Assets/ML-Agents/Examples/Walker/Scripts/WalkerAgent.cs


bool m_IsNewDecisionStep;
int m_CurrentDecisionStep;
private Rigidbody m_HipsRb;
private Rigidbody m_ChestRb;
private Rigidbody m_SpineRb;
Rigidbody m_HipsRb;
Rigidbody m_ChestRb;
Rigidbody m_SpineRb;
private ResetParameters m_ResetParams;
ResetParameters m_ResetParams;
public override void InitializeAgent()
{

6
UnitySDK/Assets/ML-Agents/Examples/WallJump/Scripts/WallJumpAgent.cs


var rayDistance = 20f;
float[] rayAngles = { 0f, 45f, 90f, 135f, 180f, 110f, 70f };
AddVectorObs(m_RayPer.Perceive(
rayDistance, rayAngles, m_DetectableObjects, 0f, 0f));
rayDistance, rayAngles, m_DetectableObjects));
rayDistance, rayAngles, m_DetectableObjects, 2.5f, 2.5f));
rayDistance, rayAngles, m_DetectableObjects, 2.5f, 5.0f));
var agentPos = m_AgentRb.position - ground.transform.position;
AddVectorObs(agentPos / 20f);

m_AgentRb.velocity = default(Vector3);
}
private void FixedUpdate()
void FixedUpdate()
{
if (m_Configuration != -1)
{

6
UnitySDK/Assets/ML-Agents/Plugins/Barracuda.Core/Barracuda/Plugins/Editor/BarracudaEditor/NNModelImporter.cs


[ScriptedImporter(1, new[] {"nn"})]
public class NNModelImporter : ScriptedImporter
{
private const string k_IconName = "NNModelIcon";
const string k_IconName = "NNModelIcon";
private Texture2D m_IconTexture;
Texture2D m_IconTexture;
public override void OnImportAsset(AssetImportContext ctx)
{

ctx.SetMainObject(asset);
}
private Texture2D LoadIconTexture()
Texture2D LoadIconTexture()
{
if (m_IconTexture == null)
{

20
UnitySDK/Assets/ML-Agents/Scripts/Academy.cs


"docs/Learning-Environment-Design-Academy.md")]
public abstract class Academy : MonoBehaviour
{
private const string k_ApiVersion = "API-11";
const string k_ApiVersion = "API-11";
private Vector3 m_OriginalGravity;
Vector3 m_OriginalGravity;
private float m_OriginalFixedDeltaTime;
float m_OriginalFixedDeltaTime;
private float m_OriginalMaximumDeltaTime;
float m_OriginalMaximumDeltaTime;
// Fields provided in the Inspector

/// Pointer to the communicator currently in use by the Academy.
public ICommunicator Communicator;
private bool m_Initialized;
private List<ModelRunner> m_ModelRunners = new List<ModelRunner>();
bool m_Initialized;
List<ModelRunner> m_ModelRunners = new List<ModelRunner>();
// Flag used to keep track of the first time the Academy is reset.
bool m_FirstAcademyReset;

}
// Used to read Python-provided environment parameters
private static int ReadArgs()
static int ReadArgs()
{
var args = System.Environment.GetCommandLineArgs();
var inputPort = "";

/// <summary>
/// Initializes the environment, configures it and initialized the Academy.
/// </summary>
private void InitializeEnvironment()
void InitializeEnvironment()
{
m_OriginalGravity = Physics.gravity;
m_OriginalFixedDeltaTime = Time.fixedDeltaTime;

Application.Quit();
}
private void OnResetCommand(EnvironmentResetParameters newResetParameters)
void OnResetCommand(EnvironmentResetParameters newResetParameters)
{
UpdateResetParameters(newResetParameters);
ForcedFullReset();

m_IsInference = !inputParams.isTraining;
}
private void UpdateResetParameters(EnvironmentResetParameters newResetParameters)
void UpdateResetParameters(EnvironmentResetParameters newResetParameters)
{
if (newResetParameters.resetParameters != null)
{

10
UnitySDK/Assets/ML-Agents/Scripts/ActionMasker.cs


{
/// When using discrete control, is the starting indices of the actions
/// when all the branches are concatenated with each other.
private int[] m_StartingActionIndices;
int[] m_StartingActionIndices;
private bool[] m_CurrentMask;
bool[] m_CurrentMask;
private readonly BrainParameters m_BrainParameters;
readonly BrainParameters m_BrainParameters;
public ActionMasker(BrainParameters brainParameters)
{

/// <summary>
/// Makes sure that the current mask is usable.
/// </summary>
private void AssertMask()
void AssertMask()
{
// Action Masks can only be used in Discrete Control.
if (m_BrainParameters.vectorActionSpaceType != SpaceType.Discrete)

/// </summary>
/// <param name="branch"> The index of the branch to check</param>
/// <returns> True if all the actions of the branch are masked</returns>
private bool AreAllActionsMasked(int branch)
bool AreAllActionsMasked(int branch)
{
if (m_CurrentMask == null)
{

183
UnitySDK/Assets/ML-Agents/Scripts/Agent.cs


using UnityEngine;
using Barracuda;
using MLAgents.Sensor;
using UnityEngine.Serialization;
namespace MLAgents
{

public struct AgentInfo
{
/// <summary>
/// Most recent agent vector (i.e. numeric) observation.
/// </summary>
public List<float> vectorObservation;
/// <summary>
/// The previous agent vector observations, stacked. The length of the
/// history (i.e. number of vector observations to stack) is specified
/// in the Brain parameters.
/// </summary>
public List<float> stackedVectorObservation;
/// <summary>
// TODO struct?
public List<float> floatObservations;
/// <summary>
/// Most recent text observation.

public bool[] actionMasks;
/// <summary>
/// Used by the Trainer to store information about the agent. This data
/// structure is not consumed or modified by the agent directly, they are
/// just the owners of their trainier's memory. Currently, however, the
/// size of the memory is in the Brain properties.
/// </summary>
public List<float> memories;
/// <summary>
/// Current agent reward.
/// </summary>
public float reward;

{
public float[] vectorActions;
public string textActions;
public List<float> memories;
public float value;
/// TODO(cgoy): All references to protobuf objects should be removed.
public CommunicatorObjects.CustomActionProto customAction;

[RequireComponent(typeof(BehaviorParameters))]
public abstract class Agent : MonoBehaviour
{
private IPolicy m_Brain;
private BehaviorParameters m_PolicyFactory;
IPolicy m_Brain;
BehaviorParameters m_PolicyFactory;
/// <summary>
/// Agent parameters specified within the Editor via AgentEditor.

int m_Id;
/// Keeps track of the actions that are masked at each step.
private ActionMasker m_ActionMasker;
ActionMasker m_ActionMasker;
private DemonstrationRecorder m_Recorder;
DemonstrationRecorder m_Recorder;
[FormerlySerializedAs("m_Sensors")]
public List<ISensor> sensors;
public VectorSensor collectObservationsSensor;
public List<ISensor> m_Sensors;
WriteAdapter m_WriteAdapter = new WriteAdapter();
/// Monobehavior function that is called when the attached GameObject
/// MonoBehaviour function that is called when the attached GameObject
/// becomes enabled or active.
void OnEnable()
{

{
m_Info = new AgentInfo();
m_Action = new AgentAction();
m_Sensors = new List<ISensor>();
sensors = new List<ISensor>();
if (academy == null)
{

/// will categorize the agent when training.
/// </param>
/// <param name="model"> The model to use for inference.</param>
/// <param name = "inferenceDevide"> Define on what device the model
/// <param name = "inferenceDevice"> Define on what device the model
/// will be run.</param>
public void GiveModel(
string behaviorName,

if (m_Info.textObservation == null)
m_Info.textObservation = "";
m_Action.textActions = "";
m_Info.memories = new List<float>();
m_Action.memories = new List<float>();
m_Info.vectorObservation =
new List<float>(param.vectorObservationSize);
m_Info.stackedVectorObservation =
new List<float>(param.vectorObservationSize
* param.numStackedVectorObservations);
m_Info.stackedVectorObservation.AddRange(
new float[param.vectorObservationSize
* param.numStackedVectorObservations]);
m_Info.floatObservations = new List<float>();
m_Info.floatObservations.AddRange(
new float[param.vectorObservationSize
* param.numStackedVectorObservations]);
m_Info.customObservation = null;
}

/// </summary>
public void InitializeSensors()
{
// Get all attached sensor components
m_Sensors.Capacity += attachedSensorComponents.Length;
sensors.Capacity += attachedSensorComponents.Length;
m_Sensors.Add(component.CreateSensor());
sensors.Add(component.CreateSensor());
// Sort the sensors by name to ensure determinism
m_Sensors.Sort((x, y) => x.GetName().CompareTo(y.GetName()));
// Support legacy CollectObservations
var param = m_PolicyFactory.brainParameters;
if (param.vectorObservationSize > 0)
{
collectObservationsSensor = new VectorSensor(param.vectorObservationSize);
if (param.numStackedVectorObservations > 1)
{
var stackingSensor = new StackingSensor(collectObservationsSensor, param.numStackedVectorObservations);
sensors.Add(stackingSensor);
}
else
{
sensors.Add(collectObservationsSensor);
}
}
// Sort the Sensors by name to ensure determinism
sensors.Sort((x, y) => x.GetName().CompareTo(y.GetName()));
for (var i = 0; i < m_Sensors.Count - 1; i++)
for (var i = 0; i < sensors.Count - 1; i++)
Debug.Assert(!m_Sensors[i].GetName().Equals(m_Sensors[i + 1].GetName()), "Sensor names must be unique.");
Debug.Assert(!sensors[i].GetName().Equals(sensors[i + 1].GetName()), "Sensor names must be unique.");
}
#endif
}

return;
}
m_Info.memories = m_Action.memories;
m_Info.vectorObservation.Clear();
m_Info.compressedObservations.Clear();
m_ActionMasker.ResetMask();
using (TimerStack.Instance.Scoped("CollectObservations"))

m_Info.actionMasks = m_ActionMasker.GetMask();
var param = m_PolicyFactory.brainParameters;
if (m_Info.vectorObservation.Count != param.vectorObservationSize)
{
throw new UnityAgentsException(string.Format(
"Vector Observation size mismatch in continuous " +
"agent {0}. " +
"Was Expecting {1} but received {2}. ",
gameObject.name,
param.vectorObservationSize,
m_Info.vectorObservation.Count));
}
Utilities.ShiftLeft(m_Info.stackedVectorObservation, param.vectorObservationSize);
Utilities.ReplaceRange(m_Info.stackedVectorObservation, m_Info.vectorObservation,
m_Info.stackedVectorObservation.Count - m_Info.vectorObservation.Count);
// var param = m_PolicyFactory.brainParameters; // look, no brain params!
m_Info.reward = m_Reward;
m_Info.done = m_Done;

/// <summary>
/// Generate data for each sensor and store it on the Agent's AgentInfo.
/// NOTE: At the moment, this is only called during training or when using a DemonstrationRecorder;
/// during inference the sensors are used to write directly to the Tensor data. This will likely change in the
/// during inference the Sensors are used to write directly to the Tensor data. This will likely change in the
// Generate data for all sensors
// TODO add bool argument indicating when to compress? For now, we always will compress.
for (var i = 0; i < m_Sensors.Count; i++)
int floatsWritten = 0;
// Generate data for all Sensors
for (var i = 0; i < sensors.Count; i++)
var sensor = m_Sensors[i];
var compressedObs = new CompressedObservation
var sensor = sensors[i];
if (sensor.GetCompressionType() == SensorCompressionType.None)
{
m_WriteAdapter.SetTarget(m_Info.floatObservations, floatsWritten);
floatsWritten += sensor.Write(m_WriteAdapter);
}
else
Data = sensor.GetCompressedObservation(),
Shape = sensor.GetFloatObservationShape(),
CompressionType = sensor.GetCompressionType()
};
m_Info.compressedObservations.Add(compressedObs);
var compressedObs = new CompressedObservation
{
Data = sensor.GetCompressedObservation(),
Shape = sensor.GetFloatObservationShape(),
CompressionType = sensor.GetCompressionType()
};
m_Info.compressedObservations.Add(compressedObs);
}
}
}

/// <param name="observation">Observation.</param>
protected void AddVectorObs(float observation)
{
m_Info.vectorObservation.Add(observation);
collectObservationsSensor.AddObservation(observation);
}
/// <summary>

/// <param name="observation">Observation.</param>
protected void AddVectorObs(int observation)
{
m_Info.vectorObservation.Add(observation);
collectObservationsSensor.AddObservation(observation);
}
/// <summary>

/// <param name="observation">Observation.</param>
protected void AddVectorObs(Vector3 observation)
{
m_Info.vectorObservation.Add(observation.x);
m_Info.vectorObservation.Add(observation.y);
m_Info.vectorObservation.Add(observation.z);
collectObservationsSensor.AddObservation(observation);
}
/// <summary>

/// <param name="observation">Observation.</param>
protected void AddVectorObs(Vector2 observation)
{
m_Info.vectorObservation.Add(observation.x);
m_Info.vectorObservation.Add(observation.y);
collectObservationsSensor.AddObservation(observation);
}
/// <summary>

/// <param name="observation">Observation.</param>
protected void AddVectorObs(IEnumerable<float> observation)
{
m_Info.vectorObservation.AddRange(observation);
collectObservationsSensor.AddObservation(observation);
}
/// <summary>

/// <param name="observation">Observation.</param>
protected void AddVectorObs(Quaternion observation)
{
m_Info.vectorObservation.Add(observation.x);
m_Info.vectorObservation.Add(observation.y);
m_Info.vectorObservation.Add(observation.z);
m_Info.vectorObservation.Add(observation.w);
collectObservationsSensor.AddObservation(observation);
}
/// <summary>

/// <param name="observation"></param>
protected void AddVectorObs(bool observation)
{
m_Info.vectorObservation.Add(observation ? 1f : 0f);
collectObservationsSensor.AddObservation(observation);
var oneHotVector = new float[range];
oneHotVector[observation] = 1;
m_Info.vectorObservation.AddRange(oneHotVector);
collectObservationsSensor.AddOneHotObservation(observation, range);
}
/// <summary>

public void UpdateVectorAction(float[] vectorActions)
{
m_Action.vectorActions = vectorActions;
}
/// <summary>
/// Updates the memories action.
/// </summary>
/// <param name="memories">Memories.</param>
public void UpdateMemoriesAction(List<float> memories)
{
m_Action.memories = memories;
}
public void AppendMemoriesAction(List<float> memories)
{
m_Action.memories.AddRange(memories);
}
public List<float> GetMemoriesAction()
{
return m_Action.memories;
}
/// <summary>

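To make the dense Agent.cs hunks above easier to follow: the AddVectorObs overloads keep their public signatures, but their bodies now forward into collectObservationsSensor (a VectorSensor created in InitializeSensors), and per-step data lands in m_Info.floatObservations through a WriteAdapter instead of the old vectorObservation / stackedVectorObservation / memories lists. A minimal sketch of a caller that is unaffected by the change; the class name is hypothetical:

using MLAgents;
using UnityEngine;

public class ExampleObservingAgent : Agent  // hypothetical example, not from the commit
{
    public override void CollectObservations()
    {
        // Same calls as before this commit; internally they now append to
        // collectObservationsSensor rather than m_Info.vectorObservation.
        AddVectorObs(transform.localPosition);  // Vector3 -> 3 floats
        AddVectorObs(true);                     // bool -> 1f or 0f
        AddVectorObs(2, 5);                     // one-hot: index 2 over a range of 5
    }
}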
14
UnitySDK/Assets/ML-Agents/Scripts/DemonstrationRecorder.cs


{
public bool record;
public string demonstrationName;
private Agent m_RecordingAgent;
private string m_FilePath;
private DemonstrationStore m_DemoStore;
Agent m_RecordingAgent;
string m_FilePath;
DemonstrationStore m_DemoStore;
private void Start()
void Start()
{
if (Application.isEditor && record)
{

private void Update()
void Update()
{
if (Application.isEditor && record && m_DemoStore == null)
{

/// <summary>
/// Creates demonstration store for use in recording.
/// </summary>
private void InitializeDemoStore()
void InitializeDemoStore()
{
m_RecordingAgent = GetComponent<Agent>();
m_DemoStore = new DemonstrationStore();

/// <summary>
/// Closes Demonstration store.
/// </summary>
private void OnApplicationQuit()
void OnApplicationQuit()
{
if (Application.isEditor && record && m_DemoStore != null)
{

24
UnitySDK/Assets/ML-Agents/Scripts/DemonstrationStore.cs


public class DemonstrationStore
{
public const int MetaDataBytes = 32; // Number of bytes allocated to metadata in demo file.
private readonly IFileSystem m_FileSystem;
private const string k_DemoDirecory = "Assets/Demonstrations/";
private const string k_ExtensionType = ".demo";
readonly IFileSystem m_FileSystem;
const string k_DemoDirecory = "Assets/Demonstrations/";
const string k_ExtensionType = ".demo";
private string m_FilePath;
private DemonstrationMetaData m_MetaData;
private Stream m_Writer;
private float m_CumulativeReward;
string m_FilePath;
DemonstrationMetaData m_MetaData;
Stream m_Writer;
float m_CumulativeReward;
public DemonstrationStore(IFileSystem fileSystem)
{

/// Checks for the existence of the Demonstrations directory
/// and creates it if it does not exist.
/// </summary>
private void CreateDirectory()
void CreateDirectory()
{
if (!m_FileSystem.Directory.Exists(k_DemoDirecory))
{

/// <summary>
/// Creates demonstration file.
/// </summary>
private void CreateDemonstrationFile(string demonstrationName)
void CreateDemonstrationFile(string demonstrationName)
{
// Creates demonstration file.
var literalName = demonstrationName;

/// <summary>
/// Writes brain parameters to file.
/// </summary>
private void WriteBrainParameters(string brainName, BrainParameters brainParameters)
void WriteBrainParameters(string brainName, BrainParameters brainParameters)
{
// Writes BrainParameters to file.
m_Writer.Seek(MetaDataBytes + 1, 0);

/// <summary>
/// Performs necessary episode-completion steps.
/// </summary>
private void EndEpisode()
void EndEpisode()
{
m_MetaData.numberEpisodes += 1;
}

/// </summary>
private void WriteMetadata()
void WriteMetadata()
{
var metaProto = m_MetaData.ToProto();
var metaProtoBytes = metaProto.ToByteArray();

33
UnitySDK/Assets/ML-Agents/Scripts/Grpc/CommunicatorObjects/AgentAction.cs


string.Concat(
"CjVtbGFnZW50cy9lbnZzL2NvbW11bmljYXRvcl9vYmplY3RzL2FnZW50X2Fj",
"dGlvbi5wcm90bxIUY29tbXVuaWNhdG9yX29iamVjdHMaNm1sYWdlbnRzL2Vu",
"dnMvY29tbXVuaWNhdG9yX29iamVjdHMvY3VzdG9tX2FjdGlvbi5wcm90byKh",
"dnMvY29tbXVuaWNhdG9yX29iamVjdHMvY3VzdG9tX2FjdGlvbi5wcm90byKV",
"Cgx0ZXh0X2FjdGlvbnMYAiABKAkSEAoIbWVtb3JpZXMYAyADKAISDQoFdmFs",
"dWUYBCABKAISPgoNY3VzdG9tX2FjdGlvbhgFIAEoCzInLmNvbW11bmljYXRv",
"cl9vYmplY3RzLkN1c3RvbUFjdGlvblByb3RvQh+qAhxNTEFnZW50cy5Db21t",
"dW5pY2F0b3JPYmplY3RzYgZwcm90bzM="));
"Cgx0ZXh0X2FjdGlvbnMYAiABKAkSDQoFdmFsdWUYBCABKAISPgoNY3VzdG9t",
"X2FjdGlvbhgFIAEoCzInLmNvbW11bmljYXRvcl9vYmplY3RzLkN1c3RvbUFj",
"dGlvblByb3RvSgQIAxAEQh+qAhxNTEFnZW50cy5Db21tdW5pY2F0b3JPYmpl",
"Y3RzYgZwcm90bzM="));
new pbr::GeneratedClrTypeInfo(typeof(global::MLAgents.CommunicatorObjects.AgentActionProto), global::MLAgents.CommunicatorObjects.AgentActionProto.Parser, new[]{ "VectorActions", "TextActions", "Memories", "Value", "CustomAction" }, null, null, null)
new pbr::GeneratedClrTypeInfo(typeof(global::MLAgents.CommunicatorObjects.AgentActionProto), global::MLAgents.CommunicatorObjects.AgentActionProto.Parser, new[]{ "VectorActions", "TextActions", "Value", "CustomAction" }, null, null, null)
}));
}
#endregion

public AgentActionProto(AgentActionProto other) : this() {
vectorActions_ = other.vectorActions_.Clone();
textActions_ = other.textActions_;
memories_ = other.memories_.Clone();
value_ = other.value_;
CustomAction = other.customAction_ != null ? other.CustomAction.Clone() : null;
_unknownFields = pb::UnknownFieldSet.Clone(other._unknownFields);

}
}
/// <summary>Field number for the "memories" field.</summary>
public const int MemoriesFieldNumber = 3;
private static readonly pb::FieldCodec<float> _repeated_memories_codec
= pb::FieldCodec.ForFloat(26);
private readonly pbc::RepeatedField<float> memories_ = new pbc::RepeatedField<float>();
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public pbc::RepeatedField<float> Memories {
get { return memories_; }
}
/// <summary>Field number for the "value" field.</summary>
public const int ValueFieldNumber = 4;
private float value_;

}
if(!vectorActions_.Equals(other.vectorActions_)) return false;
if (TextActions != other.TextActions) return false;
if(!memories_.Equals(other.memories_)) return false;
if (!pbc::ProtobufEqualityComparers.BitwiseSingleEqualityComparer.Equals(Value, other.Value)) return false;
if (!object.Equals(CustomAction, other.CustomAction)) return false;
return Equals(_unknownFields, other._unknownFields);

int hash = 1;
hash ^= vectorActions_.GetHashCode();
if (TextActions.Length != 0) hash ^= TextActions.GetHashCode();
hash ^= memories_.GetHashCode();
if (Value != 0F) hash ^= pbc::ProtobufEqualityComparers.BitwiseSingleEqualityComparer.GetHashCode(Value);
if (customAction_ != null) hash ^= CustomAction.GetHashCode();
if (_unknownFields != null) {

output.WriteRawTag(18);
output.WriteString(TextActions);
}
memories_.WriteTo(output, _repeated_memories_codec);
if (Value != 0F) {
output.WriteRawTag(37);
output.WriteFloat(Value);

if (TextActions.Length != 0) {
size += 1 + pb::CodedOutputStream.ComputeStringSize(TextActions);
}
size += memories_.CalculateSize(_repeated_memories_codec);
if (Value != 0F) {
size += 1 + 4;
}

if (other.TextActions.Length != 0) {
TextActions = other.TextActions;
}
memories_.Add(other.memories_);
if (other.Value != 0F) {
Value = other.Value;
}

}
case 18: {
TextActions = input.ReadString();
break;
}
case 26:
case 29: {
memories_.AddEntriesFrom(input, _repeated_memories_codec);
break;
}
case 37: {

41
UnitySDK/Assets/ML-Agents/Scripts/Grpc/CommunicatorObjects/AgentInfo.cs


"Zm8ucHJvdG8SFGNvbW11bmljYXRvcl9vYmplY3RzGj9tbGFnZW50cy9lbnZz",
"L2NvbW11bmljYXRvcl9vYmplY3RzL2NvbXByZXNzZWRfb2JzZXJ2YXRpb24u",
"cHJvdG8aO21sYWdlbnRzL2VudnMvY29tbXVuaWNhdG9yX29iamVjdHMvY3Vz",
"dG9tX29ic2VydmF0aW9uLnByb3RvIpgDCg5BZ2VudEluZm9Qcm90bxIiChpz",
"dG9tX29ic2VydmF0aW9uLnByb3RvIowDCg5BZ2VudEluZm9Qcm90bxIiChpz",
"ChNzdG9yZWRfdGV4dF9hY3Rpb25zGAUgASgJEhAKCG1lbW9yaWVzGAYgAygC",
"Eg4KBnJld2FyZBgHIAEoAhIMCgRkb25lGAggASgIEhgKEG1heF9zdGVwX3Jl",
"YWNoZWQYCSABKAgSCgoCaWQYCiABKAUSEwoLYWN0aW9uX21hc2sYCyADKAgS",
"SAoSY3VzdG9tX29ic2VydmF0aW9uGAwgASgLMiwuY29tbXVuaWNhdG9yX29i",
"amVjdHMuQ3VzdG9tT2JzZXJ2YXRpb25Qcm90bxJRChdjb21wcmVzc2VkX29i",
"c2VydmF0aW9ucxgNIAMoCzIwLmNvbW11bmljYXRvcl9vYmplY3RzLkNvbXBy",
"ZXNzZWRPYnNlcnZhdGlvblByb3RvSgQIAhADQh+qAhxNTEFnZW50cy5Db21t",
"dW5pY2F0b3JPYmplY3RzYgZwcm90bzM="));
"ChNzdG9yZWRfdGV4dF9hY3Rpb25zGAUgASgJEg4KBnJld2FyZBgHIAEoAhIM",
"CgRkb25lGAggASgIEhgKEG1heF9zdGVwX3JlYWNoZWQYCSABKAgSCgoCaWQY",
"CiABKAUSEwoLYWN0aW9uX21hc2sYCyADKAgSSAoSY3VzdG9tX29ic2VydmF0",
"aW9uGAwgASgLMiwuY29tbXVuaWNhdG9yX29iamVjdHMuQ3VzdG9tT2JzZXJ2",
"YXRpb25Qcm90bxJRChdjb21wcmVzc2VkX29ic2VydmF0aW9ucxgNIAMoCzIw",
"LmNvbW11bmljYXRvcl9vYmplY3RzLkNvbXByZXNzZWRPYnNlcnZhdGlvblBy",
"b3RvSgQIAhADSgQIBhAHQh+qAhxNTEFnZW50cy5Db21tdW5pY2F0b3JPYmpl",
"Y3RzYgZwcm90bzM="));
new pbr::GeneratedClrTypeInfo(typeof(global::MLAgents.CommunicatorObjects.AgentInfoProto), global::MLAgents.CommunicatorObjects.AgentInfoProto.Parser, new[]{ "StackedVectorObservation", "TextObservation", "StoredVectorActions", "StoredTextActions", "Memories", "Reward", "Done", "MaxStepReached", "Id", "ActionMask", "CustomObservation", "CompressedObservations" }, null, null, null)
new pbr::GeneratedClrTypeInfo(typeof(global::MLAgents.CommunicatorObjects.AgentInfoProto), global::MLAgents.CommunicatorObjects.AgentInfoProto.Parser, new[]{ "StackedVectorObservation", "TextObservation", "StoredVectorActions", "StoredTextActions", "Reward", "Done", "MaxStepReached", "Id", "ActionMask", "CustomObservation", "CompressedObservations" }, null, null, null)
}));
}
#endregion

textObservation_ = other.textObservation_;
storedVectorActions_ = other.storedVectorActions_.Clone();
storedTextActions_ = other.storedTextActions_;
memories_ = other.memories_.Clone();
reward_ = other.reward_;
done_ = other.done_;
maxStepReached_ = other.maxStepReached_;

}
}
/// <summary>Field number for the "memories" field.</summary>
public const int MemoriesFieldNumber = 6;
private static readonly pb::FieldCodec<float> _repeated_memories_codec
= pb::FieldCodec.ForFloat(50);
private readonly pbc::RepeatedField<float> memories_ = new pbc::RepeatedField<float>();
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public pbc::RepeatedField<float> Memories {
get { return memories_; }
}
/// <summary>Field number for the "reward" field.</summary>
public const int RewardFieldNumber = 7;
private float reward_;

if (TextObservation != other.TextObservation) return false;
if(!storedVectorActions_.Equals(other.storedVectorActions_)) return false;
if (StoredTextActions != other.StoredTextActions) return false;
if(!memories_.Equals(other.memories_)) return false;
if (!pbc::ProtobufEqualityComparers.BitwiseSingleEqualityComparer.Equals(Reward, other.Reward)) return false;
if (Done != other.Done) return false;
if (MaxStepReached != other.MaxStepReached) return false;

if (TextObservation.Length != 0) hash ^= TextObservation.GetHashCode();
hash ^= storedVectorActions_.GetHashCode();
if (StoredTextActions.Length != 0) hash ^= StoredTextActions.GetHashCode();
hash ^= memories_.GetHashCode();
if (Reward != 0F) hash ^= pbc::ProtobufEqualityComparers.BitwiseSingleEqualityComparer.GetHashCode(Reward);
if (Done != false) hash ^= Done.GetHashCode();
if (MaxStepReached != false) hash ^= MaxStepReached.GetHashCode();

output.WriteRawTag(42);
output.WriteString(StoredTextActions);
}
memories_.WriteTo(output, _repeated_memories_codec);
if (Reward != 0F) {
output.WriteRawTag(61);
output.WriteFloat(Reward);

if (StoredTextActions.Length != 0) {
size += 1 + pb::CodedOutputStream.ComputeStringSize(StoredTextActions);
}
size += memories_.CalculateSize(_repeated_memories_codec);
if (Reward != 0F) {
size += 1 + 4;
}

if (other.StoredTextActions.Length != 0) {
StoredTextActions = other.StoredTextActions;
}
memories_.Add(other.memories_);
if (other.Reward != 0F) {
Reward = other.Reward;
}

}
case 42: {
StoredTextActions = input.ReadString();
break;
}
case 50:
case 53: {
memories_.AddEntriesFrom(input, _repeated_memories_codec);
break;
}
case 61: {

9
UnitySDK/Assets/ML-Agents/Scripts/Grpc/GrpcExtensions.cs


{
var agentInfoProto = new AgentInfoProto
{
StackedVectorObservation = { ai.stackedVectorObservation },
StackedVectorObservation = { ai.floatObservations },
StoredVectorActions = { ai.storedVectorActions },
StoredTextActions = ai.storedTextActions,
TextObservation = ai.textObservation,

Id = ai.id,
CustomObservation = ai.customObservation
};
if (ai.memories != null)
{
agentInfoProto.Memories.Add(ai.memories);
}
if (ai.actionMasks != null)
{

{
vectorActions = aap.VectorActions.ToArray(),
textActions = aap.TextActions,
memories = aap.Memories.ToList(),
value = aap.Value,
customAction = aap.CustomAction
};

var obsProto = new CompressedObservationProto
{
Data = ByteString.CopyFrom(obs.Data),
CompressionType = (CompressionTypeProto) obs.CompressionType,
CompressionType = (CompressionTypeProto)obs.CompressionType,
};
obsProto.Shape.AddRange(obs.Shape);
return obsProto;

41
UnitySDK/Assets/ML-Agents/Scripts/Grpc/RpcCommunicator.cs


bool m_IsOpen;
/// The default number of agents in the scene
private const int k_NumAgents = 32;
const int k_NumAgents = 32;
/// Keeps track of the agents of each brain on the current step
Dictionary<string, List<Agent>> m_CurrentAgents =

new Dictionary<string, Dictionary<Agent, AgentAction>>();
// Brains that we have sent over the communicator with agents.
HashSet<string> m_sentBrainKeys = new HashSet<string>();
Dictionary<string, BrainParameters> m_unsentBrainKeys = new Dictionary<string, BrainParameters>();
HashSet<string> m_SentBrainKeys = new HashSet<string>();
Dictionary<string, BrainParameters> m_UnsentBrainKeys = new Dictionary<string, BrainParameters>();
# if UNITY_EDITOR || UNITY_STANDALONE_WIN || UNITY_STANDALONE_OSX || UNITY_STANDALONE_LINUX

SendCommandEvent(rlInput.Command, rlInput.EnvironmentParameters);
}
private UnityInputProto Initialize(UnityOutputProto unityOutput,
UnityInputProto Initialize(UnityOutputProto unityOutput,
out UnityInputProto unityInput)
{
# if UNITY_EDITOR || UNITY_STANDALONE_WIN || UNITY_STANDALONE_OSX || UNITY_STANDALONE_LINUX

#endregion
#region Sending Events
private void SendCommandEvent(CommandProto command, EnvironmentParametersProto environmentParametersProto)
void SendCommandEvent(CommandProto command, EnvironmentParametersProto environmentParametersProto)
{
switch (command)
{

}
}
private void SendRLInputReceivedEvent(bool isTraining)
void SendRLInputReceivedEvent(bool isTraining)
{
RLInputReceived?.Invoke(new UnityRLInputParameters { isTraining = isTraining });
}

/// <summary>
/// Sends the observations of one Agent.
/// </summary>
/// <param name="key">Batch Key.</param>
/// <param name="agents">Agent info.</param>
/// <param name="brainKey">Batch Key.</param>
/// <param name="agent">Agent info.</param>
public void PutObservations(string brainKey, Agent agent)
{
m_CurrentAgents[brainKey].Add(agent);

/// </summary>
/// <returns>The next UnityInput.</returns>
/// <param name="unityOutput">The UnityOutput to be sent.</param>
private UnityInputProto Exchange(UnityOutputProto unityOutput)
UnityInputProto Exchange(UnityOutputProto unityOutput)
{
# if UNITY_EDITOR || UNITY_STANDALONE_WIN || UNITY_STANDALONE_OSX || UNITY_STANDALONE_LINUX
if (!m_IsOpen)

/// <returns>The corresponding UnityMessage.</returns>
/// <param name="content">The UnityOutput to be wrapped.</param>
/// <param name="status">The status of the message.</param>
private static UnityMessageProto WrapMessage(UnityOutputProto content, int status)
static UnityMessageProto WrapMessage(UnityOutputProto content, int status)
{
return new UnityMessageProto
{

}
private void CacheBrainParameters(string brainKey, BrainParameters brainParameters)
void CacheBrainParameters(string brainKey, BrainParameters brainParameters)
if (m_sentBrainKeys.Contains(brainKey))
if (m_SentBrainKeys.Contains(brainKey))
m_unsentBrainKeys[brainKey] = brainParameters;
m_UnsentBrainKeys[brainKey] = brainParameters;
private UnityRLInitializationOutputProto GetTempUnityRlInitializationOutput()
UnityRLInitializationOutputProto GetTempUnityRlInitializationOutput()
foreach (var brainKey in m_unsentBrainKeys.Keys)
foreach (var brainKey in m_UnsentBrainKeys.Keys)
{
if (m_CurrentUnityRlOutput.AgentInfos.ContainsKey(brainKey))
{

}
var brainParameters = m_unsentBrainKeys[brainKey];
var brainParameters = m_UnsentBrainKeys[brainKey];
output.BrainParameters.Add(brainParameters.ToProto(brainKey, true));
}
}

private void UpdateSentBrainParameters(UnityRLInitializationOutputProto output)
void UpdateSentBrainParameters(UnityRLInitializationOutputProto output)
{
if (output == null)
{

foreach (var brainProto in output.BrainParameters)
{
m_sentBrainKeys.Add(brainProto.BrainName);
m_unsentBrainKeys.Remove(brainProto.BrainName);
m_SentBrainKeys.Add(brainProto.BrainName);
m_UnsentBrainKeys.Remove(brainProto.BrainName);
}
}

/// When the editor exits, the communicator must be closed
/// </summary>
/// <param name="state">State.</param>
private void HandleOnPlayModeChanged(PlayModeStateChange state)
void HandleOnPlayModeChanged(PlayModeStateChange state)
{
// This method is run whenever the playmode state is changed.
if (state == PlayModeStateChange.ExitingPlayMode)

22
UnitySDK/Assets/ML-Agents/Scripts/ICommunicator.cs


UnityOutput and UnityInput can be extended to provide functionalities beyond RL
UnityRLOutput and UnityRLInput can be extended to provide new RL functionalities
*/
public interface ICommunicator : IBatchedDecisionMaker
public interface ICommunicator
{
/// <summary>
/// Quit was received by the communicator.

void SubscribeBrain(string name, BrainParameters brainParameters);
/// <summary>
/// Sends the observations of one Agent.
/// </summary>
/// <param name="brainKey">Batch Key.</param>
/// <param name="agent">Agent info.</param>
void PutObservations(string brainKey, Agent agent);
/// <summary>
/// Signals the ICommunicator that the Agents are now ready to receive their action
/// and that if the communicator has not yet received an action for one of the Agents
/// it needs to get one at this point.
/// </summary>
void DecideBatch();
/// <summary>
}
public interface IBatchedDecisionMaker : IDisposable
{
void PutObservations(string key, Agent agent);
void DecideBatch();
}
}
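The PutObservations/DecideBatch pair above defines a batching contract: callers queue each Agent that needs a decision under its brain key, and a single DecideBatch call must then produce an action for every queued Agent. A self-contained sketch of that contract; the generic TAgent and the decide callback are placeholders, not ML-Agents types:

using System;
using System.Collections.Generic;

class BatchedDecisionSketch<TAgent>
{
    readonly Dictionary<string, List<TAgent>> m_Pending = new Dictionary<string, List<TAgent>>();
    readonly Action<string, List<TAgent>> m_DecideForBatch;

    public BatchedDecisionSketch(Action<string, List<TAgent>> decideForBatch)
    {
        m_DecideForBatch = decideForBatch;
    }

    // PutObservations(brainKey, agent): queue the agent under its key.
    public void PutObservations(string brainKey, TAgent agent)
    {
        if (!m_Pending.TryGetValue(brainKey, out var batch))
        {
            batch = new List<TAgent>();
            m_Pending[brainKey] = batch;
        }
        batch.Add(agent);
    }

    // DecideBatch(): every queued agent must receive an action now.
    public void DecideBatch()
    {
        foreach (var pair in m_Pending)
        {
            m_DecideForBatch(pair.Key, pair.Value);
            pair.Value.Clear();
        }
    }
}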

54
UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/ApplierImpl.cs


/// </summary>
public class DiscreteActionOutputApplier : TensorApplier.IApplier
{
private readonly int[] m_ActionSize;
private readonly Multinomial m_Multinomial;
private readonly ITensorAllocator m_Allocator;
readonly int[] m_ActionSize;
readonly Multinomial m_Multinomial;
readonly ITensorAllocator m_Allocator;
public DiscreteActionOutputApplier(int[] actionSize, int seed, ITensorAllocator allocator)
{

var actionProbs = new TensorProxy()
{
valueType = TensorProxy.TensorType.FloatingPoint,
shape = new long[] {batchSize, nBranchAction},
shape = new long[] { batchSize, nBranchAction },
data = m_Allocator.Alloc(new TensorShape(batchSize, nBranchAction))
};

var outputTensor = new TensorProxy()
{
valueType = TensorProxy.TensorType.FloatingPoint,
shape = new long[] {batchSize, 1},
shape = new long[] { batchSize, 1 },
data = m_Allocator.Alloc(new TensorShape(batchSize, 1))
};

public class BarracudaMemoryOutputApplier : TensorApplier.IApplier
{
private readonly int m_MemoriesCount;
private readonly int m_MemoryIndex;
readonly int m_MemoriesCount;
readonly int m_MemoryIndex;
Dictionary<int, List<float>> m_Memories;
public BarracudaMemoryOutputApplier(int memoriesCount, int memoryIndex)
public BarracudaMemoryOutputApplier(
int memoriesCount,
int memoryIndex,
Dictionary<int, List<float>> memories)
m_Memories = memories;
}
public void Apply(TensorProxy tensorProxy, IEnumerable<Agent> agents)

foreach (var agent in agents)
{
var memory = agent.GetMemoriesAction();
if (memory == null || memory.Count < memorySize * m_MemoriesCount)
List<float> memory = null;
if (!m_Memories.TryGetValue(agent.Info.id, out memory)
|| memory.Count < memorySize * m_MemoriesCount)
{
memory = new List<float>();
memory.AddRange(Enumerable.Repeat(0f, memorySize * m_MemoriesCount));

memory[memorySize * m_MemoryIndex + j] = tensorProxy.data[agentIndex, j];
}
agent.UpdateMemoriesAction(memory);
m_Memories[agent.Info.id] = memory;
/// <summary>
/// The Applier for the Memory output tensor. Tensor is assumed to contain the new
/// memory data of the agents in the batch.
/// </summary>
public class MemoryOutputApplier : TensorApplier.IApplier
{
public void Apply(TensorProxy tensorProxy, IEnumerable<Agent> agents)
{
var agentIndex = 0;
var memorySize = tensorProxy.shape[tensorProxy.shape.Length - 1];
foreach (var agent in agents)
{
var memory = new List<float>();
for (var j = 0; j < memorySize; j++)
{
memory.Add(tensorProxy.data[agentIndex, j]);
}
agent.UpdateMemoriesAction(memory);
agentIndex++;
}
}
}
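The change above moves recurrent memories off the Agent (the removed GetMemoriesAction/UpdateMemoriesAction calls) and into a Dictionary<int, List<float>> shared by the recurrent generators and appliers, keyed by AgentInfo.id. A minimal sketch of the lookup-or-initialize step both sides rely on, with a hypothetical memory size:

using System.Collections.Generic;
using System.Linq;

class MemoryDictionarySketch
{
    // Read by the recurrent input generators, written by the memory output appliers.
    readonly Dictionary<int, List<float>> m_Memories = new Dictionary<int, List<float>>();

    public List<float> GetOrCreate(int agentId, int memorySize)
    {
        List<float> memory;
        if (!m_Memories.TryGetValue(agentId, out memory) || memory.Count < memorySize)
        {
            // First decision for this agent (or a stale entry): start from zeroed memory.
            memory = Enumerable.Repeat(0f, memorySize).ToList();
            m_Memories[agentId] = memory;
        }
        return memory;
    }
}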
/// <summary>
/// The Applier for the Value Estimate output tensor. Tensor is assumed to contain the

25
UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/BarracudaModelParamLoader.cs


/// </summary>
public class BarracudaModelParamLoader
{
private enum ModelActionType
enum ModelActionType
private const long k_ApiVersion = 2;
const long k_ApiVersion = 2;
/// <summary>
/// Generates the Tensor inputs that are expected to be present in the Model.

/// The integer value in the model indicating the type of control
/// </param>
/// <returns>The equivalent ModelActionType</returns>
private static ModelActionType GetActionType(int isContinuousInt)
static ModelActionType GetActionType(int isContinuousInt)
{
ModelActionType isContinuous;
switch (isContinuousInt)

/// </summary>
/// <param name="requiredScalarFields"> Mapping from node names to int values</param>
/// <returns>The list of error messages from the checks that failed</returns>
private static IEnumerable<string> CheckIntScalarPresenceHelper(
static IEnumerable<string> CheckIntScalarPresenceHelper(
Dictionary<string, int> requiredScalarFields)
{
var failedModelChecks = new List<string>();

/// <returns>
/// An IEnumerable of string corresponding to the failed input presence checks.
/// </returns>
private static IEnumerable<string> CheckInputTensorPresence(
static IEnumerable<string> CheckInputTensorPresence(
Model model,
BrainParameters brainParameters,
int memory,

/// <returns>
/// An IEnumerable of string corresponding to the failed output presence checks.
/// </returns>
private static IEnumerable<string> CheckOutputTensorPresence(Model model, int memory)
static IEnumerable<string> CheckOutputTensorPresence(Model model, int memory)
{
var failedModelChecks = new List<string>();
// If there is no Action Output.

/// The BrainParameters that are used to verify the compatibility with the InferenceEngine
/// </param>
/// <returns>The list of error messages from the checks that failed</returns>
private static IEnumerable<string> CheckInputTensorShape(
static IEnumerable<string> CheckInputTensorShape(
Model model, BrainParameters brainParameters)
{
var failedModelChecks = new List<string>();

/// If the Check failed, returns a string containing information about why the
/// check failed. If the check passed, returns null.
/// </returns>
private static string CheckVectorObsShape(
static string CheckVectorObsShape(
BrainParameters brainParameters, TensorProxy tensorProxy)
{
var vecObsSizeBp = brainParameters.vectorObservationSize;

/// <param name="tensorProxy"> The tensor that is expected by the model</param>
/// <returns>If the Check failed, returns a string containing information about why the
/// check failed. If the check passed, returns null.</returns>
private static string CheckPreviousActionShape(
static string CheckPreviousActionShape(
BrainParameters brainParameters, TensorProxy tensorProxy)
{
var numberActionsBp = brainParameters.vectorActionSize.Length;

/// An IEnumerable of string corresponding to the incompatible shapes between model
/// and BrainParameters.
/// </returns>
private static IEnumerable<string> CheckOutputTensorShape(
static IEnumerable<string> CheckOutputTensorShape(
Model model,
BrainParameters brainParameters,
ModelActionType isContinuous,

/// If the Check failed, returns a string containing information about why the
/// check failed. If the check passed, returns null.
/// </returns>
private static string CheckDiscreteActionOutputShape(
static string CheckDiscreteActionOutputShape(
BrainParameters brainParameters, TensorShape shape, int modelActionSize)
{
var bpActionSize = brainParameters.vectorActionSize.Sum();

/// </param>
/// <returns>If the Check failed, returns a string containing information about why the
/// check failed. If the check passed, returns null.</returns>
private static string CheckContinuousActionOutputShape(
static string CheckContinuousActionOutputShape(
BrainParameters brainParameters, TensorShape shape, int modelActionSize)
{
var bpActionSize = brainParameters.vectorActionSize[0];

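Each Check* helper above compares one aspect of the BrainParameters against the corresponding model tensor and reports either null or an error message. A minimal sketch of that pattern for the vector-observation case, using hypothetical numbers rather than shapes read from a Barracuda model:

using System;

class ModelCheckSketch
{
    // Returns null on success, or a message describing the mismatch.
    static string CheckVectorObsShape(int vecObsSizeBp, int numStacked, long modelObsSize)
    {
        var totalExpected = vecObsSizeBp * numStacked;
        return totalExpected == modelObsSize
            ? null
            : $"Vector observation size mismatch: brain provides {vecObsSizeBp} x {numStacked} " +
              $"= {totalExpected} floats but the model expects {modelObsSize}.";
    }

    static void Main()
    {
        // Hypothetical numbers: 8 observations stacked 3 times vs. a model expecting 24 inputs.
        Console.WriteLine(CheckVectorObsShape(8, 3, 24) ?? "vector observation check passed");
    }
}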
141
UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/GeneratorImpl.cs


using System.Collections.Generic;
using System;
using System.Linq;
using MLAgents.Sensor;
using UnityEngine;
namespace MLAgents.InferenceBrain
{

/// </summary>
public class BiDimensionalOutputGenerator : TensorGenerator.IGenerator
{
private readonly ITensorAllocator m_Allocator;
readonly ITensorAllocator m_Allocator;
public BiDimensionalOutputGenerator(ITensorAllocator allocator)
{

/// </summary>
public class BatchSizeGenerator : TensorGenerator.IGenerator
{
private readonly ITensorAllocator m_Allocator;
readonly ITensorAllocator m_Allocator;
public BatchSizeGenerator(ITensorAllocator allocator)
{

/// </summary>
public class SequenceLengthGenerator : TensorGenerator.IGenerator
{
private readonly ITensorAllocator m_Allocator;
readonly ITensorAllocator m_Allocator;
public SequenceLengthGenerator(ITensorAllocator allocator)
{

/// </summary>
public class VectorObservationGenerator : TensorGenerator.IGenerator
{
private readonly ITensorAllocator m_Allocator;
readonly ITensorAllocator m_Allocator;
List<int> m_SensorIndices = new List<int>();
WriteAdapter m_WriteAdapter = new WriteAdapter();
public void Generate(
TensorProxy tensorProxy, int batchSize, IEnumerable<Agent> agents)
public void AddSensorIndex(int sensorIndex)
TensorUtils.ResizeTensor(tensorProxy, batchSize, m_Allocator);
var vecObsSizeT = tensorProxy.shape[tensorProxy.shape.Length - 1];
var agentIndex = 0;
foreach (var agent in agents)
{
var info = agent.Info;
var vectorObs = info.stackedVectorObservation;
for (var j = 0; j < vecObsSizeT; j++)
{
tensorProxy.data[agentIndex, j] = vectorObs[j];
}
agentIndex++;
}
m_SensorIndices.Add(sensorIndex);
}
/// <summary>
/// Generates the Tensor corresponding to the Recurrent input: a two
/// dimensional float array of dimension [batchSize x memorySize].
/// It will use the Memory data contained in the agentInfo to fill the data
/// of the tensor.
/// </summary>
public class RecurrentInputGenerator : TensorGenerator.IGenerator
{
private readonly ITensorAllocator m_Allocator;
public RecurrentInputGenerator(ITensorAllocator allocator)
{
m_Allocator = allocator;
}
public void Generate(
TensorProxy tensorProxy, int batchSize, IEnumerable<Agent> agents)
public void Generate(TensorProxy tensorProxy, int batchSize, IEnumerable<Agent> agents)
var memorySize = tensorProxy.shape[tensorProxy.shape.Length - 1];
var vecObsSizeT = tensorProxy.shape[tensorProxy.shape.Length - 1];
var info = agent.Info;
var memory = info.memories;
if (memory == null)
var tensorOffset = 0;
// Write each sensor consecutively to the tensor
foreach (var sensorIndex in m_SensorIndices)
agentIndex++;
continue;
m_WriteAdapter.SetTarget(tensorProxy, agentIndex, tensorOffset);
var sensor = agent.sensors[sensorIndex];
var numWritten = sensor.Write(m_WriteAdapter);
tensorOffset += numWritten;
for (var j = 0; j < Math.Min(memorySize, memory.Count); j++)
{
if (j >= memory.Count)
{
break;
}
tensorProxy.data[agentIndex, j] = memory[j];
}
Debug.AssertFormat(
tensorOffset == vecObsSizeT,
"mismatch between vector observation size ({0}) and number of observations written ({1})",
vecObsSizeT, tensorOffset
);
private int m_MemoriesCount;
private readonly int m_MemoryIndex;
private readonly ITensorAllocator m_Allocator;
int m_MemoriesCount;
readonly int m_MemoryIndex;
readonly ITensorAllocator m_Allocator;
Dictionary<int, List<float>> m_Memories;
public BarracudaRecurrentInputGenerator(int memoryIndex, ITensorAllocator allocator)
public BarracudaRecurrentInputGenerator(
int memoryIndex,
ITensorAllocator allocator,
Dictionary<int, List<float>> memories)
m_Memories = memories;
public void Generate(
TensorProxy tensorProxy, int batchSize, IEnumerable<Agent> agents)
public void Generate(TensorProxy tensorProxy, int batchSize, IEnumerable<Agent> agents)
{
TensorUtils.ResizeTensor(tensorProxy, batchSize, m_Allocator);

{
var agentInfo = agent.Info;
var memory = agentInfo.memories;
if (memory == null)
List<float> memory;
if (!m_Memories.TryGetValue(agent.Info.id, out memory))
{
agentIndex++;
continue;

{
break;
}
tensorProxy.data[agentIndex, j] = memory[j + offset];
}
agentIndex++;

/// </summary>
public class PreviousActionInputGenerator : TensorGenerator.IGenerator
{
private readonly ITensorAllocator m_Allocator;
readonly ITensorAllocator m_Allocator;
public PreviousActionInputGenerator(ITensorAllocator allocator)
{

public void Generate(
TensorProxy tensorProxy, int batchSize, IEnumerable<Agent> agents)
public void Generate(TensorProxy tensorProxy, int batchSize, IEnumerable<Agent> agents)
{
TensorUtils.ResizeTensor(tensorProxy, batchSize, m_Allocator);

/// </summary>
public class ActionMaskInputGenerator : TensorGenerator.IGenerator
{
private readonly ITensorAllocator m_Allocator;
readonly ITensorAllocator m_Allocator;
public ActionMaskInputGenerator(ITensorAllocator allocator)
{

public void Generate(
TensorProxy tensorProxy, int batchSize, IEnumerable<Agent> agents)
public void Generate(TensorProxy tensorProxy, int batchSize, IEnumerable<Agent> agents)
{
TensorUtils.ResizeTensor(tensorProxy, batchSize, m_Allocator);

/// </summary>
public class RandomNormalInputGenerator : TensorGenerator.IGenerator
{
private readonly RandomNormal m_RandomNormal;
private readonly ITensorAllocator m_Allocator;
readonly RandomNormal m_RandomNormal;
readonly ITensorAllocator m_Allocator;
public RandomNormalInputGenerator(int seed, ITensorAllocator allocator)
{

public void Generate(
TensorProxy tensorProxy, int batchSize, IEnumerable<Agent> agents)
public void Generate(TensorProxy tensorProxy, int batchSize, IEnumerable<Agent> agents)
{
TensorUtils.ResizeTensor(tensorProxy, batchSize, m_Allocator);
TensorUtils.FillTensorWithRandomNormal(tensorProxy, m_RandomNormal);

/// </summary>
public class VisualObservationInputGenerator : TensorGenerator.IGenerator
{
private readonly int m_Index;
private readonly bool m_GrayScale;
private readonly ITensorAllocator m_Allocator;
readonly int m_SensorIndex;
readonly ITensorAllocator m_Allocator;
WriteAdapter m_WriteAdapter = new WriteAdapter();
int index, ITensorAllocator allocator)
int sensorIndex, ITensorAllocator allocator)
m_Index = index;
m_SensorIndex = sensorIndex;
public void Generate(
TensorProxy tensorProxy, int batchSize, IEnumerable<Agent> agents)
public void Generate(TensorProxy tensorProxy, int batchSize, IEnumerable<Agent> agents)
// TODO direct access to sensors list here - should we do it differently?
// TODO m_Index here is the visual observation index. Will work for now but not if we add more sensor types.
agent.m_Sensors[m_Index].WriteToTensor(tensorProxy, agentIndex);
m_WriteAdapter.SetTarget(tensorProxy, agentIndex, 0);
agent.sensors[m_SensorIndex].Write(m_WriteAdapter);
agentIndex++;
}
}
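The rewritten generators above no longer copy info.stackedVectorObservation directly; the VectorObservationGenerator walks its registered sensor indices, points the WriteAdapter at the current agent row and offset, lets each sensor write itself, and asserts that the total written matches the tensor width. A minimal sketch of that write-then-advance-offset loop, with a plain float[,] and delegate sensors standing in for the TensorProxy, WriteAdapter and ISensor types:

using System;
using System.Collections.Generic;
using System.Linq;

class VectorWriteSketch
{
    // Stand-in for ISensor.Write(WriteAdapter): writes floats at the offset, returns the count.
    delegate int SensorWrite(float[,] tensor, int agentIndex, int offset);

    static void Generate(float[,] tensor, int agentIndex, List<SensorWrite> sensors)
    {
        var width = tensor.GetLength(1);
        var tensorOffset = 0;
        foreach (var write in sensors)
        {
            tensorOffset += write(tensor, agentIndex, tensorOffset);  // each sensor advances the offset
        }
        if (tensorOffset != width)
        {
            throw new InvalidOperationException(
                $"mismatch between vector observation size ({width}) and number written ({tensorOffset})");
        }
    }

    static void Main()
    {
        var tensor = new float[1, 5];
        var sensors = new List<SensorWrite>
        {
            (t, a, o) => { t[a, o] = 1f; t[a, o + 1] = 2f; return 2; },                    // 2-float sensor
            (t, a, o) => { t[a, o] = 3f; t[a, o + 1] = 4f; t[a, o + 2] = 5f; return 3; },  // 3-float sensor
        };
        Generate(tensor, 0, sensors);
        Console.WriteLine(string.Join(", ", tensor.Cast<float>()));  // 1, 2, 3, 4, 5
    }
}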

45
UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/ModelRunner.cs


namespace MLAgents.InferenceBrain
{
public class ModelRunner : IBatchedDecisionMaker
public class ModelRunner
private List<Agent> m_Agents = new List<Agent>();
private ITensorAllocator m_TensorAllocator;
private TensorGenerator m_TensorGenerator;
private TensorApplier m_TensorApplier;
List<Agent> m_Agents = new List<Agent>();
ITensorAllocator m_TensorAllocator;
TensorGenerator m_TensorGenerator;
TensorApplier m_TensorApplier;
private NNModel m_Model;
private InferenceDevice m_InferenceDevice;
private IWorker m_Engine;
private bool m_Verbose = false;
private string[] m_OutputNames;
private IReadOnlyList<TensorProxy> m_InferenceInputs;
private IReadOnlyList<TensorProxy> m_InferenceOutputs;
NNModel m_Model;
InferenceDevice m_InferenceDevice;
IWorker m_Engine;
bool m_Verbose = false;
string[] m_OutputNames;
IReadOnlyList<TensorProxy> m_InferenceInputs;
IReadOnlyList<TensorProxy> m_InferenceOutputs;
Dictionary<int, List<float>> m_Memories = new Dictionary<int, List<float>>();
private bool m_visualObservationsInitialized = false;
bool m_VisualObservationsInitialized;
/// <summary>
/// Initializes the Brain with the Model that it will use when selecting actions for

m_InferenceInputs = BarracudaModelParamLoader.GetInputTensors(barracudaModel);
m_OutputNames = BarracudaModelParamLoader.GetOutputNames(barracudaModel);
m_TensorGenerator = new TensorGenerator(brainParameters, seed, m_TensorAllocator, barracudaModel);
m_TensorApplier = new TensorApplier(brainParameters, seed, m_TensorAllocator, barracudaModel);
m_TensorGenerator = new TensorGenerator(
seed, m_TensorAllocator, m_Memories, barracudaModel);
m_TensorApplier = new TensorApplier(
brainParameters, seed, m_TensorAllocator, m_Memories, barracudaModel);
private static Dictionary<string, Tensor> PrepareBarracudaInputs(IEnumerable<TensorProxy> infInputs)
static Dictionary<string, Tensor> PrepareBarracudaInputs(IEnumerable<TensorProxy> infInputs)
{
var inputs = new Dictionary<string, Tensor>();
foreach (var inp in infInputs)

m_TensorAllocator?.Reset(false);
}
private List<TensorProxy> FetchBarracudaOutputs(string[] names)
List<TensorProxy> FetchBarracudaOutputs(string[] names)
{
var outputs = new List<TensorProxy>();
foreach (var n in names)

return outputs;
}
public void PutObservations(string key, Agent agent)
public void PutObservations(Agent agent)
{
m_Agents.Add(agent);
}

return;
}
if (!m_visualObservationsInitialized)
if (!m_VisualObservationsInitialized)
m_TensorGenerator.InitializeVisualObservations(firstAgent, m_TensorAllocator);
m_visualObservationsInitialized = true;
m_TensorGenerator.InitializeObservations(firstAgent, m_TensorAllocator);
m_VisualObservationsInitialized = true;
}
Profiler.BeginSample("LearningBrain.DecideAction");

14
UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/TensorApplier.cs


void Apply(TensorProxy tensorProxy, IEnumerable<Agent> agents);
}
private readonly Dictionary<string, IApplier> m_Dict = new Dictionary<string, IApplier>();
readonly Dictionary<string, IApplier> m_Dict = new Dictionary<string, IApplier>();
/// <summary>
/// Returns a new TensorAppliers object.

/// <param name="seed"> The seed the Appliers will be initialized with.</param>
/// <param name="allocator"> Tensor allocator</param>
/// <param name="memories">Dictionary of AgentInfo.id to memory used to pass to the inference model.</param>
BrainParameters bp, int seed, ITensorAllocator allocator, object barracudaModel = null)
BrainParameters bp,
int seed,
ITensorAllocator allocator,
Dictionary<int, List<float>> memories,
object barracudaModel = null)
{
m_Dict[TensorNames.ValueEstimateOutput] = new ValueEstimateApplier();
if (bp.vectorActionSpaceType == SpaceType.Continuous)

m_Dict[TensorNames.ActionOutput] =
new DiscreteActionOutputApplier(bp.vectorActionSize, seed, allocator);
}
m_Dict[TensorNames.RecurrentOutput] = new MemoryOutputApplier();
if (barracudaModel != null)
{

{
m_Dict[model.memories[i].output] =
new BarracudaMemoryOutputApplier(model.memories.Length, i);
new BarracudaMemoryOutputApplier(model.memories.Length, i, memories);
}
}
}

/// <exception cref="UnityAgentsException"> One of the tensors does not have an
/// associated applier.</exception>
public void ApplyTensors(
IEnumerable<TensorProxy> tensors, IEnumerable<Agent> agents)
IEnumerable<TensorProxy> tensors, IEnumerable<Agent> agents)
{
foreach (var tensor in tensors)
{

58
UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/TensorGenerator.cs


TensorProxy tensorProxy, int batchSize, IEnumerable<Agent> agents);
}
private readonly Dictionary<string, IGenerator> m_Dict = new Dictionary<string, IGenerator>();
readonly Dictionary<string, IGenerator> m_Dict = new Dictionary<string, IGenerator>();
/// <param name="bp"> The BrainParameters used to determine what Generators will be
/// used</param>
/// <param name="memories">Dictionary of AgentInfo.id to memory for use in the inference model.</param>
BrainParameters bp, int seed, ITensorAllocator allocator, object barracudaModel = null)
int seed,
ITensorAllocator allocator,
Dictionary<int, List<float>> memories,
object barracudaModel = null)
{
// Generator for Inputs
m_Dict[TensorNames.BatchSizePlaceholder] =

m_Dict[TensorNames.VectorObservationPlacholder] =
new VectorObservationGenerator(allocator);
m_Dict[TensorNames.RecurrentInPlaceholder] =
new RecurrentInputGenerator(allocator);
for (var i = 0; i < model?.memories.Length; i++)
for (var i = 0; i < model.memories.Length; i++)
new BarracudaRecurrentInputGenerator(i, allocator);
new BarracudaRecurrentInputGenerator(i, allocator, memories);
}
}

m_Dict[TensorNames.ValueEstimateOutput] = new BiDimensionalOutputGenerator(allocator);
}
public void InitializeVisualObservations(Agent agent, ITensorAllocator allocator)
public void InitializeObservations(Agent agent, ITensorAllocator allocator)
for (var visIndex = 0; visIndex < agent.m_Sensors.Count; visIndex++)
// Loop through the sensors on a representative agent.
// For vector observations, add the index to the (single) VectorObservationGenerator
// For visual observations, make a VisualObservationInputGenerator
var visIndex = 0;
VectorObservationGenerator vecObsGen = null;
for (var sensorIndex = 0; sensorIndex < agent.sensors.Count; sensorIndex++)
// TODO handle non-visual sensors too - need to index better
m_Dict[TensorNames.VisualObservationPlaceholderPrefix + visIndex] =
new VisualObservationInputGenerator(visIndex, allocator);
var sensor = agent.sensors[sensorIndex];
var shape = sensor.GetFloatObservationShape();
// TODO generalize - we currently only have vector or visual, but can't handle "2D" observations
var isVectorSensor = (shape.Length == 1);
if (isVectorSensor)
{
if (vecObsGen == null)
{
vecObsGen = new VectorObservationGenerator(allocator);
}
vecObsGen.AddSensorIndex(sensorIndex);
}
else
{
m_Dict[TensorNames.VisualObservationPlaceholderPrefix + visIndex] =
new VisualObservationInputGenerator(sensorIndex, allocator);
visIndex++;
}
}
if (vecObsGen != null)
{
m_Dict[TensorNames.VectorObservationPlacholder] = vecObsGen;
}
}
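InitializeObservations above partitions a representative agent's sensors by shape: every 1-D sensor index is added to a single VectorObservationGenerator, while each multi-dimensional (visual) sensor gets its own VisualObservationInputGenerator under a numbered placeholder. A minimal sketch of that routing decision, with hypothetical sensor shapes:

using System;
using System.Collections.Generic;

class SensorRoutingSketch
{
    static void Main()
    {
        // Hypothetical shapes on one agent: two vector sensors and one 84x84x3 camera.
        var sensorShapes = new List<int[]> { new[] { 8 }, new[] { 4 }, new[] { 84, 84, 3 } };

        var vectorSensorIndices = new List<int>();              // fed to one vector generator
        var visualPlaceholders = new Dictionary<string, int>(); // placeholder name -> sensor index
        var visIndex = 0;

        for (var sensorIndex = 0; sensorIndex < sensorShapes.Count; sensorIndex++)
        {
            if (sensorShapes[sensorIndex].Length == 1)
            {
                vectorSensorIndices.Add(sensorIndex);           // 1-D: concatenated vector input
            }
            else
            {
                visualPlaceholders["visual_observation_" + visIndex] = sensorIndex;
                visIndex++;                                     // multi-D: its own visual input
            }
        }

        Console.WriteLine($"vector sensors: {string.Join(",", vectorSensorIndices)}; " +
                          $"visual inputs: {visualPlaceholders.Count}");
    }
}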

/// <exception cref="UnityAgentsException"> One of the tensors does not have an
/// associated generator.</exception>
public void GenerateTensors(
IEnumerable<TensorProxy> tensors,
int currentBatchSize,
IEnumerable<Agent> agents)
IEnumerable<TensorProxy> tensors, int currentBatchSize, IEnumerable<Agent> agents)
{
foreach (var tensor in tensors)
{

2
UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/TensorProxy.cs


FloatingPoint
};
private static readonly Dictionary<TensorType, Type> k_TypeMap =
static readonly Dictionary<TensorType, Type> k_TypeMap =
new Dictionary<TensorType, Type>()
{
{TensorType.FloatingPoint, typeof(float)},

2
UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/Utils/Multinomial.cs


/// </summary>
public class Multinomial
{
private readonly System.Random m_Random;
readonly System.Random m_Random;
/// <summary>
/// Constructor.

10
UnitySDK/Assets/ML-Agents/Scripts/InferenceBrain/Utils/RandomNormal.cs


/// </summary>
public class RandomNormal
{
private readonly double m_Mean;
private readonly double m_Stddev;
private readonly Random m_Random;
readonly double m_Mean;
readonly double m_Stddev;
readonly Random m_Random;
public RandomNormal(int seed, float mean = 0.0f, float stddev = 1.0f)
{

}
// Each iteration produces two numbers. Hold one here for next call
private bool m_HasSpare;
private double m_SpareUnscaled;
bool m_HasSpare;
double m_SpareUnscaled;
/// <summary>
/// Return the next random double number

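The m_HasSpare/m_SpareUnscaled fields above cache the second value produced by each iteration of the polar (Marsaglia) method, so only every other call draws new uniform samples. A minimal sketch of that spare-value pattern; it follows the textbook algorithm and is not necessarily the SDK's exact implementation:

using System;

class PolarNormalSketch
{
    readonly Random m_Random = new Random(42);
    bool m_HasSpare;
    double m_SpareUnscaled;

    public double NextDouble(double mean = 0.0, double stddev = 1.0)
    {
        if (m_HasSpare)
        {
            m_HasSpare = false;
            return m_SpareUnscaled * stddev + mean;  // second value from the previous iteration
        }

        double u, v, s;
        do
        {
            u = m_Random.NextDouble() * 2.0 - 1.0;
            v = m_Random.NextDouble() * 2.0 - 1.0;
            s = u * u + v * v;
        } while (s >= 1.0 || s == 0.0);              // keep only points inside the unit circle

        s = Math.Sqrt(-2.0 * Math.Log(s) / s);
        m_SpareUnscaled = v * s;                     // hold one value for the next call
        m_HasSpare = true;
        return u * s * stddev + mean;
    }
}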
27
UnitySDK/Assets/ML-Agents/Scripts/Policy/BarracudaPolicy.cs


using UnityEngine;
using Barracuda;
using System.Collections.Generic;
using MLAgents.InferenceBrain;
namespace MLAgents
{

public class BarracudaPolicy : IPolicy
{
protected IBatchedDecisionMaker m_BatchedDecisionMaker;
protected ModelRunner m_ModelRunner;
/// Sensor shapes for the associated Agents. All Agents must have the same shapes for their sensors.
/// Sensor shapes for the associated Agents. All Agents must have the same shapes for their Sensors.
/// </summary>
List<int[]> m_SensorShapes;

NNModel model,
InferenceDevice inferenceDevice)
{
var aca = GameObject.FindObjectOfType<Academy>();
var aca = Object.FindObjectOfType<Academy>();
m_BatchedDecisionMaker = modelRunner;
m_ModelRunner = modelRunner;
}
/// <inheritdoc />

ValidateAgentSensorShapes(agent);
#endif
m_BatchedDecisionMaker?.PutObservations(null, agent);
m_ModelRunner?.PutObservations(agent);
m_BatchedDecisionMaker?.DecideBatch();
m_ModelRunner?.DecideBatch();
/// Check that the Agent sensors are the same shape as the other Agents using the same Brain.
/// Check that the Agent Sensors are the same shape as the other Agents using the same Brain.
private void ValidateAgentSensorShapes(Agent agent)
void ValidateAgentSensorShapes(Agent agent)
m_SensorShapes = new List<int[]>(agent.m_Sensors.Count);
m_SensorShapes = new List<int[]>(agent.sensors.Count);
foreach (var sensor in agent.m_Sensors)
foreach (var sensor in agent.sensors)
{
m_SensorShapes.Add(sensor.GetFloatObservationShape());
}

// Check for compatibility with the other Agents' sensors
// Check for compatibility with the other Agents' Sensors
Debug.Assert(m_SensorShapes.Count == agent.m_Sensors.Count, $"Number of sensors must match. {m_SensorShapes.Count} != {agent.m_Sensors.Count}");
Debug.Assert(m_SensorShapes.Count == agent.sensors.Count, $"Number of Sensors must match. {m_SensorShapes.Count} != {agent.sensors.Count}");
var sensorShape = agent.m_Sensors[i].GetFloatObservationShape();
var sensorShape = agent.sensors[i].GetFloatObservationShape();
Debug.Assert(cachedShape.Length == sensorShape.Length, "Sensor dimensions must match.");
for (var j = 0; j < cachedShape.Length; j++)
{

16
UnitySDK/Assets/ML-Agents/Scripts/Policy/BehaviorParameters.cs


[HideInInspector]
[SerializeField]
private BrainParameters m_BrainParameters = new BrainParameters();
[HideInInspector] [SerializeField] private NNModel m_Model;
[HideInInspector] [SerializeField] private InferenceDevice m_InferenceDevice;
[HideInInspector] [SerializeField] private bool m_UseHeuristic;
[HideInInspector] [SerializeField] private string m_BehaviorName = "My Behavior";
BrainParameters m_BrainParameters = new BrainParameters();
[HideInInspector] [SerializeField]
NNModel m_Model;
[HideInInspector] [SerializeField]
InferenceDevice m_InferenceDevice;
[HideInInspector] [SerializeField]
bool m_UseHeuristic;
[HideInInspector] [SerializeField]
string m_BehaviorName = "My Behavior";
[HideInInspector]
[HideInInspector]
public string behaviorName
{
get { return m_BehaviorName; }

6
UnitySDK/Assets/ML-Agents/Scripts/Policy/HeuristicPolicy.cs


using UnityEngine;
using Barracuda;
using MLAgents.InferenceBrain;
using System;
namespace MLAgents

/// </summary>
public class HeuristicPolicy : IPolicy
{
private Func<float[]> m_Heuristic;
private Agent m_Agent;
Func<float[]> m_Heuristic;
Agent m_Agent;
/// <inheritdoc />
public HeuristicPolicy(Func<float[]> heuristic)

1
UnitySDK/Assets/ML-Agents/Scripts/Policy/IPolicy.cs


using System;
using System.Collections.Generic;
using UnityEngine;
namespace MLAgents

29
UnitySDK/Assets/ML-Agents/Scripts/Policy/RemotePolicy.cs


/// </summary>
public class RemotePolicy : IPolicy
{
private string m_BehaviorName;
protected IBatchedDecisionMaker m_BatchedDecisionMaker;
string m_BehaviorName;
protected ICommunicator m_Communicator;
/// Sensor shapes for the associated Agents. All Agents must have the same shapes for their sensors.
/// Sensor shapes for the associated Agents. All Agents must have the same shapes for their Sensors.
/// </summary>
List<int[]> m_SensorShapes;

string behaviorName)
{
m_BehaviorName = behaviorName;
var aca = GameObject.FindObjectOfType<Academy>();
var aca = Object.FindObjectOfType<Academy>();
m_BatchedDecisionMaker = aca.Communicator;
m_Communicator = aca.Communicator;
aca.Communicator.SubscribeBrain(m_BehaviorName, brainParameters);
}

#if DEBUG
ValidateAgentSensorShapes(agent);
#endif
m_BatchedDecisionMaker?.PutObservations(m_BehaviorName, agent);
m_Communicator?.PutObservations(m_BehaviorName, agent);
m_BatchedDecisionMaker?.DecideBatch();
m_Communicator?.DecideBatch();
/// Check that the Agent sensors are the same shape as the other Agents using the same Brain.
/// Check that the Agent Sensors are the same shape as the other Agents using the same Brain.
private void ValidateAgentSensorShapes(Agent agent)
void ValidateAgentSensorShapes(Agent agent)
m_SensorShapes = new List<int[]>(agent.m_Sensors.Count);
m_SensorShapes = new List<int[]>(agent.sensors.Count);
foreach (var sensor in agent.m_Sensors)
foreach (var sensor in agent.sensors)
{
m_SensorShapes.Add(sensor.GetFloatObservationShape());
}

// Check for compatibility with the other Agents' sensors
// Check for compatibility with the other Agents' Sensors
Debug.Assert(m_SensorShapes.Count == agent.m_Sensors.Count, $"Number of sensors must match. {m_SensorShapes.Count} != {agent.m_Sensors.Count}");
Debug.Assert(m_SensorShapes.Count == agent.sensors.Count, $"Number of Sensors must match. {m_SensorShapes.Count} != {agent.sensors.Count}");
var sensorShape = agent.m_Sensors[i].GetFloatObservationShape();
var sensorShape = agent.sensors[i].GetFloatObservationShape();
Debug.Assert(cachedShape.Length == sensorShape.Length, "Sensor dimensions must match.");
for (var j = 0; j < cachedShape.Length; j++)
{

5
UnitySDK/Assets/ML-Agents/Scripts/ResetParameters.cs


UpdateResetParameters();
}
private void UpdateResetParameters()
void UpdateResetParameters()
{
m_ResetParameters.Clear();
foreach (var pair in this)

}
[FormerlySerializedAs("resetParameters")]
[SerializeField] private List<ResetParameter> m_ResetParameters = new List<ResetParameter>();
[SerializeField]
List<ResetParameter> m_ResetParameters = new List<ResetParameter>();
public void OnBeforeSerialize()
{

18
UnitySDK/Assets/ML-Agents/Scripts/Sensor/CameraSensor.cs


using System;
using MLAgents.InferenceBrain;
using UnityEngine;
namespace MLAgents.Sensor

private Camera m_Camera;
private int m_Width;
private int m_Height;
private bool m_Grayscale;
private string m_Name;
private int[] m_Shape;
Camera m_Camera;
int m_Width;
int m_Height;
bool m_Grayscale;
string m_Name;
int[] m_Shape;
public CameraSensor(Camera camera, int width, int height, bool grayscale, string name)
{

}
}
public void WriteToTensor(TensorProxy tensorProxy, int agentIndex)
public int Write(WriteAdapter adapter)
Utilities.TextureToTensorProxy(texture, tensorProxy, m_Grayscale, agentIndex);
var numWritten = Utilities.TextureToTensorProxy(texture, adapter, m_Grayscale);
return numWritten;
}
}

2
UnitySDK/Assets/ML-Agents/Scripts/Sensor/CameraSensorComponent.cs


public string sensorName = "CameraSensor";
public int width = 84;
public int height = 84;
public bool grayscale = false;
public bool grayscale;
public override ISensor CreateSensor()
{

12
UnitySDK/Assets/ML-Agents/Scripts/Sensor/ISensor.cs


using MLAgents.InferenceBrain;
PNG,
PNG
}
/// <summary>

int[] GetFloatObservationShape();
/// <summary>
/// Write the observation data directly to the TensorProxy.
/// Write the observation data directly to the WriteAdapter.
/// <param name="tensorProxy"></param>
/// <param name="agentIndex"></param>
void WriteToTensor(TensorProxy tensorProxy, int agentIndex);
/// <param name="adapater"></param>
/// <returns>The number of elements written</returns>
int Write(WriteAdapter adapater);
/// <summary>
/// Return a compressed representation of the observation. For small observations, this should generally not be

21
UnitySDK/Assets/ML-Agents/Scripts/Sensor/RenderTextureSensor.cs


using System;
using System.Threading;
using MLAgents.InferenceBrain;
class RenderTextureSensor : ISensor
public class RenderTextureSensor : ISensor
private RenderTexture m_RenderTexture;
private int m_Width;
private int m_Height;
private bool m_Grayscale;
private string m_Name;
private int[] m_Shape;
RenderTexture m_RenderTexture;
int m_Width;
int m_Height;
bool m_Grayscale;
string m_Name;
int[] m_Shape;
public RenderTextureSensor(RenderTexture renderTexture, int width, int height, bool grayscale, string name)
{

}
}
public void WriteToTensor(TensorProxy tensorProxy, int index)
public int Write(WriteAdapter adapter)
Utilities.TextureToTensorProxy(texture, tensorProxy, m_Grayscale, index);
var numWritten = Utilities.TextureToTensorProxy(texture, adapter, m_Grayscale);
return numWritten;
}
}

2
UnitySDK/Assets/ML-Agents/Scripts/Sensor/RenderTextureSensorComponent.cs


public string sensorName = "RenderTextureSensor";
public int width = 84;
public int height = 84;
public bool grayscale = false;
public bool grayscale;
public override ISensor CreateSensor()
{

17
UnitySDK/Assets/ML-Agents/Scripts/Sensor/SensorBase.cs


using MLAgents.InferenceBrain;
using UnityEngine;
namespace MLAgents.Sensor

public abstract string GetName();
/// <summary>
/// Default implementation of WriteToTensor interface. This creates a temporary array, calls WriteObservation,
/// and then writes the results to the TensorProxy.
/// Default implementation of Write interface. This creates a temporary array, calls WriteObservation,
/// and then writes the results to the WriteAdapter.
/// <param name="tensorProxy"></param>
/// <param name="agentIndex"></param>
public virtual void WriteToTensor(TensorProxy tensorProxy, int agentIndex)
/// <param name="adapter"></param>
public virtual int Write(WriteAdapter adapter)
{
// TODO reuse buffer for similar agents, don't call GetFloatObservationShape()
int[] shape = GetFloatObservationShape();

float[] buffer = new float[numFloats];
WriteObservation(buffer);
for (var i = 0; i < numFloats; i++)
{
tensorProxy.data[agentIndex, i] = buffer[i];
}
adapter.AddRange(buffer);
return numFloats;
}
public virtual byte[] GetCompressedObservation()

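SensorBase.Write above buffers the subclass's observation in a temporary float[] and pushes it through adapter.AddRange, so a concrete sensor only has to fill a flat array. A self-contained sketch of that pattern with stand-in types (not the SDK's MLAgents.Sensor classes) and a hypothetical velocity sensor:

using System.Collections.Generic;

abstract class SensorBaseSketch
{
    public abstract void WriteObservation(float[] output);
    public abstract int[] GetFloatObservationShape();

    // Default Write: temporary buffer -> WriteObservation -> copy everything to the adapter.
    public virtual int Write(List<float> adapter)
    {
        var numFloats = 1;
        foreach (var dim in GetFloatObservationShape())
        {
            numFloats *= dim;
        }
        var buffer = new float[numFloats];
        WriteObservation(buffer);
        adapter.AddRange(buffer);
        return numFloats;
    }
}

// Hypothetical sensor reporting a 3-float velocity.
class VelocitySensorSketch : SensorBaseSketch
{
    public float X, Y, Z;

    public override int[] GetFloatObservationShape() { return new[] { 3 }; }

    public override void WriteObservation(float[] output)
    {
        output[0] = X;
        output[1] = Y;
        output[2] = Z;
    }
}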
6
UnitySDK/Assets/ML-Agents/Scripts/Startup.cs


{
public class Startup : MonoBehaviour
{
private const string k_SceneVariableName = "SCENE_NAME";
const string k_SceneVariableName = "SCENE_NAME";
private void Awake()
void Awake()
private static void SwitchScene(string sceneName)
static void SwitchScene(string sceneName)
{
if (sceneName == null)
{

15
UnitySDK/Assets/ML-Agents/Scripts/Timer.cs


using UnityEngine.Profiling;
using System.Runtime.Serialization;
using System.Runtime.Serialization.Json;
#if UNITY_EDITOR
using UnityEditor;
#endif
namespace MLAgents
{

/// <summary>
/// Custom sampler used to add timings to the profiler.
/// </summary>
private CustomSampler m_Sampler;
CustomSampler m_Sampler;
/// <summary>
/// Number of total ticks elapsed for this node.

/// </remarks>
public class TimerStack : System.IDisposable
{
private static readonly TimerStack k_Instance = new TimerStack();
static readonly TimerStack k_Instance = new TimerStack();
Stack<TimerNode> m_Stack;
TimerNode m_RootNode;

{
}
private TimerStack()
TimerStack()
{
Reset();
}

get { return m_RootNode; }
}
private void Push(string name)
void Push(string name)
{
var current = m_Stack.Peek();
var next = current.GetChild(name);

private void Pop()
void Pop()
{
var node = m_Stack.Pop();
node.End();

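TimerStack above keeps a stack of named nodes: Push finds or creates a child of the node currently on top and starts timing it, Pop ends it, and nested calls therefore accumulate a tree of timings. A generic sketch of that push/pop pattern built on Stopwatch; it illustrates the idea and is not the SDK's TimerStack API:

using System.Collections.Generic;
using System.Diagnostics;

class TimerNodeSketch
{
    public long ElapsedTicks;
    public readonly Dictionary<string, TimerNodeSketch> Children = new Dictionary<string, TimerNodeSketch>();
    public readonly Stopwatch Watch = new Stopwatch();
}

class TimerStackSketch
{
    readonly Stack<TimerNodeSketch> m_Stack = new Stack<TimerNodeSketch>();
    public TimerNodeSketch Root { get; } = new TimerNodeSketch();

    public TimerStackSketch() { m_Stack.Push(Root); }

    public void Push(string name)
    {
        var current = m_Stack.Peek();
        if (!current.Children.TryGetValue(name, out var next))
        {
            next = new TimerNodeSketch();
            current.Children[name] = next;  // the same name under the same parent reuses the node
        }
        next.Watch.Restart();
        m_Stack.Push(next);
    }

    public void Pop()
    {
        var node = m_Stack.Pop();
        node.Watch.Stop();
        node.ElapsedTicks += node.Watch.ElapsedTicks;  // accumulate across repeated visits
    }
}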
61
UnitySDK/Assets/ML-Agents/Scripts/Utilities.cs


using UnityEngine;
using System.Collections.Generic;
using MLAgents.InferenceBrain;
using MLAgents.Sensor;
/// <summary>
/// Converts a list of Texture2D into a TensorProxy.
/// </summary>
/// <param name="textures">
/// The list of textures to be put into the tensor.
/// Note that the textures must have same width and height.
/// </param>
/// <param name="tensorProxy">
/// TensorProxy to fill with Texture data.
/// </param>
/// <param name="grayScale">
/// If set to <c>true</c> the textures will be converted to grayscale before
/// being stored in the tensor.
/// </param>
public static void TextureToTensorProxy(
List<Texture2D> textures,
TensorProxy tensorProxy,
bool grayScale)
{
var numTextures = textures.Count;
var width = textures[0].width;
var height = textures[0].height;
for (var t = 0; t < numTextures; t++)
{
var texture = textures[t];
Debug.Assert(width == texture.width, "All Textures must have the same dimension");
Debug.Assert(height == texture.height, "All Textures must have the same dimension");
TextureToTensorProxy(texture, tensorProxy, grayScale, t);
}
}
/// Puts a Texture2D into a TensorProxy.
/// Puts a Texture2D into a WriteAdapter.
/// <param name="tensorProxy">
/// TensorProxy to fill with Texture data.
/// <param name="adapter">
/// Adapter to fill with Texture data.
/// <param name="textureOffset">
/// Index of the texture being written.
/// </param>
public static void TextureToTensorProxy(
/// <returns>The number of floats written</returns>
public static int TextureToTensorProxy(
TensorProxy tensorProxy,
bool grayScale,
int textureOffset = 0)
WriteAdapter adapter,
bool grayScale)
var data = tensorProxy.data;
var t = textureOffset;
var texturePixels = texture.GetPixels32();
// During training, we convert from Texture to PNG before sending to the trainer, which has the
// effect of flipping the image. We need another flip here at inference time to match this.

var currentPixel = texturePixels[(height - h - 1) * width + w];
if (grayScale)
{
data[t, h, w, 0] =
adapter[h, w, 0] =
data[t, h, w, 0] = currentPixel.r / 255.0f;
data[t, h, w, 1] = currentPixel.g / 255.0f;
data[t, h, w, 2] = currentPixel.b / 255.0f;
adapter[h, w, 0] = currentPixel.r / 255.0f;
adapter[h, w, 1] = currentPixel.g / 255.0f;
adapter[h, w, 2] = currentPixel.b / 255.0f;
return height * width * (grayScale ? 1 : 3);
}
/// <summary>

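The rewritten TextureToTensorProxy above writes pixels through the adapter's [h, w, channel] indexer, flips rows to undo the PNG flip mentioned in the comment, and returns height * width * channels so callers can track how much was written. A minimal sketch of that per-pixel loop against a plain 3-D float array; the grayscale average is illustrative:

class TextureWriteSketch
{
    // pixels[h, w] holds (r, g, b) values for one texel.
    static int WritePixels((byte r, byte g, byte b)[,] pixels, float[,,] target, bool grayScale)
    {
        var height = pixels.GetLength(0);
        var width = pixels.GetLength(1);
        for (var h = 0; h < height; h++)
        {
            for (var w = 0; w < width; w++)
            {
                // Flip vertically to match the PNG path used during training.
                var px = pixels[height - h - 1, w];
                if (grayScale)
                {
                    target[h, w, 0] = (px.r + px.g + px.b) / 3f / 255f;
                }
                else
                {
                    target[h, w, 0] = px.r / 255f;
                    target[h, w, 1] = px.g / 255f;
                    target[h, w, 2] = px.b / 255f;
                }
            }
        }
        return height * width * (grayScale ? 1 : 3);
    }
}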
1
UnitySDK/UnitySDK.sln.DotSettings


<s:String x:Key="/Default/CodeStyle/Naming/CSharpNaming/Abbreviations/=CPU/@EntryIndexedValue">CPU</s:String>
<s:String x:Key="/Default/CodeStyle/Naming/CSharpNaming/Abbreviations/=GPU/@EntryIndexedValue">GPU</s:String>
<s:String x:Key="/Default/CodeStyle/Naming/CSharpNaming/Abbreviations/=NN/@EntryIndexedValue">NN</s:String>
<s:String x:Key="/Default/CodeStyle/Naming/CSharpNaming/Abbreviations/=PNG/@EntryIndexedValue">PNG</s:String>
<s:String x:Key="/Default/CodeStyle/Naming/CSharpNaming/Abbreviations/=RL/@EntryIndexedValue">RL</s:String>
<s:Boolean x:Key="/Default/UserDictionary/Words/=BLAS/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=Logits/@EntryIndexedValue">True</s:Boolean>

9
docs/Migrating.md


# Migrating
## Migrating from ML-Agents toolkit v0.10 to v0.11
## Migrating from ML-Agents toolkit v0.11.0
### Important Changes
### Steps to Migrate
* We [fixed a bug](https://github.com/Unity-Technologies/ml-agents/pull/2823) in `RayPerception3d.Perceive()` that was causing the `endOffset` to be used incorrectly. However, this may produce different behavior from previous versions if you use a non-zero `startOffset`. To reproduce the old behavior, you should increase the value of `endOffset` by `startOffset`, as illustrated in the sketch below. You can verify your raycasts are performing as expected in scene view using the debug rays.
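A minimal illustration of that adjustment, with hypothetical offset values (only the arithmetic is shown):

// Hypothetical offsets used with RayPerception3D before this release.
var startOffset = 0.3f;
var oldEndOffset = 1.0f;
// After the Perceive() fix, pass an endOffset increased by startOffset
// to reproduce the old ray endpoints.
var newEndOffset = oldEndOffset + startOffset;  // 1.3f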
## Migrating from ML-Agents toolkit v0.10 to v0.11.0
### Important Changes
* The definition of the gRPC service has changed.

2
gym-unity/gym_unity/envs/__init__.py


"""Sets the seed for this env's random number generator(s).
Currently not implemented.
"""
logger.warn("Could not seed environment %s", self.name)
logger.warning("Could not seed environment %s", self.name)
return
def _check_agents(self, n_agents):

1
ml-agents-envs/mlagents/envs/action_info.py


class ActionInfo(NamedTuple):
action: Any
memory: Any
text: Any
value: Any
outputs: ActionInfoOutputs

43
ml-agents-envs/mlagents/envs/brain.py


visual_observation,
vector_observation,
text_observations,
memory=None,
reward=None,
agents=None,
local_done=None,

self.visual_observations = visual_observation
self.vector_observations = vector_observation
self.text_observations = text_observations
self.memories = memory
self.rewards = reward
self.local_done = local_done
self.max_reached = max_reached

self.action_masks = action_mask
self.custom_observations = custom_observations
def merge(self, other):
for i in range(len(self.visual_observations)):
self.visual_observations[i].extend(other.visual_observations[i])
self.vector_observations = np.append(
self.vector_observations, other.vector_observations, axis=0
)
self.text_observations.extend(other.text_observations)
self.memories = self.merge_memories(
self.memories, other.memories, self.agents, other.agents
)
self.rewards = safe_concat_lists(self.rewards, other.rewards)
self.local_done = safe_concat_lists(self.local_done, other.local_done)
self.max_reached = safe_concat_lists(self.max_reached, other.max_reached)
self.agents = safe_concat_lists(self.agents, other.agents)
self.previous_vector_actions = safe_concat_np_ndarray(
self.previous_vector_actions, other.previous_vector_actions
)
self.previous_text_actions = safe_concat_lists(
self.previous_text_actions, other.previous_text_actions
)
self.action_masks = safe_concat_np_ndarray(
self.action_masks, other.action_masks
)
self.custom_observations = safe_concat_lists(
self.custom_observations, other.custom_observations
)
@staticmethod
def merge_memories(m1, m2, agents1, agents2):
if len(m1) == 0 and len(m2) != 0:

for x in agent_info_list
]
vis_obs += [obs]
if len(agent_info_list) == 0:
memory_size = 0
else:
memory_size = max(len(x.memories) for x in agent_info_list)
if memory_size == 0:
memory = np.zeros((0, 0))
else:
[
x.memories.extend([0] * (memory_size - len(x.memories)))
for x in agent_info_list
]
memory = np.array([list(x.memories) for x in agent_info_list])
total_num_actions = sum(brain_params.vector_action_space_size)
mask_actions = np.ones((len(agent_info_list), total_num_actions))
for agent_index, agent_info in enumerate(agent_info_list):

visual_observation=vis_obs,
vector_observation=vector_obs,
text_observations=[x.text_observation for x in agent_info_list],
memory=memory,
reward=[x.reward if not np.isnan(x.reward) else 0 for x in agent_info_list],
agents=agents,
local_done=[x.done for x in agent_info_list],

15
ml-agents-envs/mlagents/envs/communicator_objects/agent_action_pb2.py


name='mlagents/envs/communicator_objects/agent_action.proto',
package='communicator_objects',
syntax='proto3',
serialized_pb=_b('\n5mlagents/envs/communicator_objects/agent_action.proto\x12\x14\x63ommunicator_objects\x1a\x36mlagents/envs/communicator_objects/custom_action.proto\"\xa1\x01\n\x10\x41gentActionProto\x12\x16\n\x0evector_actions\x18\x01 \x03(\x02\x12\x14\n\x0ctext_actions\x18\x02 \x01(\t\x12\x10\n\x08memories\x18\x03 \x03(\x02\x12\r\n\x05value\x18\x04 \x01(\x02\x12>\n\rcustom_action\x18\x05 \x01(\x0b\x32\'.communicator_objects.CustomActionProtoB\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3')
serialized_pb=_b('\n5mlagents/envs/communicator_objects/agent_action.proto\x12\x14\x63ommunicator_objects\x1a\x36mlagents/envs/communicator_objects/custom_action.proto\"\x95\x01\n\x10\x41gentActionProto\x12\x16\n\x0evector_actions\x18\x01 \x03(\x02\x12\x14\n\x0ctext_actions\x18\x02 \x01(\t\x12\r\n\x05value\x18\x04 \x01(\x02\x12>\n\rcustom_action\x18\x05 \x01(\x0b\x32\'.communicator_objects.CustomActionProtoJ\x04\x08\x03\x10\x04\x42\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3')
,
dependencies=[mlagents_dot_envs_dot_communicator__objects_dot_custom__action__pb2.DESCRIPTOR,])

is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='memories', full_name='communicator_objects.AgentActionProto.memories', index=2,
number=3, type=2, cpp_type=6, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='value', full_name='communicator_objects.AgentActionProto.value', index=3,
name='value', full_name='communicator_objects.AgentActionProto.value', index=2,
number=4, type=2, cpp_type=6, label=1,
has_default_value=False, default_value=float(0),
message_type=None, enum_type=None, containing_type=None,

name='custom_action', full_name='communicator_objects.AgentActionProto.custom_action', index=4,
name='custom_action', full_name='communicator_objects.AgentActionProto.custom_action', index=3,
number=5, type=11, cpp_type=10, label=1,
has_default_value=False, default_value=None,
message_type=None, enum_type=None, containing_type=None,

oneofs=[
],
serialized_start=136,
serialized_end=297,
serialized_end=285,
)
_AGENTACTIONPROTO.fields_by_name['custom_action'].message_type = mlagents_dot_envs_dot_communicator__objects_dot_custom__action__pb2._CUSTOMACTIONPROTO

6
ml-agents-envs/mlagents/envs/communicator_objects/agent_action_pb2.pyi


DESCRIPTOR: google___protobuf___descriptor___Descriptor = ...
vector_actions = ... # type: google___protobuf___internal___containers___RepeatedScalarFieldContainer[builtin___float]
text_actions = ... # type: typing___Text
memories = ... # type: google___protobuf___internal___containers___RepeatedScalarFieldContainer[builtin___float]
value = ... # type: builtin___float
@property

*,
vector_actions : typing___Optional[typing___Iterable[builtin___float]] = None,
text_actions : typing___Optional[typing___Text] = None,
memories : typing___Optional[typing___Iterable[builtin___float]] = None,
value : typing___Optional[builtin___float] = None,
custom_action : typing___Optional[mlagents___envs___communicator_objects___custom_action_pb2___CustomActionProto] = None,
) -> None: ...

def CopyFrom(self, other_msg: google___protobuf___message___Message) -> None: ...
if sys.version_info >= (3,):
def HasField(self, field_name: typing_extensions___Literal[u"custom_action"]) -> builtin___bool: ...
def ClearField(self, field_name: typing_extensions___Literal[u"custom_action",u"memories",u"text_actions",u"value",u"vector_actions"]) -> None: ...
def ClearField(self, field_name: typing_extensions___Literal[u"custom_action",u"text_actions",u"value",u"vector_actions"]) -> None: ...
def ClearField(self, field_name: typing_extensions___Literal[u"custom_action",b"custom_action",u"memories",b"memories",u"text_actions",b"text_actions",u"value",b"value",u"vector_actions",b"vector_actions"]) -> None: ...
def ClearField(self, field_name: typing_extensions___Literal[u"custom_action",b"custom_action",u"text_actions",b"text_actions",u"value",b"value",u"vector_actions",b"vector_actions"]) -> None: ...

25
ml-agents-envs/mlagents/envs/communicator_objects/agent_info_pb2.py


name='mlagents/envs/communicator_objects/agent_info.proto',
package='communicator_objects',
syntax='proto3',
serialized_pb=_b('\n3mlagents/envs/communicator_objects/agent_info.proto\x12\x14\x63ommunicator_objects\x1a?mlagents/envs/communicator_objects/compressed_observation.proto\x1a;mlagents/envs/communicator_objects/custom_observation.proto\"\x98\x03\n\x0e\x41gentInfoProto\x12\"\n\x1astacked_vector_observation\x18\x01 \x03(\x02\x12\x18\n\x10text_observation\x18\x03 \x01(\t\x12\x1d\n\x15stored_vector_actions\x18\x04 \x03(\x02\x12\x1b\n\x13stored_text_actions\x18\x05 \x01(\t\x12\x10\n\x08memories\x18\x06 \x03(\x02\x12\x0e\n\x06reward\x18\x07 \x01(\x02\x12\x0c\n\x04\x64one\x18\x08 \x01(\x08\x12\x18\n\x10max_step_reached\x18\t \x01(\x08\x12\n\n\x02id\x18\n \x01(\x05\x12\x13\n\x0b\x61\x63tion_mask\x18\x0b \x03(\x08\x12H\n\x12\x63ustom_observation\x18\x0c \x01(\x0b\x32,.communicator_objects.CustomObservationProto\x12Q\n\x17\x63ompressed_observations\x18\r \x03(\x0b\x32\x30.communicator_objects.CompressedObservationProtoJ\x04\x08\x02\x10\x03\x42\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3')
serialized_pb=_b('\n3mlagents/envs/communicator_objects/agent_info.proto\x12\x14\x63ommunicator_objects\x1a?mlagents/envs/communicator_objects/compressed_observation.proto\x1a;mlagents/envs/communicator_objects/custom_observation.proto\"\x8c\x03\n\x0e\x41gentInfoProto\x12\"\n\x1astacked_vector_observation\x18\x01 \x03(\x02\x12\x18\n\x10text_observation\x18\x03 \x01(\t\x12\x1d\n\x15stored_vector_actions\x18\x04 \x03(\x02\x12\x1b\n\x13stored_text_actions\x18\x05 \x01(\t\x12\x0e\n\x06reward\x18\x07 \x01(\x02\x12\x0c\n\x04\x64one\x18\x08 \x01(\x08\x12\x18\n\x10max_step_reached\x18\t \x01(\x08\x12\n\n\x02id\x18\n \x01(\x05\x12\x13\n\x0b\x61\x63tion_mask\x18\x0b \x03(\x08\x12H\n\x12\x63ustom_observation\x18\x0c \x01(\x0b\x32,.communicator_objects.CustomObservationProto\x12Q\n\x17\x63ompressed_observations\x18\r \x03(\x0b\x32\x30.communicator_objects.CompressedObservationProtoJ\x04\x08\x02\x10\x03J\x04\x08\x06\x10\x07\x42\x1f\xaa\x02\x1cMLAgents.CommunicatorObjectsb\x06proto3')
,
dependencies=[mlagents_dot_envs_dot_communicator__objects_dot_compressed__observation__pb2.DESCRIPTOR,mlagents_dot_envs_dot_communicator__objects_dot_custom__observation__pb2.DESCRIPTOR,])

is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='memories', full_name='communicator_objects.AgentInfoProto.memories', index=4,
number=6, type=2, cpp_type=6, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='reward', full_name='communicator_objects.AgentInfoProto.reward', index=5,
name='reward', full_name='communicator_objects.AgentInfoProto.reward', index=4,
number=7, type=2, cpp_type=6, label=1,
has_default_value=False, default_value=float(0),
message_type=None, enum_type=None, containing_type=None,

name='done', full_name='communicator_objects.AgentInfoProto.done', index=6,
name='done', full_name='communicator_objects.AgentInfoProto.done', index=5,
number=8, type=8, cpp_type=7, label=1,
has_default_value=False, default_value=False,
message_type=None, enum_type=None, containing_type=None,

name='max_step_reached', full_name='communicator_objects.AgentInfoProto.max_step_reached', index=7,
name='max_step_reached', full_name='communicator_objects.AgentInfoProto.max_step_reached', index=6,
number=9, type=8, cpp_type=7, label=1,
has_default_value=False, default_value=False,
message_type=None, enum_type=None, containing_type=None,

name='id', full_name='communicator_objects.AgentInfoProto.id', index=8,
name='id', full_name='communicator_objects.AgentInfoProto.id', index=7,
number=10, type=5, cpp_type=1, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,

name='action_mask', full_name='communicator_objects.AgentInfoProto.action_mask', index=9,
name='action_mask', full_name='communicator_objects.AgentInfoProto.action_mask', index=8,
number=11, type=8, cpp_type=7, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,

name='custom_observation', full_name='communicator_objects.AgentInfoProto.custom_observation', index=10,
name='custom_observation', full_name='communicator_objects.AgentInfoProto.custom_observation', index=9,
number=12, type=11, cpp_type=10, label=1,
has_default_value=False, default_value=None,
message_type=None, enum_type=None, containing_type=None,

name='compressed_observations', full_name='communicator_objects.AgentInfoProto.compressed_observations', index=11,
name='compressed_observations', full_name='communicator_objects.AgentInfoProto.compressed_observations', index=10,
number=13, type=11, cpp_type=10, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,

oneofs=[
],
serialized_start=204,
serialized_end=612,
serialized_end=600,
)
_AGENTINFOPROTO.fields_by_name['custom_observation'].message_type = mlagents_dot_envs_dot_communicator__objects_dot_custom__observation__pb2._CUSTOMOBSERVATIONPROTO

6
ml-agents-envs/mlagents/envs/communicator_objects/agent_info_pb2.pyi


text_observation = ... # type: typing___Text
stored_vector_actions = ... # type: google___protobuf___internal___containers___RepeatedScalarFieldContainer[builtin___float]
stored_text_actions = ... # type: typing___Text
memories = ... # type: google___protobuf___internal___containers___RepeatedScalarFieldContainer[builtin___float]
reward = ... # type: builtin___float
done = ... # type: builtin___bool
max_step_reached = ... # type: builtin___bool

text_observation : typing___Optional[typing___Text] = None,
stored_vector_actions : typing___Optional[typing___Iterable[builtin___float]] = None,
stored_text_actions : typing___Optional[typing___Text] = None,
memories : typing___Optional[typing___Iterable[builtin___float]] = None,
reward : typing___Optional[builtin___float] = None,
done : typing___Optional[builtin___bool] = None,
max_step_reached : typing___Optional[builtin___bool] = None,

def CopyFrom(self, other_msg: google___protobuf___message___Message) -> None: ...
if sys.version_info >= (3,):
def HasField(self, field_name: typing_extensions___Literal[u"custom_observation"]) -> builtin___bool: ...
def ClearField(self, field_name: typing_extensions___Literal[u"action_mask",u"compressed_observations",u"custom_observation",u"done",u"id",u"max_step_reached",u"memories",u"reward",u"stacked_vector_observation",u"stored_text_actions",u"stored_vector_actions",u"text_observation"]) -> None: ...
def ClearField(self, field_name: typing_extensions___Literal[u"action_mask",u"compressed_observations",u"custom_observation",u"done",u"id",u"max_step_reached",u"reward",u"stacked_vector_observation",u"stored_text_actions",u"stored_vector_actions",u"text_observation"]) -> None: ...
def ClearField(self, field_name: typing_extensions___Literal[u"action_mask",b"action_mask",u"compressed_observations",b"compressed_observations",u"custom_observation",b"custom_observation",u"done",b"done",u"id",b"id",u"max_step_reached",b"max_step_reached",u"memories",b"memories",u"reward",b"reward",u"stacked_vector_observation",b"stacked_vector_observation",u"stored_text_actions",b"stored_text_actions",u"stored_vector_actions",b"stored_vector_actions",u"text_observation",b"text_observation"]) -> None: ...
def ClearField(self, field_name: typing_extensions___Literal[u"action_mask",b"action_mask",u"compressed_observations",b"compressed_observations",u"custom_observation",b"custom_observation",u"done",b"done",u"id",b"id",u"max_step_reached",b"max_step_reached",u"reward",b"reward",u"stacked_vector_observation",b"stacked_vector_observation",u"stored_text_actions",b"stored_text_actions",u"stored_vector_actions",b"stored_vector_actions",u"text_observation",b"text_observation"]) -> None: ...

73
ml-agents-envs/mlagents/envs/environment.py


seed: int = 0,
docker_training: bool = False,
no_graphics: bool = False,
timeout_wait: int = 30,
timeout_wait: int = 60,
args: Optional[List[str]] = None,
):
"""

) from perm
else:
"""
Comments for future maintenance:
xvfb-run is a wrapper around Xvfb, a virtual X server where all
rendering is done to virtual memory. It creates a new virtual server,
automatically picking a server number (`--auto-servernum`).
The server is passed its arguments via `server-args`; we tell
Xvfb to create screen number 0 with width 640, height 480 and a depth of 24 bits.
Note that 640 x 480 are the default width and height; the main reason
we pass these arguments is to change the depth from the default
of 8 bits to 24.
Unfortunately, this means we need to pass the arguments through
a shell, which is why we set `shell=True`. This adds its own
complications: e.g. SIGINT can bounce off the shell and not get propagated
to the child processes. This is why we add `exec`: the shell gets
launched, the arguments are passed to `xvfb-run`, and `exec` replaces the shell
we created with the `xvfb-run` process.
"""
# Comments for future maintenance:
# xvfb-run is a wrapper around Xvfb, a virtual X server where all
# rendering is done to virtual memory. It creates a new virtual server,
# automatically picking a server number (`--auto-servernum`).
# The server is passed its arguments via `server-args`; we tell
# Xvfb to create screen number 0 with width 640, height 480 and a depth of 24 bits.
# Note that 640 x 480 are the default width and height; the main reason
# we pass these arguments is to change the depth from the default
# of 8 bits to 24.
# Unfortunately, this means we need to pass the arguments through
# a shell, which is why we set `shell=True`. This adds its own
# complications: e.g. SIGINT can bounce off the shell and not get propagated
# to the child processes. This is why we add `exec`: the shell gets
# launched, the arguments are passed to `xvfb-run`, and `exec` replaces the shell
# we created with the `xvfb-run` process.
#
docker_ls = (
"exec xvfb-run --auto-servernum"
" --server-args='-screen 0 640x480x24'"

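A minimal, self-contained sketch of the launch line the comments above describe, kept outside this diff; the executable path, port, and the bare `Popen` call are hypothetical and only illustrate why `shell=True` and the leading `exec` are needed.

import subprocess

# Hypothetical executable path; in ML-Agents this comes from the docker setup.
launch_string = "/unity-volume/3DBall.x86_64"

# Wrap the executable in xvfb-run so rendering goes to a virtual X server
# with a 24-bit, 640x480 screen. The leading `exec` makes the shell replace
# itself with xvfb-run, so signals such as SIGINT reach the child directly.
command = (
    "exec xvfb-run --auto-servernum"
    " --server-args='-screen 0 640x480x24'"
    f" {launch_string} --port 5005"
)

# The quoted --server-args value forces the command through a shell.
proc = subprocess.Popen(command, shell=True)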
def step(
self,
vector_action: Dict[str, np.ndarray] = None,
memory: Optional[Dict[str, np.ndarray]] = None,
text_action: Optional[Dict[str, List[str]]] = None,
value: Optional[Dict[str, np.ndarray]] = None,
custom_action: Dict[str, Any] = None,

if self._is_first_message:
return self.reset()
vector_action = {} if vector_action is None else vector_action
memory = {} if memory is None else memory
text_action = {} if text_action is None else text_action
value = {} if value is None else value
custom_action = {} if custom_action is None else custom_action

"step cannot take a vector_action input"
)
if isinstance(memory, self.SINGLE_BRAIN_ACTION_TYPES):
if self._num_external_brains == 1:
memory = {self._external_brain_names[0]: memory}
elif self._num_external_brains > 1:
raise UnityActionException(
"You have {0} brains, you need to feed a dictionary of brain names as keys "
"and memories as values".format(self._num_external_brains)
)
else:
raise UnityActionException(
"There are no external brains in the environment, "
"step cannot take a memory input"
)
if isinstance(text_action, self.SINGLE_BRAIN_TEXT_TYPES):
if self._num_external_brains == 1:
text_action = {self._external_brain_names[0]: text_action}
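As the checks above show, `step` accepts either a bare action (when exactly one external brain is present) or a dictionary keyed by brain name. A minimal sketch of both call styles, assuming an Editor-attached environment; the brain name and action shape are placeholders, not taken from this diff.

import numpy as np
from mlagents.envs.environment import UnityEnvironment

env = UnityEnvironment(file_name=None)  # attach to an Editor instance
env.reset()

# One external brain: a bare array is wrapped into a dict internally.
brain_infos = env.step(np.array([[0.1, -0.2]]))

# Several brains (or just to be explicit): key actions by brain name.
brain_infos = env.step(vector_action={"3DBallLearning": np.array([[0.1, -0.2]])})

env.close()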

"step cannot take a custom_action input"
)
for brain_name in (
list(vector_action.keys())
+ list(memory.keys())
+ list(text_action.keys())
):
for brain_name in list(vector_action.keys()) + list(text_action.keys()):
if brain_name not in self._external_brain_names:
raise UnityActionException(
"The name {0} does not correspond to an external brain "

)
else:
vector_action[brain_name] = self._flatten(vector_action[brain_name])
if brain_name not in memory:
memory[brain_name] = []
else:
if memory[brain_name] is None:
memory[brain_name] = []
else:
memory[brain_name] = self._flatten(memory[brain_name])
if brain_name not in text_action:
text_action[brain_name] = [""] * n_agent
else:

)
step_input = self._generate_step_input(
vector_action, memory, text_action, value, custom_action
vector_action, text_action, value, custom_action
)
with hierarchical_timer("communicator.exchange"):
outputs = self.communicator.exchange(step_input)

if len(arr) == 0:
return arr
if isinstance(arr[0], np.ndarray):
# pylint: disable=no-member
# pylint: disable=not-an-iterable
arr = [item for sublist in arr for item in sublist]
arr = [float(x) for x in arr]
return arr

def _generate_step_input(
self,
vector_action: Dict[str, np.ndarray],
memory: Dict[str, np.ndarray],
text_action: Dict[str, list],
value: Dict[str, np.ndarray],
custom_action: Dict[str, list],

if n_agents == 0:
continue
_a_s = len(vector_action[b]) // n_agents
_m_s = len(memory[b]) // n_agents
memories=memory[b][i * _m_s : (i + 1) * _m_s],
text_actions=text_action[b][i],
custom_action=custom_action[b][i],
)

"""
try:
# A negative value -N indicates that the child was terminated by signal N (POSIX only).
s = signal.Signals(-returncode)
s = signal.Signals(-returncode) # pylint: disable=no-member
return s.name
except Exception:
# Should generally be a ValueError, but catch everything just in case.
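The hunk above is only a fragment; a self-contained version of the same returncode-to-signal-name conversion, for illustration (the function name and fallback return value here are not from the diff).

import signal


def returncode_to_signal_name(returncode: int) -> str:
    try:
        # A negative value -N indicates that the child was terminated by signal N (POSIX only).
        return signal.Signals(-returncode).name  # pylint: disable=no-member
    except Exception:
        # Should generally be a ValueError, but catch everything just in case.
        return "unknown"


print(returncode_to_signal_name(-2))   # SIGINT
print(returncode_to_signal_name(-15))  # SIGTERM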

2
ml-agents-envs/mlagents/envs/mock_communicator.py


:int base_port: Baseline port number used to connect to the Unity environment; worker_id increments over this.
:int worker_id: Number to add to communication port (5005) [0]. Used for asynchronous agent scenarios.
"""
super().__init__()
self.is_discrete = discrete_action
self.steps = 0
self.visual_inputs = visual_inputs

stored_vector_actions=vector_action,
stored_text_actions="",
text_observation="",
memories=[],
done=(i == 2),
max_step_reached=False,
id=i,

12
ml-agents-envs/mlagents/envs/rpc_communicator.py


:int base_port: Baseline port number used to connect to the Unity environment; worker_id increments over this.
:int worker_id: Number to add to communication port (5005) [0]. Used for asynchronous agent scenarios.
"""
super().__init__(worker_id, base_port)
self.port = base_port + worker_id
self.worker_id = worker_id
self.timeout_wait = timeout_wait

finally:
s.close()
def initialize(self, inputs: UnityInputProto) -> UnityOutputProto:
def poll_for_timeout(self):
"""
Polls the GRPC parent connection for data, to be used before calling recv. This prevents
us from hanging indefinitely in the case where the environment process has died or was not
launched.
"""
if not self.unity_to_external.parent_conn.poll(self.timeout_wait):
raise UnityTimeOutException(
"The Unity environment took too long to respond. Make sure that :\n"

)
def initialize(self, inputs: UnityInputProto) -> UnityOutputProto:
self.poll_for_timeout()
aca_param = self.unity_to_external.parent_conn.recv().unity_output
message = UnityMessageProto()
message.header.status = 200

message.header.status = 200
message.unity_input.CopyFrom(inputs)
self.unity_to_external.parent_conn.send(message)
self.poll_for_timeout()
output = self.unity_to_external.parent_conn.recv()
if output.header.status != 200:
return None
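A minimal, runnable sketch of the poll-before-recv pattern described in the `poll_for_timeout` docstring above, using a plain multiprocessing Pipe in place of the real GRPC-backed connection; the timeout value and messages are illustrative.

from multiprocessing import Pipe, Process


def fake_unity(conn):
    # Stand-in for the Unity process answering over the communicator.
    conn.send("unity_output")


if __name__ == "__main__":
    parent_conn, child_conn = Pipe()
    Process(target=fake_unity, args=(child_conn,)).start()

    timeout_wait = 60  # seconds; matches the new default in environment.py
    if not parent_conn.poll(timeout_wait):
        # Raising here avoids hanging forever in recv() when the child
        # process has died or was never launched.
        raise TimeoutError("The Unity environment took too long to respond.")
    print(parent_conn.recv())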

4
ml-agents-envs/mlagents/envs/simple_env_manager.py


self.previous_all_action_info = all_action_info
actions = {}
memories = {}
memories[brain_name] = action_info.memory
all_brain_info = self.env.step(actions, memories, texts, values)
all_brain_info = self.env.step(actions, texts, values, None)
step_brain_info = all_brain_info
step_info = EnvironmentStep(

8
ml-agents-envs/mlagents/envs/subprocess_env_manager.py


import cloudpickle
from mlagents.envs.environment import UnityEnvironment
from mlagents.envs.exception import UnityCommunicationException
from mlagents.envs.exception import UnityCommunicationException, UnityTimeOutException
from multiprocessing import Process, Pipe, Queue
from multiprocessing.connection import Connection
from queue import Empty as EmptyQueueException

if cmd.name == "step":
all_action_info = cmd.payload
actions = {}
memories = {}
memories[brain_name] = action_info.memory
all_brain_info = env.step(actions, memories, texts, values)
all_brain_info = env.step(actions, texts, values, None)
# The timers in this process are independent of all the other processes, including the "main" process,
# so after we send back the root timer we can safely clear them.
# Note that we could randomly return timers a fraction of the time if we wanted to reduce

_send_response("reset", all_brain_info)
elif cmd.name == "close":
break
except (KeyboardInterrupt, UnityCommunicationException):
except (KeyboardInterrupt, UnityCommunicationException, UnityTimeOutException):
logger.info(f"UnityEnvironment worker {worker_id}: environment stopping.")
step_queue.put(EnvironmentResponse("env_close", worker_id, None))
finally:

13
ml-agents-envs/mlagents/envs/timers.py


# # Unity ML-Agents Toolkit
import math
from time import perf_counter
from contextlib import contextmanager
from typing import Any, Callable, Dict, Generator, List, TypeVar
"""
Lightweight, hierarchical timers for profiling sections of code.

The decorator and contextmanager are equivalent; the context manager may be more useful if you want more control
over the timer name, or are splitting up multiple sections of a large function.
"""
import math
from time import perf_counter
from contextlib import contextmanager
from typing import Any, Callable, Dict, Generator, List, TypeVar
class TimerNode:
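A small usage sketch for these timers. `hierarchical_timer` appears elsewhere in this diff; the decorator name `timed` is an assumption here, so treat the exact import as illustrative.

from mlagents.envs.timers import hierarchical_timer, timed  # `timed` assumed


@timed  # records every call to this function under its own timer node
def expensive_step():
    return sum(range(10000))


def run():
    # The context manager is handy when one large function has several
    # sections worth timing under explicit names.
    with hierarchical_timer("outer"):
        with hierarchical_timer("outer.setup"):
            data = list(range(1000))
        with hierarchical_timer("outer.work"):
            expensive_step()
    return data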

1
ml-agents/mlagents/trainers/barracuda.py


# pylint: skip-file
from __future__ import print_function
from collections import defaultdict
import numpy as np

4
ml-agents/mlagents/trainers/bc/policy.py


feed_dict = self.fill_eval_dict(feed_dict, brain_info)
if self.use_recurrent:
if brain_info.memories.shape[1] == 0:
brain_info.memories = self.make_empty_memory(len(brain_info.agents))
feed_dict[self.model.memory_in] = brain_info.memories
feed_dict[self.model.memory_in] = self.retrieve_memories(brain_info.agents)
run_out = self._execute_model(feed_dict, self.inference_dict)
return run_out

18
ml-agents/mlagents/trainers/components/reward_signals/extrinsic/signal.py


from mlagents.envs.brain import BrainInfo
from mlagents.trainers.components.reward_signals import RewardSignal, RewardSignalResult
from mlagents.trainers.tf_policy import TFPolicy
from mlagents.trainers.models import LearningModel
def __init__(
self,
policy: TFPolicy,
policy_model: LearningModel,
strength: float,
gamma: float,
):
"""
The extrinsic reward generator. Returns the reward received from the environment.
:param policy: The Policy object (e.g. PPOPolicy) that this Reward Signal will apply to.
:param strength: The strength of the reward. The reward's raw value will be multiplied by this value.
:param gamma: The time discounting factor used for this reward.
:return: An ExtrinsicRewardSignal object.
"""
super().__init__(policy, policy_model, strength, gamma)
@classmethod
def check_config(
cls, config_dict: Dict[str, Any], param_keys: List[str] = None
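The docstring above amounts to scaling the reward reported by the environment by `strength` (with `gamma` applied later, when returns are computed). A minimal sketch of that idea, independent of the actual RewardSignal classes; the reward values are made up.

import numpy as np

strength = 0.5   # multiplier applied to the raw environment reward
gamma = 0.99     # discount factor used downstream when computing returns

env_rewards = np.array([0.0, 1.0, -0.1])  # hypothetical per-agent rewards
scaled_rewards = strength * env_rewards   # what the extrinsic signal reports
print(scaled_rewards)                     # [ 0.    0.5  -0.05]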

12
ml-agents/mlagents/trainers/learn.py


)
docker_training = docker_target_name is not None
if docker_training and env_path is not None:
"""
Comments for future maintenance:
Some OS/VM instances (e.g. the COS GCP image) mount filesystems
with the COS flag, which prevents execution of the Unity scene;
to get around this, we copy the executable into the
container.
"""
# Comments for future maintenance:
# Some OS/VM instances (e.g. the COS GCP image) mount filesystems
# with the COS flag, which prevents execution of the Unity scene;
# to get around this, we copy the executable into the
# container.
# Navigate in docker path and find env_path and copy it.
env_path = prepare_for_docker_run(docker_target_name, env_path)
seed_count = 10000
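A hedged sketch of the "copy the executable into the container" step described above. `prepare_for_docker_run` is the real helper, but its internals are not part of this diff, so the paths and copy logic below are assumptions.

import glob
import os
import shutil


def copy_env_into_container(docker_target_name: str, env_path: str) -> str:
    # Hypothetical layout: the read-only volume is mounted at /<docker_target_name>.
    mounted_dir = f"/{docker_target_name}"
    for item in glob.glob(f"{mounted_dir}/{env_path}*"):
        dest = os.path.join("/ml-agents", os.path.basename(item))
        # Copy the executable (and its _Data directory) off the COS-mounted
        # filesystem so that it can actually be executed.
        if os.path.isdir(item):
            shutil.copytree(item, dest)
        else:
            shutil.copy(item, dest)
    return os.path.join("/ml-agents", env_path)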

11
ml-agents/mlagents/trainers/ppo/multi_gpu_policy.py


class MultiGpuPPOPolicy(PPOPolicy):
def __init__(self, seed, brain, trainer_params, is_training, load):
"""
Policy for Proximal Policy Optimization Networks with multi-GPU training
:param seed: Random seed.
:param brain: Assigned Brain object.
:param trainer_params: Defined training parameters.
:param is_training: Whether the model should be trained.
:param load: Whether a pre-trained model will be loaded or a new one created.
"""
super().__init__(seed, brain, trainer_params, is_training, load)
def create_model(
self, brain, trainer_params, reward_signal_configs, is_training, load, seed
):

8
ml-agents/mlagents/trainers/ppo/policy.py


] = brain_info.previous_vector_actions.reshape(
[-1, len(self.model.act_size)]
)
if brain_info.memories.shape[1] == 0:
brain_info.memories = self.make_empty_memory(len(brain_info.agents))
feed_dict[self.model.memory_in] = brain_info.memories
feed_dict[self.model.memory_in] = self.retrieve_memories(brain_info.agents)
if self.use_continuous_act:
epsilon = np.random.normal(
size=(len(brain_info.vector_observations), self.model.act_size[0])

if self.use_vec_obs:
feed_dict[self.model.vector_in] = [brain_info.vector_observations[idx]]
if self.use_recurrent:
if brain_info.memories.shape[1] == 0:
brain_info.memories = self.make_empty_memory(len(brain_info.agents))
feed_dict[self.model.memory_in] = [brain_info.memories[idx]]
feed_dict[self.model.memory_in] = self.retrieve_memories([idx])
if not self.use_continuous_act and self.use_recurrent:
feed_dict[self.model.prev_action] = [
brain_info.previous_vector_actions[idx]

17
ml-agents/mlagents/trainers/rl_trainer.py


] # TODO add types to brain.py methods
vector_observations = []
text_observations = []
memories = []
rewards = []
local_dones = []
max_reacheds = []

agent_brain_info.vector_observations[agent_index]
)
text_observations.append(agent_brain_info.text_observations[agent_index])
if self.policy.use_recurrent:
if len(agent_brain_info.memories) > 0:
memories.append(agent_brain_info.memories[agent_index])
else:
memories.append(self.policy.make_empty_memory(1))
rewards.append(agent_brain_info.rewards[agent_index])
local_dones.append(agent_brain_info.local_done[agent_index])
max_reacheds.append(agent_brain_info.max_reached[agent_index])

agent_brain_info.previous_text_actions[agent_index]
)
action_masks.append(agent_brain_info.action_masks[agent_index])
# Check if memories exists (i.e. next_info is not empty) before attempting vstack
if self.policy.use_recurrent and memories:
memories = np.vstack(memories)
memories,
rewards,
agents,
local_dones,

next_info.vector_observations[next_idx]
)
if self.policy.use_recurrent:
if stored_info.memories.shape[1] == 0:
stored_info.memories = np.zeros(
(len(stored_info.agents), self.policy.m_size)
)
stored_info.memories[idx]
self.policy.retrieve_memories([agent_id])[0, :]
self.training_buffer[agent_id]["masks"].append(1.0)
self.training_buffer[agent_id]["done"].append(
next_info.local_done[next_idx]

6
ml-agents/mlagents/trainers/sac/models.py


import logging
import numpy as np
from typing import Optional
import tensorflow as tf
from mlagents.trainers.models import LearningModel, LearningRateSchedule, EncoderType

self.stream_names = stream_names
self.h_size = h_size
self.activ_fn = self.swish
self.policy_memory_in: Optional[tf.Tensor] = None
self.value_memory_in: Optional[tf.Tensor] = None
self.q1_memory_in: Optional[tf.Tensor] = None
self.q2_memory_in: Optional[tf.Tensor] = None
def get_vars(self, scope):
return tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope)

4
ml-agents/mlagents/trainers/sac/policy.py


] = brain_info.previous_vector_actions.reshape(
[-1, len(self.model.act_size)]
)
if brain_info.memories.shape[1] == 0:
brain_info.memories = self.make_empty_memory(len(brain_info.agents))
feed_dict[self.model.memory_in] = brain_info.memories
feed_dict[self.model.memory_in] = self.retrieve_memories(brain_info.agents)
feed_dict = self.fill_eval_dict(feed_dict, brain_info)
run_out = self._execute_model(feed_dict, self.inference_dict)

Some files were not shown because too many files changed in this diff
