
Merge branch 'master' into develop-centralizedcritic

/comms-grad
Ervin Teng, 3 years ago
Current commit: 25dfd883
130 files changed, with 2454 insertions and 1515 deletions
Changed files (changed lines per file in parentheses):
  1. .github/workflows/pytest.yml (4)
  2. Project/Assets/ML-Agents/Examples/GridWorld/Scripts/GridAgent.cs (4)
  3. Project/Assets/ML-Agents/Examples/Reacher/Scripts/ReacherAgent.cs (1)
  4. com.unity.ml-agents/CHANGELOG.md (5)
  5. com.unity.ml-agents/Editor/BehaviorParametersEditor.cs (32)
  6. com.unity.ml-agents/Editor/BrainParametersDrawer.cs (58)
  7. com.unity.ml-agents/Editor/DemonstrationDrawer.cs (46)
  8. com.unity.ml-agents/Runtime/Academy.cs (7)
  9. com.unity.ml-agents/Runtime/Actuators/ActionSegment.cs (9)
  10. com.unity.ml-agents/Runtime/Actuators/ActionSpec.cs (36)
  11. com.unity.ml-agents/Runtime/Actuators/ActuatorManager.cs (6)
  12. com.unity.ml-agents/Runtime/Actuators/IActionReceiver.cs (57)
  13. com.unity.ml-agents/Runtime/Actuators/VectorActuator.cs (32)
  14. com.unity.ml-agents/Runtime/Agent.cs (56)
  15. com.unity.ml-agents/Runtime/Agent.deprecated.cs (14)
  16. com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs (113)
  17. com.unity.ml-agents/Runtime/Communicator/ICommunicator.cs (2)
  18. com.unity.ml-agents/Runtime/Communicator/RpcCommunicator.cs (13)
  19. com.unity.ml-agents/Runtime/Communicator/UnityRLCapabilities.cs (13)
  20. com.unity.ml-agents/Runtime/Grpc/CommunicatorObjects/AgentAction.cs (82)
  21. com.unity.ml-agents/Runtime/Grpc/CommunicatorObjects/BrainParameters.cs (348)
  22. com.unity.ml-agents/Runtime/Grpc/CommunicatorObjects/Capabilities.cs (44)
  23. com.unity.ml-agents/Runtime/Inference/ApplierImpl.cs (44)
  24. com.unity.ml-agents/Runtime/Inference/BarracudaModelParamLoader.cs (271)
  25. com.unity.ml-agents/Runtime/Inference/GeneratorImpl.cs (4)
  26. com.unity.ml-agents/Runtime/Inference/ModelRunner.cs (12)
  27. com.unity.ml-agents/Runtime/Inference/TensorApplier.cs (35)
  28. com.unity.ml-agents/Runtime/Inference/TensorGenerator.cs (26)
  29. com.unity.ml-agents/Runtime/Inference/TensorNames.cs (15)
  30. com.unity.ml-agents/Runtime/Policies/BarracudaPolicy.cs (19)
  31. com.unity.ml-agents/Runtime/Policies/BrainParameters.cs (109)
  32. com.unity.ml-agents/Runtime/Policies/RemotePolicy.cs (14)
  33. com.unity.ml-agents/Runtime/Sensors/ObservationWriter.cs (4)
  34. com.unity.ml-agents/Tests/Editor/Actuators/ActuatorManagerTests.cs (12)
  35. com.unity.ml-agents/Tests/Editor/Actuators/VectorActuatorTests.cs (10)
  36. com.unity.ml-agents/Tests/Editor/DemonstrationTests.cs (9)
  37. com.unity.ml-agents/Tests/Editor/EditModeTestInternalBrainTensorApplier.cs (74)
  38. com.unity.ml-agents/Tests/Editor/EditModeTestInternalBrainTensorGenerator.cs (7)
  39. com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs (27)
  40. com.unity.ml-agents/Tests/Editor/ModelRunnerTest.cs (62)
  41. com.unity.ml-agents/Tests/Editor/ParameterLoaderTest.cs (217)
  42. com.unity.ml-agents/Tests/Editor/TestModels/discrete1vis0vec_2_3action_recurr_deprecated.nn.meta (2)
  43. com.unity.ml-agents/Tests/Editor/TestModels/continuous2vis8vec2action_deprecated.nn.meta (2)
  44. com.unity.ml-agents/Tests/Runtime/RuntimeAPITest.cs (3)
  45. docs/Getting-Started.md (8)
  46. docs/Learning-Environment-Create-New.md (15)
  47. docs/Learning-Environment-Design-Agents.md (80)
  48. docs/Learning-Environment-Design.md (4)
  49. docs/Python-API.md (64)
  50. docs/Training-Configuration-File.md (4)
  51. gym-unity/gym_unity/envs/__init__.py (10)
  52. ml-agents-envs/mlagents_envs/base_env.py (148)
  53. ml-agents-envs/mlagents_envs/communicator_objects/agent_action_pb2.py (22)
  54. ml-agents-envs/mlagents_envs/communicator_objects/agent_action_pb2.pyi (12)
  55. ml-agents-envs/mlagents_envs/communicator_objects/brain_parameters_pb2.py (82)
  56. ml-agents-envs/mlagents_envs/communicator_objects/brain_parameters_pb2.pyi (45)
  57. ml-agents-envs/mlagents_envs/communicator_objects/capabilities_pb2.py (13)
  58. ml-agents-envs/mlagents_envs/communicator_objects/capabilities_pb2.pyi (6)
  59. ml-agents-envs/mlagents_envs/environment.py (32)
  60. ml-agents-envs/mlagents_envs/mock_communicator.py (18)
  61. ml-agents-envs/mlagents_envs/rpc_utils.py (27)
  62. ml-agents-envs/mlagents_envs/tests/test_envs.py (6)
  63. ml-agents-envs/mlagents_envs/tests/test_rpc_utils.py (33)
  64. ml-agents-envs/mlagents_envs/tests/test_steps.py (27)
  65. ml-agents/mlagents/trainers/action_info.py (3)
  66. ml-agents/mlagents/trainers/agent_processor.py (23)
  67. ml-agents/mlagents/trainers/demo_loader.py (17)
  68. ml-agents/mlagents/trainers/env_manager.py (1)
  69. ml-agents/mlagents/trainers/optimizer/tf_optimizer.py (4)
  70. ml-agents/mlagents/trainers/policy/policy.py (40)
  71. ml-agents/mlagents/trainers/policy/tf_policy.py (33)
  72. ml-agents/mlagents/trainers/policy/torch_policy.py (84)
  73. ml-agents/mlagents/trainers/ppo/optimizer_tf.py (9)
  74. ml-agents/mlagents/trainers/ppo/optimizer_torch.py (12)
  75. ml-agents/mlagents/trainers/sac/optimizer_tf.py (6)
  76. ml-agents/mlagents/trainers/sac/optimizer_torch.py (278)
  77. ml-agents/mlagents/trainers/simple_env_manager.py (2)
  78. ml-agents/mlagents/trainers/subprocess_env_manager.py (56)
  79. ml-agents/mlagents/trainers/tests/mock_brain.py (24)
  80. ml-agents/mlagents/trainers/tests/simple_test_envs.py (91)
  81. ml-agents/mlagents/trainers/tests/tensorflow/test_ppo.py (66)
  82. ml-agents/mlagents/trainers/tests/tensorflow/test_simple_rl.py (128)
  83. ml-agents/mlagents/trainers/tests/tensorflow/test_tf_policy.py (12)
  84. ml-agents/mlagents/trainers/tests/test_agent_processor.py (41)
  85. ml-agents/mlagents/trainers/tests/test_demo_loader.py (10)
  86. ml-agents/mlagents/trainers/tests/test_subprocess_env_manager.py (33)
  87. ml-agents/mlagents/trainers/tests/test_trajectory.py (7)
  88. ml-agents/mlagents/trainers/tests/torch/saver/test_saver.py (13)
  89. ml-agents/mlagents/trainers/tests/torch/test_distributions.py (2)
  90. ml-agents/mlagents/trainers/tests/torch/test_networks.py (90)
  91. ml-agents/mlagents/trainers/tests/torch/test_policy.py (28)
  92. ml-agents/mlagents/trainers/tests/torch/test_ppo.py (15)
  93. ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_curiosity.py (2)
  94. ml-agents/mlagents/trainers/tests/torch/test_reward_providers/utils.py (11)
  95. ml-agents/mlagents/trainers/tests/torch/test_sac.py (3)
  96. ml-agents/mlagents/trainers/tests/torch/test_simple_rl.py (132)
  97. ml-agents/mlagents/trainers/tests/torch/test_utils.py (44)
  98. ml-agents/mlagents/trainers/tf/components/bc/module.py (6)
  99. ml-agents/mlagents/trainers/tf/components/reward_signals/curiosity/signal.py (10)
  100. ml-agents/mlagents/trainers/tf/components/reward_signals/gail/signal.py (17)

4
.github/workflows/pytest.yml


- 'gym-unity/**'
- 'test_constraints*.txt'
- 'test_requirements.txt'
- '.github/workflows/pytest.yml'
push:
branches: [master]

run: python -c "import sys; print(sys.version)"
- name: Install dependencies
run: |
python -m pip install --upgrade pip
# pin pip to workaround https://github.com/pypa/pip/issues/9180
python -m pip install pip==20.2
python -m pip install --upgrade setuptools
python -m pip install --progress-bar=off -e ./ml-agents-envs -c ${{ matrix.pip_constraints }}
python -m pip install --progress-bar=off -e ./ml-agents -c ${{ matrix.pip_constraints }}

4
Project/Assets/ML-Agents/Examples/GridWorld/Scripts/GridAgent.cs


if (maskActions)
{
// Prevents the agent from picking an action that would make it collide with a wall
var positionX = (int)transform.position.x;
var positionZ = (int)transform.position.z;
var positionX = (int)transform.localPosition.x;
var positionZ = (int)transform.localPosition.z;
var maxPosition = (int)m_ResetParams.GetWithDefault("gridSize", 5f) - 1;
if (positionX == 0)
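The hunk above only changes how the grid coordinates are computed (world position to localPosition); the surrounding masking code is collapsed by the diff view. Below is a minimal sketch of how that math typically feeds the discrete action mask, assuming the CollectDiscreteActionMasks/DiscreteActionMasker.WriteMask API of this release; the k_Left/k_Right indices and the class skeleton are illustrative, not part of the hunk.

```csharp
using Unity.MLAgents;
using UnityEngine;

public class MaskedGridAgent : Agent
{
    // Hypothetical indices for the discrete movement branch (branch 0).
    const int k_Left = 3;
    const int k_Right = 4;

    public bool maskActions = true;
    EnvironmentParameters m_ResetParams;

    public override void Initialize()
    {
        m_ResetParams = Academy.Instance.EnvironmentParameters;
    }

    public override void CollectDiscreteActionMasks(DiscreteActionMasker actionMasker)
    {
        if (!maskActions) return;

        // localPosition keeps the mask correct even when the whole grid is offset in the scene,
        // which is the point of the change in the hunk above.
        var positionX = (int)transform.localPosition.x;
        var maxPosition = (int)m_ResetParams.GetWithDefault("gridSize", 5f) - 1;

        if (positionX == 0)
        {
            actionMasker.WriteMask(0, new[] { k_Left });   // block stepping off the left edge
        }
        if (positionX == maxPosition)
        {
            actionMasker.WriteMask(0, new[] { k_Right });  // block stepping off the right edge
        }
    }
}
```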

1
Project/Assets/ML-Agents/Examples/Reacher/Scripts/ReacherAgent.cs


/// The agent's four actions correspond to torques on each of the two joints.
/// </summary>
public override void OnActionReceived(ActionBuffers actionBuffers)
{
m_GoalDegree += m_GoalSpeed;
UpdateGoalPosition();

5
com.unity.ml-agents/CHANGELOG.md


### Major Changes
#### com.unity.ml-agents (C#)
#### ml-agents / ml-agents-envs / gym-unity (Python)
- PyTorch trainers now support training agents with both continuous and discrete action spaces. (#4702)
- Agent with both continuous and discrete actions is now supported. You can specify
continuous and discrete action sizes respectively in Behavior Parameters. (#4702, #4718)
- `ActionSpec.validate_action()` now enforces that `UnityEnvironment.set_action_for_agent()` receives a 1D `np.array`.
### Bug Fixes
#### com.unity.ml-agents (C#)
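To make the hybrid-actions entries above concrete, here is a minimal sketch of an Agent that consumes both action types through the ActionBuffers API introduced in this merge. The behavior setup (2 continuous actions, one discrete branch of size 3), the Rigidbody usage, and the reward logic are illustrative assumptions.

```csharp
using Unity.MLAgents;
using Unity.MLAgents.Actuators;
using UnityEngine;

// Sketch only: assumes 2 continuous actions and one discrete branch of size 3
// configured on the Behavior Parameters component.
public class HybridActionAgent : Agent
{
    Rigidbody m_Body;

    public override void Initialize()
    {
        m_Body = GetComponent<Rigidbody>();
    }

    public override void OnActionReceived(ActionBuffers actions)
    {
        // Continuous segment, e.g. planar movement forces.
        var move = new Vector3(actions.ContinuousActions[0], 0f, actions.ContinuousActions[1]);
        m_Body.AddForce(move);

        // Discrete segment, e.g. branch 0 picks one of three options.
        var choice = actions.DiscreteActions[0];
        if (choice == 2)
        {
            AddReward(0.1f);
        }
    }
}
```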

32
com.unity.ml-agents/Editor/BehaviorParametersEditor.cs


float m_TimeSinceModelReload;
// Whether or not the model needs to be reloaded
bool m_RequireReload;
const string k_BehaviorName = "m_BehaviorName";
const string k_BrainParametersName = "m_BrainParameters";
const string k_ModelName = "m_Model";
const string k_InferenceDeviceName = "m_InferenceDevice";
const string k_BehaviorTypeName = "m_BehaviorType";
const string k_TeamIdName = "TeamId";
const string k_GroupIdName = "GroupId";
const string k_UseChildSensorsName = "m_UseChildSensors";
const string k_ObservableAttributeHandlingName = "m_ObservableAttributeHandling";
public override void OnInspectorGUI()
{

bool needBrainParametersUpdate; // Whether the brain parameters changed
// Drawing the Behavior Parameters
EditorGUI.indentLevel++;

{
EditorGUILayout.PropertyField(so.FindProperty("m_BehaviorName"));
EditorGUILayout.PropertyField(so.FindProperty(k_BehaviorName));
EditorGUI.BeginChangeCheck();
EditorGUILayout.PropertyField(so.FindProperty("m_BrainParameters"), true);
EditorGUILayout.PropertyField(so.FindProperty(k_BrainParametersName), true);
needBrainParametersUpdate = EditorGUI.EndChangeCheck();
EditorGUILayout.PropertyField(so.FindProperty("m_Model"), true);
EditorGUILayout.PropertyField(so.FindProperty(k_ModelName), true);
EditorGUILayout.PropertyField(so.FindProperty("m_InferenceDevice"), true);
EditorGUILayout.PropertyField(so.FindProperty(k_InferenceDeviceName), true);
EditorGUI.indentLevel--;
}
needPolicyUpdate = needPolicyUpdate || EditorGUI.EndChangeCheck();

EditorGUILayout.PropertyField(so.FindProperty("m_BehaviorType"));
EditorGUILayout.PropertyField(so.FindProperty(k_BehaviorTypeName));
EditorGUILayout.PropertyField(so.FindProperty("TeamId"));
EditorGUILayout.PropertyField(so.FindProperty("GroupId"));
EditorGUILayout.PropertyField(so.FindProperty(k_GroupIdName));
EditorGUILayout.PropertyField(so.FindProperty(k_TeamIdName));
EditorGUILayout.PropertyField(so.FindProperty("m_UseChildSensors"), true);
EditorGUILayout.PropertyField(so.FindProperty("m_ObservableAttributeHandling"), true);
EditorGUILayout.PropertyField(so.FindProperty(k_UseChildSensorsName), true);
EditorGUILayout.PropertyField(so.FindProperty(k_ObservableAttributeHandlingName), true);
}
EditorGUI.EndDisabledGroup();

// Display all failed checks
D.logEnabled = false;
Model barracudaModel = null;
var model = (NNModel)serializedObject.FindProperty("m_Model").objectReferenceValue;
var model = (NNModel)serializedObject.FindProperty(k_ModelName).objectReferenceValue;
var behaviorParameters = (BehaviorParameters)target;
// Grab the sensor components, since we need them to determine the observation sizes.

58
com.unity.ml-agents/Editor/BrainParametersDrawer.cs


// The height of a line in the Unity Inspectors
const float k_LineHeight = 17f;
const int k_VecObsNumLine = 3;
const string k_ActionSizePropName = "VectorActionSize";
const string k_ActionTypePropName = "VectorActionSpaceType";
const string k_ActionSpecName = "m_ActionSpec";
const string k_ContinuousActionSizeName = "m_NumContinuousActions";
const string k_DiscreteBranchSizeName = "BranchSizes";
const string k_ActionDescriptionPropName = "VectorActionDescriptions";
const string k_VecObsPropName = "VectorObservationSize";
const string k_NumVecObsPropName = "NumStackedVectorObservations";

EditorGUI.LabelField(position, "Vector Action");
position.y += k_LineHeight;
EditorGUI.indentLevel++;
var bpVectorActionType = property.FindPropertyRelative(k_ActionTypePropName);
EditorGUI.PropertyField(
position,
bpVectorActionType,
new GUIContent("Space Type",
"Corresponds to whether state vector contains a single integer (Discrete) " +
"or a series of real-valued floats (Continuous)."));
var actionSpecProperty = property.FindPropertyRelative(k_ActionSpecName);
DrawContinuousVectorAction(position, actionSpecProperty);
if (bpVectorActionType.enumValueIndex == 1)
{
DrawContinuousVectorAction(position, property);
}
else
{
DrawDiscreteVectorAction(position, property);
}
DrawDiscreteVectorAction(position, actionSpecProperty);
}
/// <summary>

/// to make the custom GUI for.</param>
static void DrawContinuousVectorAction(Rect position, SerializedProperty property)
{
var vecActionSize = property.FindPropertyRelative(k_ActionSizePropName);
// This check is here due to:
// https://fogbugz.unity3d.com/f/cases/1246524/
// If this case has been resolved, please remove this if condition.
if (vecActionSize.arraySize != 1)
{
vecActionSize.arraySize = 1;
}
var continuousActionSize =
vecActionSize.GetArrayElementAtIndex(0);
var continuousActionSize = property.FindPropertyRelative(k_ContinuousActionSizeName);
new GUIContent("Space Size", "Length of continuous action vector."));
new GUIContent("Continuous Action Size", "Length of continuous action vector."));
}
/// <summary>

/// to make the custom GUI for.</param>
static void DrawDiscreteVectorAction(Rect position, SerializedProperty property)
{
var vecActionSize = property.FindPropertyRelative(k_ActionSizePropName);
var branchSizes = property.FindPropertyRelative(k_DiscreteBranchSizeName);
position, "Branches Size", vecActionSize.arraySize);
position, "Discrete Branch Size", branchSizes.arraySize);
if (newSize != vecActionSize.arraySize)
if (newSize != branchSizes.arraySize)
vecActionSize.arraySize = newSize;
branchSizes.arraySize = newSize;
}
position.y += k_LineHeight;

branchIndex < vecActionSize.arraySize;
branchIndex < branchSizes.arraySize;
vecActionSize.GetArrayElementAtIndex(branchIndex);
branchSizes.GetArrayElementAtIndex(branchIndex);
EditorGUI.PropertyField(
position,

/// <returns>The height of the drawer of the Vector Action.</returns>
static float GetHeightDrawVectorAction(SerializedProperty property)
{
var actionSize = 2 + property.FindPropertyRelative(k_ActionSizePropName).arraySize;
if (property.FindPropertyRelative(k_ActionTypePropName).enumValueIndex == 0)
{
actionSize += 1;
}
return actionSize * k_LineHeight;
var actionSpecProperty = property.FindPropertyRelative(k_ActionSpecName);
var numActionLines = 3 + actionSpecProperty.FindPropertyRelative(k_DiscreteBranchSizeName).arraySize;
return numActionLines * k_LineHeight;
}
}
}

46
com.unity.ml-agents/Editor/DemonstrationDrawer.cs


using System.Text;
using UnityEditor;
using Unity.MLAgents.Demonstrations;
using Unity.MLAgents.Policies;
namespace Unity.MLAgents.Editor

SerializedProperty m_BrainParameters;
SerializedProperty m_DemoMetaData;
SerializedProperty m_ObservationShapes;
const string k_BrainParametersName = "brainParameters";
const string k_MetaDataName = "metaData";
const string k_ObservationSummariesName = "observationSummaries";
const string k_DemonstrationName = "demonstrationName";
const string k_NumberStepsName = "numberSteps";
const string k_NumberEpisodesName = "numberEpisodes";
const string k_MeanRewardName = "meanReward";
const string k_ActionSpecName = "ActionSpec";
const string k_NumContinuousActionsName = "m_NumContinuousActions";
const string k_NumDiscreteActionsName = "m_NumDiscreteActions";
const string k_ShapeName = "shape";
m_BrainParameters = serializedObject.FindProperty("brainParameters");
m_DemoMetaData = serializedObject.FindProperty("metaData");
m_ObservationShapes = serializedObject.FindProperty("observationSummaries");
m_BrainParameters = serializedObject.FindProperty(k_BrainParametersName);
m_DemoMetaData = serializedObject.FindProperty(k_MetaDataName);
m_ObservationShapes = serializedObject.FindProperty(k_ObservationSummariesName);
}
/// <summary>

{
var nameProp = property.FindPropertyRelative("demonstrationName");
var experiencesProp = property.FindPropertyRelative("numberSteps");
var episodesProp = property.FindPropertyRelative("numberEpisodes");
var rewardsProp = property.FindPropertyRelative("meanReward");
var nameProp = property.FindPropertyRelative(k_DemonstrationName);
var experiencesProp = property.FindPropertyRelative(k_NumberStepsName);
var episodesProp = property.FindPropertyRelative(k_NumberEpisodesName);
var rewardsProp = property.FindPropertyRelative(k_MeanRewardName);
var nameLabel = nameProp.displayName + ": " + nameProp.stringValue;
var experiencesLabel = experiencesProp.displayName + ": " + experiencesProp.intValue;

/// </summary>
void MakeActionsProperty(SerializedProperty property)
{
var actSizeProperty = property.FindPropertyRelative("VectorActionSize");
var actSpaceTypeProp = property.FindPropertyRelative("VectorActionSpaceType");
var actSpecProperty = property.FindPropertyRelative(k_ActionSpecName);
var continuousSizeProperty = actSpecProperty.FindPropertyRelative(k_NumContinuousActionsName);
var discreteSizeProperty = actSpecProperty.FindPropertyRelative(k_NumDiscreteActionsName);
var vecActSizeLabel =
actSizeProperty.displayName + ": " + BuildIntArrayLabel(actSizeProperty);
var actSpaceTypeLabel = actSpaceTypeProp.displayName + ": " +
(SpaceType)actSpaceTypeProp.enumValueIndex;
var continuousSizeLabel =
continuousSizeProperty.displayName + ": " + continuousSizeProperty.intValue;
var discreteSizeLabel = discreteSizeProperty.displayName + ": " +
discreteSizeProperty.intValue;
EditorGUILayout.LabelField(vecActSizeLabel);
EditorGUILayout.LabelField(actSpaceTypeLabel);
EditorGUILayout.LabelField(continuousSizeLabel);
EditorGUILayout.LabelField(discreteSizeLabel);
}
/// <summary>

for (var i = 0; i < numObservations; i++)
{
var summary = obsSummariesProperty.GetArrayElementAtIndex(i);
var shapeProperty = summary.FindPropertyRelative("shape");
var shapeProperty = summary.FindPropertyRelative(k_ShapeName);
shapesLabels.Add(BuildIntArrayLabel(shapeProperty));
}

7
com.unity.ml-agents/Runtime/Academy.cs


/// <term>1.2.0</term>
/// <description>Support compression mapping for stacked compressed observations.</description>
/// </item>
/// <item>
/// <term>1.3.0</term>
/// <description>Support action spaces with both continuous and discrete actions.</description>
/// </item>
const string k_ApiVersion = "1.2.0";
const string k_ApiVersion = "1.3.0";
/// <summary>
/// Unity package version of com.unity.ml-agents.

Dispose();
}
}
#endif
/// <summary>

9
com.unity.ml-agents/Runtime/Actuators/ActionSegment.cs


System.Array.Clear(Array, Offset, Length);
}
/// <summary>
/// Check if the segment is empty.
/// </summary>
/// <returns>Whether or not the segment is empty.</returns>
public bool IsEmpty()
{
return Array == null || Array.Length == 0;
}
/// <inheritdoc/>
IEnumerator<T> IEnumerable<T>.GetEnumerator()
{

36
com.unity.ml-agents/Runtime/Actuators/ActionSpec.cs


using System;
using System.Collections.Generic;
using UnityEngine;
namespace Unity.MLAgents.Actuators
{

public readonly struct ActionSpec
[Serializable]
public struct ActionSpec
[SerializeField]
int m_NumContinuousActions;
/// <summary>
/// An array of branch sizes for our action space.

///
/// For an IActuator with a Continuous it will be null.
/// </summary>
public readonly int[] BranchSizes;
public int[] BranchSizes;
public int NumContinuousActions { get; }
public int NumContinuousActions { get { return m_NumContinuousActions; } set { m_NumContinuousActions = value; } }
public int NumDiscreteActions { get; }
public int NumDiscreteActions { get { return BranchSizes == null ? 0 : BranchSizes.Length; } }
public int SumOfDiscreteBranchSizes { get; }
public int SumOfDiscreteBranchSizes { get { return BranchSizes == null ? 0 : BranchSizes.Sum(); } }
/// <summary>
/// Creates a Continuous <see cref="ActionSpec"/> with the number of actions available.

public static ActionSpec MakeContinuous(int numActions)
{
var actuatorSpace = new ActionSpec(numActions, 0);
var actuatorSpace = new ActionSpec(numActions, null);
return actuatorSpace;
}

public static ActionSpec MakeDiscrete(params int[] branchSizes)
{
var numActions = branchSizes.Length;
var actuatorSpace = new ActionSpec(0, numActions, branchSizes);
var actuatorSpace = new ActionSpec(0, branchSizes);
internal ActionSpec(int numContinuousActions, int numDiscreteActions, int[] branchSizes = null)
internal ActionSpec(int numContinuousActions, int[] branchSizes = null)
NumContinuousActions = numContinuousActions;
NumDiscreteActions = numDiscreteActions;
m_NumContinuousActions = numContinuousActions;
SumOfDiscreteBranchSizes = branchSizes?.Sum() ?? 0;
/// Temporary check that the ActionSpec uses either all continuous or all discrete actions.
/// This should be removed once the trainer supports them.
/// Check that the ActionSpec uses either all continuous or all discrete actions.
/// This is only used when connecting to old versions of the trainer that don't support this.
internal void CheckNotHybrid()
internal void CheckAllContinuousOrDiscrete()
throw new UnityAgentsException("ActionSpecs must be all continuous or all discrete.");
throw new UnityAgentsException(
"Action spaces with both continuous and discrete actions are not supported by the trainer. " +
"ActionSpecs must be all continuous or all discrete."
);
}
}
}
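A short sketch of the factory methods and derived properties defined above; since the hybrid constructor is still internal at this point, the example sticks to the public MakeContinuous/MakeDiscrete helpers. The logged values follow directly from the property implementations shown in the hunk.

```csharp
using Unity.MLAgents.Actuators;
using UnityEngine;

public static class ActionSpecExamples
{
    public static void Demo()
    {
        // Purely continuous space with 3 actions.
        var continuous = ActionSpec.MakeContinuous(3);
        Debug.Log(continuous.NumContinuousActions);    // 3
        Debug.Log(continuous.NumDiscreteActions);      // 0 (BranchSizes is null)

        // Purely discrete space with two branches of size 3 and 2.
        var discrete = ActionSpec.MakeDiscrete(3, 2);
        Debug.Log(discrete.NumDiscreteActions);        // 2 (length of BranchSizes)
        Debug.Log(discrete.SumOfDiscreteBranchSizes);  // 5 (3 + 2)
    }
}
```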

6
com.unity.ml-agents/Runtime/Actuators/ActuatorManager.cs


}
}
return new ActionSpec(numContinuousActions, numDiscreteActions, combinedBranchSizes);
return new ActionSpec(numContinuousActions, combinedBranchSizes);
}
/// <summary>

Debug.Assert(
!m_Actuators[i].Name.Equals(m_Actuators[i + 1].Name),
"Actuator names must be unique.");
var first = m_Actuators[i].ActionSpec;
var second = m_Actuators[i + 1].ActionSpec;
Debug.Assert(first.NumContinuousActions > 0 == second.NumContinuousActions > 0,
"Actuators on the same Agent must have the same action SpaceType.");
}
}

57
com.unity.ml-agents/Runtime/Actuators/IActionReceiver.cs


}
/// <summary>
/// Construct an <see cref="ActionBuffers"/> instance with <see cref="ActionSpec"/>. All values are initialized to zeros.
/// /// </summary>
/// <param name="actionSpec">The <see cref="ActionSpec"/> to send to an <see cref="IActionReceiver"/>.</param>
public ActionBuffers(ActionSpec actionSpec)
: this(new ActionSegment<float>(new float[actionSpec.NumContinuousActions]),
new ActionSegment<int>(new int[actionSpec.NumDiscreteActions]))
{ }
/// <summary>
/// Create an <see cref="ActionBuffers"/> instance with ActionSpec and all actions stored as a float array.
/// </summary>
/// <param name="actionSpec"><see cref="ActionSpec"/> of the <see cref="ActionBuffers"/></param>
/// <param name="actions">The float array of all actions, including discrete and continuous actions.</param>
/// <returns>An <see cref="ActionBuffers"/> instance initialized with a <see cref="ActionSpec"/> and a float array.
internal static ActionBuffers FromActionSpec(ActionSpec actionSpec, float[] actions)
{
if (actions == null)
{
return ActionBuffers.Empty;
}
Debug.Assert(actions.Length == actionSpec.NumContinuousActions + actionSpec.NumDiscreteActions,
$"The length of '{nameof(actions)}' does not match the total size of ActionSpec.\n" +
$"{nameof(actions)}.Length: {actions.Length}\n" +
$"{nameof(actionSpec)}: {actionSpec.NumContinuousActions + actionSpec.NumDiscreteActions}");
ActionSegment<float> continuousActionSegment = ActionSegment<float>.Empty;
ActionSegment<int> discreteActionSegment = ActionSegment<int>.Empty;
int offset = 0;
if (actionSpec.NumContinuousActions > 0)
{
continuousActionSegment = new ActionSegment<float>(actions, 0, actionSpec.NumContinuousActions);
offset += actionSpec.NumContinuousActions;
}
if (actionSpec.NumDiscreteActions > 0)
{
int[] discreteActions = new int[actionSpec.NumDiscreteActions];
for (var i = 0; i < actionSpec.NumDiscreteActions; i++)
{
discreteActions[i] = (int)actions[i + offset];
}
discreteActionSegment = new ActionSegment<int>(discreteActions);
}
return new ActionBuffers(continuousActionSegment, discreteActionSegment);
}
/// <summary>
/// Clear the <see cref="ContinuousActions"/> and <see cref="DiscreteActions"/> segments to be all zeros.
/// </summary>
public void Clear()

}
/// <summary>
/// Check if the <see cref="ActionBuffers"/> is empty.
/// </summary>
public bool IsEmpty()
{
return ContinuousActions.IsEmpty() && DiscreteActions.IsEmpty();
}
/// <inheritdoc/>

/// <param name="destination">A float array to pack actions into whose length is greater than or
/// equal to the addition of the Lengths of this objects <see cref="ContinuousActions"/> and
/// <see cref="DiscreteActions"/> segments.</param>
/// [Obsolete("PackActions has been deprecated.")]
public void PackActions(in float[] destination)
{
Debug.Assert(destination.Length >= ContinuousActions.Length + DiscreteActions.Length,
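FromActionSpec above expects a flat float[] laid out as all continuous values followed by one float per discrete branch, which it truncates to int. Because FromActionSpec is internal, this sketch reproduces the same layout with the segment-based constructor; it assumes the ActionSegment constructors used inside FromActionSpec are publicly accessible.

```csharp
using Unity.MLAgents.Actuators;
using UnityEngine;

public static class ActionBuffersLayoutExample
{
    public static void Demo()
    {
        // Spec: 2 continuous actions + 1 discrete branch.
        // Flat layout: [c0, c1, d0] -> ContinuousActions = [c0, c1], DiscreteActions = [(int)d0]
        var flat = new[] { 0.5f, -0.25f, 2f };

        var buffers = new ActionBuffers(
            new ActionSegment<float>(flat, 0, 2),           // continuous slice: offset 0, length 2
            new ActionSegment<int>(new[] { (int)flat[2] })  // discrete value, truncated to int
        );

        Debug.Log(buffers.ContinuousActions[1]);  // -0.25
        Debug.Log(buffers.DiscreteActions[0]);    // 2
    }
}
```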

32
com.unity.ml-agents/Runtime/Actuators/VectorActuator.cs


/// Create a VectorActuator that forwards to the provided IActionReceiver.
/// </summary>
/// <param name="actionReceiver">The <see cref="IActionReceiver"/> used for OnActionReceived and WriteDiscreteActionMask.</param>
/// <param name="vectorActionSize">For discrete action spaces, the branch sizes for each action.
/// For continuous action spaces, the number of actions is the 0th element.</param>
/// <param name="spaceType"></param>
/// <param name="actionSpec"></param>
/// <exception cref="ArgumentOutOfRangeException">Thrown for invalid <see cref="SpaceType"/></exception>
int[] vectorActionSize,
SpaceType spaceType,
ActionSpec actionSpec,
ActionSpec = actionSpec;
switch (spaceType)
if (actionSpec.NumContinuousActions == 0)
case SpaceType.Continuous:
ActionSpec = ActionSpec.MakeContinuous(vectorActionSize[0]);
suffix = "-Continuous";
break;
case SpaceType.Discrete:
ActionSpec = ActionSpec.MakeDiscrete(vectorActionSize);
suffix = "-Discrete";
break;
default:
throw new ArgumentOutOfRangeException(nameof(spaceType),
spaceType,
"Unknown enum value.");
suffix = "-Discrete";
}
else if (actionSpec.NumDiscreteActions == 0)
{
suffix = "-Continuous";
}
else
{
suffix = $"-Continuous-{actionSpec.NumContinuousActions}-Discrete-{actionSpec.NumDiscreteActions}";
}
Name = name + suffix;
}
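The constructor above derives the actuator's Name suffix from the shape of its ActionSpec. The helper below simply mirrors that three-way rule so the resulting names are easy to predict; it is not part of the package.

```csharp
using Unity.MLAgents.Actuators;

static class VectorActuatorNaming
{
    // Mirrors the suffix rule from the constructor above.
    public static string NameFor(string baseName, ActionSpec spec)
    {
        string suffix;
        if (spec.NumContinuousActions == 0)
        {
            suffix = "-Discrete";
        }
        else if (spec.NumDiscreteActions == 0)
        {
            suffix = "-Continuous";
        }
        else
        {
            // Hybrid actuators encode both sizes in the name.
            suffix = $"-Continuous-{spec.NumContinuousActions}-Discrete-{spec.NumDiscreteActions}";
        }
        return baseName + suffix;
    }
}

// e.g. NameFor("VectorActuator", ActionSpec.MakeDiscrete(3, 2)) returns "VectorActuator-Discrete".
```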

56
com.unity.ml-agents/Runtime/Agent.cs


using System;
using System.Collections.Generic;
using System.Collections.ObjectModel;
using System.Linq;
using UnityEngine;
using Unity.Barracuda;
using Unity.MLAgents.Actuators;

/// <summary>
/// Keeps track of the last vector action taken by the Brain.
/// </summary>
public float[] storedVectorActions;
public ActionBuffers storedVectorActions;
/// <summary>
/// For discrete control, specifies the actions that the agent cannot take.

public void ClearActions()
{
Array.Clear(storedVectorActions, 0, storedVectorActions.Length);
storedVectorActions.Clear();
actionBuffers.PackActions(storedVectorActions);
var continuousActions = storedVectorActions.ContinuousActions;
for (var i = 0; i < actionBuffers.ContinuousActions.Length; i++)
{
continuousActions[i] = actionBuffers.ContinuousActions[i];
}
var discreteActions = storedVectorActions.DiscreteActions;
for (var i = 0; i < actionBuffers.DiscreteActions.Length; i++)
{
discreteActions[i] = actionBuffers.DiscreteActions[i];
}
}
}

/// * <see cref="BehaviorType.InferenceOnly"/>: decisions are always made using the trained
/// model specified in the <see cref="BehaviorParameters"/> component.
/// * <see cref="BehaviorType.HeuristicOnly"/>: when a decision is needed, the agent's
/// <see cref="Heuristic"/> function is called. Your implementation is responsible for
/// <see cref="Heuristic(in ActionBuffers)"/> function is called. Your implementation is responsible for
/// providing the appropriate action.
///
/// To trigger an agent decision automatically, you can attach a <see cref="DecisionRequester"/>

/// can only take an action when it touches the ground, so several frames might elapse between
/// one decision and the need for the next.
///
/// Use the <see cref="OnActionReceived(float[])"/> function to implement the actions your agent can take,
/// Use the <see cref="OnActionReceived(ActionBuffers)"/> function to implement the actions your agent can take,
/// such as moving to reach a goal or interacting with its environment.
///
/// When you call <see cref="EndEpisode"/> on an agent or the agent reaches its <see cref="MaxStep"/> count,

/// only use the [MonoBehaviour.Update] function for cosmetic purposes. If you override the [MonoBehaviour]
/// methods, [OnEnable()] or [OnDisable()], always call the base Agent class implementations.
///
/// You can implement the <see cref="Heuristic"/> function to specify agent actions using
/// You can implement the <see cref="Heuristic(in ActionBuffers)"/> function to specify agent actions using
/// your own heuristic algorithm. Implementing a heuristic function can be useful
/// for debugging. For example, you can use keyboard input to select agent actions in
/// order to manually control an agent's behavior.

/// <summary>
/// VectorActuator which is used by default if no other sensors exist on this Agent. This VectorSensor will
/// delegate its actions to <see cref="OnActionReceived(float[])"/> by default in order to keep backward compatibility
/// delegate its actions to <see cref="OnActionReceived(ActionBuffers)"/> by default in order to keep backward compatibility
/// with the current behavior of Agent.
/// </summary>
IActuator m_VectorActuator;

InitializeSensors();
}
m_Info.storedVectorActions = new float[m_ActuatorManager.TotalNumberOfActions];
m_Info.storedVectorActions = new ActionBuffers(
new float[m_ActuatorManager.NumContinuousActions],
new int[m_ActuatorManager.NumDiscreteActions]
);
// The first time the Academy resets, all Agents in the scene will be
// forced to reset through the <see cref="AgentForceReset"/> event.

m_CumulativeReward = 0f;
m_RequestAction = false;
m_RequestDecision = false;
Array.Clear(m_Info.storedVectorActions, 0, m_Info.storedVectorActions.Length);
m_Info.storedVectorActions.Clear();
}
/// <summary>

/// Use <see cref="AddReward(float)"/> to incrementally change the reward rather than
/// overriding it.
///
/// Typically, you assign rewards in the Agent subclass's <see cref="OnActionReceived(float[])"/>
/// Typically, you assign rewards in the Agent subclass's <see cref="OnActionReceived(ActionBuffers)"/>
/// implementation after carrying out the received action and evaluating its success.
///
/// Rewards are used during reinforcement learning; they are ignored during inference.

/// You can also use the [Input System package], which provides a more flexible and
/// configurable input system.
/// <code>
/// public override void Heuristic(ActionBuffers actionsOut)
/// public override void Heuristic(in ActionBuffers actionsOut)
/// actionsOut.ContinuousActions[0] = Input.GetAxis("Horizontal");
/// actionsOut.ContinuousActions[1] = Input.GetKey(KeyCode.Space) ? 1.0f : 0.0f;
/// actionsOut.ContinuousActions[2] = Input.GetAxis("Vertical");
/// var continuousActionsOut = actionsOut.ContinuousActions;
/// continuousActionsOut[0] = Input.GetAxis("Horizontal");
/// continuousActionsOut[1] = Input.GetKey(KeyCode.Space) ? 1.0f : 0.0f;
/// continuousActionsOut[2] = Input.GetAxis("Vertical");
/// }
/// </code>
/// [Input Manager]: https://docs.unity3d.com/Manual/class-InputManager.html

// Support legacy OnActionReceived
// TODO don't set this up if the sizes are 0?
var param = m_PolicyFactory.BrainParameters;
m_VectorActuator = new VectorActuator(this, param.VectorActionSize, param.VectorActionSpaceType);
m_VectorActuator = new VectorActuator(this, param.ActionSpec);
m_ActuatorManager = new ActuatorManager(attachedActuators.Length + 1);
m_LegacyActionCache = new float[m_VectorActuator.TotalNumberOfActions()];

}
else
{
m_ActuatorManager.StoredActions.PackActions(m_Info.storedVectorActions);
m_Info.CopyActions(m_ActuatorManager.StoredActions);
}
UpdateSensors();

/// </param>
public virtual void OnActionReceived(ActionBuffers actions)
{
actions.PackActions(m_LegacyActionCache);
if (!actions.ContinuousActions.IsEmpty())
{
m_LegacyActionCache = actions.ContinuousActions.Array;
}
else
{
m_LegacyActionCache = Array.ConvertAll(actions.DiscreteActions.Array, x => (float)x);
}
OnActionReceived(m_LegacyActionCache);
}

{
OnEpisodeBegin();
}
}
/// <summary>
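The updated doc comment above shows only the continuous half of the new Heuristic signature. Here is a minimal sketch of a heuristic that fills both segments of the ActionBuffers; the key bindings and the meaning of discrete branch 0 are illustrative assumptions.

```csharp
using Unity.MLAgents;
using Unity.MLAgents.Actuators;
using UnityEngine;

public class HybridHeuristicAgent : Agent
{
    public override void Heuristic(in ActionBuffers actionsOut)
    {
        // Continuous segment: write through a local copy of the segment, as in the doc comment.
        var continuousActionsOut = actionsOut.ContinuousActions;
        continuousActionsOut[0] = Input.GetAxis("Horizontal");
        continuousActionsOut[1] = Input.GetAxis("Vertical");

        // Discrete segment: branch 0 as a jump/no-op toggle (illustrative).
        var discreteActionsOut = actionsOut.DiscreteActions;
        discreteActionsOut[0] = Input.GetKey(KeyCode.Space) ? 1 : 0;
    }
}
```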

14
com.unity.ml-agents/Runtime/Agent.deprecated.cs


}
/// <summary>
/// This method passes in a float array that is to be populated with actions.
/// Deprecated, use <see cref="Heuristic(in ActionBuffers)"/> instead.
/// </summary>
/// <param name="actionsOut"></param>
public virtual void Heuristic(float[] actionsOut)

/// <returns>
/// The last action that was decided by the Agent (or null if no decision has been made).
/// </returns>
/// <seealso cref="OnActionReceived(float[])"/>
/// <seealso cref="OnActionReceived(ActionBuffers)"/>
return m_Info.storedVectorActions;
var storedAction = m_Info.storedVectorActions;
if (!storedAction.ContinuousActions.IsEmpty())
{
return storedAction.ContinuousActions.Array;
}
else
{
return Array.ConvertAll(storedAction.DiscreteActions.Array, x => (float)x);
}
}
}
}

113
com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs


var agentInfoProto = ai.ToAgentInfoProto();
var agentActionProto = new AgentActionProto();
if (ai.storedVectorActions != null)
if (!ai.storedVectorActions.IsEmpty())
agentActionProto.VectorActions.AddRange(ai.storedVectorActions);
if (!ai.storedVectorActions.ContinuousActions.IsEmpty())
{
agentActionProto.ContinuousActions.AddRange(ai.storedVectorActions.ContinuousActions.Array);
}
if (!ai.storedVectorActions.DiscreteActions.IsEmpty())
{
agentActionProto.DiscreteActions.AddRange(ai.storedVectorActions.DiscreteActions.Array);
}
}
return new AgentInfoActionPairProto

return summariesOut;
}
#endregion
#region BrainParameters

{
var brainParametersProto = new BrainParametersProto
{
VectorActionSize = { bp.VectorActionSize },
VectorActionSpaceType = (SpaceTypeProto)bp.VectorActionSpaceType,
VectorActionSpaceTypeDeprecated = (SpaceTypeProto)bp.VectorActionSpaceType,
IsTraining = isTraining
IsTraining = isTraining,
ActionSpec = ToActionSpecProto(bp.ActionSpec),
if (bp.VectorActionSize != null)
{
brainParametersProto.VectorActionSizeDeprecated.AddRange(bp.VectorActionSize);
}
brainParametersProto.VectorActionDescriptions.AddRange(bp.VectorActionDescriptions);
brainParametersProto.VectorActionDescriptionsDeprecated.AddRange(bp.VectorActionDescriptions);
}
return brainParametersProto;
}

/// <param name="isTraining">Whether or not the Brain is training.</param>
public static BrainParametersProto ToBrainParametersProto(this ActionSpec actionSpec, string name, bool isTraining)
{
actionSpec.CheckNotHybrid();
IsTraining = isTraining
IsTraining = isTraining,
ActionSpec = ToActionSpecProto(actionSpec),
if (actionSpec.NumContinuousActions > 0)
{
brainParametersProto.VectorActionSize.Add(actionSpec.NumContinuousActions);
brainParametersProto.VectorActionSpaceType = SpaceTypeProto.Continuous;
}
else if (actionSpec.NumDiscreteActions > 0)
var supportHybrid = Academy.Instance.TrainerCapabilities == null || Academy.Instance.TrainerCapabilities.HybridActions;
if (!supportHybrid)
brainParametersProto.VectorActionSize.AddRange(actionSpec.BranchSizes);
brainParametersProto.VectorActionSpaceType = SpaceTypeProto.Discrete;
actionSpec.CheckAllContinuousOrDiscrete();
if (actionSpec.NumContinuousActions > 0)
{
brainParametersProto.VectorActionSizeDeprecated.Add(actionSpec.NumContinuousActions);
brainParametersProto.VectorActionSpaceTypeDeprecated = SpaceTypeProto.Continuous;
}
else if (actionSpec.NumDiscreteActions > 0)
{
brainParametersProto.VectorActionSizeDeprecated.AddRange(actionSpec.BranchSizes);
brainParametersProto.VectorActionSpaceTypeDeprecated = SpaceTypeProto.Discrete;
}
}
// TODO handle ActionDescriptions?

{
var bp = new BrainParameters
{
VectorActionSize = bpp.VectorActionSize.ToArray(),
VectorActionDescriptions = bpp.VectorActionDescriptions.ToArray(),
VectorActionSpaceType = (SpaceType)bpp.VectorActionSpaceType
VectorActionDescriptions = bpp.VectorActionDescriptionsDeprecated.ToArray(),
ActionSpec = ToActionSpec(bpp.ActionSpec),
/// <summary>
/// Convert a ActionSpecProto to a ActionSpec struct.
/// </summary>
/// <param name="actionSpecProto">An instance of an action spec protobuf object.</param>
/// <returns>An ActionSpec struct.</returns>
public static ActionSpec ToActionSpec(this ActionSpecProto actionSpecProto)
{
var actionSpec = new ActionSpec(actionSpecProto.NumContinuousActions);
if (actionSpecProto.DiscreteBranchSizes != null)
{
actionSpec.BranchSizes = actionSpecProto.DiscreteBranchSizes.ToArray();
}
return actionSpec;
}
/// <summary>
/// Convert a ActionSpec struct to a ActionSpecProto.
/// </summary>
/// <param name="actionSpecProto">An instance of an action spec struct.</param>
/// <returns>An ActionSpecProto.</returns>
public static ActionSpecProto ToActionSpecProto(this ActionSpec actionSpec)
{
var actionSpecProto = new ActionSpecProto
{
NumContinuousActions = actionSpec.NumContinuousActions,
NumDiscreteActions = actionSpec.NumDiscreteActions,
};
if (actionSpec.BranchSizes != null)
{
actionSpecProto.DiscreteBranchSizes.AddRange(actionSpec.BranchSizes);
}
return actionSpecProto;
}
#endregion
#region DemonstrationMetaData

}
return dm;
}
#endregion
public static UnityRLInitParameters ToUnityRLInitParameters(this UnityRLInitializationInputProto inputProto)

}
#region AgentAction
public static List<float[]> ToAgentActionList(this UnityRLInputProto.Types.ListAgentActionProto proto)
public static List<ActionBuffers> ToAgentActionList(this UnityRLInputProto.Types.ListAgentActionProto proto)
var agentActions = new List<float[]>(proto.Value.Count);
var agentActions = new List<ActionBuffers>(proto.Value.Count);
agentActions.Add(ap.VectorActions.ToArray());
agentActions.Add(ap.ToActionBuffers());
public static ActionBuffers ToActionBuffers(this AgentActionProto proto)
{
return new ActionBuffers(proto.ContinuousActions.ToArray(), proto.DiscreteActions.ToArray());
}
#endregion
#region Observations

if (!s_HaveWarnedTrainerCapabilitiesMapping)
{
Debug.LogWarning($"The sensor {sensor.GetName()} is using non-trivial mapping and " +
"the attached trainer doesn't support compression mapping. " +
"Switching to uncompressed observations.");
"the attached trainer doesn't support compression mapping. " +
"Switching to uncompressed observations.");
s_HaveWarnedTrainerCapabilitiesMapping = true;
}
compressionType = SensorCompressionType.None;

$"GetCompressedObservation() returned null data for sensor named {sensor.GetName()}. " +
"You must return a byte[]. If you don't want to use compressed observations, " +
"return SensorCompressionType.None from GetCompressionType()."
);
);
}
observationProto = new ObservationProto
{

observationProto.Shape.AddRange(shape);
return observationProto;
}
#endregion
public static UnityRLCapabilities ToRLCapabilities(this UnityRLCapabilitiesProto proto)

BaseRLCapabilities = proto.BaseRLCapabilities,
ConcatenatedPngObservations = proto.ConcatenatedPngObservations,
CompressedChannelMapping = proto.CompressedChannelMapping,
HybridActions = proto.HybridActions,
};
}

BaseRLCapabilities = rlCaps.BaseRLCapabilities,
ConcatenatedPngObservations = rlCaps.ConcatenatedPngObservations,
CompressedChannelMapping = rlCaps.CompressedChannelMapping,
HybridActions = rlCaps.HybridActions,
};
}
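ToActionSpecProto and ToActionSpec above carry the full hybrid spec across the gRPC boundary, while ToBrainParametersProto only fills the deprecated vector_action_* fields when the connected trainer lacks the HybridActions capability. Below is a sketch of that round trip, written as it would appear inside the package (ActionSpecProto and these extensions are internal, so this is not user-facing API, and the concrete sizes are illustrative).

```csharp
using Unity.MLAgents;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.CommunicatorObjects;

internal static class ActionSpecProtoRoundTrip
{
    internal static void Demo()
    {
        // 2 continuous actions plus two discrete branches of size 3 and 2 (internal constructor).
        var spec = new ActionSpec(2, new[] { 3, 2 });

        ActionSpecProto proto = spec.ToActionSpecProto();
        // proto.NumContinuousActions == 2, proto.NumDiscreteActions == 2,
        // proto.DiscreteBranchSizes == { 3, 2 }

        ActionSpec restored = proto.ToActionSpec();
        // restored.NumContinuousActions == 2, restored.BranchSizes == { 3, 2 }
    }
}
```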

2
com.unity.ml-agents/Runtime/Communicator/ICommunicator.cs


/// <param name="key">A key to identify which behavior actions to get.</param>
/// <param name="agentId">A key to identify which Agent actions to get.</param>
/// <returns></returns>
float[] GetActions(string key, int agentId);
ActionBuffers GetActions(string key, int agentId);
}
}

13
com.unity.ml-agents/Runtime/Communicator/RpcCommunicator.cs


UnityRLOutputProto m_CurrentUnityRlOutput =
new UnityRLOutputProto();
Dictionary<string, Dictionary<int, float[]>> m_LastActionsReceived =
new Dictionary<string, Dictionary<int, float[]>>();
Dictionary<string, Dictionary<int, ActionBuffers>> m_LastActionsReceived =
new Dictionary<string, Dictionary<int, ActionBuffers>>();
// Brains that we have sent over the communicator with agents.
HashSet<string> m_SentBrainKeys = new HashSet<string>();

{
return false;
}
}
else if (unityVersion.Major != pythonVersion.Major)
{

}
if (!m_LastActionsReceived.ContainsKey(behaviorName))
{
m_LastActionsReceived[behaviorName] = new Dictionary<int, float[]>();
m_LastActionsReceived[behaviorName] = new Dictionary<int, ActionBuffers>();
m_LastActionsReceived[behaviorName][info.episodeId] = null;
m_LastActionsReceived[behaviorName][info.episodeId] = ActionBuffers.Empty;
if (info.done)
{
m_LastActionsReceived[behaviorName].Remove(info.episodeId);

}
}
public float[] GetActions(string behaviorName, int agentId)
public ActionBuffers GetActions(string behaviorName, int agentId)
{
if (m_LastActionsReceived.ContainsKey(behaviorName))
{

}
}
return null;
return ActionBuffers.Empty;
}
/// <summary>

13
com.unity.ml-agents/Runtime/Communicator/UnityRLCapabilities.cs


public bool BaseRLCapabilities;
public bool ConcatenatedPngObservations;
public bool CompressedChannelMapping;
public bool HybridActions;
public UnityRLCapabilities(bool baseRlCapabilities = true, bool concatenatedPngObservations = true, bool compressedChannelMapping = true)
public UnityRLCapabilities(
bool baseRlCapabilities = true,
bool concatenatedPngObservations = true,
bool compressedChannelMapping = true,
bool hybridActions = true)
HybridActions = hybridActions;
}
/// <summary>

return false;
}
Debug.LogWarning("Unity has connected to a Training process that does not support" +
"Base Reinforcement Learning Capabilities. Please make sure you have the" +
" latest training codebase installed for this version of the ML-Agents package.");
"Base Reinforcement Learning Capabilities. Please make sure you have the" +
" latest training codebase installed for this version of the ML-Agents package.");
}
}

82
com.unity.ml-agents/Runtime/Grpc/CommunicatorObjects/AgentAction.cs


byte[] descriptorData = global::System.Convert.FromBase64String(
string.Concat(
"CjVtbGFnZW50c19lbnZzL2NvbW11bmljYXRvcl9vYmplY3RzL2FnZW50X2Fj",
"dGlvbi5wcm90bxIUY29tbXVuaWNhdG9yX29iamVjdHMiSwoQQWdlbnRBY3Rp",
"b25Qcm90bxIWCg52ZWN0b3JfYWN0aW9ucxgBIAMoAhINCgV2YWx1ZRgEIAEo",
"AkoECAIQA0oECAMQBEoECAUQBkIlqgIiVW5pdHkuTUxBZ2VudHMuQ29tbXVu",
"aWNhdG9yT2JqZWN0c2IGcHJvdG8z"));
"dGlvbi5wcm90bxIUY29tbXVuaWNhdG9yX29iamVjdHMijAEKEEFnZW50QWN0",
"aW9uUHJvdG8SIQoZdmVjdG9yX2FjdGlvbnNfZGVwcmVjYXRlZBgBIAMoAhIN",
"CgV2YWx1ZRgEIAEoAhIaChJjb250aW51b3VzX2FjdGlvbnMYBiADKAISGAoQ",
"ZGlzY3JldGVfYWN0aW9ucxgHIAMoBUoECAIQA0oECAMQBEoECAUQBkIlqgIi",
"VW5pdHkuTUxBZ2VudHMuQ29tbXVuaWNhdG9yT2JqZWN0c2IGcHJvdG8z"));
new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.AgentActionProto), global::Unity.MLAgents.CommunicatorObjects.AgentActionProto.Parser, new[]{ "VectorActions", "Value" }, null, null, null)
new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.AgentActionProto), global::Unity.MLAgents.CommunicatorObjects.AgentActionProto.Parser, new[]{ "VectorActionsDeprecated", "Value", "ContinuousActions", "DiscreteActions" }, null, null, null)
}));
}
#endregion

[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public AgentActionProto(AgentActionProto other) : this() {
vectorActions_ = other.vectorActions_.Clone();
vectorActionsDeprecated_ = other.vectorActionsDeprecated_.Clone();
continuousActions_ = other.continuousActions_.Clone();
discreteActions_ = other.discreteActions_.Clone();
_unknownFields = pb::UnknownFieldSet.Clone(other._unknownFields);
}

}
/// <summary>Field number for the "vector_actions" field.</summary>
public const int VectorActionsFieldNumber = 1;
private static readonly pb::FieldCodec<float> _repeated_vectorActions_codec
/// <summary>Field number for the "vector_actions_deprecated" field.</summary>
public const int VectorActionsDeprecatedFieldNumber = 1;
private static readonly pb::FieldCodec<float> _repeated_vectorActionsDeprecated_codec
private readonly pbc::RepeatedField<float> vectorActions_ = new pbc::RepeatedField<float>();
private readonly pbc::RepeatedField<float> vectorActionsDeprecated_ = new pbc::RepeatedField<float>();
/// <summary>
/// mark as deprecated in communicator v1.3.0
/// </summary>
public pbc::RepeatedField<float> VectorActions {
get { return vectorActions_; }
public pbc::RepeatedField<float> VectorActionsDeprecated {
get { return vectorActionsDeprecated_; }
}
/// <summary>Field number for the "value" field.</summary>

}
}
/// <summary>Field number for the "continuous_actions" field.</summary>
public const int ContinuousActionsFieldNumber = 6;
private static readonly pb::FieldCodec<float> _repeated_continuousActions_codec
= pb::FieldCodec.ForFloat(50);
private readonly pbc::RepeatedField<float> continuousActions_ = new pbc::RepeatedField<float>();
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public pbc::RepeatedField<float> ContinuousActions {
get { return continuousActions_; }
}
/// <summary>Field number for the "discrete_actions" field.</summary>
public const int DiscreteActionsFieldNumber = 7;
private static readonly pb::FieldCodec<int> _repeated_discreteActions_codec
= pb::FieldCodec.ForInt32(58);
private readonly pbc::RepeatedField<int> discreteActions_ = new pbc::RepeatedField<int>();
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public pbc::RepeatedField<int> DiscreteActions {
get { return discreteActions_; }
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public override bool Equals(object other) {
return Equals(other as AgentActionProto);

if (ReferenceEquals(other, this)) {
return true;
}
if(!vectorActions_.Equals(other.vectorActions_)) return false;
if(!vectorActionsDeprecated_.Equals(other.vectorActionsDeprecated_)) return false;
if(!continuousActions_.Equals(other.continuousActions_)) return false;
if(!discreteActions_.Equals(other.discreteActions_)) return false;
return Equals(_unknownFields, other._unknownFields);
}

hash ^= vectorActions_.GetHashCode();
hash ^= vectorActionsDeprecated_.GetHashCode();
hash ^= continuousActions_.GetHashCode();
hash ^= discreteActions_.GetHashCode();
if (_unknownFields != null) {
hash ^= _unknownFields.GetHashCode();
}

[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public void WriteTo(pb::CodedOutputStream output) {
vectorActions_.WriteTo(output, _repeated_vectorActions_codec);
vectorActionsDeprecated_.WriteTo(output, _repeated_vectorActionsDeprecated_codec);
continuousActions_.WriteTo(output, _repeated_continuousActions_codec);
discreteActions_.WriteTo(output, _repeated_discreteActions_codec);
if (_unknownFields != null) {
_unknownFields.WriteTo(output);
}

public int CalculateSize() {
int size = 0;
size += vectorActions_.CalculateSize(_repeated_vectorActions_codec);
size += vectorActionsDeprecated_.CalculateSize(_repeated_vectorActionsDeprecated_codec);
size += continuousActions_.CalculateSize(_repeated_continuousActions_codec);
size += discreteActions_.CalculateSize(_repeated_discreteActions_codec);
if (_unknownFields != null) {
size += _unknownFields.CalculateSize();
}

if (other == null) {
return;
}
vectorActions_.Add(other.vectorActions_);
vectorActionsDeprecated_.Add(other.vectorActionsDeprecated_);
continuousActions_.Add(other.continuousActions_);
discreteActions_.Add(other.discreteActions_);
_unknownFields = pb::UnknownFieldSet.MergeFrom(_unknownFields, other._unknownFields);
}

break;
case 10:
case 13: {
vectorActions_.AddEntriesFrom(input, _repeated_vectorActions_codec);
vectorActionsDeprecated_.AddEntriesFrom(input, _repeated_vectorActionsDeprecated_codec);
break;
}
case 50:
case 53: {
continuousActions_.AddEntriesFrom(input, _repeated_continuousActions_codec);
break;
}
case 58:
case 56: {
discreteActions_.AddEntriesFrom(input, _repeated_discreteActions_codec);
break;
}
}

348
com.unity.ml-agents/Runtime/Grpc/CommunicatorObjects/BrainParameters.cs


"CjltbGFnZW50c19lbnZzL2NvbW11bmljYXRvcl9vYmplY3RzL2JyYWluX3Bh",
"cmFtZXRlcnMucHJvdG8SFGNvbW11bmljYXRvcl9vYmplY3RzGjNtbGFnZW50",
"c19lbnZzL2NvbW11bmljYXRvcl9vYmplY3RzL3NwYWNlX3R5cGUucHJvdG8i",
"2QEKFEJyYWluUGFyYW1ldGVyc1Byb3RvEhoKEnZlY3Rvcl9hY3Rpb25fc2l6",
"ZRgDIAMoBRIiChp2ZWN0b3JfYWN0aW9uX2Rlc2NyaXB0aW9ucxgFIAMoCRJG",
"Chh2ZWN0b3JfYWN0aW9uX3NwYWNlX3R5cGUYBiABKA4yJC5jb21tdW5pY2F0",
"b3Jfb2JqZWN0cy5TcGFjZVR5cGVQcm90bxISCgpicmFpbl9uYW1lGAcgASgJ",
"EhMKC2lzX3RyYWluaW5nGAggASgISgQIARACSgQIAhADSgQIBBAFQiWqAiJV",
"bml0eS5NTEFnZW50cy5Db21tdW5pY2F0b3JPYmplY3RzYgZwcm90bzM="));
"iwEKD0FjdGlvblNwZWNQcm90bxIeChZudW1fY29udGludW91c19hY3Rpb25z",
"GAEgASgFEhwKFG51bV9kaXNjcmV0ZV9hY3Rpb25zGAIgASgFEh0KFWRpc2Ny",
"ZXRlX2JyYW5jaF9zaXplcxgDIAMoBRIbChNhY3Rpb25fZGVzY3JpcHRpb25z",
"GAQgAygJIrYCChRCcmFpblBhcmFtZXRlcnNQcm90bxIlCh12ZWN0b3JfYWN0",
"aW9uX3NpemVfZGVwcmVjYXRlZBgDIAMoBRItCiV2ZWN0b3JfYWN0aW9uX2Rl",
"c2NyaXB0aW9uc19kZXByZWNhdGVkGAUgAygJElEKI3ZlY3Rvcl9hY3Rpb25f",
"c3BhY2VfdHlwZV9kZXByZWNhdGVkGAYgASgOMiQuY29tbXVuaWNhdG9yX29i",
"amVjdHMuU3BhY2VUeXBlUHJvdG8SEgoKYnJhaW5fbmFtZRgHIAEoCRITCgtp",
"c190cmFpbmluZxgIIAEoCBI6CgthY3Rpb25fc3BlYxgJIAEoCzIlLmNvbW11",
"bmljYXRvcl9vYmplY3RzLkFjdGlvblNwZWNQcm90b0oECAEQAkoECAIQA0oE",
"CAQQBUIlqgIiVW5pdHkuTUxBZ2VudHMuQ29tbXVuaWNhdG9yT2JqZWN0c2IG",
"cHJvdG8z"));
new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.BrainParametersProto), global::Unity.MLAgents.CommunicatorObjects.BrainParametersProto.Parser, new[]{ "VectorActionSize", "VectorActionDescriptions", "VectorActionSpaceType", "BrainName", "IsTraining" }, null, null, null)
new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.ActionSpecProto), global::Unity.MLAgents.CommunicatorObjects.ActionSpecProto.Parser, new[]{ "NumContinuousActions", "NumDiscreteActions", "DiscreteBranchSizes", "ActionDescriptions" }, null, null, null),
new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.BrainParametersProto), global::Unity.MLAgents.CommunicatorObjects.BrainParametersProto.Parser, new[]{ "VectorActionSizeDeprecated", "VectorActionDescriptionsDeprecated", "VectorActionSpaceTypeDeprecated", "BrainName", "IsTraining", "ActionSpec" }, null, null, null)
}));
}
#endregion

internal sealed partial class ActionSpecProto : pb::IMessage<ActionSpecProto> {
private static readonly pb::MessageParser<ActionSpecProto> _parser = new pb::MessageParser<ActionSpecProto>(() => new ActionSpecProto());
private pb::UnknownFieldSet _unknownFields;
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public static pb::MessageParser<ActionSpecProto> Parser { get { return _parser; } }
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public static pbr::MessageDescriptor Descriptor {
get { return global::Unity.MLAgents.CommunicatorObjects.BrainParametersReflection.Descriptor.MessageTypes[0]; }
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
pbr::MessageDescriptor pb::IMessage.Descriptor {
get { return Descriptor; }
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public ActionSpecProto() {
OnConstruction();
}
partial void OnConstruction();
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public ActionSpecProto(ActionSpecProto other) : this() {
numContinuousActions_ = other.numContinuousActions_;
numDiscreteActions_ = other.numDiscreteActions_;
discreteBranchSizes_ = other.discreteBranchSizes_.Clone();
actionDescriptions_ = other.actionDescriptions_.Clone();
_unknownFields = pb::UnknownFieldSet.Clone(other._unknownFields);
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public ActionSpecProto Clone() {
return new ActionSpecProto(this);
}
/// <summary>Field number for the "num_continuous_actions" field.</summary>
public const int NumContinuousActionsFieldNumber = 1;
private int numContinuousActions_;
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public int NumContinuousActions {
get { return numContinuousActions_; }
set {
numContinuousActions_ = value;
}
}
/// <summary>Field number for the "num_discrete_actions" field.</summary>
public const int NumDiscreteActionsFieldNumber = 2;
private int numDiscreteActions_;
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public int NumDiscreteActions {
get { return numDiscreteActions_; }
set {
numDiscreteActions_ = value;
}
}
/// <summary>Field number for the "discrete_branch_sizes" field.</summary>
public const int DiscreteBranchSizesFieldNumber = 3;
private static readonly pb::FieldCodec<int> _repeated_discreteBranchSizes_codec
= pb::FieldCodec.ForInt32(26);
private readonly pbc::RepeatedField<int> discreteBranchSizes_ = new pbc::RepeatedField<int>();
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public pbc::RepeatedField<int> DiscreteBranchSizes {
get { return discreteBranchSizes_; }
}
/// <summary>Field number for the "action_descriptions" field.</summary>
public const int ActionDescriptionsFieldNumber = 4;
private static readonly pb::FieldCodec<string> _repeated_actionDescriptions_codec
= pb::FieldCodec.ForString(34);
private readonly pbc::RepeatedField<string> actionDescriptions_ = new pbc::RepeatedField<string>();
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public pbc::RepeatedField<string> ActionDescriptions {
get { return actionDescriptions_; }
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public override bool Equals(object other) {
return Equals(other as ActionSpecProto);
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public bool Equals(ActionSpecProto other) {
if (ReferenceEquals(other, null)) {
return false;
}
if (ReferenceEquals(other, this)) {
return true;
}
if (NumContinuousActions != other.NumContinuousActions) return false;
if (NumDiscreteActions != other.NumDiscreteActions) return false;
if(!discreteBranchSizes_.Equals(other.discreteBranchSizes_)) return false;
if(!actionDescriptions_.Equals(other.actionDescriptions_)) return false;
return Equals(_unknownFields, other._unknownFields);
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public override int GetHashCode() {
int hash = 1;
if (NumContinuousActions != 0) hash ^= NumContinuousActions.GetHashCode();
if (NumDiscreteActions != 0) hash ^= NumDiscreteActions.GetHashCode();
hash ^= discreteBranchSizes_.GetHashCode();
hash ^= actionDescriptions_.GetHashCode();
if (_unknownFields != null) {
hash ^= _unknownFields.GetHashCode();
}
return hash;
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public override string ToString() {
return pb::JsonFormatter.ToDiagnosticString(this);
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public void WriteTo(pb::CodedOutputStream output) {
if (NumContinuousActions != 0) {
output.WriteRawTag(8);
output.WriteInt32(NumContinuousActions);
}
if (NumDiscreteActions != 0) {
output.WriteRawTag(16);
output.WriteInt32(NumDiscreteActions);
}
discreteBranchSizes_.WriteTo(output, _repeated_discreteBranchSizes_codec);
actionDescriptions_.WriteTo(output, _repeated_actionDescriptions_codec);
if (_unknownFields != null) {
_unknownFields.WriteTo(output);
}
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public int CalculateSize() {
int size = 0;
if (NumContinuousActions != 0) {
size += 1 + pb::CodedOutputStream.ComputeInt32Size(NumContinuousActions);
}
if (NumDiscreteActions != 0) {
size += 1 + pb::CodedOutputStream.ComputeInt32Size(NumDiscreteActions);
}
size += discreteBranchSizes_.CalculateSize(_repeated_discreteBranchSizes_codec);
size += actionDescriptions_.CalculateSize(_repeated_actionDescriptions_codec);
if (_unknownFields != null) {
size += _unknownFields.CalculateSize();
}
return size;
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public void MergeFrom(ActionSpecProto other) {
if (other == null) {
return;
}
if (other.NumContinuousActions != 0) {
NumContinuousActions = other.NumContinuousActions;
}
if (other.NumDiscreteActions != 0) {
NumDiscreteActions = other.NumDiscreteActions;
}
discreteBranchSizes_.Add(other.discreteBranchSizes_);
actionDescriptions_.Add(other.actionDescriptions_);
_unknownFields = pb::UnknownFieldSet.MergeFrom(_unknownFields, other._unknownFields);
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public void MergeFrom(pb::CodedInputStream input) {
uint tag;
while ((tag = input.ReadTag()) != 0) {
switch(tag) {
default:
_unknownFields = pb::UnknownFieldSet.MergeFieldFrom(_unknownFields, input);
break;
case 8: {
NumContinuousActions = input.ReadInt32();
break;
}
case 16: {
NumDiscreteActions = input.ReadInt32();
break;
}
case 26:
case 24: {
discreteBranchSizes_.AddEntriesFrom(input, _repeated_discreteBranchSizes_codec);
break;
}
case 34: {
actionDescriptions_.AddEntriesFrom(input, _repeated_actionDescriptions_codec);
break;
}
}
}
}
}
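As a point of reference, a minimal sketch of how an ActionSpecProto describing a hybrid action space could be populated from the fields generated above (the class is internal to the runtime assembly; the concrete sizes and descriptions are illustrative only):
// Hypothetical hybrid spec: 3 continuous actions plus two discrete branches of sizes 2 and 3.
var actionSpecProto = new Unity.MLAgents.CommunicatorObjects.ActionSpecProto
{
    NumContinuousActions = 3,
    NumDiscreteActions = 2,
};
actionSpecProto.DiscreteBranchSizes.Add(new[] { 2, 3 });
actionSpecProto.ActionDescriptions.Add(new[] { "Jump", "Shoot" });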
internal sealed partial class BrainParametersProto : pb::IMessage<BrainParametersProto> {
private static readonly pb::MessageParser<BrainParametersProto> _parser = new pb::MessageParser<BrainParametersProto>(() => new BrainParametersProto());
private pb::UnknownFieldSet _unknownFields;

[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public static pbr::MessageDescriptor Descriptor {
get { return global::Unity.MLAgents.CommunicatorObjects.BrainParametersReflection.Descriptor.MessageTypes[0]; }
get { return global::Unity.MLAgents.CommunicatorObjects.BrainParametersReflection.Descriptor.MessageTypes[1]; }
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]

[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public BrainParametersProto(BrainParametersProto other) : this() {
vectorActionSize_ = other.vectorActionSize_.Clone();
vectorActionDescriptions_ = other.vectorActionDescriptions_.Clone();
vectorActionSpaceType_ = other.vectorActionSpaceType_;
vectorActionSizeDeprecated_ = other.vectorActionSizeDeprecated_.Clone();
vectorActionDescriptionsDeprecated_ = other.vectorActionDescriptionsDeprecated_.Clone();
vectorActionSpaceTypeDeprecated_ = other.vectorActionSpaceTypeDeprecated_;
ActionSpec = other.actionSpec_ != null ? other.ActionSpec.Clone() : null;
_unknownFields = pb::UnknownFieldSet.Clone(other._unknownFields);
}

}
/// <summary>Field number for the "vector_action_size" field.</summary>
public const int VectorActionSizeFieldNumber = 3;
private static readonly pb::FieldCodec<int> _repeated_vectorActionSize_codec
/// <summary>Field number for the "vector_action_size_deprecated" field.</summary>
public const int VectorActionSizeDeprecatedFieldNumber = 3;
private static readonly pb::FieldCodec<int> _repeated_vectorActionSizeDeprecated_codec
private readonly pbc::RepeatedField<int> vectorActionSize_ = new pbc::RepeatedField<int>();
private readonly pbc::RepeatedField<int> vectorActionSizeDeprecated_ = new pbc::RepeatedField<int>();
/// <summary>
/// marked as deprecated in communicator v1.3.0
/// </summary>
public pbc::RepeatedField<int> VectorActionSize {
get { return vectorActionSize_; }
public pbc::RepeatedField<int> VectorActionSizeDeprecated {
get { return vectorActionSizeDeprecated_; }
/// <summary>Field number for the "vector_action_descriptions" field.</summary>
public const int VectorActionDescriptionsFieldNumber = 5;
private static readonly pb::FieldCodec<string> _repeated_vectorActionDescriptions_codec
/// <summary>Field number for the "vector_action_descriptions_deprecated" field.</summary>
public const int VectorActionDescriptionsDeprecatedFieldNumber = 5;
private static readonly pb::FieldCodec<string> _repeated_vectorActionDescriptionsDeprecated_codec
private readonly pbc::RepeatedField<string> vectorActionDescriptions_ = new pbc::RepeatedField<string>();
private readonly pbc::RepeatedField<string> vectorActionDescriptionsDeprecated_ = new pbc::RepeatedField<string>();
/// <summary>
/// marked as deprecated in communicator v1.3.0
/// </summary>
public pbc::RepeatedField<string> VectorActionDescriptions {
get { return vectorActionDescriptions_; }
public pbc::RepeatedField<string> VectorActionDescriptionsDeprecated {
get { return vectorActionDescriptionsDeprecated_; }
/// <summary>Field number for the "vector_action_space_type" field.</summary>
public const int VectorActionSpaceTypeFieldNumber = 6;
private global::Unity.MLAgents.CommunicatorObjects.SpaceTypeProto vectorActionSpaceType_ = 0;
/// <summary>Field number for the "vector_action_space_type_deprecated" field.</summary>
public const int VectorActionSpaceTypeDeprecatedFieldNumber = 6;
private global::Unity.MLAgents.CommunicatorObjects.SpaceTypeProto vectorActionSpaceTypeDeprecated_ = 0;
/// <summary>
/// marked as deprecated in communicator v1.3.0
/// </summary>
public global::Unity.MLAgents.CommunicatorObjects.SpaceTypeProto VectorActionSpaceType {
get { return vectorActionSpaceType_; }
public global::Unity.MLAgents.CommunicatorObjects.SpaceTypeProto VectorActionSpaceTypeDeprecated {
get { return vectorActionSpaceTypeDeprecated_; }
vectorActionSpaceType_ = value;
vectorActionSpaceTypeDeprecated_ = value;
}
}

}
}
/// <summary>Field number for the "action_spec" field.</summary>
public const int ActionSpecFieldNumber = 9;
private global::Unity.MLAgents.CommunicatorObjects.ActionSpecProto actionSpec_;
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public global::Unity.MLAgents.CommunicatorObjects.ActionSpecProto ActionSpec {
get { return actionSpec_; }
set {
actionSpec_ = value;
}
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public override bool Equals(object other) {
return Equals(other as BrainParametersProto);

if (ReferenceEquals(other, this)) {
return true;
}
if(!vectorActionSize_.Equals(other.vectorActionSize_)) return false;
if(!vectorActionDescriptions_.Equals(other.vectorActionDescriptions_)) return false;
if (VectorActionSpaceType != other.VectorActionSpaceType) return false;
if(!vectorActionSizeDeprecated_.Equals(other.vectorActionSizeDeprecated_)) return false;
if(!vectorActionDescriptionsDeprecated_.Equals(other.vectorActionDescriptionsDeprecated_)) return false;
if (VectorActionSpaceTypeDeprecated != other.VectorActionSpaceTypeDeprecated) return false;
if (!object.Equals(ActionSpec, other.ActionSpec)) return false;
return Equals(_unknownFields, other._unknownFields);
}

hash ^= vectorActionSize_.GetHashCode();
hash ^= vectorActionDescriptions_.GetHashCode();
if (VectorActionSpaceType != 0) hash ^= VectorActionSpaceType.GetHashCode();
hash ^= vectorActionSizeDeprecated_.GetHashCode();
hash ^= vectorActionDescriptionsDeprecated_.GetHashCode();
if (VectorActionSpaceTypeDeprecated != 0) hash ^= VectorActionSpaceTypeDeprecated.GetHashCode();
if (actionSpec_ != null) hash ^= ActionSpec.GetHashCode();
if (_unknownFields != null) {
hash ^= _unknownFields.GetHashCode();
}

[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public void WriteTo(pb::CodedOutputStream output) {
vectorActionSize_.WriteTo(output, _repeated_vectorActionSize_codec);
vectorActionDescriptions_.WriteTo(output, _repeated_vectorActionDescriptions_codec);
if (VectorActionSpaceType != 0) {
vectorActionSizeDeprecated_.WriteTo(output, _repeated_vectorActionSizeDeprecated_codec);
vectorActionDescriptionsDeprecated_.WriteTo(output, _repeated_vectorActionDescriptionsDeprecated_codec);
if (VectorActionSpaceTypeDeprecated != 0) {
output.WriteEnum((int) VectorActionSpaceType);
output.WriteEnum((int) VectorActionSpaceTypeDeprecated);
}
if (BrainName.Length != 0) {
output.WriteRawTag(58);

output.WriteRawTag(64);
output.WriteBool(IsTraining);
}
if (actionSpec_ != null) {
output.WriteRawTag(74);
output.WriteMessage(ActionSpec);
}
if (_unknownFields != null) {
_unknownFields.WriteTo(output);
}

public int CalculateSize() {
int size = 0;
size += vectorActionSize_.CalculateSize(_repeated_vectorActionSize_codec);
size += vectorActionDescriptions_.CalculateSize(_repeated_vectorActionDescriptions_codec);
if (VectorActionSpaceType != 0) {
size += 1 + pb::CodedOutputStream.ComputeEnumSize((int) VectorActionSpaceType);
size += vectorActionSizeDeprecated_.CalculateSize(_repeated_vectorActionSizeDeprecated_codec);
size += vectorActionDescriptionsDeprecated_.CalculateSize(_repeated_vectorActionDescriptionsDeprecated_codec);
if (VectorActionSpaceTypeDeprecated != 0) {
size += 1 + pb::CodedOutputStream.ComputeEnumSize((int) VectorActionSpaceTypeDeprecated);
}
if (BrainName.Length != 0) {
size += 1 + pb::CodedOutputStream.ComputeStringSize(BrainName);

}
if (actionSpec_ != null) {
size += 1 + pb::CodedOutputStream.ComputeMessageSize(ActionSpec);
}
if (_unknownFields != null) {
size += _unknownFields.CalculateSize();

if (other == null) {
return;
}
vectorActionSize_.Add(other.vectorActionSize_);
vectorActionDescriptions_.Add(other.vectorActionDescriptions_);
if (other.VectorActionSpaceType != 0) {
VectorActionSpaceType = other.VectorActionSpaceType;
vectorActionSizeDeprecated_.Add(other.vectorActionSizeDeprecated_);
vectorActionDescriptionsDeprecated_.Add(other.vectorActionDescriptionsDeprecated_);
if (other.VectorActionSpaceTypeDeprecated != 0) {
VectorActionSpaceTypeDeprecated = other.VectorActionSpaceTypeDeprecated;
}
if (other.BrainName.Length != 0) {
BrainName = other.BrainName;

}
if (other.actionSpec_ != null) {
if (actionSpec_ == null) {
actionSpec_ = new global::Unity.MLAgents.CommunicatorObjects.ActionSpecProto();
}
ActionSpec.MergeFrom(other.ActionSpec);
}
_unknownFields = pb::UnknownFieldSet.MergeFrom(_unknownFields, other._unknownFields);
}

break;
case 26:
case 24: {
vectorActionSize_.AddEntriesFrom(input, _repeated_vectorActionSize_codec);
vectorActionSizeDeprecated_.AddEntriesFrom(input, _repeated_vectorActionSizeDeprecated_codec);
vectorActionDescriptions_.AddEntriesFrom(input, _repeated_vectorActionDescriptions_codec);
vectorActionDescriptionsDeprecated_.AddEntriesFrom(input, _repeated_vectorActionDescriptionsDeprecated_codec);
vectorActionSpaceType_ = (global::Unity.MLAgents.CommunicatorObjects.SpaceTypeProto) input.ReadEnum();
vectorActionSpaceTypeDeprecated_ = (global::Unity.MLAgents.CommunicatorObjects.SpaceTypeProto) input.ReadEnum();
break;
}
case 58: {

case 64: {
IsTraining = input.ReadBool();
break;
}
case 74: {
if (actionSpec_ == null) {
actionSpec_ = new global::Unity.MLAgents.CommunicatorObjects.ActionSpecProto();
}
input.ReadMessage(actionSpec_);
break;
}
}
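A hedged sketch of how a BrainParametersProto might be filled so that the new action_spec field and the deprecated vector_action_*_deprecated fields stay consistent for older trainers (property names are taken from the generated code above, actionSpecProto is the hybrid spec sketched earlier, and all values are illustrative):
var bpProto = new Unity.MLAgents.CommunicatorObjects.BrainParametersProto
{
    BrainName = "MyBehavior",          // illustrative behavior name
    IsTraining = true,
    ActionSpec = actionSpecProto,      // new, hybrid-capable action description
};
// Deprecated mirror for pre-v1.3.0 trainers; only meaningful for purely discrete or continuous specs.
bpProto.VectorActionSizeDeprecated.Add(new[] { 2, 3 });
bpProto.VectorActionSpaceTypeDeprecated = Unity.MLAgents.CommunicatorObjects.SpaceTypeProto.Discrete;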

44
com.unity.ml-agents/Runtime/Grpc/CommunicatorObjects/Capabilities.cs


byte[] descriptorData = global::System.Convert.FromBase64String(
string.Concat(
"CjVtbGFnZW50c19lbnZzL2NvbW11bmljYXRvcl9vYmplY3RzL2NhcGFiaWxp",
"dGllcy5wcm90bxIUY29tbXVuaWNhdG9yX29iamVjdHMifQoYVW5pdHlSTENh",
"cGFiaWxpdGllc1Byb3RvEhoKEmJhc2VSTENhcGFiaWxpdGllcxgBIAEoCBIj",
"Chtjb25jYXRlbmF0ZWRQbmdPYnNlcnZhdGlvbnMYAiABKAgSIAoYY29tcHJl",
"c3NlZENoYW5uZWxNYXBwaW5nGAMgASgIQiWqAiJVbml0eS5NTEFnZW50cy5D",
"b21tdW5pY2F0b3JPYmplY3RzYgZwcm90bzM="));
"dGllcy5wcm90bxIUY29tbXVuaWNhdG9yX29iamVjdHMilAEKGFVuaXR5UkxD",
"YXBhYmlsaXRpZXNQcm90bxIaChJiYXNlUkxDYXBhYmlsaXRpZXMYASABKAgS",
"IwobY29uY2F0ZW5hdGVkUG5nT2JzZXJ2YXRpb25zGAIgASgIEiAKGGNvbXBy",
"ZXNzZWRDaGFubmVsTWFwcGluZxgDIAEoCBIVCg1oeWJyaWRBY3Rpb25zGAQg",
"ASgIQiWqAiJVbml0eS5NTEFnZW50cy5Db21tdW5pY2F0b3JPYmplY3RzYgZw",
"cm90bzM="));
new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.UnityRLCapabilitiesProto), global::Unity.MLAgents.CommunicatorObjects.UnityRLCapabilitiesProto.Parser, new[]{ "BaseRLCapabilities", "ConcatenatedPngObservations", "CompressedChannelMapping" }, null, null, null)
new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.UnityRLCapabilitiesProto), global::Unity.MLAgents.CommunicatorObjects.UnityRLCapabilitiesProto.Parser, new[]{ "BaseRLCapabilities", "ConcatenatedPngObservations", "CompressedChannelMapping", "HybridActions" }, null, null, null)
}));
}
#endregion

baseRLCapabilities_ = other.baseRLCapabilities_;
concatenatedPngObservations_ = other.concatenatedPngObservations_;
compressedChannelMapping_ = other.compressedChannelMapping_;
hybridActions_ = other.hybridActions_;
_unknownFields = pb::UnknownFieldSet.Clone(other._unknownFields);
}

}
}
/// <summary>Field number for the "hybridActions" field.</summary>
public const int HybridActionsFieldNumber = 4;
private bool hybridActions_;
/// <summary>
/// support for hybrid action spaces (discrete + continuous)
/// </summary>
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public bool HybridActions {
get { return hybridActions_; }
set {
hybridActions_ = value;
}
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public override bool Equals(object other) {
return Equals(other as UnityRLCapabilitiesProto);

if (BaseRLCapabilities != other.BaseRLCapabilities) return false;
if (ConcatenatedPngObservations != other.ConcatenatedPngObservations) return false;
if (CompressedChannelMapping != other.CompressedChannelMapping) return false;
if (HybridActions != other.HybridActions) return false;
return Equals(_unknownFields, other._unknownFields);
}

if (BaseRLCapabilities != false) hash ^= BaseRLCapabilities.GetHashCode();
if (ConcatenatedPngObservations != false) hash ^= ConcatenatedPngObservations.GetHashCode();
if (CompressedChannelMapping != false) hash ^= CompressedChannelMapping.GetHashCode();
if (HybridActions != false) hash ^= HybridActions.GetHashCode();
if (_unknownFields != null) {
hash ^= _unknownFields.GetHashCode();
}

output.WriteRawTag(24);
output.WriteBool(CompressedChannelMapping);
}
if (HybridActions != false) {
output.WriteRawTag(32);
output.WriteBool(HybridActions);
}
if (_unknownFields != null) {
_unknownFields.WriteTo(output);
}

size += 1 + 1;
}
if (CompressedChannelMapping != false) {
size += 1 + 1;
}
if (HybridActions != false) {
size += 1 + 1;
}
if (_unknownFields != null) {

if (other.CompressedChannelMapping != false) {
CompressedChannelMapping = other.CompressedChannelMapping;
}
if (other.HybridActions != false) {
HybridActions = other.HybridActions;
}
_unknownFields = pb::UnknownFieldSet.MergeFrom(_unknownFields, other._unknownFields);
}

}
case 24: {
CompressedChannelMapping = input.ReadBool();
break;
}
case 32: {
HybridActions = input.ReadBool();
break;
}
}
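The new hybridActions capability is advertised the same way as the existing booleans; a minimal sketch using only the properties generated above:
var caps = new Unity.MLAgents.CommunicatorObjects.UnityRLCapabilitiesProto
{
    BaseRLCapabilities = true,
    ConcatenatedPngObservations = true,
    CompressedChannelMapping = true,
    HybridActions = true,   // new in this change: hybrid (continuous + discrete) action support
};
// A trainer that does not know field 4 leaves HybridActions at its default of false,
// so the C# side can tell that hybrid action models are not supported by that trainer.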

44
com.unity.ml-agents/Runtime/Inference/ApplierImpl.cs


using System.Collections.Generic;
using System.Linq;
using Unity.MLAgents.Inference.Utils;
using Unity.MLAgents.Actuators;
using Unity.Barracuda;
using UnityEngine;

/// </summary>
internal class ContinuousActionOutputApplier : TensorApplier.IApplier
{
public void Apply(TensorProxy tensorProxy, IEnumerable<int> actionIds, Dictionary<int, float[]> lastActions)
readonly ActionSpec m_ActionSpec;
public ContinuousActionOutputApplier(ActionSpec actionSpec)
{
m_ActionSpec = actionSpec;
}
public void Apply(TensorProxy tensorProxy, IEnumerable<int> actionIds, Dictionary<int, ActionBuffers> lastActions)
{
var actionSize = tensorProxy.shape[tensorProxy.shape.Length - 1];
var agentIndex = 0;

{
var actionValue = lastActions[agentId];
if (actionValue == null)
var actionBuffer = lastActions[agentId];
if (actionBuffer.IsEmpty())
actionValue = new float[actionSize];
lastActions[agentId] = actionValue;
actionBuffer = new ActionBuffers(m_ActionSpec);
lastActions[agentId] = actionBuffer;
var continuousBuffer = actionBuffer.ContinuousActions;
actionValue[j] = tensorProxy.data[agentIndex, j];
continuousBuffer[j] = tensorProxy.data[agentIndex, j];
}
}
agentIndex++;

readonly int[] m_ActionSize;
readonly Multinomial m_Multinomial;
readonly ITensorAllocator m_Allocator;
readonly ActionSpec m_ActionSpec;
public DiscreteActionOutputApplier(int[] actionSize, int seed, ITensorAllocator allocator)
public DiscreteActionOutputApplier(ActionSpec actionSpec, int seed, ITensorAllocator allocator)
m_ActionSize = actionSize;
m_ActionSize = actionSpec.BranchSizes;
m_ActionSpec = actionSpec;
public void Apply(TensorProxy tensorProxy, IEnumerable<int> actionIds, Dictionary<int, float[]> lastActions)
public void Apply(TensorProxy tensorProxy, IEnumerable<int> actionIds, Dictionary<int, ActionBuffers> lastActions)
{
//var tensorDataProbabilities = tensorProxy.Data as float[,];
var idActionPairList = actionIds as List<int> ?? actionIds.ToList();

{
if (lastActions.ContainsKey(agentId))
{
var actionVal = lastActions[agentId];
if (actionVal == null)
var actionBuffer = lastActions[agentId];
if (actionBuffer.IsEmpty())
actionVal = new float[m_ActionSize.Length];
lastActions[agentId] = actionVal;
actionBuffer = new ActionBuffers(m_ActionSpec);
lastActions[agentId] = actionBuffer;
var discreteBuffer = actionBuffer.DiscreteActions;
actionVal[j] = actionValues[agentIndex, j];
discreteBuffer[j] = (int)actionValues[agentIndex, j];
}
}
agentIndex++;

m_Memories = memories;
}
public void Apply(TensorProxy tensorProxy, IEnumerable<int> actionIds, Dictionary<int, float[]> lastActions)
public void Apply(TensorProxy tensorProxy, IEnumerable<int> actionIds, Dictionary<int, ActionBuffers> lastActions)
{
var agentIndex = 0;
var memorySize = (int)tensorProxy.shape[tensorProxy.shape.Length - 1];

m_Memories = memories;
}
public void Apply(TensorProxy tensorProxy, IEnumerable<int> actionIds, Dictionary<int, float[]> lastActions)
public void Apply(TensorProxy tensorProxy, IEnumerable<int> actionIds, Dictionary<int, ActionBuffers> lastActions)
{
var agentIndex = 0;
var memorySize = (int)tensorProxy.shape[tensorProxy.shape.Length - 1];
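The appliers above now write into ActionBuffers rather than raw float arrays. A minimal sketch of that pattern (types come from Unity.MLAgents.Actuators; lastActions and agentId stand in for the applier's dictionary and agent id, and the hybrid spec and written values are illustrative):
var actionSpec = new ActionSpec(3, new[] { 2, 3 });   // 3 continuous actions, branches of 2 and 3
var actionBuffer = new ActionBuffers(actionSpec);     // zero-initialized continuous + discrete segments
var continuousBuffer = actionBuffer.ContinuousActions;
var discreteBuffer = actionBuffer.DiscreteActions;
continuousBuffer[0] = 0.5f;                           // what ContinuousActionOutputApplier writes
discreteBuffer[1] = 2;                                // what DiscreteActionOutputApplier writes
// lastActions maps agentId -> ActionBuffers; ActionBuffers.Empty marks "no action computed yet".
lastActions[agentId] = actionBuffer;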

271
com.unity.ml-agents/Runtime/Inference/BarracudaModelParamLoader.cs


/// </summary>
internal class BarracudaModelParamLoader
{
enum ModelActionType
{
Unknown,
Discrete,
Continuous
}
/// Generates the Tensor inputs that are expected to be present in the Model.
/// </summary>
/// <param name="model">
/// The Barracuda engine model for loading static parameters.
/// </param>
/// <returns>TensorProxy IEnumerable with the expected Tensor inputs.</returns>
public static IReadOnlyList<TensorProxy> GetInputTensors(Model model)
{
var tensors = new List<TensorProxy>();
if (model == null)
return tensors;
foreach (var input in model.inputs)
{
tensors.Add(new TensorProxy
{
name = input.name,
valueType = TensorProxy.TensorType.FloatingPoint,
data = null,
shape = input.shape.Select(i => (long)i).ToArray()
});
}
foreach (var mem in model.memories)
{
tensors.Add(new TensorProxy
{
name = mem.input,
valueType = TensorProxy.TensorType.FloatingPoint,
data = null,
shape = TensorUtils.TensorShapeFromBarracuda(mem.shape)
});
}
tensors.Sort((el1, el2) => el1.name.CompareTo(el2.name));
return tensors;
}
public static int GetNumVisualInputs(Model model)
{
var count = 0;
if (model == null)
return count;
foreach (var input in model.inputs)
{
if (input.shape.Length == 4)
{
if (input.name.StartsWith(TensorNames.VisualObservationPlaceholderPrefix))
{
count++;
}
}
}
return count;
}
/// <summary>
/// Gets the names of the Tensor outputs that are expected to be present in the Model.
/// </summary>
/// <param name="model">
/// The Barracuda engine model for loading static parameters
/// </param>
/// <returns>Array of the expected Tensor output names.</returns>
public static string[] GetOutputNames(Model model)
{
var names = new List<string>();
if (model == null)
{
return names.ToArray();
}
names.Add(TensorNames.ActionOutput);
var memory = (int)model.GetTensorByName(TensorNames.MemorySize)[0];
if (memory > 0)
{
foreach (var mem in model.memories)
{
names.Add(mem.output);
}
}
names.Sort();
return names.ToArray();
}
/// <summary>
/// Factory for the ModelParamLoader: creates a ModelParamLoader and runs the checks
/// on it.
/// </summary>

return failedModelChecks;
}
foreach (var constantName in TensorNames.RequiredConstants)
var hasExpectedTensors = model.CheckExpectedTensors(failedModelChecks);
if (!hasExpectedTensors)
var tensor = model.GetTensorByName(constantName);
if (tensor == null)
{
failedModelChecks.Add($"Required constant \"{constantName}\" was not found in the model file.");
return failedModelChecks;
}
return failedModelChecks;
var memorySize = (int)model.GetTensorByName(TensorNames.MemorySize)[0];
var isContinuousInt = (int)model.GetTensorByName(TensorNames.IsContinuousControl)[0];
var isContinuous = GetActionType(isContinuousInt);
var actionSize = (int)model.GetTensorByName(TensorNames.ActionOutputShape)[0];
if (modelApiVersion == -1)
{
failedModelChecks.Add(

return failedModelChecks;
}
var modelDiscreteActionSize = isContinuous == ModelActionType.Discrete ? actionSize : 0;
var modelContinuousActionSize = isContinuous == ModelActionType.Continuous ? actionSize : 0;
var memorySize = (int)model.GetTensorByName(TensorNames.MemorySize)[0];
if (memorySize == -1)
{
failedModelChecks.Add($"Missing node in the model provided : {TensorNames.MemorySize}");
return failedModelChecks;
}
CheckIntScalarPresenceHelper(new Dictionary<string, int>()
{
{TensorNames.MemorySize, memorySize},
{TensorNames.IsContinuousControl, isContinuousInt},
{TensorNames.ActionOutputShape, actionSize}
})
CheckInputTensorPresence(model, brainParameters, memorySize, sensorComponents)
CheckInputTensorPresence(model, brainParameters, memorySize, isContinuous, sensorComponents)
CheckOutputTensorPresence(model, memorySize)
CheckOutputTensorPresence(model, memorySize))
;
failedModelChecks.AddRange(
CheckOutputTensorShape(model, brainParameters, actuatorComponents, isContinuous, modelContinuousActionSize, modelDiscreteActionSize)
CheckOutputTensorShape(model, brainParameters, actuatorComponents)
/// Converts the integer value in the model corresponding to the type of control to a
/// ModelActionType.
/// </summary>
/// <param name="isContinuousInt">
/// The integer value in the model indicating the type of control
/// </param>
/// <returns>The equivalent ModelActionType</returns>
static ModelActionType GetActionType(int isContinuousInt)
{
ModelActionType isContinuous;
switch (isContinuousInt)
{
case 0:
isContinuous = ModelActionType.Discrete;
break;
case 1:
isContinuous = ModelActionType.Continuous;
break;
default:
isContinuous = ModelActionType.Unknown;
break;
}
return isContinuous;
}
/// <summary>
/// Given a Dictionary of node names to int values, generates a failed check for each
/// entry whose value is the invalid sentinel -1.
/// </summary>
/// <param name="requiredScalarFields"> Mapping from node names to int values</param>
/// <returns>The list the error messages of the checks that failed</returns>
static IEnumerable<string> CheckIntScalarPresenceHelper(
Dictionary<string, int> requiredScalarFields)
{
var failedModelChecks = new List<string>();
foreach (var field in requiredScalarFields)
{
if (field.Value == -1)
{
failedModelChecks.Add($"Missing node in the model provided : {field.Key}");
}
}
return failedModelChecks;
}
/// <summary>
/// Generates failed checks that correspond to inputs expected by the model that are not
/// present in the BrainParameters.
/// </summary>

Model model,
BrainParameters brainParameters,
int memory,
ModelActionType isContinuous,
var tensorsNames = GetInputTensors(model).Select(x => x.name).ToList();
var tensorsNames = model.GetInputNames();
// If there is no Vector Observation Input but the Brain Parameters expect one.
if ((brainParameters.VectorObservationSize != 0) &&

"The model does not contain a Vector Observation Placeholder Input. " +
"The model does not contain a Vector Observation Placeholder Input. " +
"You must set the Vector Observation Space Size to 0.");
}

visObsIndex++;
}
var expectedVisualObs = GetNumVisualInputs(model);
var expectedVisualObs = model.GetNumVisualInputs();
// Check if there are not enough visual sensors (too many would be handled above)
if (expectedVisualObs > visObsIndex)
{

}
// If the model uses discrete control but does not have an input for action masks
if (isContinuous == ModelActionType.Discrete)
if (model.HasDiscreteOutputs())
{
if (!tensorsNames.Contains(TensorNames.ActionMaskPlaceholder))
{

static IEnumerable<string> CheckOutputTensorPresence(Model model, int memory)
{
var failedModelChecks = new List<string>();
// If there is no Action Output.
if (!model.outputs.Contains(TensorNames.ActionOutput))
{
failedModelChecks.Add("The model does not contain an Action Output Node.");
}
// If there is no Recurrent Output but the model is Recurrent.
if (memory > 0)

}
// If the model expects an input but it is not in this list
foreach (var tensor in GetInputTensors(model))
foreach (var tensor in model.GetInputTensors())
{
if (!tensorTester.ContainsKey(tensor.name))
{

BrainParameters brainParameters, TensorProxy tensorProxy,
SensorComponent[] sensorComponents, int observableAttributeTotalSize)
{
var numberActionsBp = brainParameters.VectorActionSize.Length;
var numberActionsBp = brainParameters.ActionSpec.NumDiscreteActions;
var numberActionsT = tensorProxy.shape[tensorProxy.shape.Length - 1];
if (numberActionsBp != numberActionsT)
{

static IEnumerable<string> CheckOutputTensorShape(
Model model,
BrainParameters brainParameters,
ActuatorComponent[] actuatorComponents,
ModelActionType isContinuous,
int modelContinuousActionSize, int modelSumDiscreteBranchSizes)
ActuatorComponent[] actuatorComponents)
if (isContinuous == ModelActionType.Unknown)
{
failedModelChecks.Add("Cannot infer type of Control from the provided model.");
return failedModelChecks;
}
if (isContinuous == ModelActionType.Continuous &&
brainParameters.VectorActionSpaceType != SpaceType.Continuous)
{
failedModelChecks.Add(
"Model has been trained using Continuous Control but the Brain Parameters " +
"suggest Discrete Control.");
return failedModelChecks;
}
if (isContinuous == ModelActionType.Discrete &&
brainParameters.VectorActionSpaceType != SpaceType.Discrete)
{
failedModelChecks.Add(
"Model has been trained using Discrete Control but the Brain Parameters " +
"suggest Continuous Control.");
return failedModelChecks;
}
var tensorTester = new Dictionary<string, Func<BrainParameters, ActuatorComponent[], TensorShape?, int, int, string>>();
// This will need to change a bit for hybrid action spaces.
if (isContinuous == ModelActionType.Continuous)
// If the model expects an output but it is not in this list
var modelContinuousActionSize = model.ContinuousOutputSize();
var continuousError = CheckContinuousActionOutputShape(brainParameters, actuatorComponents, modelContinuousActionSize);
if (continuousError != null)
tensorTester[TensorNames.ActionOutput] = CheckContinuousActionOutputShape;
failedModelChecks.Add(continuousError);
else
var modelSumDiscreteBranchSizes = model.DiscreteOutputSize();
var discreteError = CheckDiscreteActionOutputShape(brainParameters, actuatorComponents, modelSumDiscreteBranchSizes);
if (discreteError != null)
tensorTester[TensorNames.ActionOutput] = CheckDiscreteActionOutputShape;
}
// If the model expects an output but it is not in this list
foreach (var name in model.outputs)
{
if (tensorTester.ContainsKey(name))
{
var tester = tensorTester[name];
var error = tester.Invoke(brainParameters, actuatorComponents, model.GetShapeByName(name), modelContinuousActionSize, modelSumDiscreteBranchSizes);
if (error != null)
{
failedModelChecks.Add(error);
}
}
failedModelChecks.Add(discreteError);
}
return failedModelChecks;
}
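CheckOutputTensorShape now reads the expected action sizes off the model itself (ContinuousOutputSize / DiscreteOutputSize) instead of inferring them from is_continuous_control. A hedged sketch of the continuous-side comparison, ignoring additional ActuatorComponents for brevity (brainParameters, model and failedModelChecks come from the surrounding method; the error text is illustrative):
var expectedContinuous = brainParameters.ActionSpec.NumContinuousActions;
var modelContinuous = model.ContinuousOutputSize();
if (modelContinuous != expectedContinuous)
{
    failedModelChecks.Add(
        $"Continuous action size mismatch: the model outputs {modelContinuous} values " +
        $"but the BrainParameters expect {expectedContinuous}.");
}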

/// check failed. If the check passed, returns null.
/// </returns>
static string CheckDiscreteActionOutputShape(
BrainParameters brainParameters, ActuatorComponent[] actuatorComponents, TensorShape? shape, int modelContinuousActionSize, int modelSumDiscreteBranchSizes)
BrainParameters brainParameters, ActuatorComponent[] actuatorComponents, int modelSumDiscreteBranchSizes)
var sumOfDiscreteBranchSizes = 0;
if (brainParameters.VectorActionSpaceType == SpaceType.Discrete)
{
sumOfDiscreteBranchSizes += brainParameters.VectorActionSize.Sum();
}
// TODO: check each branch size instead of sum of branch sizes
var sumOfDiscreteBranchSizes = brainParameters.ActionSpec.SumOfDiscreteBranchSizes;
foreach (var actuatorComponent in actuatorComponents)
{

/// <returns>If the Check failed, returns a string containing information about why the
/// check failed. If the check passed, returns null.</returns>
static string CheckContinuousActionOutputShape(
BrainParameters brainParameters, ActuatorComponent[] actuatorComponents, TensorShape? shape, int modelContinuousActionSize, int modelSumDiscreteBranchSizes)
BrainParameters brainParameters, ActuatorComponent[] actuatorComponents, int modelContinuousActionSize)
var numContinuousActions = 0;
if (brainParameters.VectorActionSpaceType == SpaceType.Continuous)
{
numContinuousActions += brainParameters.NumActions;
}
var numContinuousActions = brainParameters.ActionSpec.NumContinuousActions;
foreach (var actuatorComponent in actuatorComponents)
{

4
com.unity.ml-agents/Runtime/Inference/GeneratorImpl.cs


foreach (var infoSensorPair in infos)
{
var info = infoSensorPair.agentInfo;
var pastAction = info.storedVectorActions;
if (pastAction != null)
var pastAction = info.storedVectorActions.DiscreteActions;
if (!pastAction.IsEmpty())
{
for (var j = 0; j < actionSize; j++)
{

12
com.unity.ml-agents/Runtime/Inference/ModelRunner.cs


internal class ModelRunner
{
List<AgentInfoSensorsPair> m_Infos = new List<AgentInfoSensorsPair>();
Dictionary<int, float[]> m_LastActionsReceived = new Dictionary<int, float[]>();
Dictionary<int, ActionBuffers> m_LastActionsReceived = new Dictionary<int, ActionBuffers>();
List<int> m_OrderedAgentsRequestingDecisions = new List<int>();
ITensorAllocator m_TensorAllocator;

m_Engine = null;
}
m_InferenceInputs = BarracudaModelParamLoader.GetInputTensors(barracudaModel);
m_OutputNames = BarracudaModelParamLoader.GetOutputNames(barracudaModel);
m_InferenceInputs = barracudaModel.GetInputTensors();
m_OutputNames = barracudaModel.GetOutputNames();
m_TensorGenerator = new TensorGenerator(
seed, m_TensorAllocator, m_Memories, barracudaModel);
m_TensorApplier = new TensorApplier(

if (!m_LastActionsReceived.ContainsKey(info.episodeId))
{
m_LastActionsReceived[info.episodeId] = null;
m_LastActionsReceived[info.episodeId] = ActionBuffers.Empty;
}
if (info.done)
{

return m_Model == other && m_InferenceDevice == otherInferenceDevice;
}
public float[] GetAction(int agentId)
public ActionBuffers GetAction(int agentId)
return null;
return ActionBuffers.Empty;
}
}
}
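GetAction now returns an ActionBuffers value instead of a nullable float array, with ActionBuffers.Empty standing in for "no decision yet". A minimal usage sketch (modelRunner and agentId are assumed to exist in the calling code):
modelRunner.DecideBatch();
var action = modelRunner.GetAction(agentId);
if (action.Equals(ActionBuffers.Empty))
{
    // No observations were put in for this agent, so there is nothing to apply.
}
else
{
    var firstDiscrete = action.DiscreteActions[0];   // or action.ContinuousActions for continuous models
}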

35
com.unity.ml-agents/Runtime/Inference/TensorApplier.cs


/// </param>
/// <param name="actionIds"> List of Agents Ids that will be updated using the tensor's data</param>
/// <param name="lastActions"> Dictionary of AgentId to Actions to be updated</param>
void Apply(TensorProxy tensorProxy, IEnumerable<int> actionIds, Dictionary<int, float[]> lastActions);
void Apply(TensorProxy tensorProxy, IEnumerable<int> actionIds, Dictionary<int, ActionBuffers> lastActions);
}
readonly Dictionary<string, IApplier> m_Dict = new Dictionary<string, IApplier>();

Dictionary<int, List<float>> memories,
object barracudaModel = null)
{
actionSpec.CheckNotHybrid();
// If model is null, no inference to run and exception is thrown before reaching here.
if (barracudaModel == null)
{
return;
}
var model = (Model)barracudaModel;
if (!model.SupportsContinuousAndDiscrete())
{
actionSpec.CheckAllContinuousOrDiscrete();
}
m_Dict[TensorNames.ActionOutput] = new ContinuousActionOutputApplier();
var tensorName = model.ContinuousOutputName();
m_Dict[tensorName] = new ContinuousActionOutputApplier(actionSpec);
else
if (actionSpec.NumDiscreteActions > 0)
m_Dict[TensorNames.ActionOutput] =
new DiscreteActionOutputApplier(actionSpec.BranchSizes, seed, allocator);
var tensorName = model.DiscreteOutputName();
m_Dict[tensorName] = new DiscreteActionOutputApplier(actionSpec, seed, allocator);
if (barracudaModel != null)
for (var i = 0; i < model?.memories.Count; i++)
var model = (Model)barracudaModel;
for (var i = 0; i < model?.memories.Count; i++)
{
m_Dict[model.memories[i].output] =
new BarracudaMemoryOutputApplier(model.memories.Count, i, memories);
}
m_Dict[model.memories[i].output] =
new BarracudaMemoryOutputApplier(model.memories.Count, i, memories);
}
}

/// <exception cref="UnityAgentsException"> One of the tensors does not have an
/// associated applier.</exception>
public void ApplyTensors(
IEnumerable<TensorProxy> tensors, IEnumerable<int> actionIds, Dictionary<int, float[]> lastActions)
IEnumerable<TensorProxy> tensors, IEnumerable<int> actionIds, Dictionary<int, ActionBuffers> lastActions)
{
foreach (var tensor in tensors)
{

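ApplyTensors keeps its previous shape but now threads the shared agentId -> ActionBuffers dictionary through every registered IApplier. A short sketch of the call site, assuming the inference outputs and ordered agent ids are already available from the ModelRunner:
// lastActions is shared with the ModelRunner and read back by the policies.
var lastActions = new Dictionary<int, ActionBuffers>();
tensorApplier.ApplyTensors(inferenceOutputs, orderedAgentIds, lastActions);
// Afterwards each agent's entry holds the continuous and/or discrete actions its appliers filled in.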
26
com.unity.ml-agents/Runtime/Inference/TensorGenerator.cs


Dictionary<int, List<float>> memories,
object barracudaModel = null)
{
// If model is null, no inference to run and exception is thrown before reaching here.
if (barracudaModel == null)
{
return;
}
var model = (Model)barracudaModel;
// Generator for Inputs
m_Dict[TensorNames.BatchSizePlaceholder] =
new BatchSizeGenerator(allocator);

new RecurrentInputGenerator(allocator, memories);
if (barracudaModel != null)
for (var i = 0; i < model.memories.Count; i++)
var model = (Model)barracudaModel;
for (var i = 0; i < model.memories.Count; i++)
{
m_Dict[model.memories[i].input] =
new BarracudaRecurrentInputGenerator(i, allocator, memories);
}
m_Dict[model.memories[i].input] =
new BarracudaRecurrentInputGenerator(i, allocator, memories);
}
m_Dict[TensorNames.PreviousActionPlaceholder] =

// Generators for Outputs
m_Dict[TensorNames.ActionOutput] = new BiDimensionalOutputGenerator(allocator);
if (model.HasContinuousOutputs())
{
m_Dict[model.ContinuousOutputName()] = new BiDimensionalOutputGenerator(allocator);
}
if (model.HasDiscreteOutputs())
{
m_Dict[model.DiscreteOutputName()] = new BiDimensionalOutputGenerator(allocator);
}
m_Dict[TensorNames.RecurrentOutput] = new BiDimensionalOutputGenerator(allocator);
m_Dict[TensorNames.ValueEstimateOutput] = new BiDimensionalOutputGenerator(allocator);
}

15
com.unity.ml-agents/Runtime/Inference/TensorNames.cs


public const string recurrentOutputC = "recurrent_out_c";
public const string MemorySize = "memory_size";
public const string VersionNumber = "version_number";
public const string IsContinuousControl = "is_continuous_control";
public const string ActionOutputShape = "action_output_shape";
public const string ActionOutput = "action";
public const string ContinuousActionOutputShape = "continuous_action_output_shape";
public const string DiscreteActionOutputShape = "discrete_action_output_shape";
public const string ContinuousActionOutput = "continuous_actions";
public const string DiscreteActionOutput = "discrete_actions";
public static readonly string[] RequiredConstants =
{
VersionNumber, MemorySize, IsContinuousControl, ActionOutputShape
};
// Deprecated TensorNames entries for backward compatibility
public const string IsContinuousControlDeprecated = "is_continuous_control";
public const string ActionOutputDeprecated = "action";
public const string ActionOutputShapeDeprecated = "action_output_shape";
}
}
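Older .nn models only expose the deprecated single action output, so callers have to fall back to the old names. A hedged illustration of that selection, roughly what the ContinuousOutputName()/DiscreteOutputName() model extensions referenced elsewhere in this change provide (model is an already-loaded Barracuda Model; this is not the actual implementation):
var continuousName = model.outputs.Contains(TensorNames.ContinuousActionOutput)
    ? TensorNames.ContinuousActionOutput
    : TensorNames.ActionOutputDeprecated;
var discreteName = model.outputs.Contains(TensorNames.DiscreteActionOutput)
    ? TensorNames.DiscreteActionOutput
    : TensorNames.ActionOutputDeprecated;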

19
com.unity.ml-agents/Runtime/Policies/BarracudaPolicy.cs


/// Sensor shapes for the associated Agents. All Agents must have the same shapes for their Sensors.
/// </summary>
List<int[]> m_SensorShapes;
SpaceType m_SpaceType;
ActionSpec m_ActionSpec;
/// <inheritdoc />
public BarracudaPolicy(

{
var modelRunner = Academy.Instance.GetOrCreateModelRunner(model, actionSpec, inferenceDevice);
m_ModelRunner = modelRunner;
actionSpec.CheckNotHybrid();
m_SpaceType = actionSpec.NumContinuousActions > 0 ? SpaceType.Continuous : SpaceType.Discrete;
m_ActionSpec = actionSpec;
}
/// <inheritdoc />

/// <inheritdoc />
public ref readonly ActionBuffers DecideAction()
{
m_ModelRunner?.DecideBatch();
var actions = m_ModelRunner?.GetAction(m_AgentId);
if (m_SpaceType == SpaceType.Continuous)
if (m_ModelRunner == null)
{
m_LastActionBuffer = ActionBuffers.Empty;
}
else
m_LastActionBuffer = new ActionBuffers(actions, Array.Empty<int>());
return ref m_LastActionBuffer;
m_ModelRunner?.DecideBatch();
m_LastActionBuffer = m_ModelRunner.GetAction(m_AgentId);
m_LastActionBuffer = ActionBuffers.FromDiscreteActions(actions);
return ref m_LastActionBuffer;
}

109
com.unity.ml-agents/Runtime/Policies/BrainParameters.cs


using System;
using UnityEngine;
using UnityEngine.Serialization;
using Unity.MLAgents.Actuators;
namespace Unity.MLAgents.Policies
{

/// [GameObject]: https://docs.unity3d.com/Manual/GameObjects.html
/// </remarks>
[Serializable]
public class BrainParameters
public class BrainParameters : ISerializationCallbackReceiver
{
/// <summary>
/// The number of the observations that are added in

[FormerlySerializedAs("numStackedVectorObservations")]
[Range(1, 50)] public int NumStackedVectorObservations = 1;
[SerializeField]
internal ActionSpec m_ActionSpec = new ActionSpec(0, null);
/// The size of the action space.
/// The specification of the Action space for the BrainParameters.
/// </summary>
public ActionSpec ActionSpec
{
get { return m_ActionSpec; }
set
{
m_ActionSpec.NumContinuousActions = value.NumContinuousActions;
m_ActionSpec.BranchSizes = value.BranchSizes;
SyncDeprecatedActionFields();
}
}
/// <summary>
/// (Deprecated) The size of the action space.
/// </summary>
/// <remarks>The size specified is interpreted differently depending on whether
/// the agent uses the continuous or the discrete action space.</remarks>

/// For the discrete action space: the number of branches in the action space.
/// </value>
/// [Obsolete("VectorActionSize has been deprecated, please use ActionSpec instead.")]
[FormerlySerializedAs("vectorActionSize")]
public int[] VectorActionSize = new[] { 1 };

public string[] VectorActionDescriptions;
/// <summary>
/// Defines if the action is discrete or continuous.
/// (Deprecated) Defines if the action is discrete or continuous.
/// [Obsolete("VectorActionSpaceType has been deprecated, please use ActionSpec instead.")]
[SerializeField]
[HideInInspector]
internal bool hasUpgradedBrainParametersWithActionSpec;
/// The number of actions specified by this Brain.
/// (Deprecated) The number of actions specified by this Brain.
/// [Obsolete("NumActions has been deprecated, please use ActionSpec instead.")]
switch (VectorActionSpaceType)
{
case SpaceType.Discrete:
return VectorActionSize.Length;
case SpaceType.Continuous:
return VectorActionSize[0];
default:
return 0;
}
return ActionSpec.NumContinuousActions > 0 ? ActionSpec.NumContinuousActions : ActionSpec.NumDiscreteActions;
}
}

{
VectorObservationSize = VectorObservationSize,
NumStackedVectorObservations = NumStackedVectorObservations,
VectorActionSize = (int[])VectorActionSize.Clone(),
VectorActionSpaceType = VectorActionSpaceType
ActionSpec = new ActionSpec(ActionSpec.NumContinuousActions, ActionSpec.BranchSizes),
VectorActionSize = (int[])VectorActionSize.Clone(),
VectorActionSpaceType = VectorActionSpaceType,
}
/// <summary>
/// Propagate ActionSpec fields from the deprecated fields
/// </summary>
private void UpdateToActionSpec()
{
if (!hasUpgradedBrainParametersWithActionSpec)
{
if (VectorActionSpaceType == SpaceType.Continuous)
{
m_ActionSpec.NumContinuousActions = VectorActionSize[0];
m_ActionSpec.BranchSizes = null;
}
if (VectorActionSpaceType == SpaceType.Discrete)
{
m_ActionSpec.NumContinuousActions = 0;
m_ActionSpec.BranchSizes = VectorActionSize;
}
hasUpgradedBrainParametersWithActionSpec = true;
}
}
/// <summary>
/// Sync values in ActionSpec fields to deprecated fields
/// </summary>
private void SyncDeprecatedActionFields()
{
if (m_ActionSpec.NumContinuousActions == 0)
{
VectorActionSize = ActionSpec.BranchSizes;
VectorActionSpaceType = SpaceType.Discrete;
}
else if (m_ActionSpec.NumDiscreteActions == 0)
{
VectorActionSize = new[] { m_ActionSpec.NumContinuousActions };
VectorActionSpaceType = SpaceType.Continuous;
}
else
{
VectorActionSize = null;
}
}
/// <summary>
/// Called by Unity immediately before serializing this object.
/// </summary>
public void OnBeforeSerialize()
{
UpdateToActionSpec();
SyncDeprecatedActionFields();
}
/// <summary>
/// Called by Unity immediately after deserializing this object.
/// </summary>
public void OnAfterDeserialize()
{
UpdateToActionSpec();
SyncDeprecatedActionFields();
}
}
}
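With the serialization callbacks above, user code only needs to assign ActionSpec; the deprecated fields are upgraded on load by UpdateToActionSpec and kept in sync by SyncDeprecatedActionFields. A minimal sketch using the factory methods that appear in the tests later in this change:
var bp = new BrainParameters();
bp.VectorObservationSize = 8;
bp.ActionSpec = ActionSpec.MakeDiscrete(2, 3);    // two discrete branches of sizes 2 and 3
// After the setter runs, the deprecated mirrors describe the same space:
//   bp.VectorActionSize      -> { 2, 3 }
//   bp.VectorActionSpaceType -> SpaceType.Discrete
bp.ActionSpec = ActionSpec.MakeContinuous(4);     // a purely continuous spec syncs the same way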

14
com.unity.ml-agents/Runtime/Policies/RemotePolicy.cs


{
int m_AgentId;
string m_FullyQualifiedBehaviorName;
SpaceType m_SpaceType;
ActionSpec m_ActionSpec;
ActionBuffers m_LastActionBuffer;
internal ICommunicator m_Communicator;

m_FullyQualifiedBehaviorName = fullyQualifiedBehaviorName;
m_Communicator = Academy.Instance.Communicator;
m_Communicator.SubscribeBrain(m_FullyQualifiedBehaviorName, actionSpec);
actionSpec.CheckNotHybrid();
m_SpaceType = actionSpec.NumContinuousActions > 0 ? SpaceType.Continuous : SpaceType.Discrete;
m_ActionSpec = actionSpec;
}
/// <inheritdoc />

{
m_Communicator?.DecideBatch();
var actions = m_Communicator?.GetActions(m_FullyQualifiedBehaviorName, m_AgentId);
// TODO figure out how to handle this with multiple space types.
if (m_SpaceType == SpaceType.Continuous)
{
m_LastActionBuffer = new ActionBuffers(actions, Array.Empty<int>());
return ref m_LastActionBuffer;
}
m_LastActionBuffer = ActionBuffers.FromDiscreteActions(actions);
m_LastActionBuffer = actions == null ? ActionBuffers.Empty : (ActionBuffers)actions;
return ref m_LastActionBuffer;
}

4
com.unity.ml-agents/Runtime/Sensors/ObservationWriter.cs


{
m_TensorShape = new TensorShape(m_Batch, shape[0]);
}
else if (shape.Length == 2)
{
m_TensorShape = new TensorShape(new int[] { m_Batch, 1, shape[0], shape[1] });
}
else
{
m_TensorShape = new TensorShape(m_Batch, shape[0], shape[1], shape[2]);

12
com.unity.ml-agents/Tests/Editor/Actuators/ActuatorManagerTests.cs


}
[Test]
public void TestFailOnMixedActionSpace()
{
var manager = new ActuatorManager();
var actuator1 = new TestActuator(ActionSpec.MakeDiscrete(new[] { 1, 2, 3, 4 }), "actuator1");
var actuator2 = new TestActuator(ActionSpec.MakeContinuous(3), "actuator2");
manager.Add(actuator1);
manager.Add(actuator2);
LogAssert.Expect(LogType.Assert, "Actuators on the same Agent must have the same action SpaceType.");
manager.ReadyActuatorsForExecution(new[] { actuator1, actuator2 }, 3, 10, 4);
}
[Test]
public void TestFailOnSameActuatorName()
{
var manager = new ActuatorManager();

10
com.unity.ml-agents/Tests/Editor/Actuators/VectorActuatorTests.cs


public void TestConstruct()
{
var ar = new TestActionReceiver();
var va = new VectorActuator(ar, new[] { 1, 2, 3 }, SpaceType.Discrete, "name");
var va = new VectorActuator(ar, ActionSpec.MakeDiscrete(1, 2, 3), "name");
var va1 = new VectorActuator(ar, new[] { 4 }, SpaceType.Continuous, "name");
var va1 = new VectorActuator(ar, ActionSpec.MakeContinuous(4), "name");
Assert.IsTrue(va1.ActionSpec.NumContinuousActions == 4);
Assert.IsTrue(va1.ActionSpec.SumOfDiscreteBranchSizes == 0);

public void TestOnActionReceived()
{
var ar = new TestActionReceiver();
var va = new VectorActuator(ar, new[] { 1, 2, 3 }, SpaceType.Discrete, "name");
var va = new VectorActuator(ar, ActionSpec.MakeDiscrete(1, 2, 3), "name");
var discreteActions = new[] { 0, 1, 1 };
var ab = new ActionBuffers(ActionSegment<float>.Empty,

public void TestResetData()
{
var ar = new TestActionReceiver();
var va = new VectorActuator(ar, new[] { 1, 2, 3 }, SpaceType.Discrete, "name");
var va = new VectorActuator(ar, ActionSpec.MakeDiscrete(1, 2, 3), "name");
var discreteActions = new[] { 0, 1, 1 };
var ab = new ActionBuffers(ActionSegment<float>.Empty,

public void TestWriteDiscreteActionMask()
{
var ar = new TestActionReceiver();
var va = new VectorActuator(ar, new[] { 1, 2, 3 }, SpaceType.Discrete, "name");
var va = new VectorActuator(ar, ActionSpec.MakeDiscrete(1, 2, 3), "name");
var bdam = new ActuatorDiscreteActionMask(new[] { va }, 6, 3);
var groundTruthMask = new[] { false, true, false, false, true, true };

9
com.unity.ml-agents/Tests/Editor/DemonstrationTests.cs


using UnityEngine;
using System.IO.Abstractions.TestingHelpers;
using System.Reflection;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.CommunicatorObjects;
using Unity.MLAgents.Sensors;
using Unity.MLAgents.Demonstrations;

bp.BrainParameters.VectorObservationSize = 3;
bp.BrainParameters.NumStackedVectorObservations = 2;
bp.BrainParameters.VectorActionDescriptions = new[] { "TestActionA", "TestActionB" };
bp.BrainParameters.VectorActionSize = new[] { 2, 2 };
bp.BrainParameters.VectorActionSpaceType = SpaceType.Discrete;
bp.BrainParameters.ActionSpec = ActionSpec.MakeDiscrete(2, 2);
gameobj.AddComponent<TestAgent>();

done = true,
episodeId = 5,
maxStepReached = true,
storedVectorActions = new[] { 0f, 1f },
storedVectorActions = new ActionBuffers(null, new int[] { 0, 1 }),
};

bpA.BrainParameters.VectorObservationSize = 3;
bpA.BrainParameters.NumStackedVectorObservations = 1;
bpA.BrainParameters.VectorActionDescriptions = new[] { "TestActionA", "TestActionB" };
bpA.BrainParameters.VectorActionSize = new[] { 2, 2 };
bpA.BrainParameters.VectorActionSpaceType = SpaceType.Discrete;
bpA.BrainParameters.ActionSpec = ActionSpec.MakeDiscrete(2, 2);
agentGo1.AddComponent<ObservationAgent>();
var agent1 = agentGo1.GetComponent<ObservationAgent>();

74
com.unity.ml-agents/Tests/Editor/EditModeTestInternalBrainTensorApplier.cs


using Unity.Barracuda;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Inference;
using Unity.MLAgents.Policies;
namespace Unity.MLAgents.Tests
{

[Test]
public void ApplyContinuousActionOutput()
{
var actionSpec = ActionSpec.MakeContinuous(3);
var inputTensor = new TensorProxy()
{
shape = new long[] { 2, 3 },

var applier = new ContinuousActionOutputApplier();
var applier = new ContinuousActionOutputApplier(actionSpec);
var actionDict = new Dictionary<int, float[]>() { { 0, null }, { 1, null } };
var actionDict = new Dictionary<int, ActionBuffers>() { { 0, ActionBuffers.Empty }, { 1, ActionBuffers.Empty } };
Assert.AreEqual(actionDict[0][0], 1);
Assert.AreEqual(actionDict[0][1], 2);
Assert.AreEqual(actionDict[0][2], 3);
Assert.AreEqual(actionDict[0].ContinuousActions[0], 1);
Assert.AreEqual(actionDict[0].ContinuousActions[1], 2);
Assert.AreEqual(actionDict[0].ContinuousActions[2], 3);
Assert.AreEqual(actionDict[1][0], 4);
Assert.AreEqual(actionDict[1][1], 5);
Assert.AreEqual(actionDict[1][2], 6);
Assert.AreEqual(actionDict[1].ContinuousActions[0], 4);
Assert.AreEqual(actionDict[1].ContinuousActions[1], 5);
Assert.AreEqual(actionDict[1].ContinuousActions[2], 6);
var actionSpec = ActionSpec.MakeDiscrete(2, 3);
var inputTensor = new TensorProxy()
{
shape = new long[] { 2, 5 },

new[] { 0.5f, 22.5f, 0.1f, 5f, 1f, 4f, 5f, 6f, 7f, 8f })
};
var alloc = new TensorCachingAllocator();
var applier = new DiscreteActionOutputApplier(new[] { 2, 3 }, 0, alloc);
var applier = new DiscreteActionOutputApplier(actionSpec, 0, alloc);
var actionDict = new Dictionary<int, float[]>() { { 0, null }, { 1, null } };
var actionDict = new Dictionary<int, ActionBuffers>() { { 0, ActionBuffers.Empty }, { 1, ActionBuffers.Empty } };
Assert.AreEqual(actionDict[0][0], 1);
Assert.AreEqual(actionDict[0][1], 1);
Assert.AreEqual(actionDict[0].DiscreteActions[0], 1);
Assert.AreEqual(actionDict[0].DiscreteActions[1], 1);
Assert.AreEqual(actionDict[1][0], 1);
Assert.AreEqual(actionDict[1][1], 2);
Assert.AreEqual(actionDict[1].DiscreteActions[0], 1);
Assert.AreEqual(actionDict[1].DiscreteActions[1], 2);
alloc.Dispose();
}
[Test]
public void ApplyHybridActionOutput()
{
var actionSpec = new ActionSpec(3, new int[] { 2, 3 });
var continuousInputTensor = new TensorProxy()
{
shape = new long[] { 2, 3 },
data = new Tensor(2, 3, new float[] { 1, 2, 3, 4, 5, 6 })
};
var discreteInputTensor = new TensorProxy()
{
shape = new long[] { 2, 8 },
data = new Tensor(
2,
5,
new[] { 0.5f, 22.5f, 0.1f, 5f, 1f, 4f, 5f, 6f, 7f, 8f })
};
var continuousApplier = new ContinuousActionOutputApplier(actionSpec);
var alloc = new TensorCachingAllocator();
var discreteApplier = new DiscreteActionOutputApplier(actionSpec, 0, alloc);
var agentIds = new List<int>() { 0, 1 };
// Dictionary from AgentId to Action
var actionDict = new Dictionary<int, ActionBuffers>() { { 0, ActionBuffers.Empty }, { 1, ActionBuffers.Empty } };
continuousApplier.Apply(continuousInputTensor, agentIds, actionDict);
discreteApplier.Apply(discreteInputTensor, agentIds, actionDict);
Assert.AreEqual(actionDict[0].ContinuousActions[0], 1);
Assert.AreEqual(actionDict[0].ContinuousActions[1], 2);
Assert.AreEqual(actionDict[0].ContinuousActions[2], 3);
Assert.AreEqual(actionDict[0].DiscreteActions[0], 1);
Assert.AreEqual(actionDict[0].DiscreteActions[1], 1);
Assert.AreEqual(actionDict[1].ContinuousActions[0], 4);
Assert.AreEqual(actionDict[1].ContinuousActions[1], 5);
Assert.AreEqual(actionDict[1].ContinuousActions[2], 6);
Assert.AreEqual(actionDict[1].DiscreteActions[0], 1);
Assert.AreEqual(actionDict[1].DiscreteActions[1], 2);
alloc.Dispose();
}
}

7
com.unity.ml-agents/Tests/Editor/EditModeTestInternalBrainTensorGenerator.cs


using Unity.Barracuda;
using NUnit.Framework;
using UnityEngine;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Inference;
using Unity.MLAgents.Policies;
using Unity.MLAgents.Sensors.Reflection;

var infoA = new AgentInfo
{
storedVectorActions = new[] { 1f, 2f },
discreteActionMasks = null
storedVectorActions = new ActionBuffers(null, new[] { 1, 2 }),
discreteActionMasks = null,
storedVectorActions = new[] { 3f, 4f },
storedVectorActions = new ActionBuffers(null, new[] { 3, 4 }),
discreteActionMasks = new[] { true, false, false, false, false },
};

27
com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs


{
var agentGo1 = new GameObject("TestAgent");
var bp1 = agentGo1.AddComponent<BehaviorParameters>();
bp1.BrainParameters.VectorActionSize = new[] { 1 };
bp1.BrainParameters.VectorActionSpaceType = SpaceType.Continuous;
bp1.BrainParameters.ActionSpec = ActionSpec.MakeContinuous(1);
bp2.BrainParameters.VectorActionSize = new[] { 1 };
bp2.BrainParameters.VectorActionSpaceType = SpaceType.Continuous;
bp2.BrainParameters.ActionSpec = ActionSpec.MakeContinuous(1);
agentGo2.AddComponent<TestAgent>();
var agent2 = agentGo2.GetComponent<TestAgent>();

{
var agentGo1 = new GameObject("TestAgent");
var bp1 = agentGo1.AddComponent<BehaviorParameters>();
bp1.BrainParameters.VectorActionSize = new[] { 1 };
bp1.BrainParameters.VectorActionSpaceType = SpaceType.Continuous;
bp1.BrainParameters.ActionSpec = ActionSpec.MakeContinuous(1);
bp2.BrainParameters.VectorActionSize = new[] { 1 };
bp2.BrainParameters.VectorActionSpaceType = SpaceType.Continuous;
bp2.BrainParameters.ActionSpec = ActionSpec.MakeContinuous(1);
agentGo2.AddComponent<TestAgent>();
var agent2 = agentGo2.GetComponent<TestAgent>();

{
var agentGo1 = new GameObject("TestAgent");
var bp1 = agentGo1.AddComponent<BehaviorParameters>();
bp1.BrainParameters.VectorActionSize = new[] { 1 };
bp1.BrainParameters.VectorActionSpaceType = SpaceType.Continuous;
bp1.BrainParameters.ActionSpec = ActionSpec.MakeContinuous(1);
var agent1 = agentGo1.AddComponent<TestAgent>();
var behaviorParameters = agentGo1.GetComponent<BehaviorParameters>();
behaviorParameters.BrainParameters.NumStackedVectorObservations = 3;

{
var agentGo1 = new GameObject("TestAgent");
var bp1 = agentGo1.AddComponent<BehaviorParameters>();
bp1.BrainParameters.VectorActionSize = new[] { 1 };
bp1.BrainParameters.VectorActionSpaceType = SpaceType.Continuous;
bp1.BrainParameters.ActionSpec = ActionSpec.MakeContinuous(1);
bp2.BrainParameters.VectorActionSize = new[] { 1 };
bp2.BrainParameters.VectorActionSpaceType = SpaceType.Continuous;
bp2.BrainParameters.ActionSpec = ActionSpec.MakeContinuous(1);
var agent2 = agentGo2.AddComponent<TestAgent>();
var aca = Academy.Instance;

{
var agentGo1 = new GameObject("TestAgent");
var bp1 = agentGo1.AddComponent<BehaviorParameters>();
bp1.BrainParameters.VectorActionSize = new[] { 1 };
bp1.BrainParameters.VectorActionSpaceType = SpaceType.Continuous;
bp1.BrainParameters.ActionSpec = ActionSpec.MakeContinuous(1);
agentGo1.AddComponent<TestAgent>();
var agent1 = agentGo1.GetComponent<TestAgent>();
var aca = Academy.Instance;

// Make sure that Agents with HeuristicPolicies step their sensors each Academy step.
var agentGo1 = new GameObject("TestAgent");
var bp1 = agentGo1.AddComponent<BehaviorParameters>();
bp1.BrainParameters.VectorActionSize = new[] { 1 };
bp1.BrainParameters.VectorActionSpaceType = SpaceType.Continuous;
bp1.BrainParameters.ActionSpec = ActionSpec.MakeContinuous(1);
agentGo1.AddComponent<TestAgent>();
var agent1 = agentGo1.GetComponent<TestAgent>();
var aca = Academy.Instance;

62
com.unity.ml-agents/Tests/Editor/ModelRunnerTest.cs


using Unity.Barracuda;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Inference;
using Unity.MLAgents.Sensors;
using Unity.MLAgents.Policies;
namespace Unity.MLAgents.Tests

{
const string k_continuous2vis8vec2actionPath = "Packages/com.unity.ml-agents/Tests/Editor/TestModels/continuous2vis8vec2action.nn";
const string k_discrete1vis0vec_2_3action_recurrModelPath = "Packages/com.unity.ml-agents/Tests/Editor/TestModels/discrete1vis0vec_2_3action_recurr.nn";
NNModel continuous2vis8vec2actionModel;
NNModel discrete1vis0vec_2_3action_recurrModel;
const string k_continuousONNXPath = "Packages/com.unity.ml-agents/Tests/Editor/TestModels/continuous2vis8vec2action.onnx";
const string k_discreteONNXPath = "Packages/com.unity.ml-agents/Tests/Editor/TestModels/discrete1vis0vec_2_3action_recurr.onnx";
const string k_hybridONNXPath = "Packages/com.unity.ml-agents/Tests/Editor/TestModels/hybrid0vis53vec_3c_2daction.onnx";
const string k_continuousNNPath = "Packages/com.unity.ml-agents/Tests/Editor/TestModels/continuous2vis8vec2action_deprecated.nn";
const string k_discreteNNPath = "Packages/com.unity.ml-agents/Tests/Editor/TestModels/discrete1vis0vec_2_3action_recurr_deprecated.nn";
NNModel continuousONNXModel;
NNModel discreteONNXModel;
NNModel hybridONNXModel;
NNModel continuousNNModel;
NNModel discreteNNModel;
Test3DSensorComponent sensor_21_20_3;
Test3DSensorComponent sensor_20_22_3;

return ActionSpec.MakeDiscrete(2, 3);
}
ActionSpec GetHybrid0vis53vec_3c_2dActionSpec()
{
return new ActionSpec(3, new int[] { 2 });
}
continuous2vis8vec2actionModel = (NNModel)AssetDatabase.LoadAssetAtPath(k_continuous2vis8vec2actionPath, typeof(NNModel));
discrete1vis0vec_2_3action_recurrModel = (NNModel)AssetDatabase.LoadAssetAtPath(k_discrete1vis0vec_2_3action_recurrModelPath, typeof(NNModel));
continuousONNXModel = (NNModel)AssetDatabase.LoadAssetAtPath(k_continuousONNXPath, typeof(NNModel));
discreteONNXModel = (NNModel)AssetDatabase.LoadAssetAtPath(k_discreteONNXPath, typeof(NNModel));
hybridONNXModel = (NNModel)AssetDatabase.LoadAssetAtPath(k_hybridONNXPath, typeof(NNModel));
continuousNNModel = (NNModel)AssetDatabase.LoadAssetAtPath(k_continuousNNPath, typeof(NNModel));
discreteNNModel = (NNModel)AssetDatabase.LoadAssetAtPath(k_discreteNNPath, typeof(NNModel));
var go = new GameObject("SensorA");
sensor_21_20_3 = go.AddComponent<Test3DSensorComponent>();
sensor_21_20_3.Sensor = new Test3DSensor("SensorA", 21, 20, 3);

[Test]
public void TestModelExist()
{
Assert.IsNotNull(continuous2vis8vec2actionModel);
Assert.IsNotNull(discrete1vis0vec_2_3action_recurrModel);
Assert.IsNotNull(continuousONNXModel);
Assert.IsNotNull(discreteONNXModel);
Assert.IsNotNull(hybridONNXModel);
Assert.IsNotNull(continuousNNModel);
Assert.IsNotNull(discreteNNModel);
var modelRunner = new ModelRunner(continuous2vis8vec2actionModel, GetContinuous2vis8vec2actionActionSpec());
var modelRunner = new ModelRunner(continuousONNXModel, GetContinuous2vis8vec2actionActionSpec());
modelRunner = new ModelRunner(discrete1vis0vec_2_3action_recurrModel, GetDiscrete1vis0vec_2_3action_recurrModelActionSpec());
modelRunner = new ModelRunner(discreteONNXModel, GetDiscrete1vis0vec_2_3action_recurrModelActionSpec());
modelRunner.Dispose();
modelRunner = new ModelRunner(hybridONNXModel, GetHybrid0vis53vec_3c_2dActionSpec());
modelRunner.Dispose();
modelRunner = new ModelRunner(continuousNNModel, GetContinuous2vis8vec2actionActionSpec());
modelRunner.Dispose();
modelRunner = new ModelRunner(discreteNNModel, GetDiscrete1vis0vec_2_3action_recurrModelActionSpec());
modelRunner.Dispose();
}

var modelRunner = new ModelRunner(continuous2vis8vec2actionModel, GetContinuous2vis8vec2actionActionSpec(), InferenceDevice.CPU);
Assert.True(modelRunner.HasModel(continuous2vis8vec2actionModel, InferenceDevice.CPU));
Assert.False(modelRunner.HasModel(continuous2vis8vec2actionModel, InferenceDevice.GPU));
Assert.False(modelRunner.HasModel(discrete1vis0vec_2_3action_recurrModel, InferenceDevice.CPU));
var modelRunner = new ModelRunner(continuousONNXModel, GetContinuous2vis8vec2actionActionSpec(), InferenceDevice.CPU);
Assert.True(modelRunner.HasModel(continuousONNXModel, InferenceDevice.CPU));
Assert.False(modelRunner.HasModel(continuousONNXModel, InferenceDevice.GPU));
Assert.False(modelRunner.HasModel(discreteONNXModel, InferenceDevice.CPU));
modelRunner.Dispose();
}

var actionSpec = GetDiscrete1vis0vec_2_3action_recurrModelActionSpec();
var modelRunner = new ModelRunner(discrete1vis0vec_2_3action_recurrModel, actionSpec);
var modelRunner = new ModelRunner(discreteONNXModel, actionSpec);
var info1 = new AgentInfo();
info1.episodeId = 1;
modelRunner.PutObservations(info1, new[] { sensor_21_20_3.CreateSensor() }.ToList());

modelRunner.DecideBatch();
Assert.IsNotNull(modelRunner.GetAction(1));
Assert.IsNotNull(modelRunner.GetAction(2));
Assert.IsNull(modelRunner.GetAction(3));
Assert.AreEqual(actionSpec.NumDiscreteActions, modelRunner.GetAction(1).Count());
Assert.IsFalse(modelRunner.GetAction(1).Equals(ActionBuffers.Empty));
Assert.IsFalse(modelRunner.GetAction(2).Equals(ActionBuffers.Empty));
Assert.IsTrue(modelRunner.GetAction(3).Equals(ActionBuffers.Empty));
Assert.AreEqual(actionSpec.NumDiscreteActions, modelRunner.GetAction(1).DiscreteActions.Length);
modelRunner.Dispose();
}
}

217
com.unity.ml-agents/Tests/Editor/ParameterLoaderTest.cs


[TestFixture]
public class ParameterLoaderTest
{
const string k_continuous2vis8vec2actionPath = "Packages/com.unity.ml-agents/Tests/Editor/TestModels/continuous2vis8vec2action.nn";
const string k_discrete1vis0vec_2_3action_recurrModelPath = "Packages/com.unity.ml-agents/Tests/Editor/TestModels/discrete1vis0vec_2_3action_recurr.nn";
NNModel continuous2vis8vec2actionModel;
NNModel discrete1vis0vec_2_3action_recurrModel;
// ONNX models with continuous/discrete action outputs (support hybrid actions)
const string k_continuousONNXPath = "Packages/com.unity.ml-agents/Tests/Editor/TestModels/continuous2vis8vec2action.onnx";
const string k_discreteONNXPath = "Packages/com.unity.ml-agents/Tests/Editor/TestModels/discrete1vis0vec_2_3action_recurr.onnx";
const string k_hybridONNXPath = "Packages/com.unity.ml-agents/Tests/Editor/TestModels/hybrid0vis53vec_3c_2daction.onnx";
// NN models with a single action output (deprecated; do not support hybrid actions).
// Same BrainParameters settings as the corresponding ONNX models.
const string k_continuousNNPath = "Packages/com.unity.ml-agents/Tests/Editor/TestModels/continuous2vis8vec2action_deprecated.nn";
const string k_discreteNNPath = "Packages/com.unity.ml-agents/Tests/Editor/TestModels/discrete1vis0vec_2_3action_recurr_deprecated.nn";
NNModel continuousONNXModel;
NNModel discreteONNXModel;
NNModel hybridONNXModel;
NNModel continuousNNModel;
NNModel discreteNNModel;
Test3DSensorComponent sensor_21_20_3;
Test3DSensorComponent sensor_20_22_3;

validBrainParameters.VectorObservationSize = 8;
validBrainParameters.VectorActionSize = new[] { 2 };
validBrainParameters.VectorActionSpaceType = SpaceType.Continuous;
validBrainParameters.ActionSpec = ActionSpec.MakeContinuous(2);
return validBrainParameters;
}

validBrainParameters.VectorObservationSize = 0;
validBrainParameters.VectorActionSize = new[] { 2, 3 };
validBrainParameters.NumStackedVectorObservations = 1;
validBrainParameters.ActionSpec = ActionSpec.MakeDiscrete(2, 3);
return validBrainParameters;
}
BrainParameters GetHybridBrainParameters()
{
var validBrainParameters = new BrainParameters();
validBrainParameters.VectorObservationSize = 53;
validBrainParameters.VectorActionSpaceType = SpaceType.Discrete;
validBrainParameters.ActionSpec = new ActionSpec(3, new int[] { 2 });
return validBrainParameters;
}

continuous2vis8vec2actionModel = (NNModel)AssetDatabase.LoadAssetAtPath(k_continuous2vis8vec2actionPath, typeof(NNModel));
discrete1vis0vec_2_3action_recurrModel = (NNModel)AssetDatabase.LoadAssetAtPath(k_discrete1vis0vec_2_3action_recurrModelPath, typeof(NNModel));
continuousONNXModel = (NNModel)AssetDatabase.LoadAssetAtPath(k_continuousONNXPath, typeof(NNModel));
discreteONNXModel = (NNModel)AssetDatabase.LoadAssetAtPath(k_discreteONNXPath, typeof(NNModel));
hybridONNXModel = (NNModel)AssetDatabase.LoadAssetAtPath(k_hybridONNXPath, typeof(NNModel));
continuousNNModel = (NNModel)AssetDatabase.LoadAssetAtPath(k_continuousNNPath, typeof(NNModel));
discreteNNModel = (NNModel)AssetDatabase.LoadAssetAtPath(k_discreteNNPath, typeof(NNModel));
var go = new GameObject("SensorA");
sensor_21_20_3 = go.AddComponent<Test3DSensorComponent>();
sensor_21_20_3.Sensor = new Test3DSensor("SensorA", 21, 20, 3);

[Test]
public void TestModelExist()
{
Assert.IsNotNull(continuous2vis8vec2actionModel);
Assert.IsNotNull(discrete1vis0vec_2_3action_recurrModel);
Assert.IsNotNull(continuousONNXModel);
Assert.IsNotNull(discreteONNXModel);
Assert.IsNotNull(hybridONNXModel);
Assert.IsNotNull(continuousNNModel);
Assert.IsNotNull(discreteNNModel);
[Test]
public void TestGetInputTensors1()
[TestCase(true)]
[TestCase(false)]
public void TestGetInputTensorsContinuous(bool useDeprecatedNNModel)
var model = ModelLoader.Load(continuous2vis8vec2actionModel);
var inputTensors = BarracudaModelParamLoader.GetInputTensors(model);
var inputNames = inputTensors.Select(x => x.name).ToList();
var model = useDeprecatedNNModel ? ModelLoader.Load(continuousNNModel) : ModelLoader.Load(continuousONNXModel);
var inputNames = model.GetInputNames();
Assert.AreEqual(3, inputNames.Count);
Assert.AreEqual(3, inputNames.Count());
Assert.AreEqual(2, BarracudaModelParamLoader.GetNumVisualInputs(model));
Assert.AreEqual(2, model.GetNumVisualInputs());
Assert.AreEqual(0, BarracudaModelParamLoader.GetInputTensors(null).Count);
Assert.AreEqual(0, BarracudaModelParamLoader.GetNumVisualInputs(null));
model = null;
Assert.AreEqual(0, model.GetInputTensors().Count);
Assert.AreEqual(0, model.GetNumVisualInputs());
[Test]
public void TestGetInputTensors2()
[TestCase(true)]
[TestCase(false)]
public void TestGetInputTensorsDiscrete(bool useDeprecatedNNModel)
var model = ModelLoader.Load(discrete1vis0vec_2_3action_recurrModel);
var inputTensors = BarracudaModelParamLoader.GetInputTensors(model);
var inputNames = inputTensors.Select(x => x.name).ToList();
var model = useDeprecatedNNModel ? ModelLoader.Load(discreteNNModel) : ModelLoader.Load(discreteONNXModel);
var inputNames = model.GetInputNames();
// Model should contain 2 inputs : recurrent and visual 1
Assert.Contains(TensorNames.VisualObservationPlaceholderPrefix + "0", inputNames);

[Test]
public void TestGetOutputTensors1()
public void TestGetInputTensorsHybrid()
var model = ModelLoader.Load(continuous2vis8vec2actionModel);
var outputNames = BarracudaModelParamLoader.GetOutputNames(model);
Assert.Contains(TensorNames.ActionOutput, outputNames);
var model = ModelLoader.Load(hybridONNXModel);
var inputNames = model.GetInputNames();
Assert.Contains(TensorNames.VectorObservationPlaceholder, inputNames);
}
[TestCase(true)]
[TestCase(false)]
public void TestGetOutputTensorsContinuous(bool useDeprecatedNNModel)
{
var model = useDeprecatedNNModel ? ModelLoader.Load(continuousNNModel) : ModelLoader.Load(continuousONNXModel);
var outputNames = model.GetOutputNames();
var actionOutputName = useDeprecatedNNModel ? TensorNames.ActionOutputDeprecated : TensorNames.ContinuousActionOutput;
Assert.Contains(actionOutputName, outputNames);
Assert.AreEqual(0, BarracudaModelParamLoader.GetOutputNames(null).Count());
model = null;
Assert.AreEqual(0, model.GetOutputNames().Count());
[Test]
public void TestGetOutputTensors2()
[TestCase(true)]
[TestCase(false)]
public void TestGetOutputTensorsDiscrete(bool useDeprecatedNNModel)
var model = ModelLoader.Load(discrete1vis0vec_2_3action_recurrModel);
var outputNames = BarracudaModelParamLoader.GetOutputNames(model);
Assert.Contains(TensorNames.ActionOutput, outputNames);
var model = useDeprecatedNNModel ? ModelLoader.Load(discreteNNModel) : ModelLoader.Load(discreteONNXModel);
var outputNames = model.GetOutputNames();
var actionOutputName = useDeprecatedNNModel ? TensorNames.ActionOutputDeprecated : TensorNames.DiscreteActionOutput;
Assert.Contains(actionOutputName, outputNames);
public void TestCheckModelValid1()
public void TestGetOutputTensorsHybrid()
{
var model = ModelLoader.Load(hybridONNXModel);
var outputNames = model.GetOutputNames();
Assert.AreEqual(2, outputNames.Count());
Assert.Contains(TensorNames.ContinuousActionOutput, outputNames);
Assert.Contains(TensorNames.DiscreteActionOutput, outputNames);
model = null;
Assert.AreEqual(0, model.GetOutputNames().Count());
}
[TestCase(true)]
[TestCase(false)]
public void TestCheckModelValidContinuous(bool useDeprecatedNNModel)
var model = ModelLoader.Load(continuous2vis8vec2actionModel);
var model = useDeprecatedNNModel ? ModelLoader.Load(continuousNNModel) : ModelLoader.Load(continuousONNXModel);
var validBrainParameters = GetContinuous2vis8vec2actionBrainParameters();
var errors = BarracudaModelParamLoader.CheckModel(

Assert.AreEqual(0, errors.Count()); // There should not be any errors
}
[Test]
public void TestCheckModelValid2()
[TestCase(true)]
[TestCase(false)]
public void TestCheckModelValidDiscrete(bool useDeprecatedNNModel)
var model = ModelLoader.Load(discrete1vis0vec_2_3action_recurrModel);
var model = useDeprecatedNNModel ? ModelLoader.Load(discreteNNModel) : ModelLoader.Load(discreteONNXModel);
var validBrainParameters = GetDiscrete1vis0vec_2_3action_recurrModelBrainParameters();
var errors = BarracudaModelParamLoader.CheckModel(

}
[Test]
public void TestCheckModelThrowsVectorObservation1()
public void TestCheckModelValidHybrid()
var model = ModelLoader.Load(continuous2vis8vec2actionModel);
var model = ModelLoader.Load(hybridONNXModel);
var validBrainParameters = GetHybridBrainParameters();
var errors = BarracudaModelParamLoader.CheckModel(
model, validBrainParameters,
new SensorComponent[] { }, new ActuatorComponent[0]
);
Assert.AreEqual(0, errors.Count()); // There should not be any errors
}
[TestCase(true)]
[TestCase(false)]
public void TestCheckModelThrowsVectorObservationContinuous(bool useDeprecatedNNModel)
{
var model = useDeprecatedNNModel ? ModelLoader.Load(continuousNNModel) : ModelLoader.Load(continuousONNXModel);
var brainParameters = GetContinuous2vis8vec2actionBrainParameters();
brainParameters.VectorObservationSize = 9; // Invalid observation

Assert.Greater(errors.Count(), 0);
}
[Test]
public void TestCheckModelThrowsVectorObservation2()
[TestCase(true)]
[TestCase(false)]
public void TestCheckModelThrowsVectorObservationDiscrete(bool useDeprecatedNNModel)
var model = ModelLoader.Load(discrete1vis0vec_2_3action_recurrModel);
var model = useDeprecatedNNModel ? ModelLoader.Load(discreteNNModel) : ModelLoader.Load(discreteONNXModel);
var brainParameters = GetDiscrete1vis0vec_2_3action_recurrModelBrainParameters();
brainParameters.VectorObservationSize = 1; // Invalid observation

[Test]
public void TestCheckModelThrowsAction1()
public void TestCheckModelThrowsVectorObservationHybrid()
var model = ModelLoader.Load(continuous2vis8vec2actionModel);
var model = ModelLoader.Load(hybridONNXModel);
var brainParameters = GetHybridBrainParameters();
brainParameters.VectorObservationSize = 9; // Invalid observation
var errors = BarracudaModelParamLoader.CheckModel(
model, brainParameters,
new SensorComponent[] { }, new ActuatorComponent[0]
);
Assert.Greater(errors.Count(), 0);
brainParameters = GetContinuous2vis8vec2actionBrainParameters();
brainParameters.NumStackedVectorObservations = 2;// Invalid stacking
errors = BarracudaModelParamLoader.CheckModel(
model, brainParameters,
new SensorComponent[] { }, new ActuatorComponent[0]
);
Assert.Greater(errors.Count(), 0);
}
[TestCase(true)]
[TestCase(false)]
public void TestCheckModelThrowsActionContinuous(bool useDeprecatedNNModel)
{
var model = useDeprecatedNNModel ? ModelLoader.Load(continuousNNModel) : ModelLoader.Load(continuousONNXModel);
brainParameters.VectorActionSize = new[] { 3 }; // Invalid action
brainParameters.ActionSpec = ActionSpec.MakeContinuous(3); // Invalid action
brainParameters.VectorActionSpaceType = SpaceType.Discrete;// Invalid SpaceType
brainParameters.ActionSpec = ActionSpec.MakeDiscrete(3); // Invalid SpaceType
[Test]
public void TestCheckModelThrowsAction2()
[TestCase(true)]
[TestCase(false)]
public void TestCheckModelThrowsActionDiscrete(bool useDeprecatedNNModel)
var model = ModelLoader.Load(discrete1vis0vec_2_3action_recurrModel);
var model = useDeprecatedNNModel ? ModelLoader.Load(discreteNNModel) : ModelLoader.Load(discreteONNXModel);
brainParameters.VectorActionSize = new[] { 3, 3 }; // Invalid action
brainParameters.ActionSpec = ActionSpec.MakeDiscrete(3, 3); // Invalid action
brainParameters.VectorActionSpaceType = SpaceType.Continuous;// Invalid SpaceType
brainParameters.ActionSpec = ActionSpec.MakeContinuous(2); // Invalid SpaceType
Assert.Greater(errors.Count(), 0);
}
[Test]
public void TestCheckModelThrowsActionHybrid()
{
var model = ModelLoader.Load(hybridONNXModel);
var brainParameters = GetHybridBrainParameters();
brainParameters.ActionSpec = new ActionSpec(3, new int[] { 3 }); // Invalid discrete action size
var errors = BarracudaModelParamLoader.CheckModel(model, brainParameters, new SensorComponent[] { sensor_21_20_3, sensor_20_22_3 }, new ActuatorComponent[0]);
Assert.Greater(errors.Count(), 0);
brainParameters = GetContinuous2vis8vec2actionBrainParameters();
brainParameters.ActionSpec = ActionSpec.MakeDiscrete(2); // Missing continuous action
errors = BarracudaModelParamLoader.CheckModel(model, brainParameters, new SensorComponent[] { sensor_21_20_3, sensor_20_22_3 }, new ActuatorComponent[0]);
Assert.Greater(errors.Count(), 0);
}

2
com.unity.ml-agents/Tests/Editor/TestModels/discrete1vis0vec_2_3action_recurr_deprecated.nn.meta


fileFormatVersion: 2
guid: 8a92fbcd96caa4ef5a93dd55c0c36705
guid: 6d6040ad621454dd5b713beb5483e347
ScriptedImporter:
fileIDToRecycleName:
11400000: main obj

2
com.unity.ml-agents/Tests/Editor/TestModels/continuous2vis8vec2action_deprecated.nn.meta


fileFormatVersion: 2
guid: a75582ff670094ff2996c1c4ab9dfd15
guid: bf4543cc3c6944794bbba065bdf90079
ScriptedImporter:
fileIDToRecycleName:
11400000: main obj

3
com.unity.ml-agents/Tests/Runtime/RuntimeAPITest.cs


behaviorParams.BrainParameters.VectorObservationSize = 3;
behaviorParams.BrainParameters.NumStackedVectorObservations = 2;
behaviorParams.BrainParameters.VectorActionDescriptions = new[] { "TestActionA", "TestActionB" };
behaviorParams.BrainParameters.VectorActionSize = new[] { 2, 2 };
behaviorParams.BrainParameters.VectorActionSpaceType = SpaceType.Discrete;
behaviorParams.BrainParameters.ActionSpec = ActionSpec.MakeDiscrete(2, 2);
behaviorParams.BehaviorName = "TestBehavior";
behaviorParams.TeamId = 42;
behaviorParams.UseChildSensors = true;

8
docs/Getting-Started.md


#### Behavior Parameters : Vector Action Space
An Agent is given instructions in the form of a float array of _actions_.
An Agent is given instructions in the form of actions.
The 3D Balance Ball example is programmed to use continuous action space which
is a a vector of numbers that can vary continuously. More specifically, it uses
a `Space Size` of 2 to control the amount of `x` and `z` rotations to apply to
The 3D Balance Ball example is programmed to use continuous actions, which
are a vector of floating-point numbers that can vary continuously. More specifically,
it uses a `Space Size` of 2 to control the amount of `x` and `z` rotations to apply to
itself to keep the ball balanced on its head.
## Running a pre-trained model

15
docs/Learning-Environment-Create-New.md


- `OnEpisodeBegin()`
- `CollectObservations(VectorSensor sensor)`
- `OnActionReceived(float[] vectorAction)`
- `OnActionReceived(ActionBuffers actionBuffers)`
We overview each of these in more detail in the dedicated subsections below.

```csharp
public float forceMultiplier = 10;
public override void OnActionReceived(float[] vectorAction)
public override void OnActionReceived(ActionBuffers actionBuffers)
controlSignal.x = vectorAction[0];
controlSignal.z = vectorAction[1];
controlSignal.x = actionBuffers.ContinuousActions[0];
controlSignal.z = actionBuffers.ContinuousActions[1];
rBody.AddForce(controlSignal * forceMultiplier);
// Rewards

(which correspond to the keyboard arrow keys):
```csharp
public override void Heuristic(float[] actionsOut)
public override void Heuristic(in ActionBuffers actionsOut)
actionsOut[0] = Input.GetAxis("Horizontal");
actionsOut[1] = Input.GetAxis("Vertical");
var continuousActionsOut = actionsOut.ContinuousActions;
continuousActionsOut[0] = Input.GetAxis("Horizontal");
continuousActionsOut[1] = Input.GetAxis("Vertical");
}
```

80
docs/Learning-Environment-Design-Agents.md


## Actions
An action is an instruction from the Policy that the agent carries out. The
action is passed to the Agent as a parameter when the Academy invokes the
agent's `OnActionReceived()` function. Actions for an agent can take one of two
forms, either **Continuous** or **Discrete**.
When you specify that the vector action space is **Continuous**, the action
parameter passed to the Agent is an array of floating point numbers with length
equal to the `Vector Action Space Size` property. When you specify a
**Discrete** vector action space type, the action parameter is an array
containing integers. Each integer is an index into a list or table of commands.
In the **Discrete** vector action space type, the action parameter is an array
of indices. The number of indices in the array is determined by the number of
branches defined in the `Branches Size` property. Each branch corresponds to an
action table, you can specify the size of each table by modifying the `Branches`
property.
action is passed to the Agent as the `ActionBuffers` parameter when the Academy invokes the
agent's `OnActionReceived()` function. There are two types of actions supported:
**Continuous** and **Discrete**.
Neither the Policy nor the training algorithm know anything about what the
action values themselves mean. The training algorithm simply tries different

### Continuous Action Space
When an Agent uses a Policy set to the **Continuous** vector action space, the
action parameter passed to the Agent's `OnActionReceived()` function is an array
with length equal to the `Vector Action Space Size` property value. The
When an Agent's Policy has **Continuous** actions, the
`ActionBuffers.ContinuousActions` passed to the Agent's `OnActionReceived()` function
is an array with length equal to the `Vector Action Space Size` property value. The
individual values in the array have whatever meanings that you ascribe to them.
If you assign an element in the array as the speed of an Agent, for example, the
training process learns to control the speed of the Agent through this

These control values are applied as torques to the bodies making up the arm:
```csharp
public override void OnActionReceived(float[] act)
{
float torque_x = Mathf.Clamp(act[0], -1, 1) * 100f;
float torque_z = Mathf.Clamp(act[1], -1, 1) * 100f;
rbA.AddTorque(new Vector3(torque_x, 0f, torque_z));
public override void OnActionReceived(ActionBuffers actionBuffers)
{
var torqueX = Mathf.Clamp(actionBuffers.ContinuousActions[0], -1f, 1f) * 150f;
var torqueZ = Mathf.Clamp(actionBuffers.ContinuousActions[1], -1f, 1f) * 150f;
m_RbA.AddTorque(new Vector3(torqueX, 0f, torqueZ));
torque_x = Mathf.Clamp(act[2], -1, 1) * 100f;
torque_z = Mathf.Clamp(act[3], -1, 1) * 100f;
rbB.AddTorque(new Vector3(torque_x, 0f, torque_z));
}
torqueX = Mathf.Clamp(actionBuffers.ContinuousActions[2], -1f, 1f) * 150f;
torqueZ = Mathf.Clamp(actionBuffers.ContinuousActions[3], -1f, 1f) * 150f;
m_RbB.AddTorque(new Vector3(torqueX, 0f, torqueZ));
}
```
By default the output from our provided PPO algorithm pre-clamps the values of

### Discrete Action Space
When an Agent uses a **Discrete** vector action space, the action parameter
passed to the Agent's `OnActionReceived()` function is an array containing
indices. With the discrete vector action space, `Branches` is an array of
integers, each value corresponds to the number of possibilities for each branch.
When an Agent's Policy uses **discrete** actions, the
`ActionBuffers.DiscreteActions` passed to the Agent's `OnActionReceived()` function
is an array of integers. When defining the discrete vector action space, `Branches`
is an array of integers; each value corresponds to the number of possibilities for that branch.
For example, if we wanted an Agent that can move in a plane and jump, we could
define two branches (one for motion and one for jumping) because we want our

```csharp
// Get the action index for movement
int movement = Mathf.FloorToInt(act[0]);
int movement = actionBuffers.DiscreteActions[0];
int jump = Mathf.FloorToInt(act[1]);
int jump = actionBuffers.DiscreteActions[1];
// Look up the index in the movement action list:
if (movement == 1) { directionX = -1; }

directionX * 40f, directionY * 300f, directionZ * 40f));
```
Note that the above code example is a simplified extract from the AreaAgent
class, which provides alternate implementations for both the discrete and the
continuous action spaces.
#### Masking Discrete Actions
When using Discrete Actions, it is possible to specify that some actions are

decide to perform the masked action. In order to mask an action, override the
`Agent.CollectDiscreteActionMasks()` virtual method, and call
`DiscreteActionMasker.SetMask()` in it:
`Agent.WriteDiscreteActionMask()` virtual method, and call
`WriteMask()` on the provided `IDiscreteActionMask`:
public override void CollectDiscreteActionMasks(DiscreteActionMasker actionMasker){
actionMasker.SetMask(branch, actionIndices)
public override void WriteDiscreteActionMask(IDiscreteActionMask actionMask)
{
actionMask.WriteMask(branch, actionIndices)
}
```

the action
- `actionIndices` is a list of `int` corresponding to the indices of the actions
that the Agent cannot perform.
that the Agent **cannot** perform.
For example, if you have an Agent with 2 branches and on the first branch
(branch 0) there are 4 possible actions : _"do nothing"_, _"jump"_, _"shoot"_

```csharp
SetMask(0, new int[2]{1,2})
WriteMask(0, new int[2]{1,2})
- You can call `SetMask` multiple times if you want to put masks on multiple
- You can call `WriteMask` multiple times if you want to put masks on multiple
branches.
- You cannot mask all the actions of a branch.
- You cannot mask actions in continuous control.

- Actions can either use `Discrete` or `Continuous` spaces.
- When using `Discrete` it is possible to assign multiple action branches, and
to mask certain actions.
- Agents can either use `Discrete` or `Continuous` actions.
- Discrete actions can have multiple action branches, and it's possible to mask
certain actions so that they won't be taken.
- When using continuous control, action values should be clipped to an
- Continuous action values should be clipped to an
appropriate range. The provided PPO model automatically clips these values
between -1 and 1, but third party training systems may not do so.
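For illustration only (not part of this change), a third-party Python trainer could apply the clipping itself before sending actions. The sketch below assumes a connected `UnityEnvironment` named `env`, a valid `behavior_name` with continuous actions only, and a hypothetical `raw_continuous` array holding the trainer's unclipped output:

```python
import numpy as np
from mlagents_envs.base_env import ActionTuple

# Sketch: clip a custom trainer's raw continuous output to [-1, 1] before
# handing it to the environment. `raw_continuous` has shape (n_agents, continuous_size).
clipped = np.clip(raw_continuous, -1.0, 1.0).astype(np.float32)
env.set_actions(behavior_name, ActionTuple(continuous=clipped))
```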

4
docs/Learning-Environment-Design.md


important to ensure that your environment parameters are updated at each step to
the correct values. To enable this, we expose an `EnvironmentParameters` C# class
that you can use to retrieve the values of the parameters defined in the
training configurations for both of those features.
training configurations for both of those features. Please see our
[documentation](Training-ML-Agents.md#environment-parameters)
for curriculum learning and environment parameter randomization for details.
We recommend modifying the environment from the Agent's `OnEpisodeBegin()`
function by leveraging `Academy.Instance.EnvironmentParameters`. See the

64
docs/Python-API.md


terminates the communication.
- **Behavior Specs : `env.behavior_specs`** Returns a Mapping of
`BehaviorName` to `BehaviorSpec` objects (read only).
A `BehaviorSpec` contains information such as the observation shapes, the
action type (multi-discrete or continuous) and the action shape. Note that
A `BehaviorSpec` contains the observation shapes and the
`ActionSpec` (which defines the action shape). Note that
the `BehaviorSpec` for a specific group is fixed throughout the simulation.
The number of entries in the Mapping can change over time in the simulation
if new Agent behaviors are created in the simulation.
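As a quick illustration (not part of this change set), the mapping and its new `ActionSpec` field could be inspected like this, assuming `env` is an already-connected `UnityEnvironment`:

```python
# Sketch: print each behavior's observation shapes and action spec.
for behavior_name, behavior_spec in env.behavior_specs.items():
    action_spec = behavior_spec.action_spec
    print(behavior_name)
    print("  observation shapes:", behavior_spec.observation_shapes)
    print("  continuous actions:", action_spec.continuous_size)
    print("  discrete branches: ", action_spec.discrete_branches)
```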

number of agents is not guaranteed to remain constant during the simulation
and it is not unusual to have either `DecisionSteps` or `TerminalSteps`
contain no Agents at all.
- **Set Actions :`env.set_actions(behavior_name: str, action: np.array)`** Sets
the actions for a whole agent group. `action` is a 2D `np.array` of
`dtype=np.int32` in the discrete action case and `dtype=np.float32` in the
continuous action case. The first dimension of `action` is the number of
agents that requested a decision since the last call to `env.step()`. The
second dimension is the number of discrete actions in multi-discrete action
type and the number of actions in continuous action type.
- **Set Actions :`env.set_actions(behavior_name: str, action: ActionTuple)`** Sets
the actions for a whole agent group. `action` is an `ActionTuple`, which
is made up of a 2D `np.array` of `dtype=np.int32` for discrete actions, and
`dtype=np.float32` for continuous actions. The first dimension of `np.array`
in the tuple is the number of agents that requested a decision since the
last call to `env.step()`. The second dimension is the number of discrete or
continuous actions for the corresponding array.
`env.set_action_for_agent(agent_group: str, agent_id: int, action: np.array)`**
`env.set_action_for_agent(agent_group: str, agent_id: int, action: ActionTuple)`**
identifier of the Agent. Action is a 1D array of type `dtype=np.int32` and
size equal to the number of discrete actions in multi-discrete action type and
of type `dtype=np.float32` and size equal to the number of actions in
continuous action type.
identifier of the Agent. `action` is an `ActionTuple` as described above.
**Note:** If no action is provided for an agent group between two calls to
`env.step()` then the default action will be all zeros (in either discrete or
continuous action space)
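A minimal sketch of the new call pattern (illustrative only), assuming `env` is a connected `UnityEnvironment`, `behavior_name` is a valid behavior, and at least one agent has requested a decision:

```python
import numpy as np
from mlagents_envs.base_env import ActionTuple

action_spec = env.behavior_specs[behavior_name].action_spec
decision_steps, terminal_steps = env.get_steps(behavior_name)
n_agents = len(decision_steps)

# One row per agent that requested a decision; zero-sized arrays are fine
# when the behavior has no continuous (or no discrete) actions.
actions = ActionTuple(
    continuous=np.zeros((n_agents, action_spec.continuous_size), dtype=np.float32),
    discrete=np.zeros((n_agents, action_spec.discrete_size), dtype=np.int32),
)
env.set_actions(behavior_name, actions)

# Per-agent variant: the leading dimension is 1 because it targets a single agent.
agent_id = decision_steps.agent_id[0]
env.set_action_for_agent(behavior_name, agent_id, action_spec.random_action(1))

env.step()
```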

- `agent_id` is an int vector of length batch size containing the unique identifier
of the corresponding Agent. This is used to track Agents across simulation
steps.
- `action_mask` is an optional list of two dimensional array of booleans. Only
available in multi-discrete action space type. Each array corresponds to an
- `action_mask` is an optional list of two dimensional arrays of booleans which is only
available when using multi-discrete actions. Each array corresponds to an
action branch. The first dimension of each array is the batch size and the
second contains a mask for each action of the branch. If true, the action is
not available for the agent during this simulation step.

- `reward` is a float. Corresponds to the rewards collected by the agent since
the last simulation step.
- `agent_id` is an int and a unique identifier for the corresponding Agent.
- `action_mask` is an optional list of one dimensional array of booleans. Only
available in multi-discrete action space type. Each array corresponds to an
- `action_mask` is an optional list of one dimensional arrays of booleans which is only
available when using multi-discrete actions. Each array corresponds to an
action branch. Each array contains a mask for each action of the branch. If
true, the action is not available for the agent during this simulation step.
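For reference only (assuming `env` and `behavior_name` as above), the per-branch masks of a multi-discrete behavior could be checked like this:

```python
# Sketch: inspect the per-branch action masks returned with the decision steps.
decision_steps, terminal_steps = env.get_steps(behavior_name)
if decision_steps.action_mask is not None:
    for branch_index, branch_mask in enumerate(decision_steps.action_mask):
        # branch_mask has shape (n_agents, branch_size); True means the action
        # is NOT available for that agent at this step.
        print("branch", branch_index, "masked entries:", int(branch_mask.sum()))
```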

#### BehaviorSpec
An Agent behavior can either have discrete or continuous actions. To check which
type it is, use `spec.is_action_discrete()` or `spec.is_action_continuous()` to
see which one it is. If discrete, the action tensors are expected to be
`np.int32`. If continuous, the actions are expected to be `np.float32`.
A `BehaviorSpec` has the following fields :
- `observation_shapes` is a List of Tuples of int : Each Tuple corresponds to an

- `action_type` is the type of data of the action. it can be discrete or
continuous. If discrete, the action tensors are expected to be `np.int32`. If
continuous, the actions are expected to be `np.float32`.
- `action_size` is an `int` corresponding to the expected dimension of the
action array.
- In continuous action space it is the number of floats that constitute the
action.
- In discrete action space (same as multi-discrete) it corresponds to the
number of branches (the number of independent actions)
- `discrete_action_branches` is a Tuple of int only for discrete action space.
Each int corresponds to the number of different options for each branch of the
action. For example : In a game direction input (no movement, left, right) and
- `action_spec` is an `ActionSpec` namedtuple that defines the number and types
of actions for the Agent.
An `ActionSpec` has the following fields and properties:
- `continuous_size` is the number of floats that constitute the continuous actions.
- `discrete_size` is the number of branches (the number of independent actions) that
constitute the multi-discrete actions.
- `discrete_branches` is a Tuple of ints. Each int corresponds to the number of
different options for each branch of the action. For example:
In a game direction input (no movement, left, right) and
the first one with 3 options and the second with 2 options. (`action_size = 2`
the first one with 3 options and the second with 2 options. (`discrete_size = 2`
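A short sketch of how these fields fit together; the spec below (2 continuous actions plus two discrete branches of sizes 3 and 2) is a made-up example, not one of the shipped test models:

```python
from mlagents_envs.base_env import ActionSpec

# ActionSpec(continuous_size, discrete_branches)
hybrid_spec = ActionSpec(2, (3, 2))
print(hybrid_spec.continuous_size)    # 2
print(hybrid_spec.discrete_size)      # 2 (number of branches)
print(hybrid_spec.discrete_branches)  # (3, 2)

# Both helpers now return an ActionTuple rather than a bare np.ndarray.
zeros = hybrid_spec.empty_action(n_agents=4)
sample = hybrid_spec.random_action(n_agents=4)
print(zeros.continuous.shape, zeros.discrete.shape)  # (4, 2) (4, 2)
```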
### Communicating additional information with the Environment

4
docs/Training-Configuration-File.md


A few considerations when deciding to use memory:
- LSTM does not work well with continuous vector action space. Please use
discrete vector action space for better results.
- LSTM does not work well with continuous vector actions. Please use
discrete actions for better results.
- Since the memories must be sent back and forth between Python and Unity, using
too large `memory_size` will slow down training.
- Adding a recurrent layer increases the complexity of the neural network, it is

10
gym-unity/gym_unity/envs/__init__.py


import gym
from gym import error, spaces
from mlagents_envs.base_env import BaseEnv
from mlagents_envs.base_env import ActionTuple, BaseEnv
from mlagents_envs.base_env import DecisionSteps, TerminalSteps
from mlagents_envs import logging_util

action = self._flattener.lookup_action(action)
action = np.array(action).reshape((1, self.action_size))
self._env.set_actions(self.name, action)
action_tuple = ActionTuple()
if self.group_spec.action_spec.is_continuous():
action_tuple.add_continuous(action)
else:
action_tuple.add_discrete(action)
self._env.set_actions(self.name, action_tuple)
self._env.step()
decision_step, terminal_step = self._env.get_steps(self.name)

148
ml-agents-envs/mlagents_envs/base_env.py


)
class _ActionTupleBase(ABC):
"""
An object whose fields correspond to action data of continuous and discrete
spaces. Dimensions are of (n_agents, continuous_size) and (n_agents, discrete_size),
respectively. Note, this also holds when continuous or discrete size is
zero.
"""
def __init__(
self,
continuous: Optional[np.ndarray] = None,
discrete: Optional[np.ndarray] = None,
):
self._continuous: Optional[np.ndarray] = None
self._discrete: Optional[np.ndarray] = None
if continuous is not None:
self.add_continuous(continuous)
if discrete is not None:
self.add_discrete(discrete)
@property
def continuous(self) -> np.ndarray:
return self._continuous
@property
def discrete(self) -> np.ndarray:
return self._discrete
def add_continuous(self, continuous: np.ndarray) -> None:
if continuous.dtype != np.float32:
continuous = continuous.astype(np.float32, copy=False)
if self._discrete is None:
self._discrete = np.zeros(
(continuous.shape[0], 0), dtype=self.discrete_dtype
)
self._continuous = continuous
def add_discrete(self, discrete: np.ndarray) -> None:
if discrete.dtype != self.discrete_dtype:
discrete = discrete.astype(self.discrete_dtype, copy=False)
if self._continuous is None:
self._continuous = np.zeros((discrete.shape[0], 0), dtype=np.float32)
self._discrete = discrete
@property
@abstractmethod
def discrete_dtype(self) -> np.dtype:
pass
class ActionTuple(_ActionTupleBase):
"""
An object whose fields correspond to actions of different types.
Continuous and discrete actions are numpy arrays of type float32 and
int32, respectively and are type checked on construction.
Dimensions are of (n_agents, continuous_size) and (n_agents, discrete_size),
respectively. Note, this also holds when continuous or discrete size is
zero.
"""
@property
def discrete_dtype(self) -> np.dtype:
"""
The dtype of a discrete action.
"""
return np.int32
class ActionSpec(NamedTuple):
"""
A NamedTuple containing utility functions and information about the action spaces

"""
return len(self.discrete_branches)
def empty_action(self, n_agents: int) -> np.ndarray:
def empty_action(self, n_agents: int) -> ActionTuple:
Generates a numpy array corresponding to an empty action (all zeros)
Generates an ActionTuple corresponding to an empty action (all zeros)
if self.is_continuous():
return np.zeros((n_agents, self.continuous_size), dtype=np.float32)
return np.zeros((n_agents, self.discrete_size), dtype=np.int32)
_continuous = np.zeros((n_agents, self.continuous_size), dtype=np.float32)
_discrete = np.zeros((n_agents, self.discrete_size), dtype=np.int32)
return ActionTuple(continuous=_continuous, discrete=_discrete)
def random_action(self, n_agents: int) -> np.ndarray:
def random_action(self, n_agents: int) -> ActionTuple:
Generates a numpy array corresponding to a random action (either discrete
Generates an ActionTuple corresponding to a random action (either discrete
if self.is_continuous():
action = np.random.uniform(
low=-1.0, high=1.0, size=(n_agents, self.continuous_size)
).astype(np.float32)
else:
branch_size = self.discrete_branches
action = np.column_stack(
_continuous = np.random.uniform(
low=-1.0, high=1.0, size=(n_agents, self.continuous_size)
)
_discrete = np.zeros((n_agents, self.discrete_size), dtype=np.int32)
if self.discrete_size > 0:
_discrete = np.column_stack(
branch_size[i], # type: ignore
self.discrete_branches[i], # type: ignore
size=(n_agents),
dtype=np.int32,
)

return action
return ActionTuple(continuous=_continuous, discrete=_discrete)
self, actions: np.ndarray, n_agents: int, name: str
) -> np.ndarray:
self, actions: ActionTuple, n_agents: Optional[int], name: str
) -> ActionTuple:
if self.continuous_size > 0:
_size = self.continuous_size
else:
_size = self.discrete_size
_expected_shape = (n_agents, _size)
if actions.shape != _expected_shape:
_expected_shape = (
(n_agents, self.continuous_size)
if n_agents is not None
else (self.continuous_size,)
)
if actions.continuous.shape != _expected_shape:
f"The behavior {name} needs an input of dimension "
f"The behavior {name} needs a continuous input of dimension "
f"received input of dimension {actions.shape}"
f"received input of dimension {actions.continuous.shape}"
_expected_type = np.float32 if self.is_continuous() else np.int32
if actions.dtype != _expected_type:
actions = actions.astype(_expected_type)
_expected_shape = (
(n_agents, self.discrete_size)
if n_agents is not None
else (self.discrete_size,)
)
if actions.discrete.shape != _expected_shape:
raise UnityActionException(
f"The behavior {name} needs a discrete input of dimension "
f"{_expected_shape} for (<number of agents>, <action size>) but "
f"received input of dimension {actions.discrete.shape}"
)
return actions
@staticmethod

"""
@abstractmethod
def set_actions(self, behavior_name: BehaviorName, action: np.ndarray) -> None:
def set_actions(self, behavior_name: BehaviorName, action: ActionTuple) -> None:
:param action: A two dimensional np.ndarray corresponding to the action
(either int or float)
:param action: ActionTuple of continuous and/or discrete actions.
Actions are np.arrays with dimensions (n_agents, continuous_size) and
(n_agents, discrete_size), respectively.
self, behavior_name: BehaviorName, agent_id: AgentId, action: np.ndarray
self, behavior_name: BehaviorName, agent_id: AgentId, action: ActionTuple
) -> None:
"""
Sets the action for one of the agents in the simulation for the next

:param action: A one dimensional np.ndarray corresponding to the action
(either int or float)
:param action: ActionTuple of continuous and/or discrete actions.
Actions are np.arrays with dimensions (1, continuous_size) and
(1, discrete_size), respectively. Note: the initial dimension of 1 is because
this action is meant for a single agent.
"""
@abstractmethod

22
ml-agents-envs/mlagents_envs/communicator_objects/agent_action_pb2.py


name='mlagents_envs/communicator_objects/agent_action.proto',
package='communicator_objects',
syntax='proto3',
serialized_pb=_b('\n5mlagents_envs/communicator_objects/agent_action.proto\x12\x14\x63ommunicator_objects\"K\n\x10\x41gentActionProto\x12\x16\n\x0evector_actions\x18\x01 \x03(\x02\x12\r\n\x05value\x18\x04 \x01(\x02J\x04\x08\x02\x10\x03J\x04\x08\x03\x10\x04J\x04\x08\x05\x10\x06\x42%\xaa\x02\"Unity.MLAgents.CommunicatorObjectsb\x06proto3')
serialized_pb=_b('\n5mlagents_envs/communicator_objects/agent_action.proto\x12\x14\x63ommunicator_objects\"\x8c\x01\n\x10\x41gentActionProto\x12!\n\x19vector_actions_deprecated\x18\x01 \x03(\x02\x12\r\n\x05value\x18\x04 \x01(\x02\x12\x1a\n\x12\x63ontinuous_actions\x18\x06 \x03(\x02\x12\x18\n\x10\x64iscrete_actions\x18\x07 \x03(\x05J\x04\x08\x02\x10\x03J\x04\x08\x03\x10\x04J\x04\x08\x05\x10\x06\x42%\xaa\x02\"Unity.MLAgents.CommunicatorObjectsb\x06proto3')
)

containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='vector_actions', full_name='communicator_objects.AgentActionProto.vector_actions', index=0,
name='vector_actions_deprecated', full_name='communicator_objects.AgentActionProto.vector_actions_deprecated', index=0,
number=1, type=2, cpp_type=6, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,

message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='continuous_actions', full_name='communicator_objects.AgentActionProto.continuous_actions', index=2,
number=6, type=2, cpp_type=6, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='discrete_actions', full_name='communicator_objects.AgentActionProto.discrete_actions', index=3,
number=7, type=5, cpp_type=1, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
],
extensions=[
],

extension_ranges=[],
oneofs=[
],
serialized_start=79,
serialized_end=154,
serialized_start=80,
serialized_end=220,
)
DESCRIPTOR.message_types_by_name['AgentActionProto'] = _AGENTACTIONPROTO

12
ml-agents-envs/mlagents_envs/communicator_objects/agent_action_pb2.pyi


class AgentActionProto(google___protobuf___message___Message):
DESCRIPTOR: google___protobuf___descriptor___Descriptor = ...
vector_actions = ... # type: google___protobuf___internal___containers___RepeatedScalarFieldContainer[builtin___float]
vector_actions_deprecated = ... # type: google___protobuf___internal___containers___RepeatedScalarFieldContainer[builtin___float]
continuous_actions = ... # type: google___protobuf___internal___containers___RepeatedScalarFieldContainer[builtin___float]
discrete_actions = ... # type: google___protobuf___internal___containers___RepeatedScalarFieldContainer[builtin___int]
vector_actions : typing___Optional[typing___Iterable[builtin___float]] = None,
vector_actions_deprecated : typing___Optional[typing___Iterable[builtin___float]] = None,
continuous_actions : typing___Optional[typing___Iterable[builtin___float]] = None,
discrete_actions : typing___Optional[typing___Iterable[builtin___int]] = None,
) -> None: ...
@classmethod
def FromString(cls, s: builtin___bytes) -> AgentActionProto: ...

def ClearField(self, field_name: typing_extensions___Literal[u"value",u"vector_actions"]) -> None: ...
def ClearField(self, field_name: typing_extensions___Literal[u"continuous_actions",u"discrete_actions",u"value",u"vector_actions_deprecated"]) -> None: ...
def ClearField(self, field_name: typing_extensions___Literal[u"value",b"value",u"vector_actions",b"vector_actions"]) -> None: ...
def ClearField(self, field_name: typing_extensions___Literal[u"continuous_actions",b"continuous_actions",u"discrete_actions",b"discrete_actions",u"value",b"value",u"vector_actions_deprecated",b"vector_actions_deprecated"]) -> None: ...

82
ml-agents-envs/mlagents_envs/communicator_objects/brain_parameters_pb2.py


name='mlagents_envs/communicator_objects/brain_parameters.proto',
package='communicator_objects',
syntax='proto3',
serialized_pb=_b('\n9mlagents_envs/communicator_objects/brain_parameters.proto\x12\x14\x63ommunicator_objects\x1a\x33mlagents_envs/communicator_objects/space_type.proto\"\xd9\x01\n\x14\x42rainParametersProto\x12\x1a\n\x12vector_action_size\x18\x03 \x03(\x05\x12\"\n\x1avector_action_descriptions\x18\x05 \x03(\t\x12\x46\n\x18vector_action_space_type\x18\x06 \x01(\x0e\x32$.communicator_objects.SpaceTypeProto\x12\x12\n\nbrain_name\x18\x07 \x01(\t\x12\x13\n\x0bis_training\x18\x08 \x01(\x08J\x04\x08\x01\x10\x02J\x04\x08\x02\x10\x03J\x04\x08\x04\x10\x05\x42%\xaa\x02\"Unity.MLAgents.CommunicatorObjectsb\x06proto3')
serialized_pb=_b('\n9mlagents_envs/communicator_objects/brain_parameters.proto\x12\x14\x63ommunicator_objects\x1a\x33mlagents_envs/communicator_objects/space_type.proto\"\x8b\x01\n\x0f\x41\x63tionSpecProto\x12\x1e\n\x16num_continuous_actions\x18\x01 \x01(\x05\x12\x1c\n\x14num_discrete_actions\x18\x02 \x01(\x05\x12\x1d\n\x15\x64iscrete_branch_sizes\x18\x03 \x03(\x05\x12\x1b\n\x13\x61\x63tion_descriptions\x18\x04 \x03(\t\"\xb6\x02\n\x14\x42rainParametersProto\x12%\n\x1dvector_action_size_deprecated\x18\x03 \x03(\x05\x12-\n%vector_action_descriptions_deprecated\x18\x05 \x03(\t\x12Q\n#vector_action_space_type_deprecated\x18\x06 \x01(\x0e\x32$.communicator_objects.SpaceTypeProto\x12\x12\n\nbrain_name\x18\x07 \x01(\t\x12\x13\n\x0bis_training\x18\x08 \x01(\x08\x12:\n\x0b\x61\x63tion_spec\x18\t \x01(\x0b\x32%.communicator_objects.ActionSpecProtoJ\x04\x08\x01\x10\x02J\x04\x08\x02\x10\x03J\x04\x08\x04\x10\x05\x42%\xaa\x02\"Unity.MLAgents.CommunicatorObjectsb\x06proto3')
,
dependencies=[mlagents__envs_dot_communicator__objects_dot_space__type__pb2.DESCRIPTOR,])

_ACTIONSPECPROTO = _descriptor.Descriptor(
name='ActionSpecProto',
full_name='communicator_objects.ActionSpecProto',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='num_continuous_actions', full_name='communicator_objects.ActionSpecProto.num_continuous_actions', index=0,
number=1, type=5, cpp_type=1, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='num_discrete_actions', full_name='communicator_objects.ActionSpecProto.num_discrete_actions', index=1,
number=2, type=5, cpp_type=1, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='discrete_branch_sizes', full_name='communicator_objects.ActionSpecProto.discrete_branch_sizes', index=2,
number=3, type=5, cpp_type=1, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='action_descriptions', full_name='communicator_objects.ActionSpecProto.action_descriptions', index=3,
number=4, type=9, cpp_type=9, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
],
extensions=[
],
nested_types=[],
enum_types=[
],
options=None,
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[
],
serialized_start=137,
serialized_end=276,
)
_BRAINPARAMETERSPROTO = _descriptor.Descriptor(
name='BrainParametersProto',
full_name='communicator_objects.BrainParametersProto',

fields=[
_descriptor.FieldDescriptor(
name='vector_action_size', full_name='communicator_objects.BrainParametersProto.vector_action_size', index=0,
name='vector_action_size_deprecated', full_name='communicator_objects.BrainParametersProto.vector_action_size_deprecated', index=0,
number=3, type=5, cpp_type=1, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,

name='vector_action_descriptions', full_name='communicator_objects.BrainParametersProto.vector_action_descriptions', index=1,
name='vector_action_descriptions_deprecated', full_name='communicator_objects.BrainParametersProto.vector_action_descriptions_deprecated', index=1,
number=5, type=9, cpp_type=9, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,

name='vector_action_space_type', full_name='communicator_objects.BrainParametersProto.vector_action_space_type', index=2,
name='vector_action_space_type_deprecated', full_name='communicator_objects.BrainParametersProto.vector_action_space_type_deprecated', index=2,
number=6, type=14, cpp_type=8, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,

message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='action_spec', full_name='communicator_objects.BrainParametersProto.action_spec', index=5,
number=9, type=11, cpp_type=10, label=1,
has_default_value=False, default_value=None,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
],
extensions=[
],

extension_ranges=[],
oneofs=[
],
serialized_start=137,
serialized_end=354,
serialized_start=279,
serialized_end=589,
_BRAINPARAMETERSPROTO.fields_by_name['vector_action_space_type'].enum_type = mlagents__envs_dot_communicator__objects_dot_space__type__pb2._SPACETYPEPROTO
_BRAINPARAMETERSPROTO.fields_by_name['vector_action_space_type_deprecated'].enum_type = mlagents__envs_dot_communicator__objects_dot_space__type__pb2._SPACETYPEPROTO
_BRAINPARAMETERSPROTO.fields_by_name['action_spec'].message_type = _ACTIONSPECPROTO
DESCRIPTOR.message_types_by_name['ActionSpecProto'] = _ACTIONSPECPROTO
ActionSpecProto = _reflection.GeneratedProtocolMessageType('ActionSpecProto', (_message.Message,), dict(
DESCRIPTOR = _ACTIONSPECPROTO,
__module__ = 'mlagents_envs.communicator_objects.brain_parameters_pb2'
# @@protoc_insertion_point(class_scope:communicator_objects.ActionSpecProto)
))
_sym_db.RegisterMessage(ActionSpecProto)
BrainParametersProto = _reflection.GeneratedProtocolMessageType('BrainParametersProto', (_message.Message,), dict(
DESCRIPTOR = _BRAINPARAMETERSPROTO,

45
ml-agents-envs/mlagents_envs/communicator_objects/brain_parameters_pb2.pyi


builtin___int = int
class ActionSpecProto(google___protobuf___message___Message):
DESCRIPTOR: google___protobuf___descriptor___Descriptor = ...
num_continuous_actions = ... # type: builtin___int
num_discrete_actions = ... # type: builtin___int
discrete_branch_sizes = ... # type: google___protobuf___internal___containers___RepeatedScalarFieldContainer[builtin___int]
action_descriptions = ... # type: google___protobuf___internal___containers___RepeatedScalarFieldContainer[typing___Text]
def __init__(self,
*,
num_continuous_actions : typing___Optional[builtin___int] = None,
num_discrete_actions : typing___Optional[builtin___int] = None,
discrete_branch_sizes : typing___Optional[typing___Iterable[builtin___int]] = None,
action_descriptions : typing___Optional[typing___Iterable[typing___Text]] = None,
) -> None: ...
@classmethod
def FromString(cls, s: builtin___bytes) -> ActionSpecProto: ...
def MergeFrom(self, other_msg: google___protobuf___message___Message) -> None: ...
def CopyFrom(self, other_msg: google___protobuf___message___Message) -> None: ...
if sys.version_info >= (3,):
def ClearField(self, field_name: typing_extensions___Literal[u"action_descriptions",u"discrete_branch_sizes",u"num_continuous_actions",u"num_discrete_actions"]) -> None: ...
else:
def ClearField(self, field_name: typing_extensions___Literal[u"action_descriptions",b"action_descriptions",u"discrete_branch_sizes",b"discrete_branch_sizes",u"num_continuous_actions",b"num_continuous_actions",u"num_discrete_actions",b"num_discrete_actions"]) -> None: ...
vector_action_size = ... # type: google___protobuf___internal___containers___RepeatedScalarFieldContainer[builtin___int]
vector_action_descriptions = ... # type: google___protobuf___internal___containers___RepeatedScalarFieldContainer[typing___Text]
vector_action_space_type = ... # type: mlagents_envs___communicator_objects___space_type_pb2___SpaceTypeProto
vector_action_size_deprecated = ... # type: google___protobuf___internal___containers___RepeatedScalarFieldContainer[builtin___int]
vector_action_descriptions_deprecated = ... # type: google___protobuf___internal___containers___RepeatedScalarFieldContainer[typing___Text]
vector_action_space_type_deprecated = ... # type: mlagents_envs___communicator_objects___space_type_pb2___SpaceTypeProto
@property
def action_spec(self) -> ActionSpecProto: ...
vector_action_size : typing___Optional[typing___Iterable[builtin___int]] = None,
vector_action_descriptions : typing___Optional[typing___Iterable[typing___Text]] = None,
vector_action_space_type : typing___Optional[mlagents_envs___communicator_objects___space_type_pb2___SpaceTypeProto] = None,
vector_action_size_deprecated : typing___Optional[typing___Iterable[builtin___int]] = None,
vector_action_descriptions_deprecated : typing___Optional[typing___Iterable[typing___Text]] = None,
vector_action_space_type_deprecated : typing___Optional[mlagents_envs___communicator_objects___space_type_pb2___SpaceTypeProto] = None,
action_spec : typing___Optional[ActionSpecProto] = None,
) -> None: ...
@classmethod
def FromString(cls, s: builtin___bytes) -> BrainParametersProto: ...

def ClearField(self, field_name: typing_extensions___Literal[u"brain_name",u"is_training",u"vector_action_descriptions",u"vector_action_size",u"vector_action_space_type"]) -> None: ...
def HasField(self, field_name: typing_extensions___Literal[u"action_spec"]) -> builtin___bool: ...
def ClearField(self, field_name: typing_extensions___Literal[u"action_spec",u"brain_name",u"is_training",u"vector_action_descriptions_deprecated",u"vector_action_size_deprecated",u"vector_action_space_type_deprecated"]) -> None: ...
def ClearField(self, field_name: typing_extensions___Literal[u"brain_name",b"brain_name",u"is_training",b"is_training",u"vector_action_descriptions",b"vector_action_descriptions",u"vector_action_size",b"vector_action_size",u"vector_action_space_type",b"vector_action_space_type"]) -> None: ...
def HasField(self, field_name: typing_extensions___Literal[u"action_spec",b"action_spec"]) -> builtin___bool: ...
def ClearField(self, field_name: typing_extensions___Literal[u"action_spec",b"action_spec",u"brain_name",b"brain_name",u"is_training",b"is_training",u"vector_action_descriptions_deprecated",b"vector_action_descriptions_deprecated",u"vector_action_size_deprecated",b"vector_action_size_deprecated",u"vector_action_space_type_deprecated",b"vector_action_space_type_deprecated"]) -> None: ...

13
ml-agents-envs/mlagents_envs/communicator_objects/capabilities_pb2.py


name='mlagents_envs/communicator_objects/capabilities.proto',
package='communicator_objects',
syntax='proto3',
serialized_pb=_b('\n5mlagents_envs/communicator_objects/capabilities.proto\x12\x14\x63ommunicator_objects\"}\n\x18UnityRLCapabilitiesProto\x12\x1a\n\x12\x62\x61seRLCapabilities\x18\x01 \x01(\x08\x12#\n\x1b\x63oncatenatedPngObservations\x18\x02 \x01(\x08\x12 \n\x18\x63ompressedChannelMapping\x18\x03 \x01(\x08\x42%\xaa\x02\"Unity.MLAgents.CommunicatorObjectsb\x06proto3')
serialized_pb=_b('\n5mlagents_envs/communicator_objects/capabilities.proto\x12\x14\x63ommunicator_objects\"\x94\x01\n\x18UnityRLCapabilitiesProto\x12\x1a\n\x12\x62\x61seRLCapabilities\x18\x01 \x01(\x08\x12#\n\x1b\x63oncatenatedPngObservations\x18\x02 \x01(\x08\x12 \n\x18\x63ompressedChannelMapping\x18\x03 \x01(\x08\x12\x15\n\rhybridActions\x18\x04 \x01(\x08\x42%\xaa\x02\"Unity.MLAgents.CommunicatorObjectsb\x06proto3')
)

message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='hybridActions', full_name='communicator_objects.UnityRLCapabilitiesProto.hybridActions', index=3,
number=4, type=8, cpp_type=7, label=1,
has_default_value=False, default_value=False,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
],
extensions=[
],

extension_ranges=[],
oneofs=[
],
serialized_start=79,
serialized_end=204,
serialized_start=80,
serialized_end=228,
)
DESCRIPTOR.message_types_by_name['UnityRLCapabilitiesProto'] = _UNITYRLCAPABILITIESPROTO

6
ml-agents-envs/mlagents_envs/communicator_objects/capabilities_pb2.pyi


baseRLCapabilities = ... # type: builtin___bool
concatenatedPngObservations = ... # type: builtin___bool
compressedChannelMapping = ... # type: builtin___bool
hybridActions = ... # type: builtin___bool
def __init__(self,
*,

hybridActions : typing___Optional[builtin___bool] = None,
) -> None: ...
@classmethod
def FromString(cls, s: builtin___bytes) -> UnityRLCapabilitiesProto: ...

def ClearField(self, field_name: typing_extensions___Literal[u"baseRLCapabilities",u"compressedChannelMapping",u"concatenatedPngObservations"]) -> None: ...
def ClearField(self, field_name: typing_extensions___Literal[u"baseRLCapabilities",u"compressedChannelMapping",u"concatenatedPngObservations",u"hybridActions"]) -> None: ...
def ClearField(self, field_name: typing_extensions___Literal[u"baseRLCapabilities",b"baseRLCapabilities",u"compressedChannelMapping",b"compressedChannelMapping",u"concatenatedPngObservations",b"concatenatedPngObservations"]) -> None: ...
def ClearField(self, field_name: typing_extensions___Literal[u"baseRLCapabilities",b"baseRLCapabilities",u"compressedChannelMapping",b"compressedChannelMapping",u"concatenatedPngObservations",b"concatenatedPngObservations",u"hybridActions",b"hybridActions"]) -> None: ...

32
ml-agents-envs/mlagents_envs/environment.py


DecisionSteps,
TerminalSteps,
BehaviorSpec,
ActionTuple,
BehaviorName,
AgentId,
BehaviorMapping,

# * 1.0.0 - initial version
# * 1.1.0 - support concatenated PNGs for compressed observations.
# * 1.2.0 - support compression mapping for stacked compressed observations.
API_VERSION = "1.2.0"
# * 1.3.0 - support action spaces with both continuous and discrete actions.
API_VERSION = "1.3.0"
# Default port that the editor listens on. If an environment executable
# isn't specified, this port will be used.

capabilities.baseRLCapabilities = True
capabilities.concatenatedPngObservations = True
capabilities.compressedChannelMapping = True
capabilities.hybridActions = True
return capabilities
@staticmethod

self._env_state: Dict[str, Tuple[DecisionSteps, TerminalSteps]] = {}
self._env_specs: Dict[str, BehaviorSpec] = {}
self._env_actions: Dict[str, np.ndarray] = {}
self._env_actions: Dict[str, ActionTuple] = {}
self._is_first_message = True
self._update_behavior_specs(aca_output)

f"agent group in the environment"
)
def set_actions(self, behavior_name: BehaviorName, action: np.ndarray) -> None:
def set_actions(self, behavior_name: BehaviorName, action: ActionTuple) -> None:
self._assert_behavior_exists(behavior_name)
if behavior_name not in self._env_state:
return

self._env_actions[behavior_name] = action
def set_action_for_agent(
self, behavior_name: BehaviorName, agent_id: AgentId, action: np.ndarray
self, behavior_name: BehaviorName, agent_id: AgentId, action: ActionTuple
) -> None:
self._assert_behavior_exists(behavior_name)
if behavior_name not in self._env_state:

action = action_spec._validate_action(action, num_agents, behavior_name)
action = action_spec._validate_action(action, None, behavior_name)
if behavior_name not in self._env_actions:
self._env_actions[behavior_name] = action_spec.empty_action(num_agents)
try:

agent_id
)
) from ie
self._env_actions[behavior_name][index] = action
if action_spec.continuous_size > 0:
self._env_actions[behavior_name].continuous[index] = action.continuous[0, :]
if action_spec.discrete_size > 0:
self._env_actions[behavior_name].discrete[index] = action.discrete[0, :]
def get_steps(
self, behavior_name: BehaviorName

@timed
def _generate_step_input(
self, vector_action: Dict[str, np.ndarray]
self, vector_action: Dict[str, ActionTuple]
) -> UnityInputProto:
rl_in = UnityRLInputProto()
for b in vector_action:

for i in range(n_agents):
action = AgentActionProto(vector_actions=vector_action[b][i])
action = AgentActionProto()
if vector_action[b].continuous is not None:
action.vector_actions_deprecated.extend(
vector_action[b].continuous[i]
)
action.continuous_actions.extend(vector_action[b].continuous[i])
if vector_action[b].discrete is not None:
action.vector_actions_deprecated.extend(
vector_action[b].discrete[i]
)
action.discrete_actions.extend(vector_action[b].discrete[i])
rl_in.agent_actions[b].value.extend([action])
rl_in.command = STEP
rl_in.side_channel = bytes(

18
ml-agents-envs/mlagents_envs/mock_communicator.py


from .communicator import Communicator
from .environment import UnityEnvironment
from mlagents_envs.communicator_objects.unity_rl_output_pb2 import UnityRLOutputProto
from mlagents_envs.communicator_objects.brain_parameters_pb2 import BrainParametersProto
from mlagents_envs.communicator_objects.brain_parameters_pb2 import (
BrainParametersProto,
ActionSpecProto,
)
from mlagents_envs.communicator_objects.unity_rl_initialization_output_pb2 import (
UnityRLInitializationOutputProto,
)

NONE as COMPRESSION_TYPE_NONE,
PNG as COMPRESSION_TYPE_PNG,
)
from mlagents_envs.communicator_objects.space_type_pb2 import discrete, continuous
class MockCommunicator(Communicator):

self.vec_obs_size = vec_obs_size
def initialize(self, inputs: UnityInputProto) -> UnityOutputProto:
if self.is_discrete:
action_spec = ActionSpecProto(
num_discrete_actions=2, discrete_branch_sizes=[3, 2]
)
else:
action_spec = ActionSpecProto(num_continuous_actions=2)
vector_action_size=[2],
vector_action_descriptions=["", ""],
vector_action_space_type=discrete if self.is_discrete else continuous,
brain_name=self.brain_name,
is_training=True,
brain_name=self.brain_name, is_training=True, action_spec=action_spec
)
rl_init = UnityRLInitializationOutputProto(
name="RealFakeAcademy",

27
ml-agents-envs/mlagents_envs/rpc_utils.py


from mlagents_envs.base_env import (
BehaviorSpec,

DecisionSteps,
TerminalSteps,
)

:return: BehaviorSpec object.
"""
observation_shape = [tuple(obs.shape) for obs in agent_info.observations]
if brain_param_proto.vector_action_space_type == 1:
action_spec = ActionSpec(brain_param_proto.vector_action_size[0], ())
# proto from communicator < v1.3 does not set action spec, use deprecated fields instead
if (
brain_param_proto.action_spec.num_continuous_actions == 0
and brain_param_proto.action_spec.num_discrete_actions == 0
):
if brain_param_proto.vector_action_space_type_deprecated == 1:
action_spec = ActionSpec(
brain_param_proto.vector_action_size_deprecated[0], ()
)
else:
action_spec = ActionSpec(
0, tuple(brain_param_proto.vector_action_size_deprecated)
)
action_spec = ActionSpec(0, tuple(brain_param_proto.vector_action_size))
action_spec_proto = brain_param_proto.action_spec
action_spec = ActionSpec(
action_spec_proto.num_continuous_actions,
tuple(branch for branch in action_spec_proto.discrete_branch_sizes),
)
return BehaviorSpec(observation_shape, action_spec)

], # pylint: disable=unsubscriptable-object
) -> np.ndarray:
if len(agent_info_list) == 0:
return np.zeros((0, shape[0]), dtype=np.float32)
return np.zeros((0,) + shape, dtype=np.float32)
np_obs = np.array(
[
agent_obs.observations[obs_index].float_data.data

)
).reshape((len(agent_info_list),) + shape)
_raise_on_nan_and_inf(np_obs, "observations")
return np_obs
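
The fallback above, written out as a standalone helper for readability: protos from communicators older than v1.3 leave action_spec empty, so the deprecated vector-action fields are consulted instead. This is a sketch of just the action-spec branch; the real behavior_spec_from_proto also builds the observation shapes.

```python
from mlagents_envs.base_env import ActionSpec

def action_spec_from_brain_proto(brain_param_proto) -> ActionSpec:
    proto_spec = brain_param_proto.action_spec
    if proto_spec.num_continuous_actions == 0 and proto_spec.num_discrete_actions == 0:
        # Peer < v1.3: only the deprecated fields are populated.
        if brain_param_proto.vector_action_space_type_deprecated == 1:  # continuous
            return ActionSpec(brain_param_proto.vector_action_size_deprecated[0], ())
        return ActionSpec(0, tuple(brain_param_proto.vector_action_size_deprecated))
    return ActionSpec(
        proto_spec.num_continuous_actions,
        tuple(proto_spec.discrete_branch_sizes),
    )
```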

6
ml-agents-envs/mlagents_envs/tests/test_envs.py


import pytest
from mlagents_envs.environment import UnityEnvironment
from mlagents_envs.base_env import DecisionSteps, TerminalSteps
from mlagents_envs.base_env import DecisionSteps, TerminalSteps, ActionTuple
from mlagents_envs.exception import UnityEnvironmentException, UnityActionException
from mlagents_envs.mock_communicator import MockCommunicator

env.set_actions("RealFakeBrain", spec.action_spec.empty_action(n_agents - 1))
decision_steps, terminal_steps = env.get_steps("RealFakeBrain")
n_agents = len(decision_steps)
env.set_actions("RealFakeBrain", spec.action_spec.empty_action(n_agents) - 1)
_empty_act = spec.action_spec.empty_action(n_agents)
next_action = ActionTuple(_empty_act.continuous - 1, _empty_act.discrete - 1)
env.set_actions("RealFakeBrain", next_action)
env.step()
env.close()

33
ml-agents-envs/mlagents_envs/tests/test_rpc_utils.py


return agent_info_protos
# The arguments here are the DecisionSteps, TerminalSteps and actions for a single agent name
# The arguments here are the DecisionSteps, TerminalSteps and continuous/discrete actions for a single agent name
decision_steps: DecisionSteps, terminal_steps: TerminalSteps, actions: np.ndarray
decision_steps: DecisionSteps,
terminal_steps: TerminalSteps,
continuous_actions: np.ndarray,
discrete_actions: np.ndarray,
agent_action_protos = [
AgentActionProto(vector_actions=action) for action in actions
]
agent_action_protos = []
num_agents = (
len(continuous_actions)
if continuous_actions is not None
else len(discrete_actions)
)
for i in range(num_agents):
proto = AgentActionProto()
if continuous_actions is not None:
proto.continuous_actions.extend(continuous_actions[i])
proto.vector_actions_deprecated.extend(continuous_actions[i])
if discrete_actions is not None:
proto.discrete_actions.extend(discrete_actions[i])
proto.vector_actions_deprecated.extend(discrete_actions[i])
agent_action_protos.append(proto)
agent_info_action_pair_protos = [
AgentInfoActionPairProto(agent_info=agent_info_proto, action_info=action_proto)
for agent_info_proto, action_proto in zip(

def test_agent_behavior_spec_from_proto():
agent_proto = generate_list_agent_proto(1, [(3,), (4,)])[0]
bp = BrainParametersProto()
bp.vector_action_size.extend([5, 4])
bp.vector_action_space_type = 0
bp.vector_action_size_deprecated.extend([5, 4])
bp.vector_action_space_type_deprecated = 0
behavior_spec = behavior_spec_from_proto(bp, agent_proto)
assert behavior_spec.action_spec.is_discrete()
assert not behavior_spec.action_spec.is_continuous()

bp = BrainParametersProto()
bp.vector_action_size.extend([6])
bp.vector_action_space_type = 1
bp.vector_action_size_deprecated.extend([6])
bp.vector_action_space_type_deprecated = 1
behavior_spec = behavior_spec_from_proto(bp, agent_proto)
assert not behavior_spec.action_spec.is_discrete()
assert behavior_spec.action_spec.is_continuous()

27
ml-agents-envs/mlagents_envs/tests/test_steps.py


assert specs.discrete_branches == ()
assert specs.discrete_size == 0
assert specs.continuous_size == 3
assert specs.empty_action(5).shape == (5, 3)
assert specs.empty_action(5).dtype == np.float32
assert specs.empty_action(5).continuous.shape == (5, 3)
assert specs.empty_action(5).continuous.dtype == np.float32
assert specs.empty_action(5).shape == (5, 1)
assert specs.empty_action(5).dtype == np.int32
assert specs.empty_action(5).discrete.shape == (5, 1)
assert specs.empty_action(5).discrete.dtype == np.int32
specs = ActionSpec(3, (3,))
assert specs.continuous_size == 3
assert specs.discrete_branches == (3,)
assert specs.discrete_size == 1
assert specs.empty_action(5).continuous.shape == (5, 3)
assert specs.empty_action(5).continuous.dtype == np.float32
assert specs.empty_action(5).discrete.shape == (5, 1)
assert specs.empty_action(5).discrete.dtype == np.int32
def test_action_generator():

zero_action = specs.empty_action(4)
zero_action = specs.empty_action(4).continuous
random_action = specs.random_action(4)
print(specs.random_action(4))
random_action = specs.random_action(4).continuous
print(random_action)
assert random_action.dtype == np.float32
assert random_action.shape == (4, action_len)
assert np.min(random_action) >= -1

action_shape = (10, 20, 30)
specs = ActionSpec.create_discrete(action_shape)
zero_action = specs.empty_action(4)
zero_action = specs.empty_action(4).discrete
random_action = specs.random_action(4)
random_action = specs.random_action(4).discrete
assert random_action.dtype == np.int32
assert random_action.shape == (4, len(action_shape))
assert np.min(random_action) >= 0
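
A runnable recap of the shape/dtype contract exercised above, for a hybrid spec with 3 continuous actions plus one 3-way discrete branch:

```python
import numpy as np
from mlagents_envs.base_env import ActionSpec

spec = ActionSpec(3, (3,))
empty = spec.empty_action(5)                 # ActionTuple, one row per agent
assert empty.continuous.shape == (5, 3) and empty.continuous.dtype == np.float32
assert empty.discrete.shape == (5, 1) and empty.discrete.dtype == np.int32

rand = spec.random_action(5)
assert rand.continuous.shape == (5, 3)
assert rand.discrete.shape == (5, 1)
assert rand.discrete.min() >= 0 and rand.discrete.max() < 3  # branch of size 3
```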

3
ml-agents/mlagents/trainers/action_info.py


class ActionInfo(NamedTuple):
action: Any
env_action: Any
value: Any
outputs: ActionInfoOutputs
agent_ids: List[AgentId]

return ActionInfo([], [], {}, [])
return ActionInfo([], [], [], {}, [])

23
ml-agents/mlagents/trainers/agent_processor.py


import queue
from mlagents_envs.base_env import (
ActionTuple,
DecisionSteps,
DecisionStep,
TerminalSteps,

from mlagents.trainers.trajectory import Trajectory, AgentExperience
from mlagents.trainers.policy import Policy
from mlagents.trainers.action_info import ActionInfo, ActionInfoOutputs
from mlagents.trainers.torch.action_log_probs import LogProbsTuple
from mlagents.trainers.stats import StatsReporter
from mlagents.trainers.behavior_id_utils import get_global_agent_id

done = terminated # Since this is an ongoing step
interrupted = step.interrupted if terminated else False
# Add the outputs of the last eval
action = stored_take_action_outputs["action"][idx]
if self.policy.use_continuous_act:
action_pre = stored_take_action_outputs["pre_action"][idx]
else:
action_pre = None
action_probs = stored_take_action_outputs["log_probs"][idx]
stored_actions = stored_take_action_outputs["action"]
action_tuple = ActionTuple(
continuous=stored_actions.continuous[idx],
discrete=stored_actions.discrete[idx],
)
stored_action_probs = stored_take_action_outputs["log_probs"]
log_probs_tuple = LogProbsTuple(
continuous=stored_action_probs.continuous[idx],
discrete=stored_action_probs.discrete[idx],
)
action_mask = stored_decision_step.action_mask
prev_action = self.policy.retrieve_previous_action([global_id])[0, :]
experience = AgentExperience(

done=done,
action=action,
action_probs=action_probs,
action_pre=action_pre,
action=action_tuple,
action_probs=log_probs_tuple,
action_mask=action_mask,
prev_action=prev_action,
interrupted=interrupted,
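
Sketch of the per-agent slicing that now feeds AgentExperience: the stored policy outputs are batched ActionTuple/LogProbsTuple objects, and one row is taken per agent id. The zero arrays below are stand-ins for stored_take_action_outputs entries.

```python
import numpy as np
from mlagents_envs.base_env import ActionTuple
from mlagents.trainers.torch.action_log_probs import LogProbsTuple

stored_actions = ActionTuple(
    continuous=np.zeros((4, 2), dtype=np.float32),
    discrete=np.zeros((4, 1), dtype=np.int32),
)
stored_log_probs = LogProbsTuple(
    continuous=np.zeros((4, 2), dtype=np.float32),
    discrete=np.zeros((4, 1), dtype=np.float32),
)
idx = 0  # position of this agent in the batched outputs
action_tuple = ActionTuple(
    continuous=stored_actions.continuous[idx],
    discrete=stored_actions.discrete[idx],
)
log_probs_tuple = LogProbsTuple(
    continuous=stored_log_probs.continuous[idx],
    discrete=stored_log_probs.discrete[idx],
)
```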

17
ml-agents/mlagents/trainers/demo_loader.py


[next_pair_info.agent_info], behavior_spec
)
previous_action = (
np.array(pair_infos[idx].action_info.vector_actions, dtype=np.float32) * 0
np.array(
pair_infos[idx].action_info.vector_actions_deprecated, dtype=np.float32
)
* 0
pair_infos[idx - 1].action_info.vector_actions, dtype=np.float32
pair_infos[idx - 1].action_info.vector_actions_deprecated,
dtype=np.float32,
)
next_done = len(next_terminal_step) == 1

demo_raw_buffer["rewards"].append(next_reward)
demo_raw_buffer["obs"].append(current_obs)
demo_raw_buffer["actions"].append(current_pair_info.action_info.vector_actions)
# TODO: update the demonstration files and read from the new proto format
if behavior_spec.action_spec.continuous_size > 0:
demo_raw_buffer["continuous_action"].append(
current_pair_info.action_info.vector_actions_deprecated
)
if behavior_spec.action_spec.discrete_size > 0:
demo_raw_buffer["discrete_action"].append(
current_pair_info.action_info.vector_actions_deprecated
)
demo_raw_buffer["prev_action"].append(previous_action)
if next_done:
demo_raw_buffer.resequence_and_append(

1
ml-agents/mlagents/trainers/env_manager.py


from abc import ABC, abstractmethod
from typing import List, Dict, NamedTuple, Iterable, Tuple
from mlagents_envs.base_env import (
DecisionSteps,

4
ml-agents/mlagents/trainers/optimizer/tf_optimizer.py


[self.value_heads, self.policy.memory_out, self.memory_out], feed_dict
)
prev_action = (
batch["actions"][-1] if not self.policy.use_continuous_act else None
batch["discrete_action"][-1]
if not self.policy.use_continuous_act
else None
)
else:
value_estimates = self.sess.run(self.value_heads, feed_dict)

40
ml-agents/mlagents/trainers/policy/policy.py


from typing import Dict, List, Optional
import numpy as np
from mlagents_envs.base_env import DecisionSteps
from mlagents_envs.base_env import ActionTuple, BehaviorSpec, DecisionSteps
from mlagents_envs.base_env import BehaviorSpec
from mlagents.trainers.settings import TrainerSettings, NetworkSettings

self.trainer_settings = trainer_settings
self.network_settings: NetworkSettings = trainer_settings.network_settings
self.seed = seed
if (
self.behavior_spec.action_spec.continuous_size > 0
and self.behavior_spec.action_spec.discrete_size > 0
):
raise UnityPolicyException("Trainers do not support mixed action spaces.")
self.act_size = (
list(self.behavior_spec.action_spec.discrete_branches)
if self.behavior_spec.action_spec.is_discrete()

1 for shape in behavior_spec.observation_shapes if len(shape) == 3
)
self.use_continuous_act = self.behavior_spec.action_spec.is_continuous()
# This line will be removed in the ActionBuffer change
self.num_branches = (
self.behavior_spec.action_spec.continuous_size
+ self.behavior_spec.action_spec.discrete_size
)
self.previous_action_dict: Dict[str, np.array] = {}
self.previous_action_dict: Dict[str, np.ndarray] = {}
self.memory_dict: Dict[str, np.ndarray] = {}
self.normalize = trainer_settings.network_settings.normalize
self.use_recurrent = self.network_settings.memory is not None

) -> None:
if memory_matrix is None:
return
for index, agent_id in enumerate(agent_ids):
self.memory_dict[agent_id] = memory_matrix[index, :]

if agent_id in self.memory_dict:
self.memory_dict.pop(agent_id)
def make_empty_previous_action(self, num_agents):
def make_empty_previous_action(self, num_agents: int) -> np.ndarray:
return np.zeros((num_agents, self.num_branches), dtype=np.int)
return np.zeros(
(num_agents, self.behavior_spec.action_spec.discrete_size), dtype=np.int32
)
self, agent_ids: List[str], action_matrix: Optional[np.ndarray]
self, agent_ids: List[str], action_tuple: ActionTuple
if action_matrix is None:
return
self.previous_action_dict[agent_id] = action_matrix[index, :]
self.previous_action_dict[agent_id] = action_tuple.discrete[index, :]
action_matrix = np.zeros((len(agent_ids), self.num_branches), dtype=np.int)
action_matrix = self.make_empty_previous_action(len(agent_ids))
for index, agent_id in enumerate(agent_ids):
if agent_id in self.previous_action_dict:
action_matrix[index, :] = self.previous_action_dict[agent_id]

raise NotImplementedError
@staticmethod
def check_nan_action(action: Optional[np.ndarray]) -> None:
def check_nan_action(action: Optional[ActionTuple]) -> None:
d = np.sum(action)
d = np.sum(action.continuous)
raise RuntimeError("NaN action detected.")
raise RuntimeError("Continuous NaN action detected.")
d = np.sum(action.discrete)
has_nan = np.isnan(d)
if has_nan:
raise RuntimeError("Discrete NaN action detected.")
@abstractmethod
def update_normalization(self, vector_obs: np.ndarray) -> None:
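
Standalone sketch of the two-part NaN check introduced above; np.sum over an empty half is 0.0, so the same check covers continuous-only, discrete-only and hybrid specs.

```python
import numpy as np
from mlagents_envs.base_env import ActionTuple

def check_nan_action(action: ActionTuple) -> None:
    if np.isnan(np.sum(action.continuous)):
        raise RuntimeError("Continuous NaN action detected.")
    if np.isnan(np.sum(action.discrete)):
        raise RuntimeError("Discrete NaN action detected.")

check_nan_action(
    ActionTuple(
        continuous=np.zeros((1, 2), dtype=np.float32),
        discrete=np.zeros((1, 1), dtype=np.int32),
    )
)
```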

33
ml-agents/mlagents/trainers/policy/tf_policy.py


from mlagents.tf_utils import tf
from mlagents import tf_utils
from mlagents_envs.exception import UnityException
from mlagents_envs.base_env import BehaviorSpec
from mlagents.trainers.torch.action_log_probs import LogProbsTuple
from mlagents_envs.base_env import DecisionSteps
from mlagents_envs.base_env import DecisionSteps, ActionTuple, BehaviorSpec
from mlagents.trainers.tf.models import ModelUtils
from mlagents.trainers.settings import TrainerSettings, EncoderType
from mlagents.trainers import __version__

reparameterize,
condition_sigma_on_obs,
)
if (
self.behavior_spec.action_spec.continuous_size > 0
and self.behavior_spec.action_spec.discrete_size > 0
):
raise UnityPolicyException(
"TensorFlow does not support mixed action spaces. Please run with the Torch framework."
)
# for ghost trainer save/load snapshots
self.assign_phs: List[tf.Tensor] = []
self.assign_ops: List[tf.Operation] = []

feed_dict[self.prev_action] = self.retrieve_previous_action(
global_agent_ids
)
feed_dict[self.memory_in] = self.retrieve_memories(global_agent_ids)
feed_dict = self.fill_eval_dict(feed_dict, decision_requests)
run_out = self._execute_model(feed_dict, self.inference_dict)

)
self.save_memories(global_agent_ids, run_out.get("memory_out"))
# For compatibility with buffer changes for hybrid action support
if "log_probs" in run_out:
log_probs_tuple = LogProbsTuple()
if self.behavior_spec.action_spec.is_continuous():
log_probs_tuple.add_continuous(run_out["log_probs"])
else:
log_probs_tuple.add_discrete(run_out["log_probs"])
run_out["log_probs"] = log_probs_tuple
if "action" in run_out:
action_tuple = ActionTuple()
env_action_tuple = ActionTuple()
if self.behavior_spec.action_spec.is_continuous():
action_tuple.add_continuous(run_out["pre_action"])
env_action_tuple.add_continuous(run_out["action"])
else:
action_tuple.add_discrete(run_out["action"])
env_action_tuple.add_discrete(run_out["action"])
run_out["action"] = action_tuple
run_out["env_action"] = env_action_tuple
env_action=run_out.get("env_action"),
value=run_out.get("value"),
outputs=run_out,
agent_ids=decision_requests.agent_id,

84
ml-agents/mlagents/trainers/policy/torch_policy.py


SeparateActorCritic,
GlobalSteps,
)
from mlagents.trainers.torch.agent_action import AgentAction
from mlagents.trainers.torch.action_log_probs import ActionLogProbs
EPSILON = 1e-7 # Small value to avoid divide by zero

conditional_sigma=self.condition_sigma_on_obs,
tanh_squash=tanh_squash,
)
self._clip_action = not tanh_squash
# Save the m_size needed for export
self._export_m_size = self.m_size
# m_size needed for training is determined by network, not trainer settings

self._clip_action = not tanh_squash
@property
def export_memory_size(self) -> int:

) -> Tuple[SplitObservations, np.ndarray]:
obs = ModelUtils.list_to_tensor_list(decision_requests.obs)
mask = None
if not self.use_continuous_act:
if self.behavior_spec.action_spec.discrete_size > 0:
mask = torch.ones([len(decision_requests), np.sum(self.act_size)])
if decision_requests.action_mask is not None:
mask = torch.as_tensor(

masks: Optional[torch.Tensor] = None,
memories: Optional[torch.Tensor] = None,
seq_len: int = 1,
all_log_probs: bool = False,
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
critic_obs: Optional[List[List[torch.Tensor]]] = None,
) -> Tuple[AgentAction, ActionLogProbs, torch.Tensor, torch.Tensor]:
"""
:param vec_obs: List of vector observations.
:param vis_obs: List of visual observations.

:param all_log_probs: Returns (for discrete actions) a tensor of log probs, one for each action.
:return: Tuple of actions, actions clipped to -1, 1, log probabilities (dependent on all_log_probs),
entropies, and output memories, all as Torch Tensors.
:return: Tuple of AgentAction, ActionLogProbs, entropies, and output memories.
if memories is None:
dists, memories = self.actor_critic.get_dists(obs, masks, memories, seq_len)
else:
# If we're using LSTM, we need to execute the values to get the critic memories
dists, _, memories = self.actor_critic.get_dist_and_value(
obs, masks, memories, seq_len
)
action_list = self.actor_critic.sample_action(dists)
log_probs, entropies, all_logs = ModelUtils.get_probs_and_entropy(
action_list, dists
)
actions = torch.stack(action_list, dim=-1)
if self.use_continuous_act:
actions = actions[:, :, 0]
else:
actions = actions[:, 0, :]
# Use the sum of entropy across actions, not the mean
entropy_sum = torch.sum(entropies, dim=1)
if self._clip_action and self.use_continuous_act:
clipped_action = torch.clamp(actions, -3, 3) / 3
else:
clipped_action = actions
return (
actions,
clipped_action,
all_logs if all_log_probs else log_probs,
entropy_sum,
memories,
actions, log_probs, entropies, memories = self.actor_critic.get_action_stats(
obs, masks, memories, seq_len
return (actions, log_probs, entropies, memories)
actions: torch.Tensor,
actions: AgentAction,
) -> Tuple[torch.Tensor, torch.Tensor, Dict[str, torch.Tensor]]:
dists, value_heads, _ = self.actor_critic.get_dist_and_value(
obs, masks, memories, seq_len, critic_obs
) -> Tuple[ActionLogProbs, torch.Tensor, Dict[str, torch.Tensor]]:
log_probs, entropies, value_heads = self.actor_critic.get_stats_and_value(
obs, actions, masks, memories, seq_len, critic_obs
action_list = [actions[..., i] for i in range(actions.shape[-1])]
log_probs, entropies, _ = ModelUtils.get_probs_and_entropy(action_list, dists)
# Use the sum of entropy across actions, not the mean
entropy_sum = torch.sum(entropies, dim=1)
return log_probs, entropy_sum, value_heads
return log_probs, entropies, value_heads
@timed
def evaluate(

:return: Outputs from network as defined by self.inference_dict.
"""
obs, masks = self._split_decision_step(decision_requests)
memories = torch.as_tensor(self.retrieve_memories(global_agent_ids)).unsqueeze(
0
) if self.use_recurrent else None
memories = (
torch.as_tensor(self.retrieve_memories(global_agent_ids)).unsqueeze(0)
if self.use_recurrent
else None
)
action, clipped_action, log_probs, entropy, memories = self.sample_actions(
action, log_probs, entropy, memories = self.sample_actions(
run_out["pre_action"] = ModelUtils.to_numpy(action)
run_out["action"] = ModelUtils.to_numpy(clipped_action)
# Todo - make pre_action difference
run_out["log_probs"] = ModelUtils.to_numpy(log_probs)
action_tuple = action.to_action_tuple()
run_out["action"] = action_tuple
# This is the clipped action which is not saved to the buffer
# but is exclusively sent to the environment.
env_action_tuple = action.to_action_tuple(clip=self._clip_action)
run_out["env_action"] = env_action_tuple
run_out["log_probs"] = log_probs.to_log_probs_tuple()
run_out["entropy"] = ModelUtils.to_numpy(entropy)
run_out["learning_rate"] = 0.0
if self.use_recurrent:

self.check_nan_action(run_out.get("action"))
return ActionInfo(
action=run_out.get("action"),
env_action=run_out.get("env_action"),
value=run_out.get("value"),
outputs=run_out,
agent_ids=list(decision_requests.agent_id),
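
The unclipped sample is what gets written to the buffer, while the env-facing copy is clipped (per the hunk above, via AgentAction.to_action_tuple(clip=self._clip_action)). A tiny torch sketch of the clipping rule itself, matching the clamp the old code path used:

```python
import torch

def clip_continuous(actions: torch.Tensor, clip: bool) -> torch.Tensor:
    # Squash sampled continuous actions into [-1, 1] for the environment.
    return torch.clamp(actions, -3, 3) / 3 if clip else actions

sampled = torch.tensor([[4.0, -0.6]])
print(clip_continuous(sampled, clip=True))  # tensor([[ 1.0000, -0.2000]])
```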

9
ml-agents/mlagents/trainers/ppo/optimizer_tf.py


self.policy.sequence_length_ph: self.policy.sequence_length,
self.policy.mask_input: mini_batch["masks"] * burn_in_mask,
self.advantage: mini_batch["advantages"],
self.all_old_log_probs: mini_batch["action_probs"],
if self.policy.output_pre is not None and "actions_pre" in mini_batch:
feed_dict[self.policy.output_pre] = mini_batch["actions_pre"]
if self.policy.use_continuous_act: # For hybrid action buffer support
feed_dict[self.all_old_log_probs] = mini_batch["continuous_log_probs"]
feed_dict[self.policy.output_pre] = mini_batch["continuous_action"]
feed_dict[self.policy.output] = mini_batch["actions"]
feed_dict[self.all_old_log_probs] = mini_batch["discrete_log_probs"]
feed_dict[self.policy.output] = mini_batch["discrete_action"]
if self.policy.use_recurrent:
feed_dict[self.policy.prev_action] = mini_batch["prev_action"]
feed_dict[self.policy.action_masks] = mini_batch["action_mask"]

12
ml-agents/mlagents/trainers/ppo/optimizer_torch.py


from mlagents.trainers.policy.torch_policy import TorchPolicy
from mlagents.trainers.optimizer.torch_optimizer import TorchOptimizer
from mlagents.trainers.settings import TrainerSettings, PPOSettings
from mlagents.trainers.torch.agent_action import AgentAction
from mlagents.trainers.torch.action_log_probs import ActionLogProbs
from mlagents.trainers.torch.utils import ModelUtils

advantage = advantages.unsqueeze(-1)
decay_epsilon = self.hyperparameters.epsilon
r_theta = torch.exp(log_probs - old_log_probs)
p_opt_a = r_theta * advantage
p_opt_b = (

]
act_masks = ModelUtils.list_to_tensor(batch["action_mask"])
if self.policy.use_continuous_act:
actions = ModelUtils.list_to_tensor(batch["actions_pre"]).unsqueeze(-1)
else:
actions = ModelUtils.list_to_tensor(batch["actions"], dtype=torch.long)
actions = AgentAction.from_dict(batch)
memories = [
ModelUtils.list_to_tensor(batch["memory"][i])

critic_obs=critic_obs,
seq_len=self.policy.sequence_length,
)
old_log_probs = ActionLogProbs.from_dict(batch).flatten()
log_probs = log_probs.flatten()
loss_masks = ModelUtils.list_to_tensor(batch["masks"], dtype=torch.bool)
value_loss = self.ppo_value_loss(
values, old_values, returns, decay_eps, loss_masks

log_probs,
ModelUtils.list_to_tensor(batch["action_probs"]),
old_log_probs,
loss_masks,
)
loss = (
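
With actions and log probs now read back through AgentAction.from_dict / ActionLogProbs.from_dict and flattened, the surrogate objective itself is unchanged. A generic sketch of that clipped-surrogate step over already-flattened tensors (the real loss also applies the loss masks shown above):

```python
import torch

def ppo_clipped_surrogate(log_probs, old_log_probs, advantages, epsilon=0.2):
    advantage = advantages.unsqueeze(-1)
    r_theta = torch.exp(log_probs - old_log_probs)
    p_opt_a = r_theta * advantage
    p_opt_b = torch.clamp(r_theta, 1.0 - epsilon, 1.0 + epsilon) * advantage
    return -torch.mean(torch.min(p_opt_a, p_opt_b))
```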

6
ml-agents/mlagents/trainers/sac/optimizer_tf.py


feed_dict[self.rewards_holders[name]] = batch[f"{name}_rewards"]
if self.policy.use_continuous_act:
feed_dict[self.policy_network.external_action_in] = batch["actions"]
feed_dict[self.policy_network.external_action_in] = batch[
"continuous_action"
]
feed_dict[policy.output] = batch["actions"]
feed_dict[policy.output] = batch["discrete_action"]
if self.policy.use_recurrent:
feed_dict[policy.prev_action] = batch["prev_action"]
feed_dict[policy.action_masks] = batch["action_mask"]

278
ml-agents/mlagents/trainers/sac/optimizer_torch.py


import numpy as np
from typing import Dict, List, Mapping, cast, Tuple, Optional
from typing import Dict, List, Mapping, NamedTuple, cast, Tuple, Optional
from mlagents_envs.base_env import ActionSpec
from mlagents.trainers.torch.agent_action import AgentAction
from mlagents.trainers.torch.action_log_probs import ActionLogProbs
from mlagents_envs.base_env import ActionSpec
from mlagents.trainers.exception import UnityTrainerException
from mlagents.trainers.settings import TrainerSettings, SACSettings
from contextlib import ExitStack

action_spec: ActionSpec,
):
super().__init__()
self.action_spec = action_spec
if self.action_spec.is_continuous():
self.act_size = self.action_spec.continuous_size
num_value_outs = 1
num_action_ins = self.act_size
num_value_outs = max(sum(action_spec.discrete_branches), 1)
num_action_ins = int(action_spec.continuous_size)
else:
self.act_size = self.action_spec.discrete_branches
num_value_outs = sum(self.act_size)
num_action_ins = 0
self.q1_network = ValueNetwork(
stream_names,
observation_shapes,

)
return q1_out, q2_out
class TargetEntropy(NamedTuple):
discrete: List[float] = [] # One per branch
continuous: float = 0.0
class LogEntCoef(nn.Module):
def __init__(self, discrete, continuous):
super().__init__()
self.discrete = discrete
self.continuous = continuous
def __init__(self, policy: TorchPolicy, trainer_params: TrainerSettings):
super().__init__(policy, trainer_params)
hyperparameters: SACSettings = cast(SACSettings, trainer_params.hyperparameters)

self.policy = policy
self.act_size = policy.act_size
policy_network_settings = policy.network_settings
self.tau = hyperparameters.tau

name: int(not self.reward_signals[name].ignore_done)
for name in self.stream_names
}
self._action_spec = self.policy.behavior_spec.action_spec
self.policy.behavior_spec.action_spec,
self._action_spec,
)
self.target_network = ValueNetwork(

self.policy.actor_critic.critic, self.target_network, 1.0
)
self._log_ent_coef = torch.nn.Parameter(
torch.log(torch.as_tensor([self.init_entcoef] * len(self.act_size))),
# We create one entropy coefficient per action, whether discrete or continuous.
_disc_log_ent_coef = torch.nn.Parameter(
torch.log(
torch.as_tensor(
[self.init_entcoef] * len(self._action_spec.discrete_branches)
)
),
if self.policy.use_continuous_act:
self.target_entropy = torch.as_tensor(
-1
* self.continuous_target_entropy_scale
* np.prod(self.act_size[0]).astype(np.float32)
)
else:
self.target_entropy = [
self.discrete_target_entropy_scale * np.log(i).astype(np.float32)
for i in self.act_size
]
_cont_log_ent_coef = torch.nn.Parameter(
torch.log(torch.as_tensor([self.init_entcoef])), requires_grad=True
)
self._log_ent_coef = TorchSACOptimizer.LogEntCoef(
discrete=_disc_log_ent_coef, continuous=_cont_log_ent_coef
)
_cont_target = (
-1
* self.continuous_target_entropy_scale
* np.prod(self._action_spec.continuous_size).astype(np.float32)
)
_disc_target = [
self.discrete_target_entropy_scale * np.log(i).astype(np.float32)
for i in self._action_spec.discrete_branches
]
self.target_entropy = TorchSACOptimizer.TargetEntropy(
continuous=_cont_target, discrete=_disc_target
)
self.policy.actor_critic.distribution.parameters()
self.policy.actor_critic.action_model.parameters()
)
value_params = list(self.value_network.parameters()) + list(
self.policy.actor_critic.critic.parameters()

value_params, lr=hyperparameters.learning_rate
)
self.entropy_optimizer = torch.optim.Adam(
[self._log_ent_coef], lr=hyperparameters.learning_rate
self._log_ent_coef.parameters(), lr=hyperparameters.learning_rate
)
self._move_to_device(default_device())

def sac_value_loss(
self,
log_probs: torch.Tensor,
log_probs: ActionLogProbs,
discrete: bool,
_ent_coef = torch.exp(self._log_ent_coef)
_cont_ent_coef = self._log_ent_coef.continuous.exp()
_disc_ent_coef = self._log_ent_coef.discrete.exp()
if not discrete:
if self._action_spec.discrete_size <= 0:
action_probs = log_probs.exp()
disc_action_probs = log_probs.all_discrete_tensor.exp()
q1p_out[name] * action_probs, self.act_size
q1p_out[name] * disc_action_probs,
self._action_spec.discrete_branches,
q2p_out[name] * action_probs, self.act_size
q2p_out[name] * disc_action_probs,
self._action_spec.discrete_branches,
)
_q1p_mean = torch.mean(
torch.stack(

min_policy_qs[name] = torch.min(_q1p_mean, _q2p_mean)
value_losses = []
if not discrete:
if self._action_spec.discrete_size <= 0:
_ent_coef * log_probs, dim=1
_cont_ent_coef * log_probs.continuous_tensor, dim=1
)
value_loss = 0.5 * ModelUtils.masked_mean(
torch.nn.functional.mse_loss(values[name], v_backup), loss_masks

disc_log_probs = log_probs.all_discrete_tensor
log_probs * log_probs.exp(), self.act_size
disc_log_probs * disc_log_probs.exp(),
self._action_spec.discrete_branches,
torch.sum(_ent_coef[i] * _lp, dim=1, keepdim=True)
torch.sum(_disc_ent_coef[i] * _lp, dim=1, keepdim=True)
for i, _lp in enumerate(branched_per_action_ent)
]
)

branched_ent_bonus, axis=0
)
# Add continuous entropy bonus to minimum Q
if self._action_spec.continuous_size > 0:
v_backup += torch.sum(
_cont_ent_coef * log_probs.continuous_tensor,
dim=1,
keepdim=True,
)
value_loss = 0.5 * ModelUtils.masked_mean(
torch.nn.functional.mse_loss(values[name], v_backup.squeeze()),
loss_masks,

def sac_policy_loss(
self,
log_probs: torch.Tensor,
log_probs: ActionLogProbs,
discrete: bool,
_ent_coef = torch.exp(self._log_ent_coef)
_cont_ent_coef, _disc_ent_coef = (
self._log_ent_coef.continuous,
self._log_ent_coef.discrete,
)
_cont_ent_coef = _cont_ent_coef.exp()
_disc_ent_coef = _disc_ent_coef.exp()
if not discrete:
mean_q1 = mean_q1.unsqueeze(1)
batch_policy_loss = torch.mean(_ent_coef * log_probs - mean_q1, dim=1)
policy_loss = ModelUtils.masked_mean(batch_policy_loss, loss_masks)
else:
action_probs = log_probs.exp()
batch_policy_loss = 0
if self._action_spec.discrete_size > 0:
disc_log_probs = log_probs.all_discrete_tensor
disc_action_probs = disc_log_probs.exp()
log_probs * action_probs, self.act_size
disc_log_probs * disc_action_probs, self._action_spec.discrete_branches
mean_q1 * action_probs, self.act_size
mean_q1 * disc_action_probs, self._action_spec.discrete_branches
torch.sum(_ent_coef[i] * _lp - _qt, dim=1, keepdim=True)
torch.sum(_disc_ent_coef[i] * _lp - _qt, dim=1, keepdim=False)
for i, (_lp, _qt) in enumerate(
zip(branched_per_action_ent, branched_q_term)
)

batch_policy_loss = torch.squeeze(branched_policy_loss)
policy_loss = ModelUtils.masked_mean(batch_policy_loss, loss_masks)
batch_policy_loss += torch.sum(branched_policy_loss, dim=1)
all_mean_q1 = torch.sum(disc_action_probs * mean_q1, dim=1)
else:
all_mean_q1 = mean_q1
if self._action_spec.continuous_size > 0:
cont_log_probs = log_probs.continuous_tensor
batch_policy_loss += torch.mean(
_cont_ent_coef * cont_log_probs - all_mean_q1.unsqueeze(1), dim=1
)
policy_loss = ModelUtils.masked_mean(batch_policy_loss, loss_masks)
self, log_probs: torch.Tensor, loss_masks: torch.Tensor, discrete: bool
self, log_probs: ActionLogProbs, loss_masks: torch.Tensor
if not discrete:
_cont_ent_coef, _disc_ent_coef = (
self._log_ent_coef.continuous,
self._log_ent_coef.discrete,
)
entropy_loss = 0
if self._action_spec.discrete_size > 0:
target_current_diff = torch.sum(log_probs + self.target_entropy, dim=1)
entropy_loss = -1 * ModelUtils.masked_mean(
self._log_ent_coef * target_current_diff, loss_masks
)
else:
with torch.no_grad():
# Break discrete log probs into separate branches
disc_log_probs = log_probs.all_discrete_tensor
log_probs * log_probs.exp(), self.act_size
disc_log_probs * disc_log_probs.exp(),
self._action_spec.discrete_branches,
branched_per_action_ent, self.target_entropy
branched_per_action_ent, self.target_entropy.discrete
)
],
axis=1,

)
entropy_loss = -1 * ModelUtils.masked_mean(
torch.mean(self._log_ent_coef * target_current_diff, axis=1), loss_masks
entropy_loss += -1 * ModelUtils.masked_mean(
torch.mean(_disc_ent_coef * target_current_diff, axis=1), loss_masks
)
if self._action_spec.continuous_size > 0:
with torch.no_grad():
cont_log_probs = log_probs.continuous_tensor
target_current_diff = torch.sum(
cont_log_probs + self.target_entropy.continuous, dim=1
)
# We update all the _cont_ent_coef as one block
entropy_loss += -1 * ModelUtils.masked_mean(
_cont_ent_coef * target_current_diff, loss_masks
)
return entropy_loss

) -> Dict[str, torch.Tensor]:
condensed_q_output = {}
onehot_actions = ModelUtils.actions_to_onehot(discrete_actions, self.act_size)
onehot_actions = ModelUtils.actions_to_onehot(
discrete_actions, self._action_spec.discrete_branches
)
branched_q = ModelUtils.break_into_branches(item, self.act_size)
branched_q = ModelUtils.break_into_branches(
item, self._action_spec.discrete_branches
)
only_action_qs = torch.stack(
[
torch.sum(_act * _q, dim=1, keepdim=True)

AgentBuffer.obs_list_to_obs_batch(batch["next_obs"])
)
act_masks = ModelUtils.list_to_tensor(batch["action_mask"])
if self.policy.use_continuous_act:
actions = ModelUtils.list_to_tensor(batch["actions"]).unsqueeze(-1)
else:
actions = ModelUtils.list_to_tensor(batch["actions"], dtype=torch.long)
actions = AgentAction.from_dict(batch)
memories_list = [
ModelUtils.list_to_tensor(batch["memory"][i])

self.target_network.network_body.copy_normalization(
self.policy.actor_critic.network_body
)
(sampled_actions, _, log_probs, _, _) = self.policy.sample_actions(
(
sampled_actions,
log_probs,
_,
value_estimates,
_,
) = self.policy.actor_critic.get_action_stats_and_value(
seq_len=self.policy.sequence_length,
all_log_probs=not self.policy.use_continuous_act,
sequence_length=self.policy.sequence_length,
)
cont_sampled_actions = sampled_actions.continuous_tensor
cont_actions = actions.continuous_tensor
q1p_out, q2p_out = self.value_network(
obs,
cont_sampled_actions,
memories=q_memories,
sequence_length=self.policy.sequence_length,
q2_grad=False,
value_estimates, _ = self.policy.actor_critic.critic_pass(
obs, memories, sequence_length=self.policy.sequence_length
q1_out, q2_out = self.value_network(
obs,
cont_actions,
memories=q_memories,
sequence_length=self.policy.sequence_length,
if self.policy.use_continuous_act:
squeezed_actions = actions.squeeze(-1)
# Only need grad for q1, as that is used for policy.
q1p_out, q2p_out = self.value_network(
obs,
sampled_actions,
memories=q_memories,
sequence_length=self.policy.sequence_length,
q2_grad=False,
)
q1_out, q2_out = self.value_network(
obs,
squeezed_actions,
memories=q_memories,
sequence_length=self.policy.sequence_length,
)
if self._action_spec.discrete_size > 0:
disc_actions = actions.discrete_tensor
q1_stream = self._condense_q_streams(q1_out, disc_actions)
q2_stream = self._condense_q_streams(q2_out, disc_actions)
else:
else:
# For discrete, you don't need to backprop through the Q for the policy
q1p_out, q2p_out = self.value_network(
obs,
memories=q_memories,
sequence_length=self.policy.sequence_length,
q1_grad=False,
q2_grad=False,
)
q1_out, q2_out = self.value_network(
obs,
memories=q_memories,
sequence_length=self.policy.sequence_length,
)
q1_stream = self._condense_q_streams(q1_out, actions)
q2_stream = self._condense_q_streams(q2_out, actions)
with torch.no_grad():
target_values, _ = self.target_network(

)
masks = ModelUtils.list_to_tensor(batch["masks"], dtype=torch.bool)
use_discrete = not self.policy.use_continuous_act
dones = ModelUtils.list_to_tensor(batch["done"])
q1_loss, q2_loss = self.sac_q_loss(

log_probs, value_estimates, q1p_out, q2p_out, masks, use_discrete
log_probs, value_estimates, q1p_out, q2p_out, masks
policy_loss = self.sac_policy_loss(log_probs, q1p_out, masks, use_discrete)
entropy_loss = self.sac_entropy_loss(log_probs, masks, use_discrete)
policy_loss = self.sac_policy_loss(log_probs, q1p_out, masks)
entropy_loss = self.sac_entropy_loss(log_probs, masks)
total_value_loss = q1_loss + q2_loss + value_loss

"Losses/Value Loss": value_loss.item(),
"Losses/Q1 Loss": q1_loss.item(),
"Losses/Q2 Loss": q2_loss.item(),
"Policy/Entropy Coeff": torch.mean(torch.exp(self._log_ent_coef)).item(),
"Policy/Discrete Entropy Coeff": torch.mean(
torch.exp(self._log_ent_coef.discrete)
).item(),
"Policy/Continuous Entropy Coeff": torch.mean(
torch.exp(self._log_ent_coef.continuous)
).item(),
"Policy/Learning Rate": decay_lr,
}
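
Sketch of the split entropy targets configured above: one scalar target for the continuous block and one per discrete branch. The scale values here are illustrative stand-ins for the optimizer's hyperparameters.

```python
import numpy as np

continuous_target_entropy_scale = 1.0   # illustrative
discrete_target_entropy_scale = 0.2     # illustrative
continuous_size = 2
discrete_branches = (3, 2)

cont_target = -1 * continuous_target_entropy_scale * np.prod(continuous_size).astype(np.float32)
disc_target = [
    discrete_target_entropy_scale * np.log(i).astype(np.float32)
    for i in discrete_branches
]
print(cont_target, disc_target)  # -2.0 [0.2197..., 0.1386...]
```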

2
ml-agents/mlagents/trainers/simple_env_manager.py


self.previous_all_action_info = all_action_info
for brain_name, action_info in all_action_info.items():
self.env.set_actions(brain_name, action_info.action)
self.env.set_actions(brain_name, action_info.env_action)
self.env.step()
all_step_result = self._generate_all_results()

56
ml-agents/mlagents/trainers/subprocess_env_manager.py


from typing import Dict, NamedTuple, List, Any, Optional, Callable, Set
import cloudpickle
import enum
import time
from mlagents_envs.environment import UnityEnvironment
from mlagents_envs.exception import (

logger = logging_util.get_logger(__name__)
WORKER_SHUTDOWN_TIMEOUT_S = 10
class EnvironmentCommand(enum.Enum):

RESET = 4
CLOSE = 5
ENV_EXITED = 6
CLOSED = 7
class EnvironmentRequest(NamedTuple):

self.previous_step: EnvironmentStep = EnvironmentStep.empty(worker_id)
self.previous_all_action_info: Dict[str, ActionInfo] = {}
self.waiting = False
self.closed = False
def send(self, cmd: EnvironmentCommand, payload: Any = None) -> None:
try:

except (BrokenPipeError, EOFError):
raise UnityCommunicationException("UnityEnvironment worker: recv failed.")
def close(self):
def request_close(self):
try:
self.conn.send(EnvironmentRequest(EnvironmentCommand.CLOSE))
except (BrokenPipeError, EOFError):

pass
logger.debug(f"UnityEnvWorker {self.worker_id} joining process.")
self.process.join()
def worker(

if req.cmd == EnvironmentCommand.STEP:
all_action_info = req.payload
for brain_name, action_info in all_action_info.items():
if len(action_info.action) != 0:
env.set_actions(brain_name, action_info.action)
if len(action_info.agent_ids) > 0:
env.set_actions(brain_name, action_info.env_action)
env.step()
all_step_result = _generate_all_results()
# The timers in this process are independent from all the other processes and the "main" process

EnvironmentResponse(EnvironmentCommand.ENV_EXITED, worker_id, ex)
)
_send_response(EnvironmentCommand.ENV_EXITED, ex)
except Exception as ex:
logger.error(
f"UnityEnvironment worker {worker_id}: environment raised an unexpected exception."
)
step_queue.put(
EnvironmentResponse(EnvironmentCommand.ENV_EXITED, worker_id, ex)
)
_send_response(EnvironmentCommand.ENV_EXITED, ex)
# If this worker has put an item in the step queue that hasn't been processed by the EnvManager, the process
# will hang until the item is processed. We avoid this behavior by using Queue.cancel_join_thread()
# See https://docs.python.org/3/library/multiprocessing.html#multiprocessing.Queue.cancel_join_thread for
# more info.
step_queue.cancel_join_thread()
step_queue.close()
parent_conn.close()
step_queue.put(EnvironmentResponse(EnvironmentCommand.CLOSED, worker_id, None))
step_queue.close()
class SubprocessEnvManager(EnvManager):

super().__init__()
self.env_workers: List[UnityEnvWorker] = []
self.step_queue: Queue = Queue()
self.workers_alive = 0
for worker_idx in range(n_env):
self.env_workers.append(
self.create_worker(

self.workers_alive += 1
@staticmethod
def create_worker(

def close(self) -> None:
logger.debug("SubprocessEnvManager closing.")
for env_worker in self.env_workers:
env_worker.request_close()
# Pull messages out of the queue until every worker has CLOSED or we time out.
deadline = time.time() + WORKER_SHUTDOWN_TIMEOUT_S
while self.workers_alive > 0 and time.time() < deadline:
try:
step: EnvironmentResponse = self.step_queue.get_nowait()
env_worker = self.env_workers[step.worker_id]
if step.cmd == EnvironmentCommand.CLOSED and not env_worker.closed:
env_worker.closed = True
self.workers_alive -= 1
# Discard all other messages.
except EmptyQueueException:
pass
# Sanity check to kill zombie workers and report an issue if they occur.
if self.workers_alive > 0:
logger.error("SubprocessEnvManager had workers that didn't signal shutdown")
for env_worker in self.env_workers:
if not env_worker.closed and env_worker.process.is_alive():
env_worker.process.terminate()
logger.error(
"A SubprocessEnvManager worker did not shut down correctly so it was forcefully terminated."
)
for env_worker in self.env_workers:
env_worker.close()
def _postprocess_steps(
self, env_steps: List[EnvironmentResponse]
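
The new close() path above asks every worker to shut down, then drains the step queue until each one reports CLOSED or a deadline passes, and only force-terminates the stragglers. A generic sketch of that drain-with-deadline pattern (plain queue.Queue and a string sentinel here; the manager itself uses a multiprocessing Queue and EnvironmentResponse messages):

```python
import time
from queue import Queue, Empty

def drain_until_closed(step_queue: Queue, workers_alive: int, timeout_s: float = 10.0) -> int:
    deadline = time.time() + timeout_s
    while workers_alive > 0 and time.time() < deadline:
        try:
            msg = step_queue.get_nowait()
            if msg == "CLOSED":
                workers_alive -= 1
            # Anything else on the queue is discarded during shutdown.
        except Empty:
            pass
    return workers_alive  # > 0 means some workers must be force-terminated
```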

24
ml-agents/mlagents/trainers/tests/mock_brain.py


import numpy as np
from mlagents.trainers.buffer import AgentBuffer
from mlagents.trainers.torch.action_log_probs import LogProbsTuple
from mlagents.trainers.trajectory import Trajectory, AgentExperience
from mlagents_envs.base_env import (
DecisionSteps,

ActionTuple,
)

steps_list = []
action_size = action_spec.discrete_size + action_spec.continuous_size
action_probs = np.ones(
int(np.sum(action_spec.discrete_branches) + action_spec.continuous_size),
dtype=np.float32,
)
for _i in range(length - 1):
obs = []
for _shape in observation_shapes:

action = np.zeros(action_size, dtype=np.float32)
action_pre = np.zeros(action_size, dtype=np.float32)
action = ActionTuple(
continuous=np.zeros(action_spec.continuous_size, dtype=np.float32),
discrete=np.zeros(action_spec.discrete_size, dtype=np.int32),
)
action_probs = LogProbsTuple(
continuous=np.ones(action_spec.continuous_size, dtype=np.float32),
discrete=np.ones(action_spec.discrete_size, dtype=np.float32),
)
action_mask = (
[
[False for _ in range(branch)]

else None
)
prev_action = np.ones(action_size, dtype=np.float32)
if action_spec.is_discrete():
prev_action = np.ones(action_size, dtype=np.int32)
else:
prev_action = np.ones(action_size, dtype=np.float32)
max_step = False
memory = np.ones(memory_size, dtype=np.float32)
agent_id = "test_agent"

done=done,
action=action,
action_probs=action_probs,
action_pre=action_pre,
action_mask=action_mask,
prev_action=prev_action,
interrupted=max_step,

done=not max_step_complete,
action=action,
action_probs=action_probs,
action_pre=action_pre,
action_mask=action_mask,
prev_action=prev_action,
interrupted=max_step_complete,

91
ml-agents/mlagents/trainers/tests/simple_test_envs.py


from mlagents_envs.base_env import (
ActionSpec,
ActionTuple,
BaseEnv,
BehaviorSpec,
DecisionSteps,

OBS_SIZE = 1
VIS_OBS_SIZE = (20, 20, 3)
STEP_SIZE = 0.1
STEP_SIZE = 0.2
TIME_PENALTY = 0.01
MIN_STEPS = int(1.0 / STEP_SIZE) + 1

def __init__(
self,
brain_names,
use_discrete,
action_size=1,
action_sizes=(1, 0),
self.discrete = use_discrete
if use_discrete:
action_spec = ActionSpec.create_discrete(
tuple(2 for _ in range(action_size))
)
else:
action_spec = ActionSpec.create_continuous(action_size)
continuous_action_size, discrete_action_size = action_sizes
discrete_tuple = tuple(2 for _ in range(discrete_action_size))
action_spec = ActionSpec(continuous_action_size, discrete_tuple)
self.total_action_size = (
continuous_action_size + discrete_action_size
) # to set the goals/positions
self.action_spec = action_spec
self.action_size = action_size
self.action_spec = action_spec
self.names = brain_names
self.positions: Dict[str, List[float]] = {}
self.step_count: Dict[str, float] = {}

def _take_action(self, name: str) -> bool:
deltas = []
for _act in self.action[name][0]:
if self.discrete:
deltas.append(1 if _act else -1)
else:
deltas.append(_act)
_act = self.action[name]
if self.action_spec.continuous_size > 0:
for _cont in _act.continuous[0]:
deltas.append(_cont)
if self.action_spec.discrete_size > 0:
for _disc in _act.discrete[0]:
deltas.append(1 if _disc else -1)
for i, _delta in enumerate(deltas):
_delta = clamp(_delta, -self.step_size, self.step_size)
self.positions[name][i] += _delta

return done
def _generate_mask(self):
if self.discrete:
action_mask = None
if self.action_spec.discrete_size > 0:
ndmask = np.array(2 * self.action_size * [False], dtype=np.bool)
ndmask = np.array(
2 * self.action_spec.discrete_size * [False], dtype=np.bool
)
else:
action_mask = None
return action_mask
def _compute_reward(self, name: str, done: bool) -> float:

def _reset_agent(self, name):
self.goal[name] = self.random.choice([-1, 1])
self.positions[name] = [0.0 for _ in range(self.action_size)]
self.positions[name] = [0.0 for _ in range(self.total_action_size)]
self.step_count[name] = 0
self.rewards[name] = 0
self.agent_id[name] = self.agent_id[name] + 1

class MemoryEnvironment(SimpleEnvironment):
def __init__(self, brain_names, use_discrete, step_size=0.2):
super().__init__(brain_names, use_discrete, step_size=step_size)
def __init__(self, brain_names, action_sizes=(1, 0), step_size=0.2):
super().__init__(brain_names, action_sizes=action_sizes, step_size=step_size)
# Number of steps to reveal the goal for. Lower is harder. Should be
# less than 1/step_size to force agent to use memory
self.num_show_steps = 2

def __init__(
self,
brain_names,
use_discrete,
action_sizes=(1, 0),
use_discrete,
action_sizes=action_sizes,
)
self.demonstration_protos: Dict[str, List[AgentInfoActionPairProto]] = {}
self.n_demos = n_demos

def step(self) -> None:
super().step()
for name in self.names:
discrete_actions = (
self.action[name].discrete
if self.action_spec.discrete_size > 0
else None
)
continuous_actions = (
self.action[name].continuous
if self.action_spec.continuous_size > 0
else None
)
self.step_result[name][0], self.step_result[name][1], self.action[name]
self.step_result[name][0],
self.step_result[name][1],
continuous_actions,
discrete_actions,
)
self.demonstration_protos[name] = self.demonstration_protos[name][
-self.n_demos :

self.reset()
for _ in range(self.n_demos):
for name in self.names:
if self.discrete:
self.action[name] = [[1]] if self.goal[name] > 0 else [[0]]
if self.action_spec.discrete_size > 0:
self.action[name] = ActionTuple(
np.array([], dtype=np.float32),
np.array(
[[1]] if self.goal[name] > 0 else [[0]], dtype=np.int32
),
)
self.action[name] = [[float(self.goal[name])]]
self.action[name] = ActionTuple(
np.array([[float(self.goal[name])]], dtype=np.float32),
np.array([], dtype=np.int32),
)
class UnexpectedExceptionEnvironment(SimpleEnvironment):
def __init__(self, brain_names, use_discrete, to_raise):
super().__init__(brain_names, use_discrete)
self.to_raise = to_raise
def step(self) -> None:
raise self.to_raise()
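
How the new action_sizes=(continuous, discrete) argument above maps onto an ActionSpec in this test environment, where every discrete branch is binary:

```python
from mlagents_envs.base_env import ActionSpec

def spec_from_action_sizes(action_sizes):
    continuous_size, discrete_size = action_sizes
    discrete_branches = tuple(2 for _ in range(discrete_size))
    return ActionSpec(continuous_size, discrete_branches)

spec = spec_from_action_sizes((1, 2))
assert spec.continuous_size == 1
assert spec.discrete_branches == (2, 2)
```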

66
ml-agents/mlagents/trainers/tests/tensorflow/test_ppo.py


dummy_config, use_rnn=rnn, use_discrete=discrete, use_visual=visual
)
# Test update
update_buffer = mb.simulate_rollout(
BUFFER_INIT_SAMPLES, optimizer.policy.behavior_spec
)
behavior_spec = optimizer.policy.behavior_spec
update_buffer = mb.simulate_rollout(BUFFER_INIT_SAMPLES, behavior_spec)
# NOTE: This is because TF outputs the log probs of all actions whereas PyTorch does not
if discrete:
n_agents = len(update_buffer["discrete_log_probs"])
update_buffer["discrete_log_probs"] = np.ones(
(n_agents, int(sum(behavior_spec.action_spec.discrete_branches))),
dtype=np.float32,
)
else:
n_agents = len(update_buffer["continuous_log_probs"])
update_buffer["continuous_log_probs"] = np.ones(
(n_agents, behavior_spec.action_spec.continuous_size), dtype=np.float32
)
optimizer.update(
update_buffer,
num_sequences=update_buffer.num_experiences // optimizer.policy.sequence_length,

dummy_config, use_rnn=rnn, use_discrete=discrete, use_visual=visual
)
# Test update
update_buffer = mb.simulate_rollout(
BUFFER_INIT_SAMPLES, optimizer.policy.behavior_spec
)
behavior_spec = optimizer.policy.behavior_spec
update_buffer = mb.simulate_rollout(BUFFER_INIT_SAMPLES, behavior_spec)
# Mock out reward signal eval
update_buffer["advantages"] = update_buffer["environment_rewards"]
update_buffer["extrinsic_returns"] = update_buffer["environment_rewards"]

# NOTE: This is because TF outputs the log probs of all actions whereas PyTorch does not
if discrete:
n_agents = len(update_buffer["discrete_log_probs"])
update_buffer["discrete_log_probs"] = np.ones(
(n_agents, int(sum(behavior_spec.action_spec.discrete_branches))),
dtype=np.float32,
)
else:
n_agents = len(update_buffer["continuous_log_probs"])
update_buffer["continuous_log_probs"] = np.ones(
(n_agents, behavior_spec.action_spec.continuous_size), dtype=np.float32
)
optimizer.update(
update_buffer,
num_sequences=update_buffer.num_experiences // optimizer.policy.sequence_length,

use_visual=False,
)
# Test update
update_buffer = mb.simulate_rollout(
BUFFER_INIT_SAMPLES, optimizer.policy.behavior_spec
)
behavior_spec = optimizer.policy.behavior_spec
update_buffer = mb.simulate_rollout(BUFFER_INIT_SAMPLES, behavior_spec)
# Mock out reward signal eval
update_buffer["advantages"] = update_buffer["environment_rewards"]
update_buffer["extrinsic_returns"] = update_buffer["environment_rewards"]

# NOTE: This is because TF outputs the log probs of all actions whereas PyTorch does not
n_agents = len(update_buffer["continuous_log_probs"])
update_buffer["continuous_log_probs"] = np.ones(
(n_agents, behavior_spec.action_spec.continuous_size), dtype=np.float32
)
optimizer.update(
update_buffer,
num_sequences=update_buffer.num_experiences // optimizer.policy.sequence_length,

buffer["curiosity_returns"] = buffer["environment_rewards"]
buffer["curiosity_value_estimates"] = buffer["environment_rewards"]
buffer["advantages"] = buffer["environment_rewards"]
# NOTE: This is because TF outputs the log probs of all actions whereas PyTorch does not
if use_discrete:
n_agents = len(buffer["discrete_log_probs"])
buffer["discrete_log_probs"].reset_field()
for _ in range(n_agents):
buffer["discrete_log_probs"].append(
np.ones(
int(sum(mock_behavior_spec.action_spec.discrete_branches)),
dtype=np.float32,
)
)
else:
n_agents = len(buffer["continuous_log_probs"])
buffer["continuous_log_probs"].reset_field()
for _ in range(n_agents):
buffer["continuous_log_probs"].append(
np.ones(
mock_behavior_spec.action_spec.continuous_size, dtype=np.float32
)
)
trainer.update_buffer = buffer
trainer._update_policy()

128
ml-agents/mlagents/trainers/tests/tensorflow/test_simple_rl.py


assert all(reward > success_threshold for reward in processed_rewards)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_ppo(use_discrete):
env = SimpleEnvironment([BRAIN_NAME], use_discrete=use_discrete)
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_simple_ppo(action_sizes):
env = SimpleEnvironment([BRAIN_NAME], action_sizes=action_sizes)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_2d_ppo(use_discrete):
env = SimpleEnvironment(
[BRAIN_NAME], use_discrete=use_discrete, action_size=2, step_size=0.8
)
@pytest.mark.parametrize("action_sizes", [(0, 2), (2, 0)])
def test_2d_ppo(action_sizes):
env = SimpleEnvironment([BRAIN_NAME], action_sizes=action_sizes, step_size=0.8)
new_hyperparams = attr.evolve(
PPO_TF_CONFIG.hyperparameters, batch_size=64, buffer_size=640
)

_check_environment_trains(env, {BRAIN_NAME: config})
@pytest.mark.parametrize("use_discrete", [True, False])
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_visual_ppo(num_visual, use_discrete):
def test_visual_ppo(num_visual, action_sizes):
use_discrete=use_discrete,
action_sizes=action_sizes,
num_visual=num_visual,
num_vector=0,
step_size=0.2,

def test_visual_advanced_ppo(vis_encode_type, num_visual):
env = SimpleEnvironment(
[BRAIN_NAME],
use_discrete=True,
action_sizes=(0, 1),
num_visual=num_visual,
num_vector=0,
step_size=0.5,

PPO_TF_CONFIG,
hyperparameters=new_hyperparams,
network_settings=new_networksettings,
max_steps=300,
max_steps=400,
summary_freq=100,
framework=FrameworkType.TENSORFLOW,
)

@pytest.mark.parametrize("use_discrete", [True, False])
def test_recurrent_ppo(use_discrete):
env = MemoryEnvironment([BRAIN_NAME], use_discrete=use_discrete)
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_recurrent_ppo(action_sizes):
env = MemoryEnvironment([BRAIN_NAME], action_sizes=action_sizes)
new_network_settings = attr.evolve(
PPO_TF_CONFIG.network_settings,
memory=NetworkSettings.MemorySettings(memory_size=16),

_check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_sac(use_discrete):
env = SimpleEnvironment([BRAIN_NAME], use_discrete=use_discrete)
config = attr.evolve(SAC_TF_CONFIG, framework=FrameworkType.TENSORFLOW)
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_simple_sac(action_sizes):
env = SimpleEnvironment([BRAIN_NAME], action_sizes=action_sizes)
config = attr.evolve(
SAC_TF_CONFIG, framework=FrameworkType.TENSORFLOW, max_steps=900
)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_2d_sac(use_discrete):
env = SimpleEnvironment(
[BRAIN_NAME], use_discrete=use_discrete, action_size=2, step_size=0.8
)
@pytest.mark.parametrize("action_sizes", [(0, 2), (2, 0)])
def test_2d_sac(action_sizes):
env = SimpleEnvironment([BRAIN_NAME], action_sizes=action_sizes, step_size=0.8)
new_hyperparams = attr.evolve(SAC_TF_CONFIG.hyperparameters, buffer_init_steps=2000)
config = attr.evolve(
SAC_TF_CONFIG,

_check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.8)
@pytest.mark.parametrize("use_discrete", [True, False])
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_visual_sac(num_visual, use_discrete):
def test_visual_sac(num_visual, action_sizes):
use_discrete=use_discrete,
action_sizes=action_sizes,
num_visual=num_visual,
num_vector=0,
step_size=0.2,

def test_visual_advanced_sac(vis_encode_type, num_visual):
env = SimpleEnvironment(
[BRAIN_NAME],
use_discrete=True,
action_sizes=(0, 1),
num_visual=num_visual,
num_vector=0,
step_size=0.5,

SAC_TF_CONFIG,
hyperparameters=new_hyperparams,
network_settings=new_networksettings,
max_steps=100,
max_steps=200,
framework=FrameworkType.TENSORFLOW,
)
# The number of steps is pretty small for these encoders

@pytest.mark.parametrize("use_discrete", [True, False])
def test_recurrent_sac(use_discrete):
step_size = 0.2 if use_discrete else 0.5
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_recurrent_sac(action_sizes):
step_size = 0.2 if action_sizes == (0, 1) else 0.5
[BRAIN_NAME], use_discrete=use_discrete, step_size=step_size
[BRAIN_NAME], action_sizes=action_sizes, step_size=step_size
)
new_networksettings = attr.evolve(
SAC_TF_CONFIG.network_settings,

_check_environment_trains(env, {BRAIN_NAME: config})
@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_ghost(use_discrete):
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_simple_ghost(action_sizes):
[BRAIN_NAME + "?team=0", BRAIN_NAME + "?team=1"], use_discrete=use_discrete
[BRAIN_NAME + "?team=0", BRAIN_NAME + "?team=1"], action_sizes=action_sizes
)
self_play_settings = SelfPlaySettings(
play_against_latest_model_ratio=1.0, save_steps=2000, swap_steps=2000

_check_environment_trains(env, {BRAIN_NAME: config})
@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_ghost_fails(use_discrete):
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_simple_ghost_fails(action_sizes):
[BRAIN_NAME + "?team=0", BRAIN_NAME + "?team=1"], use_discrete=use_discrete
[BRAIN_NAME + "?team=0", BRAIN_NAME + "?team=1"], action_sizes=action_sizes
)
# This config should fail because the ghosted policy is never swapped with a competent policy.
# Swap occurs after max step is reached.

)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_asymm_ghost(use_discrete):
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_simple_asymm_ghost(action_sizes):
[BRAIN_NAME + "?team=0", brain_name_opp + "?team=1"], use_discrete=use_discrete
[BRAIN_NAME + "?team=0", brain_name_opp + "?team=1"], action_sizes=action_sizes
)
self_play_settings = SelfPlaySettings(
play_against_latest_model_ratio=1.0,

_check_environment_trains(env, {BRAIN_NAME: config, brain_name_opp: config})
@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_asymm_ghost_fails(use_discrete):
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_simple_asymm_ghost_fails(action_sizes):
[BRAIN_NAME + "?team=0", brain_name_opp + "?team=1"], use_discrete=use_discrete
[BRAIN_NAME + "?team=0", brain_name_opp + "?team=1"], action_sizes=action_sizes
)
# This config should fail because the team that is not learning when both have reached
# max step should be executing the initial, untrained policy.

@pytest.fixture(scope="session")
def simple_record(tmpdir_factory):
def record_demo(use_discrete, num_visual=0, num_vector=1):
def record_demo(action_sizes, num_visual=0, num_vector=1):
use_discrete=use_discrete,
action_sizes=action_sizes,
num_visual=num_visual,
num_vector=num_vector,
n_demos=100,

env.solve()
continuous_size, discrete_size = action_sizes
use_discrete = True if discrete_size > 0 else False
vector_action_size=[2] if use_discrete else [1],
vector_action_descriptions=[""],
vector_action_space_type=discrete if use_discrete else continuous,
vector_action_size_deprecated=[2] if use_discrete else [1],
vector_action_descriptions_deprecated=[""],
vector_action_space_type_deprecated=discrete
if use_discrete
else continuous,
brain_name=BRAIN_NAME,
is_training=True,
)

return record_demo
@pytest.mark.parametrize("use_discrete", [True, False])
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_gail(simple_record, use_discrete, trainer_config):
demo_path = simple_record(use_discrete)
env = SimpleEnvironment([BRAIN_NAME], use_discrete=use_discrete, step_size=0.2)
def test_gail(simple_record, action_sizes, trainer_config):
demo_path = simple_record(action_sizes)
env = SimpleEnvironment([BRAIN_NAME], action_sizes=action_sizes, step_size=0.2)
bc_settings = BehavioralCloningSettings(demo_path=demo_path, steps=1000)
reward_signals = {
RewardSignalType.GAIL: GAILSettings(encoding_size=32, demo_path=demo_path)

_check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_gail_visual_ppo(simple_record, use_discrete):
demo_path = simple_record(use_discrete, num_visual=1, num_vector=0)
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_gail_visual_ppo(simple_record, action_sizes):
demo_path = simple_record(action_sizes, num_visual=1, num_vector=0)
use_discrete=use_discrete,
action_sizes=action_sizes,
step_size=0.2,
)
bc_settings = BehavioralCloningSettings(demo_path=demo_path, steps=1500)

_check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_gail_visual_sac(simple_record, use_discrete):
demo_path = simple_record(use_discrete, num_visual=1, num_vector=0)
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_gail_visual_sac(simple_record, action_sizes):
demo_path = simple_record(action_sizes, num_visual=1, num_vector=0)
use_discrete=use_discrete,
action_sizes=action_sizes,
step_size=0.2,
)
bc_settings = BehavioralCloningSettings(demo_path=demo_path, steps=1000)

12
ml-agents/mlagents/trainers/tests/tensorflow/test_tf_policy.py


[], np.array([], dtype=np.float32), np.array([0]), None
)
result = policy.get_action(step_with_agents, worker_id=0)
assert result == ActionInfo(None, None, {}, [0])
assert result == ActionInfo(None, None, None, {}, [0])
def test_take_action_returns_action_info_when_available():

policy_eval_out = {
"action": np.array([1.0], dtype=np.float32),
"action": np.array([[1.0]], dtype=np.float32),
"pre_action": np.array([[1.0]], dtype=np.float32),
"memory_out": np.array([[2.5]], dtype=np.float32),
"value": np.array([1.1], dtype=np.float32),
}

)
result = policy.get_action(step_with_agents)
print(result)
policy_eval_out["action"], policy_eval_out["value"], policy_eval_out, [0]
policy_eval_out["action"],
policy_eval_out["env_action"],
policy_eval_out["value"],
policy_eval_out,
[0],
)
assert result == expected

41
ml-agents/mlagents/trainers/tests/test_agent_processor.py


AgentManagerQueue,
)
from mlagents.trainers.action_info import ActionInfo
from mlagents.trainers.torch.action_log_probs import LogProbsTuple
from mlagents_envs.base_env import ActionSpec
from mlagents_envs.base_env import ActionSpec, ActionTuple
def create_mock_policy():

mock_policy.retrieve_previous_action.return_value = np.zeros(
(1, 1), dtype=np.float32
)
mock_policy.retrieve_previous_action.return_value = np.zeros((1, 1), dtype=np.int32)
return mock_policy

)
fake_action_outputs = {
"action": [0.1, 0.1],
"action": ActionTuple(continuous=np.array([[0.1], [0.1]])),
"pre_action": [0.1, 0.1],
"log_probs": [0.1, 0.1],
"log_probs": LogProbsTuple(continuous=np.array([[0.1], [0.1]])),
}
mock_decision_steps, mock_terminal_steps = mb.create_mock_steps(
num_agents=2,

fake_action_info = ActionInfo(
action=[0.1, 0.1],
action=ActionTuple(continuous=np.array([[0.1], [0.1]])),
env_action=ActionTuple(continuous=np.array([[0.1], [0.1]])),
value=[0.1, 0.1],
outputs=fake_action_outputs,
agent_ids=mock_decision_steps.agent_id,

action_spec=ActionSpec.create_continuous(2),
)
processor.add_experiences(
mock_decision_steps, mock_terminal_steps, 0, ActionInfo([], [], {}, [])
mock_decision_steps, mock_terminal_steps, 0, ActionInfo.empty()
)
# Assert that the AgentProcessor is still empty
assert len(processor.experience_buffers[0]) == 0
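The hunks above replace flat action lists with ActionTuple objects and add an env_action field to ActionInfo, so a policy's output now carries both the action kept for training and the action actually sent to the environment. A minimal sketch of the new container shapes, assuming the ActionTuple and ActionInfo signatures shown in this diff:

import numpy as np
from mlagents_envs.base_env import ActionTuple
from mlagents.trainers.action_info import ActionInfo

# Two agents, one continuous action each: arrays are (num_agents, action_size).
act = ActionTuple(continuous=np.array([[0.1], [0.1]], dtype=np.float32))
info = ActionInfo(
    action=act,       # action kept for training (e.g. before clipping)
    env_action=act,   # action actually applied in the environment
    value=[0.1, 0.1],
    outputs={},
    agent_ids=[0, 1],
)
assert info.action.continuous.shape == (2, 1)
# ActionInfo.empty() replaces the old ActionInfo([], [], {}, []) placeholder.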

max_trajectory_length=5,
stats_reporter=StatsReporter("testcat"),
)
"action": [0.1],
"action": ActionTuple(continuous=np.array([[0.1]])),
"pre_action": [0.1],
"log_probs": [0.1],
"log_probs": LogProbsTuple(continuous=np.array([[0.1]])),
mock_decision_step, mock_terminal_step = mb.create_mock_steps(
num_agents=1,
observation_shapes=[(8,)],

done=True,
)
fake_action_info = ActionInfo(
action=[0.1],
action=ActionTuple(continuous=np.array([[0.1]])),
env_action=ActionTuple(continuous=np.array([[0.1]])),
value=[0.1],
outputs=fake_action_outputs,
agent_ids=mock_decision_step.agent_id,

processor.add_experiences(
mock_decision_step, mock_terminal_step, _ep, fake_action_info
)
add_calls.append(mock.call([get_global_agent_id(_ep, 0)], [0.1]))
add_calls.append(
mock.call([get_global_agent_id(_ep, 0)], fake_action_outputs["action"])
)
processor.add_experiences(
mock_done_decision_step, mock_done_terminal_step, _ep, fake_action_info
)

max_trajectory_length=5,
stats_reporter=StatsReporter("testcat"),
)
"action": [0.1],
"action": ActionTuple(continuous=np.array([[0.1]])),
"pre_action": [0.1],
"log_probs": [0.1],
"log_probs": LogProbsTuple(continuous=np.array([[0.1]])),
mock_decision_step, mock_terminal_step = mb.create_mock_steps(
num_agents=1,
observation_shapes=[(8,)],

action=[0.1],
action=ActionTuple(continuous=np.array([[0.1]])),
env_action=ActionTuple(continuous=np.array([[0.1]])),
value=[0.1],
outputs=fake_action_outputs,
agent_ids=mock_decision_step.agent_id,

10
ml-agents/mlagents/trainers/tests/test_demo_loader.py


assert len(pair_infos) == total_expected
_, demo_buffer = demo_to_buffer(path_prefix + "/test.demo", 1, BEHAVIOR_SPEC)
assert len(demo_buffer["actions"]) == total_expected - 1
assert (
len(demo_buffer["continuous_action"]) == total_expected - 1
or len(demo_buffer["discrete_action"]) == total_expected - 1
)
def test_load_demo_dir():

assert len(pair_infos) == total_expected
_, demo_buffer = demo_to_buffer(path_prefix + "/test_demo_dir", 1, BEHAVIOR_SPEC)
assert len(demo_buffer["actions"]) == total_expected - 1
assert (
len(demo_buffer["continuous_action"]) == total_expected - 1
or len(demo_buffer["discrete_action"]) == total_expected - 1
)
def test_demo_mismatch():

33
ml-agents/mlagents/trainers/tests/test_subprocess_env_manager.py


from mlagents_envs.side_channel.engine_configuration_channel import EngineConfig
from mlagents_envs.side_channel.stats_side_channel import StatsAggregationMethod
from mlagents_envs.exception import UnityEnvironmentException
from mlagents.trainers.tests.simple_test_envs import SimpleEnvironment
from mlagents.trainers.tests.simple_test_envs import (
SimpleEnvironment,
UnexpectedExceptionEnvironment,
)
from mlagents.trainers.stats import StatsReporter
from mlagents.trainers.agent_processor import AgentManagerQueue
from mlagents.trainers.tests.check_env_trains import (

@pytest.mark.parametrize("num_envs", [1, 4])
def test_subprocess_env_endtoend(num_envs):
def simple_env_factory(worker_id, config):
env = SimpleEnvironment(["1D"], use_discrete=True)
env = SimpleEnvironment(["1D"], action_sizes=(0, 1))
return env
env_manager = SubprocessEnvManager(

assert all(
val > 0.7 for val in StatsReporter.writers[0].get_last_rewards().values()
)
env_manager.close()
class CustomTestOnlyException(Exception):
pass
@pytest.mark.parametrize("num_envs", [1, 4])
def test_subprocess_failing_step(num_envs):
def failing_step_env_factory(_worker_id, _config):
env = UnexpectedExceptionEnvironment(
["1D"], use_discrete=True, to_raise=CustomTestOnlyException
)
return env
env_manager = SubprocessEnvManager(
failing_step_env_factory, EngineConfig.default_config()
)
# Expect the exception raised to be routed back up to the top level.
with pytest.raises(CustomTestOnlyException):
check_environment_trains(
failing_step_env_factory(0, []),
{"1D": ppo_dummy_config()},
env_manager=env_manager,
success_threshold=None,
)
env_manager.close()

7
ml-agents/mlagents/trainers/tests/test_trajectory.py


"memory",
"masks",
"done",
"actions_pre",
"actions",
"action_probs",
"continuous_action",
"discrete_action",
"continuous_log_probs",
"discrete_log_probs",
"action_mask",
"prev_action",
"environment_rewards",

13
ml-agents/mlagents/trainers/tests/torch/saver/test_saver.py


).unsqueeze(0)
with torch.no_grad():
_, _, log_probs1, _, _ = policy1.sample_actions(
vec_obs, vis_obs, masks=masks, memories=memories, all_log_probs=True
_, log_probs1, _, _ = policy1.sample_actions(
vec_obs, vis_obs, masks=masks, memories=memories
_, _, log_probs2, _, _ = policy2.sample_actions(
vec_obs, vis_obs, masks=masks, memories=memories, all_log_probs=True
_, log_probs2, _, _ = policy2.sample_actions(
vec_obs, vis_obs, masks=masks, memories=memories
np.testing.assert_array_equal(log_probs1, log_probs2)
np.testing.assert_array_equal(
log_probs1.all_discrete_tensor, log_probs2.all_discrete_tensor
)
@pytest.mark.parametrize("discrete", [True, False], ids=["discrete", "continuous"])

2
ml-agents/mlagents/trainers/tests/torch/test_distributions.py


optimizer = torch.optim.Adam(gauss_dist.parameters(), lr=3e-3)
for _ in range(50):
dist_inst = gauss_dist(sample_embedding)[0]
dist_inst = gauss_dist(sample_embedding)
if tanh_squash:
assert isinstance(dist_inst, TanhGaussianDistInstance)
else:

90
ml-agents/mlagents/trainers/tests/torch/test_networks.py


from mlagents.trainers.torch.networks import (
NetworkBody,
ValueNetwork,
SimpleActor,
from mlagents.trainers.torch.distributions import (
GaussianDistInstance,
CategoricalDistInstance,
)
from mlagents.trainers.tests.torch.test_encoders import compare_models
def test_networkbody_vector():

assert _out[0] == pytest.approx(1.0, abs=0.1)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_actor(use_discrete):
obs_size = 4
network_settings = NetworkSettings()
obs_shapes = [(obs_size,)]
act_size = [2]
if use_discrete:
masks = torch.ones((1, 1))
action_spec = ActionSpec.create_discrete(tuple(act_size))
else:
masks = None
action_spec = ActionSpec.create_continuous(act_size[0])
actor = SimpleActor(obs_shapes, network_settings, action_spec)
# Test get_dist
sample_obs = torch.ones((1, obs_size))
dists, _ = actor.get_dists([sample_obs], [], masks=masks)
for dist in dists:
if use_discrete:
assert isinstance(dist, CategoricalDistInstance)
else:
assert isinstance(dist, GaussianDistInstance)
# Test sample_actions
actions = actor.sample_action(dists)
for act in actions:
if use_discrete:
assert act.shape == (1, 1)
else:
assert act.shape == (1, act_size[0])
# Test forward
actions, ver_num, mem_size, is_cont, act_size_vec = actor.forward(
[sample_obs], [], masks=masks
)
for act in actions:
# This is different from above for ONNX export
if use_discrete:
assert act.shape == tuple(act_size)
else:
assert act.shape == (act_size[0], 1)
assert mem_size == 0
assert is_cont == int(not use_discrete)
assert act_size_vec == torch.tensor(act_size)
memory=NetworkSettings.MemorySettings() if lstm else None
memory=NetworkSettings.MemorySettings() if lstm else None, normalize=True
act_size = [2]
act_size = 2
mask = torch.ones([1, act_size * 2])
action_spec = ActionSpec.create_continuous(act_size[0])
# action_spec = ActionSpec.create_continuous(act_size[0])
action_spec = ActionSpec(act_size, tuple(act_size for _ in range(act_size)))
actor = ac_type(obs_shapes, network_settings, action_spec, stream_names)
if lstm:
sample_obs = torch.ones((1, network_settings.memory.sequence_length, obs_size))

else:
assert value_out[stream].shape == (1,)
# Test get_dist_and_value
dists, value_out, mem_out = actor.get_dist_and_value(
[sample_obs], [], memories=memories
# Test get action stats and_value
action, log_probs, entropies, value_out, mem_out = actor.get_action_stats_and_value(
[sample_obs], [], memories=memories, masks=mask
if lstm:
assert action.continuous_tensor.shape == (64, 2)
else:
assert action.continuous_tensor.shape == (1, 2)
assert len(action.discrete_list) == 2
for _disc in action.discrete_list:
if lstm:
assert _disc.shape == (64, 1)
else:
assert _disc.shape == (1, 1)
for dist in dists:
assert isinstance(dist, GaussianDistInstance)
# Test normalization
actor.update_normalization(sample_obs)
if isinstance(actor, SeparateActorCritic):
for act_proc, crit_proc in zip(
actor.network_body.vector_processors,
actor.critic.network_body.vector_processors,
):
assert compare_models(act_proc, crit_proc)
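The actor-critic test above now exercises a hybrid action space: ActionSpec takes the continuous size first and the tuple of discrete branch sizes second, so ActionSpec(2, (2, 2)) describes two continuous actions plus two discrete branches of two choices each, which is why the action mask is built with act_size * 2 = 4 entries. A small sketch, assuming ActionSpec's fields are (continuous_size, discrete_branches) as in mlagents_envs at this revision:

from mlagents_envs.base_env import ActionSpec

act_size = 2
# Hybrid spec: 2 continuous actions and 2 discrete branches of size 2 each.
action_spec = ActionSpec(act_size, tuple(act_size for _ in range(act_size)))
assert action_spec.continuous_size == 2
assert action_spec.discrete_size == 2           # number of discrete branches
assert sum(action_spec.discrete_branches) == 4  # total choices -> mask width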

28
ml-agents/mlagents/trainers/tests/torch/test_policy.py


from mlagents.trainers.tests import mock_brain as mb
from mlagents.trainers.settings import TrainerSettings, NetworkSettings
from mlagents.trainers.torch.utils import ModelUtils
from mlagents.trainers.torch.agent_action import AgentAction
VECTOR_ACTION_SPACE = 2
VECTOR_OBS_SPACE = 8

run_out = policy.evaluate(decision_step, list(decision_step.agent_id))
if discrete:
run_out["action"].shape == (NUM_AGENTS, len(DISCRETE_ACTION_SPACE))
run_out["action"].discrete.shape == (NUM_AGENTS, len(DISCRETE_ACTION_SPACE))
assert run_out["action"].shape == (NUM_AGENTS, VECTOR_ACTION_SPACE)
assert run_out["action"].continuous.shape == (NUM_AGENTS, VECTOR_ACTION_SPACE)
@pytest.mark.parametrize("discrete", [True, False], ids=["discrete", "continuous"])

buffer = mb.simulate_rollout(64, policy.behavior_spec, memory_size=policy.m_size)
vec_obs = [ModelUtils.list_to_tensor(buffer["vector_obs"])]
act_masks = ModelUtils.list_to_tensor(buffer["action_mask"])
if policy.use_continuous_act:
actions = ModelUtils.list_to_tensor(buffer["actions"]).unsqueeze(-1)
else:
actions = ModelUtils.list_to_tensor(buffer["actions"], dtype=torch.long)
agent_action = AgentAction.from_dict(buffer)
vis_obs = []
for idx, _ in enumerate(policy.actor_critic.network_body.visual_processors):
vis_ob = ModelUtils.list_to_tensor(buffer["visual_obs%d" % idx])

vec_obs,
vis_obs,
masks=act_masks,
actions=actions,
actions=agent_action,
memories=memories,
seq_len=policy.sequence_length,
)

_size = policy.behavior_spec.action_spec.continuous_size
assert log_probs.shape == (64, _size)
assert log_probs.flatten().shape == (64, _size)
assert entropy.shape == (64,)
for val in values.values():
assert val.shape == (64,)

if len(memories) > 0:
memories = torch.stack(memories).unsqueeze(0)
(
sampled_actions,
clipped_actions,
log_probs,
entropies,
memories,
) = policy.sample_actions(
(sampled_actions, log_probs, entropies, memories) = policy.sample_actions(
all_log_probs=not policy.use_continuous_act,
assert log_probs.shape == (
assert log_probs.all_discrete_tensor.shape == (
assert log_probs.shape == (64, policy.behavior_spec.action_spec.continuous_size)
assert clipped_actions.shape == (
assert log_probs.continuous_tensor.shape == (
64,
policy.behavior_spec.action_spec.continuous_size,
)

15
ml-agents/mlagents/trainers/tests/torch/test_ppo.py


update_buffer["extrinsic_returns"] = update_buffer["environment_rewards"]
update_buffer["extrinsic_value_estimates"] = update_buffer["environment_rewards"]
# NOTE: In TensorFlow, the log_probs are saved as one for every discrete action, whereas
# in PyTorch they are saved as the total probability per branch. So we need to modify the
# log prob in the fake buffer here.
update_buffer["action_probs"] = np.ones_like(update_buffer["actions"])
return_stats = optimizer.update(
update_buffer,
num_sequences=update_buffer.num_experiences // optimizer.policy.sequence_length,
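The NOTE above flags a storage difference between the two trainers: TensorFlow keeps one log probability for every possible discrete action, while PyTorch keeps a single value per branch, which is why the fake buffer's action_probs are overwritten before calling optimizer.update. A rough shape-only sketch for one discrete branch of three choices over 64 experiences (illustration only, not the trainers' actual buffer code):

import numpy as np

num_experiences, branch_size = 64, 3
# TensorFlow-style: one log prob per possible action in the branch.
tf_style_log_probs = np.zeros((num_experiences, branch_size), dtype=np.float32)
# PyTorch-style: one (total) log prob per branch.
torch_style_log_probs = np.zeros((num_experiences, 1), dtype=np.float32)
# The tests sidestep the mismatch by rewriting the fake buffer, e.g.
# update_buffer["action_probs"] = np.ones_like(update_buffer["actions"]).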

update_buffer["extrinsic_value_estimates"] = update_buffer["environment_rewards"]
update_buffer["curiosity_returns"] = update_buffer["environment_rewards"]
update_buffer["curiosity_value_estimates"] = update_buffer["environment_rewards"]
# NOTE: In TensorFlow, the log_probs are saved as one for every discrete action, whereas
# in PyTorch they are saved as the total probability per branch. So we need to modify the
# log prob in the fake buffer here.
update_buffer["action_probs"] = np.ones_like(update_buffer["actions"])
optimizer.update(
update_buffer,
num_sequences=update_buffer.num_experiences // optimizer.policy.sequence_length,

update_buffer["extrinsic_value_estimates"] = update_buffer["environment_rewards"]
update_buffer["gail_returns"] = update_buffer["environment_rewards"]
update_buffer["gail_value_estimates"] = update_buffer["environment_rewards"]
update_buffer["continuous_log_probs"] = np.ones_like(
update_buffer["continuous_action"]
)
optimizer.update(
update_buffer,
num_sequences=update_buffer.num_experiences // optimizer.policy.sequence_length,

update_buffer["extrinsic_value_estimates"] = update_buffer["environment_rewards"]
update_buffer["gail_returns"] = update_buffer["environment_rewards"]
update_buffer["gail_value_estimates"] = update_buffer["environment_rewards"]
# NOTE: In TensorFlow, the log_probs are saved as one for every discrete action, whereas
# in PyTorch they are saved as the total probability per branch. So we need to modify the
# log prob in the fake buffer here.
update_buffer["action_probs"] = np.ones_like(update_buffer["actions"])
optimizer.update(
update_buffer,
num_sequences=update_buffer.num_experiences // optimizer.policy.sequence_length,

2
ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_curiosity.py


for _ in range(200):
curiosity_rp.update(buffer)
prediction = curiosity_rp._network.predict_action(buffer)[0]
target = torch.tensor(buffer["actions"][0])
target = torch.tensor(buffer["continuous_action"][0])
error = torch.mean((prediction - target) ** 2).item()
assert error < 0.001

11
ml-agents/mlagents/trainers/tests/torch/test_reward_providers/utils.py


np.random.normal(size=shape).astype(np.float32)
for shape in behavior_spec.observation_shapes
]
action = behavior_spec.action_spec.random_action(1)[0, :]
action_buffer = behavior_spec.action_spec.random_action(1)
action = {}
if behavior_spec.action_spec.continuous_size > 0:
action["continuous_action"] = action_buffer.continuous
if behavior_spec.action_spec.discrete_size > 0:
action["discrete_action"] = action_buffer.discrete
for _ in range(number):
curr_split_obs = SplitObservations.from_observations(curr_observations)
next_split_obs = SplitObservations.from_observations(next_observations)

)
buffer["vector_obs"].append(curr_split_obs.vector_observations)
buffer["next_vector_in"].append(next_split_obs.vector_observations)
buffer["actions"].append(action)
for _act_type, _act in action.items():
buffer[_act_type].append(_act[0, :])
buffer["reward"].append(np.ones(1, dtype=np.float32) * reward)
buffer["masks"].append(np.ones(1, dtype=np.float32))
buffer["done"] = np.zeros(number, dtype=np.float32)

3
ml-agents/mlagents/trainers/tests/torch/test_sac.py


"Losses/Value Loss",
"Losses/Q1 Loss",
"Losses/Q2 Loss",
"Policy/Entropy Coeff",
"Policy/Continuous Entropy Coeff",
"Policy/Discrete Entropy Coeff",
"Policy/Learning Rate",
]
for stat in required_stats:

132
ml-agents/mlagents/trainers/tests/torch/test_simple_rl.py


from mlagents_envs.communicator_objects.demonstration_meta_pb2 import (
DemonstrationMetaProto,
)
from mlagents_envs.communicator_objects.brain_parameters_pb2 import BrainParametersProto
from mlagents_envs.communicator_objects.space_type_pb2 import discrete, continuous
from mlagents_envs.communicator_objects.brain_parameters_pb2 import (
BrainParametersProto,
ActionSpecProto,
)
from mlagents.trainers.tests.dummy_config import ppo_dummy_config, sac_dummy_config
from mlagents.trainers.tests.check_env_trains import (

SAC_TORCH_CONFIG = attr.evolve(sac_dummy_config(), framework=FrameworkType.PYTORCH)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_ppo(use_discrete):
env = SimpleEnvironment([BRAIN_NAME], use_discrete=use_discrete)
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_simple_ppo(action_sizes):
env = SimpleEnvironment([BRAIN_NAME], action_sizes=action_sizes)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_2d_ppo(use_discrete):
env = SimpleEnvironment(
[BRAIN_NAME], use_discrete=use_discrete, action_size=2, step_size=0.8
)
@pytest.mark.parametrize("action_sizes", [(0, 2), (2, 0)])
def test_2d_ppo(action_sizes):
env = SimpleEnvironment([BRAIN_NAME], action_sizes=action_sizes, step_size=0.8)
new_hyperparams = attr.evolve(
PPO_TORCH_CONFIG.hyperparameters, batch_size=64, buffer_size=640
)

check_environment_trains(env, {BRAIN_NAME: config})
@pytest.mark.parametrize("use_discrete", [True, False])
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_visual_ppo(num_visual, use_discrete):
def test_visual_ppo(num_visual, action_sizes):
use_discrete=use_discrete,
action_sizes=action_sizes,
num_visual=num_visual,
num_vector=0,
step_size=0.2,

def test_visual_advanced_ppo(vis_encode_type, num_visual):
env = SimpleEnvironment(
[BRAIN_NAME],
use_discrete=True,
action_sizes=(0, 1),
num_visual=num_visual,
num_vector=0,
step_size=0.5,

check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.5)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_recurrent_ppo(use_discrete):
env = MemoryEnvironment([BRAIN_NAME], use_discrete=use_discrete)
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_recurrent_ppo(action_sizes):
env = MemoryEnvironment([BRAIN_NAME], action_sizes=action_sizes)
new_network_settings = attr.evolve(
PPO_TORCH_CONFIG.network_settings,
memory=NetworkSettings.MemorySettings(memory_size=16),

check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_sac(use_discrete):
env = SimpleEnvironment([BRAIN_NAME], use_discrete=use_discrete)
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_simple_sac(action_sizes):
env = SimpleEnvironment([BRAIN_NAME], action_sizes=action_sizes)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_2d_sac(use_discrete):
env = SimpleEnvironment(
[BRAIN_NAME], use_discrete=use_discrete, action_size=2, step_size=0.8
)
@pytest.mark.parametrize("action_sizes", [(0, 2), (2, 0)])
def test_2d_sac(action_sizes):
env = SimpleEnvironment([BRAIN_NAME], action_sizes=action_sizes, step_size=0.8)
SAC_TORCH_CONFIG, hyperparameters=new_hyperparams, max_steps=10000
SAC_TORCH_CONFIG, hyperparameters=new_hyperparams, max_steps=6000
@pytest.mark.parametrize("use_discrete", [True, False])
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_visual_sac(num_visual, use_discrete):
def test_visual_sac(num_visual, action_sizes):
use_discrete=use_discrete,
action_sizes=action_sizes,
num_visual=num_visual,
num_vector=0,
step_size=0.2,

def test_visual_advanced_sac(vis_encode_type, num_visual):
env = SimpleEnvironment(
[BRAIN_NAME],
use_discrete=True,
action_sizes=(0, 1),
num_visual=num_visual,
num_vector=0,
step_size=0.5,

check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.5)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_recurrent_sac(use_discrete):
step_size = 0.2 if use_discrete else 0.5
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_recurrent_sac(action_sizes):
step_size = 0.2 if action_sizes == (0, 1) else 0.5
[BRAIN_NAME], use_discrete=use_discrete, step_size=step_size
[BRAIN_NAME], action_sizes=action_sizes, step_size=step_size
)
new_networksettings = attr.evolve(
SAC_TORCH_CONFIG.network_settings,

check_environment_trains(env, {BRAIN_NAME: config})
@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_ghost(use_discrete):
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_simple_ghost(action_sizes):
[BRAIN_NAME + "?team=0", BRAIN_NAME + "?team=1"], use_discrete=use_discrete
[BRAIN_NAME + "?team=0", BRAIN_NAME + "?team=1"], action_sizes=action_sizes
)
self_play_settings = SelfPlaySettings(
play_against_latest_model_ratio=1.0, save_steps=2000, swap_steps=2000

@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_ghost_fails(use_discrete):
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_simple_ghost_fails(action_sizes):
[BRAIN_NAME + "?team=0", BRAIN_NAME + "?team=1"], use_discrete=use_discrete
[BRAIN_NAME + "?team=0", BRAIN_NAME + "?team=1"], action_sizes=action_sizes
)
# This config should fail because the ghosted policy is never swapped with a competent policy.
# Swap occurs after max step is reached.

)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_asymm_ghost(use_discrete):
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_simple_asymm_ghost(action_sizes):
[BRAIN_NAME + "?team=0", brain_name_opp + "?team=1"], use_discrete=use_discrete
[BRAIN_NAME + "?team=0", brain_name_opp + "?team=1"], action_sizes=action_sizes
)
self_play_settings = SelfPlaySettings(
play_against_latest_model_ratio=1.0,

check_environment_trains(env, {BRAIN_NAME: config, brain_name_opp: config})
@pytest.mark.parametrize("use_discrete", [True, False])
def test_simple_asymm_ghost_fails(use_discrete):
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_simple_asymm_ghost_fails(action_sizes):
[BRAIN_NAME + "?team=0", brain_name_opp + "?team=1"], use_discrete=use_discrete
[BRAIN_NAME + "?team=0", brain_name_opp + "?team=1"], action_sizes=action_sizes
)
# This config should fail because the team that is not learning when both have reached
# max step should be executing the initial, untrained policy.

@pytest.fixture(scope="session")
def simple_record(tmpdir_factory):
def record_demo(use_discrete, num_visual=0, num_vector=1):
def record_demo(action_sizes, num_visual=0, num_vector=1):
use_discrete=use_discrete,
action_sizes=action_sizes,
num_visual=num_visual,
num_vector=num_vector,
n_demos=100,

env.solve()
agent_info_protos = env.demonstration_protos[BRAIN_NAME]
meta_data_proto = DemonstrationMetaProto()
continuous_action_size, discrete_action_size = action_sizes
action_spec_proto = ActionSpecProto(
num_continuous_actions=continuous_action_size,
num_discrete_actions=discrete_action_size,
discrete_branch_sizes=[2] if discrete_action_size > 0 else None,
)
vector_action_size=[2] if use_discrete else [1],
vector_action_descriptions=[""],
vector_action_space_type=discrete if use_discrete else continuous,
brain_name=BRAIN_NAME,
is_training=True,
brain_name=BRAIN_NAME, is_training=True, action_spec=action_spec_proto
action_type = "Discrete" if use_discrete else "Continuous"
action_type = "Discrete" if action_sizes else "Continuous"
demo_path_name = "1DTest" + action_type + ".demo"
demo_path = str(tmpdir_factory.mktemp("tmp_demo").join(demo_path_name))
write_demo(demo_path, meta_data_proto, brain_param_proto, agent_info_protos)
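The torch fixture above records the demo's action space through the new ActionSpecProto instead of the deprecated vector_action_* fields still visible in the TensorFlow fixture earlier in this diff. A minimal sketch of just the proto construction, assuming the message names imported in this file and using "1D" as a stand-in for BRAIN_NAME:

from mlagents_envs.communicator_objects.brain_parameters_pb2 import (
    ActionSpecProto,
    BrainParametersProto,
)

continuous_action_size, discrete_action_size = (0, 1)  # one discrete branch
action_spec_proto = ActionSpecProto(
    num_continuous_actions=continuous_action_size,
    num_discrete_actions=discrete_action_size,
    discrete_branch_sizes=[2] if discrete_action_size > 0 else None,
)
# Brain parameters now reference the ActionSpec directly.
brain_param_proto = BrainParametersProto(
    brain_name="1D", is_training=True, action_spec=action_spec_proto
)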

@pytest.mark.parametrize("use_discrete", [True, False])
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_gail(simple_record, use_discrete, trainer_config):
demo_path = simple_record(use_discrete)
env = SimpleEnvironment([BRAIN_NAME], use_discrete=use_discrete, step_size=0.2)
def test_gail(simple_record, action_sizes, trainer_config):
demo_path = simple_record(action_sizes)
env = SimpleEnvironment([BRAIN_NAME], action_sizes=action_sizes, step_size=0.2)
bc_settings = BehavioralCloningSettings(demo_path=demo_path, steps=1000)
reward_signals = {
RewardSignalType.GAIL: GAILSettings(encoding_size=32, demo_path=demo_path)

check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_gail_visual_ppo(simple_record, use_discrete):
demo_path = simple_record(use_discrete, num_visual=1, num_vector=0)
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_gail_visual_ppo(simple_record, action_sizes):
demo_path = simple_record(action_sizes, num_visual=1, num_vector=0)
use_discrete=use_discrete,
action_sizes=action_sizes,
step_size=0.2,
)
bc_settings = BehavioralCloningSettings(demo_path=demo_path, steps=1500)

check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
@pytest.mark.parametrize("use_discrete", [True, False])
def test_gail_visual_sac(simple_record, use_discrete):
demo_path = simple_record(use_discrete, num_visual=1, num_vector=0)
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_gail_visual_sac(simple_record, action_sizes):
demo_path = simple_record(action_sizes, num_visual=1, num_vector=0)
use_discrete=use_discrete,
action_sizes=action_sizes,
step_size=0.2,
)
bc_settings = BehavioralCloningSettings(demo_path=demo_path, steps=1000)

44
ml-agents/mlagents/trainers/tests/torch/test_utils.py


from mlagents.trainers.torch.utils import ModelUtils
from mlagents.trainers.exception import UnityTrainerException
from mlagents.trainers.torch.encoders import VectorInput
from mlagents.trainers.torch.distributions import (
CategoricalDistInstance,
GaussianDistInstance,
)
def test_min_visual_size():

]
for res, exp in zip(oh_actions, expected_result):
assert torch.equal(res, exp)
def test_get_probs_and_entropy():
# Test continuous
# Add two dists to the list. This isn't done in the code but we'd like to support it.
dist_list = [
GaussianDistInstance(torch.zeros((1, 2)), torch.ones((1, 2))),
GaussianDistInstance(torch.zeros((1, 2)), torch.ones((1, 2))),
]
action_list = [torch.zeros((1, 2)), torch.zeros((1, 2))]
log_probs, entropies, all_probs = ModelUtils.get_probs_and_entropy(
action_list, dist_list
)
assert log_probs.shape == (1, 2, 2)
assert entropies.shape == (1, 1, 2)
assert all_probs is None
for log_prob in log_probs.flatten():
# Log prob of standard normal at 0
assert log_prob == pytest.approx(-0.919, abs=0.01)
for ent in entropies.flatten():
# entropy of standard normal at 0
assert ent == pytest.approx(1.42, abs=0.01)
# Test continuous
# Add two dists to the list.
act_size = 2
test_prob = torch.tensor(
[[1.0 - 0.1 * (act_size - 1)] + [0.1] * (act_size - 1)]
) # High prob for first action
dist_list = [CategoricalDistInstance(test_prob), CategoricalDistInstance(test_prob)]
action_list = [torch.tensor([0]), torch.tensor([1])]
log_probs, entropies, all_probs = ModelUtils.get_probs_and_entropy(
action_list, dist_list
)
assert all_probs.shape == (1, len(dist_list * act_size))
assert entropies.shape == (1, len(dist_list))
# Make sure the first action has a higher probability than the others.
assert log_probs.flatten()[0] > log_probs.flatten()[1]
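The constants asserted above are just the standard-normal values: the log density at zero is -0.5 * ln(2π) ≈ -0.919 and the differential entropy is 0.5 * ln(2πe) ≈ 1.419 per dimension; for the categorical case, test_prob with act_size = 2 is [[0.9, 0.1]], so the first selected action (index 0) must score a higher log probability than the second (index 1). A quick numeric check, independent of the trainers' code:

import math

log_prob_at_zero = -0.5 * math.log(2 * math.pi)            # ≈ -0.9189
entropy_std_normal = 0.5 * math.log(2 * math.pi * math.e)  # ≈ 1.4189

assert abs(log_prob_at_zero - (-0.919)) < 0.01
assert abs(entropy_std_normal - 1.42) < 0.01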
def test_masked_mean():

6
ml-agents/mlagents/trainers/tf/components/bc/module.py


self.policy.batch_size_ph: n_sequences,
self.policy.sequence_length_ph: self.policy.sequence_length,
}
feed_dict[self.model.action_in_expert] = mini_batch_demo["actions"]
feed_dict[self.model.action_in_expert] = mini_batch_demo["discrete_action"]
feed_dict[self.policy.action_masks] = np.ones(
(
self.n_sequences * self.policy.sequence_length,

)
else:
feed_dict[self.model.action_in_expert] = mini_batch_demo[
"continuous_action"
]
if self.policy.vec_obs_size > 0:
feed_dict[self.policy.vector_in] = mini_batch_demo["vector_obs"]
for i, _ in enumerate(self.policy.visual_in):

10
ml-agents/mlagents/trainers/tf/components/reward_signals/curiosity/signal.py


def evaluate_batch(self, mini_batch: AgentBuffer) -> RewardSignalResult:
feed_dict: Dict[tf.Tensor, Any] = {
self.policy.batch_size_ph: len(mini_batch["actions"]),
self.policy.batch_size_ph: len(mini_batch["vector_obs"]),
self.policy.sequence_length_ph: self.policy.sequence_length,
}
if self.policy.use_vec_obs:

feed_dict[self.model.next_visual_in[i]] = _next_obs
if self.policy.use_continuous_act:
feed_dict[self.policy.selected_actions] = mini_batch["actions"]
feed_dict[self.policy.selected_actions] = mini_batch["continuous_action"]
feed_dict[self.policy.output] = mini_batch["actions"]
feed_dict[self.policy.output] = mini_batch["discrete_action"]
unscaled_reward = self.policy.sess.run(
self.model.intrinsic_reward, feed_dict=feed_dict
)

policy.mask_input: mini_batch["masks"],
}
if self.policy.use_continuous_act:
feed_dict[policy.selected_actions] = mini_batch["actions"]
feed_dict[policy.selected_actions] = mini_batch["continuous_action"]
feed_dict[policy.output] = mini_batch["actions"]
feed_dict[policy.output] = mini_batch["discrete_action"]
if self.policy.use_vec_obs:
feed_dict[policy.vector_in] = mini_batch["vector_obs"]
feed_dict[self.model.next_vector_in] = mini_batch["next_vector_in"]

17
ml-agents/mlagents/trainers/tf/components/reward_signals/gail/signal.py


def evaluate_batch(self, mini_batch: AgentBuffer) -> RewardSignalResult:
feed_dict: Dict[tf.Tensor, Any] = {
self.policy.batch_size_ph: len(mini_batch["actions"]),
self.policy.batch_size_ph: len(mini_batch["vector_obs"]),
self.policy.sequence_length_ph: self.policy.sequence_length,
}
if self.model.use_vail:

feed_dict[self.policy.visual_in[i]] = _obs
if self.policy.use_continuous_act:
feed_dict[self.policy.selected_actions] = mini_batch["actions"]
feed_dict[self.policy.selected_actions] = mini_batch["continuous_action"]
feed_dict[self.policy.output] = mini_batch["actions"]
feed_dict[self.policy.output] = mini_batch["discrete_action"]
feed_dict[self.model.done_policy_holder] = np.array(
mini_batch["done"]
).flatten()

if self.model.use_vail:
feed_dict[self.model.use_noise] = [1]
feed_dict[self.model.action_in_expert] = np.array(mini_batch_demo["actions"])
feed_dict[policy.selected_actions] = mini_batch["actions"]
feed_dict[policy.selected_actions] = mini_batch["continuous_action"]
feed_dict[self.model.action_in_expert] = np.array(
mini_batch_demo["continuous_action"]
)
feed_dict[policy.output] = mini_batch["actions"]
feed_dict[policy.output] = mini_batch["discrete_action"]
feed_dict[self.model.action_in_expert] = np.array(
mini_batch_demo["discrete_action"]
)
if self.policy.use_vis_obs > 0:
for i in range(len(policy.visual_in)):

Some files were not shown because too many files have changed in this diff.
