
Merging master

/develop/rm-rf-new-models
vincentpierre 4 years ago
Current commit
14378aa5
37 changed files with 510 additions and 459 deletions
  1. com.unity.ml-agents/CHANGELOG.md (8)
  2. com.unity.ml-agents/Editor/BehaviorParametersEditor.cs (29)
  3. com.unity.ml-agents/Editor/BrainParametersDrawer.cs (58)
  4. com.unity.ml-agents/Editor/DemonstrationDrawer.cs (46)
  5. com.unity.ml-agents/Runtime/Actuators/ActionSegment.cs (1)
  6. com.unity.ml-agents/Runtime/Actuators/ActionSpec.cs (26)
  7. com.unity.ml-agents/Runtime/Actuators/ActuatorManager.cs (2)
  8. com.unity.ml-agents/Runtime/Actuators/IActionReceiver.cs (41)
  9. com.unity.ml-agents/Runtime/Actuators/VectorActuator.cs (32)
  10. com.unity.ml-agents/Runtime/Agent.cs (3)
  11. com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs (58)
  12. com.unity.ml-agents/Runtime/Inference/BarracudaModelParamLoader.cs (48)
  13. com.unity.ml-agents/Runtime/Policies/BrainParameters.cs (109)
  14. com.unity.ml-agents/Tests/Editor/Actuators/VectorActuatorTests.cs (10)
  15. com.unity.ml-agents/Tests/Editor/DemonstrationTests.cs (6)
  16. com.unity.ml-agents/Tests/Editor/EditModeTestInternalBrainTensorApplier.cs (4)
  17. com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs (27)
  18. com.unity.ml-agents/Tests/Editor/ModelRunnerTest.cs (2)
  19. com.unity.ml-agents/Tests/Editor/ParameterLoaderTest.cs (121)
  20. com.unity.ml-agents/Tests/Runtime/RuntimeAPITest.cs (3)
  21. com.unity.ml-agents/package.json (2)
  22. docs/Getting-Started.md (5)
  23. docs/Learning-Environment-Design-Agents.md (59)
  24. docs/Learning-Environment-Examples.md (38)
  25. docs/Learning-Environment-Executable.md (3)
  26. docs/ML-Agents-Overview.md (4)
  27. docs/Python-API.md (3)
  28. docs/Training-Configuration-File.md (4)
  29. docs/Training-on-Microsoft-Azure.md (11)
  30. ml-agents-envs/mlagents_envs/base_env.py (4)
  31. ml-agents/mlagents/trainers/cli_utils.py (14)
  32. ml-agents/mlagents/trainers/demo_loader.py (2)
  33. ml-agents/mlagents/trainers/policy/torch_policy.py (4)
  34. ml-agents/mlagents/trainers/tests/mock_brain.py (3)
  35. ml-agents/mlagents/trainers/tests/torch/test_hybrid.py (2)
  36. ml-agents/mlagents/trainers/torch/networks.py (84)
  37. docs/images/monitor.png (93)

8
com.unity.ml-agents/CHANGELOG.md


#### com.unity.ml-agents (C#)
#### ml-agents / ml-agents-envs / gym-unity (Python)
- TensorFlow trainers have been removed; please use the Torch trainers instead. (#4707)
- PyTorch trainers now support training agents with both continuous and discrete action spaces.
Currently, this can only be done with Actuators. Please see
[here](../Project/Assets/ML-Agents/Examples/Basic/Scripts/BasicActuatorComponent.cs) for an
example of how to use Actuators. (#4702)
- PyTorch trainers now support training agents with both continuous and discrete action spaces. (#4702)
- Agents with both continuous and discrete actions are now supported. You can specify
both continuous and discrete action sizes in Behavior Parameters. (#4702, #4718)
#### ml-agents / ml-agents-envs / gym-unity (Python)
- `ActionSpec.validate_action()` now enforces that `UnityEnvironment.set_action_for_agent()` receives a 1D `np.array`.
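
To make the hybrid-action support above concrete, here is a minimal C# sketch of giving a behavior both continuous and discrete action sizes from code, mirroring the editor tests in this change rather than the linked BasicActuatorComponent example. The class and method names are illustrative, and the sketch relies on the public `ActionSpec.MakeContinuous` factory plus the `BranchSizes` field shown in the ActionSpec.cs diff below.

```csharp
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Policies;
using UnityEngine;

public static class HybridActionExample
{
    // Illustrative helper: 2 continuous actions plus one discrete branch with 3 choices.
    public static void ConfigureHybridActions(GameObject agentObject)
    {
        var behavior = agentObject.AddComponent<BehaviorParameters>();

        // Start from a continuous spec and add the discrete branch sizes.
        var spec = ActionSpec.MakeContinuous(2);
        spec.BranchSizes = new[] { 3 };

        // The setter also keeps the deprecated VectorAction* fields in sync
        // (see the BrainParameters.cs changes below).
        behavior.BrainParameters.ActionSpec = spec;
    }
}
```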

29
com.unity.ml-agents/Editor/BehaviorParametersEditor.cs


float m_TimeSinceModelReload;
// Whether or not the model needs to be reloaded
bool m_RequireReload;
const string k_BehaviorName = "m_BehaviorName";
const string k_BrainParametersName = "m_BrainParameters";
const string k_ModelName = "m_Model";
const string k_InferenceDeviceName = "m_InferenceDevice";
const string k_BehaviorTypeName = "m_BehaviorType";
const string k_TeamIdName = "TeamId";
const string k_UseChildSensorsName = "m_UseChildSensors";
const string k_ObservableAttributeHandlingName = "m_ObservableAttributeHandling";
public override void OnInspectorGUI()
{

bool needBrainParametersUpdate; // Whether the brain parameters changed
// Drawing the Behavior Parameters
EditorGUI.indentLevel++;

{
EditorGUILayout.PropertyField(so.FindProperty("m_BehaviorName"));
EditorGUILayout.PropertyField(so.FindProperty(k_BehaviorName));
EditorGUI.BeginChangeCheck();
EditorGUILayout.PropertyField(so.FindProperty("m_BrainParameters"), true);
EditorGUILayout.PropertyField(so.FindProperty(k_BrainParametersName), true);
needBrainParametersUpdate = EditorGUI.EndChangeCheck();
EditorGUILayout.PropertyField(so.FindProperty("m_Model"), true);
EditorGUILayout.PropertyField(so.FindProperty(k_ModelName), true);
EditorGUILayout.PropertyField(so.FindProperty("m_InferenceDevice"), true);
EditorGUILayout.PropertyField(so.FindProperty(k_InferenceDeviceName), true);
EditorGUI.indentLevel--;
}
needPolicyUpdate = needPolicyUpdate || EditorGUI.EndChangeCheck();

EditorGUILayout.PropertyField(so.FindProperty("m_BehaviorType"));
EditorGUILayout.PropertyField(so.FindProperty(k_BehaviorTypeName));
EditorGUILayout.PropertyField(so.FindProperty("TeamId"));
EditorGUILayout.PropertyField(so.FindProperty(k_TeamIdName));
EditorGUILayout.PropertyField(so.FindProperty("m_UseChildSensors"), true);
EditorGUILayout.PropertyField(so.FindProperty("m_ObservableAttributeHandling"), true);
EditorGUILayout.PropertyField(so.FindProperty(k_UseChildSensorsName), true);
EditorGUILayout.PropertyField(so.FindProperty(k_ObservableAttributeHandlingName), true);
}
EditorGUI.EndDisabledGroup();

// Display all failed checks
D.logEnabled = false;
Model barracudaModel = null;
var model = (NNModel)serializedObject.FindProperty("m_Model").objectReferenceValue;
var model = (NNModel)serializedObject.FindProperty(k_ModelName).objectReferenceValue;
var behaviorParameters = (BehaviorParameters)target;
// Grab the sensor components, since we need them to determine the observation sizes.

58
com.unity.ml-agents/Editor/BrainParametersDrawer.cs


// The height of a line in the Unity Inspectors
const float k_LineHeight = 17f;
const int k_VecObsNumLine = 3;
const string k_ActionSizePropName = "VectorActionSize";
const string k_ActionTypePropName = "VectorActionSpaceType";
const string k_ActionSpecName = "m_ActionSpec";
const string k_ContinuousActionSizeName = "m_NumContinuousActions";
const string k_DiscreteBranchSizeName = "BranchSizes";
const string k_ActionDescriptionPropName = "VectorActionDescriptions";
const string k_VecObsPropName = "VectorObservationSize";
const string k_NumVecObsPropName = "NumStackedVectorObservations";

EditorGUI.LabelField(position, "Vector Action");
position.y += k_LineHeight;
EditorGUI.indentLevel++;
var bpVectorActionType = property.FindPropertyRelative(k_ActionTypePropName);
EditorGUI.PropertyField(
position,
bpVectorActionType,
new GUIContent("Space Type",
"Corresponds to whether state vector contains a single integer (Discrete) " +
"or a series of real-valued floats (Continuous)."));
var actionSpecProperty = property.FindPropertyRelative(k_ActionSpecName);
DrawContinuousVectorAction(position, actionSpecProperty);
if (bpVectorActionType.enumValueIndex == 1)
{
DrawContinuousVectorAction(position, property);
}
else
{
DrawDiscreteVectorAction(position, property);
}
DrawDiscreteVectorAction(position, actionSpecProperty);
}
/// <summary>

/// to make the custom GUI for.</param>
static void DrawContinuousVectorAction(Rect position, SerializedProperty property)
{
var vecActionSize = property.FindPropertyRelative(k_ActionSizePropName);
// This check is here due to:
// https://fogbugz.unity3d.com/f/cases/1246524/
// If this case has been resolved, please remove this if condition.
if (vecActionSize.arraySize != 1)
{
vecActionSize.arraySize = 1;
}
var continuousActionSize =
vecActionSize.GetArrayElementAtIndex(0);
var continuousActionSize = property.FindPropertyRelative(k_ContinuousActionSizeName);
new GUIContent("Space Size", "Length of continuous action vector."));
new GUIContent("Continuous Action Size", "Length of continuous action vector."));
}
/// <summary>

/// to make the custom GUI for.</param>
static void DrawDiscreteVectorAction(Rect position, SerializedProperty property)
{
var vecActionSize = property.FindPropertyRelative(k_ActionSizePropName);
var branchSizes = property.FindPropertyRelative(k_DiscreteBranchSizeName);
position, "Branches Size", vecActionSize.arraySize);
position, "Discrete Branch Size", branchSizes.arraySize);
if (newSize != vecActionSize.arraySize)
if (newSize != branchSizes.arraySize)
vecActionSize.arraySize = newSize;
branchSizes.arraySize = newSize;
}
position.y += k_LineHeight;

branchIndex < vecActionSize.arraySize;
branchIndex < branchSizes.arraySize;
vecActionSize.GetArrayElementAtIndex(branchIndex);
branchSizes.GetArrayElementAtIndex(branchIndex);
EditorGUI.PropertyField(
position,

/// <returns>The height of the drawer of the Vector Action.</returns>
static float GetHeightDrawVectorAction(SerializedProperty property)
{
var actionSize = 2 + property.FindPropertyRelative(k_ActionSizePropName).arraySize;
if (property.FindPropertyRelative(k_ActionTypePropName).enumValueIndex == 0)
{
actionSize += 1;
}
return actionSize * k_LineHeight;
var actionSpecProperty = property.FindPropertyRelative(k_ActionSpecName);
var numActionLines = 3 + actionSpecProperty.FindPropertyRelative(k_DiscreteBranchSizeName).arraySize;
return numActionLines * k_LineHeight;
}
}
}

46
com.unity.ml-agents/Editor/DemonstrationDrawer.cs


using System.Text;
using UnityEditor;
using Unity.MLAgents.Demonstrations;
using Unity.MLAgents.Policies;
namespace Unity.MLAgents.Editor

SerializedProperty m_BrainParameters;
SerializedProperty m_DemoMetaData;
SerializedProperty m_ObservationShapes;
const string k_BrainParametersName = "brainParameters";
const string k_MetaDataName = "metaData";
const string k_ObservationSummariesName = "observationSummaries";
const string k_DemonstrationName = "demonstrationName";
const string k_NumberStepsName = "numberSteps";
const string k_NumberEpisodesName = "numberEpisodes";
const string k_MeanRewardName = "meanReward";
const string k_ActionSpecName = "ActionSpec";
const string k_NumContinuousActionsName = "m_NumContinuousActions";
const string k_NumDiscreteActionsName = "m_NumDiscreteActions";
const string k_ShapeName = "shape";
m_BrainParameters = serializedObject.FindProperty("brainParameters");
m_DemoMetaData = serializedObject.FindProperty("metaData");
m_ObservationShapes = serializedObject.FindProperty("observationSummaries");
m_BrainParameters = serializedObject.FindProperty(k_BrainParametersName);
m_DemoMetaData = serializedObject.FindProperty(k_MetaDataName);
m_ObservationShapes = serializedObject.FindProperty(k_ObservationSummariesName);
}
/// <summary>

{
var nameProp = property.FindPropertyRelative("demonstrationName");
var experiencesProp = property.FindPropertyRelative("numberSteps");
var episodesProp = property.FindPropertyRelative("numberEpisodes");
var rewardsProp = property.FindPropertyRelative("meanReward");
var nameProp = property.FindPropertyRelative(k_DemonstrationName);
var experiencesProp = property.FindPropertyRelative(k_NumberStepsName);
var episodesProp = property.FindPropertyRelative(k_NumberEpisodesName);
var rewardsProp = property.FindPropertyRelative(k_MeanRewardName);
var nameLabel = nameProp.displayName + ": " + nameProp.stringValue;
var experiencesLabel = experiencesProp.displayName + ": " + experiencesProp.intValue;

/// </summary>
void MakeActionsProperty(SerializedProperty property)
{
var actSizeProperty = property.FindPropertyRelative("VectorActionSize");
var actSpaceTypeProp = property.FindPropertyRelative("VectorActionSpaceType");
var actSpecProperty = property.FindPropertyRelative(k_ActionSpecName);
var continuousSizeProperty = actSpecProperty.FindPropertyRelative(k_NumContinuousActionsName);
var discreteSizeProperty = actSpecProperty.FindPropertyRelative(k_NumDiscreteActionsName);
var vecActSizeLabel =
actSizeProperty.displayName + ": " + BuildIntArrayLabel(actSizeProperty);
var actSpaceTypeLabel = actSpaceTypeProp.displayName + ": " +
(SpaceType)actSpaceTypeProp.enumValueIndex;
var continuousSizeLabel =
continuousSizeProperty.displayName + ": " + continuousSizeProperty.intValue;
var discreteSizeLabel = discreteSizeProperty.displayName + ": " +
discreteSizeProperty.intValue;
EditorGUILayout.LabelField(vecActSizeLabel);
EditorGUILayout.LabelField(actSpaceTypeLabel);
EditorGUILayout.LabelField(continuousSizeLabel);
EditorGUILayout.LabelField(discreteSizeLabel);
}
/// <summary>

for (var i = 0; i < numObservations; i++)
{
var summary = obsSummariesProperty.GetArrayElementAtIndex(i);
var shapeProperty = summary.FindPropertyRelative("shape");
var shapeProperty = summary.FindPropertyRelative(k_ShapeName);
shapesLabels.Add(BuildIntArrayLabel(shapeProperty));
}

1
com.unity.ml-agents/Runtime/Actuators/ActionSegment.cs


/// <summary>
/// Check if the segment is empty.
/// </summary>
/// <returns>Whether or not the segment is empty.</returns>
public bool IsEmpty()
{
return Array == null || Array.Length == 0;

26
com.unity.ml-agents/Runtime/Actuators/ActionSpec.cs


using System;
using System.Collections.Generic;
using UnityEngine;
namespace Unity.MLAgents.Actuators
{

public readonly struct ActionSpec
[Serializable]
public struct ActionSpec
[SerializeField]
int m_NumContinuousActions;
/// <summary>
/// An array of branch sizes for our action space.
///

///
/// For an IActuator with a Continuous it will be null.
/// </summary>
public readonly int[] BranchSizes;
public int[] BranchSizes;
public int NumContinuousActions { get; }
public int NumContinuousActions { get { return m_NumContinuousActions; } set { m_NumContinuousActions = value; } }
public int NumDiscreteActions { get; }
public int NumDiscreteActions { get { return BranchSizes == null ? 0 : BranchSizes.Length; } }
public int SumOfDiscreteBranchSizes { get; }
public int SumOfDiscreteBranchSizes { get { return BranchSizes == null ? 0 : BranchSizes.Sum(); } }
/// <summary>
/// Creates a Continuous <see cref="ActionSpec"/> with the number of actions available.

public static ActionSpec MakeContinuous(int numActions)
{
var actuatorSpace = new ActionSpec(numActions, 0);
var actuatorSpace = new ActionSpec(numActions, null);
return actuatorSpace;
}

public static ActionSpec MakeDiscrete(params int[] branchSizes)
{
var numActions = branchSizes.Length;
var actuatorSpace = new ActionSpec(0, numActions, branchSizes);
var actuatorSpace = new ActionSpec(0, branchSizes);
internal ActionSpec(int numContinuousActions, int numDiscreteActions, int[] branchSizes = null)
internal ActionSpec(int numContinuousActions, int[] branchSizes = null)
NumContinuousActions = numContinuousActions;
NumDiscreteActions = numDiscreteActions;
m_NumContinuousActions = numContinuousActions;
SumOfDiscreteBranchSizes = branchSizes?.Sum() ?? 0;
}
/// <summary>

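As a quick sketch of the serializable ActionSpec above: the discrete counts are now derived from `BranchSizes` instead of being stored, so the two factory methods each fill in only one side of the spec. The wrapper class below is illustrative.

```csharp
using Unity.MLAgents.Actuators;
using UnityEngine;

static class ActionSpecSketch
{
    static void Show()
    {
        // Discrete-only spec: three branches with 3, 2 and 2 choices.
        var discrete = ActionSpec.MakeDiscrete(3, 2, 2);
        Debug.Log(discrete.NumDiscreteActions);        // 3 (one per branch)
        Debug.Log(discrete.SumOfDiscreteBranchSizes);  // 3 + 2 + 2 = 7
        Debug.Log(discrete.NumContinuousActions);      // 0

        // Continuous-only spec: BranchSizes stays null, so the discrete counts fall back to 0.
        var continuous = ActionSpec.MakeContinuous(4);
        Debug.Log(continuous.NumContinuousActions);    // 4
        Debug.Log(continuous.NumDiscreteActions);      // 0
    }
}
```
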
2
com.unity.ml-agents/Runtime/Actuators/ActuatorManager.cs


}
}
return new ActionSpec(numContinuousActions, numDiscreteActions, combinedBranchSizes);
return new ActionSpec(numContinuousActions, combinedBranchSizes);
}
/// <summary>

41
com.unity.ml-agents/Runtime/Actuators/IActionReceiver.cs


return (ContinuousActions.GetHashCode() * 397) ^ DiscreteActions.GetHashCode();
}
}
/// <summary>
/// Packs the continuous and discrete actions into one float array. The array passed into this method
/// must have a Length that is greater than or equal to the sum of the Lengths of
/// <see cref="ContinuousActions"/> and <see cref="DiscreteActions"/>.
/// </summary>
/// <param name="destination">A float array to pack actions into whose length is greater than or
/// equal to the addition of the Lengths of this objects <see cref="ContinuousActions"/> and
/// <see cref="DiscreteActions"/> segments.</param>
/// [Obsolete("PackActions has been deprecated.")]
public void PackActions(in float[] destination)
{
Debug.Assert(destination.Length >= ContinuousActions.Length + DiscreteActions.Length,
$"argument '{nameof(destination)}' is not large enough to pack the actions into.\n" +
$"{nameof(destination)}.Length: {destination.Length}\n" +
$"{nameof(ContinuousActions)}.Length + {nameof(DiscreteActions)}.Length: {ContinuousActions.Length + DiscreteActions.Length}");
var start = 0;
if (ContinuousActions.Length > 0)
{
Array.Copy(ContinuousActions.Array,
ContinuousActions.Offset,
destination,
start,
ContinuousActions.Length);
start = ContinuousActions.Length;
}
if (start >= destination.Length)
{
return;
}
if (DiscreteActions.Length > 0)
{
Array.Copy(DiscreteActions.Array,
DiscreteActions.Offset,
destination,
start,
DiscreteActions.Length);
}
}
}
/// <summary>

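A usage sketch for the `PackActions` method above: it copies the continuous segment first and the discrete values after it into a single float array. The `ActionSegment` and `ActionBuffers` constructors are assumed from the VectorActuator tests in this change; the wrapper class is illustrative.

```csharp
using Unity.MLAgents.Actuators;

static class PackActionsSketch
{
    static float[] Pack()
    {
        // One continuous action followed by two discrete branch values.
        var continuous = new ActionSegment<float>(new[] { 0.5f });
        var discrete = new ActionSegment<int>(new[] { 1, 2 });
        var buffers = new ActionBuffers(continuous, discrete);

        // Destination must be at least ContinuousActions.Length + DiscreteActions.Length long.
        var packed = new float[3];
        buffers.PackActions(packed);  // packed now holds { 0.5, 1, 2 }
        return packed;
    }
}
```
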
32
com.unity.ml-agents/Runtime/Actuators/VectorActuator.cs


/// Create a VectorActuator that forwards to the provided IActionReceiver.
/// </summary>
/// <param name="actionReceiver">The <see cref="IActionReceiver"/> used for OnActionReceived and WriteDiscreteActionMask.</param>
/// <param name="vectorActionSize">For discrete action spaces, the branch sizes for each action.
/// For continuous action spaces, the number of actions is the 0th element.</param>
/// <param name="spaceType"></param>
/// <param name="actionSpec"></param>
/// <exception cref="ArgumentOutOfRangeException">Thrown for invalid <see cref="SpaceType"/></exception>
int[] vectorActionSize,
SpaceType spaceType,
ActionSpec actionSpec,
ActionSpec = actionSpec;
switch (spaceType)
if (actionSpec.NumContinuousActions == 0)
case SpaceType.Continuous:
ActionSpec = ActionSpec.MakeContinuous(vectorActionSize[0]);
suffix = "-Continuous";
break;
case SpaceType.Discrete:
ActionSpec = ActionSpec.MakeDiscrete(vectorActionSize);
suffix = "-Discrete";
break;
default:
throw new ArgumentOutOfRangeException(nameof(spaceType),
spaceType,
"Unknown enum value.");
suffix = "-Discrete";
}
else if (actionSpec.NumDiscreteActions == 0)
{
suffix = "-Continuous";
}
else
{
suffix = $"-Continuous-{actionSpec.NumContinuousActions}-Discrete-{actionSpec.NumDiscreteActions}";
}
Name = name + suffix;
}

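A short sketch of the ActionSpec-based `VectorActuator` constructor above, including the name suffix it derives from the spec. `NoOpReceiver` is a hypothetical stand-in for a real `IActionReceiver`, and the constructor is called the way VectorActuatorTests.cs does further down (which may require access to the package internals).

```csharp
using Unity.MLAgents.Actuators;

// Hypothetical receiver, only here to satisfy the constructor.
class NoOpReceiver : IActionReceiver
{
    public void OnActionReceived(ActionBuffers actionBuffers) { }
    public void WriteDiscreteActionMask(IDiscreteActionMask actionMask) { }
}

static class VectorActuatorNamingSketch
{
    static void Show()
    {
        var receiver = new NoOpReceiver();

        // Discrete-only spec: the actuator name gets the "-Discrete" suffix.
        var discrete = new VectorActuator(receiver, ActionSpec.MakeDiscrete(1, 2, 3), "agent");

        // Continuous-only spec: the name gets the "-Continuous" suffix instead.
        var continuous = new VectorActuator(receiver, ActionSpec.MakeContinuous(4), "agent");
    }
}
```
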
3
com.unity.ml-agents/Runtime/Agent.cs


using System;
using System.Collections.Generic;
using System.Collections.ObjectModel;
using System.Linq;
using UnityEngine;
using Unity.Barracuda;
using Unity.MLAgents.Actuators;

// Support legacy OnActionReceived
// TODO don't set this up if the sizes are 0?
var param = m_PolicyFactory.BrainParameters;
m_VectorActuator = new VectorActuator(this, param.VectorActionSize, param.VectorActionSpaceType);
m_VectorActuator = new VectorActuator(this, param.ActionSpec);
m_ActuatorManager = new ActuatorManager(attachedActuators.Length + 1);
m_LegacyActionCache = new float[m_VectorActuator.TotalNumberOfActions()];

58
com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs


{
var brainParametersProto = new BrainParametersProto
{
VectorActionSizeDeprecated = { bp.VectorActionSize },
IsTraining = isTraining
IsTraining = isTraining,
ActionSpec = ToActionSpecProto(bp.ActionSpec),
if (bp.VectorActionSize != null)
{
brainParametersProto.VectorActionSizeDeprecated.AddRange(bp.VectorActionSize);
}
if (bp.VectorActionDescriptions != null)
{
brainParametersProto.VectorActionDescriptionsDeprecated.AddRange(bp.VectorActionDescriptions);

var brainParametersProto = new BrainParametersProto
{
BrainName = name,
IsTraining = isTraining
IsTraining = isTraining,
ActionSpec = ToActionSpecProto(actionSpec),
var actionSpecProto = new ActionSpecProto
{
NumContinuousActions = actionSpec.NumContinuousActions,
NumDiscreteActions = actionSpec.NumDiscreteActions,
};
if (actionSpec.BranchSizes != null)
{
actionSpecProto.DiscreteBranchSizes.AddRange(actionSpec.BranchSizes);
}
brainParametersProto.ActionSpec = actionSpecProto;
var supportHybrid = Academy.Instance.TrainerCapabilities == null || Academy.Instance.TrainerCapabilities.HybridActions;
if (!supportHybrid)

{
var bp = new BrainParameters
{
VectorActionSize = bpp.VectorActionSizeDeprecated.ToArray(),
VectorActionSpaceType = (SpaceType)bpp.VectorActionSpaceTypeDeprecated
ActionSpec = ToActionSpec(bpp.ActionSpec),
}
/// <summary>
/// Convert a ActionSpecProto to a ActionSpec struct.
/// </summary>
/// <param name="actionSpecProto">An instance of an action spec protobuf object.</param>
/// <returns>An ActionSpec struct.</returns>
public static ActionSpec ToActionSpec(this ActionSpecProto actionSpecProto)
{
var actionSpec = new ActionSpec(actionSpecProto.NumContinuousActions);
if (actionSpecProto.DiscreteBranchSizes != null)
{
actionSpec.BranchSizes = actionSpecProto.DiscreteBranchSizes.ToArray();
}
return actionSpec;
}
/// <summary>
/// Convert a ActionSpec struct to a ActionSpecProto.
/// </summary>
/// <param name="actionSpecProto">An instance of an action spec struct.</param>
/// <returns>An ActionSpecProto.</returns>
public static ActionSpecProto ToActionSpecProto(this ActionSpec actionSpec)
{
var actionSpecProto = new ActionSpecProto
{
NumContinuousActions = actionSpec.NumContinuousActions,
NumDiscreteActions = actionSpec.NumDiscreteActions,
};
if (actionSpec.BranchSizes != null)
{
actionSpecProto.DiscreteBranchSizes.AddRange(actionSpec.BranchSizes);
}
return actionSpecProto;
}
#endregion

48
com.unity.ml-agents/Runtime/Inference/BarracudaModelParamLoader.cs


BrainParameters brainParameters, TensorProxy tensorProxy,
SensorComponent[] sensorComponents, int observableAttributeTotalSize)
{
// TODO: Update this check after integrating ActionSpec into BrainParameters
var numberActionsBp = brainParameters.VectorActionSize.Length;
var numberActionsBp = brainParameters.ActionSpec.NumDiscreteActions;
var numberActionsT = tensorProxy.shape[tensorProxy.shape.Length - 1];
if (numberActionsBp != numberActionsT)
{

{
var failedModelChecks = new List<string>();
var tensorTester = new Dictionary<string, Func<BrainParameters, ActuatorComponent[], TensorShape?, int, int, string>>();
if (model.HasContinuousOutputs())
// If the model expects an output but it is not in this list
var modelContinuousActionSize = model.ContinuousOutputSize();
var continuousError = CheckContinuousActionOutputShape(brainParameters, actuatorComponents, modelContinuousActionSize);
if (continuousError != null)
tensorTester[model.ContinuousOutputName()] = CheckContinuousActionOutputShape;
failedModelChecks.Add(continuousError);
if (model.HasDiscreteOutputs())
{
tensorTester[model.DiscreteOutputName()] = CheckDiscreteActionOutputShape;
}
// If the model expects an output but it is not in this list
var modelContinuousActionSize = model.ContinuousOutputSize();
foreach (var name in model.outputs)
var discreteError = CheckDiscreteActionOutputShape(brainParameters, actuatorComponents, modelSumDiscreteBranchSizes);
if (discreteError != null)
if (tensorTester.ContainsKey(name))
{
var tester = tensorTester[name];
var error = tester.Invoke(brainParameters, actuatorComponents, model.GetShapeByName(name), modelContinuousActionSize, modelSumDiscreteBranchSizes);
if (error != null)
{
failedModelChecks.Add(error);
}
}
failedModelChecks.Add(discreteError);
}
return failedModelChecks;
}

/// check failed. If the check passed, returns null.
/// </returns>
static string CheckDiscreteActionOutputShape(
BrainParameters brainParameters, ActuatorComponent[] actuatorComponents, TensorShape? shape, int modelContinuousActionSize, int modelSumDiscreteBranchSizes)
BrainParameters brainParameters, ActuatorComponent[] actuatorComponents, int modelSumDiscreteBranchSizes)
var sumOfDiscreteBranchSizes = 0;
if (brainParameters.VectorActionSpaceType == SpaceType.Discrete)
{
sumOfDiscreteBranchSizes += brainParameters.VectorActionSize.Sum();
}
// TODO: check each branch size instead of sum of branch sizes
var sumOfDiscreteBranchSizes = brainParameters.ActionSpec.SumOfDiscreteBranchSizes;
foreach (var actuatorComponent in actuatorComponents)
{

/// <returns>If the Check failed, returns a string containing information about why the
/// check failed. If the check passed, returns null.</returns>
static string CheckContinuousActionOutputShape(
BrainParameters brainParameters, ActuatorComponent[] actuatorComponents, TensorShape? shape, int modelContinuousActionSize, int modelSumDiscreteBranchSizes)
BrainParameters brainParameters, ActuatorComponent[] actuatorComponents, int modelContinuousActionSize)
var numContinuousActions = 0;
if (brainParameters.VectorActionSpaceType == SpaceType.Continuous)
{
numContinuousActions += brainParameters.NumActions;
}
var numContinuousActions = brainParameters.ActionSpec.NumContinuousActions;
foreach (var actuatorComponent in actuatorComponents)
{

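The reworked checks above read the expected sizes from `brainParameters.ActionSpec` instead of the deprecated fields. Below is a hedged sketch of exercising them end to end, modeled on ParameterLoaderTest.cs further down; `BarracudaModelParamLoader.CheckModel` is internal, so this only compiles where the package internals are visible (as in the editor test assembly), and `modelAsset` is a placeholder for a real trained model.

```csharp
using System.Linq;
using Unity.Barracuda;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Inference;
using Unity.MLAgents.Policies;
using Unity.MLAgents.Sensors;

static class ModelCheckSketch
{
    static bool SpecMatchesModel(NNModel modelAsset)
    {
        var bp = new BrainParameters
        {
            VectorObservationSize = 8,
            NumStackedVectorObservations = 1,
        };
        bp.ActionSpec = ActionSpec.MakeContinuous(2);

        var model = ModelLoader.Load(modelAsset);
        var errors = BarracudaModelParamLoader.CheckModel(
            model, bp, new SensorComponent[] { }, new ActuatorComponent[0]);

        // No errors means the ActionSpec and observations match the model's inputs and outputs.
        return !errors.Any();
    }
}
```
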
109
com.unity.ml-agents/Runtime/Policies/BrainParameters.cs


using System;
using UnityEngine;
using UnityEngine.Serialization;
using Unity.MLAgents.Actuators;
namespace Unity.MLAgents.Policies
{

/// [GameObject]: https://docs.unity3d.com/Manual/GameObjects.html
/// </remarks>
[Serializable]
public class BrainParameters
public class BrainParameters : ISerializationCallbackReceiver
{
/// <summary>
/// The number of the observations that are added in

[FormerlySerializedAs("numStackedVectorObservations")]
[Range(1, 50)] public int NumStackedVectorObservations = 1;
[SerializeField]
internal ActionSpec m_ActionSpec = new ActionSpec(0, null);
/// The size of the action space.
/// The specification of the Action space for the BrainParameters.
/// </summary>
public ActionSpec ActionSpec
{
get { return m_ActionSpec; }
set
{
m_ActionSpec.NumContinuousActions = value.NumContinuousActions;
m_ActionSpec.BranchSizes = value.BranchSizes;
SyncDeprecatedActionFields();
}
}
/// <summary>
/// (Deprecated) The size of the action space.
/// </summary>
/// <remarks>The size specified is interpreted differently depending on whether
/// the agent uses the continuous or the discrete action space.</remarks>

/// For the discrete action space: the number of branches in the action space.
/// </value>
/// [Obsolete("VectorActionSize has been deprecated, please use ActionSpec instead.")]
[FormerlySerializedAs("vectorActionSize")]
public int[] VectorActionSize = new[] { 1 };

public string[] VectorActionDescriptions;
/// <summary>
/// Defines if the action is discrete or continuous.
/// (Deprecated) Defines if the action is discrete or continuous.
/// [Obsolete("VectorActionSpaceType has been deprecated, please use ActionSpec instead.")]
[SerializeField]
[HideInInspector]
internal bool hasUpgradedBrainParametersWithActionSpec;
/// The number of actions specified by this Brain.
/// (Deprecated) The number of actions specified by this Brain.
/// [Obsolete("NumActions has been deprecated, please use ActionSpec instead.")]
switch (VectorActionSpaceType)
{
case SpaceType.Discrete:
return VectorActionSize.Length;
case SpaceType.Continuous:
return VectorActionSize[0];
default:
return 0;
}
return ActionSpec.NumContinuousActions > 0 ? ActionSpec.NumContinuousActions : ActionSpec.NumDiscreteActions;
}
}

{
VectorObservationSize = VectorObservationSize,
NumStackedVectorObservations = NumStackedVectorObservations,
VectorActionSize = (int[])VectorActionSize.Clone(),
VectorActionSpaceType = VectorActionSpaceType
ActionSpec = new ActionSpec(ActionSpec.NumContinuousActions, ActionSpec.BranchSizes),
VectorActionSize = (int[])VectorActionSize.Clone(),
VectorActionSpaceType = VectorActionSpaceType,
}
/// <summary>
/// Propagate ActionSpec fields from deprecated fields
/// </summary>
private void UpdateToActionSpec()
{
if (!hasUpgradedBrainParametersWithActionSpec)
{
if (VectorActionSpaceType == SpaceType.Continuous)
{
m_ActionSpec.NumContinuousActions = VectorActionSize[0];
m_ActionSpec.BranchSizes = null;
}
if (VectorActionSpaceType == SpaceType.Discrete)
{
m_ActionSpec.NumContinuousActions = 0;
m_ActionSpec.BranchSizes = (int[])VectorActionSize.Clone();
}
hasUpgradedBrainParametersWithActionSpec = true;
}
}
/// <summary>
/// Sync values in ActionSpec fields to deprecated fields
/// </summary>
private void SyncDeprecatedActionFields()
{
if (m_ActionSpec.NumContinuousActions == 0)
{
VectorActionSize = (int[])ActionSpec.BranchSizes.Clone();
VectorActionSpaceType = SpaceType.Discrete;
}
else if (m_ActionSpec.NumDiscreteActions == 0)
{
VectorActionSize = new[] { m_ActionSpec.NumContinuousActions };
VectorActionSpaceType = SpaceType.Continuous;
}
else
{
VectorActionSize = null;
}
}
/// <summary>
/// Called by Unity immediately before serializing this object.
/// </summary>
public void OnBeforeSerialize()
{
UpdateToActionSpec();
SyncDeprecatedActionFields();
}
/// <summary>
/// Called by Unity immediately after deserializing this object.
/// </summary>
public void OnAfterDeserialize()
{
UpdateToActionSpec();
SyncDeprecatedActionFields();
}
}
}

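Because BrainParameters now implements ISerializationCallbackReceiver and syncs in both directions, assigning the new `ActionSpec` property also rewrites the deprecated `VectorActionSize` and `VectorActionSpaceType` values. A minimal sketch of that setter path (the class and method names are illustrative):

```csharp
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Policies;

static class BrainParametersSyncSketch
{
    static void Show()
    {
        var bp = new BrainParameters();

        // Discrete spec: the deprecated fields become { 2, 2 } and SpaceType.Discrete.
        bp.ActionSpec = ActionSpec.MakeDiscrete(2, 2);

        // Continuous spec: they flip to { 3 } and SpaceType.Continuous.
        bp.ActionSpec = ActionSpec.MakeContinuous(3);
    }
}
```
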
10
com.unity.ml-agents/Tests/Editor/Actuators/VectorActuatorTests.cs


public void TestConstruct()
{
var ar = new TestActionReceiver();
var va = new VectorActuator(ar, new[] { 1, 2, 3 }, SpaceType.Discrete, "name");
var va = new VectorActuator(ar, ActionSpec.MakeDiscrete(1, 2, 3), "name");
var va1 = new VectorActuator(ar, new[] { 4 }, SpaceType.Continuous, "name");
var va1 = new VectorActuator(ar, ActionSpec.MakeContinuous(4), "name");
Assert.IsTrue(va1.ActionSpec.NumContinuousActions == 4);
Assert.IsTrue(va1.ActionSpec.SumOfDiscreteBranchSizes == 0);

public void TestOnActionReceived()
{
var ar = new TestActionReceiver();
var va = new VectorActuator(ar, new[] { 1, 2, 3 }, SpaceType.Discrete, "name");
var va = new VectorActuator(ar, ActionSpec.MakeDiscrete(1, 2, 3), "name");
var discreteActions = new[] { 0, 1, 1 };
var ab = new ActionBuffers(ActionSegment<float>.Empty,

public void TestResetData()
{
var ar = new TestActionReceiver();
var va = new VectorActuator(ar, new[] { 1, 2, 3 }, SpaceType.Discrete, "name");
var va = new VectorActuator(ar, ActionSpec.MakeDiscrete(1, 2, 3), "name");
var discreteActions = new[] { 0, 1, 1 };
var ab = new ActionBuffers(ActionSegment<float>.Empty,

public void TestWriteDiscreteActionMask()
{
var ar = new TestActionReceiver();
var va = new VectorActuator(ar, new[] { 1, 2, 3 }, SpaceType.Discrete, "name");
var va = new VectorActuator(ar, ActionSpec.MakeDiscrete(1, 2, 3), "name");
var bdam = new ActuatorDiscreteActionMask(new[] { va }, 6, 3);
var groundTruthMask = new[] { false, true, false, false, true, true };

6
com.unity.ml-agents/Tests/Editor/DemonstrationTests.cs


bp.BrainParameters.VectorObservationSize = 3;
bp.BrainParameters.NumStackedVectorObservations = 2;
bp.BrainParameters.VectorActionDescriptions = new[] { "TestActionA", "TestActionB" };
bp.BrainParameters.VectorActionSize = new[] { 2, 2 };
bp.BrainParameters.VectorActionSpaceType = SpaceType.Discrete;
bp.BrainParameters.ActionSpec = ActionSpec.MakeDiscrete(2, 2);
gameobj.AddComponent<TestAgent>();

bpA.BrainParameters.VectorObservationSize = 3;
bpA.BrainParameters.NumStackedVectorObservations = 1;
bpA.BrainParameters.VectorActionDescriptions = new[] { "TestActionA", "TestActionB" };
bpA.BrainParameters.VectorActionSize = new[] { 2, 2 };
bpA.BrainParameters.VectorActionSpaceType = SpaceType.Discrete;
bpA.BrainParameters.ActionSpec = ActionSpec.MakeDiscrete(2, 2);
agentGo1.AddComponent<ObservationAgent>();
var agent1 = agentGo1.GetComponent<ObservationAgent>();

4
com.unity.ml-agents/Tests/Editor/EditModeTestInternalBrainTensorApplier.cs


[Test]
public void ApplyDiscreteActionOutput()
{
var actionSpec = ActionSpec.MakeDiscrete(new int[] { 2, 3 });
var actionSpec = ActionSpec.MakeDiscrete(2, 3);
var inputTensor = new TensorProxy()
{
shape = new long[] { 2, 5 },

[Test]
public void ApplyHybridActionOutput()
{
var actionSpec = new ActionSpec(3, 2, new int[] { 2, 3 });
var actionSpec = new ActionSpec(3, new int[] { 2, 3 });
var continuousInputTensor = new TensorProxy()
{
shape = new long[] { 2, 3 },

27
com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs


{
var agentGo1 = new GameObject("TestAgent");
var bp1 = agentGo1.AddComponent<BehaviorParameters>();
bp1.BrainParameters.VectorActionSize = new[] { 1 };
bp1.BrainParameters.VectorActionSpaceType = SpaceType.Continuous;
bp1.BrainParameters.ActionSpec = ActionSpec.MakeContinuous(1);
bp2.BrainParameters.VectorActionSize = new[] { 1 };
bp2.BrainParameters.VectorActionSpaceType = SpaceType.Continuous;
bp2.BrainParameters.ActionSpec = ActionSpec.MakeContinuous(1);
agentGo2.AddComponent<TestAgent>();
var agent2 = agentGo2.GetComponent<TestAgent>();

{
var agentGo1 = new GameObject("TestAgent");
var bp1 = agentGo1.AddComponent<BehaviorParameters>();
bp1.BrainParameters.VectorActionSize = new[] { 1 };
bp1.BrainParameters.VectorActionSpaceType = SpaceType.Continuous;
bp1.BrainParameters.ActionSpec = ActionSpec.MakeContinuous(1);
bp2.BrainParameters.VectorActionSize = new[] { 1 };
bp2.BrainParameters.VectorActionSpaceType = SpaceType.Continuous;
bp2.BrainParameters.ActionSpec = ActionSpec.MakeContinuous(1);
agentGo2.AddComponent<TestAgent>();
var agent2 = agentGo2.GetComponent<TestAgent>();

{
var agentGo1 = new GameObject("TestAgent");
var bp1 = agentGo1.AddComponent<BehaviorParameters>();
bp1.BrainParameters.VectorActionSize = new[] { 1 };
bp1.BrainParameters.VectorActionSpaceType = SpaceType.Continuous;
bp1.BrainParameters.ActionSpec = ActionSpec.MakeContinuous(1);
var agent1 = agentGo1.AddComponent<TestAgent>();
var behaviorParameters = agentGo1.GetComponent<BehaviorParameters>();
behaviorParameters.BrainParameters.NumStackedVectorObservations = 3;

{
var agentGo1 = new GameObject("TestAgent");
var bp1 = agentGo1.AddComponent<BehaviorParameters>();
bp1.BrainParameters.VectorActionSize = new[] { 1 };
bp1.BrainParameters.VectorActionSpaceType = SpaceType.Continuous;
bp1.BrainParameters.ActionSpec = ActionSpec.MakeContinuous(1);
bp2.BrainParameters.VectorActionSize = new[] { 1 };
bp2.BrainParameters.VectorActionSpaceType = SpaceType.Continuous;
bp2.BrainParameters.ActionSpec = ActionSpec.MakeContinuous(1);
var agent2 = agentGo2.AddComponent<TestAgent>();
var aca = Academy.Instance;

{
var agentGo1 = new GameObject("TestAgent");
var bp1 = agentGo1.AddComponent<BehaviorParameters>();
bp1.BrainParameters.VectorActionSize = new[] { 1 };
bp1.BrainParameters.VectorActionSpaceType = SpaceType.Continuous;
bp1.BrainParameters.ActionSpec = ActionSpec.MakeContinuous(1);
agentGo1.AddComponent<TestAgent>();
var agent1 = agentGo1.GetComponent<TestAgent>();
var aca = Academy.Instance;

// Make sure that Agents with HeuristicPolicies step their sensors each Academy step.
var agentGo1 = new GameObject("TestAgent");
var bp1 = agentGo1.AddComponent<BehaviorParameters>();
bp1.BrainParameters.VectorActionSize = new[] { 1 };
bp1.BrainParameters.VectorActionSpaceType = SpaceType.Continuous;
bp1.BrainParameters.ActionSpec = ActionSpec.MakeContinuous(1);
agentGo1.AddComponent<TestAgent>();
var agent1 = agentGo1.GetComponent<TestAgent>();
var aca = Academy.Instance;

2
com.unity.ml-agents/Tests/Editor/ModelRunnerTest.cs


ActionSpec GetHybrid0vis53vec_3c_2dActionSpec()
{
return new ActionSpec(3, 1, new int[] { 2 });
return new ActionSpec(3, new int[] { 2 });
}
[SetUp]

121
com.unity.ml-agents/Tests/Editor/ParameterLoaderTest.cs


{
var validBrainParameters = new BrainParameters();
validBrainParameters.VectorObservationSize = 8;
validBrainParameters.VectorActionSize = new[] { 2 };
validBrainParameters.VectorActionSpaceType = SpaceType.Continuous;
validBrainParameters.ActionSpec = ActionSpec.MakeContinuous(2);
return validBrainParameters;
}

validBrainParameters.VectorObservationSize = 0;
validBrainParameters.VectorActionSize = new[] { 2, 3 };
validBrainParameters.VectorActionSpaceType = SpaceType.Discrete;
validBrainParameters.ActionSpec = ActionSpec.MakeDiscrete(2, 3);
// TODO: update and enable this after integrating action spec into BrainParameters
// BrainParameters GetHybridBrainParameters()
// {
// var validBrainParameters = new BrainParameters();
// validBrainParameters.VectorObservationSize = 53;
// validBrainParameters.VectorActionSize = new[] { 2 };
// validBrainParameters.NumStackedVectorObservations = 1;
// validBrainParameters.VectorActionSpaceType = SpaceType.Discrete;
// return validBrainParameters;
// }
BrainParameters GetHybridBrainParameters()
{
var validBrainParameters = new BrainParameters();
validBrainParameters.VectorObservationSize = 53;
validBrainParameters.NumStackedVectorObservations = 1;
validBrainParameters.ActionSpec = new ActionSpec(3, new int[] { 2 });
return validBrainParameters;
}
[SetUp]
public void SetUp()

Assert.AreEqual(0, errors.Count()); // There should not be any errors
}
// TODO: update and enable this test after integrating action spec into BrainParameters
// [Test]
// public void TestCheckModelValidHybrid()
// {
// var model = ModelLoader.Load(hybridModel);
// var validBrainParameters = GetHybridBrainParameters();
[Test]
public void TestCheckModelValidHybrid()
{
var model = ModelLoader.Load(hybridONNXModel);
var validBrainParameters = GetHybridBrainParameters();
// var errors = BarracudaModelParamLoader.CheckModel(
// model, validBrainParameters,
// new SensorComponent[] { }, new ActuatorComponent[0]
// );
// Assert.AreEqual(0, errors.Count()); // There should not be any errors
// }
var errors = BarracudaModelParamLoader.CheckModel(
model, validBrainParameters,
new SensorComponent[] { }, new ActuatorComponent[0]
);
Assert.AreEqual(0, errors.Count()); // There should not be any errors
}
[TestCase(true)]
[TestCase(false)]

Assert.Greater(errors.Count(), 0);
}
// TODO: update and enable this test after integrating action spec into BrainParameters
// [Test]
// public void TestCheckModelThrowsVectorObservationHybrid()
// {
// var model = ModelLoader.Load(hybridModel);
[Test]
public void TestCheckModelThrowsVectorObservationHybrid()
{
var model = ModelLoader.Load(hybridONNXModel);
// var brainParameters = GetHybridBrainParameters();
// brainParameters.VectorObservationSize = 9; // Invalid observation
// var errors = BarracudaModelParamLoader.CheckModel(
// model, brainParameters,
// new SensorComponent[] { }, new ActuatorComponent[0]
// );
// Assert.Greater(errors.Count(), 0);
var brainParameters = GetHybridBrainParameters();
brainParameters.VectorObservationSize = 9; // Invalid observation
var errors = BarracudaModelParamLoader.CheckModel(
model, brainParameters,
new SensorComponent[] { }, new ActuatorComponent[0]
);
Assert.Greater(errors.Count(), 0);
// brainParameters = GetContinuous2vis8vec2actionBrainParameters();
// brainParameters.NumStackedVectorObservations = 2;// Invalid stacking
// errors = BarracudaModelParamLoader.CheckModel(
// model, brainParameters,
// new SensorComponent[] { }, new ActuatorComponent[0]
// );
// Assert.Greater(errors.Count(), 0);
// }
brainParameters = GetContinuous2vis8vec2actionBrainParameters();
brainParameters.NumStackedVectorObservations = 2;// Invalid stacking
errors = BarracudaModelParamLoader.CheckModel(
model, brainParameters,
new SensorComponent[] { }, new ActuatorComponent[0]
);
Assert.Greater(errors.Count(), 0);
}
[TestCase(true)]
[TestCase(false)]

var brainParameters = GetContinuous2vis8vec2actionBrainParameters();
brainParameters.VectorActionSize = new[] { 3 }; // Invalid action
brainParameters.ActionSpec = ActionSpec.MakeContinuous(3); // Invalid action
brainParameters.VectorActionSpaceType = SpaceType.Discrete;// Invalid SpaceType
brainParameters.ActionSpec = ActionSpec.MakeDiscrete(3); // Invalid SpaceType
errors = BarracudaModelParamLoader.CheckModel(model, brainParameters, new SensorComponent[] { sensor_21_20_3, sensor_20_22_3 }, new ActuatorComponent[0]);
Assert.Greater(errors.Count(), 0);
}

var model = useDeprecatedNNModel ? ModelLoader.Load(discreteNNModel) : ModelLoader.Load(discreteONNXModel);
var brainParameters = GetDiscrete1vis0vec_2_3action_recurrModelBrainParameters();
brainParameters.VectorActionSize = new[] { 3, 3 }; // Invalid action
brainParameters.ActionSpec = ActionSpec.MakeDiscrete(3, 3); // Invalid action
brainParameters.VectorActionSpaceType = SpaceType.Continuous;// Invalid SpaceType
brainParameters.ActionSpec = ActionSpec.MakeContinuous(2); // Invalid SpaceType
// TODO: update and enable this test after integrating action spec into BrainParameters
// [Test]
// public void TestCheckModelThrowsActionHybrid()
// {
// var model = ModelLoader.Load(hybridModel);
[Test]
public void TestCheckModelThrowsActionHybrid()
{
var model = ModelLoader.Load(hybridONNXModel);
// var brainParameters = GetHybridBrainParameters();
// brainParameters.VectorActionSize = new[] { 3 }; // Invalid action
// var errors = BarracudaModelParamLoader.CheckModel(model, brainParameters, new SensorComponent[] { sensor_21_20_3, sensor_20_22_3 }, new ActuatorComponent[0]);
// Assert.Greater(errors.Count(), 0);
var brainParameters = GetHybridBrainParameters();
brainParameters.ActionSpec = new ActionSpec(3, new int[] { 3 }); // Invalid discrete action size
var errors = BarracudaModelParamLoader.CheckModel(model, brainParameters, new SensorComponent[] { sensor_21_20_3, sensor_20_22_3 }, new ActuatorComponent[0]);
Assert.Greater(errors.Count(), 0);
// brainParameters = GetContinuous2vis8vec2actionBrainParameters();
// brainParameters.VectorActionSpaceType = SpaceType.Discrete;// Invalid SpaceType
// errors = BarracudaModelParamLoader.CheckModel(model, brainParameters, new SensorComponent[] { sensor_21_20_3, sensor_20_22_3 }, new ActuatorComponent[0]);
// Assert.Greater(errors.Count(), 0);
// }
brainParameters = GetContinuous2vis8vec2actionBrainParameters();
brainParameters.ActionSpec = ActionSpec.MakeDiscrete(2); // Missing continuous action
errors = BarracudaModelParamLoader.CheckModel(model, brainParameters, new SensorComponent[] { sensor_21_20_3, sensor_20_22_3 }, new ActuatorComponent[0]);
Assert.Greater(errors.Count(), 0);
}
[Test]
public void TestCheckModelThrowsNoModel()

3
com.unity.ml-agents/Tests/Runtime/RuntimeAPITest.cs


behaviorParams.BrainParameters.VectorObservationSize = 3;
behaviorParams.BrainParameters.NumStackedVectorObservations = 2;
behaviorParams.BrainParameters.VectorActionDescriptions = new[] { "TestActionA", "TestActionB" };
behaviorParams.BrainParameters.VectorActionSize = new[] { 2, 2 };
behaviorParams.BrainParameters.VectorActionSpaceType = SpaceType.Discrete;
behaviorParams.BrainParameters.ActionSpec = ActionSpec.MakeDiscrete(2, 2);
behaviorParams.BehaviorName = "TestBehavior";
behaviorParams.TeamId = 42;
behaviorParams.UseChildSensors = true;

2
com.unity.ml-agents/package.json


"unity": "2018.4",
"description": "Use state-of-the-art machine learning to create intelligent character behaviors in any Unity environment (games, robotics, film, etc.).",
"dependencies": {
"com.unity.barracuda": "1.1.2-preview",
"com.unity.barracuda": "1.2.1-preview",
"com.unity.modules.imageconversion": "1.0.0",
"com.unity.modules.jsonserialize": "1.0.0",
"com.unity.modules.physics": "1.0.0",

5
docs/Getting-Started.md


eight elements: the `x` and `z` components of the agent cube's rotation and the
`x`, `y`, and `z` components of the ball's relative position and velocity.
#### Behavior Parameters : Vector Action Space
#### Behavior Parameters : Actions
An Agent is given instructions in the form of actions.
The ML-Agents Toolkit classifies actions into two types: continuous and discrete.

Number of Visual Observations (per agent): 0
Vector Observation space size (per agent): 8
Number of stacked Vector Observation: 1
Vector Action space type: continuous
Vector Action space size (per agent): [2]
Vector Action descriptions: ,
INFO:mlagents_envs:Hyperparameters for the PPO Trainer of brain 3DBallLearning:
batch_size: 64
beta: 0.001

59
docs/Learning-Environment-Design-Agents.md


- [Raycast Observations](#raycast-observations)
- [RayCast Observation Summary & Best Practices](#raycast-observation-summary--best-practices)
- [Actions](#actions)
- [Continuous Action Space](#continuous-action-space)
- [Discrete Action Space](#discrete-action-space)
- [Continuous Actions](#continuous-actions)
- [Discrete Actions](#discrete-actions)
- [Masking Discrete Actions](#masking-discrete-actions)
- [Actions Summary & Best Practices](#actions-summary--best-practices)
- [Rewards](#rewards)

method calls `VectorSensor.AddObservation()` such that vector size adds up to 8,
the Behavior Parameters of the Agent are set with vector observation space
with a state size of 8.
- `Agent.OnActionReceived()` — The vector action spaces result
- `Agent.OnActionReceived()` — The action results
in a small change in the agent cube's rotation at each step. In this example,
an Agent receives a small positive reward for each step it keeps the ball on the
agent cube's head and a larger, negative reward for dropping the ball. An

An action is an instruction from the Policy that the agent carries out. The
action is passed to the Agent as the `ActionBuffers` parameter when the Academy invokes the
agent's `OnActionReceived()` function. There are two types of actions supported:
agent's `OnActionReceived()` function. There are two types of actions that an Agent can use:
**Continuous** and **Discrete**.
Neither the Policy nor the training algorithm know anything about what the

for an Agent is in the `OnActionReceived()` function.
For example, if you designed an agent to move in two dimensions, you could use
either continuous or the discrete vector actions. In the continuous case, you
would set the vector action size to two (one for each dimension), and the
agent's Policy would create an action with two floating point values. In the
either continuous or the discrete actions. In the continuous case, you
would set the action size to two (one for each dimension), and the
agent's Policy would output an action with two floating point values. In the
movement), and the Policy would create an action array containing two elements
with values ranging from zero to one.
movement), and the Policy would output an action array containing two elements
with values ranging from zero to one. You could alternatively use a combination of continuous
and discrete actions e.g., using one continuous action for horizontal movement
and a discrete branch of size two for the vertical movement.
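
As a sketch of the hybrid case just described, the hypothetical agent below reads one continuous action for horizontal movement and one discrete branch of size two for jumping, clipping the continuous value as the best-practice note further down recommends.

```csharp
using Unity.MLAgents;
using Unity.MLAgents.Actuators;
using UnityEngine;

// Hypothetical agent for illustration only.
public class MoveAndJumpAgent : Agent
{
    public override void OnActionReceived(ActionBuffers actionBuffers)
    {
        // Continuous action 0: horizontal movement, clipped to [-1, 1].
        var horizontal = Mathf.Clamp(actionBuffers.ContinuousActions[0], -1f, 1f);
        transform.Translate(horizontal * Time.deltaTime, 0f, 0f);

        // Discrete branch 0 has two possible values: 0 = stay, 1 = jump.
        if (actionBuffers.DiscreteActions[0] == 1)
        {
            // Apply the jump here, e.g. add an upward force to a Rigidbody.
        }
    }
}
```
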
The [3DBall](Learning-Environment-Examples.md#3dball-3d-balance-ball) and
[Area](Learning-Environment-Examples.md#push-block) example environments are set
up to use either the continuous or the discrete vector action spaces.
### Continuous Action Space
### Continuous Actions
is an array with length equal to the `Vector Action Space Size` property value. The
is an array with length equal to the `Continuous Action Size` property value. The
The [Reacher example](Learning-Environment-Examples.md#reacher) defines a
continuous action space with four control values.
The [Reacher example](Learning-Environment-Examples.md#reacher) uses
continuous actions with four control values.
![reacher](images/reacher.png)

```
By default the output from our provided PPO algorithm pre-clamps the values of
`vectorAction` into the [-1, 1] range. It is a best practice to manually clip
`ActionBuffers.ContinuousActions` into the [-1, 1] range. It is a best practice to manually clip
### Discrete Action Space
### Discrete Actions
is an array of integers. When defining the discrete vector action space, `Branches`
is an array of integers with length equal to `Discrete Branch Size`. When defining the discrete actions, `Branches`
is an array of integers, each value corresponds to the number of possibilities for each branch.
For example, if we wanted an Agent that can move in a plane and jump, we could

### Actions Summary & Best Practices
- Agents can either use `Discrete` or `Continuous` actions.
- Agents can use `Discrete` and/or `Continuous` actions.
- In general, smaller action spaces will make for easier learning.
- Be sure to set the Vector Action's Space Size to the number of used Vector
Actions, and not greater, as doing the latter can interfere with the
- In general, fewer actions will make for easier learning.
- Be sure to set the Continuous Action Size and Discrete Branch Size to the desired
number for each type of action, and not greater, as doing the latter can interfere with the
efficiency of the training process.
- Continuous action values should be clipped to an
appropriate range. The provided PPO model automatically clips these values

be stacked and used collectively for decision making. This results in the
effective size of the vector observation being passed to the Policy being:
_Space Size_ x _Stacked Vectors_.
- `Vector Action`
- `Space Type` - Corresponds to whether action vector contains a single
integer (Discrete) or a series of real-valued floats (Continuous).
- `Space Size` (Continuous) - Length of action vector.
- `Branches` (Discrete) - An array of integers, defines multiple concurrent
discrete actions. The values in the `Branches` array correspond to the
number of possible discrete values for each action branch.
- `Actions`
- `Continuous Actions` - The number of concurrent continuous actions that
the Agent can take.
- `Discrete Branches` - An array of integers, defines multiple concurrent
discrete actions. The values in the `Discrete Branches` array correspond
to the number of possible discrete values for each action branch.
- `Model` - The neural network model used for inference (obtained after
training)
- `Inference Device` - Whether to use CPU or GPU to run the model during

38
docs/Learning-Environment-Examples.md


- +1.0 for arriving at optimal state.
- Behavior Parameters:
- Vector Observation space: One variable corresponding to current state.
- Vector Action space: (Discrete) Two possible actions (Move left, move
- Actions: 1 discrete action branch with 3 actions (Move left, do nothing, move
right).
- Visual Observations: None
- Float Properties: None

cube, and position and velocity of ball.
- Vector Observation space (Hard Version): 5 variables corresponding to
rotation of the agent cube and position of ball.
- Vector Action space: (Continuous) Size of 2, with one value corresponding to
- Actions: 2 continuous actions, with one value corresponding to
X-rotation, and the other to Z-rotation.
- Visual Observations: Third-person view from the upper-front of the agent. Use
`Visual3DBall` scene.

- -1.0 if the agent navigates to an obstacle (episode ends).
- Behavior Parameters:
- Vector Observation space: None
- Vector Action space: (Discrete) Size of 4, corresponding to movement in
cardinal directions. Note that for this environment,
- Actions: 1 discrete action branch with 5 actions, corresponding to movement in
cardinal directions or not moving. Note that for this environment,
[action masking](Learning-Environment-Design-Agents.md#masking-discrete-actions)
is turned on by default (this option can be toggled using the `Mask Actions`
checkbox within the `trueAgent` GameObject). The trained model file provided

- Behavior Parameters:
- Vector Observation space: 9 variables corresponding to position, velocity
and orientation of ball and racket.
- Vector Action space: (Continuous) Size of 3, corresponding to movement
- Actions: 3 continuous actions, corresponding to movement
toward net or away from net, jumping and rotation.
- Visual Observations: None
- Float Properties: Three

- Vector Observation space: (Continuous) 70 variables corresponding to 14
ray-casts each detecting one of three possible objects (wall, goal, or
block).
- Vector Action space: (Discrete) Size of 6, corresponding to turn clockwise
and counterclockwise and move along four different face directions.
- Actions: 1 discrete action branch with 7 actions, corresponding to turn clockwise
and counterclockwise, move along four different face directions, or do nothing.
- Visual Observations (Optional): One first-person camera. Use
`VisualPushBlock` scene. **The visual observation version of this
environment does not train with the provided default training parameters.**

- Vector Observation space: Size of 74, corresponding to 14 ray casts each
detecting 4 possible objects, plus the global position of the agent and
whether or not the agent is grounded.
- Vector Action space: (Discrete) 4 Branches:
- Actions: 4 discrete action branches:
- Forward Motion (3 possible actions: Forward, Backwards, No Action)
- Rotation (3 possible actions: Rotate Left, Rotate Right, No Action)
- Side Motion (3 possible actions: Left, Right, No Action)

- Behavior Parameters:
- Vector Observation space: 26 variables corresponding to position, rotation,
velocity, and angular velocities of the two arm rigid bodies.
- Vector Action space: (Continuous) Size of 4, corresponding to torque
- Actions: 4 continuous actions, corresponding to torque
applicable to two joints.
- Visual Observations: None.
- Float Properties: Five

- Vector Observation space: 172 variables corresponding to position, rotation,
velocity, and angular velocities of each limb plus the acceleration and
angular acceleration of the body.
- Vector Action space: (Continuous) Size of 20, corresponding to target
- Actions: 20 continuous actions, corresponding to target
rotations for joints.
- Visual Observations: None
- Float Properties: None

- Vector Observation space: 64 variables corresponding to position, rotation,
velocity, and angular velocities of each limb plus the acceleration and
angular acceleration of the body.
- Vector Action space: (Continuous) Size of 9, corresponding to target
- Actions: 9 continuous actions, corresponding to target
rotations for joints.
- Visual Observations: None
- Float Properties: None

agent is frozen and/or shot its laser (2), plus ray-based perception of
objects around agent's forward direction (49; 7 raycast angles with 7
measurements for each).
- Vector Action space: (Discrete) 4 Branches:
- Actions: 4 discrete action branches:
- Forward Motion (3 possible actions: Forward, Backwards, No Action)
- Side Motion (3 possible actions: Left, Right, No Action)
- Rotation (3 possible actions: Rotate Left, Rotate Right, No Action)

- Behavior Parameters:
- Vector Observation space: 30 corresponding to local ray-casts detecting
objects, goals, and walls.
- Vector Action space: (Discrete) 1 Branch, 4 actions corresponding to agent
- Actions: 1 discrete action Branch, with 4 actions corresponding to agent
rotation and forward/backward movement.
- Visual Observations (Optional): First-person view for the agent. Use
`VisualHallway` scene. **The visual observation version of this environment

- Behavior Parameters:
- Vector Observation space: 6 corresponding to local position of agent and
green cube.
- Vector Action space: (Continuous) 3 corresponding to agent force applied for
- Actions: 3 continuous actions corresponding to agent force applied for
the jump.
- Visual Observations: None
- Float Properties: Two

degrees each detecting 6 possible object types, along with the object's
distance. The forward ray-casts contribute 264 state dimensions and backward
72 state dimensions over three observation stacks.
- Vector Action space: (Discrete) Three branched actions corresponding to
- Actions: 3 discrete branched actions corresponding to
forward, backward, sideways movement, as well as rotation.
- Visual Observations: None
- Float Properties: Two

degrees each detecting 5 possible object types, along with the object's
distance. The forward ray-casts contribute 231 state dimensions and backward
63 state dimensions over three observation stacks.
- Striker Vector Action space: (Discrete) Three branched actions corresponding
- Striker Actions: 3 discrete branched actions corresponding
- Goalie Vector Action space: (Discrete) Three branched actions corresponding
- Goalie Actions: 3 discrete branched actions corresponding
to forward, backward, sideways movement, as well as rotation.
- Visual Observations: None
- Float Properties: Two

- Behavior Parameters:
- Vector Observation space: 243 variables corresponding to position, rotation,
velocity, and angular velocities of each limb, along with goal direction.
- Vector Action space: (Continuous) Size of 39, corresponding to target
- Actions: 39 continuous actions, corresponding to target
rotations and strength applicable to the joints.
- Visual Observations: None
- Float Properties: Four

- Vector Observation space: 148 corresponding to local ray-casts detecting
switch, bricks, golden brick, and walls, plus variable indicating switch
state.
- Vector Action space: (Discrete) 4 corresponding to agent rotation and
- Actions: 1 discrete action branch, with 4 actions corresponding to agent rotation and
forward/backward movement.
- Visual Observations (Optional): First-person camera per-agent. Use
`VisualPyramids` scene. **The visual observation version of this environment

3
docs/Learning-Environment-Executable.md


Number of Visual Observations (per agent): 0
Vector Observation space size (per agent): 8
Number of stacked Vector Observation: 1
Vector Action space type: continuous
Vector Action space size (per agent): [2]
Vector Action descriptions: ,
INFO:mlagents_envs:Hyperparameters for the PPO Trainer of brain Ball3DLearning:
batch_size: 64
beta: 0.001

4
docs/ML-Agents-Overview.md


one in which opposing agents are equal in form, function and objective. Examples
of symmetric games are our Tennis and Soccer example environments. In
reinforcement learning, this means both agents have the same observation and
action spaces and learn from the same reward function and so _they can share the
actions and learn from the same reward function and so _they can share the
have the same observation or action spaces and so sharing policy networks is not
have the same observation or actions and so sharing policy networks is not
necessarily ideal.
With self-play, an agent learns in adversarial games by competing against fixed,

3
docs/Python-API.md


name of the group the Agent belongs to and `agent_id` is the integer
identifier of the Agent. `action` is an `ActionTuple` as described above.
**Note:** If no action is provided for an agent group between two calls to
`env.step()` then the default action will be all zeros (in either discrete or
continuous action space)
`env.step()` then the default action will be all zeros.
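
That default is equivalent to explicitly sending all-zero actions yourself. A minimal sketch, assuming a behavior with 2 continuous actions and 1 discrete branch (the behavior name and sizes are placeholders, and `env` is an already-created `UnityEnvironment`):

```python
import numpy as np
from mlagents_envs.base_env import ActionTuple

behavior_name = "MyBehavior?team=0"  # placeholder name
decision_steps, terminal_steps = env.get_steps(behavior_name)
num_agents = len(decision_steps)

# All-zero actions for every agent that requested a decision:
# here, 2 continuous actions and 1 discrete branch per agent.
action = ActionTuple(
    continuous=np.zeros((num_agents, 2), dtype=np.float32),
    discrete=np.zeros((num_agents, 1), dtype=np.int32),
)
env.set_actions(behavior_name, action)
env.step()
```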
#### DecisionSteps and DecisionStep

4
docs/Training-Configuration-File.md


| `init_path` | (default = None) Initialize trainer from a previously saved model. Note that the prior run should have used the same trainer configurations as the current run, and have been saved with the same version of ML-Agents. <br><br>You should provide the full path to the folder where the checkpoints were saved, e.g. `./models/{run-id}/{behavior_name}`. This option is provided in case you want to initialize different behaviors from different runs; in most cases, it is sufficient to use the `--initialize-from` CLI parameter to initialize all models from the same run. |
| `threaded` | (default = `true`) By default, model updates can happen while the environment is being stepped. This violates the [on-policy](https://spinningup.openai.com/en/latest/user/algorithms.html#the-on-policy-algorithms) assumption of PPO slightly in exchange for a training speedup. To maintain the strict on-policyness of PPO, you can disable parallel updates by setting `threaded` to `false`. There is usually no reason to turn `threaded` off for SAC. |
| `hyperparameters -> learning_rate` | (default = `3e-4`) Initial learning rate for gradient descent. Corresponds to the strength of each gradient descent update step. This should typically be decreased if training is unstable, and the reward does not consistently increase. <br><br>Typical range: `1e-5` - `1e-3` |
| `hyperparameters -> batch_size` | Number of experiences in each iteration of gradient descent. **This should always be multiple times smaller than `buffer_size`**. If you are using a continuous action space, this value should be large (in the order of 1000s). If you are using a discrete action space, this value should be smaller (in order of 10s). <br><br> Typical range: (Continuous - PPO): `512` - `5120`; (Continuous - SAC): `128` - `1024`; (Discrete, PPO & SAC): `32` - `512`. |
| `hyperparameters -> batch_size` | Number of experiences in each iteration of gradient descent. **This should always be multiple times smaller than `buffer_size`**. If you are using continuous actions, this value should be large (on the order of 1000s). If you are using only discrete actions, this value should be smaller (on the order of 10s). <br><br> Typical range: (Continuous - PPO): `512` - `5120`; (Continuous - SAC): `128` - `1024`; (Discrete, PPO & SAC): `32` - `512`. |
| `hyperparameters -> buffer_size` | (default = `10240` for PPO and `50000` for SAC)<br> **PPO:** Number of experiences to collect before updating the policy model. Corresponds to how many experiences should be collected before we do any learning or updating of the model. **This should be multiple times larger than `batch_size`**. Typically a larger `buffer_size` corresponds to more stable training updates. <br> **SAC:** The max size of the experience buffer - on the order of thousands of times longer than your episodes, so that SAC can learn from old as well as new experiences. <br><br>Typical range: PPO: `2048` - `409600`; SAC: `50000` - `1000000` |
| `hyperparameters -> learning_rate_schedule` | (default = `linear` for PPO and `constant` for SAC) Determines how learning rate changes over time. For PPO, we recommend decaying learning rate until max_steps so learning converges more stably. However, for some cases (e.g. training for an unknown amount of time) this feature can be disabled. For SAC, we recommend holding learning rate constant so that the agent can continue to learn until its Q function converges naturally. <br><br>`linear` decays the learning_rate linearly, reaching 0 at max_steps, while `constant` keeps the learning rate constant for the entire training run. |
| `network_settings -> hidden_units` | (default = `128`) Number of units in the hidden layers of the neural network. Corresponds to how many units are in each fully connected layer of the neural network. For simple problems where the correct action is a straightforward combination of the observation inputs, this should be small. For problems where the action is a very complex interaction between the observation variables, this should be larger. <br><br> Typical range: `32` - `512` |

A few considerations when deciding to use memory:
- LSTM does not work well with continuous vector actions. Please use
- LSTM does not work well with continuous actions. Please use
discrete actions for better results.
- Since the memories must be sent back and forth between Python and Unity, using
too large `memory_size` will slow down training.

11
docs/Training-on-Microsoft-Azure.md


1. [Move](https://docs.microsoft.com/en-us/azure/virtual-machines/linux/copy-files-to-linux-vm-using-scp)
the `ml-agents` sub-folder of this ml-agents repo to the remote Azure
instance, and set it as the working directory.
2. Install the required packages with `pip3 install .`.
2. Install the required packages:
Torch: `pip3 install torch==1.7.0 -f https://download.pytorch.org/whl/torch_stable.html` and
MLAgents: `pip3 install mlagents`
## Testing

```python
from mlagents_envs.environment import UnityEnvironment
env = UnityEnvironment(<your_env>)
env = UnityEnvironment(file_name="<your_env>", seed=1, side_channels=[])
Where `<your_env>` corresponds to the path to your environment executable.
Where `<your_env>` corresponds to the path to your environment executable (e.g. `/home/UserName/Build/yourFile`).
**Note:** When running your environment in headless mode, you must append `--no-graphics` to your mlagents-learn command; otherwise training will not run.
You can verify this by aborting a training run and checking whether it reports "Model Saved" or "Aborted", or by checking whether a .onnx file was generated in the results folder.
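
For a quick connectivity check before launching a full training run, a sketch along these lines can help (the build path is a placeholder; `behavior_specs` is only populated after `reset()`):

```python
from mlagents_envs.environment import UnityEnvironment

# Placeholder path; point this at your own Linux build.
env = UnityEnvironment(
    file_name="/home/UserName/Build/yourFile", seed=1, side_channels=[]
)
env.reset()
print("Connected. Behaviors:", list(env.behavior_specs.keys()))
env.close()
```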
## Running Training on your Virtual Machine

4
ml-agents-envs/mlagents_envs/base_env.py


since the last simulation step.
- agent_id is an int and a unique identifier for the corresponding Agent.
- action_mask is an optional list of one-dimensional arrays of booleans.
Only available in multi-discrete action space type.
Only available when using multi-discrete actions.
Each array corresponds to an action branch. Each array contains a mask
for each action of the branch. If true, the action is not available for
the agent during this simulation step.

identifier for the corresponding Agent. This is used to track Agents
across simulation steps.
- action_mask is an optional list of two-dimensional arrays of booleans.
Only available in multi-discrete action space type.
Only available when using multi-discrete actions.
Each array corresponds to an action branch. The first dimension of each
array is the batch size and the second contains a mask for each action of
the branch. If true, the action is not available for the agent during
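
A sketch of how such a per-branch mask from a single DecisionStep might be consumed on the Python side (the branch sizes are made up for illustration):

```python
import numpy as np

# Hypothetical mask for two discrete branches of sizes 3 and 2.
# True means the action is NOT available this step.
action_mask = [
    np.array([False, True, False]),  # branch 0: action 1 is masked out
    np.array([False, False]),        # branch 1: all actions allowed
]

# Sample a random allowed action for each branch.
chosen = [int(np.random.choice(np.flatnonzero(~mask))) for mask in action_mask]
print(chosen)  # e.g. [2, 1]
```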

14
ml-agents/mlagents/trainers/cli_utils.py


logger = logging_util.get_logger(__name__)
class RaiseDeprecationWarning(argparse.Action):
class RaiseRemovedWarning(argparse.Action):
"""
Internal custom Action to raise warning when argument is called.
"""

def __call__(self, arg_parser, namespace, values, option_string=None):
logger.warning(f"The command line argument {option_string} was deprecated")
logger.warning(f"The command line argument {option_string} was removed.")
class DetectDefault(argparse.Action):

argparser.add_argument(
"--torch",
default=False,
action=RaiseDeprecationWarning,
help="(Deprecated) Use the PyTorch framework. Note that this option is not required anymore as PyTorch is the"
"default framework, and will be removed in the next release.",
action=RaiseRemovedWarning,
help="(Removed) Use the PyTorch framework.",
action=RaiseDeprecationWarning,
help="(Deprecated) Use the TensorFlow framework instead of PyTorch. Install TensorFlow "
"before using this option.",
action=RaiseRemovedWarning,
help="(Removed) Use the TensorFlow framework.",
)
eng_conf = argparser.add_argument_group(title="Engine Configuration")

2
ml-agents/mlagents/trainers/demo_loader.py


# check action dimensions in demonstration match
if behavior_spec.action_spec != expected_behavior_spec.action_spec:
raise RuntimeError(
"The action spaces {} in demonstration do not match the policy's {}.".format(
"The actions {} in demonstration do not match the policy's {}.".format(
behavior_spec.action_spec, expected_behavior_spec.action_spec
)
)

4
ml-agents/mlagents/trainers/policy/torch_policy.py


"""
Policy that uses a multilayer perceptron to map the observations to actions. Could
also use a CNN to encode visual input prior to the MLP. Supports discrete and
continuous action spaces, as well as recurrent networks.
continuous actions, as well as recurrent networks.
:param seed: Random seed.
:param behavior_spec: Assigned BehaviorSpec object.
:param trainer_settings: Defined training parameters.

:param seq_len: Sequence length when using RNN.
:return: Tuple of AgentAction, ActionLogProbs, entropies, and output memories.
"""
actions, log_probs, entropies, _, memories = self.actor_critic.get_action_stats_and_value(
actions, log_probs, entropies, memories = self.actor_critic.get_action_stats(
obs, masks, memories, seq_len
)
return (actions, log_probs, entropies, memories)

3
ml-agents/mlagents/trainers/tests/mock_brain.py


:int num_agents: Number of "agents" to imitate.
:List observation_shapes: A List of the observation spaces in your steps
:int num_vector_acts: Number of actions in your action space
:bool discrete: Whether or not action space is discrete
:ActionSpec action_spec: ActionSpec for the agent
:bool done: Whether all the agents in the batch are done
"""
obs_list = []

2
ml-agents/mlagents/trainers/tests/torch/test_hybrid.py


SAC_TORCH_CONFIG,
hyperparameters=new_hyperparams,
network_settings=new_networksettings,
max_steps=4000,
max_steps=3500,
)
check_environment_trains(env, {BRAIN_NAME: config})

84
ml-agents/mlagents/trainers/torch/networks.py


"""
pass
def get_action_stats(
self,
inputs: List[torch.Tensor],
masks: Optional[torch.Tensor] = None,
memories: Optional[torch.Tensor] = None,
sequence_length: int = 1,
) -> Tuple[AgentAction, ActionLogProbs, torch.Tensor, torch.Tensor]:
"""
Returns sampled actions.
If memory is enabled, return the memories as well.
:param inputs: A List of observation inputs (vector and visual) as tensors.
:param masks: If using discrete actions, a Tensor of action masks.
:param memories: If using memory, a Tensor of initial memories.
:param sequence_length: If using memory, the sequence length.
:return: A Tuple of AgentAction, ActionLogProbs, entropies, and memories.
Memories will be None if not using memory.
"""
pass
@abc.abstractmethod
def forward(
self,

AgentAction, ActionLogProbs, torch.Tensor, Dict[str, torch.Tensor], torch.Tensor
]:
"""
Returns distributions, from which actions can be sampled, and value estimates.
Returns sampled actions and value estimates.
If memory is enabled, return the memories as well.
:param inputs: A List of vector inputs as tensors.
:param masks: If using discrete actions, a Tensor of action masks.

def update_normalization(self, buffer: AgentBuffer) -> None:
self.network_body.update_normalization(buffer)
def get_action_stats(
self,
inputs: List[torch.Tensor],
masks: Optional[torch.Tensor] = None,
memories: Optional[torch.Tensor] = None,
sequence_length: int = 1,
) -> Tuple[AgentAction, ActionLogProbs, torch.Tensor, torch.Tensor]:
encoding, memories = self.network_body(
inputs, memories=memories, sequence_length=sequence_length
)
action, log_probs, entropies = self.action_model(encoding, masks)
return action, log_probs, entropies, memories
def forward(
self,

def memory_size(self) -> int:
return self.network_body.memory_size + self.critic.memory_size
def _get_actor_critic_mem(
self, memories: Optional[torch.Tensor] = None
) -> Tuple[Optional[torch.Tensor], Optional[torch.Tensor]]:
if self.use_lstm and memories is not None:
# Use only the back half of memories for critic and actor
actor_mem, critic_mem = torch.split(memories, self.memory_size // 2, dim=-1)
else:
critic_mem = None
actor_mem = None
return actor_mem, critic_mem
def critic_pass(
self,
inputs: List[torch.Tensor],

actor_mem, critic_mem = None, None
if self.use_lstm:
# Use only the back half of memories for critic
actor_mem, critic_mem = torch.split(memories, self.memory_size // 2, -1)
actor_mem, critic_mem = self._get_actor_critic_mem(memories)
value_outputs, critic_mem_out = self.critic(
inputs, memories=critic_mem, sequence_length=sequence_length
)

memories: Optional[torch.Tensor] = None,
sequence_length: int = 1,
) -> Tuple[ActionLogProbs, torch.Tensor, Dict[str, torch.Tensor]]:
if self.use_lstm:
# Use only the back half of memories for critic and actor
actor_mem, critic_mem = torch.split(memories, self.memory_size // 2, dim=-1)
else:
critic_mem = None
actor_mem = None
actor_mem, critic_mem = self._get_actor_critic_mem(memories)
encoding, actor_mem_outs = self.network_body(
inputs, memories=actor_mem, sequence_length=sequence_length
)

return log_probs, entropies, value_outputs
def get_action_stats(
self,
inputs: List[torch.Tensor],
masks: Optional[torch.Tensor] = None,
memories: Optional[torch.Tensor] = None,
sequence_length: int = 1,
) -> Tuple[AgentAction, ActionLogProbs, torch.Tensor, torch.Tensor]:
actor_mem, critic_mem = self._get_actor_critic_mem(memories)
action, log_probs, entropies, actor_mem_out = super().get_action_stats(
inputs, masks=masks, memories=actor_mem, sequence_length=sequence_length
)
if critic_mem is not None:
# Make memories with the actor mem unchanged
memories_out = torch.cat([actor_mem_out, critic_mem], dim=-1)
else:
memories_out = None
return action, log_probs, entropies, memories_out
def get_action_stats_and_value(
self,
inputs: List[torch.Tensor],

) -> Tuple[
AgentAction, ActionLogProbs, torch.Tensor, Dict[str, torch.Tensor], torch.Tensor
]:
if self.use_lstm:
# Use only the back half of memories for critic and actor
actor_mem, critic_mem = torch.split(memories, self.memory_size // 2, dim=-1)
else:
critic_mem = None
actor_mem = None
actor_mem, critic_mem = self._get_actor_critic_mem(memories)
encoding, actor_mem_outs = self.network_body(
inputs, memories=actor_mem, sequence_length=sequence_length
)

93
docs/images/monitor.png
