
Merge master and add Saver class for save/load checkpoints

/develop/add-fire
GitHub · 4 years ago
Current commit bd6bcd2f
146 files changed, with 6,389 insertions and 3,543 deletions
  1. .yamato/com.unity.ml-agents-performance.yml (2)
  2. Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/DirectionIndicator.cs (20)
  3. Project/Assets/ML-Agents/Examples/Walker/Scenes/WalkerDynamic.unity (977)
  4. Project/Assets/ML-Agents/Examples/Walker/Scenes/WalkerDynamic.unity.meta (2)
  5. Project/Assets/ML-Agents/Examples/Walker/Scenes/WalkerStatic.unity (962)
  6. Project/Assets/ML-Agents/Examples/Walker/Scripts/WalkerAgent.cs (161)
  7. Project/Assets/ML-Agents/Examples/Walker/TFModels/WalkerDynamic.nn (1001)
  8. Project/Assets/ML-Agents/Examples/Walker/TFModels/WalkerDynamic.nn.meta (2)
  9. Project/Assets/ML-Agents/Examples/Walker/TFModels/WalkerStatic.nn (1001)
  10. Project/Assets/ML-Agents/Examples/Walker/TFModels/WalkerStatic.nn.meta (2)
  11. com.unity.ml-agents.extensions/Editor/Unity.ML-Agents.Extensions.Editor.asmdef (4)
  12. com.unity.ml-agents.extensions/Runtime/AssemblyInfo.cs (1)
  13. com.unity.ml-agents.extensions/Runtime/Sensors/ArticulationBodyPoseExtractor.cs (29)
  14. com.unity.ml-agents.extensions/Runtime/Sensors/ArticulationBodySensorComponent.cs (8)
  15. com.unity.ml-agents.extensions/Runtime/Sensors/PhysicsBodySensor.cs (54)
  16. com.unity.ml-agents.extensions/Runtime/Sensors/PoseExtractor.cs (117)
  17. com.unity.ml-agents.extensions/Runtime/Sensors/RigidBodyPoseExtractor.cs (80)
  18. com.unity.ml-agents.extensions/Runtime/Sensors/RigidBodySensorComponent.cs (68)
  19. com.unity.ml-agents.extensions/Tests/Editor/Sensors/PoseExtractorTests.cs (88)
  20. com.unity.ml-agents.extensions/Tests/Editor/Sensors/RigidBodyPoseExtractorTests.cs (67)
  21. com.unity.ml-agents/CHANGELOG.md (5)
  22. com.unity.ml-agents/Runtime/Actuators/ActionSegment.cs (33)
  23. com.unity.ml-agents/Runtime/Actuators/ActionSpec.cs (2)
  24. com.unity.ml-agents/Runtime/Actuators/ActuatorManager.cs (51)
  25. com.unity.ml-agents/Runtime/Actuators/IActionReceiver.cs (72)
  26. com.unity.ml-agents/Runtime/Actuators/IActuator.cs (2)
  27. com.unity.ml-agents/Runtime/Actuators/IDiscreteActionMask.cs (2)
  28. com.unity.ml-agents/Runtime/Actuators/VectorActuator.cs (2)
  29. com.unity.ml-agents/Runtime/Agent.cs (201)
  30. com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs (14)
  31. com.unity.ml-agents/Runtime/Communicator/RpcCommunicator.cs (2)
  32. com.unity.ml-agents/Runtime/DecisionRequester.cs (6)
  33. com.unity.ml-agents/Runtime/DiscreteActionMasker.cs (118)
  34. com.unity.ml-agents/Runtime/Policies/BarracudaPolicy.cs (17)
  35. com.unity.ml-agents/Runtime/Policies/BehaviorParameters.cs (37)
  36. com.unity.ml-agents/Runtime/Policies/HeuristicPolicy.cs (20)
  37. com.unity.ml-agents/Runtime/Policies/IPolicy.cs (3)
  38. com.unity.ml-agents/Runtime/Policies/RemotePolicy.cs (15)
  39. com.unity.ml-agents/Tests/Editor/Actuators/ActuatorManagerTests.cs (46)
  40. com.unity.ml-agents/Tests/Editor/BehaviorParameterTests.cs (3)
  41. com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs (4)
  42. config/ppo/WalkerDynamic.yaml (2)
  43. config/ppo/WalkerStatic.yaml (2)
  44. docs/Learning-Environment-Examples.md (31)
  45. ml-agents-envs/mlagents_envs/exception.py (8)
  46. ml-agents/mlagents/trainers/ghost/trainer.py (2)
  47. ml-agents/mlagents/trainers/optimizer/tf_optimizer.py (3)
  48. ml-agents/mlagents/trainers/policy/policy.py (20)
  49. ml-agents/mlagents/trainers/policy/tf_policy.py (131)
  50. ml-agents/mlagents/trainers/policy/torch_policy.py (72)
  51. ml-agents/mlagents/trainers/ppo/optimizer_tf.py (10)
  52. ml-agents/mlagents/trainers/ppo/optimizer_torch.py (3)
  53. ml-agents/mlagents/trainers/ppo/trainer.py (32)
  54. ml-agents/mlagents/trainers/sac/optimizer.py (2)
  55. ml-agents/mlagents/trainers/sac/optimizer_torch.py (9)
  56. ml-agents/mlagents/trainers/sac/trainer.py (31)
  57. ml-agents/mlagents/trainers/settings.py (8)
  58. ml-agents/mlagents/trainers/stats.py (41)
  59. ml-agents/mlagents/trainers/tests/mock_brain.py (3)
  60. ml-agents/mlagents/trainers/tests/test_barracuda_converter.py (27)
  61. ml-agents/mlagents/trainers/tests/test_bcmodule.py (10)
  62. ml-agents/mlagents/trainers/tests/test_env_param_manager.py (62)
  63. ml-agents/mlagents/trainers/tests/test_nn_policy.py (175)
  64. ml-agents/mlagents/trainers/tests/test_ppo.py (8)
  65. ml-agents/mlagents/trainers/tests/test_reward_signals.py (1)
  66. ml-agents/mlagents/trainers/tests/test_rl_trainer.py (25)
  67. ml-agents/mlagents/trainers/tests/test_sac.py (7)
  68. ml-agents/mlagents/trainers/tests/test_simple_rl.py (8)
  69. ml-agents/mlagents/trainers/tests/test_tf_policy.py (20)
  70. ml-agents/mlagents/trainers/tests/torch/test_bcmodule.py (8)
  71. ml-agents/mlagents/trainers/tf/models.py (26)
  72. ml-agents/mlagents/trainers/torch/networks.py (43)
  73. ml-agents/mlagents/trainers/trainer/rl_trainer.py (43)
  74. ml-agents/mlagents/trainers/trainer/trainer.py (2)
  75. ml-agents/mlagents/trainers/trainer_controller.py (10)
  76. Project/Assets/ML-Agents/Examples/SharedAssets/Prefabs/Targets/DynamicTarget.prefab (21)
  77. Project/Assets/ML-Agents/Examples/SharedAssets/Prefabs/Targets/StaticTarget.prefab (19)
  78. Project/Assets/ML-Agents/Examples/Walker/Prefabs/Ragdoll/WalkerRagdollBase.prefab (82)
  79. Project/Assets/ML-Agents/Examples/SharedAssets/Prefabs/PlatformDynamicTarget.prefab (523)
  80. Project/Assets/ML-Agents/Examples/SharedAssets/Prefabs/PlatformDynamicTarget.prefab.meta (7)
  81. Project/Assets/ML-Agents/Examples/SharedAssets/Prefabs/Targets.meta (8)
  82. Project/Assets/ML-Agents/Examples/Walker/Demos/ExpertWalkerDy.demo.meta (10)
  83. Project/Assets/ML-Agents/Examples/Walker/Demos/ExpertWalkerDyVS.demo.meta (10)
  84. Project/Assets/ML-Agents/Examples/Walker/Demos/ExpertWalkerStVS.demo.meta (10)
  85. Project/Assets/ML-Agents/Examples/Walker/Demos/ExpertWalkerSta.demo.meta (10)
  86. Project/Assets/ML-Agents/Examples/Walker/Prefabs/Platforms.meta (8)
  87. Project/Assets/ML-Agents/Examples/Walker/Prefabs/Ragdoll.meta (8)
  88. Project/Assets/ML-Agents/Examples/Walker/Scenes/WalkerDynamicVariableSpeed.unity (1001)
  89. Project/Assets/ML-Agents/Examples/Walker/Scenes/WalkerDynamicVariableSpeed.unity.meta (7)
  90. Project/Assets/ML-Agents/Examples/Walker/Scenes/WalkerStaticVariableSpeed.unity (1001)
  91. Project/Assets/ML-Agents/Examples/Walker/Scenes/WalkerStaticVariableSpeed.unity.meta (9)
  92. Project/Assets/ML-Agents/Examples/Walker/TFModels/WalkerDynamicVariableSpeed.nn (1001)
  93. Project/Assets/ML-Agents/Examples/Walker/TFModels/WalkerDynamicVariableSpeed.nn.meta (11)

.yamato/com.unity.ml-agents-performance.yml (2)


variables:
UNITY_VERSION: {{ editor.version }}
commands:
- python -m pip install unity-downloader-cli --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
- python -m pip install unity-downloader-cli --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple --upgrade
- unity-downloader-cli -u {{ editor.version }} -c editor --wait --fast
- curl -s https://artifactory.internal.unity3d.com/core-automation/tools/utr-standalone/utr --output utr
- chmod +x ./utr

Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/DirectionIndicator.cs (20)


using System.Collections;
using System.Collections.Generic;
using UnityEngine;
public bool updatedByAgent; //should this be updated by the agent? If not, it will use local settings
void OnEnable()
{
m_StartingYPos = transform.position.y;

{
transform.position = new Vector3(transformToFollow.position.x, m_StartingYPos + heightOffset, transformToFollow.position.z);
if (updatedByAgent)
return;
transform.position = new Vector3(transformToFollow.position.x, m_StartingYPos + heightOffset,
transformToFollow.position.z);
}
//Public method to allow an agent to directly update this component
public void MatchOrientation(Transform t)
{
transform.position = new Vector3(t.position.x, m_StartingYPos + heightOffset, t.position.z);
transform.rotation = t.rotation;
}
}
}
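Pieced together only from the lines visible in this hunk, a rough sketch of the updated component: the agent can now drive the indicator directly through MatchOrientation() when updatedByAgent is set. The transformToFollow and heightOffset fields are assumptions (referenced but not declared in the visible fragment), as is the lack of a namespace.

```csharp
using UnityEngine;

// Sketch of the updated DirectionIndicator, reconstructed from the hunk above.
public class DirectionIndicator : MonoBehaviour
{
    public bool updatedByAgent; // should this be updated by the agent? If not, it uses local settings
    public Transform transformToFollow; // assumed field: the target the indicator tracks
    public float heightOffset;          // assumed field: vertical offset above the starting height
    float m_StartingYPos;

    void OnEnable()
    {
        m_StartingYPos = transform.position.y;
    }

    void Update()
    {
        if (updatedByAgent)
            return;
        transform.position = new Vector3(transformToFollow.position.x,
            m_StartingYPos + heightOffset, transformToFollow.position.z);
    }

    // Public method so an agent can update this component directly.
    public void MatchOrientation(Transform t)
    {
        transform.position = new Vector3(t.position.x, m_StartingYPos + heightOffset, t.position.z);
        transform.rotation = t.rotation;
    }
}
```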

Project/Assets/ML-Agents/Examples/Walker/Scenes/WalkerDynamic.unity (977): file diff too large to display

Project/Assets/ML-Agents/Examples/Walker/Scenes/WalkerDynamic.unity.meta (2)


fileFormatVersion: 2
guid: 79d5d2687bfbe45f5b78bd6c04992e0d
guid: 65c87f50b8c81433d8fd7f6550773467
DefaultImporter:
externalObjects: {}
userData:

Project/Assets/ML-Agents/Examples/Walker/Scenes/WalkerStatic.unity (962): file diff too large to display

Project/Assets/ML-Agents/Examples/Walker/Scripts/WalkerAgent.cs (161)


using System;
using MLAgentsExamples;
using UnityEngine;
using Unity.MLAgents;
using Unity.MLAgentsExamples;

public class WalkerAgent : Agent
{
public float maximumWalkingSpeed = 999; //The max walk velocity magnitude an agent will be rewarded for
Vector3 m_WalkDir; //Direction to the target
// Quaternion m_WalkDirLookRot; //Will hold the rotation to our target
[Header("Walk Speed")]
[Range(0.1f, 10)]
[SerializeField]
//The walking speed to try and achieve
private float m_TargetWalkingSpeed = 10;
public float MTargetWalkingSpeed // property
{
get { return m_TargetWalkingSpeed; }
set { m_TargetWalkingSpeed = Mathf.Clamp(value, .1f, m_maxWalkingSpeed); }
}
const float m_maxWalkingSpeed = 10; //The max walking speed
//Should the agent sample a new goal velocity each episode?
//If true, walkSpeed will be randomly set between zero and m_maxWalkingSpeed in OnEpisodeBegin()
//If false, the goal velocity will be walkingSpeed
public bool randomizeWalkSpeedEachEpisode;
//The direction an agent will walk during training.
private Vector3 m_WorldDirToWalk = Vector3.right;
[Header("Target To Walk Towards")] [Space(10)]
public TargetController target; //Target the agent will walk towards.
[Header("Target To Walk Towards")] public Transform target; //Target the agent will walk towards during training.
[Header("Body Parts")] [Space(10)] public Transform hips;
[Header("Body Parts")] public Transform hips;
public Transform chest;
public Transform spine;
public Transform head;

public Transform forearmR;
public Transform handR;
[Header("Orientation")] [Space(10)]
public OrientationCubeController orientationCube;
OrientationCubeController m_OrientationCube;
//The indicator graphic gameobject that points towards the target
DirectionIndicator m_DirectionIndicator;
orientationCube.UpdateOrientation(hips, target.transform);
m_OrientationCube = GetComponentInChildren<OrientationCubeController>();
m_DirectionIndicator = GetComponentInChildren<DirectionIndicator>();
//Setup each body part
m_JdController = GetComponent<JointDriveController>();

}
//Random start rotation to help generalize
transform.rotation = Quaternion.Euler(0, Random.Range(0.0f, 360.0f), 0);
hips.rotation = Quaternion.Euler(0, Random.Range(0.0f, 360.0f), 0);
UpdateOrientationObjects();
orientationCube.UpdateOrientation(hips, target.transform);
//Set our goal walking speed
MTargetWalkingSpeed =
randomizeWalkSpeedEachEpisode ? Random.Range(0.1f, m_maxWalkingSpeed) : MTargetWalkingSpeed;
SetResetParameters();
}

//Get velocities in the context of our orientation cube's space
//Note: You can get these velocities in world space as well but it may not train as well.
sensor.AddObservation(orientationCube.transform.InverseTransformDirection(bp.rb.velocity));
sensor.AddObservation(orientationCube.transform.InverseTransformDirection(bp.rb.angularVelocity));
sensor.AddObservation(m_OrientationCube.transform.InverseTransformDirection(bp.rb.velocity));
sensor.AddObservation(m_OrientationCube.transform.InverseTransformDirection(bp.rb.angularVelocity));
sensor.AddObservation(orientationCube.transform.InverseTransformDirection(bp.rb.position - hips.position));
sensor.AddObservation(m_OrientationCube.transform.InverseTransformDirection(bp.rb.position - hips.position));
if (bp.rb.transform != hips && bp.rb.transform != handL && bp.rb.transform != handR)
{

/// </summary>
public override void CollectObservations(VectorSensor sensor)
{
sensor.AddObservation(Quaternion.FromToRotation(hips.forward, orientationCube.transform.forward));
sensor.AddObservation(Quaternion.FromToRotation(head.forward, orientationCube.transform.forward));
var cubeForward = m_OrientationCube.transform.forward;
sensor.AddObservation(orientationCube.transform.InverseTransformPoint(target.transform.position));
//velocity we want to match
var velGoal = cubeForward * MTargetWalkingSpeed;
//ragdoll's avg vel
var avgVel = GetAvgVelocity();
//current ragdoll velocity. normalized
sensor.AddObservation(Vector3.Distance(velGoal, avgVel));
//avg body vel relative to cube
sensor.AddObservation(m_OrientationCube.transform.InverseTransformDirection(avgVel));
//vel goal relative to cube
sensor.AddObservation(m_OrientationCube.transform.InverseTransformDirection(velGoal));
//rotation deltas
sensor.AddObservation(Quaternion.FromToRotation(hips.forward, cubeForward));
sensor.AddObservation(Quaternion.FromToRotation(head.forward, cubeForward));
//Position of target position relative to cube
sensor.AddObservation(m_OrientationCube.transform.InverseTransformPoint(target.transform.position));
foreach (var bodyPart in m_JdController.bodyPartsList)
{

bpDict[forearmR].SetJointStrength(vectorAction[++i]);
}
//Update OrientationCube and DirectionIndicator
void UpdateOrientationObjects()
{
m_WorldDirToWalk = target.position - hips.position;
m_OrientationCube.UpdateOrientation(hips, target);
if (m_DirectionIndicator)
{
m_DirectionIndicator.MatchOrientation(m_OrientationCube.transform);
}
}
var cubeForward = orientationCube.transform.forward;
orientationCube.UpdateOrientation(hips, target.transform);
UpdateOrientationObjects();
var cubeForward = m_OrientationCube.transform.forward;
// a. Velocity alignment with goal direction.
var moveTowardsTargetReward = Vector3.Dot(cubeForward,
Vector3.ClampMagnitude(m_JdController.bodyPartsDict[hips].rb.velocity, maximumWalkingSpeed));
if (float.IsNaN(moveTowardsTargetReward))
// a. Match target speed
//This reward will approach 1 if it matches perfectly and approach zero as it deviates
var matchSpeedReward = GetMatchingVelocityReward(cubeForward * MTargetWalkingSpeed, GetAvgVelocity());
//Check for NaNs
if (float.IsNaN(matchSpeedReward))
$" cubeForward: {cubeForward}\n"+
$" hips.velocity: {m_JdController.bodyPartsDict[hips].rb.velocity}\n"+
$" maximumWalkingSpeed: {maximumWalkingSpeed}"
$" cubeForward: {cubeForward}\n" +
$" hips.velocity: {m_JdController.bodyPartsDict[hips].rb.velocity}\n" +
$" maximumWalkingSpeed: {m_maxWalkingSpeed}"
// b. Rotation alignment with goal direction.
var lookAtTargetReward = Vector3.Dot(cubeForward, head.forward);
// b. Rotation alignment with target direction.
//This reward will approach 1 if it faces the target direction perfectly and approach zero as it deviates
var lookAtTargetReward = (Vector3.Dot(cubeForward, head.forward) + 1) * .5F;
//Check for NaNs
$" cubeForward: {cubeForward}\n"+
$" cubeForward: {cubeForward}\n" +
// c. Encourage head height. //Should normalize to ~1
var headHeightOverFeetReward =
((head.position.y - footL.position.y) + (head.position.y - footR.position.y) / 10);
if (float.IsNaN(headHeightOverFeetReward))
AddReward(matchSpeedReward * lookAtTargetReward);
}
//Returns the average velocity of all of the body parts
//Using the velocity of the hips only has shown to result in more erratic movement from the limbs, so...
//...using the average helps prevent this erratic movement
Vector3 GetAvgVelocity()
{
Vector3 velSum = Vector3.zero;
Vector3 avgVel = Vector3.zero;
//ALL RBS
int numOfRB = 0;
foreach (var item in m_JdController.bodyPartsList)
throw new ArgumentException(
"NaN in headHeightOverFeetReward.\n" +
$" head.position: {head.position}\n"+
$" footL.position: {footL.position}\n"+
$" footR.position: {footR.position}"
);
numOfRB++;
velSum += item.rb.velocity;
AddReward(
+ 0.02f * moveTowardsTargetReward
+ 0.02f * lookAtTargetReward
+ 0.005f * headHeightOverFeetReward
);
avgVel = velSum / numOfRB;
return avgVel;
}
//normalized value of the difference in avg speed vs goal walking speed.
public float GetMatchingVelocityReward(Vector3 velocityGoal, Vector3 actualVelocity)
{
//distance between our actual velocity and goal velocity
var velDeltaMagnitude = Mathf.Clamp(Vector3.Distance(actualVelocity, velocityGoal), 0, MTargetWalkingSpeed);
//return the value on a declining sigmoid shaped curve that decays from 1 to 0
//This reward will approach 1 if it matches perfectly and approach zero as it deviates
return Mathf.Pow(1 - Mathf.Pow(velDeltaMagnitude / MTargetWalkingSpeed, 2), 2);
}
/// <summary>
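The new reward shaping replaces the old velocity-dot-product and head-height terms with the product of a speed-matching factor and a look-at factor, each in [0, 1]. A small standalone sketch of that math, lifted from the fragment above; the inputs you would pass in are hypothetical.

```csharp
using UnityEngine;

// Both factors live in [0, 1]; the per-step reward is their product.
public static class WalkerRewardSketch
{
    // Mirrors GetMatchingVelocityReward from the fragment above.
    public static float MatchingVelocityReward(Vector3 velocityGoal, Vector3 actualVelocity, float targetSpeed)
    {
        var velDeltaMagnitude = Mathf.Clamp(Vector3.Distance(actualVelocity, velocityGoal), 0, targetSpeed);
        // Declining curve: 1 when the velocities match, 0 when the error reaches targetSpeed.
        return Mathf.Pow(1 - Mathf.Pow(velDeltaMagnitude / targetSpeed, 2), 2);
    }

    // Mirrors the look-at term: remaps the dot product from [-1, 1] to [0, 1].
    public static float LookAtTargetReward(Vector3 cubeForward, Vector3 headForward)
    {
        return (Vector3.Dot(cubeForward, headForward) + 1) * 0.5f;
    }

    // The per-step reward added by the agent is the product of the two factors.
    public static float StepReward(Vector3 goalVel, Vector3 avgVel, float targetSpeed,
        Vector3 cubeForward, Vector3 headForward)
    {
        return MatchingVelocityReward(goalVel, avgVel, targetSpeed)
            * LookAtTargetReward(cubeForward, headForward);
    }
}
```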

Project/Assets/ML-Agents/Examples/Walker/TFModels/WalkerDynamic.nn (1001): file diff too large to display

Project/Assets/ML-Agents/Examples/Walker/TFModels/WalkerDynamic.nn.meta (2)


fileFormatVersion: 2
guid: e785133c5b0ac461588106642550d1b3
guid: 8cbae6de45ea44d0c97366e252052722
ScriptedImporter:
fileIDToRecycleName:
11400000: main obj

Project/Assets/ML-Agents/Examples/Walker/TFModels/WalkerStatic.nn (1001): file diff too large to display

Project/Assets/ML-Agents/Examples/Walker/TFModels/WalkerStatic.nn.meta (2)


fileFormatVersion: 2
guid: 8dfd4337ed40e4d48872a4f86919c9da
guid: 185990f76b7804d1e83378e9d4454c6b
ScriptedImporter:
fileIDToRecycleName:
11400000: main obj

com.unity.ml-agents.extensions/Editor/Unity.ML-Agents.Extensions.Editor.asmdef (4)


{
"name": "Unity.ML-Agents.Extensions.Editor",
"references": [
"Unity.ML-Agents.Extensions"
"Unity.ML-Agents.Extensions",
"Unity.ML-Agents",
"Unity.ML-Agents.Editor"
],
"includePlatforms": [
"Editor"

com.unity.ml-agents.extensions/Runtime/AssemblyInfo.cs (1)


using System.Runtime.CompilerServices;
[assembly: InternalsVisibleTo("Unity.ML-Agents.Extensions.EditorTests")]
[assembly: InternalsVisibleTo("Unity.ML-Agents.Extensions.Editor")]

com.unity.ml-agents.extensions/Runtime/Sensors/ArticulationBodyPoseExtractor.cs (29)


return new Pose { rotation = t.rotation, position = t.position };
}
/// <inheritdoc/>
protected internal override Object GetObjectAt(int index)
{
return m_Bodies[index];
}
internal IEnumerable<ArticulationBody> GetEnabledArticulationBodies()
{
if (m_Bodies == null)
{
yield break;
}
for (var i = 0; i < m_Bodies.Length; i++)
{
var articBody = m_Bodies[i];
if (articBody == null)
{
// Ignore a virtual root.
continue;
}
if (IsPoseEnabled(i))
{
yield return articBody;
}
}
}
}
}
#endif // UNITY_2020_1_OR_NEWER

com.unity.ml-agents.extensions/Runtime/Sensors/ArticulationBodySensorComponent.cs (8)


var poseExtractor = new ArticulationBodyPoseExtractor(RootBody);
var numPoseObservations = poseExtractor.GetNumPoseObservations(Settings);
var numJointObservations = 0;
// Start from i=1 to ignore the root
for (var i = 1; i < poseExtractor.Bodies.Length; i++)
foreach(var articBody in poseExtractor.GetEnabledArticulationBodies())
numJointObservations += ArticulationBodyJointExtractor.NumObservations(
poseExtractor.Bodies[i], Settings
);
numJointObservations += ArticulationBodyJointExtractor.NumObservations(articBody, Settings);
}
return new[] { numPoseObservations + numJointObservations };
}

com.unity.ml-agents.extensions/Runtime/Sensors/PhysicsBodySensor.cs (54)


using System.Collections.Generic;
using UnityEngine;
using Unity.MLAgents.Sensors;

string m_SensorName;
PoseExtractor m_PoseExtractor;
IJointExtractor[] m_JointExtractors;
List<IJointExtractor> m_JointExtractors;
/// Construct a new PhysicsBodySensor
/// <param name="rootBody">The root Rigidbody. This has no Joints on it (but other Joints may connect to it).</param>
/// <param name="rootGameObject">Optional GameObject used to find Rigidbodies in the hierarchy.</param>
/// <param name="virtualRoot">Optional GameObject used to determine the root of the poses,
/// <param name="poseExtractor"></param>
Rigidbody rootBody,
GameObject rootGameObject,
GameObject virtualRoot,
RigidBodyPoseExtractor poseExtractor,
string sensorName=null
string sensorName
var poseExtractor = new RigidBodyPoseExtractor(rootBody, rootGameObject, virtualRoot);
m_SensorName = string.IsNullOrEmpty(sensorName) ? $"PhysicsBodySensor:{rootBody?.name}" : sensorName;
m_SensorName = sensorName;
var rigidBodies = poseExtractor.Bodies;
if (rigidBodies != null)
{
m_JointExtractors = new IJointExtractor[rigidBodies.Length - 1]; // skip the root
for (var i = 1; i < rigidBodies.Length; i++)
{
var jointExtractor = new RigidBodyJointExtractor(rigidBodies[i]);
numJointExtractorObservations += jointExtractor.NumObservations(settings);
m_JointExtractors[i - 1] = jointExtractor;
}
}
else
m_JointExtractors = new List<IJointExtractor>(poseExtractor.NumEnabledPoses);
foreach(var rb in poseExtractor.GetEnabledRigidbodies())
m_JointExtractors = new IJointExtractor[0];
var jointExtractor = new RigidBodyJointExtractor(rb);
numJointExtractorObservations += jointExtractor.NumObservations(settings);
m_JointExtractors.Add(jointExtractor);
}
var numTransformObservations = m_PoseExtractor.GetNumPoseObservations(settings);

m_Settings = settings;
var numJointExtractorObservations = 0;
var articBodies = poseExtractor.Bodies;
if (articBodies != null)
m_JointExtractors = new List<IJointExtractor>(poseExtractor.NumEnabledPoses);
foreach(var articBody in poseExtractor.GetEnabledArticulationBodies())
m_JointExtractors = new IJointExtractor[articBodies.Length - 1]; // skip the root
for (var i = 1; i < articBodies.Length; i++)
{
var jointExtractor = new ArticulationBodyJointExtractor(articBodies[i]);
numJointExtractorObservations += jointExtractor.NumObservations(settings);
m_JointExtractors[i - 1] = jointExtractor;
}
}
else
{
m_JointExtractors = new IJointExtractor[0];
var jointExtractor = new ArticulationBodyJointExtractor(articBody);
numJointExtractorObservations += jointExtractor.NumObservations(settings);
m_JointExtractors.Add(jointExtractor);
}
var numTransformObservations = m_PoseExtractor.GetNumPoseObservations(settings);
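After this change the sensor is built from a pose extractor instead of the raw root Rigidbody; the call site shown later in RigidBodySensorComponent passes `GetPoseExtractor(), Settings, _sensorName`. A hedged construction sketch under that assumption; `PhysicsSensorSettings.Default()` is assumed, since the diff only shows a `Settings` field being passed through.

```csharp
using UnityEngine;
using Unity.MLAgents.Extensions.Sensors;

public static class PhysicsBodySensorSketch
{
    // Build the pose extractor first, then hand it to the sensor, mirroring the
    // new constructor shape (pose extractor, settings, sensor name).
    public static PhysicsBodySensor Create(Rigidbody rootBody, GameObject rootGameObject, GameObject virtualRoot)
    {
        var poseExtractor = new RigidBodyPoseExtractor(rootBody, rootGameObject, virtualRoot);
        var settings = PhysicsSensorSettings.Default(); // assumed helper
        var sensorName = $"PhysicsBodySensor:{rootBody?.name}";
        return new PhysicsBodySensor(poseExtractor, settings, sensorName);
    }
}
```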

com.unity.ml-agents.extensions/Runtime/Sensors/PoseExtractor.cs (117)


using System;
using Object = UnityEngine.Object;
namespace Unity.MLAgents.Extensions.Sensors
{

{
if (m_ParentIndices == null)
{
return -1;
throw new NullReferenceException("No parent indices set");
}
return m_ParentIndices[index];

public void SetPoseEnabled(int index, bool val)
{
m_PoseEnabled[index] = val;
}
public bool IsPoseEnabled(int index)
{
return m_PoseEnabled[index];
}
/// <summary>

/// <returns></returns>
protected internal abstract Vector3 GetLinearVelocityAt(int index);
/// <summary>
/// Return the underlying object at the given index. This is only
/// used for display in the inspector.
/// </summary>
/// <param name="index"></param>
/// <returns></returns>
protected internal virtual Object GetObjectAt(int index)
{
return null;
}
/// <summary>
/// Update the internal model space transform storage based on the underlying system.

Debug.DrawLine(current.position+offset, current.position+offset+.1f*localRight, Color.blue);
}
}
/// <summary>
/// Simplified representation of the a node in the hierarchy for display.
/// </summary>
internal struct DisplayNode
{
/// <summary>
/// Underlying object in the hierarchy. Pass to EditorGUIUtility.ObjectContent() for display.
/// </summary>
public Object NodeObject;
/// <summary>
/// Whether the poses for the object are enabled.
/// </summary>
public bool Enabled;
/// <summary>
/// Depth in the hierarchy, used for adjusting the indent level.
/// </summary>
public int Depth;
/// <summary>
/// The index of the corresponding object in the PoseExtractor.
/// </summary>
public int OriginalIndex;
}
/// <summary>
/// Get a list of display nodes in depth-first order.
/// </summary>
/// <returns></returns>
internal IList<DisplayNode> GetDisplayNodes()
{
if (NumPoses == 0)
{
return Array.Empty<DisplayNode>();
}
var nodesOut = new List<DisplayNode>(NumPoses);
// List of children for each node
var tree = new Dictionary<int, List<int>>();
for (var i = 0; i < NumPoses; i++)
{
var parent = GetParentIndex(i);
if (i == -1)
{
continue;
}
if (!tree.ContainsKey(parent))
{
tree[parent] = new List<int>();
}
tree[parent].Add(i);
}
// Store (index, depth) in the stack
var stack = new Stack<(int, int)>();
stack.Push((0, 0));
while (stack.Count != 0)
{
var (current, depth) = stack.Pop();
var obj = GetObjectAt(current);
var node = new DisplayNode
{
NodeObject = obj,
Enabled = IsPoseEnabled(current),
OriginalIndex = current,
Depth = depth
};
nodesOut.Add(node);
// Add children
if (tree.ContainsKey(current))
{
// Push to the stack in reverse order
var children = tree[current];
for (var childIdx = children.Count-1; childIdx >= 0; childIdx--)
{
stack.Push((children[childIdx], depth+1));
}
}
// Safety check
// This shouldn't even happen, but in case we have a cycle in the graph
// exit instead of looping forever and eating up all the memory.
if (nodesOut.Count > NumPoses)
{
return nodesOut;
}
}
return nodesOut;
}
}
/// <summary>
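GetDisplayNodes flattens the parent-index encoding into a depth-first list with a per-node depth, and guards against cycles by bailing out once more nodes than NumPoses have been emitted. A standalone sketch of the same traversal over a toy parent-index array; this is the technique, not the library code itself.

```csharp
using System.Collections.Generic;

public static class ParentIndexDfsSketch
{
    // Depth-first flatten of a tree encoded as parent indices (-1 marks the root):
    // build child lists, walk with an explicit stack, and stop early if more nodes
    // than expected come out, which would indicate a cycle.
    public static List<(int index, int depth)> Flatten(int[] parentIndices)
    {
        // List of children for each node.
        var tree = new Dictionary<int, List<int>>();
        for (var i = 0; i < parentIndices.Length; i++)
        {
            var parent = parentIndices[i];
            if (parent == -1)
            {
                continue;
            }
            if (!tree.ContainsKey(parent))
            {
                tree[parent] = new List<int>();
            }
            tree[parent].Add(i);
        }

        var result = new List<(int, int)>(parentIndices.Length);
        var stack = new Stack<(int, int)>();
        stack.Push((0, 0));
        while (stack.Count != 0)
        {
            var (current, depth) = stack.Pop();
            result.Add((current, depth));
            if (tree.TryGetValue(current, out var children))
            {
                // Push children in reverse so they pop in their original order.
                for (var childIdx = children.Count - 1; childIdx >= 0; childIdx--)
                {
                    stack.Push((children[childIdx], depth + 1));
                }
            }
            // Safety check against cycles in the "tree".
            if (result.Count > parentIndices.Length)
            {
                break;
            }
        }
        return result;
    }
}
```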

com.unity.ml-agents.extensions/Runtime/Sensors/RigidBodyPoseExtractor.cs (80)


/// <param name="rootGameObject">Optional GameObject used to find Rigidbodies in the hierarchy.</param>
/// <param name="virtualRoot">Optional GameObject used to determine the root of the poses,
/// separate from the actual Rigidbodies in the hierarchy. For locomotion tasks, with ragdolls, this provides
/// a stabilized refernece frame, which can improve learning.</param>
public RigidBodyPoseExtractor(Rigidbody rootBody, GameObject rootGameObject = null, GameObject virtualRoot = null)
/// a stabilized reference frame, which can improve learning.</param>
/// <param name="enableBodyPoses">Optional mapping of whether a body's psoe should be enabled or not.</param>
public RigidBodyPoseExtractor(Rigidbody rootBody, GameObject rootGameObject = null,
GameObject virtualRoot = null, Dictionary<Rigidbody, bool> enableBodyPoses = null)
{
if (rootBody == null)
{

Rigidbody[] rbs;
Joint[] joints;
joints = rootBody.GetComponentsInChildren <Joint>();
joints = rootGameObject.GetComponentsInChildren<Joint>();
}
if (rbs == null || rbs.Length == 0)

}
if (rbs[0] != rootBody)
{
Debug.Log("Expected root body at index 0");
return;

}
}
var joints = rootBody.GetComponentsInChildren <Joint>();
foreach (var j in joints)
{
var parent = j.connectedBody;

// By default, ignore the root
SetPoseEnabled(0, false);
if (enableBodyPoses != null)
{
foreach (var pair in enableBodyPoses)
{
var rb = pair.Key;
if (bodyToIndex.TryGetValue(rb, out var index))
{
SetPoseEnabled(index, pair.Value);
}
}
}
}
/// <inheritdoc/>

return new Pose { rotation = body.rotation, position = body.position };
}
/// <inheritdoc/>
protected internal override Object GetObjectAt(int index)
{
if (index == 0 && m_VirtualRoot != null)
{
return m_VirtualRoot;
}
return m_Bodies[index];
}
/// <summary>
/// Get a dictionary indicating which Rigidbodies' poses are enabled or disabled.
/// </summary>
/// <returns></returns>
internal Dictionary<Rigidbody, bool> GetBodyPosesEnabled()
{
var bodyPosesEnabled = new Dictionary<Rigidbody, bool>(m_Bodies.Length);
for (var i = 0; i < m_Bodies.Length; i++)
{
var rb = m_Bodies[i];
if (rb == null)
{
continue; // skip virtual root
}
bodyPosesEnabled[rb] = IsPoseEnabled(i);
}
return bodyPosesEnabled;
}
internal IEnumerable<Rigidbody> GetEnabledRigidbodies()
{
if (m_Bodies == null)
{
yield break;
}
for (var i = 0; i < m_Bodies.Length; i++)
{
var rb = m_Bodies[i];
if (rb == null)
{
// Ignore a virtual root.
continue;
}
if (IsPoseEnabled(i))
{
yield return rb;
}
}
}
}
}
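The constructor now optionally takes a per-Rigidbody enabled map, and GetBodyPosesEnabled() snapshots the current state, so an extractor can be rebuilt without losing which poses were toggled off. A short sketch of that round trip; both members are internal, so this assumes internals access such as the editor assembly granted access earlier in this commit.

```csharp
using UnityEngine;
using Unity.MLAgents.Extensions.Sensors;

internal static class PoseEnabledRoundTripSketch
{
    // Rebuild an extractor while preserving which poses were enabled, using the
    // new GetBodyPosesEnabled() snapshot and the new enableBodyPoses argument.
    internal static RigidBodyPoseExtractor Rebuild(
        Rigidbody rootBody, GameObject rootGameObject, GameObject virtualRoot,
        RigidBodyPoseExtractor previous)
    {
        var enabled = previous?.GetBodyPosesEnabled();
        return new RigidBodyPoseExtractor(rootBody, rootGameObject, virtualRoot, enabled);
    }
}
```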

com.unity.ml-agents.extensions/Runtime/Sensors/RigidBodySensorComponent.cs (68)


using System.Collections.Generic;
using UnityEngine;
using Unity.MLAgents.Sensors;

/// <summary>
/// Optional sensor name. This must be unique for each Agent.
/// </summary>
[SerializeField]
[HideInInspector]
RigidBodyPoseExtractor m_PoseExtractor;
/// <summary>
/// Creates a PhysicsBodySensor.
/// </summary>

return new PhysicsBodySensor(RootBody, gameObject, VirtualRoot, Settings, sensorName);
var _sensorName = string.IsNullOrEmpty(sensorName) ? $"PhysicsBodySensor:{RootBody?.name}" : sensorName;
return new PhysicsBodySensor(GetPoseExtractor(), Settings, _sensorName);
}
/// <inheritdoc/>

return new[] { 0 };
}
// TODO static method in PhysicsBodySensor?
// TODO only update PoseExtractor when body changes?
var poseExtractor = new RigidBodyPoseExtractor(RootBody, gameObject, VirtualRoot);
var poseExtractor = GetPoseExtractor();
// Start from i=1 to ignore the root
for (var i = 1; i < poseExtractor.Bodies.Length; i++)
foreach(var rb in poseExtractor.GetEnabledRigidbodies())
var body = poseExtractor.Bodies[i];
var joint = body?.GetComponent<Joint>();
numJointObservations += RigidBodyJointExtractor.NumObservations(body, joint, Settings);
var joint = rb.GetComponent<Joint>();
numJointObservations += RigidBodyJointExtractor.NumObservations(rb, joint, Settings);
}
/// <summary>
/// Get the DisplayNodes of the hierarchy.
/// </summary>
/// <returns></returns>
internal IList<PoseExtractor.DisplayNode> GetDisplayNodes()
{
return GetPoseExtractor().GetDisplayNodes();
}
/// <summary>
/// Lazy construction of the PoseExtractor.
/// </summary>
/// <returns></returns>
RigidBodyPoseExtractor GetPoseExtractor()
{
if (m_PoseExtractor == null)
{
ResetPoseExtractor();
}
return m_PoseExtractor;
}
/// <summary>
/// Reset the pose extractor, trying to keep the enabled state of the corresponding poses the same.
/// </summary>
internal void ResetPoseExtractor()
{
// Get the current enabled state of each body, so that we can reinitialize with them.
Dictionary<Rigidbody, bool> bodyPosesEnabled = null;
if (m_PoseExtractor != null)
{
bodyPosesEnabled = m_PoseExtractor.GetBodyPosesEnabled();
}
m_PoseExtractor = new RigidBodyPoseExtractor(RootBody, gameObject, VirtualRoot, bodyPosesEnabled);
}
/// <summary>
/// Toggle the pose at the given index.
/// </summary>
/// <param name="index"></param>
/// <param name="enabled"></param>
internal void SetPoseEnabled(int index, bool enabled)
{
GetPoseExtractor().SetPoseEnabled(index, enabled);
}
}
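The component now lazily builds and caches its pose extractor and exposes internal hooks (GetDisplayNodes, SetPoseEnabled, ResetPoseExtractor) for an inspector to drive; the InternalsVisibleTo("Unity.ML-Agents.Extensions.Editor") added earlier in this commit is what makes that usable from editor code. A hedged sketch of how such editor code might use the hooks; the depth rule here is hypothetical.

```csharp
using Unity.MLAgents.Extensions.Sensors;

// Assumed to live in the Unity.ML-Agents.Extensions.Editor assembly, which has
// internals access per the AssemblyInfo change above.
internal static class RigidBodySensorInspectorSketch
{
    internal static void DisableDeepPoses(RigidBodySensorComponent component)
    {
        var nodes = component.GetDisplayNodes();
        for (var i = 0; i < nodes.Count; i++)
        {
            // Hypothetical rule: disable everything deeper than two levels.
            if (nodes[i].Depth > 2)
            {
                component.SetPoseEnabled(nodes[i].OriginalIndex, false);
            }
        }
    }
}
```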

com.unity.ml-agents.extensions/Tests/Editor/Sensors/PoseExtractorTests.cs (88)


using System;
using UnityEngine;
using NUnit.Framework;
using Unity.MLAgents.Extensions.Sensors;

public class PoseExtractorTests
{
class UselessPoseExtractor : PoseExtractor
class BasicPoseExtractor : PoseExtractor
{
protected internal override Pose GetPoseAt(int index)
{

protected internal override Vector3 GetLinearVelocityAt(int index)
}
class UselessPoseExtractor : BasicPoseExtractor
{
public void Init(int[] parentIndices)
{
Setup(parentIndices);

poseExtractor.UpdateModelSpacePoses();
Assert.AreEqual(0, poseExtractor.NumPoses);
// Iterating through poses and velocities should be an empty loop
foreach (var pose in poseExtractor.GetEnabledModelSpacePoses())
{
throw new UnityAgentsException("This shouldn't happen");
}
foreach (var pose in poseExtractor.GetEnabledLocalSpacePoses())
{
throw new UnityAgentsException("This shouldn't happen");
}
foreach (var vel in poseExtractor.GetEnabledModelSpaceVelocities())
{
throw new UnityAgentsException("This shouldn't happen");
}
foreach (var vel in poseExtractor.GetEnabledLocalSpaceVelocities())
{
throw new UnityAgentsException("This shouldn't happen");
}
// Getting a parent index should throw an index exception
Assert.Throws <NullReferenceException>(
() => poseExtractor.GetParentIndex(0)
);
// DisplayNodes should be empty
var displayNodes = poseExtractor.GetDisplayNodes();
Assert.AreEqual(0, displayNodes.Count);
}
[Test]

Assert.AreEqual(size, localPoseIndex);
}
class BadPoseExtractor : PoseExtractor
[Test]
public void TestChainDisplayNodes()
{
var size = 4;
var chain = new ChainPoseExtractor(size);
var displayNodes = chain.GetDisplayNodes();
Assert.AreEqual(size, displayNodes.Count);
for (var i = 0; i < size; i++)
{
var displayNode = displayNodes[i];
Assert.AreEqual(i, displayNode.OriginalIndex);
Assert.AreEqual(null, displayNode.NodeObject);
Assert.AreEqual(i, displayNode.Depth);
Assert.AreEqual(true, displayNode.Enabled);
}
}
[Test]
public void TestDisplayNodesLoop()
{
// Degenerate case with a loop
var poseExtractor = new UselessPoseExtractor();
poseExtractor.Init(new[] {-1, 2, 1});
// This just shouldn't blow up
poseExtractor.GetDisplayNodes();
// Self-loop
poseExtractor.Init(new[] {-1, 1});
// This just shouldn't blow up
poseExtractor.GetDisplayNodes();
}
class BadPoseExtractor : BasicPoseExtractor
{
public BadPoseExtractor()
{

}
Setup(parents);
}
protected internal override Pose GetPoseAt(int index)
{
return Pose.identity;
}
protected internal override Vector3 GetLinearVelocityAt(int index)
{
return Vector3.zero;
}
}
[Test]

var bad = new BadPoseExtractor();
});
}
}
public class PoseExtensionTests

com.unity.ml-agents.extensions/Tests/Editor/Sensors/RigidBodyPoseExtractorTests.cs (67)


var rootRb = go.AddComponent<Rigidbody>();
var poseExtractor = new RigidBodyPoseExtractor(rootRb);
Assert.AreEqual(1, poseExtractor.NumPoses);
// Also pass the GameObject
poseExtractor = new RigidBodyPoseExtractor(rootRb, go);
Assert.AreEqual(1, poseExtractor.NumPoses);
}
[Test]
public void TestNoBodiesFound()
{
// Check that if we can't find any bodies under the game object, we get an empty extractor
var gameObj = new GameObject();
var rootRb = gameObj.AddComponent<Rigidbody>();
var otherGameObj = new GameObject();
var poseExtractor = new RigidBodyPoseExtractor(rootRb, otherGameObj);
Assert.AreEqual(0, poseExtractor.NumPoses);
// Add an RB under the other GameObject. Constructor will find a rigid body, but not the root.
var otherRb = otherGameObj.AddComponent<Rigidbody>();
poseExtractor = new RigidBodyPoseExtractor(rootRb, otherGameObj);
Assert.AreEqual(0, poseExtractor.NumPoses);
}
[Test]

Assert.AreEqual(rb1.position, poseExtractor.GetPoseAt(0).position);
Assert.IsTrue(rb1.rotation == poseExtractor.GetPoseAt(0).rotation);
Assert.AreEqual(rb1.velocity, poseExtractor.GetLinearVelocityAt(0));
// Check DisplayNodes gives expected results
var displayNodes = poseExtractor.GetDisplayNodes();
Assert.AreEqual(2, displayNodes.Count);
Assert.AreEqual(rb1, displayNodes[0].NodeObject);
Assert.AreEqual(false, displayNodes[0].Enabled);
Assert.AreEqual(rb2, displayNodes[1].NodeObject);
Assert.AreEqual(true, displayNodes[1].Enabled);
}
[Test]

Assert.AreEqual(rb1.position, poseExtractor.GetPoseAt(1).position);
Assert.IsTrue(rb1.rotation == poseExtractor.GetPoseAt(1).rotation);
Assert.AreEqual(rb1.velocity, poseExtractor.GetLinearVelocityAt(1));
}
[Test]
public void TestBodyPosesEnabledDictionary()
{
// * rootObj
// - rb1
// * go2
// - rb2
// - joint
var rootObj = new GameObject();
var rb1 = rootObj.AddComponent<Rigidbody>();
var go2 = new GameObject();
var rb2 = go2.AddComponent<Rigidbody>();
go2.transform.SetParent(rootObj.transform);
var joint = go2.AddComponent<ConfigurableJoint>();
joint.connectedBody = rb1;
var poseExtractor = new RigidBodyPoseExtractor(rb1);
// Expect the root body disabled and the attached one enabled.
Assert.IsFalse(poseExtractor.IsPoseEnabled(0));
Assert.IsTrue(poseExtractor.IsPoseEnabled(1));
var bodyPosesEnabled = poseExtractor.GetBodyPosesEnabled();
Assert.IsFalse(bodyPosesEnabled[rb1]);
Assert.IsTrue(bodyPosesEnabled[rb2]);
// Swap the values
bodyPosesEnabled[rb1] = true;
bodyPosesEnabled[rb2] = false;
var poseExtractor2 = new RigidBodyPoseExtractor(rb1, null, null, bodyPosesEnabled);
Assert.IsTrue(poseExtractor2.IsPoseEnabled(0));
Assert.IsFalse(poseExtractor2.IsPoseEnabled(1));
}
}
}

com.unity.ml-agents/CHANGELOG.md (5)


#### com.unity.ml-agents (C#)
#### ml-agents / ml-agents-envs / gym-unity (Python)
## [1.3.0-preview] 2020-08-12
## [1.3.0-preview] - 2020-08-12
### Major Changes
#### com.unity.ml-agents (C#)

Previously, this would result in an infinite loop and cause the editor to hang.
(#4226)
#### ml-agents / ml-agents-envs / gym-unity (Python)
- The algorithm used to normalize observations was introducing NaNs if the initial observations were too large
due to incorrect initialization. The initialization was fixed and is now the observation means from the
first trajectory processed. (#4299)
## [1.2.0-preview] - 2020-07-15

com.unity.ml-agents/Runtime/Actuators/ActionSegment.cs (33)


/// the offset into the original array, and an length.
/// </summary>
/// <typeparam name="T">The type of object stored in the underlying <see cref="Array"/></typeparam>
internal readonly struct ActionSegment<T> : IEnumerable<T>, IEquatable<ActionSegment<T>>
public readonly struct ActionSegment<T> : IEnumerable<T>, IEquatable<ActionSegment<T>>
where T : struct
{
/// <summary>

/// </summary>
public static ActionSegment<T> Empty = new ActionSegment<T>(System.Array.Empty<T>(), 0, 0);
static void CheckParameters(T[] actionArray, int offset, int length)
static void CheckParameters(IReadOnlyCollection<T> actionArray, int offset, int length)
if (offset + length > actionArray.Length)
if (offset + length > actionArray.Count)
$"are out of bounds of actionArray: {actionArray.Length}.");
$"are out of bounds of actionArray: {actionArray.Count}.");
/// Construct an <see cref="ActionSegment{T}"/> with just an actionArray. The <see cref="Offset"/> will
/// be set to 0 and the <see cref="Length"/> will be set to `actionArray.Length`.
/// </summary>
/// <param name="actionArray">The action array to use for the this segment.</param>
public ActionSegment(T[] actionArray) : this(actionArray, 0, actionArray.Length) { }
/// <summary>
/// Construct an <see cref="ActionSegment{T}"/> with an underlying array
/// and offset, and a length.
/// </summary>

public ActionSegment(T[] actionArray, int offset, int length)
{
#if DEBUG
#endif
Array = actionArray;
Offset = offset;
Length = length;

}
return Array[Offset + index];
}
set
{
if (index < 0 || index > Length)
{
throw new IndexOutOfRangeException($"Index out of bounds, expected a number between 0 and {Length}");
}
Array[Offset + index] = value;
}
}
/// <summary>
/// Sets the segment of the backing array to all zeros.
/// </summary>
public void Clear()
{
System.Array.Clear(Array, Offset, Length);
}
/// <inheritdoc cref="IEnumerable{T}.GetEnumerator"/>
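ActionSegment&lt;T&gt; is now public: a read/write window over a shared backing array defined by an offset and a length, with a bounds-checked indexer and a Clear() that zeroes only its own slice. A small usage sketch with made-up values.

```csharp
using Unity.MLAgents.Actuators;

public static class ActionSegmentSketch
{
    public static void Demo()
    {
        // One backing buffer split into two non-overlapping segments.
        var backing = new float[] { 1f, 2f, 3f, 4f, 5f, 6f };
        var first = new ActionSegment<float>(backing, 0, 3);  // views 1, 2, 3
        var second = new ActionSegment<float>(backing, 3, 3); // views 4, 5, 6

        first[0] = 10f;  // writes through to backing[0]
        second.Clear();  // zeroes backing[3..5] only

        // backing is now { 10, 2, 3, 0, 0, 0 }
    }
}
```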

com.unity.ml-agents/Runtime/Actuators/ActionSpec.cs (2)


/// <summary>
/// Defines the structure of an Action Space to be used by the Actuator system.
/// </summary>
internal readonly struct ActionSpec
public readonly struct ActionSpec
{
/// <summary>

com.unity.ml-agents/Runtime/Actuators/ActuatorManager.cs (51)


/// <summary>
/// Returns the previously stored actions for the actuators in this list.
/// </summary>
public float[] StoredContinuousActions { get; private set; }
// public float[] StoredContinuousActions { get; private set; }
public int[] StoredDiscreteActions { get; private set; }
// public int[] StoredDiscreteActions { get; private set; }
public ActionBuffers StoredActions { get; private set; }
/// <summary>
/// Create an ActuatorList with a preset capacity.

// Sort the Actuators by name to ensure determinism
SortActuators();
StoredContinuousActions = numContinuousActions == 0 ? Array.Empty<float>() : new float[numContinuousActions];
StoredDiscreteActions = numDiscreteBranches == 0 ? Array.Empty<int>() : new int[numDiscreteBranches];
var continuousActions = numContinuousActions == 0 ? ActionSegment<float>.Empty :
new ActionSegment<float>(new float[numContinuousActions]);
var discreteActions = numDiscreteBranches == 0 ? ActionSegment<int>.Empty : new ActionSegment<int>(new int[numDiscreteBranches]);
StoredActions = new ActionBuffers(continuousActions, discreteActions);
m_DiscreteActionMask = new ActuatorDiscreteActionMask(actuators, sumOfDiscreteBranches, numDiscreteBranches);
m_ReadyForExecution = true;
}

/// continuous actions for the IActuators in this list.</param>
/// <param name="discreteActionBuffer">The action buffer which contains all of the
/// discrete actions for the IActuators in this list.</param>
public void UpdateActions(float[] continuousActionBuffer, int[] discreteActionBuffer)
public void UpdateActions(ActionBuffers actions)
UpdateActionArray(continuousActionBuffer, StoredContinuousActions);
UpdateActionArray(discreteActionBuffer, StoredDiscreteActions);
UpdateActionArray(actions.ContinuousActions, StoredActions.ContinuousActions);
UpdateActionArray(actions.DiscreteActions, StoredActions.DiscreteActions);
static void UpdateActionArray<T>(T[] sourceActionBuffer, T[] destination)
static void UpdateActionArray<T>(ActionSegment<T> sourceActionBuffer, ActionSegment<T> destination)
where T : struct
if (sourceActionBuffer == null || sourceActionBuffer.Length == 0)
if (sourceActionBuffer.Length <= 0)
Array.Clear(destination, 0, destination.Length);
destination.Clear();
}
else
{

Array.Copy(sourceActionBuffer, destination, destination.Length);
Array.Copy(sourceActionBuffer.Array,
sourceActionBuffer.Offset,
destination.Array,
destination.Offset,
destination.Length);
}
}

for (var i = 0; i < m_Actuators.Count; i++)
{
var actuator = m_Actuators[i];
m_DiscreteActionMask.CurrentBranchOffset = offset;
actuator.WriteDiscreteActionMask(m_DiscreteActionMask);
offset += actuator.ActionSpec.NumDiscreteActions;
if (actuator.ActionSpec.NumDiscreteActions > 0)
{
m_DiscreteActionMask.CurrentBranchOffset = offset;
actuator.WriteDiscreteActionMask(m_DiscreteActionMask);
offset += actuator.ActionSpec.NumDiscreteActions;
}
}
}

var continuousActions = ActionSegment<float>.Empty;
if (numContinuousActions > 0)
{
continuousActions = new ActionSegment<float>(StoredContinuousActions,
continuousActions = new ActionSegment<float>(StoredActions.ContinuousActions.Array,
continuousStart,
numContinuousActions);
}

{
discreteActions = new ActionSegment<int>(StoredDiscreteActions,
discreteActions = new ActionSegment<int>(StoredActions.DiscreteActions.Array,
discreteStart,
numDiscreteActions);
}

}
/// <summary>
/// Resets the <see cref="StoredContinuousActions"/> and <see cref="StoredDiscreteActions"/> buffers to be all
/// Resets the <see cref="ActionBuffers"/> to be all
/// zeros and calls <see cref="IActuator.ResetData"/> on each <see cref="IActuator"/> managed by this object.
/// </summary>
public void ResetData()

return;
}
Array.Clear(StoredContinuousActions, 0, StoredContinuousActions.Length);
Array.Clear(StoredDiscreteActions, 0, StoredDiscreteActions.Length);
StoredActions.Clear();
m_DiscreteActionMask.ResetMask();
}
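UpdateActions now takes a single ActionBuffers and copies its continuous and discrete segments into StoredActions. A hedged sketch of pushing actions through it; ActuatorManager itself is not shown being made public in this commit, so this would likely live in a test or friend assembly, and the action sizes are hypothetical.

```csharp
using Unity.MLAgents.Actuators;

internal static class ActuatorManagerUpdateSketch
{
    // 'manager' is assumed to be an already-initialized ActuatorManager whose
    // actuators expect two continuous actions and one discrete branch.
    internal static void PushActions(ActuatorManager manager)
    {
        var actions = new ActionBuffers(
            new float[] { 0.5f, -0.25f }, // continuous actions
            new int[] { 1 });             // one value per discrete branch
        manager.UpdateActions(actions);   // copied into manager.StoredActions
    }
}
```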

com.unity.ml-agents/Runtime/Actuators/IActionReceiver.cs (72)


using System;
using System.Linq;
using UnityEngine;
namespace Unity.MLAgents.Actuators
{

/// </summary>
internal readonly struct ActionBuffers
public readonly struct ActionBuffers
{
/// <summary>
/// An empty action buffer.

public ActionSegment<int> DiscreteActions { get; }
/// <summary>
/// Create an <see cref="ActionBuffers"/> instance with discrete actions stored as a float array. This exists
/// to achieve backward compatibility with the former Agent methods which used a float array for both continuous
/// and discrete actions.
/// </summary>
/// <param name="discreteActions">The float array of discrete actions.</param>
/// <returns>An <see cref="ActionBuffers"/> instance initialized with a <see cref="DiscreteActions"/>
/// <see cref="ActionSegment{T}"/> initialized from a float array.</returns>
public static ActionBuffers FromDiscreteActions(float[] discreteActions)
{
return new ActionBuffers(ActionSegment<float>.Empty, discreteActions == null ? ActionSegment<int>.Empty
: new ActionSegment<int>(Array.ConvertAll(discreteActions,
x => (int)x)));
}
public ActionBuffers(float[] continuousActions, int[] discreteActions)
: this(new ActionSegment<float>(continuousActions), new ActionSegment<int>(discreteActions)) { }
/// <summary>
/// Construct an <see cref="ActionBuffers"/> instance with the continuous and discrete actions that will
/// be used.
/// </summary>

DiscreteActions = discreteActions;
}
/// <summary>
/// Clear the <see cref="ContinuousActions"/> and <see cref="DiscreteActions"/> segments to be all zeros.
/// </summary>
public void Clear()
{
ContinuousActions.Clear();
DiscreteActions.Clear();
}
/// <inheritdoc cref="ValueType.Equals(object)"/>
public override bool Equals(object obj)
{

return (ContinuousActions.GetHashCode() * 397) ^ DiscreteActions.GetHashCode();
}
}
/// <summary>
/// Packs the continuous and discrete actions into one float array. The array passed into this method
/// must have a Length that is greater than or equal to the sum of the Lengths of
/// <see cref="ContinuousActions"/> and <see cref="DiscreteActions"/>.
/// </summary>
/// <param name="destination">A float array to pack actions into whose length is greater than or
/// equal to the addition of the Lengths of this objects <see cref="ContinuousActions"/> and
/// <see cref="DiscreteActions"/> segments.</param>
public void PackActions(in float[] destination)
{
Debug.Assert(destination.Length >= ContinuousActions.Length + DiscreteActions.Length,
$"argument '{nameof(destination)}' is not large enough to pack the actions into.\n" +
$"{nameof(destination)}.Length: {destination.Length}\n" +
$"{nameof(ContinuousActions)}.Length + {nameof(DiscreteActions)}.Length: {ContinuousActions.Length + DiscreteActions.Length}");
var start = 0;
if (ContinuousActions.Length > 0)
{
Array.Copy(ContinuousActions.Array,
ContinuousActions.Offset,
destination,
start,
ContinuousActions.Length);
start = ContinuousActions.Length;
}
if (start >= destination.Length)
{
return;
}
if (DiscreteActions.Length > 0)
{
Array.Copy(DiscreteActions.Array,
DiscreteActions.Offset,
destination,
start,
DiscreteActions.Length);
}
}
internal interface IActionReceiver
public interface IActionReceiver
{
/// <summary>
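ActionBuffers is the new public container pairing a continuous and a discrete segment; the float[]/int[] constructor and FromDiscreteActions exist for backward compatibility with the old flat float-array API, and PackActions flattens both segments back into one legacy array. A small usage sketch with made-up values.

```csharp
using UnityEngine;
using Unity.MLAgents.Actuators;

public static class ActionBuffersSketch
{
    public static void Demo()
    {
        // New-style construction from separate continuous and discrete arrays.
        var buffers = new ActionBuffers(
            new float[] { 0.1f, -0.9f }, // continuous actions
            new int[] { 2, 0 });         // one value per discrete branch

        Debug.Log($"turn={buffers.ContinuousActions[0]} jump={buffers.DiscreteActions[0]}");

        // Backward-compatibility path: a legacy float[] of discrete actions is
        // converted element by element into the int segment.
        var legacy = ActionBuffers.FromDiscreteActions(new float[] { 1f, 0f, 3f });
        Debug.Log($"branches={legacy.DiscreteActions.Length}"); // 3
    }
}
```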

com.unity.ml-agents/Runtime/Actuators/IActuator.cs (2)


/// <summary>
/// Abstraction that facilitates the execution of actions.
/// </summary>
internal interface IActuator : IActionReceiver
public interface IActuator : IActionReceiver
{
int TotalNumberOfActions { get; }

com.unity.ml-agents/Runtime/Actuators/IDiscreteActionMask.cs (2)


/// <summary>
/// Interface for writing a mask to disable discrete actions for agents for the next decision.
/// </summary>
internal interface IDiscreteActionMask
public interface IDiscreteActionMask
{
/// <summary>
/// Modifies an action mask for discrete control agents.

com.unity.ml-agents/Runtime/Actuators/VectorActuator.cs (2)


namespace Unity.MLAgents.Actuators
{
internal class VectorActuator : IActuator
public class VectorActuator : IActuator
{
IActionReceiver m_ActionReceiver;

com.unity.ml-agents/Runtime/Agent.cs (201)


using System;
using System.Collections.Generic;
using System.Collections.ObjectModel;
using System.Linq;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Sensors;
using Unity.MLAgents.Sensors.Reflection;
using Unity.MLAgents.Demonstrations;

/// to separate between different agents in the environment.
/// </summary>
public int episodeId;
}
/// <summary>
/// Struct that contains the action information sent from the Brain to the
/// Agent.
/// </summary>
internal struct AgentAction
{
public float[] vectorActions;
public void ClearActions()
{
Array.Clear(storedVectorActions, 0, storedVectorActions.Length);
}
public void CopyActions(ActionBuffers actionBuffers)
{
actionBuffers.PackActions(storedVectorActions);
}
}
/// <summary>

/// can only take an action when it touches the ground, so several frames might elapse between
/// one decision and the need for the next.
///
/// Use the <see cref="OnActionReceived"/> function to implement the actions your agent can take,
/// Use the <see cref="OnActionReceived(float[])"/> function to implement the actions your agent can take,
/// such as moving to reach a goal or interacting with its environment.
///
/// When you call <see cref="EndEpisode"/> on an agent or the agent reaches its <see cref="MaxStep"/> count,

"docs/Learning-Environment-Design-Agents.md")]
[Serializable]
[RequireComponent(typeof(BehaviorParameters))]
public class Agent : MonoBehaviour, ISerializationCallbackReceiver
public partial class Agent : MonoBehaviour, ISerializationCallbackReceiver, IActionReceiver
{
IPolicy m_Brain;
BehaviorParameters m_PolicyFactory;

/// Current Agent information (message sent to Brain).
AgentInfo m_Info;
/// Current Agent action (message sent from Brain).
AgentAction m_Action;
/// Represents the reward the agent accumulated during the current step.
/// It is reset to 0 at the beginning of every step.

internal VectorSensor collectObservationsSensor;
/// <summary>
/// List of IActuators that this Agent will delegate actions to if any exist.
/// </summary>
ActuatorManager m_ActuatorManager;
/// <summary>
/// VectorActuator which is used by default if no other sensors exist on this Agent. This VectorSensor will
/// delegate its actions to <see cref="OnActionReceived(float[])"/> by default in order to keep backward compatibility
/// with the current behavior of Agent.
/// </summary>
IActuator m_VectorActuator;
/// <summary>
/// This is used to avoid allocation of a float array every frame if users are still using the old
/// OnActionReceived method.
/// </summary>
float[] m_LegacyActionCache;
/// <summary>
/// Called when the attached [GameObject] becomes enabled and active.
/// [GameObject]: https://docs.unity3d.com/Manual/GameObjects.html
/// </summary>

m_PolicyFactory = GetComponent<BehaviorParameters>();
m_Info = new AgentInfo();
m_Action = new AgentAction();
sensors = new List<ISensor>();
Academy.Instance.AgentIncrementStep += AgentIncrementStep;

InitializeSensors();
}
using (TimerStack.Instance.Scoped("InitializeActuators"))
{
InitializeActuators();
}
m_Info.storedVectorActions = new float[m_ActuatorManager.TotalNumberOfActions];
// The first time the Academy resets, all Agents in the scene will be
// forced to reset through the <see cref="AgentForceReset"/> event.
// To avoid the Agent resetting twice, the Agents will not begin their

/// set the reward assigned to the current step with a specific value rather than
/// increasing or decreasing it.
///
/// Typically, you assign rewards in the Agent subclass's <see cref="OnActionReceived(float[])"/>
/// Typically, you assign rewards in the Agent subclass's <see cref="IActionReceiver.OnActionReceived"/>
/// implementation after carrying out the received action and evaluating its success.
///
/// Rewards are used during reinforcement learning; they are ignored during inference.

/// <remarks>
/// Call `RequestAction()` to repeat the previous action returned by the agent's
/// most recent decision. A new decision is not requested. When you call this function,
/// the Agent instance invokes <seealso cref="OnActionReceived(float[])"/> with the
/// the Agent instance invokes <seealso cref="IActionReceiver.OnActionReceived"/> with the
/// existing action vector.
///
/// You can use `RequestAction()` in situations where an agent must take an action

/// at the end of an episode.
void ResetData()
{
var param = m_PolicyFactory.BrainParameters;
m_ActionMasker = new DiscreteActionMasker(param);
// If we haven't initialized vectorActions, initialize to 0. This should only
// happen during the creation of the Agent. In subsequent episodes, vectorAction
// should stay the previous action before the Done(), so that it is properly recorded.
if (m_Action.vectorActions == null)
{
m_Action.vectorActions = new float[param.NumActions];
m_Info.storedVectorActions = new float[param.NumActions];
}
m_ActuatorManager?.ResetData();
}
/// <summary>

/// control of an agent using keyboard, mouse, or game controller input.
///
/// Your heuristic implementation can use any decision making logic you specify. Assign decision
/// values to the float[] array, <paramref name="actionsOut"/>, passed to your function as a parameter.
/// values to the <see cref="ActionBuffers.ContinuousActions"/> and <see cref="ActionBuffers.DiscreteActions"/>
/// arrays , passed to your function as a parameter.
/// <seealso cref="OnActionReceived(float[])"/> function, which receives this array and
/// <seealso cref="IActionReceiver.OnActionReceived"/> function, which receives this array and
/// implements the corresponding agent behavior. See [Actions] for more information
/// about agent actions.
/// Note : Do not create a new float array of action in the `Heuristic()` method,

/// You can also use the [Input System package], which provides a more flexible and
/// configurable input system.
/// <code>
/// public override void Heuristic(float[] actionsOut)
/// public override void Heuristic(ActionBuffers actionsOut)
/// actionsOut[0] = Input.GetAxis("Horizontal");
/// actionsOut[1] = Input.GetKey(KeyCode.Space) ? 1.0f : 0.0f;
/// actionsOut[2] = Input.GetAxis("Vertical");
/// actionsOut.ContinuousActions[0] = Input.GetAxis("Horizontal");
/// actionsOut.ContinuousActions[1] = Input.GetKey(KeyCode.Space) ? 1.0f : 0.0f;
/// actionsOut.ContinuousActions[2] = Input.GetAxis("Vertical");
/// <param name="actionsOut">Array for the output actions.</param>
/// <seealso cref="OnActionReceived(float[])"/>
public virtual void Heuristic(float[] actionsOut)
/// <param name="actionsOut">The <see cref="ActionBuffers"/> which contain the continuous and
/// discrete action buffers to write to.</param>
/// <seealso cref="IActionReceiver.OnActionReceived"/>
public virtual void Heuristic(in ActionBuffers actionsOut)
Debug.LogWarning("Heuristic method called but not implemented. Returning placeholder actions.");
Array.Clear(actionsOut, 0, actionsOut.Length);
// For backward compatibility
switch (m_PolicyFactory.BrainParameters.VectorActionSpaceType)
{
case SpaceType.Continuous:
Heuristic(actionsOut.ContinuousActions.Array);
actionsOut.DiscreteActions.Clear();
break;
case SpaceType.Discrete:
var convertedOut = Array.ConvertAll(actionsOut.DiscreteActions.Array, x => (float)x);
Heuristic(convertedOut);
var discreteActionSegment = actionsOut.DiscreteActions;
for (var i = 0; i < actionsOut.DiscreteActions.Length; i++)
{
discreteActionSegment[i] = (int)convertedOut[i];
}
actionsOut.ContinuousActions.Clear();
break;
}
}
/// <summary>

#if DEBUG
// Make sure the names are actually unique
for (var i = 0; i < sensors.Count - 1; i++)
{
Debug.Assert(

#endif
}
void InitializeActuators()
{
ActuatorComponent[] attachedActuators;
if (m_PolicyFactory.UseChildActuators)
{
attachedActuators = GetComponentsInChildren<ActuatorComponent>();
}
else
{
attachedActuators = GetComponents<ActuatorComponent>();
}
// Support legacy OnActionReceived
var param = m_PolicyFactory.BrainParameters;
m_VectorActuator = new VectorActuator(this, param.VectorActionSize, param.VectorActionSpaceType);
m_ActuatorManager = new ActuatorManager(attachedActuators.Length + 1);
m_LegacyActionCache = new float[m_VectorActuator.TotalNumberOfActions];
m_ActuatorManager.Add(m_VectorActuator);
foreach (var actuatorComponent in attachedActuators)
{
m_ActuatorManager.Add(actuatorComponent.CreateActuator());
}
}
/// <summary>
/// Sends the Agent info to the linked Brain.
/// </summary>

if (m_Info.done)
{
Array.Clear(m_Info.storedVectorActions, 0, m_Info.storedVectorActions.Length);
m_Info.ClearActions();
Array.Copy(m_Action.vectorActions, m_Info.storedVectorActions, m_Action.vectorActions.Length);
m_ActuatorManager.StoredActions.PackActions(m_Info.storedVectorActions);
m_ActionMasker.ResetMask();
UpdateSensors();
using (TimerStack.Instance.Scoped("CollectObservations"))
{

{
if (m_PolicyFactory.BrainParameters.VectorActionSpaceType == SpaceType.Discrete)
{
CollectDiscreteActionMasks(m_ActionMasker);
}
m_ActuatorManager.WriteActionMask();
m_Info.discreteActionMasks = m_ActionMasker.GetMask();
m_Info.discreteActionMasks = m_ActuatorManager.DiscreteActionMask?.GetMask();
m_Info.reward = m_Reward;
m_Info.done = false;
m_Info.maxStepReached = false;

/// <summary>
/// Returns a read-only view of the observations that were generated in
/// <see cref="CollectObservations(VectorSensor)"/>. This is mainly useful inside of a
/// <see cref="Heuristic(float[])"/> method to avoid recomputing the observations.
/// <see cref="Heuristic(float[], int[])"/> method to avoid recomputing the observations.
/// </summary>
/// <returns>A read-only view of the observations list.</returns>
public ReadOnlyCollection<float> GetObservations()

///
/// [Agents - Actions]: https://github.com/Unity-Technologies/ml-agents/blob/release_5_docs/docs/Learning-Environment-Design-Agents.md#actions
/// </remarks>
/// <seealso cref="OnActionReceived(float[])"/>
public virtual void CollectDiscreteActionMasks(DiscreteActionMasker actionMasker)
/// <seealso cref="IActionReceiver.OnActionReceived"/>
public virtual void WriteDiscreteActionMask(IDiscreteActionMask actionMask)
if (m_ActionMasker == null)
{
m_ActionMasker = new DiscreteActionMasker(actionMask);
}
CollectDiscreteActionMasks(m_ActionMasker);
ActionSpec IActionReceiver.ActionSpec { get; }
/// <summary>
/// Implement `OnActionReceived()` to specify agent behavior at every step, based

/// three values in the action array to use as the force components. During
/// training, the agent's policy learns to set those particular elements of
/// the array to maximize the training rewards the agent receives. (Of course,
/// if you implement a <seealso cref="Heuristic"/> function, it must use the same
/// if you implement a <seealso cref="Heuristic(float[], int[])"/> function, it must use the same
/// elements of the action array for the same purpose since there is no learning
/// involved.)
///

///
/// [Agents - Actions]: https://github.com/Unity-Technologies/ml-agents/blob/release_5_docs/docs/Learning-Environment-Design-Agents.md#actions
/// </remarks>
/// <param name="vectorAction">
/// An array containing the action vector. The length of the array is specified
/// by the <see cref="BrainParameters"/> of the agent's associated
/// <see cref="BehaviorParameters"/> component.
/// <param name="actions">
/// Struct containing the buffers of actions to be executed at this step.
public virtual void OnActionReceived(float[] vectorAction) {}
public virtual void OnActionReceived(ActionBuffers actions)
{
actions.PackActions(m_LegacyActionCache);
OnActionReceived(m_LegacyActionCache);
}
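As a rough illustration of the force-component remark above, a hypothetical agent with a Rigidbody and three continuous actions could consume the new buffers like this (component lookup kept inline for brevity; a real agent would cache it):
public override void OnActionReceived(ActionBuffers actions)
{
    var act = actions.ContinuousActions;
    // Treat the first three continuous actions as a force vector.
    GetComponent<Rigidbody>().AddForce(new Vector3(act[0], act[1], act[2]));
}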
/// <summary>
/// Implement `OnEpisodeBegin()` to set up an Agent instance at the beginning

public virtual void OnEpisodeBegin() {}
/// <summary>
/// Returns the last action that was decided on by the Agent.
/// Gets the last ActionBuffer for this agent.
/// <returns>
/// The last action that was decided by the Agent (or null if no decision has been made).
/// </returns>
/// <seealso cref="OnActionReceived(float[])"/>
public float[] GetAction()
public ActionBuffers GetStoredContinuousActions()
return m_Action.vectorActions;
return m_ActuatorManager.StoredActions;
}
/// <summary>

if ((m_RequestAction) && (m_Brain != null))
{
m_RequestAction = false;
OnActionReceived(m_Action.vectorActions);
m_ActuatorManager.ExecuteActions();
}
if ((m_StepCount >= MaxStep) && (MaxStep > 0))

void DecideAction()
{
if (m_Action.vectorActions == null)
if (m_ActuatorManager.StoredActions.ContinuousActions.Array == null)
var action = m_Brain?.DecideAction();
if (action == null)
{
Array.Clear(m_Action.vectorActions, 0, m_Action.vectorActions.Length);
}
else
{
Array.Copy(action, m_Action.vectorActions, action.Length);
}
var actions = m_Brain?.DecideAction() ?? new ActionBuffers();
m_Info.CopyActions(actions);
m_ActuatorManager.UpdateActions(actions);
}
}
}

14
com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs


}
#region AgentAction
public static AgentAction ToAgentAction(this AgentActionProto aap)
{
return new AgentAction
{
vectorActions = aap.VectorActions.ToArray()
};
}
public static List<AgentAction> ToAgentActionList(this UnityRLInputProto.Types.ListAgentActionProto proto)
public static List<float[]> ToAgentActionList(this UnityRLInputProto.Types.ListAgentActionProto proto)
var agentActions = new List<AgentAction>(proto.Value.Count);
var agentActions = new List<float[]>(proto.Value.Count);
agentActions.Add(ap.ToAgentAction());
agentActions.Add(ap.VectorActions.ToArray());
}
return agentActions;
}

2
com.unity.ml-agents/Runtime/Communicator/RpcCommunicator.cs


var agentId = m_OrderedAgentsRequestingDecisions[brainName][i];
if (m_LastActionsReceived[brainName].ContainsKey(agentId))
{
m_LastActionsReceived[brainName][agentId] = agentAction.vectorActions;
m_LastActionsReceived[brainName][agentId] = agentAction;
}
}
}

6
com.unity.ml-agents/Runtime/DecisionRequester.cs


/// that the Agent will request a decision every 5 Academy steps.
/// </summary>
[Range(1, 20)]
[Tooltip("The frequency with which the agent requests a decision. A DecisionPeriod " +
"of 5 means that the Agent will request a decision every 5 Academy steps.")]
"of 5 means that the Agent will request a decision every 5 Academy steps.")]
public int DecisionPeriod = 5;
/// <summary>

[Tooltip("Indicates whether or not the agent will take an action during the Academy " +
"steps where it does not request a decision. Has no effect when DecisionPeriod " +
"is set to 1.")]
"steps where it does not request a decision. Has no effect when DecisionPeriod " +
"is set to 1.")]
[FormerlySerializedAs("RepeatAction")]
public bool TakeActionsBetweenDecisions = true;

118
com.unity.ml-agents/Runtime/DiscreteActionMasker.cs


using System;
using System.Collections.Generic;
using System.Linq;
using Unity.MLAgents.Policies;
using Unity.MLAgents.Actuators;
namespace Unity.MLAgents
{

/// may be illegal. For example, if an agent is adjacent to a wall or other obstacle
/// you could mask any actions that direct the agent to move into the blocked space.
/// </remarks>
public class DiscreteActionMasker
public class DiscreteActionMasker : IDiscreteActionMask
/// When using discrete control, holds the starting indices of the actions
/// when all the branches are concatenated with each other.
int[] m_StartingActionIndices;
bool[] m_CurrentMask;
readonly BrainParameters m_BrainParameters;
IDiscreteActionMask m_Delegate;
internal DiscreteActionMasker(BrainParameters brainParameters)
internal DiscreteActionMasker(IDiscreteActionMask actionMask)
m_BrainParameters = brainParameters;
m_Delegate = actionMask;
}
/// <summary>

/// <param name="actionIndices">The indices of the masked actions.</param>
public void SetMask(int branch, IEnumerable<int> actionIndices)
{
// If the branch does not exist, raise an error
if (branch >= m_BrainParameters.VectorActionSize.Length)
throw new UnityAgentsException(
"Invalid Action Masking : Branch " + branch + " does not exist.");
var totalNumberActions = m_BrainParameters.VectorActionSize.Sum();
// By default, the masks are null. If we want to specify a new mask, we initialize
// the actionMasks with trues.
if (m_CurrentMask == null)
{
m_CurrentMask = new bool[totalNumberActions];
}
// If this is the first time the masked actions are used, we generate the starting
// indices for each branch.
if (m_StartingActionIndices == null)
{
m_StartingActionIndices = Utilities.CumSum(m_BrainParameters.VectorActionSize);
}
// Perform the masking
foreach (var actionIndex in actionIndices)
{
if (actionIndex >= m_BrainParameters.VectorActionSize[branch])
{
throw new UnityAgentsException(
"Invalid Action Masking: Action Mask is too large for specified branch.");
}
m_CurrentMask[actionIndex + m_StartingActionIndices[branch]] = true;
}
}
/// <summary>
/// Get the current mask for an agent.
/// </summary>
/// <returns>A mask for the agent. A boolean array of length equal to the total number of
/// actions.</returns>
internal bool[] GetMask()
{
if (m_CurrentMask != null)
{
AssertMask();
}
return m_CurrentMask;
m_Delegate.WriteMask(branch, actionIndices);
/// <summary>
/// Makes sure that the current mask is usable.
/// </summary>
void AssertMask()
public void WriteMask(int branch, IEnumerable<int> actionIndices)
// Action Masks can only be used in Discrete Control.
if (m_BrainParameters.VectorActionSpaceType != SpaceType.Discrete)
{
throw new UnityAgentsException(
"Invalid Action Masking : Can only set action mask for Discrete Control.");
}
var numBranches = m_BrainParameters.VectorActionSize.Length;
for (var branchIndex = 0; branchIndex < numBranches; branchIndex++)
{
if (AreAllActionsMasked(branchIndex))
{
throw new UnityAgentsException(
"Invalid Action Masking : All the actions of branch " + branchIndex +
" are masked.");
}
}
m_Delegate.WriteMask(branch, actionIndices);
/// <summary>
/// Resets the current mask for an agent.
/// </summary>
internal void ResetMask()
public bool[] GetMask()
if (m_CurrentMask != null)
{
Array.Clear(m_CurrentMask, 0, m_CurrentMask.Length);
}
return m_Delegate.GetMask();
/// <summary>
/// Checks if all the actions in the input branch are masked.
/// </summary>
/// <param name="branch"> The index of the branch to check.</param>
/// <returns> True if all the actions of the branch are masked.</returns>
bool AreAllActionsMasked(int branch)
public void ResetMask()
if (m_CurrentMask == null)
{
return false;
}
var start = m_StartingActionIndices[branch];
var end = m_StartingActionIndices[branch + 1];
for (var i = start; i < end; i++)
{
if (!m_CurrentMask[i])
{
return false;
}
}
return true;
m_Delegate.ResetMask();
}
}
}
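For context, masks are typically written from the agent against the new interface; a minimal sketch assuming a hypothetical agent with a single discrete branch of size 4 where indices 1 and 2 are currently blocked:
public override void WriteDiscreteActionMask(IDiscreteActionMask actionMask)
{
    // Branch 0: forbid actions 1 and 2 for this decision only.
    actionMask.WriteMask(0, new[] { 1, 2 });
}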

17
com.unity.ml-agents/Runtime/Policies/BarracudaPolicy.cs


using System;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Inference;
using Unity.MLAgents.Sensors;

internal class BarracudaPolicy : IPolicy
{
protected ModelRunner m_ModelRunner;
ActionBuffers m_LastActionBuffer;
int m_AgentId;

List<int[]> m_SensorShapes;
SpaceType m_SpaceType;
/// <inheritdoc />
public BarracudaPolicy(

{
var modelRunner = Academy.Instance.GetOrCreateModelRunner(model, brainParameters, inferenceDevice);
m_ModelRunner = modelRunner;
m_SpaceType = brainParameters.VectorActionSpaceType;
}
/// <inheritdoc />

}
/// <inheritdoc />
public float[] DecideAction()
public ref readonly ActionBuffers DecideAction()
return m_ModelRunner?.GetAction(m_AgentId);
var actions = m_ModelRunner?.GetAction(m_AgentId);
if (m_SpaceType == SpaceType.Continuous)
{
m_LastActionBuffer = new ActionBuffers(actions, Array.Empty<int>());
return ref m_LastActionBuffer;
}
m_LastActionBuffer = ActionBuffers.FromDiscreteActions(actions);
return ref m_LastActionBuffer;
}
public void Dispose()

37
com.unity.ml-agents/Runtime/Policies/BehaviorParameters.cs


[Tooltip("Use all Sensor components attached to child GameObjects of this Agent.")]
bool m_UseChildSensors = true;
[HideInInspector]
[SerializeField]
[Tooltip("Use all Actuator components attached to child GameObjects of this Agent.")]
bool m_UseChildActuators = true;
/// <summary>
/// Whether or not to use all the sensor components attached to child GameObjects of the agent.
/// Note that changing this after the Agent has been initialized will not have any effect.

set { m_UseChildSensors = value; }
}
/// <summary>
/// Whether or not to use all the actuator components attached to child GameObjects of the agent.
/// Note that changing this after the Agent has been initialized will not have any effect.
/// </summary>
public bool UseChildActuators
{
get { return m_UseChildActuators; }
set { m_UseChildActuators = value; }
}
[HideInInspector, SerializeField]
ObservableAttributeOptions m_ObservableAttributeHandling = ObservableAttributeOptions.Ignore;

switch (m_BehaviorType)
{
case BehaviorType.HeuristicOnly:
return new HeuristicPolicy(heuristic, m_BrainParameters.NumActions);
return GenerateHeuristicPolicy(heuristic);
case BehaviorType.InferenceOnly:
{
if (m_Model == null)

}
else
{
return new HeuristicPolicy(heuristic, m_BrainParameters.NumActions);
return GenerateHeuristicPolicy(heuristic);
return new HeuristicPolicy(heuristic, m_BrainParameters.NumActions);
return GenerateHeuristicPolicy(heuristic);
}
internal IPolicy GenerateHeuristicPolicy(HeuristicPolicy.ActionGenerator heuristic)
{
var numContinuousActions = 0;
var numDiscreteActions = 0;
if (m_BrainParameters.VectorActionSpaceType == SpaceType.Continuous)
{
numContinuousActions = m_BrainParameters.NumActions;
}
else if (m_BrainParameters.VectorActionSpaceType == SpaceType.Discrete)
{
numDiscreteActions = m_BrainParameters.NumActions;
}
return new HeuristicPolicy(heuristic, numContinuousActions, numDiscreteActions);
}
internal void UpdateAgentPolicy()

20
com.unity.ml-agents/Runtime/Policies/HeuristicPolicy.cs


using System.Collections.Generic;
using System;
using System.Collections;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Sensors;
namespace Unity.MLAgents.Policies

/// </summary>
internal class HeuristicPolicy : IPolicy
{
public delegate void ActionGenerator(float[] actionsOut);
public delegate void ActionGenerator(in ActionBuffers actionBuffers);
float[] m_LastDecision;
ActionBuffers m_ActionBuffers;
bool m_Done;
bool m_DecisionRequested;

/// <inheritdoc />
public HeuristicPolicy(ActionGenerator heuristic, int numActions)
public HeuristicPolicy(ActionGenerator heuristic, int numContinuousActions, int numDiscreteActions)
m_LastDecision = new float[numActions];
var continuousDecision = new ActionSegment<float>(new float[numContinuousActions], 0, numContinuousActions);
var discreteDecision = new ActionSegment<int>(new int[numDiscreteActions], 0, numDiscreteActions);
m_ActionBuffers = new ActionBuffers(continuousDecision, discreteDecision);
}
/// <inheritdoc />

m_Done = info.done;
m_DecisionRequested = true;
public float[] DecideAction()
public ref readonly ActionBuffers DecideAction()
m_Heuristic.Invoke(m_LastDecision);
m_Heuristic.Invoke(m_ActionBuffers);
return m_LastDecision;
return ref m_ActionBuffers;
}
public void Dispose()

public float this[int index]
{
get { return 0.0f; }
set { }
set {}
}
}

3
com.unity.ml-agents/Runtime/Policies/IPolicy.cs


using System;
using System.Collections.Generic;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Sensors;
namespace Unity.MLAgents.Policies

/// it must be taken now. The Brain is expected to update the actions
/// of the Agents no later than this point.
/// </summary>
float[] DecideAction();
ref readonly ActionBuffers DecideAction();
}
}

15
com.unity.ml-agents/Runtime/Policies/RemotePolicy.cs


using UnityEngine;
using System.Collections.Generic;
using System;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Sensors;
namespace Unity.MLAgents.Policies

{
int m_AgentId;
string m_FullyQualifiedBehaviorName;
SpaceType m_SpaceType;
ActionBuffers m_LastActionBuffer;
internal ICommunicator m_Communicator;

{
m_FullyQualifiedBehaviorName = fullyQualifiedBehaviorName;
m_Communicator = Academy.Instance.Communicator;
m_SpaceType = brainParameters.VectorActionSpaceType;
m_Communicator.SubscribeBrain(m_FullyQualifiedBehaviorName, brainParameters);
}

}
/// <inheritdoc />
public float[] DecideAction()
public ref readonly ActionBuffers DecideAction()
return m_Communicator?.GetActions(m_FullyQualifiedBehaviorName, m_AgentId);
var actions = m_Communicator?.GetActions(m_FullyQualifiedBehaviorName, m_AgentId);
if (m_SpaceType == SpaceType.Continuous)
{
m_LastActionBuffer = new ActionBuffers(actions, Array.Empty<int>());
return ref m_LastActionBuffer;
}
m_LastActionBuffer = ActionBuffers.FromDiscreteActions(actions);
return ref m_LastActionBuffer;
}
public void Dispose()

46
com.unity.ml-agents/Tests/Editor/Actuators/ActuatorManagerTests.cs


actuator1ActionSpaceDef.SumOfDiscreteBranchSizes + actuator2ActionSpaceDef.SumOfDiscreteBranchSizes,
actuator1ActionSpaceDef.NumDiscreteActions + actuator2ActionSpaceDef.NumDiscreteActions);
manager.UpdateActions(new[]
{ 0f, 1f, 2f, 3f, 4f, 5f, 6f, 7f, 8f, 9f, 10f, 11f }, Array.Empty<int>());
manager.UpdateActions(new ActionBuffers(new[]
{ 0f, 1f, 2f, 3f, 4f, 5f, 6f, 7f, 8f, 9f, 10f, 11f }, Array.Empty<int>()));
Assert.IsTrue(12 == manager.StoredContinuousActions.Length);
Assert.IsTrue(0 == manager.StoredDiscreteActions.Length);
Assert.IsTrue(12 == manager.StoredActions.ContinuousActions.Length);
Assert.IsTrue(0 == manager.StoredActions.DiscreteActions.Length);
}
[Test]

actuator1ActionSpaceDef.SumOfDiscreteBranchSizes + actuator2ActionSpaceDef.SumOfDiscreteBranchSizes,
actuator1ActionSpaceDef.NumDiscreteActions + actuator2ActionSpaceDef.NumDiscreteActions);
manager.UpdateActions(Array.Empty<float>(),
new[] { 0, 1, 2, 3, 4, 5, 6});
manager.UpdateActions(new ActionBuffers(Array.Empty<float>(),
new[] { 0, 1, 2, 3, 4, 5, 6}));
Assert.IsTrue(0 == manager.StoredContinuousActions.Length);
Assert.IsTrue(7 == manager.StoredDiscreteActions.Length);
Assert.IsTrue(0 == manager.StoredActions.ContinuousActions.Length);
Assert.IsTrue(7 == manager.StoredActions.DiscreteActions.Length);
}
[Test]

manager.Add(actuator2);
var discreteActionBuffer = new[] { 0, 1, 2, 3, 4, 5, 6};
manager.UpdateActions(Array.Empty<float>(),
discreteActionBuffer);
manager.UpdateActions(new ActionBuffers(Array.Empty<float>(),
discreteActionBuffer));
manager.ExecuteActions();
var actuator1Actions = actuator1.LastActionBuffer.DiscreteActions;

manager.Add(actuator2);
var continuousActionBuffer = new[] { 0f, 1f, 2f, 3f, 4f, 5f};
manager.UpdateActions(continuousActionBuffer,
Array.Empty<int>());
manager.UpdateActions(new ActionBuffers(continuousActionBuffer,
Array.Empty<int>()));
manager.ExecuteActions();
var actuator1Actions = actuator1.LastActionBuffer.ContinuousActions;

manager.Add(actuator1);
manager.Add(actuator2);
var continuousActionBuffer = new[] { 0f, 1f, 2f, 3f, 4f, 5f};
manager.UpdateActions(continuousActionBuffer,
Array.Empty<int>());
manager.UpdateActions(new ActionBuffers(continuousActionBuffer,
Array.Empty<int>()));
Assert.IsTrue(manager.StoredContinuousActions.SequenceEqual(continuousActionBuffer));
Assert.IsTrue(manager.StoredActions.ContinuousActions.SequenceEqual(continuousActionBuffer));
}
[Test]

manager.Add(actuator1);
manager.Add(actuator2);
var discreteActionBuffer = new[] { 0, 1, 2, 3, 4, 5};
manager.UpdateActions(Array.Empty<float>(),
discreteActionBuffer);
manager.UpdateActions(new ActionBuffers(Array.Empty<float>(),
discreteActionBuffer));
Debug.Log(manager.StoredDiscreteActions);
Debug.Log(manager.StoredActions.DiscreteActions);
Assert.IsTrue(manager.StoredDiscreteActions.SequenceEqual(discreteActionBuffer));
Assert.IsTrue(manager.StoredActions.DiscreteActions.SequenceEqual(discreteActionBuffer));
}
[Test]

manager.Add(actuator1);
manager.Add(actuator2);
var continuousActionBuffer = new[] { 0f, 1f, 2f, 3f, 4f, 5f};
manager.UpdateActions(continuousActionBuffer,
Array.Empty<int>());
manager.UpdateActions(new ActionBuffers(continuousActionBuffer,
Array.Empty<int>()));
Assert.IsTrue(manager.StoredContinuousActions.SequenceEqual(continuousActionBuffer));
Assert.IsTrue(manager.StoredActions.ContinuousActions.SequenceEqual(continuousActionBuffer));
Assert.IsTrue(manager.StoredContinuousActions.SequenceEqual(new[] { 0f, 0f, 0f, 0f, 0f, 0f}));
Assert.IsTrue(manager.StoredActions.ContinuousActions.SequenceEqual(new[] { 0f, 0f, 0f, 0f, 0f, 0f}));
}
[Test]

3
com.unity.ml-agents/Tests/Editor/BehaviorParameterTests.cs


using NUnit.Framework;
using Unity.MLAgents.Actuators;
using UnityEngine;
using Unity.MLAgents.Policies;

public class BehaviorParameterTests
{
static void DummyHeuristic(float[] actionsOut)
static void DummyHeuristic(in ActionBuffers actionsOut)
{
// No-op
}

4
com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs


using NUnit.Framework;
using System.Reflection;
using System.Collections.Generic;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Sensors;
using Unity.MLAgents.Sensors.Reflection;
using Unity.MLAgents.Policies;

{
public Action OnRequestDecision;
ObservationWriter m_ObsWriter = new ObservationWriter();
static ActionBuffers s_EmptyActionBuffers = new ActionBuffers(Array.Empty<float>(), Array.Empty<int>());
public void RequestDecision(AgentInfo info, List<ISensor> sensors)
{
foreach (var sensor in sensors)

OnRequestDecision?.Invoke();
}
public float[] DecideAction() { return new float[0]; }
public ref readonly ActionBuffers DecideAction() { return ref s_EmptyActionBuffers; }
public void Dispose() {}
}

2
config/ppo/WalkerDynamic.yaml


gamma: 0.995
strength: 1.0
keep_checkpoints: 5
max_steps: 20000000
max_steps: 30000000
time_horizon: 1000
summary_freq: 30000
threaded: true

2
config/ppo/WalkerStatic.yaml


gamma: 0.995
strength: 1.0
keep_checkpoints: 5
max_steps: 20000000
max_steps: 30000000
time_horizon: 1000
summary_freq: 30000
threaded: true

31
docs/Learning-Environment-Examples.md


- Set-up: Physics-based Humanoid agents with 26 degrees of freedom. These DOFs
correspond to articulation of the following body-parts: hips, chest, spine,
head, thighs, shins, feet, arms, forearms and hands.
- Goal: The agent must move its body toward the goal direction as quickly as
possible without falling.
- `WalkerStatic` - Goal direction is always forward.
- Goal: The agent must move its body toward the goal direction without falling.
- `WalkerDynamicVariableSpeed` - Goal direction and walking speed are randomized.
- `WalkerStatic` - Goal direction is always forward.
- `WalkerStaticVariableSpeed` - Goal direction is always forward. Walking
speed is randomized.
- +0.02 times body velocity in the goal direction. (run towards target)
- +0.01 times head direction alignment with goal direction. (face towards target)
- +0.005 times head y position - left foot y position. (encourage head height)
- +0.005 times head y position - right foot y position. (encourage head height)
The reward function is now geometric, meaning the reward at each step is the
product of the individual reward terms instead of their sum; this pushes the agent
to maximize all of the rewards rather than only the easiest ones (see the sketch
below).
- Body velocity matches goal velocity. (normalized between (0,1))
- Head direction alignment with goal direction. (normalized between (0,1))
- Vector Observation space: 236 variables corresponding to position, rotation,
- Vector Observation space: 243 variables corresponding to position, rotation,
velocity, and angular velocities of each limb, along with goal direction.
- Vector Action space: (Continuous) Size of 39, corresponding to target
rotations and strength applicable to the joints.
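A minimal sketch of the geometric combination described above (helper names are hypothetical; each term is assumed to already be normalized to (0, 1)):
// Hypothetical, already-normalized reward terms for one step.
float matchSpeedReward = GetMatchingVelocityReward();   // body velocity vs. goal velocity
float lookAtTargetReward = GetLookAtTargetReward();     // head direction vs. goal direction
AddReward(matchSpeedReward * lookAtTargetReward);       // product, not sum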

- Recommended Minimum:
- Recommended Maximum:
- hip_mass: Mass of the hip component of the walker
- Default: 15
- Default: 8
- Recommended Minimum: 7
- Recommended Maximum: 28
- chest_mass: Mass of the chest component of the walker

- spine_mass: Mass of the spine component of the walker
- Default: 10
- Default: 8
- Benchmark Mean Reward for `WalkerStatic`: 1500
- Benchmark Mean Reward for `WalkerDynamic`: 700
- Benchmark Mean Reward for `WalkerDynamic`: 2500
- Benchmark Mean Reward for `WalkerDynamicVariableSpeed`: 2500
- Benchmark Mean Reward for `WalkerStatic`: 3500
- Benchmark Mean Reward for `WalkerStaticVariableSpeed`: 3500
## Pyramids

8
ml-agents-envs/mlagents_envs/exception.py


def __init__(self, worker_id):
message = self.MESSAGE_TEMPLATE.format(str(worker_id))
super().__init__(message)
class UnityPolicyException(UnityException):
"""
Related to errors with the Policy.
"""
pass

2
ml-agents/mlagents/trainers/ghost/trainer.py


"""
policy = self.trainer.create_policy(parsed_behavior_id, behavior_spec)
policy.create_tf_graph()
policy.initialize_or_load()
self.trainer.saver.initialize_or_load(policy)
policy.init_load_weights()
team_id = parsed_behavior_id.team_id
self.controller.subscribe_team_id(team_id, self)

3
ml-agents/mlagents/trainers/optimizer/tf_optimizer.py


self.reward_signals[reward_signal.value].update_dict
)
@classmethod
self, learning_rate: tf.Tensor, name: str = "Adam"
cls, learning_rate: tf.Tensor, name: str = "Adam"
) -> tf.train.Optimizer:
return tf.train.AdamOptimizer(learning_rate=learning_rate, name=name)

20
ml-agents/mlagents/trainers/policy/policy.py


from mlagents_envs.base_env import DecisionSteps
from mlagents_envs.exception import UnityException
from mlagents.model_serialization import SerializationSettings
from mlagents.trainers.action_info import ActionInfo
from mlagents_envs.base_env import BehaviorSpec
from mlagents.trainers.settings import TrainerSettings, NetworkSettings

seed: int,
behavior_spec: BehaviorSpec,
trainer_settings: TrainerSettings,
model_path: str,
load: bool = False,
tanh_squash: bool = False,
reparameterize: bool = False,
condition_sigma_on_obs: bool = True,

self.vis_obs_size = sum(
1 for shape in behavior_spec.observation_shapes if len(shape) == 3
)
self.model_path = model_path
self.initialize_path = self.trainer_settings.init_path
self._keep_checkpoints = self.trainer_settings.keep_checkpoints
self.vis_obs_shape = (
[shape for shape in behavior_spec.observation_shapes if len(shape) == 3][0]
if self.vis_obs_size > 0
else None
)
self.use_continuous_act = behavior_spec.is_action_continuous()
self.num_branches = self.behavior_spec.action_size
self.previous_action_dict: Dict[str, np.array] = {}

self.load = load
self.h_size = self.network_settings.hidden_units
num_layers = self.network_settings.num_layers
if num_layers < 1:

@abstractmethod
def get_current_step(self):
pass
@abstractmethod
def checkpoint(self, checkpoint_path: str, settings: SerializationSettings) -> None:
pass
@abstractmethod
def save(self, output_filepath: str, settings: SerializationSettings) -> None:
pass
@abstractmethod

131
ml-agents/mlagents/trainers/policy/tf_policy.py


from typing import Any, Dict, List, Optional, Tuple
from typing import Any, Dict, List, Optional, Tuple, Callable
from mlagents.model_serialization import SerializationSettings, export_policy_model
from mlagents.tf_utils import tf
from mlagents import tf_utils
from mlagents_envs.exception import UnityException

GaussianDistribution,
MultiCategoricalDistribution,
)
from mlagents.tf_utils.globals import get_rank
logger = get_logger(__name__)

Contains a learning model, and the necessary
functions to save/load models and create the input placeholders.
"""
# Callback function used at the start of training to synchronize weights.
# By default, this does nothing.
# If this needs to be used, it should be done from outside ml-agents.
broadcast_global_variables: Callable[[int], None] = lambda root_rank: None
def __init__(
self,

model_path: str,
load: bool = False,
tanh_squash: bool = False,
reparameterize: bool = False,
condition_sigma_on_obs: bool = True,

:param seed: Random seed to use for TensorFlow.
:param brain: The corresponding Brain for this policy.
:param trainer_settings: The trainer parameters.
:param model_path: Where to load/save the model.
:param load: If True, load model from model_path. Otherwise, create new model.
model_path,
load,
tanh_squash,
reparameterize,
condition_sigma_on_obs,

self.assign_ops: List[tf.Operation] = []
self.update_dict: Dict[str, tf.Tensor] = {}
self.inference_dict: Dict[str, tf.Tensor] = {}
self.first_normalization_update: bool = False
self.saver: Optional[tf.Operation] = None
self.rank = get_rank()
if create_tf_graph:
self.create_tf_graph()

# We do an initialize to make the Policy usable out of the box. If an optimizer is needed,
# it will re-load the full graph
self._initialize_graph()
self.initialize()
def _create_encoder(
self,

ver = LooseVersion(version_string)
return tuple(map(int, ver.version[0:3]))
def _check_model_version(self, version: str) -> None:
"""
Checks whether the model being loaded was created with the same version of
ML-Agents, and throw a warning if not so.
"""
if self.version_tensors is not None:
loaded_ver = tuple(
num.eval(session=self.sess) for num in self.version_tensors
)
if loaded_ver != TFPolicy._convert_version_string(version):
logger.warning(
f"The model checkpoint you are loading from was saved with ML-Agents version "
f"{loaded_ver[0]}.{loaded_ver[1]}.{loaded_ver[2]} but your current ML-Agents"
f"version is {version}. Model may not behave properly."
)
def _initialize_graph(self):
def initialize(self):
self.saver = tf.train.Saver(max_to_keep=self._keep_checkpoints)
def _load_graph(self, model_path: str, reset_global_steps: bool = False) -> None:
with self.graph.as_default():
self.saver = tf.train.Saver(max_to_keep=self._keep_checkpoints)
logger.info(f"Loading model from {model_path}.")
ckpt = tf.train.get_checkpoint_state(model_path)
if ckpt is None:
raise UnityPolicyException(
"The model {} could not be loaded. Make "
"sure you specified the right "
"--run-id and that the previous run you are loading from had the same "
"behavior names.".format(model_path)
)
try:
self.saver.restore(self.sess, ckpt.model_checkpoint_path)
except tf.errors.NotFoundError:
raise UnityPolicyException(
"The model {} was found but could not be loaded. Make "
"sure the model is from the same version of ML-Agents, has the same behavior parameters, "
"and is using the same trainer configuration as the current run.".format(
model_path
)
)
self._check_model_version(__version__)
if reset_global_steps:
self._set_step(0)
logger.info(
"Starting training from step 0 and saving to {}.".format(
self.model_path
)
)
else:
logger.info(f"Resuming training from step {self.get_current_step()}.")
def initialize_or_load(self):
# If there is an initialize path, load from that. Else, load from the set model path.
# If load is set to True, don't reset steps to 0. Else, do. This allows a user to,
# e.g., resume from an initialize path.
reset_steps = not self.load
if self.initialize_path is not None:
self._load_graph(self.initialize_path, reset_global_steps=reset_steps)
elif self.load:
self._load_graph(self.model_path, reset_global_steps=reset_steps)
else:
self._initialize_graph()
def get_weights(self):
with self.graph.as_default():
_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)

step = self.sess.run(self.global_step)
return step
def _set_step(self, step: int) -> int:
def set_step(self, step: int) -> int:
"""
Sets current model step to step without creating additional ops.
:param step: Step to set the current model step to.

"""
return list(self.update_dict.keys())
def checkpoint(self, checkpoint_path: str, settings: SerializationSettings) -> None:
"""
Checkpoints the policy on disk.
:param checkpoint_path: filepath to write the checkpoint
:param settings: SerializationSettings for exporting the model.
"""
# Save the TF checkpoint and graph definition
with self.graph.as_default():
if self.saver:
self.saver.save(self.sess, f"{checkpoint_path}.ckpt")
tf.train.write_graph(
self.graph, self.model_path, "raw_graph_def.pb", as_text=False
)
# also save the policy so we have optimized model files for each checkpoint
self.save(checkpoint_path, settings)
def save(self, output_filepath: str, settings: SerializationSettings) -> None:
"""
Saves the serialized model, given a path and SerializationSettings
This method will save the policy graph to the given filepath. The path
should be provided without an extension as multiple serialized model formats
may be generated as a result.
:param output_filepath: path (without suffix) for the model file(s)
:param settings: SerializationSettings for how to save the model.
"""
export_policy_model(output_filepath, settings, self.graph, self.sess)
def update_normalization(self, vector_obs: np.ndarray) -> None:
"""
If this policy normalizes vector observations, this will update the norm values in the graph.

self.sess.run(
self.update_normalization_op, feed_dict={self.vector_in: vector_obs}
)
if self.first_normalization_update:
self.sess.run(
self.init_normalization_op, feed_dict={self.vector_in: vector_obs}
)
self.first_normalization_update = False
else:
self.sess.run(
self.update_normalization_op, feed_dict={self.vector_in: vector_obs}
)
@property
def use_vis_obs(self):

self.normalization_steps: Optional[tf.Variable] = None
self.running_mean: Optional[tf.Variable] = None
self.running_variance: Optional[tf.Variable] = None
self.init_normalization_op: Optional[tf.Operation] = None
self.update_normalization_op: Optional[tf.Operation] = None
self.value: Optional[tf.Tensor] = None
self.all_log_probs: tf.Tensor = None

self.behavior_spec.observation_shapes
)
if self.normalize:
self.first_normalization_update = True
self.init_normalization_op = normalization_tensors.init_op
self.normalization_steps = normalization_tensors.steps
self.running_mean = normalization_tensors.running_mean
self.running_variance = normalization_tensors.running_variance

72
ml-agents/mlagents/trainers/policy/torch_policy.py


import numpy as np
import torch
import os
from torch import onnx
from mlagents.model_serialization import SerializationSettings
from mlagents.trainers.policy import Policy
from mlagents_envs.base_env import DecisionSteps, BehaviorSpec
from mlagents_envs.timers import timed

from mlagents.trainers.torch.networks import SharedActorCritic, SeparateActorCritic
from mlagents.trainers.torch.networks import (
SharedActorCritic,
SeparateActorCritic,
GlobalSteps,
)
from mlagents.trainers.torch.utils import ModelUtils
EPSILON = 1e-7 # Small value to avoid divide by zero

seed: int,
behavior_spec: BehaviorSpec,
trainer_settings: TrainerSettings,
model_path: str,
load: bool = False,
tanh_squash: bool = False,
reparameterize: bool = False,
separate_critic: bool = True,

seed,
behavior_spec,
trainer_settings,
model_path,
load,
self.global_step = 0
self.global_step = (
GlobalSteps()
) # could be much simpler if TorchPolicy is nn.Module
if TestingConfiguration.device != "cpu":
torch.set_default_tensor_type(torch.cuda.FloatTensor)
else:

agent_ids=list(decision_requests.agent_id),
)
def checkpoint(self, checkpoint_path: str, settings: SerializationSettings) -> None:
"""
Checkpoints the policy on disk.
:param checkpoint_path: filepath to write the checkpoint
:param settings: SerializationSettings for exporting the model.
"""
if not os.path.exists(self.model_path):
os.makedirs(self.model_path)
torch.save(self.actor_critic.state_dict(), f"{checkpoint_path}.pt")
def save(self, output_filepath: str, settings: SerializationSettings) -> None:
self.export_model(self.global_step)
def load_model(self, step=0): # TODO: this doesn't work
load_path = self.model_path + "/model-" + str(step) + ".pt"
self.actor_critic.load_state_dict(torch.load(load_path))
def export_model(self, step=0):
fake_vec_obs = [torch.zeros([1] + [self.vec_obs_size])]
fake_vis_obs = [torch.zeros([1] + [84, 84, 3])]
fake_masks = torch.ones([1] + self.actor_critic.act_size)
# fake_memories = torch.zeros([1] + [self.m_size])
export_path = "./model-" + str(step) + ".onnx"
output_names = ["action", "action_probs"]
input_names = ["vector_observation", "action_mask"]
dynamic_axes = {"vector_observation": [0], "action": [0], "action_probs": [0]}
onnx.export(
self.actor_critic,
(fake_vec_obs, fake_vis_obs, fake_masks),
export_path,
verbose=True,
opset_version=12,
input_names=input_names,
output_names=output_names,
dynamic_axes=dynamic_axes,
)
@property
def use_vis_obs(self):
return self.vis_obs_size > 0

Gets current model step.
:return: current model step.
"""
step = self.global_step
return self.global_step.current_step
def set_step(self, step: int) -> int:
"""
Sets current model step to step without creating additional ops.
:param step: Step to set the current model step to.
:return: The step the model was set to.
"""
self.global_step.current_step = step
return step
def increment_step(self, n_steps):

self.global_step += n_steps
self.global_step.increment(n_steps)
return self.get_current_step()
def load_weights(self, values: List[np.ndarray]) -> None:

def get_weights(self) -> List[np.ndarray]:
return []
def get_modules(self):
return {"Policy": self.actor_critic, "global_step": self.global_step}

10
ml-agents/mlagents/trainers/ppo/optimizer_tf.py


self.stream_names = list(self.reward_signals.keys())
self.tf_optimizer: Optional[tf.train.AdamOptimizer] = None
self.tf_optimizer_op: Optional[tf.train.Optimizer] = None
self.grads = None
self.update_batch: Optional[tf.Operation] = None

"decay_beta": self.decay_beta,
}
)
self.policy.initialize_or_load()
def _create_cc_critic(
self, h_size: int, num_layers: int, vis_encode_type: EncoderType

)
def _create_ppo_optimizer_ops(self):
self.tf_optimizer = self.create_optimizer_op(self.learning_rate)
self.grads = self.tf_optimizer.compute_gradients(self.loss)
self.update_batch = self.tf_optimizer.minimize(self.loss)
self.tf_optimizer_op = self.create_optimizer_op(self.learning_rate)
self.grads = self.tf_optimizer_op.compute_gradients(self.loss)
self.update_batch = self.tf_optimizer_op.minimize(self.loss)
@timed
def update(self, batch: AgentBuffer, num_sequences: int) -> Dict[str, float]:

3
ml-agents/mlagents/trainers/ppo/optimizer_torch.py


update_stats.update(reward_provider.update(batch))
return update_stats
def get_modules(self):
return {"Optimizer": self.optimizer}

32
ml-agents/mlagents/trainers/ppo/trainer.py


:param artifact_path: The directory within which to store artifacts from this trainer.
"""
super().__init__(
brain_name, trainer_settings, training, artifact_path, reward_buff_cap
brain_name, trainer_settings, training, load, artifact_path, reward_buff_cap
self.load = load
self.seed = seed
if TestingConfiguration.max_steps > 0:
self.trainer_settings.max_steps = TestingConfiguration.max_steps

self.seed,
behavior_spec,
self.trainer_settings,
model_path=self.artifact_path,
load=self.load,
condition_sigma_on_obs=False, # Faster training for PPO
)
return policy

self.seed,
behavior_spec,
self.trainer_settings,
self.artifact_path,
self.load,
def create_ppo_optimizer(self) -> PPOOptimizer:
if self.framework == FrameworkType.PYTORCH:
return TorchPPOOptimizer( # type: ignore
cast(TorchPolicy, self.policy), self.trainer_settings # type: ignore
) # type: ignore
else:
return PPOOptimizer( # type: ignore
cast(TFPolicy, self.policy), self.trainer_settings # type: ignore
) # type: ignore
def add_policy(
self, parsed_behavior_id: BehaviorIdentifiers, policy: Policy
) -> None:

)
self.policy = policy
self.policies[parsed_behavior_id.behavior_id] = policy
if self.framework == FrameworkType.PYTORCH:
self.optimizer = TorchPPOOptimizer( # type: ignore
self.policy, self.trainer_settings # type: ignore
) # type: ignore
else:
self.optimizer = PPOOptimizer( # type: ignore
self.policy, self.trainer_settings # type: ignore
) # type: ignore
self.optimizer = self.create_ppo_optimizer()
self.saver.register(self.policy)
self.saver.register(self.optimizer)
self.saver.initialize_or_load()
# Needed to resume loads properly
self.step = policy.get_current_step()

2
ml-agents/mlagents/trainers/sac/optimizer.py


[self.policy.update_normalization_op, target_update_norm]
)
self.policy.initialize_or_load()
self.stats_name_to_update_name = {
"Losses/Value Loss": "value_loss",
"Losses/Policy Loss": "policy_loss",

9
ml-agents/mlagents/trainers/sac/optimizer_torch.py


self, reward_signal_minibatches: Mapping[str, AgentBuffer], num_sequences: int
) -> Dict[str, float]:
return {}
def get_modules(self):
return {
"Optimizer:value_network": self.value_network,
"Optimizer:target_network": self.target_network,
"Optimizer:policy_optimizer": self.policy_optimizer,
"Optimizer:value_optimizer": self.value_optimizer,
"Optimizer:entropy_optimizer": self.entropy_optimizer,
}

31
ml-agents/mlagents/trainers/sac/trainer.py


:param artifact_path: The directory within which to store artifacts from this trainer.
"""
super().__init__(
brain_name, trainer_settings, training, artifact_path, reward_buff_cap
brain_name, trainer_settings, training, load, artifact_path, reward_buff_cap
self.load = load
self.seed = seed
self.policy: Policy = None # type: ignore
self.optimizer: SACOptimizer = None # type: ignore

self.seed,
behavior_spec,
self.trainer_settings,
self.artifact_path,
self.load,
tanh_squash=True,
reparameterize=True,
create_tf_graph=False,

self.seed,
behavior_spec,
self.trainer_settings,
self.artifact_path,
self.load,
condition_sigma_on_obs=True,
tanh_squash=True,
separate_critic=True,

for stat, stat_list in batch_update_stats.items():
self._stats_reporter.add_stat(stat, np.mean(stat_list))
def create_sac_optimizer(self) -> SACOptimizer:
if self.framework == FrameworkType.PYTORCH:
return TorchSACOptimizer( # type: ignore
cast(TorchPolicy, self.policy), self.trainer_settings # type: ignore
) # type: ignore
else:
return SACOptimizer( # type: ignore
cast(TFPolicy, self.policy), self.trainer_settings # type: ignore
) # type: ignore
def add_policy(
self, parsed_behavior_id: BehaviorIdentifiers, policy: Policy
) -> None:

)
self.policy = policy
self.policies[parsed_behavior_id.behavior_id] = policy
if self.framework == FrameworkType.PYTORCH:
self.optimizer = TorchSACOptimizer( # type: ignore
self.policy, self.trainer_settings # type: ignore
) # type: ignore
else:
self.optimizer = SACOptimizer( # type: ignore
self.policy, self.trainer_settings # type: ignore
) # type: ignore
self.optimizer = self.create_sac_optimizer()
self.saver.register(self.policy)
self.saver.register(self.optimizer)
self.saver.initialize_or_load()
# Needed to resume loads properly
self.step = policy.get_current_step()
# Assume steps were updated at the correct ratio before

8
ml-agents/mlagents/trainers/settings.py


device = "cpu"
class SerializationSettings:
convert_to_barracuda = True
convert_to_onnx = True
onnx_opset = 9
@attr.s(auto_attribs=True)
class ExportableSettings:
def as_dict(self):

PROGRESS: str = "progress"
REWARD: str = "reward"
behavior: str
behavior: str = attr.ib(default="")
min_lesson_length: int = 0
signal_smoothing: bool = True
threshold: float = attr.ib(default=0.0)

41
ml-agents/mlagents/trainers/stats.py


from mlagents_envs.logging_util import get_logger
from mlagents_envs.timers import set_gauge
from mlagents.tf_utils import tf, generate_session_config
from mlagents.tf_utils.globals import get_rank
logger = get_logger(__name__)

class GaugeWriter(StatsWriter):
"""
Write all stats that we recieve to the timer gauges, so we can track them offline easily
Write all stats that we receive to the timer gauges, so we can track them offline easily
"""
@staticmethod

# If self-play, we want to print ELO as well as reward
self.self_play = False
self.self_play_team = -1
self.rank = get_rank()
def write_stats(
self, category: str, values: Dict[str, StatsSummary], step: int

stats_summary = stats_summary = values["Is Training"]
stats_summary = values["Is Training"]
elapsed_time = time.time() - self.training_start_time
log_info: List[str] = [category]
log_info.append(f"Step: {step}")
log_info.append(f"Time Elapsed: {elapsed_time:0.3f} s")
logger.info(
"{}: Step: {}. "
"Time Elapsed: {:0.3f} s "
"Mean "
"Reward: {:0.3f}"
". Std of Reward: {:0.3f}. {}".format(
category,
step,
time.time() - self.training_start_time,
stats_summary.mean,
stats_summary.std,
is_training,
)
)
if self.rank is not None:
log_info.append(f"Rank: {self.rank}")
log_info.append(f"Mean Reward: {stats_summary.mean:0.3f}")
log_info.append(f"Std of Reward: {stats_summary.std:0.3f}")
log_info.append(is_training)
logger.info(f"{category} ELO: {elo_stats.mean:0.3f}. ")
log_info.append(f"ELO: {elo_stats.mean:0.3f}")
logger.info(
"{}: Step: {}. No episode was completed since last summary. {}".format(
category, step, is_training
)
)
log_info.append("No episode was completed since last summary")
log_info.append(is_training)
logger.info(". ".join(log_info))
def add_property(
self, category: str, property_type: StatsPropertyType, value: Any

3
ml-agents/mlagents/trainers/tests/mock_brain.py


memory=memory,
)
steps_list.append(experience)
obs = []
for _shape in observation_shapes:
obs.append(np.ones(_shape, dtype=np.float32))
last_experience = AgentExperience(
obs=obs,
reward=reward,

27
ml-agents/mlagents/trainers/tests/test_barracuda_converter.py


import os
import tempfile
import pytest
from mlagents.trainers.tests.test_nn_policy import create_policy_mock
from mlagents.trainers.settings import TrainerSettings
from mlagents.tf_utils import tf
from mlagents.model_serialization import SerializationSettings
def test_barracuda_converter():

# cleanup
os.remove(tmpfile)
@pytest.mark.parametrize("discrete", [True, False], ids=["discrete", "continuous"])
@pytest.mark.parametrize("visual", [True, False], ids=["visual", "vector"])
@pytest.mark.parametrize("rnn", [True, False], ids=["rnn", "no_rnn"])
def test_policy_conversion(tmpdir, rnn, visual, discrete):
tf.reset_default_graph()
dummy_config = TrainerSettings()
policy = create_policy_mock(
dummy_config,
use_rnn=rnn,
model_path=os.path.join(tmpdir, "test"),
use_discrete=discrete,
use_visual=visual,
)
settings = SerializationSettings(policy.model_path, "MockBrain")
checkpoint_path = f"{tmpdir}/MockBrain-1"
policy.checkpoint(checkpoint_path, settings)
# These checks taken from test_barracuda_converter
assert os.path.isfile(checkpoint_path + ".nn")
assert os.path.getsize(checkpoint_path + ".nn") > 100

10
ml-agents/mlagents/trainers/tests/test_bcmodule.py


NetworkSettings.MemorySettings() if use_rnn else None
)
policy = TFPolicy(
0,
mock_behavior_specs,
trainer_config,
"test",
False,
tanhresample,
tanhresample,
0, mock_behavior_specs, trainer_config, tanhresample, tanhresample
)
with policy.graph.as_default():
bc_module = BCModule(

default_num_epoch=3,
settings=bc_settings,
)
policy.initialize_or_load() # Normally the optimizer calls this after the BCModule is created
policy.initialize() # Normally the optimizer calls this after the BCModule is created
return bc_module

62
ml-agents/mlagents/trainers/tests/test_env_param_manager.py


yaml.safe_load(test_bad_curriculum_all_competion_criteria_config_yaml)
)
param_manager = EnvironmentParameterManager(
run_options.environment_parameters, 1337, False
)
assert param_manager.update_lessons(
trainer_steps={"fake_behavior": 500},
trainer_max_steps={"fake_behavior": 1000},
trainer_reward_buffer={"fake_behavior": [1000] * 101},
) == (True, True)
assert param_manager.update_lessons(
trainer_steps={"fake_behavior": 500},
trainer_max_steps={"fake_behavior": 1000},
trainer_reward_buffer={"fake_behavior": [1000] * 101},
) == (True, True)
assert param_manager.update_lessons(
trainer_steps={"fake_behavior": 500},
trainer_max_steps={"fake_behavior": 1000},
trainer_reward_buffer={"fake_behavior": [1000] * 101},
) == (False, False)
assert param_manager.get_current_lesson_number() == {"param_1": 2}
param_manager = EnvironmentParameterManager(
run_options.environment_parameters, 1337, False
)
assert param_manager.update_lessons(
trainer_steps={"fake_behavior": 500},
trainer_max_steps={"fake_behavior": 1000},
trainer_reward_buffer={"fake_behavior": [1000] * 101},
) == (True, True)
assert param_manager.update_lessons(
trainer_steps={"fake_behavior": 500},
trainer_max_steps={"fake_behavior": 1000},
trainer_reward_buffer={"fake_behavior": [1000] * 101},
) == (True, True)
assert param_manager.update_lessons(
trainer_steps={"fake_behavior": 500},
trainer_max_steps={"fake_behavior": 1000},
trainer_reward_buffer={"fake_behavior": [1000] * 101},
) == (False, False)
assert param_manager.get_current_lesson_number() == {"param_1": 2}
test_everything_config_yaml = """

"param_2": GaussianSettings(seed=1337 + 3, mean=4, st_dev=5),
"param_3": ConstantSettings(seed=1337 + 3 + 1, value=20),
}
test_curriculum_no_behavior_yaml = """
environment_parameters:
param_1:
curriculum:
- name: Lesson1
completion_criteria:
measure: reward
threshold: 30
min_lesson_length: 100
require_reset: true
value: 1
- name: Lesson2
value: 2
"""
def test_curriculum_no_behavior():
with pytest.raises(TypeError):
run_options = RunOptions.from_dict(
yaml.safe_load(test_curriculum_no_behavior_yaml)
)
EnvironmentParameterManager(run_options.environment_parameters, 1337, False)

175
ml-agents/mlagents/trainers/tests/test_nn_policy.py


import pytest
import os
import unittest
import tempfile
from mlagents.model_serialization import SerializationSettings
from mlagents.trainers.policy.tf_policy import TFPolicy
from mlagents.trainers.tf.models import ModelUtils, Tensor3DShape

from mlagents.trainers.tests.test_trajectory import make_fake_trajectory
from mlagents.trainers import __version__
VECTOR_ACTION_SPACE = 2

NUM_AGENTS = 12
EPSILON = 1e-7
def create_policy_mock(

use_visual: bool = False,
model_path: str = "",
load: bool = False,
seed: int = 0,
) -> TFPolicy:
mock_spec = mb.setup_test_behavior_specs(

trainer_settings.network_settings.memory = (
NetworkSettings.MemorySettings() if use_rnn else None
)
policy = TFPolicy(
seed, mock_spec, trainer_settings, model_path=model_path, load=load
)
policy = TFPolicy(seed, mock_spec, trainer_settings)
def test_load_save(tmp_path):
path1 = os.path.join(tmp_path, "runid1")
path2 = os.path.join(tmp_path, "runid2")
trainer_params = TrainerSettings()
policy = create_policy_mock(trainer_params, model_path=path1)
policy.initialize_or_load()
policy._set_step(2000)
mock_brain_name = "MockBrain"
checkpoint_path = f"{policy.model_path}/{mock_brain_name}-2000"
serialization_settings = SerializationSettings(policy.model_path, mock_brain_name)
policy.checkpoint(checkpoint_path, serialization_settings)
assert len(os.listdir(tmp_path)) > 0
# Try load from this path
policy2 = create_policy_mock(trainer_params, model_path=path1, load=True, seed=1)
policy2.initialize_or_load()
_compare_two_policies(policy, policy2)
assert policy2.get_current_step() == 2000
# Try initialize from path 1
trainer_params.output_path = path2
trainer_params.init_path = path1
policy3 = create_policy_mock(trainer_params, model_path=path1, load=False, seed=2)
policy3.initialize_or_load()
_compare_two_policies(policy2, policy3)
# Assert that the steps are 0.
assert policy3.get_current_step() == 0
class ModelVersionTest(unittest.TestCase):
def test_version_compare(self):
# Test write_stats
with self.assertLogs("mlagents.trainers", level="WARNING") as cm:
path1 = tempfile.mkdtemp()
trainer_params = TrainerSettings()
policy = create_policy_mock(trainer_params, model_path=path1)
policy.initialize_or_load()
policy._check_model_version(
"0.0.0"
) # This is not the right version for sure
# Assert that 1 warning has been thrown with incorrect version
assert len(cm.output) == 1
policy._check_model_version(__version__) # This should be the right version
# Assert that no additional warnings have been thrown wth correct ver
assert len(cm.output) == 1
def _compare_two_policies(policy1: TFPolicy, policy2: TFPolicy) -> None:
"""
Make sure two policies have the same output for the same input.

assert run_out["action"].shape == (NUM_AGENTS, VECTOR_ACTION_SPACE)
def test_large_normalization():
behavior_spec = mb.setup_test_behavior_specs(
use_discrete=True, use_visual=False, vector_action_space=[2], vector_obs_space=1
)
# Taken from Walker seed 3713 which causes NaN without proper initialization
large_obs1 = [
1800.00036621,
1799.96972656,
1800.01245117,
1800.07214355,
1800.02758789,
1799.98303223,
1799.88647461,
1799.89575195,
1800.03479004,
1800.14025879,
1800.17675781,
1800.20581055,
1800.33740234,
1800.36450195,
1800.43457031,
1800.45544434,
1800.44604492,
1800.56713867,
1800.73901367,
]
large_obs2 = [
1799.99975586,
1799.96679688,
1799.92980957,
1799.89550781,
1799.93774414,
1799.95300293,
1799.94067383,
1799.92993164,
1799.84057617,
1799.69873047,
1799.70605469,
1799.82849121,
1799.85095215,
1799.76977539,
1799.78283691,
1799.76708984,
1799.67163086,
1799.59191895,
1799.5135498,
1799.45556641,
1799.3717041,
]
policy = TFPolicy(
0,
behavior_spec,
TrainerSettings(network_settings=NetworkSettings(normalize=True)),
"testdir",
False,
)
time_horizon = len(large_obs1)
trajectory = make_fake_trajectory(
length=time_horizon,
max_step_complete=True,
observation_shapes=[(1,)],
action_space=[2],
)
for i in range(time_horizon):
trajectory.steps[i].obs[0] = np.array([large_obs1[i]], dtype=np.float32)
trajectory_buffer = trajectory.to_agentbuffer()
policy.update_normalization(trajectory_buffer["vector_obs"])
# Check that the running mean and variance is correct
steps, mean, variance = policy.sess.run(
[policy.normalization_steps, policy.running_mean, policy.running_variance]
)
assert mean[0] == pytest.approx(np.mean(large_obs1, dtype=np.float32), abs=0.01)
assert variance[0] / steps == pytest.approx(
np.var(large_obs1, dtype=np.float32), abs=0.01
)
time_horizon = len(large_obs2)
trajectory = make_fake_trajectory(
length=time_horizon,
max_step_complete=True,
observation_shapes=[(1,)],
action_space=[2],
)
for i in range(time_horizon):
trajectory.steps[i].obs[0] = np.array([large_obs2[i]], dtype=np.float32)
trajectory_buffer = trajectory.to_agentbuffer()
policy.update_normalization(trajectory_buffer["vector_obs"])
steps, mean, variance = policy.sess.run(
[policy.normalization_steps, policy.running_mean, policy.running_variance]
)
assert mean[0] == pytest.approx(
np.mean(large_obs1 + large_obs2, dtype=np.float32), abs=0.01
)
assert variance[0] / steps == pytest.approx(
np.var(large_obs1 + large_obs2, dtype=np.float32), abs=0.01
)
time_horizon = 6
trajectory = make_fake_trajectory(
length=time_horizon,

assert steps == 6
assert mean[0] == 0.5
# Note: variance is divided by number of steps, and initialized to 1 to avoid
# divide by 0. The right answer is 0.25
assert (variance[0] - 1) / steps == 0.25
# Note: variance is initialized to the variance of the initial trajectory + EPSILON
# (to avoid divide by 0) and multiplied by the number of steps. The correct answer is 0.25
assert variance[0] / steps == pytest.approx(0.25, abs=0.01)
# Make another update, this time with all 1's
time_horizon = 10
trajectory = make_fake_trajectory(

assert steps == 16
assert mean[0] == 0.8125
assert (variance[0] - 1) / steps == pytest.approx(0.152, abs=0.01)
assert variance[0] / steps == pytest.approx(0.152, abs=0.01)
def test_min_visual_size():

8
ml-agents/mlagents/trainers/tests/test_ppo.py


import attr
from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
from mlagents.trainers.trainer.rl_trainer import RLTrainer
from mlagents.trainers.ppo.trainer import PPOTrainer, discount_rewards
from mlagents.trainers.ppo.optimizer_tf import PPOOptimizer
from mlagents.trainers.policy.tf_policy import TFPolicy

0, mock_specs, trainer_settings, "test", False, create_tf_graph=False
)
optimizer = PPOOptimizer(policy, trainer_settings)
policy.initialize()
return optimizer

)
@mock.patch.object(RLTrainer, "create_saver")
def test_trainer_increment_step(ppo_optimizer):
def test_trainer_increment_step(ppo_optimizer, mock_create_saver):
trainer_params = PPO_CONFIG
mock_optimizer = mock.Mock()
mock_optimizer.reward_signals = {}

assert trainer.stats_reporter.get_stats_summaries("Policy/Extrinsic Reward").num > 0
@mock.patch.object(RLTrainer, "create_saver")
def test_add_get_policy(ppo_optimizer, dummy_config):
def test_add_get_policy(ppo_optimizer, mock_create_saver, dummy_config):
mock_optimizer = mock.Mock()
mock_optimizer.reward_signals = {}
ppo_optimizer.return_value = mock_optimizer

1
ml-agents/mlagents/trainers/tests/test_reward_signals.py


optimizer = SACOptimizer(policy, trainer_settings)
else:
optimizer = PPOOptimizer(policy, trainer_settings)
optimizer.policy.initialize()
return optimizer

25
ml-agents/mlagents/trainers/tests/test_rl_trainer.py


import os
from unittest import mock
import pytest
import mlagents.trainers.tests.mock_brain as mb

return self.update_policy
def add_policy(self, mock_behavior_id, mock_policy):
def checkpoint_path(brain_name, step):
return os.path.join(self.saver.model_path, f"{brain_name}-{step}")
mock_saver = mock.Mock()
mock_saver.model_path = self.artifact_path
mock_saver.save_checkpoint.side_effect = checkpoint_path
self.saver = mock_saver
def create_tf_policy(self):
def create_tf_policy(self, parsed_behavior_id, behavior_spec):
def create_torch_policy(self):
def create_torch_policy(self, parsed_behavior_id, behavior_spec):
return mock.Mock()
def _process_trajectory(self, trajectory):

"test_trainer",
TrainerSettings(max_steps=100, checkpoint_interval=10, summary_freq=20),
True,
False,
"mock_model_path",
0,
)
trainer.set_is_policy_updating(True)

def test_advance(mocked_clear_update_buffer, mocked_save_model):
trainer = create_rl_trainer()
mock_policy = mock.Mock()
mock_policy.model_path = "mock_model_path"
trainer.add_policy("TestBrain", mock_policy)
trajectory_queue = AgentManagerQueue("testbrain")
policy_queue = AgentManagerQueue("testbrain")

def test_summary_checkpoint(mock_add_checkpoint, mock_write_summary):
trainer = create_rl_trainer()
mock_policy = mock.Mock()
mock_policy.model_path = "mock_model_path"
trainer.add_policy("TestBrain", mock_policy)
trajectory_queue = AgentManagerQueue("testbrain")
policy_queue = AgentManagerQueue("testbrain")

checkpoint_range = range(
checkpoint_interval, num_trajectories * time_horizon, checkpoint_interval
)
calls = [
mock.call(f"{mock_policy.model_path}/{trainer.brain_name}-{step}", mock.ANY)
for step in checkpoint_range
]
mock_policy.checkpoint.assert_has_calls(calls, any_order=True)
calls = [mock.call(trainer.brain_name, step) for step in checkpoint_range]
trainer.saver.save_checkpoint.assert_has_calls(calls, any_order=True)
add_checkpoint_calls = [
mock.call(

f"{mock_policy.model_path}/{trainer.brain_name}-{step}.nn",
f"{trainer.saver.model_path}/{trainer.brain_name}-{step}.nn",
None,
mock.ANY,
),

7
ml-agents/mlagents/trainers/tests/test_sac.py


from mlagents.tf_utils import tf
from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
from mlagents.trainers.trainer.rl_trainer import RLTrainer
from mlagents.trainers.sac.trainer import SACTrainer
from mlagents.trainers.sac.optimizer import SACOptimizer
from mlagents.trainers.policy.tf_policy import TFPolicy

0, mock_brain, trainer_settings, "test", False, create_tf_graph=False
)
optimizer = SACOptimizer(policy, trainer_settings)
optimizer.policy.initialize()
return optimizer

assert trainer2.update_buffer.num_experiences == buffer_len
@mock.patch.object(RLTrainer, "create_saver")
def test_add_get_policy(sac_optimizer, dummy_config):
def test_add_get_policy(sac_optimizer, mock_create_saver, dummy_config):
mock_optimizer = mock.Mock()
mock_optimizer.reward_signals = {}
sac_optimizer.return_value = mock_optimizer

policy = trainer.create_policy(behavior_id, specs)
policy.get_current_step = lambda: 200
trainer.add_policy(behavior_id, policy)
trainer.saver.initialize_or_load(policy)
trainer.saver.initialize_or_load(policy)
trainer.optimizer.update_reward_signals = mock.Mock()
trainer.optimizer.update_reward_signals.return_value = {}
trainer.optimizer.update.return_value = {}

8
ml-agents/mlagents/trainers/tests/test_simple_rl.py


# The reward processor is passed as an argument to _check_environment_trains.
# It is applied to the list pf all final rewards for each brain individually.
# It is applied to the list of all final rewards for each brain individually.
# Custom reward processors shuld be built within the test function and passed to _check_environment_trains
# Custom reward processors should be built within the test function and passed to _check_environment_trains
# Default is average over the last 5 final rewards
def default_reward_processor(rewards, last_n_rewards=5):
rewards_to_use = rewards[-last_n_rewards:]
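The reward-processor hunk above is truncated before its return statement. A hedged completion consistent with the "average over the last 5 final rewards" comment (the mean call is an assumption; it is not shown in this diff):

import numpy as np


def default_reward_processor(rewards, last_n_rewards=5):
    rewards_to_use = rewards[-last_n_rewards:]
    # Assumed completion: average the most recent final rewards.
    return np.array(rewards_to_use, dtype=np.float32).mean()


# Only the last five values contribute to the summary statistic:
assert default_reward_processor([0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0]) == 1.0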

@pytest.mark.parametrize("use_discrete", [True, False])
def test_recurrent_sac(use_discrete):
step_size = 0.2 if use_discrete else 1.0
step_size = 0.5 if use_discrete else 0.2
env = MemoryEnvironment(
[BRAIN_NAME], use_discrete=use_discrete, step_size=step_size
)

swap_steps=5000,
team_change=2000,
)
config = attr.evolve(PPO_CONFIG, self_play=self_play_settings, max_steps=2000)
config = attr.evolve(PPO_CONFIG, self_play=self_play_settings, max_steps=3000)
_check_environment_trains(
env, {BRAIN_NAME: config, brain_name_opp: config}, success_threshold=None
)

20
ml-agents/mlagents/trainers/tests/test_tf_policy.py


from mlagents.model_serialization import SerializationSettings
from unittest import mock
from mlagents.trainers.settings import TrainerSettings
import numpy as np

# Test dev versions
result = TFPolicy._convert_version_string("200.300.100.dev0")
assert result == (200, 300, 100)
@mock.patch("mlagents.trainers.policy.tf_policy.export_policy_model")
@mock.patch("time.time", mock.MagicMock(return_value=12345))
def test_checkpoint_writes_tf_and_nn_checkpoints(export_policy_model_mock):
mock_brain = basic_mock_brain()
test_seed = 4 # moving up in the world
policy = FakePolicy(test_seed, mock_brain, TrainerSettings(), "output")
n_steps = 5
policy.get_current_step = MagicMock(return_value=n_steps)
policy.saver = MagicMock()
serialization_settings = SerializationSettings("output", mock_brain.brain_name)
checkpoint_path = f"output/{mock_brain.brain_name}-{n_steps}"
policy.checkpoint(checkpoint_path, serialization_settings)
policy.saver.save.assert_called_once_with(policy.sess, f"{checkpoint_path}.ckpt")
export_policy_model_mock.assert_called_once_with(
checkpoint_path, serialization_settings, policy.graph, policy.sess
)

8
ml-agents/mlagents/trainers/tests/torch/test_bcmodule.py


NetworkSettings.MemorySettings() if use_rnn else None
)
policy = TorchPolicy(
0,
mock_behavior_specs,
trainer_config,
"test",
False,
tanhresample,
tanhresample,
0, mock_behavior_specs, trainer_config, tanhresample, tanhresample
)
bc_module = BCModule(
policy,

26
ml-agents/mlagents/trainers/tf/models.py


class NormalizerTensors(NamedTuple):
init_op: tf.Operation
update_op: tf.Operation
steps: tf.Tensor
running_mean: tf.Tensor

:return: A NormalizerTensors tuple that holds running mean, running variance, number of steps,
and the update operation.
"""
steps = tf.get_variable(
"normalization_steps",
[],

dtype=tf.float32,
initializer=tf.ones_initializer(),
)
update_normalization = ModelUtils.create_normalizer_update(
initialize_normalization, update_normalization = ModelUtils.create_normalizer_update(
update_normalization, steps, running_mean, running_variance
initialize_normalization,
update_normalization,
steps,
running_mean,
running_variance,
)
@staticmethod

running_mean: tf.Tensor,
running_variance: tf.Tensor,
) -> tf.Operation:
) -> Tuple[tf.Operation, tf.Operation]:
"""
Creates the update operation for the normalizer.
:param vector_input: Vector observation to use for updating the running mean and variance.

update_mean = tf.assign(running_mean, new_mean)
update_variance = tf.assign(running_variance, new_variance)
update_norm_step = tf.assign(steps, total_new_steps)
return tf.group([update_mean, update_variance, update_norm_step])
# First mean and variance calculated normally
initial_mean, initial_variance = tf.nn.moments(vector_input, axes=[0])
initialize_mean = tf.assign(running_mean, initial_mean)
# Multiplied by total_new_step because it is divided by total_new_step in the normalization
initialize_variance = tf.assign(
running_variance,
(initial_variance + EPSILON) * tf.cast(total_new_steps, dtype=tf.float32),
)
return (
tf.group([initialize_mean, initialize_variance, update_norm_step]),
tf.group([update_mean, update_variance, update_norm_step]),
)
@staticmethod
def create_vector_observation_encoder(
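The models.py hunk above splits normalization into an initialize op (seeded from the first batch via tf.nn.moments) and a running update op, both returned by create_normalizer_update. A self-contained TF1-style sketch of that init-then-update pattern; this is an illustration under assumed variable names, not the ML-Agents implementation, and the variance update is omitted from the update op for brevity:

import numpy as np
from mlagents.tf_utils import tf

EPSILON = 1e-5
obs = tf.placeholder(tf.float32, [None, 3], name="obs")
steps = tf.get_variable("steps", [], dtype=tf.float32, trainable=False,
                        initializer=tf.ones_initializer())
running_mean = tf.get_variable("mean", [3], dtype=tf.float32, trainable=False,
                               initializer=tf.zeros_initializer())
running_variance = tf.get_variable("var", [3], dtype=tf.float32, trainable=False,
                                   initializer=tf.ones_initializer())

batch_mean, batch_variance = tf.nn.moments(obs, axes=[0])
batch_size = tf.cast(tf.shape(obs)[0], tf.float32)
total_new_steps = steps + batch_size

# First batch: seed the statistics directly from the data.
initialize_normalization = tf.group([
    tf.assign(running_mean, batch_mean),
    tf.assign(running_variance, (batch_variance + EPSILON) * total_new_steps),
    tf.assign(steps, total_new_steps),
])
# Later batches: fold the new batch statistics into the running values.
new_mean = running_mean + (batch_mean - running_mean) * batch_size / total_new_steps
update_normalization = tf.group([
    tf.assign(running_mean, new_mean),
    tf.assign(steps, total_new_steps),
])

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(initialize_normalization, feed_dict={obs: np.random.rand(8, 3).astype(np.float32)})
    sess.run(update_normalization, feed_dict={obs: np.random.rand(8, 3).astype(np.float32)})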

43
ml-agents/mlagents/trainers/torch/networks.py


memories: Optional[torch.Tensor] = None,
sequence_length: int = 1,
) -> Tuple[torch.Tensor, torch.Tensor]:
vec_encodes = []
encodes = []
for idx, encoder in enumerate(self.vector_encoders):
vec_input = vec_inputs[idx]
if actions is not None:

vec_encodes.append(hidden)
encodes.append(hidden)
vis_encodes = []
vis_encodes.append(hidden)
encodes.append(hidden)
if len(vec_encodes) > 0 and len(vis_encodes) > 0:
vec_encodes_tensor = torch.stack(vec_encodes, dim=-1).sum(dim=-1)
vis_encodes_tensor = torch.stack(vis_encodes, dim=-1).sum(dim=-1)
encoding = torch.stack(
[vec_encodes_tensor, vis_encodes_tensor], dim=-1
).sum(dim=-1)
elif len(vec_encodes) > 0:
encoding = torch.stack(vec_encodes, dim=-1).sum(dim=-1)
elif len(vis_encodes) > 0:
encoding = torch.stack(vis_encodes, dim=-1).sum(dim=-1)
else:
if len(encodes) == 0:
# Constants don't work in Barracuda
encoding = encodes[0]
if len(encodes) > 1:
for _enc in encodes[1:]:
encoding += _enc
if self.use_lstm:
# Resize to (batch, sequence length, encoding size)

)
action_list = self.sample_action(dists)
sampled_actions = torch.stack(action_list, dim=-1)
if self.act_type == ActionType.CONTINUOUS:
log_probs = dists[0].log_prob(sampled_actions)
else:
log_probs = dists[0].all_log_prob()
dists[0].pdf(sampled_actions),
log_probs,
self.version_number,
self.memory_size,
self.is_continuous_int,

class GlobalSteps(nn.Module):
def __init__(self):
super().__init__()
self.global_step = torch.Tensor([0])
self.__global_step = nn.Parameter(torch.Tensor([0]), requires_grad=False)
@property
def current_step(self):
return int(self.__global_step.item())
@current_step.setter
def current_step(self, value):
self.__global_step[:] = value
self.global_step += value
self.__global_step += value
class LearningRate(nn.Module):
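The networks.py hunk above turns the global step counter into an nn.Parameter with requires_grad=False, so it is registered in the module's state_dict and therefore travels with the new TorchSaver checkpoints. A self-contained sketch of that effect; the increment method name is assumed, since the diff only shows its body:

import torch
from torch import nn


class GlobalSteps(nn.Module):
    def __init__(self):
        super().__init__()
        # As a Parameter (not a bare tensor), the counter appears in state_dict().
        self.__global_step = nn.Parameter(torch.Tensor([0]), requires_grad=False)

    @property
    def current_step(self):
        return int(self.__global_step.item())

    @current_step.setter
    def current_step(self, value):
        self.__global_step[:] = value

    def increment(self, value):  # method name assumed; only the body appears in the diff
        self.__global_step += value


steps = GlobalSteps()
steps.increment(100)
state = steps.state_dict()           # includes the step counter
restored = GlobalSteps()
restored.load_state_dict(state)      # the counter survives a save/load round trip
assert restored.current_step == 100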

43
ml-agents/mlagents/trainers/trainer/rl_trainer.py


# # Unity ML-Agents Toolkit
import os
from mlagents.model_serialization import SerializationSettings, copy_model_files
from mlagents.trainers.policy.checkpoint_manager import (
NNCheckpoint,
NNCheckpointManager,

from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
from mlagents.trainers.agent_processor import AgentManagerQueue
from mlagents.trainers.trajectory import Trajectory
from mlagents.trainers.settings import TestingConfiguration, FrameworkType
from mlagents.trainers.settings import (
TestingConfiguration,
TrainerSettings,
FrameworkType,
)
from mlagents.trainers.saver.saver import BaseSaver
from mlagents.trainers.saver.torch_saver import TorchSaver
from mlagents.trainers.saver.tf_saver import TFSaver
from mlagents.trainers.exception import UnityTrainerException
try:

self.trainer_settings.max_steps = TestingConfiguration.max_steps
self._next_save_step = 0
self._next_summary_step = 0
self.saver = self.create_saver(
self.framework, self.trainer_settings, self.artifact_path, self.load
)
def end_episode(self) -> None:
"""

"""
pass
@staticmethod
def create_saver(
framework: str, trainer_settings: TrainerSettings, model_path: str, load: bool
) -> BaseSaver:
if framework == FrameworkType.PYTORCH:
saver = TorchSaver( # type: ignore
trainer_settings, model_path, load
)
else:
saver = TFSaver( # type: ignore
trainer_settings, model_path, load
)
return saver
def _policy_mean_reward(self) -> Optional[float]:
""" Returns the mean episode reward for the current policy. """
rewards = self.cumulative_returns_since_policy_update

logger.warning(
"Trainer has multiple policies, but default behavior only saves the first."
)
policy = list(self.policies.values())[0]
model_path = policy.model_path
settings = SerializationSettings(model_path, self.brain_name)
checkpoint_path = os.path.join(model_path, f"{self.brain_name}-{self.step}")
policy.checkpoint(checkpoint_path, settings)
checkpoint_path = self.saver.save_checkpoint(self.brain_name, self.step)
new_checkpoint = NNCheckpoint(
int(self.step),
f"{checkpoint_path}.nn",

elif n_policies == 0:
logger.warning("Trainer has no policies, not saving anything.")
return
policy = list(self.policies.values())[0]
# Copy the checkpointed model files to the final output location
copy_model_files(model_checkpoint.file_path, f"{policy.model_path}.nn")
self.saver.copy_final_model(model_checkpoint.file_path)
model_checkpoint, file_path=f"{policy.model_path}.nn"
model_checkpoint, file_path=f"{self.saver.model_path}.nn"
)
NNCheckpointManager.track_final_checkpoint(self.brain_name, final_checkpoint)
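Taken together, the rl_trainer.py hunks above replace direct policy.checkpoint()/copy_model_files() calls with a per-trainer saver. A hedged sketch of that flow, using only the calls visible in this diff; the settings values mirror the test fixture earlier in the diff, and the exact checkpoint path is illustrative:

from mlagents.trainers.settings import TrainerSettings, FrameworkType
from mlagents.trainers.trainer.rl_trainer import RLTrainer

trainer_settings = TrainerSettings(max_steps=100, checkpoint_interval=10, summary_freq=20)

# FrameworkType.PYTORCH selects a TorchSaver; any other value falls through to TFSaver.
saver = RLTrainer.create_saver(
    FrameworkType.PYTORCH, trainer_settings, model_path="results/run1/MyBehavior", load=False
)

# saver.initialize_or_load(policy)                    # restore weights when resuming
# path = saver.save_checkpoint("MyBehavior", 10000)   # e.g. results/run1/MyBehavior/MyBehavior-10000
# saver.copy_final_model(path)                        # export the final model next to saver.model_path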

2
ml-agents/mlagents/trainers/trainer/trainer.py


brain_name: str,
trainer_settings: TrainerSettings,
training: bool,
load: bool,
artifact_path: str,
reward_buff_cap: int = 1,
):

self._threaded = trainer_settings.threaded
self._stats_reporter = StatsReporter(brain_name)
self.is_training = training
self.load = load
self._reward_buffer: Deque[float] = deque(maxlen=reward_buff_cap)
self.policy_queues: List[AgentManagerQueue[Policy]] = []
self.trajectory_queues: List[AgentManagerQueue[Trajectory]] = []

10
ml-agents/mlagents/trainers/trainer_controller.py


from mlagents.trainers.trainer_util import TrainerFactory
from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
from mlagents.trainers.agent_processor import AgentManager
from mlagents.tf_utils.globals import get_rank
class TrainerController:

self.kill_trainers = False
np.random.seed(training_seed)
tf.set_random_seed(training_seed)
self.rank = get_rank()
@timed
def _save_models(self):

if self.rank is not None and self.rank != 0:
return
for brain_name in self.trainers.keys():
self.trainers[brain_name].save_model()
self.logger.info("Saved Model")

"""
Saves models for all trainers.
"""
if self.rank is not None and self.rank != 0:
return
for brain_name in self.trainers.keys():
self.trainers[brain_name].save_model()

) in self.param_manager.get_current_lesson_number().items():
for trainer in self.trainers.values():
trainer.stats_reporter.set_stat(
f"Environment/Lesson/{param_name}", lesson_number
f"Environment/Lesson Number/{param_name}", lesson_number
)
for trainer in self.trainers.values():

21
Project/Assets/ML-Agents/Examples/SharedAssets/Prefabs/Targets/DynamicTarget.prefab


m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 3840539935788495952}
m_LocalRotation: {x: 0, y: 0, z: 0, w: 1}
m_LocalPosition: {x: 1, y: 1, z: 1}
m_LocalScale: {x: 1.2356956, y: 1.2356961, z: 1.2356961}
m_LocalPosition: {x: 0, y: 1, z: 1}
m_LocalScale: {x: 1, y: 1, z: 1}
m_Children: []
m_Father: {fileID: 0}
m_RootOrder: 0

m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 3840539935788495952}
m_Enabled: 1
m_CastShadows: 0
m_CastShadows: 1
m_ReceiveShadows: 1
m_DynamicOccludee: 1
m_MotionVectors: 1

respawnIfTouched: 1
respawnIfFallsOffPlatform: 1
fallDistance: 5
triggerIsTouching: 0
onTriggerEnterEvent:
m_PersistentCalls:
m_Calls: []

onTriggerExitEvent:
m_PersistentCalls:
m_Calls: []
colliderIsTouching: 0
m_Calls:
- m_Target: {fileID: 0}
m_MethodName: TouchedTarget
m_Mode: 1
m_Arguments:
m_ObjectArgument: {fileID: 0}
m_ObjectArgumentAssemblyTypeName: UnityEngine.Object, UnityEngine
m_IntArgument: 0
m_FloatArgument: 0
m_StringArgument:
m_BoolArgument: 0
m_CallState: 2
m_Calls: []
onCollisionStayEvent:
m_PersistentCalls:
m_Calls: []

19
Project/Assets/ML-Agents/Examples/SharedAssets/Prefabs/Targets/StaticTarget.prefab


m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 3840539935788495952}
m_LocalRotation: {x: -0, y: -0, z: -0, w: 1}
m_LocalPosition: {x: 6.2, y: 1.15, z: 3.824}
m_LocalScale: {x: 1.2356956, y: 1.2356961, z: 1.2356961}
m_LocalRotation: {x: 0, y: 0, z: 0, w: 1}
m_LocalPosition: {x: 1, y: 1, z: 1}
m_LocalScale: {x: 1, y: 1, z: 1}
m_Children: []
m_Father: {fileID: 0}
m_RootOrder: 0

m_IsKinematic: 0
m_Interpolate: 0
m_Constraints: 0
m_CollisionDetection: 0
m_CollisionDetection: 3
--- !u!114 &3631016866778687563
MonoBehaviour:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
tagToDetect: agent
moveTargetToRandomPosIfTouched: 0
targetSpawnRadius: 0
onTrtesiggerEnterEvent:
m_PersistentCalls:
m_Calls: []
triggerIsTouching: 0
spawnRadius: 0
respawnIfTouched: 0
respawnIfFallsOffPlatform: 1
fallDistance: 5
onTriggerEnterEvent:
m_PersistentCalls:
m_Calls: []

onTriggerExitEvent:
m_PersistentCalls:
m_Calls: []
colliderIsTouching: 0
onCollisionEnterEvent:
m_PersistentCalls:
m_Calls: []

82
Project/Assets/ML-Agents/Examples/Walker/Prefabs/Ragdoll/WalkerRagdollBase.prefab


m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 895268871264836243}
m_LocalRotation: {x: 0, y: 0, z: 0, w: 1}
m_LocalPosition: {x: 0, y: 0.15, z: 0}
m_LocalPosition: {x: 0, y: 0, z: 0}
m_LocalScale: {x: 1, y: 1, z: 1}
m_Children:
- {fileID: 895268873051627235}

- component: {fileID: 895268871377934302}
- component: {fileID: 895268871377934301}
m_Layer: 0
m_Name: WalkerRagdoll
m_Name: WalkerRagdollBase
m_TagString: Untagged
m_Icon: {fileID: 0}
m_NavMeshLayer: 0

m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 895268871377934275}
m_LocalRotation: {x: 0, y: 0.7071068, z: 0, w: 0.7071068}
m_LocalPosition: {x: 0, y: 3.07, z: 0}
m_LocalRotation: {x: 0, y: 0, z: 0, w: 1}
m_LocalPosition: {x: 0, y: 3, z: 0}
m_LocalScale: {x: 1, y: 1, z: 1}
m_Children:
- {fileID: 895268871264836332}

m_RootOrder: 0
m_LocalEulerAnglesHint: {x: 0, y: 90, z: 0}
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
--- !u!114 &895268871377934297
MonoBehaviour:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
m_BrainParameters:
VectorObservationSize: 236
VectorObservationSize: 243
m_Model: {fileID: 11400000, guid: 3c6170922a9ad4d9f85261699ca00f5d, type: 3}
m_Model: {fileID: 11400000, guid: f598eaeeef9f94691989a2cfaaafb565, type: 3}
m_InferenceDevice: 0
m_BehaviorType: 0
m_BehaviorName: WalkerDynamic

maxStep: 0
hasUpgradedFromAgentParameters: 1
MaxStep: 5000
maximumWalkingSpeed: 999
targetWalkingSpeed: 10
randomizeWalkSpeedEachEpisode: 1
walkDirectionMethod: 0
worldDirToWalk: {x: 1, y: 0, z: 0}
worldPosToWalkTo: {x: 0, y: 0, z: 0}
target: {fileID: 0}
hips: {fileID: 895268871264836332}
chest: {fileID: 7933235354845945071}

armR: {fileID: 7933235355057813930}
forearmR: {fileID: 7933235353195701980}
handR: {fileID: 7933235354616748502}
orientationCube: {fileID: 7559180363928843817}
--- !u!114 &895268871377934303
MonoBehaviour:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
maxJointSpring: 40000
jointDampen: 3000
maxJointForceLimit: 10000
jointDampen: 5000
maxJointForceLimit: 20000
bodyPartsList: []
--- !u!114 &895268871377934302
MonoBehaviour:

m_Script: {fileID: 11500000, guid: 1513f8a85fedd47efba089213b7c5bde, type: 3}
m_Name:
m_EditorClassIdentifier:
updatedByAgent: 0
transformToFollow: {fileID: 895268871264836332}
targetToLookAt: {fileID: 0}
heightOffset: 0

m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 7933235353030744116}
serializedVersion: 2
m_Mass: 3
m_Mass: 4
m_Drag: 0.05
m_AngularDrag: 0.05
m_UseGravity: 1

m_Anchor: {x: 0.55, y: 0, z: 0}
m_Axis: {x: 0, y: -1, z: 0}
m_AutoConfigureConnectedAnchor: 1
m_ConnectedAnchor: {x: -0.7000002, y: 0, z: 0}
m_ConnectedAnchor: {x: -0.7000001, y: 0.00000011920929, z: 0}
serializedVersion: 2
m_SecondaryAxis: {x: 0, y: 0, z: 1}
m_XMotion: 0

m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 7933235353041637840}
serializedVersion: 2
m_Mass: 1
m_Mass: 2
m_Drag: 0.05
m_AngularDrag: 0.05
m_UseGravity: 1

m_Anchor: {x: 0, y: 0, z: 0}
m_Axis: {x: 1, y: 0, z: 0}
m_AutoConfigureConnectedAnchor: 1
m_ConnectedAnchor: {x: -0.70000064, y: 0, z: 0}
m_ConnectedAnchor: {x: -0.70000017, y: 0.00000011920929, z: 0}
serializedVersion: 2
m_SecondaryAxis: {x: 0, y: 0, z: 1}
m_XMotion: 0

m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 7933235353195701956}
serializedVersion: 2
m_Mass: 2
m_Mass: 3
m_Drag: 0.05
m_AngularDrag: 0.05
m_UseGravity: 1

m_Anchor: {x: -0.5, y: 0, z: 0}
m_Axis: {x: 0, y: 1, z: 0}
m_AutoConfigureConnectedAnchor: 1
m_ConnectedAnchor: {x: 0.5000005, y: 0, z: 0}
m_ConnectedAnchor: {x: 0.5, y: 0.00000011920929, z: 0}
serializedVersion: 2
m_SecondaryAxis: {x: 0, y: 0, z: 1}
m_XMotion: 0

m_Anchor: {x: 0, y: 0.5, z: 0}
m_Axis: {x: -1, y: 0, z: 0}
m_AutoConfigureConnectedAnchor: 1
m_ConnectedAnchor: {x: -0.39999408, y: -0.29999986, z: 0}
m_ConnectedAnchor: {x: -0.39999396, y: -0.29999995, z: 0}
serializedVersion: 2
m_SecondaryAxis: {x: 0, y: 0, z: -1}
m_XMotion: 0

m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 7933235353240438151}
serializedVersion: 2
m_Mass: 2
m_Mass: 3
m_Drag: 0.05
m_AngularDrag: 0.05
m_UseGravity: 1

m_Anchor: {x: 0.5, y: 0, z: 0}
m_Axis: {x: 0, y: -1, z: 0}
m_AutoConfigureConnectedAnchor: 1
m_ConnectedAnchor: {x: -0.5000005, y: 0, z: 0}
m_ConnectedAnchor: {x: -0.5, y: 0.00000011920929, z: 0}
serializedVersion: 2
m_SecondaryAxis: {x: 0, y: 0, z: 1}
m_XMotion: 0

m_Anchor: {x: 0, y: 0.5, z: 0}
m_Axis: {x: -1, y: 0, z: 0}
m_AutoConfigureConnectedAnchor: 1
m_ConnectedAnchor: {x: -0.00000011920929, y: -0.5, z: 0}
m_ConnectedAnchor: {x: 0, y: -0.5, z: 0}
serializedVersion: 2
m_SecondaryAxis: {x: 0, y: 0, z: -1}
m_XMotion: 0

m_Anchor: {x: 0, y: 0, z: -0.1}
m_Axis: {x: 1, y: 0, z: 0}
m_AutoConfigureConnectedAnchor: 1
m_ConnectedAnchor: {x: 0.00000011920929, y: -0.60000014, z: 0}
m_ConnectedAnchor: {x: 0, y: -0.60000014, z: 0}
serializedVersion: 2
m_SecondaryAxis: {x: 0, y: 1, z: 0}
m_XMotion: 0

m_Anchor: {x: 0, y: 0.5, z: 0}
m_Axis: {x: -1, y: 0, z: 0}
m_AutoConfigureConnectedAnchor: 1
m_ConnectedAnchor: {x: 0.00000011920929, y: -0.5, z: 0}
m_ConnectedAnchor: {x: 0, y: -0.5, z: 0}
serializedVersion: 2
m_SecondaryAxis: {x: 0, y: 0, z: -1}
m_XMotion: 0

m_Anchor: {x: 0, y: 0.5, z: 0}
m_Axis: {x: -1, y: 0, z: 0}
m_AutoConfigureConnectedAnchor: 1
m_ConnectedAnchor: {x: 0.39999408, y: -0.29999986, z: 0}
m_ConnectedAnchor: {x: 0.39999396, y: -0.29999995, z: 0}
serializedVersion: 2
m_SecondaryAxis: {x: 0, y: 0, z: -1}
m_XMotion: 0

m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 7933235354074184675}
serializedVersion: 2
m_Mass: 5
m_Mass: 6
m_Drag: 0.05
m_AngularDrag: 0.05
m_UseGravity: 1

m_Anchor: {x: 0, y: -0.85, z: 0}
m_Axis: {x: 1, y: 0, z: 0}
m_AutoConfigureConnectedAnchor: 1
m_ConnectedAnchor: {x: 0, y: 0.5119996, z: 0}
m_ConnectedAnchor: {x: 0, y: 0.5119997, z: 0}
serializedVersion: 2
m_SecondaryAxis: {x: 0, y: 0, z: -1}
m_XMotion: 0

m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 7933235354616748503}
serializedVersion: 2
m_Mass: 1
m_Mass: 2
m_Drag: 0.05
m_AngularDrag: 0.05
m_UseGravity: 1

m_Anchor: {x: 0, y: 0, z: 0}
m_Axis: {x: 1, y: 0, z: 0}
m_AutoConfigureConnectedAnchor: 1
m_ConnectedAnchor: {x: 0.70000064, y: 0, z: 0}
m_ConnectedAnchor: {x: 0.70000017, y: 0.00000011920929, z: 0}
serializedVersion: 2
m_SecondaryAxis: {x: 0, y: 0, z: 1}
m_XMotion: 0

m_Anchor: {x: 0, y: -0.3, z: 0}
m_Axis: {x: 1, y: 0, z: 0}
m_AutoConfigureConnectedAnchor: 1
m_ConnectedAnchor: {x: 0, y: 0.383, z: 0}
m_ConnectedAnchor: {x: 0, y: 0.3829999, z: 0}
serializedVersion: 2
m_SecondaryAxis: {x: 0, y: 0, z: -1}
m_XMotion: 0

m_Anchor: {x: 0, y: -0.5, z: 0}
m_Axis: {x: 1, y: 0, z: 0}
m_AutoConfigureConnectedAnchor: 1
m_ConnectedAnchor: {x: 0, y: 0.3050003, z: 0}
m_ConnectedAnchor: {x: 0, y: 0.30500042, z: 0}
serializedVersion: 2
m_SecondaryAxis: {x: 0, y: 0, z: -1}
m_XMotion: 0

m_Anchor: {x: 0, y: 0, z: -0.1}
m_Axis: {x: 1, y: 0, z: 0}
m_AutoConfigureConnectedAnchor: 1
m_ConnectedAnchor: {x: -0.00000011920929, y: -0.60000014, z: 0}
m_ConnectedAnchor: {x: 0, y: -0.60000014, z: 0}
serializedVersion: 2
m_SecondaryAxis: {x: 0, y: 1, z: 0}
m_XMotion: 0

m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 7933235355057813906}
serializedVersion: 2
m_Mass: 3
m_Mass: 4
m_Drag: 0.05
m_AngularDrag: 0.05
m_UseGravity: 1

m_Anchor: {x: -0.55, y: 0, z: 0}
m_Axis: {x: 0, y: 1, z: 0}
m_AutoConfigureConnectedAnchor: 1
m_ConnectedAnchor: {x: 0.7000002, y: 0, z: 0}
m_ConnectedAnchor: {x: 0.7000001, y: 0.00000011920929, z: 0}
serializedVersion: 2
m_SecondaryAxis: {x: 0, y: 0, z: 1}
m_XMotion: 0

type: 3}
m_PrefabInstance: {fileID: 7597605653427724053}
m_PrefabAsset: {fileID: 0}
--- !u!114 &7559180363928843817 stripped
MonoBehaviour:
m_CorrespondingSourceObject: {fileID: 114705911240010044, guid: 72f745913c5a34df5aaadd5c1f0024cb,
type: 3}
m_PrefabInstance: {fileID: 7597605653427724053}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 0}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 771e78c5e980e440e8cd19716b55075f, type: 3}
m_Name:
m_EditorClassIdentifier:

523
Project/Assets/ML-Agents/Examples/SharedAssets/Prefabs/PlatformDynamicTarget.prefab


%YAML 1.1
%TAG !u! tag:unity3d.com,2011:
--- !u!1 &6907050159044240885
GameObject:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
serializedVersion: 6
m_Component:
- component: {fileID: 6902197503240654641}
- component: {fileID: 6894500521640151429}
- component: {fileID: 6885223417161833361}
- component: {fileID: 6859132155796343735}
m_Layer: 0
m_Name: Wall (1)
m_TagString: Untagged
m_Icon: {fileID: 0}
m_NavMeshLayer: 0
m_StaticEditorFlags: 4294967295
m_IsActive: 1
--- !u!4 &6902197503240654641
Transform:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907050159044240885}
m_LocalRotation: {x: -0, y: -0, z: -0, w: 1}
m_LocalPosition: {x: -50, y: 0, z: 0}
m_LocalScale: {x: 1, y: 5, z: 101}
m_Children: []
m_Father: {fileID: 6902102727328990095}
m_RootOrder: 1
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
--- !u!33 &6894500521640151429
MeshFilter:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907050159044240885}
m_Mesh: {fileID: 10202, guid: 0000000000000000e000000000000000, type: 0}
--- !u!23 &6885223417161833361
MeshRenderer:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907050159044240885}
m_Enabled: 1
m_CastShadows: 1
m_ReceiveShadows: 1
m_DynamicOccludee: 1
m_MotionVectors: 1
m_LightProbeUsage: 1
m_ReflectionProbeUsage: 1
m_RenderingLayerMask: 1
m_RendererPriority: 0
m_Materials:
- {fileID: 2100000, guid: 66163cf35956a4be08e801b750c26f33, type: 2}
m_StaticBatchInfo:
firstSubMesh: 0
subMeshCount: 0
m_StaticBatchRoot: {fileID: 0}
m_ProbeAnchor: {fileID: 0}
m_LightProbeVolumeOverride: {fileID: 0}
m_ScaleInLightmap: 1
m_PreserveUVs: 0
m_IgnoreNormalsForChartDetection: 0
m_ImportantGI: 0
m_StitchLightmapSeams: 0
m_SelectedEditorRenderState: 3
m_MinimumChartSize: 4
m_AutoUVMaxDistance: 0.5
m_AutoUVMaxAngle: 89
m_LightmapParameters: {fileID: 0}
m_SortingLayerID: 0
m_SortingLayer: 0
m_SortingOrder: 0
--- !u!65 &6859132155796343735
BoxCollider:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907050159044240885}
m_Material: {fileID: 0}
m_IsTrigger: 0
m_Enabled: 1
serializedVersion: 2
m_Size: {x: 1, y: 1, z: 1}
m_Center: {x: 0, y: 0, z: 0}
--- !u!1 &6907401236047902865
GameObject:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
serializedVersion: 6
m_Component:
- component: {fileID: 6902265967514060089}
- component: {fileID: 6891025662345346653}
- component: {fileID: 6859036447448677835}
- component: {fileID: 6884684845870454579}
m_Layer: 14
m_Name: Ground
m_TagString: ground
m_Icon: {fileID: 0}
m_NavMeshLayer: 0
m_StaticEditorFlags: 4294967295
m_IsActive: 1
--- !u!4 &6902265967514060089
Transform:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907401236047902865}
m_LocalRotation: {x: 0, y: 0.7071068, z: 0, w: 0.7071068}
m_LocalPosition: {x: 0, y: 0, z: 0}
m_LocalScale: {x: 100, y: 1, z: 100}
m_Children: []
m_Father: {fileID: 6902107422946006027}
m_RootOrder: 1
m_LocalEulerAnglesHint: {x: 0, y: 90, z: 0}
--- !u!33 &6891025662345346653
MeshFilter:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907401236047902865}
m_Mesh: {fileID: 10202, guid: 0000000000000000e000000000000000, type: 0}
--- !u!65 &6859036447448677835
BoxCollider:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907401236047902865}
m_Material: {fileID: 0}
m_IsTrigger: 0
m_Enabled: 1
serializedVersion: 2
m_Size: {x: 1, y: 1, z: 1}
m_Center: {x: 0, y: 0, z: 0}
--- !u!23 &6884684845870454579
MeshRenderer:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907401236047902865}
m_Enabled: 1
m_CastShadows: 1
m_ReceiveShadows: 1
m_DynamicOccludee: 1
m_MotionVectors: 1
m_LightProbeUsage: 1
m_ReflectionProbeUsage: 1
m_RenderingLayerMask: 1
m_RendererPriority: 0
m_Materials:
- {fileID: 2100000, guid: acba6bf2a290a496bb8989b42bf8698d, type: 2}
m_StaticBatchInfo:
firstSubMesh: 0
subMeshCount: 0
m_StaticBatchRoot: {fileID: 0}
m_ProbeAnchor: {fileID: 0}
m_LightProbeVolumeOverride: {fileID: 0}
m_ScaleInLightmap: 1
m_PreserveUVs: 1
m_IgnoreNormalsForChartDetection: 0
m_ImportantGI: 0
m_StitchLightmapSeams: 0
m_SelectedEditorRenderState: 3
m_MinimumChartSize: 4
m_AutoUVMaxDistance: 0.5
m_AutoUVMaxAngle: 89
m_LightmapParameters: {fileID: 0}
m_SortingLayerID: 0
m_SortingLayer: 0
m_SortingOrder: 0
--- !u!1 &6907666814270504157
GameObject:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
serializedVersion: 6
m_Component:
- component: {fileID: 6902102727328990095}
m_Layer: 0
m_Name: Walls
m_TagString: Untagged
m_Icon: {fileID: 0}
m_NavMeshLayer: 0
m_StaticEditorFlags: 4294967295
m_IsActive: 1
--- !u!4 &6902102727328990095
Transform:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907666814270504157}
m_LocalRotation: {x: -0, y: -0, z: -0, w: 1}
m_LocalPosition: {x: 0, y: 2, z: 0}
m_LocalScale: {x: 1, y: 1, z: 1}
m_Children:
- {fileID: 6901873285403999439}
- {fileID: 6902197503240654641}
- {fileID: 6901900959948323433}
- {fileID: 6905948743199606957}
m_Father: {fileID: 6902107422946006027}
m_RootOrder: 0
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
--- !u!1 &6907680617094430597
GameObject:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
serializedVersion: 6
m_Component:
- component: {fileID: 6901873285403999439}
- component: {fileID: 6894618984257886823}
- component: {fileID: 6884854148710353183}
- component: {fileID: 6863062098498978603}
m_Layer: 0
m_Name: Wall
m_TagString: Untagged
m_Icon: {fileID: 0}
m_NavMeshLayer: 0
m_StaticEditorFlags: 4294967295
m_IsActive: 1
--- !u!4 &6901873285403999439
Transform:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907680617094430597}
m_LocalRotation: {x: 0, y: 0, z: 0, w: 1}
m_LocalPosition: {x: 50, y: 0, z: 0}
m_LocalScale: {x: 1, y: 5, z: 101}
m_Children: []
m_Father: {fileID: 6902102727328990095}
m_RootOrder: 0
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
--- !u!33 &6894618984257886823
MeshFilter:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907680617094430597}
m_Mesh: {fileID: 10202, guid: 0000000000000000e000000000000000, type: 0}
--- !u!23 &6884854148710353183
MeshRenderer:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907680617094430597}
m_Enabled: 1
m_CastShadows: 1
m_ReceiveShadows: 1
m_DynamicOccludee: 1
m_MotionVectors: 1
m_LightProbeUsage: 1
m_ReflectionProbeUsage: 1
m_RenderingLayerMask: 1
m_RendererPriority: 0
m_Materials:
- {fileID: 2100000, guid: 66163cf35956a4be08e801b750c26f33, type: 2}
m_StaticBatchInfo:
firstSubMesh: 0
subMeshCount: 0
m_StaticBatchRoot: {fileID: 0}
m_ProbeAnchor: {fileID: 0}
m_LightProbeVolumeOverride: {fileID: 0}
m_ScaleInLightmap: 1
m_PreserveUVs: 0
m_IgnoreNormalsForChartDetection: 0
m_ImportantGI: 0
m_StitchLightmapSeams: 0
m_SelectedEditorRenderState: 3
m_MinimumChartSize: 4
m_AutoUVMaxDistance: 0.5
m_AutoUVMaxAngle: 89
m_LightmapParameters: {fileID: 0}
m_SortingLayerID: 0
m_SortingLayer: 0
m_SortingOrder: 0
--- !u!65 &6863062098498978603
BoxCollider:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907680617094430597}
m_Material: {fileID: 0}
m_IsTrigger: 0
m_Enabled: 1
serializedVersion: 2
m_Size: {x: 1, y: 1, z: 1}
m_Center: {x: 0, y: 0, z: 0}
--- !u!1 &6907740118844148851
GameObject:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
serializedVersion: 6
m_Component:
- component: {fileID: 6902107422946006027}
m_Layer: 0
m_Name: PlatformDynamicTarget
m_TagString: Untagged
m_Icon: {fileID: 0}
m_NavMeshLayer: 0
m_StaticEditorFlags: 0
m_IsActive: 1
--- !u!4 &6902107422946006027
Transform:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907740118844148851}
m_LocalRotation: {x: 0, y: 0, z: 0, w: 1}
m_LocalPosition: {x: 0, y: 0, z: 0}
m_LocalScale: {x: 1, y: 1, z: 1}
m_Children:
- {fileID: 6902102727328990095}
- {fileID: 6902265967514060089}
m_Father: {fileID: 0}
m_RootOrder: 0
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
--- !u!1 &6907828132384848309
GameObject:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
serializedVersion: 6
m_Component:
- component: {fileID: 6905948743199606957}
- component: {fileID: 6894463671975680535}
- component: {fileID: 6884868534516719387}
- component: {fileID: 6859048605259525735}
m_Layer: 0
m_Name: Wall (3)
m_TagString: Untagged
m_Icon: {fileID: 0}
m_NavMeshLayer: 0
m_StaticEditorFlags: 4294967295
m_IsActive: 1
--- !u!4 &6905948743199606957
Transform:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907828132384848309}
m_LocalRotation: {x: -0, y: -0, z: -0, w: 1}
m_LocalPosition: {x: 0, y: 0, z: -50}
m_LocalScale: {x: 100, y: 5, z: 1}
m_Children: []
m_Father: {fileID: 6902102727328990095}
m_RootOrder: 3
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
--- !u!33 &6894463671975680535
MeshFilter:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907828132384848309}
m_Mesh: {fileID: 10202, guid: 0000000000000000e000000000000000, type: 0}
--- !u!23 &6884868534516719387
MeshRenderer:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907828132384848309}
m_Enabled: 1
m_CastShadows: 1
m_ReceiveShadows: 1
m_DynamicOccludee: 1
m_MotionVectors: 1
m_LightProbeUsage: 1
m_ReflectionProbeUsage: 1
m_RenderingLayerMask: 1
m_RendererPriority: 0
m_Materials:
- {fileID: 2100000, guid: 66163cf35956a4be08e801b750c26f33, type: 2}
m_StaticBatchInfo:
firstSubMesh: 0
subMeshCount: 0
m_StaticBatchRoot: {fileID: 0}
m_ProbeAnchor: {fileID: 0}
m_LightProbeVolumeOverride: {fileID: 0}
m_ScaleInLightmap: 1
m_PreserveUVs: 0
m_IgnoreNormalsForChartDetection: 0
m_ImportantGI: 0
m_StitchLightmapSeams: 0
m_SelectedEditorRenderState: 3
m_MinimumChartSize: 4
m_AutoUVMaxDistance: 0.5
m_AutoUVMaxAngle: 89
m_LightmapParameters: {fileID: 0}
m_SortingLayerID: 0
m_SortingLayer: 0
m_SortingOrder: 0
--- !u!65 &6859048605259525735
BoxCollider:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907828132384848309}
m_Material: {fileID: 0}
m_IsTrigger: 0
m_Enabled: 1
serializedVersion: 2
m_Size: {x: 1, y: 1, z: 1}
m_Center: {x: 0, y: 0, z: 0}
--- !u!1 &6907860845836169157
GameObject:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
serializedVersion: 6
m_Component:
- component: {fileID: 6901900959948323433}
- component: {fileID: 6893927248293796423}
- component: {fileID: 6885176866006237333}
- component: {fileID: 6859395915623032135}
m_Layer: 0
m_Name: Wall (2)
m_TagString: Untagged
m_Icon: {fileID: 0}
m_NavMeshLayer: 0
m_StaticEditorFlags: 4294967295
m_IsActive: 1
--- !u!4 &6901900959948323433
Transform:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907860845836169157}
m_LocalRotation: {x: -0, y: -0, z: -0, w: 1}
m_LocalPosition: {x: 0, y: 0, z: 50}
m_LocalScale: {x: 100, y: 5, z: 1}
m_Children: []
m_Father: {fileID: 6902102727328990095}
m_RootOrder: 2
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
--- !u!33 &6893927248293796423
MeshFilter:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907860845836169157}
m_Mesh: {fileID: 10202, guid: 0000000000000000e000000000000000, type: 0}
--- !u!23 &6885176866006237333
MeshRenderer:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907860845836169157}
m_Enabled: 1
m_CastShadows: 1
m_ReceiveShadows: 1
m_DynamicOccludee: 1
m_MotionVectors: 1
m_LightProbeUsage: 1
m_ReflectionProbeUsage: 1
m_RenderingLayerMask: 1
m_RendererPriority: 0
m_Materials:
- {fileID: 2100000, guid: 66163cf35956a4be08e801b750c26f33, type: 2}
m_StaticBatchInfo:
firstSubMesh: 0
subMeshCount: 0
m_StaticBatchRoot: {fileID: 0}
m_ProbeAnchor: {fileID: 0}
m_LightProbeVolumeOverride: {fileID: 0}
m_ScaleInLightmap: 1
m_PreserveUVs: 0
m_IgnoreNormalsForChartDetection: 0
m_ImportantGI: 0
m_StitchLightmapSeams: 0
m_SelectedEditorRenderState: 3
m_MinimumChartSize: 4
m_AutoUVMaxDistance: 0.5
m_AutoUVMaxAngle: 89
m_LightmapParameters: {fileID: 0}
m_SortingLayerID: 0
m_SortingLayer: 0
m_SortingOrder: 0
--- !u!65 &6859395915623032135
BoxCollider:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907860845836169157}
m_Material: {fileID: 0}
m_IsTrigger: 0
m_Enabled: 1
serializedVersion: 2
m_Size: {x: 1, y: 1, z: 1}
m_Center: {x: 0, y: 0, z: 0}

7
Project/Assets/ML-Agents/Examples/SharedAssets/Prefabs/PlatformDynamicTarget.prefab.meta


fileFormatVersion: 2
guid: f0d7741d9e06247f6843b921a206b978
PrefabImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

8
Project/Assets/ML-Agents/Examples/SharedAssets/Prefabs/Targets.meta


fileFormatVersion: 2
guid: 88818c9b63c96424aa8e0fca85552133
folderAsset: yes
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

10
Project/Assets/ML-Agents/Examples/Walker/Demos/ExpertWalkerDy.demo.meta


fileFormatVersion: 2
guid: 9f87b3070a0fd4a1e838131a91399c2f
ScriptedImporter:
fileIDToRecycleName:
11400000: Assets/Demonstrations/ExpertWalkerDy.demo
externalObjects: {}
userData: ' (Unity.MLAgents.Demonstrations.DemonstrationSummary)'
assetBundleName:
assetBundleVariant:
script: {fileID: 11500000, guid: 7bd65ce151aaa4a41a45312543c56be1, type: 3}

10
Project/Assets/ML-Agents/Examples/Walker/Demos/ExpertWalkerDyVS.demo.meta


fileFormatVersion: 2
guid: a4b02e2c382c247919eb63ce72e90a3b
ScriptedImporter:
fileIDToRecycleName:
11400000: Assets/Demonstrations/ExpertWalkerDyVS.demo
externalObjects: {}
userData: ' (Unity.MLAgents.Demonstrations.DemonstrationSummary)'
assetBundleName:
assetBundleVariant:
script: {fileID: 11500000, guid: 7bd65ce151aaa4a41a45312543c56be1, type: 3}

10
Project/Assets/ML-Agents/Examples/Walker/Demos/ExpertWalkerStVS.demo.meta


fileFormatVersion: 2
guid: edcbb505552464c5c829886a4a3817dd
ScriptedImporter:
fileIDToRecycleName:
11400000: Assets/Demonstrations/ExpertWalkerStVS.demo
externalObjects: {}
userData: ' (Unity.MLAgents.Demonstrations.DemonstrationSummary)'
assetBundleName:
assetBundleVariant:
script: {fileID: 11500000, guid: 7bd65ce151aaa4a41a45312543c56be1, type: 3}

10
Project/Assets/ML-Agents/Examples/Walker/Demos/ExpertWalkerSta.demo.meta


fileFormatVersion: 2
guid: 1f3a5d62e6aea4b5eb053ac33f11b06d
ScriptedImporter:
fileIDToRecycleName:
11400000: Assets/Demonstrations/ExpertWalkerSta.demo
externalObjects: {}
userData: ' (Unity.MLAgents.Demonstrations.DemonstrationSummary)'
assetBundleName:
assetBundleVariant:
script: {fileID: 11500000, guid: 7bd65ce151aaa4a41a45312543c56be1, type: 3}

8
Project/Assets/ML-Agents/Examples/Walker/Prefabs/Platforms.meta


fileFormatVersion: 2
guid: cd296ba30964e4cf086044f1a7618c0b
folderAsset: yes
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

8
Project/Assets/ML-Agents/Examples/Walker/Prefabs/Ragdoll.meta


fileFormatVersion: 2
guid: d64d77dc566364a31896e5da2ac8534b
folderAsset: yes
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

1001
Project/Assets/ML-Agents/Examples/Walker/Scenes/WalkerDynamicVariableSpeed.unity
Diff too large to display
View file

7
Project/Assets/ML-Agents/Examples/Walker/Scenes/WalkerDynamicVariableSpeed.unity.meta


fileFormatVersion: 2
guid: 2b839ee93e7a4467f9f8b4803c4a239b
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

1001
Project/Assets/ML-Agents/Examples/Walker/Scenes/WalkerStaticVariableSpeed.unity
Diff too large to display
View file

9
Project/Assets/ML-Agents/Examples/Walker/Scenes/WalkerStaticVariableSpeed.unity.meta


fileFormatVersion: 2
guid: 0295e51cc064f41b28ef97e70902cf13
timeCreated: 1520420566
licenseType: Free
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

1001
Project/Assets/ML-Agents/Examples/Walker/TFModels/WalkerDynamicVariableSpeed.nn
Diff too large to display
View file

11
Project/Assets/ML-Agents/Examples/Walker/TFModels/WalkerDynamicVariableSpeed.nn.meta


fileFormatVersion: 2
guid: 205590a7f0a844b24b82b7f8355a1529
ScriptedImporter:
fileIDToRecycleName:
11400000: main obj
11400002: model data
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:
script: {fileID: 11500000, guid: 19ed1486aa27d4903b34839f37b8f69f, type: 3}

Some files were not shown because too many files changed in this diff
