
Merge remote-tracking branch 'origin/master' into release_6-to-master

/release_6_branch
Christopher Goy 4 years ago
Current commit
5a233353
201 changed files with 2,614 additions and 4,261 deletions
  1. 2
      .circleci/config.yml
  2. 9
      DevProject/Assets/ML-Agents/Scripts/Tests/Performance/SensorPerformanceTests.cs
  3. 16
      Project/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DAgent.cs
  4. 9
      Project/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DHardAgent.cs
  5. 9
      Project/Assets/ML-Agents/Examples/Basic/Scripts/BasicController.cs
  6. 30
      Project/Assets/ML-Agents/Examples/Bouncer/Scripts/BouncerAgent.cs
  7. 37
      Project/Assets/ML-Agents/Examples/Crawler/Scripts/CrawlerAgent.cs
  8. 29
      Project/Assets/ML-Agents/Examples/FoodCollector/Scripts/FoodCollectorAgent.cs
  9. 29
      Project/Assets/ML-Agents/Examples/GridWorld/Scripts/GridAgent.cs
  10. 23
      Project/Assets/ML-Agents/Examples/Hallway/Scripts/HallwayAgent.cs
  11. 23
      Project/Assets/ML-Agents/Examples/PushBlock/Scripts/PushAgentBasic.cs
  12. 23
      Project/Assets/ML-Agents/Examples/Pyramids/Scripts/PyramidAgent.cs
  13. 12
      Project/Assets/ML-Agents/Examples/Reacher/Scripts/ReacherAgent.cs
  14. 20
      Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/DirectionIndicator.cs
  15. 31
      Project/Assets/ML-Agents/Examples/Soccer/Scripts/AgentSoccer.cs
  16. 4
      Project/Assets/ML-Agents/Examples/Template/Scripts/TemplateAgent.cs
  17. 20
      Project/Assets/ML-Agents/Examples/Tennis/Scripts/TennisAgent.cs
  18. 977
      Project/Assets/ML-Agents/Examples/Walker/Scenes/WalkerDynamic.unity
  19. 2
      Project/Assets/ML-Agents/Examples/Walker/Scenes/WalkerDynamic.unity.meta
  20. 962
      Project/Assets/ML-Agents/Examples/Walker/Scenes/WalkerStatic.unity
  21. 218
      Project/Assets/ML-Agents/Examples/Walker/Scripts/WalkerAgent.cs
  22. 1001
      Project/Assets/ML-Agents/Examples/Walker/TFModels/WalkerDynamic.nn
  23. 2
      Project/Assets/ML-Agents/Examples/Walker/TFModels/WalkerDynamic.nn.meta
  24. 1001
      Project/Assets/ML-Agents/Examples/Walker/TFModels/WalkerStatic.nn
  25. 2
      Project/Assets/ML-Agents/Examples/Walker/TFModels/WalkerStatic.nn.meta
  26. 31
      Project/Assets/ML-Agents/Examples/WallJump/Scripts/WallJumpAgent.cs
  27. 17
      Project/Assets/ML-Agents/Examples/Worm/Scripts/WormAgent.cs
  28. 2
      Project/ProjectSettings/ProjectVersion.txt
  29. 4
      com.unity.ml-agents.extensions/Editor/Unity.ML-Agents.Extensions.Editor.asmdef
  30. 1
      com.unity.ml-agents.extensions/Runtime/AssemblyInfo.cs
  31. 29
      com.unity.ml-agents.extensions/Runtime/Sensors/ArticulationBodyPoseExtractor.cs
  32. 8
      com.unity.ml-agents.extensions/Runtime/Sensors/ArticulationBodySensorComponent.cs
  33. 54
      com.unity.ml-agents.extensions/Runtime/Sensors/PhysicsBodySensor.cs
  34. 117
      com.unity.ml-agents.extensions/Runtime/Sensors/PoseExtractor.cs
  35. 80
      com.unity.ml-agents.extensions/Runtime/Sensors/RigidBodyPoseExtractor.cs
  36. 68
      com.unity.ml-agents.extensions/Runtime/Sensors/RigidBodySensorComponent.cs
  37. 88
      com.unity.ml-agents.extensions/Tests/Editor/Sensors/PoseExtractorTests.cs
  38. 67
      com.unity.ml-agents.extensions/Tests/Editor/Sensors/RigidBodyPoseExtractorTests.cs
  39. 29
      com.unity.ml-agents/CHANGELOG.md
  40. 201
      com.unity.ml-agents/Runtime/Agent.cs
  41. 14
      com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs
  42. 2
      com.unity.ml-agents/Runtime/Communicator/RpcCommunicator.cs
  43. 6
      com.unity.ml-agents/Runtime/DecisionRequester.cs
  44. 118
      com.unity.ml-agents/Runtime/DiscreteActionMasker.cs
  45. 17
      com.unity.ml-agents/Runtime/Policies/BarracudaPolicy.cs
  46. 37
      com.unity.ml-agents/Runtime/Policies/BehaviorParameters.cs
  47. 20
      com.unity.ml-agents/Runtime/Policies/HeuristicPolicy.cs
  48. 3
      com.unity.ml-agents/Runtime/Policies/IPolicy.cs
  49. 15
      com.unity.ml-agents/Runtime/Policies/RemotePolicy.cs
  50. 3
      com.unity.ml-agents/Tests/Editor/BehaviorParameterTests.cs
  51. 48
      com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs
  52. 7
      com.unity.ml-agents/Tests/Runtime/RuntimeAPITest.cs
  53. 2
      config/ppo/WalkerDynamic.yaml
  54. 2
      config/ppo/WalkerStatic.yaml
  55. 51
      docs/Learning-Environment-Create-New.md
  56. 31
      docs/Learning-Environment-Examples.md
  57. 2
      gym-unity/gym_unity/__init__.py
  58. 2
      ml-agents-envs/mlagents_envs/__init__.py
  59. 8
      ml-agents-envs/mlagents_envs/exception.py
  60. 2
      ml-agents/mlagents/trainers/__init__.py
  61. 2
      ml-agents/mlagents/trainers/ghost/trainer.py
  62. 3
      ml-agents/mlagents/trainers/optimizer/tf_optimizer.py
  63. 15
      ml-agents/mlagents/trainers/policy/policy.py
  64. 115
      ml-agents/mlagents/trainers/policy/tf_policy.py
  65. 10
      ml-agents/mlagents/trainers/ppo/optimizer.py
  66. 17
      ml-agents/mlagents/trainers/ppo/trainer.py
  67. 2
      ml-agents/mlagents/trainers/sac/optimizer.py
  68. 17
      ml-agents/mlagents/trainers/sac/trainer.py
  69. 8
      ml-agents/mlagents/trainers/settings.py
  70. 41
      ml-agents/mlagents/trainers/stats.py
  71. 27
      ml-agents/mlagents/trainers/tests/test_barracuda_converter.py
  72. 10
      ml-agents/mlagents/trainers/tests/test_bcmodule.py
  73. 62
      ml-agents/mlagents/trainers/tests/test_env_param_manager.py
  74. 62
      ml-agents/mlagents/trainers/tests/test_nn_policy.py
  75. 8
      ml-agents/mlagents/trainers/tests/test_ppo.py
  76. 1
      ml-agents/mlagents/trainers/tests/test_reward_signals.py
  77. 21
      ml-agents/mlagents/trainers/tests/test_rl_trainer.py
  78. 6
      ml-agents/mlagents/trainers/tests/test_sac.py
  79. 8
      ml-agents/mlagents/trainers/tests/test_simple_rl.py
  80. 20
      ml-agents/mlagents/trainers/tests/test_tf_policy.py
  81. 26
      ml-agents/mlagents/trainers/trainer/rl_trainer.py
  82. 2
      ml-agents/mlagents/trainers/trainer/trainer.py
  83. 10
      ml-agents/mlagents/trainers/trainer_controller.py
  84. 3
      test_requirements.txt
  85. 2
      utils/validate_release_links.py
  86. 21
      Project/Assets/ML-Agents/Examples/SharedAssets/Prefabs/Targets/DynamicTarget.prefab
  87. 19
      Project/Assets/ML-Agents/Examples/SharedAssets/Prefabs/Targets/StaticTarget.prefab
  88. 82
      Project/Assets/ML-Agents/Examples/Walker/Prefabs/Ragdoll/WalkerRagdollBase.prefab
  89. 523
      Project/Assets/ML-Agents/Examples/SharedAssets/Prefabs/PlatformDynamicTarget.prefab
  90. 7
      Project/Assets/ML-Agents/Examples/SharedAssets/Prefabs/PlatformDynamicTarget.prefab.meta
  91. 8
      Project/Assets/ML-Agents/Examples/SharedAssets/Prefabs/Targets.meta
  92. 10
      Project/Assets/ML-Agents/Examples/Walker/Demos/ExpertWalkerDy.demo.meta
  93. 10
      Project/Assets/ML-Agents/Examples/Walker/Demos/ExpertWalkerDyVS.demo.meta
  94. 10
      Project/Assets/ML-Agents/Examples/Walker/Demos/ExpertWalkerStVS.demo.meta
  95. 10
      Project/Assets/ML-Agents/Examples/Walker/Demos/ExpertWalkerSta.demo.meta

2
.circleci/config.yml


. venv/bin/activate
mkdir test-reports
pip freeze > test-reports/pip_versions.txt
pytest -n 2 --cov=ml-agents --cov=ml-agents-envs --cov=gym-unity --cov-report html --junitxml=test-reports/junit.xml -p no:warnings
pytest --cov=ml-agents --cov=ml-agents-envs --cov=gym-unity --cov-report html --junitxml=test-reports/junit.xml -p no:warnings
- run:
name: Verify there are no hidden/missing metafiles.

9
DevProject/Assets/ML-Agents/Scripts/Tests/Performance/SensorPerformanceTests.cs


using NUnit.Framework;
using Unity.MLAgents;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Policies;
using Unity.MLAgents.Sensors;
using Unity.MLAgents.Sensors.Reflection;

{
}
public override void Heuristic(float[] actionsOut)
public override void Heuristic(in ActionBuffers actionsOut)
{
}
}

sensor.AddObservation(new Quaternion(1, 2, 3, 4));
}
public override void Heuristic(float[] actionsOut)
public override void Heuristic(in ActionBuffers actionsOut)
{
}
}

[Observable]
public Quaternion QuaternionField = new Quaternion(1, 2, 3, 4);
public override void Heuristic(float[] actionsOut)
public override void Heuristic(in ActionBuffers actionsOut)
{
}
}

get { return m_QuaternionField; }
}
public override void Heuristic(float[] actionsOut)
public override void Heuristic(in ActionBuffers actionsOut)
{
}
}

16
Project/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DAgent.cs


using System;
using Unity.MLAgents.Actuators;
using Random = UnityEngine.Random;
public class Ball3DAgent : Agent
{

sensor.AddObservation(m_BallRb.velocity);
}
public override void OnActionReceived(float[] vectorAction)
public override void OnActionReceived(ActionBuffers actionBuffers)
var actionZ = 2f * Mathf.Clamp(vectorAction[0], -1f, 1f);
var actionX = 2f * Mathf.Clamp(vectorAction[1], -1f, 1f);
var actionZ = 2f * Mathf.Clamp(actionBuffers.ContinuousActions[0], -1f, 1f);
var actionX = 2f * Mathf.Clamp(actionBuffers.ContinuousActions[1], -1f, 1f);
if ((gameObject.transform.rotation.z < 0.25f && actionZ > 0f) ||
(gameObject.transform.rotation.z > -0.25f && actionZ < 0f))

SetResetParameters();
}
public override void Heuristic(float[] actionsOut)
public override void Heuristic(in ActionBuffers actionsOut)
actionsOut[0] = -Input.GetAxis("Horizontal");
actionsOut[1] = Input.GetAxis("Vertical");
var continuousActionsOut = actionsOut.ContinuousActions;
continuousActionsOut[0] = -Input.GetAxis("Horizontal");
continuousActionsOut[1] = Input.GetAxis("Vertical");
}
public void SetBall()
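
The Ball3D diffs above show the release_6 action-API migration: OnActionReceived(float[]) becomes OnActionReceived(ActionBuffers) and Heuristic(float[]) becomes Heuristic(in ActionBuffers). The following is a minimal sketch of the new continuous-action pattern end to end; it is not part of this commit, and the class name is illustrative only.

using UnityEngine;
using Unity.MLAgents;
using Unity.MLAgents.Actuators;

public class MinimalContinuousAgent : Agent
{
    public override void OnActionReceived(ActionBuffers actionBuffers)
    {
        // Continuous actions now arrive in a typed ActionSegment<float>
        // instead of a raw float[].
        var continuousActions = actionBuffers.ContinuousActions;
        var actionZ = 2f * Mathf.Clamp(continuousActions[0], -1f, 1f);
        var actionX = 2f * Mathf.Clamp(continuousActions[1], -1f, 1f);
        transform.Rotate(new Vector3(0, 0, 1), actionZ);
        transform.Rotate(new Vector3(1, 0, 0), actionX);
    }

    public override void Heuristic(in ActionBuffers actionsOut)
    {
        // Manual control writes into the ContinuousActions segment of the
        // provided buffers rather than into a float array.
        var continuousActionsOut = actionsOut.ContinuousActions;
        continuousActionsOut[0] = -Input.GetAxis("Horizontal");
        continuousActionsOut[1] = Input.GetAxis("Vertical");
    }
}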

9
Project/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DHardAgent.cs


using UnityEngine;
using Unity.MLAgents;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Sensors;
public class Ball3DHardAgent : Agent

sensor.AddObservation((ball.transform.position - gameObject.transform.position));
}
public override void OnActionReceived(float[] vectorAction)
public override void OnActionReceived(ActionBuffers actionBuffers)
var actionZ = 2f * Mathf.Clamp(vectorAction[0], -1f, 1f);
var actionX = 2f * Mathf.Clamp(vectorAction[1], -1f, 1f);
var continuousActions = actionBuffers.ContinuousActions;
var actionZ = 2f * Mathf.Clamp(continuousActions[0], -1f, 1f);
var actionX = 2f * Mathf.Clamp(continuousActions[1], -1f, 1f);
if ((gameObject.transform.rotation.z < 0.25f && actionZ > 0f) ||
(gameObject.transform.rotation.z > -0.25f && actionZ < 0f))

9
Project/Assets/ML-Agents/Examples/Basic/Scripts/BasicController.cs


using UnityEngine;
using UnityEngine.SceneManagement;
using Unity.MLAgents;
using Unity.MLAgents.Actuators;
using UnityEngine.Serialization;
/// <summary>

/// Controls the movement of the GameObject based on the actions received.
/// </summary>
/// <param name="vectorAction"></param>
public void ApplyAction(float[] vectorAction)
public void ApplyAction(ActionSegment<int> vectorAction)
var movement = (int)vectorAction[0];
var movement = vectorAction[0];
var direction = 0;

if (Academy.Instance.IsCommunicatorOn)
{
// Apply the previous step's actions
ApplyAction(m_Agent.GetAction());
ApplyAction(m_Agent.GetStoredActionBuffers().DiscreteActions);
m_Agent?.RequestDecision();
}
else

// Apply the previous step's actions
ApplyAction(m_Agent.GetAction());
ApplyAction(m_Agent.GetStoredActionBuffers().DiscreteActions);
m_TimeSinceDecision = 0f;
m_Agent?.RequestDecision();
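
The BasicController change above replaces Agent.GetAction() with Agent.GetStoredActionBuffers(), whose DiscreteActions segment is typed as int. Below is a small sketch of that pattern, assuming a controller with an Agent reference assigned in the Inspector; class and field names are illustrative.

using UnityEngine;
using Unity.MLAgents;
using Unity.MLAgents.Actuators;

public class StepController : MonoBehaviour
{
    public Agent m_Agent;  // assigned in the Inspector (assumption)

    void FixedUpdate()
    {
        // Apply the action chosen at the previous decision, then request a new one.
        ApplyAction(m_Agent.GetStoredActionBuffers().DiscreteActions);
        m_Agent?.RequestDecision();
    }

    void ApplyAction(ActionSegment<int> act)
    {
        // Discrete actions are already ints, so no float-to-int cast is needed.
        var movement = act[0];
        var direction = movement == 1 ? -1 : movement == 2 ? 1 : 0;
        transform.position += new Vector3(direction, 0f, 0f);
    }
}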

30
Project/Assets/ML-Agents/Examples/Bouncer/Scripts/BouncerAgent.cs


using UnityEngine;
using Unity.MLAgents;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Sensors;
public class BouncerAgent : Agent

sensor.AddObservation(target.transform.localPosition);
}
public override void OnActionReceived(float[] vectorAction)
public override void OnActionReceived(ActionBuffers actionBuffers)
for (var i = 0; i < vectorAction.Length; i++)
var continuousActions = actionBuffers.ContinuousActions;
for (var i = 0; i < continuousActions.Length; i++)
vectorAction[i] = Mathf.Clamp(vectorAction[i], -1f, 1f);
continuousActions[i] = Mathf.Clamp(continuousActions[i], -1f, 1f);
var x = vectorAction[0];
var y = ScaleAction(vectorAction[1], 0, 1);
var z = vectorAction[2];
var x = continuousActions[0];
var y = ScaleAction(continuousActions[1], 0, 1);
var z = continuousActions[2];
vectorAction[0] * vectorAction[0] +
vectorAction[1] * vectorAction[1] +
vectorAction[2] * vectorAction[2]) / 3f);
continuousActions[0] * continuousActions[0] +
continuousActions[1] * continuousActions[1] +
continuousActions[2] * continuousActions[2]) / 3f);
m_LookDir = new Vector3(x, y, z);
}

}
}
public override void Heuristic(float[] actionsOut)
public override void Heuristic(in ActionBuffers actionsOut)
actionsOut[0] = Input.GetAxis("Horizontal");
actionsOut[1] = Input.GetKey(KeyCode.Space) ? 1.0f : 0.0f;
actionsOut[2] = Input.GetAxis("Vertical");
var continuousActionsOut = actionsOut.ContinuousActions;
continuousActionsOut[0] = Input.GetAxis("Horizontal");
continuousActionsOut[1] = Input.GetKey(KeyCode.Space) ? 1.0f : 0.0f;
continuousActionsOut[2] = Input.GetAxis("Vertical");
}
void Update()

37
Project/Assets/ML-Agents/Examples/Crawler/Scripts/CrawlerAgent.cs


using System;
using UnityEngine;
using Unity.MLAgents;
using Unity.MLAgents.Actuators;
using Unity.MLAgentsExamples;
using Unity.MLAgents.Sensors;
using Random = UnityEngine.Random;

AddReward(1f);
}
public override void OnActionReceived(float[] vectorAction)
public override void OnActionReceived(ActionBuffers actionBuffers)
var continuousActions = actionBuffers.ContinuousActions;
bpDict[leg0Upper].SetJointTargetRotation(vectorAction[++i], vectorAction[++i], 0);
bpDict[leg1Upper].SetJointTargetRotation(vectorAction[++i], vectorAction[++i], 0);
bpDict[leg2Upper].SetJointTargetRotation(vectorAction[++i], vectorAction[++i], 0);
bpDict[leg3Upper].SetJointTargetRotation(vectorAction[++i], vectorAction[++i], 0);
bpDict[leg0Lower].SetJointTargetRotation(vectorAction[++i], 0, 0);
bpDict[leg1Lower].SetJointTargetRotation(vectorAction[++i], 0, 0);
bpDict[leg2Lower].SetJointTargetRotation(vectorAction[++i], 0, 0);
bpDict[leg3Lower].SetJointTargetRotation(vectorAction[++i], 0, 0);
bpDict[leg0Upper].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], 0);
bpDict[leg1Upper].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], 0);
bpDict[leg2Upper].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], 0);
bpDict[leg3Upper].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], 0);
bpDict[leg0Lower].SetJointTargetRotation(continuousActions[++i], 0, 0);
bpDict[leg1Lower].SetJointTargetRotation(continuousActions[++i], 0, 0);
bpDict[leg2Lower].SetJointTargetRotation(continuousActions[++i], 0, 0);
bpDict[leg3Lower].SetJointTargetRotation(continuousActions[++i], 0, 0);
bpDict[leg0Upper].SetJointStrength(vectorAction[++i]);
bpDict[leg1Upper].SetJointStrength(vectorAction[++i]);
bpDict[leg2Upper].SetJointStrength(vectorAction[++i]);
bpDict[leg3Upper].SetJointStrength(vectorAction[++i]);
bpDict[leg0Lower].SetJointStrength(vectorAction[++i]);
bpDict[leg1Lower].SetJointStrength(vectorAction[++i]);
bpDict[leg2Lower].SetJointStrength(vectorAction[++i]);
bpDict[leg3Lower].SetJointStrength(vectorAction[++i]);
bpDict[leg0Upper].SetJointStrength(continuousActions[++i]);
bpDict[leg1Upper].SetJointStrength(continuousActions[++i]);
bpDict[leg2Upper].SetJointStrength(continuousActions[++i]);
bpDict[leg3Upper].SetJointStrength(continuousActions[++i]);
bpDict[leg0Lower].SetJointStrength(continuousActions[++i]);
bpDict[leg1Lower].SetJointStrength(continuousActions[++i]);
bpDict[leg2Lower].SetJointStrength(continuousActions[++i]);
bpDict[leg3Lower].SetJointStrength(continuousActions[++i]);
}
void FixedUpdate()
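
Both the Crawler and Walker diffs consume the ContinuousActions segment with a running prefix-increment index. Because C# evaluates call arguments left to right, each ++i reads the next action in sequence. A condensed sketch of the idiom follows; the starting value of i (-1) and the joint-controller details are assumptions based on the surrounding source, not part of the diff.

var continuousActions = actionBuffers.ContinuousActions;
var i = -1;
// Each ++i consumes the next continuous action, in declaration order:
bpDict[leg0Upper].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], 0); // actions 0 and 1
bpDict[leg0Lower].SetJointTargetRotation(continuousActions[++i], 0, 0);                      // action 2
// ...remaining joints follow the same pattern, then strengths consume the rest...
bpDict[leg0Upper].SetJointStrength(continuousActions[++i]);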

29
Project/Assets/ML-Agents/Examples/FoodCollector/Scripts/FoodCollectorAgent.cs


using System;
using Unity.MLAgents.Actuators;
using Random = UnityEngine.Random;
public class FoodCollectorAgent : Agent
{

return new Color32(r, g, b, 255);
}
public void MoveAgent(float[] act)
public void MoveAgent(ActionSegment<int> act)
{
m_Shoot = false;

gameObject.GetComponentInChildren<Renderer>().material = normalMaterial;
}
public override void OnActionReceived(float[] vectorAction)
public override void OnActionReceived(ActionBuffers actionBuffers)
MoveAgent(vectorAction);
MoveAgent(actionBuffers.DiscreteActions);
public override void Heuristic(float[] actionsOut)
public override void Heuristic(in ActionBuffers actionsOut)
actionsOut[0] = 0f;
actionsOut[1] = 0f;
actionsOut[2] = 0f;
var discreteActionsOut = actionsOut.DiscreteActions;
discreteActionsOut[0] = 0;
discreteActionsOut[1] = 0;
discreteActionsOut[2] = 0;
actionsOut[2] = 2f;
discreteActionsOut[2] = 2;
actionsOut[0] = 1f;
discreteActionsOut[0] = 1;
actionsOut[2] = 1f;
discreteActionsOut[2] = 1;
actionsOut[0] = 2f;
discreteActionsOut[0] = 2;
actionsOut[3] = Input.GetKey(KeyCode.Space) ? 1.0f : 0.0f;
discreteActionsOut[3] = Input.GetKey(KeyCode.Space) ? 1 : 0;
}
public override void OnEpisodeBegin()

29
Project/Assets/ML-Agents/Examples/GridWorld/Scripts/GridAgent.cs


using UnityEngine;
using System.Linq;
using Unity.MLAgents;
using Unity.MLAgents.Actuators;
using UnityEngine.Serialization;
public class GridAgent : Agent

m_ResetParams = Academy.Instance.EnvironmentParameters;
}
public override void CollectDiscreteActionMasks(DiscreteActionMasker actionMasker)
public override void WriteDiscreteActionMask(IDiscreteActionMask actionMask)
{
// Mask the necessary actions if selected by the user.
if (maskActions)

if (positionX == 0)
{
actionMasker.SetMask(0, new []{ k_Left});
actionMask.WriteMask(0, new []{ k_Left});
actionMasker.SetMask(0, new []{k_Right});
actionMask.WriteMask(0, new []{k_Right});
actionMasker.SetMask(0, new []{k_Down});
actionMask.WriteMask(0, new []{k_Down});
actionMasker.SetMask(0, new []{k_Up});
actionMask.WriteMask(0, new []{k_Up});
public override void OnActionReceived(float[] vectorAction)
public override void OnActionReceived(ActionBuffers actionBuffers)
var action = Mathf.FloorToInt(vectorAction[0]);
var action = actionBuffers.DiscreteActions[0];
var targetPos = transform.position;
switch (action)

}
}
public override void Heuristic(float[] actionsOut)
public override void Heuristic(in ActionBuffers actionsOut)
actionsOut[0] = k_NoAction;
var discreteActionsOut = actionsOut.DiscreteActions;
discreteActionsOut[0] = k_NoAction;
actionsOut[0] = k_Right;
discreteActionsOut[0] = k_Right;
actionsOut[0] = k_Up;
discreteActionsOut[0] = k_Up;
actionsOut[0] = k_Left;
discreteActionsOut[0] = k_Left;
actionsOut[0] = k_Down;
discreteActionsOut[0] = k_Down;
}
}
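
The GridAgent diff above also migrates action masking: CollectDiscreteActionMasks(DiscreteActionMasker) becomes WriteDiscreteActionMask(IDiscreteActionMask), and SetMask becomes WriteMask. A condensed sketch of the new discrete-action pattern follows; the movement constants and the edge check are illustrative, not the example's actual logic.

using UnityEngine;
using Unity.MLAgents;
using Unity.MLAgents.Actuators;

public class MaskedGridAgent : Agent
{
    const int k_NoAction = 0;
    const int k_Right = 1;
    const int k_Left = 2;

    public override void WriteDiscreteActionMask(IDiscreteActionMask actionMask)
    {
        // Forbid action k_Left on branch 0 when the agent sits on the left edge.
        if (transform.position.x <= 0f)
        {
            actionMask.WriteMask(0, new[] { k_Left });
        }
    }

    public override void OnActionReceived(ActionBuffers actionBuffers)
    {
        // Discrete actions are read directly as ints; Mathf.FloorToInt is gone.
        var action = actionBuffers.DiscreteActions[0];
        if (action == k_Right)
        {
            transform.position += Vector3.right;
        }
        else if (action == k_Left)
        {
            transform.position += Vector3.left;
        }
    }

    public override void Heuristic(in ActionBuffers actionsOut)
    {
        var discreteActionsOut = actionsOut.DiscreteActions;
        discreteActionsOut[0] = Input.GetKey(KeyCode.RightArrow) ? k_Right : k_NoAction;
    }
}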

23
Project/Assets/ML-Agents/Examples/Hallway/Scripts/HallwayAgent.cs


using System.Collections;
using UnityEngine;
using Unity.MLAgents;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Sensors;
public class HallwayAgent : Agent

m_GroundRenderer.material = m_GroundMaterial;
}
public void MoveAgent(float[] act)
public void MoveAgent(ActionSegment<int> act)
var action = Mathf.FloorToInt(act[0]);
var action = act[0];
switch (action)
{
case 1:

m_AgentRb.AddForce(dirToGo * m_HallwaySettings.agentRunSpeed, ForceMode.VelocityChange);
}
public override void OnActionReceived(float[] vectorAction)
public override void OnActionReceived(ActionBuffers actionBuffers)
MoveAgent(vectorAction);
MoveAgent(actionBuffers.DiscreteActions);
}
void OnCollisionEnter(Collision col)

}
}
public override void Heuristic(float[] actionsOut)
public override void Heuristic(in ActionBuffers actionsOut)
actionsOut[0] = 0;
var discreteActionsOut = actionsOut.DiscreteActions;
discreteActionsOut[0] = 0;
actionsOut[0] = 3;
discreteActionsOut[0] = 3;
actionsOut[0] = 1;
discreteActionsOut[0] = 1;
actionsOut[0] = 4;
discreteActionsOut[0] = 4;
actionsOut[0] = 2;
discreteActionsOut[0] = 2;
}
}

23
Project/Assets/ML-Agents/Examples/PushBlock/Scripts/PushAgentBasic.cs


using System.Collections;
using UnityEngine;
using Unity.MLAgents;
using Unity.MLAgents.Actuators;
public class PushAgentBasic : Agent
{

/// <summary>
/// Moves the agent according to the selected action.
/// </summary>
public void MoveAgent(float[] act)
public void MoveAgent(ActionSegment<int> act)
var action = Mathf.FloorToInt(act[0]);
var action = act[0];
switch (action)
{

/// <summary>
/// Called every step of the engine. Here the agent takes an action.
/// </summary>
public override void OnActionReceived(float[] vectorAction)
public override void OnActionReceived(ActionBuffers actionBuffers)
MoveAgent(vectorAction);
MoveAgent(actionBuffers.DiscreteActions);
public override void Heuristic(float[] actionsOut)
public override void Heuristic(in ActionBuffers actionsOut)
actionsOut[0] = 0;
var discreteActionsOut = actionsOut.DiscreteActions;
discreteActionsOut[0] = 0;
actionsOut[0] = 3;
discreteActionsOut[0] = 3;
actionsOut[0] = 1;
discreteActionsOut[0] = 1;
actionsOut[0] = 4;
discreteActionsOut[0] = 4;
actionsOut[0] = 2;
discreteActionsOut[0] = 2;
}
}

23
Project/Assets/ML-Agents/Examples/Pyramids/Scripts/PyramidAgent.cs


using UnityEngine;
using Random = UnityEngine.Random;
using Unity.MLAgents;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Sensors;
public class PyramidAgent : Agent

}
}
public void MoveAgent(float[] act)
public void MoveAgent(ActionSegment<int> act)
var action = Mathf.FloorToInt(act[0]);
var action = act[0];
switch (action)
{
case 1:

m_AgentRb.AddForce(dirToGo * 2f, ForceMode.VelocityChange);
}
public override void OnActionReceived(float[] vectorAction)
public override void OnActionReceived(ActionBuffers actionBuffers)
MoveAgent(vectorAction);
MoveAgent(actionBuffers.DiscreteActions);
public override void Heuristic(float[] actionsOut)
public override void Heuristic(in ActionBuffers actionsOut)
actionsOut[0] = 0;
var discreteActionsOut = actionsOut.DiscreteActions;
discreteActionsOut[0] = 0;
actionsOut[0] = 3;
discreteActionsOut[0] = 3;
actionsOut[0] = 1;
discreteActionsOut[0] = 1;
actionsOut[0] = 4;
discreteActionsOut[0] = 4;
actionsOut[0] = 2;
discreteActionsOut[0] = 2;
}
}

12
Project/Assets/ML-Agents/Examples/Reacher/Scripts/ReacherAgent.cs


using UnityEngine;
using Unity.MLAgents;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Sensors;
public class ReacherAgent : Agent

/// <summary>
/// The agent's four actions correspond to torques on each of the two joints.
/// </summary>
public override void OnActionReceived(float[] vectorAction)
public override void OnActionReceived(ActionBuffers actionBuffers)
var torqueX = Mathf.Clamp(vectorAction[0], -1f, 1f) * 150f;
var torqueZ = Mathf.Clamp(vectorAction[1], -1f, 1f) * 150f;
var torqueX = Mathf.Clamp(actionBuffers.ContinuousActions[0], -1f, 1f) * 150f;
var torqueZ = Mathf.Clamp(actionBuffers.ContinuousActions[1], -1f, 1f) * 150f;
torqueX = Mathf.Clamp(vectorAction[2], -1f, 1f) * 150f;
torqueZ = Mathf.Clamp(vectorAction[3], -1f, 1f) * 150f;
torqueX = Mathf.Clamp(actionBuffers.ContinuousActions[2], -1f, 1f) * 150f;
torqueZ = Mathf.Clamp(actionBuffers.ContinuousActions[3], -1f, 1f) * 150f;
m_RbB.AddTorque(new Vector3(torqueX, 0f, torqueZ));
}

20
Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/DirectionIndicator.cs


using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using UnityEngine;
public bool updatedByAgent; //should this be updated by the agent? If not, it will use local settings
void OnEnable()
{
m_StartingYPos = transform.position.y;

{
transform.position = new Vector3(transformToFollow.position.x, m_StartingYPos + heightOffset, transformToFollow.position.z);
if (updatedByAgent)
return;
transform.position = new Vector3(transformToFollow.position.x, m_StartingYPos + heightOffset,
transformToFollow.position.z);
}
//Public method to allow an agent to directly update this component
public void MatchOrientation(Transform t)
{
transform.position = new Vector3(t.position.x, m_StartingYPos + heightOffset, t.position.z);
transform.rotation = t.rotation;
}
}
}

31
Project/Assets/ML-Agents/Examples/Soccer/Scripts/AgentSoccer.cs


using System;
using UnityEngine;
using Unity.MLAgents;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Policies;
public class AgentSoccer : Agent

m_ResetParams = Academy.Instance.EnvironmentParameters;
}
public void MoveAgent(float[] act)
public void MoveAgent(ActionSegment<int> act)
{
var dirToGo = Vector3.zero;
var rotateDir = Vector3.zero;

var forwardAxis = (int)act[0];
var rightAxis = (int)act[1];
var rotateAxis = (int)act[2];
var forwardAxis = act[0];
var rightAxis = act[1];
var rotateAxis = act[2];
switch (forwardAxis)
{

ForceMode.VelocityChange);
}
public override void OnActionReceived(float[] vectorAction)
public override void OnActionReceived(ActionBuffers actionBuffers)
{
if (position == Position.Goalie)

// Existential penalty cumulant for Generic
timePenalty -= m_Existential;
}
MoveAgent(vectorAction);
MoveAgent(actionBuffers.DiscreteActions);
public override void Heuristic(float[] actionsOut)
public override void Heuristic(in ActionBuffers actionsOut)
Array.Clear(actionsOut, 0, actionsOut.Length);
var discreteActionsOut = actionsOut.DiscreteActions;
discreteActionsOut.Clear();
actionsOut[0] = 1f;
discreteActionsOut[0] = 1;
actionsOut[0] = 2f;
discreteActionsOut[0] = 2;
actionsOut[2] = 1f;
discreteActionsOut[2] = 1;
actionsOut[2] = 2f;
discreteActionsOut[2] = 2;
actionsOut[1] = 1f;
discreteActionsOut[1] = 1;
actionsOut[1] = 2f;
discreteActionsOut[1] = 2;
}
}
/// <summary>

4
Project/Assets/ML-Agents/Examples/Template/Scripts/TemplateAgent.cs


using UnityEngine;
using Unity.MLAgents;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Sensors;
public class TemplateAgent : Agent

}
public override void OnActionReceived(float[] vectorAction)
public override void OnActionReceived(ActionBuffers actionBuffers)
{
}

20
Project/Assets/ML-Agents/Examples/Tennis/Scripts/TennisAgent.cs


using UnityEngine;
using UnityEngine.UI;
using Unity.MLAgents;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Sensors;
public class TennisAgent : Agent

sensor.AddObservation(m_InvertMult * gameObject.transform.rotation.z);
}
public override void OnActionReceived(float[] vectorAction)
public override void OnActionReceived(ActionBuffers actionBuffers)
var moveX = Mathf.Clamp(vectorAction[0], -1f, 1f) * m_InvertMult;
var moveY = Mathf.Clamp(vectorAction[1], -1f, 1f);
var rotate = Mathf.Clamp(vectorAction[2], -1f, 1f) * m_InvertMult;
var continuousActions = actionBuffers.ContinuousActions;
var moveX = Mathf.Clamp(continuousActions[0], -1f, 1f) * m_InvertMult;
var moveY = Mathf.Clamp(continuousActions[1], -1f, 1f);
var rotate = Mathf.Clamp(continuousActions[2], -1f, 1f) * m_InvertMult;
if (moveY > 0.5 && transform.position.y - transform.parent.transform.position.y < -1.5f)
{

m_TextComponent.text = score.ToString();
}
public override void Heuristic(float[] actionsOut)
public override void Heuristic(in ActionBuffers actionsOut)
actionsOut[0] = Input.GetAxis("Horizontal"); // Racket Movement
actionsOut[1] = Input.GetKey(KeyCode.Space) ? 1f : 0f; // Racket Jumping
actionsOut[2] = Input.GetAxis("Vertical"); // Racket Rotation
var continuousActionsOut = actionsOut.ContinuousActions;
continuousActionsOut[0] = Input.GetAxis("Horizontal"); // Racket Movement
continuousActionsOut[1] = Input.GetKey(KeyCode.Space) ? 1f : 0f; // Racket Jumping
continuousActionsOut[2] = Input.GetAxis("Vertical"); // Racket Rotation
}
public override void OnEpisodeBegin()

977
Project/Assets/ML-Agents/Examples/Walker/Scenes/WalkerDynamic.unity
File diff suppressed because it is too large

2
Project/Assets/ML-Agents/Examples/Walker/Scenes/WalkerDynamic.unity.meta


fileFormatVersion: 2
guid: 79d5d2687bfbe45f5b78bd6c04992e0d
guid: 65c87f50b8c81433d8fd7f6550773467
DefaultImporter:
externalObjects: {}
userData:

962
Project/Assets/ML-Agents/Examples/Walker/Scenes/WalkerStatic.unity
File diff suppressed because it is too large

218
Project/Assets/ML-Agents/Examples/Walker/Scripts/WalkerAgent.cs


using System;
using MLAgentsExamples;
using Unity.MLAgents.Actuators;
using Unity.MLAgentsExamples;
using Unity.MLAgents.Sensors;
using BodyPart = Unity.MLAgentsExamples.BodyPart;

{
public float maximumWalkingSpeed = 999; //The max walk velocity magnitude an agent will be rewarded for
Vector3 m_WalkDir; //Direction to the target
// Quaternion m_WalkDirLookRot; //Will hold the rotation to our target
[Header("Walk Speed")]
[Range(0.1f, 10)]
[SerializeField]
//The walking speed to try and achieve
private float m_TargetWalkingSpeed = 10;
[Header("Target To Walk Towards")] [Space(10)]
public TargetController target; //Target the agent will walk towards.
public float MTargetWalkingSpeed // property
{
get { return m_TargetWalkingSpeed; }
set { m_TargetWalkingSpeed = Mathf.Clamp(value, .1f, m_maxWalkingSpeed); }
}
const float m_maxWalkingSpeed = 10; //The max walking speed
//Should the agent sample a new goal velocity each episode?
//If true, walkSpeed will be randomly set between zero and m_maxWalkingSpeed in OnEpisodeBegin()
//If false, the goal velocity will be walkingSpeed
public bool randomizeWalkSpeedEachEpisode;
//The direction an agent will walk during training.
private Vector3 m_WorldDirToWalk = Vector3.right;
[Header("Target To Walk Towards")] public Transform target; //Target the agent will walk towards during training.
[Header("Body Parts")] [Space(10)] public Transform hips;
[Header("Body Parts")] public Transform hips;
public Transform chest;
public Transform spine;
public Transform head;

public Transform forearmR;
public Transform handR;
[Header("Orientation")] [Space(10)]
public OrientationCubeController orientationCube;
OrientationCubeController m_OrientationCube;
//The indicator graphic gameobject that points towards the target
DirectionIndicator m_DirectionIndicator;
orientationCube.UpdateOrientation(hips, target.transform);
m_OrientationCube = GetComponentInChildren<OrientationCubeController>();
m_DirectionIndicator = GetComponentInChildren<DirectionIndicator>();
//Setup each body part
m_JdController = GetComponent<JointDriveController>();

}
//Random start rotation to help generalize
transform.rotation = Quaternion.Euler(0, Random.Range(0.0f, 360.0f), 0);
hips.rotation = Quaternion.Euler(0, Random.Range(0.0f, 360.0f), 0);
UpdateOrientationObjects();
orientationCube.UpdateOrientation(hips, target.transform);
//Set our goal walking speed
MTargetWalkingSpeed =
randomizeWalkSpeedEachEpisode ? Random.Range(0.1f, m_maxWalkingSpeed) : MTargetWalkingSpeed;
SetResetParameters();
}

//Get velocities in the context of our orientation cube's space
//Note: You can get these velocities in world space as well but it may not train as well.
sensor.AddObservation(orientationCube.transform.InverseTransformDirection(bp.rb.velocity));
sensor.AddObservation(orientationCube.transform.InverseTransformDirection(bp.rb.angularVelocity));
sensor.AddObservation(m_OrientationCube.transform.InverseTransformDirection(bp.rb.velocity));
sensor.AddObservation(m_OrientationCube.transform.InverseTransformDirection(bp.rb.angularVelocity));
sensor.AddObservation(orientationCube.transform.InverseTransformDirection(bp.rb.position - hips.position));
sensor.AddObservation(m_OrientationCube.transform.InverseTransformDirection(bp.rb.position - hips.position));
if (bp.rb.transform != hips && bp.rb.transform != handL && bp.rb.transform != handR)
{

/// </summary>
public override void CollectObservations(VectorSensor sensor)
{
sensor.AddObservation(Quaternion.FromToRotation(hips.forward, orientationCube.transform.forward));
sensor.AddObservation(Quaternion.FromToRotation(head.forward, orientationCube.transform.forward));
var cubeForward = m_OrientationCube.transform.forward;
//velocity we want to match
var velGoal = cubeForward * MTargetWalkingSpeed;
//ragdoll's avg vel
var avgVel = GetAvgVelocity();
//current ragdoll velocity. normalized
sensor.AddObservation(Vector3.Distance(velGoal, avgVel));
//avg body vel relative to cube
sensor.AddObservation(m_OrientationCube.transform.InverseTransformDirection(avgVel));
//vel goal relative to cube
sensor.AddObservation(m_OrientationCube.transform.InverseTransformDirection(velGoal));
//rotation deltas
sensor.AddObservation(Quaternion.FromToRotation(hips.forward, cubeForward));
sensor.AddObservation(Quaternion.FromToRotation(head.forward, cubeForward));
sensor.AddObservation(orientationCube.transform.InverseTransformPoint(target.transform.position));
//Position of target position relative to cube
sensor.AddObservation(m_OrientationCube.transform.InverseTransformPoint(target.transform.position));
foreach (var bodyPart in m_JdController.bodyPartsList)
{

public override void OnActionReceived(float[] vectorAction)
public override void OnActionReceived(ActionBuffers actionBuffers)
bpDict[chest].SetJointTargetRotation(vectorAction[++i], vectorAction[++i], vectorAction[++i]);
bpDict[spine].SetJointTargetRotation(vectorAction[++i], vectorAction[++i], vectorAction[++i]);
var continuousActions = actionBuffers.ContinuousActions;
bpDict[chest].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], continuousActions[++i]);
bpDict[spine].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], continuousActions[++i]);
bpDict[thighL].SetJointTargetRotation(vectorAction[++i], vectorAction[++i], 0);
bpDict[thighR].SetJointTargetRotation(vectorAction[++i], vectorAction[++i], 0);
bpDict[shinL].SetJointTargetRotation(vectorAction[++i], 0, 0);
bpDict[shinR].SetJointTargetRotation(vectorAction[++i], 0, 0);
bpDict[footR].SetJointTargetRotation(vectorAction[++i], vectorAction[++i], vectorAction[++i]);
bpDict[footL].SetJointTargetRotation(vectorAction[++i], vectorAction[++i], vectorAction[++i]);
bpDict[thighL].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], 0);
bpDict[thighR].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], 0);
bpDict[shinL].SetJointTargetRotation(continuousActions[++i], 0, 0);
bpDict[shinR].SetJointTargetRotation(continuousActions[++i], 0, 0);
bpDict[footR].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], continuousActions[++i]);
bpDict[footL].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], continuousActions[++i]);
bpDict[armL].SetJointTargetRotation(vectorAction[++i], vectorAction[++i], 0);
bpDict[armR].SetJointTargetRotation(vectorAction[++i], vectorAction[++i], 0);
bpDict[forearmL].SetJointTargetRotation(vectorAction[++i], 0, 0);
bpDict[forearmR].SetJointTargetRotation(vectorAction[++i], 0, 0);
bpDict[head].SetJointTargetRotation(vectorAction[++i], vectorAction[++i], 0);
bpDict[armL].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], 0);
bpDict[armR].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], 0);
bpDict[forearmL].SetJointTargetRotation(continuousActions[++i], 0, 0);
bpDict[forearmR].SetJointTargetRotation(continuousActions[++i], 0, 0);
bpDict[head].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], 0);
bpDict[chest].SetJointStrength(vectorAction[++i]);
bpDict[spine].SetJointStrength(vectorAction[++i]);
bpDict[head].SetJointStrength(vectorAction[++i]);
bpDict[thighL].SetJointStrength(vectorAction[++i]);
bpDict[shinL].SetJointStrength(vectorAction[++i]);
bpDict[footL].SetJointStrength(vectorAction[++i]);
bpDict[thighR].SetJointStrength(vectorAction[++i]);
bpDict[shinR].SetJointStrength(vectorAction[++i]);
bpDict[footR].SetJointStrength(vectorAction[++i]);
bpDict[armL].SetJointStrength(vectorAction[++i]);
bpDict[forearmL].SetJointStrength(vectorAction[++i]);
bpDict[armR].SetJointStrength(vectorAction[++i]);
bpDict[forearmR].SetJointStrength(vectorAction[++i]);
bpDict[chest].SetJointStrength(continuousActions[++i]);
bpDict[spine].SetJointStrength(continuousActions[++i]);
bpDict[head].SetJointStrength(continuousActions[++i]);
bpDict[thighL].SetJointStrength(continuousActions[++i]);
bpDict[shinL].SetJointStrength(continuousActions[++i]);
bpDict[footL].SetJointStrength(continuousActions[++i]);
bpDict[thighR].SetJointStrength(continuousActions[++i]);
bpDict[shinR].SetJointStrength(continuousActions[++i]);
bpDict[footR].SetJointStrength(continuousActions[++i]);
bpDict[armL].SetJointStrength(continuousActions[++i]);
bpDict[forearmL].SetJointStrength(continuousActions[++i]);
bpDict[armR].SetJointStrength(continuousActions[++i]);
bpDict[forearmR].SetJointStrength(continuousActions[++i]);
}
//Update OrientationCube and DirectionIndicator
void UpdateOrientationObjects()
{
m_WorldDirToWalk = target.position - hips.position;
m_OrientationCube.UpdateOrientation(hips, target);
if (m_DirectionIndicator)
{
m_DirectionIndicator.MatchOrientation(m_OrientationCube.transform);
}
var cubeForward = orientationCube.transform.forward;
orientationCube.UpdateOrientation(hips, target.transform);
UpdateOrientationObjects();
var cubeForward = m_OrientationCube.transform.forward;
// a. Velocity alignment with goal direction.
var moveTowardsTargetReward = Vector3.Dot(cubeForward,
Vector3.ClampMagnitude(m_JdController.bodyPartsDict[hips].rb.velocity, maximumWalkingSpeed));
if (float.IsNaN(moveTowardsTargetReward))
// a. Match target speed
//This reward will approach 1 if it matches perfectly and approach zero as it deviates
var matchSpeedReward = GetMatchingVelocityReward(cubeForward * MTargetWalkingSpeed, GetAvgVelocity());
//Check for NaNs
if (float.IsNaN(matchSpeedReward))
$" cubeForward: {cubeForward}\n"+
$" hips.velocity: {m_JdController.bodyPartsDict[hips].rb.velocity}\n"+
$" maximumWalkingSpeed: {maximumWalkingSpeed}"
$" cubeForward: {cubeForward}\n" +
$" hips.velocity: {m_JdController.bodyPartsDict[hips].rb.velocity}\n" +
$" maximumWalkingSpeed: {m_maxWalkingSpeed}"
// b. Rotation alignment with goal direction.
var lookAtTargetReward = Vector3.Dot(cubeForward, head.forward);
// b. Rotation alignment with target direction.
//This reward will approach 1 if it faces the target direction perfectly and approach zero as it deviates
var lookAtTargetReward = (Vector3.Dot(cubeForward, head.forward) + 1) * .5F;
//Check for NaNs
$" cubeForward: {cubeForward}\n"+
$" cubeForward: {cubeForward}\n" +
// c. Encourage head height. //Should normalize to ~1
var headHeightOverFeetReward =
((head.position.y - footL.position.y) + (head.position.y - footR.position.y) / 10);
if (float.IsNaN(headHeightOverFeetReward))
AddReward(matchSpeedReward * lookAtTargetReward);
}
//Returns the average velocity of all of the body parts
//Using the velocity of the hips only has shown to result in more erratic movement from the limbs, so...
//...using the average helps prevent this erratic movement
Vector3 GetAvgVelocity()
{
Vector3 velSum = Vector3.zero;
Vector3 avgVel = Vector3.zero;
//ALL RBS
int numOfRB = 0;
foreach (var item in m_JdController.bodyPartsList)
throw new ArgumentException(
"NaN in headHeightOverFeetReward.\n" +
$" head.position: {head.position}\n"+
$" footL.position: {footL.position}\n"+
$" footR.position: {footR.position}"
);
numOfRB++;
velSum += item.rb.velocity;
AddReward(
+ 0.02f * moveTowardsTargetReward
+ 0.02f * lookAtTargetReward
+ 0.005f * headHeightOverFeetReward
);
avgVel = velSum / numOfRB;
return avgVel;
}
//normalized value of the difference in avg speed vs goal walking speed.
public float GetMatchingVelocityReward(Vector3 velocityGoal, Vector3 actualVelocity)
{
//distance between our actual velocity and goal velocity
var velDeltaMagnitude = Mathf.Clamp(Vector3.Distance(actualVelocity, velocityGoal), 0, MTargetWalkingSpeed);
//return the value on a declining sigmoid shaped curve that decays from 1 to 0
//This reward will approach 1 if it matches perfectly and approach zero as it deviates
return Mathf.Pow(1 - Mathf.Pow(velDeltaMagnitude / MTargetWalkingSpeed, 2), 2);
}
/// <summary>
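
The new WalkerAgent reward replaces the raw velocity dot product with GetMatchingVelocityReward, which maps the distance between the average body velocity and the goal velocity onto a curve that falls from 1 (perfect match) to 0 (off by the full target speed). A worked example using the formula from the diff above; the wrapper class is illustrative only.

using UnityEngine;

public static class WalkerRewardExample
{
    // Same expression as WalkerAgent.GetMatchingVelocityReward in the diff above.
    public static float GetMatchingVelocityReward(Vector3 velocityGoal, Vector3 actualVelocity, float targetSpeed)
    {
        var velDeltaMagnitude = Mathf.Clamp(Vector3.Distance(actualVelocity, velocityGoal), 0, targetSpeed);
        return Mathf.Pow(1 - Mathf.Pow(velDeltaMagnitude / targetSpeed, 2), 2);
    }
}

// With a target walking speed of 10:
//   goal (10, 0, 0) vs actual (10, 0, 0) -> delta 0  -> reward 1.0
//   goal (10, 0, 0) vs actual (5, 0, 0)  -> delta 5  -> (1 - 0.5^2)^2 = 0.5625
//   goal (10, 0, 0) vs actual (0, 0, 0)  -> delta 10 -> reward 0.0
// The total step reward is then matchSpeedReward * lookAtTargetReward, both in [0, 1].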

1001
Project/Assets/ML-Agents/Examples/Walker/TFModels/WalkerDynamic.nn
File diff suppressed because it is too large

2
Project/Assets/ML-Agents/Examples/Walker/TFModels/WalkerDynamic.nn.meta


fileFormatVersion: 2
guid: e785133c5b0ac461588106642550d1b3
guid: 8cbae6de45ea44d0c97366e252052722
ScriptedImporter:
fileIDToRecycleName:
11400000: main obj

1001
Project/Assets/ML-Agents/Examples/Walker/TFModels/WalkerStatic.nn
File diff suppressed because it is too large

2
Project/Assets/ML-Agents/Examples/Walker/TFModels/WalkerStatic.nn.meta


fileFormatVersion: 2
guid: 8dfd4337ed40e4d48872a4f86919c9da
guid: 185990f76b7804d1e83378e9d4454c6b
ScriptedImporter:
fileIDToRecycleName:
11400000: main obj

31
Project/Assets/ML-Agents/Examples/WallJump/Scripts/WallJumpAgent.cs


using UnityEngine;
using Unity.MLAgents;
using Unity.Barracuda;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Sensors;
using Unity.MLAgentsExamples;

m_GroundRenderer.material = m_GroundMaterial;
}
public void MoveAgent(float[] act)
public void MoveAgent(ActionSegment<int> act)
{
AddReward(-0.0005f);
var smallGrounded = DoGroundCheck(true);

var rotateDir = Vector3.zero;
var dirToGoForwardAction = (int)act[0];
var rotateDirAction = (int)act[1];
var dirToGoSideAction = (int)act[2];
var jumpAction = (int)act[3];
var dirToGoForwardAction = act[0];
var rotateDirAction = act[1];
var dirToGoSideAction = act[2];
var jumpAction = act[3];
if (dirToGoForwardAction == 1)
dirToGo = (largeGrounded ? 1f : 0.5f) * 1f * transform.forward;

jumpingTime -= Time.fixedDeltaTime;
}
public override void OnActionReceived(float[] vectorAction)
public override void OnActionReceived(ActionBuffers actionBuffers)
MoveAgent(vectorAction);
MoveAgent(actionBuffers.DiscreteActions);
if ((!Physics.Raycast(m_AgentRb.position, Vector3.down, 20))
|| (!Physics.Raycast(m_ShortBlockRb.position, Vector3.down, 20)))
{

}
}
public override void Heuristic(float[] actionsOut)
public override void Heuristic(in ActionBuffers actionsOut)
System.Array.Clear(actionsOut, 0, actionsOut.Length);
var discreteActionsOut = actionsOut.DiscreteActions;
discreteActionsOut.Clear();
actionsOut[1] = 2f;
discreteActionsOut[1] = 2;
actionsOut[0] = 1f;
discreteActionsOut[0] = 1;
actionsOut[1] = 1f;
discreteActionsOut[1] = 1;
actionsOut[0] = 2f;
discreteActionsOut[0] = 2;
actionsOut[3] = Input.GetKey(KeyCode.Space) ? 1.0f : 0.0f;
discreteActionsOut[3] = Input.GetKey(KeyCode.Space) ? 1 : 0;
}
// Detect when the agent hits the goal

17
Project/Assets/ML-Agents/Examples/Worm/Scripts/WormAgent.cs


using UnityEngine;
using Unity.MLAgents;
using Unity.MLAgents.Actuators;
using Unity.MLAgentsExamples;
using Unity.MLAgents.Sensors;

AddReward(1f);
}
public override void OnActionReceived(float[] vectorAction)
public override void OnActionReceived(ActionBuffers actionBuffers)
var continuousActions = actionBuffers.ContinuousActions;
bpDict[bodySegment1].SetJointTargetRotation(vectorAction[++i], vectorAction[++i], 0);
bpDict[bodySegment2].SetJointTargetRotation(vectorAction[++i], vectorAction[++i], 0);
bpDict[bodySegment3].SetJointTargetRotation(vectorAction[++i], vectorAction[++i], 0);
bpDict[bodySegment1].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], 0);
bpDict[bodySegment2].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], 0);
bpDict[bodySegment3].SetJointTargetRotation(continuousActions[++i], continuousActions[++i], 0);
bpDict[bodySegment1].SetJointStrength(vectorAction[++i]);
bpDict[bodySegment2].SetJointStrength(vectorAction[++i]);
bpDict[bodySegment3].SetJointStrength(vectorAction[++i]);
bpDict[bodySegment1].SetJointStrength(continuousActions[++i]);
bpDict[bodySegment2].SetJointStrength(continuousActions[++i]);
bpDict[bodySegment3].SetJointStrength(continuousActions[++i]);
// Detect if worm fell off/through platform
if (bodySegment0.position.y < ground.position.y - 2)

2
Project/ProjectSettings/ProjectVersion.txt


m_EditorVersion: 2018.4.17f1
m_EditorVersion: 2018.4.24f1

4
com.unity.ml-agents.extensions/Editor/Unity.ML-Agents.Extensions.Editor.asmdef


{
"name": "Unity.ML-Agents.Extensions.Editor",
"references": [
"Unity.ML-Agents.Extensions"
"Unity.ML-Agents.Extensions",
"Unity.ML-Agents",
"Unity.ML-Agents.Editor"
],
"includePlatforms": [
"Editor"

1
com.unity.ml-agents.extensions/Runtime/AssemblyInfo.cs


using System.Runtime.CompilerServices;
[assembly: InternalsVisibleTo("Unity.ML-Agents.Extensions.EditorTests")]
[assembly: InternalsVisibleTo("Unity.ML-Agents.Extensions.Editor")]

29
com.unity.ml-agents.extensions/Runtime/Sensors/ArticulationBodyPoseExtractor.cs


return new Pose { rotation = t.rotation, position = t.position };
}
/// <inheritdoc/>
protected internal override Object GetObjectAt(int index)
{
return m_Bodies[index];
}
internal IEnumerable<ArticulationBody> GetEnabledArticulationBodies()
{
if (m_Bodies == null)
{
yield break;
}
for (var i = 0; i < m_Bodies.Length; i++)
{
var articBody = m_Bodies[i];
if (articBody == null)
{
// Ignore a virtual root.
continue;
}
if (IsPoseEnabled(i))
{
yield return articBody;
}
}
}
}
}
#endif // UNITY_2020_1_OR_NEWER

8
com.unity.ml-agents.extensions/Runtime/Sensors/ArticulationBodySensorComponent.cs


var poseExtractor = new ArticulationBodyPoseExtractor(RootBody);
var numPoseObservations = poseExtractor.GetNumPoseObservations(Settings);
var numJointObservations = 0;
// Start from i=1 to ignore the root
for (var i = 1; i < poseExtractor.Bodies.Length; i++)
foreach(var articBody in poseExtractor.GetEnabledArticulationBodies())
numJointObservations += ArticulationBodyJointExtractor.NumObservations(
poseExtractor.Bodies[i], Settings
);
numJointObservations += ArticulationBodyJointExtractor.NumObservations(articBody, Settings);
}
return new[] { numPoseObservations + numJointObservations };
}

54
com.unity.ml-agents.extensions/Runtime/Sensors/PhysicsBodySensor.cs


using System.Collections.Generic;
using UnityEngine;
using Unity.MLAgents.Sensors;

string m_SensorName;
PoseExtractor m_PoseExtractor;
IJointExtractor[] m_JointExtractors;
List<IJointExtractor> m_JointExtractors;
/// Construct a new PhysicsBodySensor
/// Construct a new PhysicsBodySensor
/// <param name="rootBody">The root Rigidbody. This has no Joints on it (but other Joints may connect to it).</param>
/// <param name="rootGameObject">Optional GameObject used to find Rigidbodies in the hierarchy.</param>
/// <param name="virtualRoot">Optional GameObject used to determine the root of the poses,
/// <param name="poseExtractor"></param>
Rigidbody rootBody,
GameObject rootGameObject,
GameObject virtualRoot,
RigidBodyPoseExtractor poseExtractor,
string sensorName=null
string sensorName
var poseExtractor = new RigidBodyPoseExtractor(rootBody, rootGameObject, virtualRoot);
m_SensorName = string.IsNullOrEmpty(sensorName) ? $"PhysicsBodySensor:{rootBody?.name}" : sensorName;
m_SensorName = sensorName;
var rigidBodies = poseExtractor.Bodies;
if (rigidBodies != null)
{
m_JointExtractors = new IJointExtractor[rigidBodies.Length - 1]; // skip the root
for (var i = 1; i < rigidBodies.Length; i++)
{
var jointExtractor = new RigidBodyJointExtractor(rigidBodies[i]);
numJointExtractorObservations += jointExtractor.NumObservations(settings);
m_JointExtractors[i - 1] = jointExtractor;
}
}
else
m_JointExtractors = new List<IJointExtractor>(poseExtractor.NumEnabledPoses);
foreach(var rb in poseExtractor.GetEnabledRigidbodies())
m_JointExtractors = new IJointExtractor[0];
var jointExtractor = new RigidBodyJointExtractor(rb);
numJointExtractorObservations += jointExtractor.NumObservations(settings);
m_JointExtractors.Add(jointExtractor);
}
var numTransformObservations = m_PoseExtractor.GetNumPoseObservations(settings);

m_Settings = settings;
var numJointExtractorObservations = 0;
var articBodies = poseExtractor.Bodies;
if (articBodies != null)
m_JointExtractors = new List<IJointExtractor>(poseExtractor.NumEnabledPoses);
foreach(var articBody in poseExtractor.GetEnabledArticulationBodies())
m_JointExtractors = new IJointExtractor[articBodies.Length - 1]; // skip the root
for (var i = 1; i < articBodies.Length; i++)
{
var jointExtractor = new ArticulationBodyJointExtractor(articBodies[i]);
numJointExtractorObservations += jointExtractor.NumObservations(settings);
m_JointExtractors[i - 1] = jointExtractor;
}
}
else
{
m_JointExtractors = new IJointExtractor[0];
var jointExtractor = new ArticulationBodyJointExtractor(articBody);
numJointExtractorObservations += jointExtractor.NumObservations(settings);
m_JointExtractors.Add(jointExtractor);
}
var numTransformObservations = m_PoseExtractor.GetNumPoseObservations(settings);

117
com.unity.ml-agents.extensions/Runtime/Sensors/PoseExtractor.cs


using System;
using Object = UnityEngine.Object;
namespace Unity.MLAgents.Extensions.Sensors
{

{
if (m_ParentIndices == null)
{
return -1;
throw new NullReferenceException("No parent indices set");
}
return m_ParentIndices[index];

public void SetPoseEnabled(int index, bool val)
{
m_PoseEnabled[index] = val;
}
public bool IsPoseEnabled(int index)
{
return m_PoseEnabled[index];
}
/// <summary>

/// <returns></returns>
protected internal abstract Vector3 GetLinearVelocityAt(int index);
/// <summary>
/// Return the underlying object at the given index. This is only
/// used for display in the inspector.
/// </summary>
/// <param name="index"></param>
/// <returns></returns>
protected internal virtual Object GetObjectAt(int index)
{
return null;
}
/// <summary>
/// Update the internal model space transform storage based on the underlying system.

Debug.DrawLine(current.position+offset, current.position+offset+.1f*localRight, Color.blue);
}
}
/// <summary>
/// Simplified representation of the a node in the hierarchy for display.
/// </summary>
internal struct DisplayNode
{
/// <summary>
/// Underlying object in the hierarchy. Pass to EditorGUIUtility.ObjectContent() for display.
/// </summary>
public Object NodeObject;
/// <summary>
/// Whether the poses for the object are enabled.
/// </summary>
public bool Enabled;
/// <summary>
/// Depth in the hierarchy, used for adjusting the indent level.
/// </summary>
public int Depth;
/// <summary>
/// The index of the corresponding object in the PoseExtractor.
/// </summary>
public int OriginalIndex;
}
/// <summary>
/// Get a list of display nodes in depth-first order.
/// </summary>
/// <returns></returns>
internal IList<DisplayNode> GetDisplayNodes()
{
if (NumPoses == 0)
{
return Array.Empty<DisplayNode>();
}
var nodesOut = new List<DisplayNode>(NumPoses);
// List of children for each node
var tree = new Dictionary<int, List<int>>();
for (var i = 0; i < NumPoses; i++)
{
var parent = GetParentIndex(i);
if (i == -1)
{
continue;
}
if (!tree.ContainsKey(parent))
{
tree[parent] = new List<int>();
}
tree[parent].Add(i);
}
// Store (index, depth) in the stack
var stack = new Stack<(int, int)>();
stack.Push((0, 0));
while (stack.Count != 0)
{
var (current, depth) = stack.Pop();
var obj = GetObjectAt(current);
var node = new DisplayNode
{
NodeObject = obj,
Enabled = IsPoseEnabled(current),
OriginalIndex = current,
Depth = depth
};
nodesOut.Add(node);
// Add children
if (tree.ContainsKey(current))
{
// Push to the stack in reverse order
var children = tree[current];
for (var childIdx = children.Count-1; childIdx >= 0; childIdx--)
{
stack.Push((children[childIdx], depth+1));
}
}
// Safety check
// This shouldn't even happen, but in case we have a cycle in the graph
// exit instead of looping forever and eating up all the memory.
if (nodesOut.Count > NumPoses)
{
return nodesOut;
}
}
return nodesOut;
}
}
/// <summary>

80
com.unity.ml-agents.extensions/Runtime/Sensors/RigidBodyPoseExtractor.cs


/// <param name="rootGameObject">Optional GameObject used to find Rigidbodies in the hierarchy.</param>
/// <param name="virtualRoot">Optional GameObject used to determine the root of the poses,
/// separate from the actual Rigidbodies in the hierarchy. For locomotion tasks, with ragdolls, this provides
/// a stabilized refernece frame, which can improve learning.</param>
public RigidBodyPoseExtractor(Rigidbody rootBody, GameObject rootGameObject = null, GameObject virtualRoot = null)
/// a stabilized reference frame, which can improve learning.</param>
/// <param name="enableBodyPoses">Optional mapping of whether a body's pose should be enabled or not.</param>
public RigidBodyPoseExtractor(Rigidbody rootBody, GameObject rootGameObject = null,
GameObject virtualRoot = null, Dictionary<Rigidbody, bool> enableBodyPoses = null)
{
if (rootBody == null)
{

Rigidbody[] rbs;
Joint[] joints;
joints = rootBody.GetComponentsInChildren <Joint>();
joints = rootGameObject.GetComponentsInChildren<Joint>();
}
if (rbs == null || rbs.Length == 0)

}
if (rbs[0] != rootBody)
if (rbs[0] != rootBody)
{
Debug.Log("Expected root body at index 0");
return;

}
}
var joints = rootBody.GetComponentsInChildren <Joint>();
foreach (var j in joints)
{
var parent = j.connectedBody;

// By default, ignore the root
SetPoseEnabled(0, false);
if (enableBodyPoses != null)
{
foreach (var pair in enableBodyPoses)
{
var rb = pair.Key;
if (bodyToIndex.TryGetValue(rb, out var index))
{
SetPoseEnabled(index, pair.Value);
}
}
}
}
/// <inheritdoc/>

return new Pose { rotation = body.rotation, position = body.position };
}
/// <inheritdoc/>
protected internal override Object GetObjectAt(int index)
{
if (index == 0 && m_VirtualRoot != null)
{
return m_VirtualRoot;
}
return m_Bodies[index];
}
/// <summary>
/// Get a dictionary indicating which Rigidbodies' poses are enabled or disabled.
/// </summary>
/// <returns></returns>
internal Dictionary<Rigidbody, bool> GetBodyPosesEnabled()
{
var bodyPosesEnabled = new Dictionary<Rigidbody, bool>(m_Bodies.Length);
for (var i = 0; i < m_Bodies.Length; i++)
{
var rb = m_Bodies[i];
if (rb == null)
{
continue; // skip virtual root
}
bodyPosesEnabled[rb] = IsPoseEnabled(i);
}
return bodyPosesEnabled;
}
internal IEnumerable<Rigidbody> GetEnabledRigidbodies()
{
if (m_Bodies == null)
{
yield break;
}
for (var i = 0; i < m_Bodies.Length; i++)
{
var rb = m_Bodies[i];
if (rb == null)
{
// Ignore a virtual root.
continue;
}
if (IsPoseEnabled(i))
{
yield return rb;
}
}
}
}
}

68
com.unity.ml-agents.extensions/Runtime/Sensors/RigidBodySensorComponent.cs


using System.Collections.Generic;
using UnityEngine;
using Unity.MLAgents.Sensors;

/// <summary>
/// Optional sensor name. This must be unique for each Agent.
/// </summary>
[SerializeField]
[SerializeField]
[HideInInspector]
RigidBodyPoseExtractor m_PoseExtractor;
/// <summary>
/// Creates a PhysicsBodySensor.
/// </summary>

return new PhysicsBodySensor(RootBody, gameObject, VirtualRoot, Settings, sensorName);
var _sensorName = string.IsNullOrEmpty(sensorName) ? $"PhysicsBodySensor:{RootBody?.name}" : sensorName;
return new PhysicsBodySensor(GetPoseExtractor(), Settings, _sensorName);
}
/// <inheritdoc/>

return new[] { 0 };
}
// TODO static method in PhysicsBodySensor?
// TODO only update PoseExtractor when body changes?
var poseExtractor = new RigidBodyPoseExtractor(RootBody, gameObject, VirtualRoot);
var poseExtractor = GetPoseExtractor();
// Start from i=1 to ignore the root
for (var i = 1; i < poseExtractor.Bodies.Length; i++)
foreach(var rb in poseExtractor.GetEnabledRigidbodies())
var body = poseExtractor.Bodies[i];
var joint = body?.GetComponent<Joint>();
numJointObservations += RigidBodyJointExtractor.NumObservations(body, joint, Settings);
var joint = rb.GetComponent<Joint>();
numJointObservations += RigidBodyJointExtractor.NumObservations(rb, joint, Settings);
}
/// <summary>
/// Get the DisplayNodes of the hierarchy.
/// </summary>
/// <returns></returns>
internal IList<PoseExtractor.DisplayNode> GetDisplayNodes()
{
return GetPoseExtractor().GetDisplayNodes();
}
/// <summary>
/// Lazy construction of the PoseExtractor.
/// </summary>
/// <returns></returns>
RigidBodyPoseExtractor GetPoseExtractor()
{
if (m_PoseExtractor == null)
{
ResetPoseExtractor();
}
return m_PoseExtractor;
}
/// <summary>
/// Reset the pose extractor, trying to keep the enabled state of the corresponding poses the same.
/// </summary>
internal void ResetPoseExtractor()
{
// Get the current enabled state of each body, so that we can reinitialize with them.
Dictionary<Rigidbody, bool> bodyPosesEnabled = null;
if (m_PoseExtractor != null)
{
bodyPosesEnabled = m_PoseExtractor.GetBodyPosesEnabled();
}
m_PoseExtractor = new RigidBodyPoseExtractor(RootBody, gameObject, VirtualRoot, bodyPosesEnabled);
}
/// <summary>
/// Toggle the pose at the given index.
/// </summary>
/// <param name="index"></param>
/// <param name="enabled"></param>
internal void SetPoseEnabled(int index, bool enabled)
{
GetPoseExtractor().SetPoseEnabled(index, enabled);
}
}

88
com.unity.ml-agents.extensions/Tests/Editor/Sensors/PoseExtractorTests.cs


using System;
using UnityEngine;
using NUnit.Framework;
using Unity.MLAgents.Extensions.Sensors;

public class PoseExtractorTests
{
class UselessPoseExtractor : PoseExtractor
class BasicPoseExtractor : PoseExtractor
{
protected internal override Pose GetPoseAt(int index)
{

protected internal override Vector3 GetLinearVelocityAt(int index)
protected internal override Vector3 GetLinearVelocityAt(int index)
}
class UselessPoseExtractor : BasicPoseExtractor
{
public void Init(int[] parentIndices)
{
Setup(parentIndices);

poseExtractor.UpdateModelSpacePoses();
Assert.AreEqual(0, poseExtractor.NumPoses);
// Iterating through poses and velocities should be an empty loop
foreach (var pose in poseExtractor.GetEnabledModelSpacePoses())
{
throw new UnityAgentsException("This shouldn't happen");
}
foreach (var pose in poseExtractor.GetEnabledLocalSpacePoses())
{
throw new UnityAgentsException("This shouldn't happen");
}
foreach (var vel in poseExtractor.GetEnabledModelSpaceVelocities())
{
throw new UnityAgentsException("This shouldn't happen");
}
foreach (var vel in poseExtractor.GetEnabledLocalSpaceVelocities())
{
throw new UnityAgentsException("This shouldn't happen");
}
// Getting a parent index should throw an index exception
Assert.Throws<NullReferenceException>(
() => poseExtractor.GetParentIndex(0)
);
// DisplayNodes should be empty
var displayNodes = poseExtractor.GetDisplayNodes();
Assert.AreEqual(0, displayNodes.Count);
}
[Test]

Assert.AreEqual(size, localPoseIndex);
}
class BadPoseExtractor : PoseExtractor
[Test]
public void TestChainDisplayNodes()
{
var size = 4;
var chain = new ChainPoseExtractor(size);
var displayNodes = chain.GetDisplayNodes();
Assert.AreEqual(size, displayNodes.Count);
for (var i = 0; i < size; i++)
{
var displayNode = displayNodes[i];
Assert.AreEqual(i, displayNode.OriginalIndex);
Assert.AreEqual(null, displayNode.NodeObject);
Assert.AreEqual(i, displayNode.Depth);
Assert.AreEqual(true, displayNode.Enabled);
}
}
[Test]
public void TestDisplayNodesLoop()
{
// Degenerate case with a loop
var poseExtractor = new UselessPoseExtractor();
poseExtractor.Init(new[] {-1, 2, 1});
// This just shouldn't blow up
poseExtractor.GetDisplayNodes();
// Self-loop
poseExtractor.Init(new[] {-1, 1});
// This just shouldn't blow up
poseExtractor.GetDisplayNodes();
}
class BadPoseExtractor : BasicPoseExtractor
{
public BadPoseExtractor()
{

}
Setup(parents);
}
protected internal override Pose GetPoseAt(int index)
{
return Pose.identity;
}
protected internal override Vector3 GetLinearVelocityAt(int index)
{
return Vector3.zero;
}
}
[Test]

var bad = new BadPoseExtractor();
});
}
}
public class PoseExtensionTests

67
com.unity.ml-agents.extensions/Tests/Editor/Sensors/RigidBodyPoseExtractorTests.cs


var rootRb = go.AddComponent<Rigidbody>();
var poseExtractor = new RigidBodyPoseExtractor(rootRb);
Assert.AreEqual(1, poseExtractor.NumPoses);
// Also pass the GameObject
poseExtractor = new RigidBodyPoseExtractor(rootRb, go);
Assert.AreEqual(1, poseExtractor.NumPoses);
}
[Test]
public void TestNoBodiesFound()
{
// Check that if we can't find any bodies under the game object, we get an empty extractor
var gameObj = new GameObject();
var rootRb = gameObj.AddComponent<Rigidbody>();
var otherGameObj = new GameObject();
var poseExtractor = new RigidBodyPoseExtractor(rootRb, otherGameObj);
Assert.AreEqual(0, poseExtractor.NumPoses);
// Add an RB under the other GameObject. Constructor will find a rigid body, but not the root.
var otherRb = otherGameObj.AddComponent<Rigidbody>();
poseExtractor = new RigidBodyPoseExtractor(rootRb, otherGameObj);
Assert.AreEqual(0, poseExtractor.NumPoses);
}
[Test]

Assert.AreEqual(rb1.position, poseExtractor.GetPoseAt(0).position);
Assert.IsTrue(rb1.rotation == poseExtractor.GetPoseAt(0).rotation);
Assert.AreEqual(rb1.velocity, poseExtractor.GetLinearVelocityAt(0));
// Check DisplayNodes gives expected results
var displayNodes = poseExtractor.GetDisplayNodes();
Assert.AreEqual(2, displayNodes.Count);
Assert.AreEqual(rb1, displayNodes[0].NodeObject);
Assert.AreEqual(false, displayNodes[0].Enabled);
Assert.AreEqual(rb2, displayNodes[1].NodeObject);
Assert.AreEqual(true, displayNodes[1].Enabled);
}
[Test]

Assert.AreEqual(rb1.position, poseExtractor.GetPoseAt(1).position);
Assert.IsTrue(rb1.rotation == poseExtractor.GetPoseAt(1).rotation);
Assert.AreEqual(rb1.velocity, poseExtractor.GetLinearVelocityAt(1));
}
[Test]
public void TestBodyPosesEnabledDictionary()
{
// * rootObj
// - rb1
// * go2
// - rb2
// - joint
var rootObj = new GameObject();
var rb1 = rootObj.AddComponent<Rigidbody>();
var go2 = new GameObject();
var rb2 = go2.AddComponent<Rigidbody>();
go2.transform.SetParent(rootObj.transform);
var joint = go2.AddComponent<ConfigurableJoint>();
joint.connectedBody = rb1;
var poseExtractor = new RigidBodyPoseExtractor(rb1);
// Expect the root body disabled and the attached one enabled.
Assert.IsFalse(poseExtractor.IsPoseEnabled(0));
Assert.IsTrue(poseExtractor.IsPoseEnabled(1));
var bodyPosesEnabled = poseExtractor.GetBodyPosesEnabled();
Assert.IsFalse(bodyPosesEnabled[rb1]);
Assert.IsTrue(bodyPosesEnabled[rb2]);
// Swap the values
bodyPosesEnabled[rb1] = true;
bodyPosesEnabled[rb2] = false;
var poseExtractor2 = new RigidBodyPoseExtractor(rb1, null, null, bodyPosesEnabled);
Assert.IsTrue(poseExtractor2.IsPoseEnabled(0));
Assert.IsFalse(poseExtractor2.IsPoseEnabled(1));
}
}
}

29
com.unity.ml-agents/CHANGELOG.md


and this project adheres to
[Semantic Versioning](http://semver.org/spec/v2.0.0.html).
## [Unreleased]
### Major Changes
#### com.unity.ml-agents (C#)
#### ml-agents / ml-agents-envs / gym-unity (Python)
### Minor Changes
#### com.unity.ml-agents (C#)
#### ml-agents / ml-agents-envs / gym-unity (Python)
### Bug Fixes
#### com.unity.ml-agents (C#)
#### ml-agents / ml-agents-envs / gym-unity (Python)
## [1.3.0-preview] - 2020-08-12
### Major Changes
#### com.unity.ml-agents (C#)
#### ml-agents / ml-agents-envs / gym-unity (Python)
### Minor Changes
#### com.unity.ml-agents (C#)
- Update Barracuda to 1.0.2.
#### ml-agents / ml-agents-envs / gym-unity (Python)
### Bug Fixes
#### com.unity.ml-agents (C#)
#### ml-agents / ml-agents-envs / gym-unity (Python)
## [1.3.0-preview] - 2020-08-12
### Major Changes

201
com.unity.ml-agents/Runtime/Agent.cs


using System;
using System.Collections.Generic;
using System.Collections.ObjectModel;
using System.Linq;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Sensors;
using Unity.MLAgents.Sensors.Reflection;
using Unity.MLAgents.Demonstrations;

/// to separate between different agents in the environment.
/// </summary>
public int episodeId;
}
/// <summary>
/// Struct that contains the action information sent from the Brain to the
/// Agent.
/// </summary>
internal struct AgentAction
{
public float[] vectorActions;
public void ClearActions()
{
Array.Clear(storedVectorActions, 0, storedVectorActions.Length);
}
public void CopyActions(ActionBuffers actionBuffers)
{
actionBuffers.PackActions(storedVectorActions);
}
}
/// <summary>

/// can only take an action when it touches the ground, so several frames might elapse between
/// one decision and the need for the next.
///
/// Use the <see cref="OnActionReceived"/> function to implement the actions your agent can take,
/// Use the <see cref="OnActionReceived(float[])"/> function to implement the actions your agent can take,
/// such as moving to reach a goal or interacting with its environment.
///
/// When you call <see cref="EndEpisode"/> on an agent or the agent reaches its <see cref="MaxStep"/> count,

"docs/Learning-Environment-Design-Agents.md")]
[Serializable]
[RequireComponent(typeof(BehaviorParameters))]
public class Agent : MonoBehaviour, ISerializationCallbackReceiver
public partial class Agent : MonoBehaviour, ISerializationCallbackReceiver, IActionReceiver
{
IPolicy m_Brain;
BehaviorParameters m_PolicyFactory;

/// Current Agent information (message sent to Brain).
AgentInfo m_Info;
/// Current Agent action (message sent from Brain).
AgentAction m_Action;
/// Represents the reward the agent accumulated during the current step.
/// It is reset to 0 at the beginning of every step.

internal VectorSensor collectObservationsSensor;
/// <summary>
/// List of IActuators that this Agent will delegate actions to if any exist.
/// </summary>
ActuatorManager m_ActuatorManager;
/// <summary>
/// VectorActuator which is used by default if no other sensors exist on this Agent. This VectorSensor will
/// delegate its actions to <see cref="OnActionReceived(float[])"/> by default in order to keep backward compatibility
/// with the current behavior of Agent.
/// </summary>
IActuator m_VectorActuator;
/// <summary>
/// This is used to avoid allocation of a float array every frame if users are still using the old
/// OnActionReceived method.
/// </summary>
float[] m_LegacyActionCache;
/// <summary>
/// Called when the attached [GameObject] becomes enabled and active.
/// [GameObject]: https://docs.unity3d.com/Manual/GameObjects.html
/// </summary>

m_PolicyFactory = GetComponent<BehaviorParameters>();
m_Info = new AgentInfo();
m_Action = new AgentAction();
sensors = new List<ISensor>();
Academy.Instance.AgentIncrementStep += AgentIncrementStep;

InitializeSensors();
}
using (TimerStack.Instance.Scoped("InitializeActuators"))
{
InitializeActuators();
}
m_Info.storedVectorActions = new float[m_ActuatorManager.TotalNumberOfActions];
// The first time the Academy resets, all Agents in the scene will be
// forced to reset through the <see cref="AgentForceReset"/> event.
// To avoid the Agent resetting twice, the Agents will not begin their

/// set the reward assigned to the current step with a specific value rather than
/// increasing or decreasing it.
///
/// Typically, you assign rewards in the Agent subclass's <see cref="OnActionReceived(float[])"/>
/// Typically, you assign rewards in the Agent subclass's <see cref="IActionReceiver.OnActionReceived"/>
/// implementation after carrying out the received action and evaluating its success.
///
/// Rewards are used during reinforcement learning; they are ignored during inference.

/// <remarks>
/// Call `RequestAction()` to repeat the previous action returned by the agent's
/// most recent decision. A new decision is not requested. When you call this function,
/// the Agent instance invokes <seealso cref="OnActionReceived(float[])"/> with the
/// the Agent instance invokes <seealso cref="IActionReceiver.OnActionReceived"/> with the
/// existing action vector.
///
/// You can use `RequestAction()` in situations where an agent must take an action

/// at the end of an episode.
void ResetData()
{
var param = m_PolicyFactory.BrainParameters;
m_ActionMasker = new DiscreteActionMasker(param);
// If we haven't initialized vectorActions, initialize to 0. This should only
// happen during the creation of the Agent. In subsequent episodes, vectorAction
// should stay the previous action before the Done(), so that it is properly recorded.
if (m_Action.vectorActions == null)
{
m_Action.vectorActions = new float[param.NumActions];
m_Info.storedVectorActions = new float[param.NumActions];
}
m_ActuatorManager?.ResetData();
}
/// <summary>

/// control of an agent using keyboard, mouse, or game controller input.
///
/// Your heuristic implementation can use any decision making logic you specify. Assign decision
/// values to the float[] array, <paramref name="actionsOut"/>, passed to your function as a parameter.
/// values to the <see cref="ActionBuffers.ContinuousActions"/> and <see cref="ActionBuffers.DiscreteActions"/>
/// arrays, passed to your function as a parameter.
/// <seealso cref="OnActionReceived(float[])"/> function, which receives this array and
/// <seealso cref="IActionReceiver.OnActionReceived"/> function, which receives this array and
/// implements the corresponding agent behavior. See [Actions] for more information
/// about agent actions.
/// Note: Do not create a new float array of actions in the `Heuristic()` method,

/// You can also use the [Input System package], which provides a more flexible and
/// configurable input system.
/// <code>
/// public override void Heuristic(float[] actionsOut)
/// public override void Heuristic(ActionBuffers actionsOut)
/// actionsOut[0] = Input.GetAxis("Horizontal");
/// actionsOut[1] = Input.GetKey(KeyCode.Space) ? 1.0f : 0.0f;
/// actionsOut[2] = Input.GetAxis("Vertical");
/// actionsOut.ContinuousActions[0] = Input.GetAxis("Horizontal");
/// actionsOut.ContinuousActions[1] = Input.GetKey(KeyCode.Space) ? 1.0f : 0.0f;
/// actionsOut.ContinuousActions[2] = Input.GetAxis("Vertical");
/// <param name="actionsOut">Array for the output actions.</param>
/// <seealso cref="OnActionReceived(float[])"/>
public virtual void Heuristic(float[] actionsOut)
/// <param name="actionsOut">The <see cref="ActionBuffers"/> which contain the continuous and
/// discrete action buffers to write to.</param>
/// <seealso cref="IActionReceiver.OnActionReceived"/>
public virtual void Heuristic(in ActionBuffers actionsOut)
Debug.LogWarning("Heuristic method called but not implemented. Returning placeholder actions.");
Array.Clear(actionsOut, 0, actionsOut.Length);
// For backward compatibility
switch (m_PolicyFactory.BrainParameters.VectorActionSpaceType)
{
case SpaceType.Continuous:
Heuristic(actionsOut.ContinuousActions.Array);
actionsOut.DiscreteActions.Clear();
break;
case SpaceType.Discrete:
var convertedOut = Array.ConvertAll(actionsOut.DiscreteActions.Array, x => (float)x);
Heuristic(convertedOut);
var discreteActionSegment = actionsOut.DiscreteActions;
for (var i = 0; i < actionsOut.DiscreteActions.Length; i++)
{
discreteActionSegment[i] = (int)convertedOut[i];
}
actionsOut.ContinuousActions.Clear();
break;
}
}
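Pulling the doc-comment example above into a self-contained form, a sketch of the new `ActionBuffers`-based heuristic in a user agent; the `CartAgent` name and the choice of three continuous actions are illustrative, mirroring the remarks rather than any file in this diff.

```csharp
using UnityEngine;
using Unity.MLAgents;
using Unity.MLAgents.Actuators;

// Illustrative agent subclass: maps keyboard input into the new
// ActionBuffers-based Heuristic. Axis names are Unity defaults.
public class CartAgent : Agent
{
    public override void Heuristic(in ActionBuffers actionsOut)
    {
        var continuousActions = actionsOut.ContinuousActions;
        continuousActions[0] = Input.GetAxis("Horizontal");
        continuousActions[1] = Input.GetKey(KeyCode.Space) ? 1.0f : 0.0f;
        continuousActions[2] = Input.GetAxis("Vertical");
    }
}
```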
/// <summary>

#if DEBUG
// Make sure the names are actually unique
for (var i = 0; i < sensors.Count - 1; i++)
{
Debug.Assert(

#endif
}
void InitializeActuators()
{
ActuatorComponent[] attachedActuators;
if (m_PolicyFactory.UseChildActuators)
{
attachedActuators = GetComponentsInChildren<ActuatorComponent>();
}
else
{
attachedActuators = GetComponents<ActuatorComponent>();
}
// Support legacy OnActionReceived
var param = m_PolicyFactory.BrainParameters;
m_VectorActuator = new VectorActuator(this, param.VectorActionSize, param.VectorActionSpaceType);
m_ActuatorManager = new ActuatorManager(attachedActuators.Length + 1);
m_LegacyActionCache = new float[m_VectorActuator.TotalNumberOfActions];
m_ActuatorManager.Add(m_VectorActuator);
foreach (var actuatorComponent in attachedActuators)
{
m_ActuatorManager.Add(actuatorComponent.CreateActuator());
}
}
/// <summary>
/// Sends the Agent info to the linked Brain.
/// </summary>

if (m_Info.done)
{
Array.Clear(m_Info.storedVectorActions, 0, m_Info.storedVectorActions.Length);
m_Info.ClearActions();
Array.Copy(m_Action.vectorActions, m_Info.storedVectorActions, m_Action.vectorActions.Length);
m_ActuatorManager.StoredActions.PackActions(m_Info.storedVectorActions);
m_ActionMasker.ResetMask();
UpdateSensors();
using (TimerStack.Instance.Scoped("CollectObservations"))
{

{
if (m_PolicyFactory.BrainParameters.VectorActionSpaceType == SpaceType.Discrete)
{
CollectDiscreteActionMasks(m_ActionMasker);
}
m_ActuatorManager.WriteActionMask();
m_Info.discreteActionMasks = m_ActionMasker.GetMask();
m_Info.discreteActionMasks = m_ActuatorManager.DiscreteActionMask?.GetMask();
m_Info.reward = m_Reward;
m_Info.done = false;
m_Info.maxStepReached = false;

/// <summary>
/// Returns a read-only view of the observations that were generated in
/// <see cref="CollectObservations(VectorSensor)"/>. This is mainly useful inside of a
/// <see cref="Heuristic(float[])"/> method to avoid recomputing the observations.
/// <see cref="Heuristic(float[], int[])"/> method to avoid recomputing the observations.
/// </summary>
/// <returns>A read-only view of the observations list.</returns>
public ReadOnlyCollection<float> GetObservations()

///
/// [Agents - Actions]: https://github.com/Unity-Technologies/ml-agents/blob/release_6_docs/docs/Learning-Environment-Design-Agents.md#actions
/// </remarks>
/// <seealso cref="OnActionReceived(float[])"/>
public virtual void CollectDiscreteActionMasks(DiscreteActionMasker actionMasker)
/// <seealso cref="IActionReceiver.OnActionReceived"/>
public virtual void WriteDiscreteActionMask(IDiscreteActionMask actionMask)
if (m_ActionMasker == null)
{
m_ActionMasker = new DiscreteActionMasker(actionMask);
}
CollectDiscreteActionMasks(m_ActionMasker);
ActionSpec IActionReceiver.ActionSpec { get; }
/// <summary>
/// Implement `OnActionReceived()` to specify agent behavior at every step, based

/// three values in the action array to use as the force components. During
/// training, the agent's policy learns to set those particular elements of
/// the array to maximize the training rewards the agent receives. (Of course,
/// if you implement a <seealso cref="Heuristic"/> function, it must use the same
/// if you implement a <seealso cref="Heuristic(float[], int[])"/> function, it must use the same
/// elements of the action array for the same purpose since there is no learning
/// involved.)
///

///
/// [Agents - Actions]: https://github.com/Unity-Technologies/ml-agents/blob/release_6_docs/docs/Learning-Environment-Design-Agents.md#actions
/// </remarks>
/// <param name="vectorAction">
/// An array containing the action vector. The length of the array is specified
/// by the <see cref="BrainParameters"/> of the agent's associated
/// <see cref="BehaviorParameters"/> component.
/// <param name="actions">
/// Struct containing the buffers of actions to be executed at this step.
public virtual void OnActionReceived(float[] vectorAction) {}
public virtual void OnActionReceived(ActionBuffers actions)
{
actions.PackActions(m_LegacyActionCache);
OnActionReceived(m_LegacyActionCache);
}
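As a usage note, a hedged sketch of overriding the new `OnActionReceived(ActionBuffers)` entry point shown above; the `RollerAgentExample` class, the force multiplier, and the mapping of the first two continuous actions to a force are assumptions for illustration.

```csharp
using UnityEngine;
using Unity.MLAgents;
using Unity.MLAgents.Actuators;

// Illustrative only: applies the first two continuous actions as a force,
// in the spirit of the force-component example in the remarks above.
public class RollerAgentExample : Agent
{
    public float forceMultiplier = 10f;
    Rigidbody m_Body;

    public override void Initialize()
    {
        m_Body = GetComponent<Rigidbody>();
    }

    public override void OnActionReceived(ActionBuffers actions)
    {
        var act = actions.ContinuousActions;
        m_Body.AddForce(new Vector3(act[0], 0f, act[1]) * forceMultiplier);
    }
}
```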
/// <summary>
/// Implement `OnEpisodeBegin()` to set up an Agent instance at the beginning

public virtual void OnEpisodeBegin() {}
/// <summary>
/// Returns the last action that was decided on by the Agent.
/// Gets the last ActionBuffer for this agent.
/// <returns>
/// The last action that was decided by the Agent (or null if no decision has been made).
/// </returns>
/// <seealso cref="OnActionReceived(float[])"/>
public float[] GetAction()
public ActionBuffers GetStoredActionBuffers()
return m_Action.vectorActions;
return m_ActuatorManager.StoredActions;
}
/// <summary>

if ((m_RequestAction) && (m_Brain != null))
{
m_RequestAction = false;
OnActionReceived(m_Action.vectorActions);
m_ActuatorManager.ExecuteActions();
}
if ((m_StepCount >= MaxStep) && (MaxStep > 0))

void DecideAction()
{
if (m_Action.vectorActions == null)
if (m_ActuatorManager.StoredActions.ContinuousActions.Array == null)
var action = m_Brain?.DecideAction();
if (action == null)
{
Array.Clear(m_Action.vectorActions, 0, m_Action.vectorActions.Length);
}
else
{
Array.Copy(action, m_Action.vectorActions, action.Length);
}
var actions = m_Brain?.DecideAction() ?? new ActionBuffers();
m_Info.CopyActions(actions);
m_ActuatorManager.UpdateActions(actions);
}
}
}

14
com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs


}
#region AgentAction
public static AgentAction ToAgentAction(this AgentActionProto aap)
{
return new AgentAction
{
vectorActions = aap.VectorActions.ToArray()
};
}
public static List<AgentAction> ToAgentActionList(this UnityRLInputProto.Types.ListAgentActionProto proto)
public static List<float[]> ToAgentActionList(this UnityRLInputProto.Types.ListAgentActionProto proto)
var agentActions = new List<AgentAction>(proto.Value.Count);
var agentActions = new List<float[]>(proto.Value.Count);
agentActions.Add(ap.ToAgentAction());
agentActions.Add(ap.VectorActions.ToArray());
}
return agentActions;
}

2
com.unity.ml-agents/Runtime/Communicator/RpcCommunicator.cs


var agentId = m_OrderedAgentsRequestingDecisions[brainName][i];
if (m_LastActionsReceived[brainName].ContainsKey(agentId))
{
m_LastActionsReceived[brainName][agentId] = agentAction.vectorActions;
m_LastActionsReceived[brainName][agentId] = agentAction;
}
}
}

6
com.unity.ml-agents/Runtime/DecisionRequester.cs


/// that the Agent will request a decision every 5 Academy steps.
/// </summary>
[Range(1, 20)]
[Tooltip("The frequency with which the agent requests a decision. A DecisionPeriod " +
"of 5 means that the Agent will request a decision every 5 Academy steps.")]
"of 5 means that the Agent will request a decision every 5 Academy steps.")]
public int DecisionPeriod = 5;
/// <summary>

[Tooltip("Indicates whether or not the agent will take an action during the Academy " +
"steps where it does not request a decision. Has no effect when DecisionPeriod " +
"is set to 1.")]
"steps where it does not request a decision. Has no effect when DecisionPeriod " +
"is set to 1.")]
[FormerlySerializedAs("RepeatAction")]
public bool TakeActionsBetweenDecisions = true;
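As a usage note, a hedged sketch of wiring up the component described by these tooltips from code; the `AddDecisionRequester` helper class is hypothetical and assumes the script sits on the same GameObject as an Agent (DecisionRequester requires one).

```csharp
using UnityEngine;
using Unity.MLAgents;

// Illustrative setup: request a decision every 5 Academy steps and keep
// repeating the last action on the steps in between.
public class AddDecisionRequester : MonoBehaviour
{
    void Awake()
    {
        var requester = gameObject.AddComponent<DecisionRequester>();
        requester.DecisionPeriod = 5;
        requester.TakeActionsBetweenDecisions = true;
    }
}
```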

118
com.unity.ml-agents/Runtime/DiscreteActionMasker.cs


using System;
using System.Collections.Generic;
using System.Linq;
using Unity.MLAgents.Policies;
using Unity.MLAgents.Actuators;
namespace Unity.MLAgents
{

/// may be illegal. For example, if an agent is adjacent to a wall or other obstacle
/// you could mask any actions that direct the agent to move into the blocked space.
/// </remarks>
public class DiscreteActionMasker
public class DiscreteActionMasker : IDiscreteActionMask
/// When using discrete control, is the starting indices of the actions
/// when all the branches are concatenated with each other.
int[] m_StartingActionIndices;
bool[] m_CurrentMask;
readonly BrainParameters m_BrainParameters;
IDiscreteActionMask m_Delegate;
internal DiscreteActionMasker(BrainParameters brainParameters)
internal DiscreteActionMasker(IDiscreteActionMask actionMask)
m_BrainParameters = brainParameters;
m_Delegate = actionMask;
}
/// <summary>

/// <param name="actionIndices">The indices of the masked actions.</param>
public void SetMask(int branch, IEnumerable<int> actionIndices)
{
// If the branch does not exist, raise an error
if (branch >= m_BrainParameters.VectorActionSize.Length)
throw new UnityAgentsException(
"Invalid Action Masking : Branch " + branch + " does not exist.");
var totalNumberActions = m_BrainParameters.VectorActionSize.Sum();
// By default, the masks are null. If we want to specify a new mask, we initialize
// the actionMasks with trues.
if (m_CurrentMask == null)
{
m_CurrentMask = new bool[totalNumberActions];
}
// If this is the first time the masked actions are used, we generate the starting
// indices for each branch.
if (m_StartingActionIndices == null)
{
m_StartingActionIndices = Utilities.CumSum(m_BrainParameters.VectorActionSize);
}
// Perform the masking
foreach (var actionIndex in actionIndices)
{
if (actionIndex >= m_BrainParameters.VectorActionSize[branch])
{
throw new UnityAgentsException(
"Invalid Action Masking: Action Mask is too large for specified branch.");
}
m_CurrentMask[actionIndex + m_StartingActionIndices[branch]] = true;
}
}
/// <summary>
/// Get the current mask for an agent.
/// </summary>
/// <returns>A mask for the agent. A boolean array of length equal to the total number of
/// actions.</returns>
internal bool[] GetMask()
{
if (m_CurrentMask != null)
{
AssertMask();
}
return m_CurrentMask;
m_Delegate.WriteMask(branch, actionIndices);
/// <summary>
/// Makes sure that the current mask is usable.
/// </summary>
void AssertMask()
public void WriteMask(int branch, IEnumerable<int> actionIndices)
// Action Masks can only be used in Discrete Control.
if (m_BrainParameters.VectorActionSpaceType != SpaceType.Discrete)
{
throw new UnityAgentsException(
"Invalid Action Masking : Can only set action mask for Discrete Control.");
}
var numBranches = m_BrainParameters.VectorActionSize.Length;
for (var branchIndex = 0; branchIndex < numBranches; branchIndex++)
{
if (AreAllActionsMasked(branchIndex))
{
throw new UnityAgentsException(
"Invalid Action Masking : All the actions of branch " + branchIndex +
" are masked.");
}
}
m_Delegate.WriteMask(branch, actionIndices);
/// <summary>
/// Resets the current mask for an agent.
/// </summary>
internal void ResetMask()
public bool[] GetMask()
if (m_CurrentMask != null)
{
Array.Clear(m_CurrentMask, 0, m_CurrentMask.Length);
}
return m_Delegate.GetMask();
/// <summary>
/// Checks if all the actions in the input branch are masked.
/// </summary>
/// <param name="branch"> The index of the branch to check.</param>
/// <returns> True if all the actions of the branch are masked.</returns>
bool AreAllActionsMasked(int branch)
public void ResetMask()
if (m_CurrentMask == null)
{
return false;
}
var start = m_StartingActionIndices[branch];
var end = m_StartingActionIndices[branch + 1];
for (var i = start; i < end; i++)
{
if (!m_CurrentMask[i])
{
return false;
}
}
return true;
m_Delegate.ResetMask();
}
}
}
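For orientation, a minimal sketch of how an Agent subclass might use the new `IDiscreteActionMask` path this file now delegates to, illustrating the wall-masking remark above; the class name, branch index, and `m_BlockedLeft` flag are assumptions, not part of this diff.

```csharp
using Unity.MLAgents;
using Unity.MLAgents.Actuators;

// Illustrative only: masks the "move left" action (assumed to be index 1 of
// branch 0) whenever the agent is blocked by a wall on that side.
public class GridAgentExample : Agent
{
    bool m_BlockedLeft; // hypothetical flag set elsewhere by collision checks

    public override void WriteDiscreteActionMask(IDiscreteActionMask actionMask)
    {
        if (m_BlockedLeft)
        {
            actionMask.WriteMask(0, new[] { 1 });
        }
    }
}
```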

17
com.unity.ml-agents/Runtime/Policies/BarracudaPolicy.cs


using System;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Inference;
using Unity.MLAgents.Sensors;

internal class BarracudaPolicy : IPolicy
{
protected ModelRunner m_ModelRunner;
ActionBuffers m_LastActionBuffer;
int m_AgentId;

List<int[]> m_SensorShapes;
SpaceType m_SpaceType;
/// <inheritdoc />
public BarracudaPolicy(

{
var modelRunner = Academy.Instance.GetOrCreateModelRunner(model, brainParameters, inferenceDevice);
m_ModelRunner = modelRunner;
m_SpaceType = brainParameters.VectorActionSpaceType;
}
/// <inheritdoc />

}
/// <inheritdoc />
public float[] DecideAction()
public ref readonly ActionBuffers DecideAction()
return m_ModelRunner?.GetAction(m_AgentId);
var actions = m_ModelRunner?.GetAction(m_AgentId);
if (m_SpaceType == SpaceType.Continuous)
{
m_LastActionBuffer = new ActionBuffers(actions, Array.Empty<int>());
return ref m_LastActionBuffer;
}
m_LastActionBuffer = ActionBuffers.FromDiscreteActions(actions);
return ref m_LastActionBuffer;
}
public void Dispose()

37
com.unity.ml-agents/Runtime/Policies/BehaviorParameters.cs


[Tooltip("Use all Sensor components attached to child GameObjects of this Agent.")]
bool m_UseChildSensors = true;
[HideInInspector]
[SerializeField]
[Tooltip("Use all Actuator components attached to child GameObjects of this Agent.")]
bool m_UseChildActuators = true;
/// <summary>
/// Whether or not to use all the sensor components attached to child GameObjects of the agent.
/// Note that changing this after the Agent has been initialized will not have any effect.

set { m_UseChildSensors = value; }
}
/// <summary>
/// Whether or not to use all the actuator components attached to child GameObjects of the agent.
/// Note that changing this after the Agent has been initialized will not have any effect.
/// </summary>
public bool UseChildActuators
{
get { return m_UseChildActuators; }
set { m_UseChildActuators = value; }
}
[HideInInspector, SerializeField]
ObservableAttributeOptions m_ObservableAttributeHandling = ObservableAttributeOptions.Ignore;

switch (m_BehaviorType)
{
case BehaviorType.HeuristicOnly:
return new HeuristicPolicy(heuristic, m_BrainParameters.NumActions);
return GenerateHeuristicPolicy(heuristic);
case BehaviorType.InferenceOnly:
{
if (m_Model == null)

}
else
{
return new HeuristicPolicy(heuristic, m_BrainParameters.NumActions);
return GenerateHeuristicPolicy(heuristic);
return new HeuristicPolicy(heuristic, m_BrainParameters.NumActions);
return GenerateHeuristicPolicy(heuristic);
}
internal IPolicy GenerateHeuristicPolicy(HeuristicPolicy.ActionGenerator heuristic)
{
var numContinuousActions = 0;
var numDiscreteActions = 0;
if (m_BrainParameters.VectorActionSpaceType == SpaceType.Continuous)
{
numContinuousActions = m_BrainParameters.NumActions;
}
else if (m_BrainParameters.VectorActionSpaceType == SpaceType.Discrete)
{
numDiscreteActions = m_BrainParameters.NumActions;
}
return new HeuristicPolicy(heuristic, numContinuousActions, numDiscreteActions);
}
internal void UpdateAgentPolicy()

20
com.unity.ml-agents/Runtime/Policies/HeuristicPolicy.cs


using System.Collections.Generic;
using System;
using System.Collections;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Sensors;
namespace Unity.MLAgents.Policies

/// </summary>
internal class HeuristicPolicy : IPolicy
{
public delegate void ActionGenerator(float[] actionsOut);
public delegate void ActionGenerator(in ActionBuffers actionBuffers);
float[] m_LastDecision;
ActionBuffers m_ActionBuffers;
bool m_Done;
bool m_DecisionRequested;

/// <inheritdoc />
public HeuristicPolicy(ActionGenerator heuristic, int numActions)
public HeuristicPolicy(ActionGenerator heuristic, int numContinuousActions, int numDiscreteActions)
m_LastDecision = new float[numActions];
var continuousDecision = new ActionSegment<float>(new float[numContinuousActions], 0, numContinuousActions);
var discreteDecision = new ActionSegment<int>(new int[numDiscreteActions], 0, numDiscreteActions);
m_ActionBuffers = new ActionBuffers(continuousDecision, discreteDecision);
}
/// <inheritdoc />

m_Done = info.done;
m_DecisionRequested = true;
public float[] DecideAction()
public ref readonly ActionBuffers DecideAction()
m_Heuristic.Invoke(m_LastDecision);
m_Heuristic.Invoke(m_ActionBuffers);
return m_LastDecision;
return ref m_ActionBuffers;
}
public void Dispose()

public float this[int index]
{
get { return 0.0f; }
set { }
set {}
}
}

3
com.unity.ml-agents/Runtime/Policies/IPolicy.cs


using System;
using System.Collections.Generic;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Sensors;
namespace Unity.MLAgents.Policies

/// it must be taken now. The Brain is expected to update the actions
/// of the Agents at this point the latest.
/// </summary>
float[] DecideAction();
ref readonly ActionBuffers DecideAction();
}
}

15
com.unity.ml-agents/Runtime/Policies/RemotePolicy.cs


using UnityEngine;
using System.Collections.Generic;
using System;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Sensors;
namespace Unity.MLAgents.Policies

{
int m_AgentId;
string m_FullyQualifiedBehaviorName;
SpaceType m_SpaceType;
ActionBuffers m_LastActionBuffer;
internal ICommunicator m_Communicator;

{
m_FullyQualifiedBehaviorName = fullyQualifiedBehaviorName;
m_Communicator = Academy.Instance.Communicator;
m_SpaceType = brainParameters.VectorActionSpaceType;
m_Communicator.SubscribeBrain(m_FullyQualifiedBehaviorName, brainParameters);
}

}
/// <inheritdoc />
public float[] DecideAction()
public ref readonly ActionBuffers DecideAction()
return m_Communicator?.GetActions(m_FullyQualifiedBehaviorName, m_AgentId);
var actions = m_Communicator?.GetActions(m_FullyQualifiedBehaviorName, m_AgentId);
if (m_SpaceType == SpaceType.Continuous)
{
m_LastActionBuffer = new ActionBuffers(actions, Array.Empty<int>());
return ref m_LastActionBuffer;
}
m_LastActionBuffer = ActionBuffers.FromDiscreteActions(actions);
return ref m_LastActionBuffer;
}
public void Dispose()

3
com.unity.ml-agents/Tests/Editor/BehaviorParameterTests.cs


using NUnit.Framework;
using Unity.MLAgents.Actuators;
using UnityEngine;
using Unity.MLAgents.Policies;

public class BehaviorParameterTests
{
static void DummyHeuristic(float[] actionsOut)
static void DummyHeuristic(in ActionBuffers actionsOut)
{
// No-op
}

48
com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs


using NUnit.Framework;
using System.Reflection;
using System.Collections.Generic;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Sensors;
using Unity.MLAgents.Sensors.Reflection;
using Unity.MLAgents.Policies;

{
public Action OnRequestDecision;
ObservationWriter m_ObsWriter = new ObservationWriter();
static ActionSpec s_ActionSpec = ActionSpec.MakeContinuous(1);
static ActionBuffers s_EmptyActionBuffers = new ActionBuffers(new float[1], Array.Empty<int>());
public void RequestDecision(AgentInfo info, List<ISensor> sensors)
{
foreach (var sensor in sensors)

OnRequestDecision?.Invoke();
}
public float[] DecideAction() { return new float[0]; }
public ref readonly ActionBuffers DecideAction() { return ref s_EmptyActionBuffers; }
public void Dispose() {}
}

sensor.AddObservation(collectObservationsCallsForEpisode);
}
public override void OnActionReceived(float[] vectorAction)
public override void OnActionReceived(ActionBuffers buffers)
{
agentActionCalls += 1;
agentActionCallsForEpisode += 1;

agentActionCallsForEpisode = 0;
}
public override void Heuristic(float[] actionsOut)
public override void Heuristic(in ActionBuffers actionsOut)
actionsOut[0] = obs[0];
var continuousActions = actionsOut.ContinuousActions;
continuousActions[0] = (int)obs[0];
heuristicCalls++;
}
}

public void TestAgent()
{
var agentGo1 = new GameObject("TestAgent");
var bp1 = agentGo1.AddComponent<BehaviorParameters>();
bp1.BrainParameters.VectorActionSize = new[] { 1 };
bp1.BrainParameters.VectorActionSpaceType = SpaceType.Continuous;
var bp2 = agentGo2.AddComponent<BehaviorParameters>();
bp2.BrainParameters.VectorActionSize = new[] { 1 };
bp2.BrainParameters.VectorActionSpaceType = SpaceType.Continuous;
agentGo2.AddComponent<TestAgent>();
var agent2 = agentGo2.GetComponent<TestAgent>();

public void TestAgent()
{
var agentGo1 = new GameObject("TestAgent");
var bp1 = agentGo1.AddComponent<BehaviorParameters>();
bp1.BrainParameters.VectorActionSize = new[] { 1 };
bp1.BrainParameters.VectorActionSpaceType = SpaceType.Continuous;
var bp2 = agentGo2.AddComponent<BehaviorParameters>();
bp2.BrainParameters.VectorActionSize = new[] { 1 };
bp2.BrainParameters.VectorActionSpaceType = SpaceType.Continuous;
agentGo2.AddComponent<TestAgent>();
var agent2 = agentGo2.GetComponent<TestAgent>();

public void AssertStackingReset()
{
var agentGo1 = new GameObject("TestAgent");
agentGo1.AddComponent<TestAgent>();
var bp1 = agentGo1.AddComponent<BehaviorParameters>();
bp1.BrainParameters.VectorActionSize = new[] { 1 };
bp1.BrainParameters.VectorActionSpaceType = SpaceType.Continuous;
var agent1 = agentGo1.AddComponent<TestAgent>();
var agent1 = agentGo1.GetComponent<TestAgent>();
var aca = Academy.Instance;
agent1.LazyInitialize();
var policy = new TestPolicy();

public void TestCumulativeReward()
{
var agentGo1 = new GameObject("TestAgent");
agentGo1.AddComponent<TestAgent>();
var agent1 = agentGo1.GetComponent<TestAgent>();
var bp1 = agentGo1.AddComponent<BehaviorParameters>();
bp1.BrainParameters.VectorActionSize = new[] { 1 };
bp1.BrainParameters.VectorActionSpaceType = SpaceType.Continuous;
var agent1 = agentGo1.AddComponent<TestAgent>();
agentGo2.AddComponent<TestAgent>();
var agent2 = agentGo2.GetComponent<TestAgent>();
var bp2 = agentGo2.AddComponent<BehaviorParameters>();
bp2.BrainParameters.VectorActionSize = new[] { 1 };
bp2.BrainParameters.VectorActionSpaceType = SpaceType.Continuous;
var agent2 = agentGo2.AddComponent<TestAgent>();
var aca = Academy.Instance;
var decisionRequester = agent1.gameObject.AddComponent<DecisionRequester>();

public void TestMaxStepsReset()
{
var agentGo1 = new GameObject("TestAgent");
var bp1 = agentGo1.AddComponent<BehaviorParameters>();
bp1.BrainParameters.VectorActionSize = new[] { 1 };
bp1.BrainParameters.VectorActionSpaceType = SpaceType.Continuous;
agentGo1.AddComponent<TestAgent>();
var agent1 = agentGo1.GetComponent<TestAgent>();
var aca = Academy.Instance;

{
// Make sure that Agents with HeuristicPolicies step their sensors each Academy step.
var agentGo1 = new GameObject("TestAgent");
var bp1 = agentGo1.AddComponent<BehaviorParameters>();
bp1.BrainParameters.VectorActionSize = new[] { 1 };
bp1.BrainParameters.VectorActionSpaceType = SpaceType.Continuous;
agentGo1.AddComponent<TestAgent>();
var agent1 = agentGo1.GetComponent<TestAgent>();
var aca = Academy.Instance;

7
com.unity.ml-agents/Tests/Runtime/RuntimeAPITest.cs


using Unity.MLAgents.Sensors;
using Unity.MLAgents.Sensors.Reflection;
using NUnit.Framework;
using Unity.MLAgents.Actuators;
using UnityEngine;
using UnityEngine.TestTools;

[Observable]
public float ObservableFloat;
public override void Heuristic(float[] actionsOut)
public override void Heuristic(in ActionBuffers actionsOut)
{
numHeuristicCalls++;
base.Heuristic(actionsOut);

Academy.Instance.EnvironmentStep();
var actions = agent.GetAction();
var actions = agent.GetStoredActionBuffers().DiscreteActions;
Assert.AreEqual(new[] {0.0f, 0.0f}, actions);
Assert.AreEqual(new ActionSegment<int>(new[] {0, 0}), actions);
Assert.AreEqual(1, agent.numHeuristicCalls);
Academy.Instance.EnvironmentStep();

2
config/ppo/WalkerDynamic.yaml


gamma: 0.995
strength: 1.0
keep_checkpoints: 5
max_steps: 20000000
max_steps: 30000000
time_horizon: 1000
summary_freq: 30000
threaded: true

2
config/ppo/WalkerStatic.yaml


gamma: 0.995
strength: 1.0
keep_checkpoints: 5
max_steps: 20000000
max_steps: 30000000
time_horizon: 1000
summary_freq: 30000
threaded: true

51
docs/Learning-Environment-Create-New.md


Note the `forceMultiplier` class variable is defined before the function. Since `forceMultiplier` is
public, you can set the value from the Inspector window.
## Final Editor Setup
Now that all the GameObjects and ML-Agent components are in place, it is time
to connect everything together in the Unity Editor. This involves changing some
of the Agent Component's properties so that they are compatible with our Agent
code.
1. Select the **RollerAgent** GameObject to show its properties in the Inspector
window.
1. Drag the Target GameObject in the Hierarchy into the `Target` field in RollerAgent Script.
1. Add the `Decision Requester` script with the Add Component button from the
RollerAgent Inspector.
1. Change **Decision Period** to `10`. For more information on decisions, see [the Agent documentation](Learning-Environment-Design-Agents.md#decisions)
1. Drag the Target GameObject from the Hierarchy window to the RollerAgent
Target field.
1. Add the `Behavior Parameters` script with the Add Component button from the
RollerAgent Inspector.
1. Modify the Behavior Parameters of the Agent:
- `Behavior Name` to _RollerBall_
- `Vector Observation` > `Space Size` = 8
- `Vector Action` > `Space Type` = **Continuous**
- `Vector Action` > `Space Size` = 2
Now you are ready to test the environment before training.
## Testing the Environment
It is always a good idea to first test your environment by controlling the Agent

Console window and that the Agent resets when it reaches its target or falls
from the platform.
## Final Editor Setup
Now that all the GameObjects and ML-Agent components are in place, it is time
to connect everything together in the Unity Editor. This involves changing some
of the Agent Component's properties so that they are compatible with our Agent
code.
1. Select the **RollerAgent** GameObject to show its properties in the Inspector
window.
1. Add the `Decision Requester` script with the Add Component button from the
RollerAgent Inspector.
1. Change **Decision Period** to `10`. For more information on decisions, see [the Agent documentation](Learning-Environment-Design-Agents.md#decisions)
1. Drag the Target GameObject from the Hierarchy window to the RollerAgent
Target field.
1. Add the `Behavior Parameters` script with the Add Component button from the
RollerAgent Inspector.
1. Modify the Behavior Parameters of the Agent:
- `Behavior Name` to _RollerBall_
- `Vector Observation` > `Space Size` = 8
- `Vector Action` > `Space Type` = **Continuous**
- `Vector Action` > `Space Size` = 2
Now you are ready to test the environment before training.
## Training the Environment
The process is the same as described in the

pass to the `mlagents-learn` program. Create a new `rollerball_config.yaml` file
and include the following hyperparameter values:
under `config/` and include the following hyperparameter values:
```yml
behaviors:
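  # Hedged sketch only: the tutorial's actual hyperparameter values are elided
  # in this diff view. Typical PPO settings for a small environment such as
  # RollerBall look roughly like the following (names and values are indicative).
  RollerBall:
    trainer_type: ppo
    hyperparameters:
      batch_size: 10
      buffer_size: 100
      learning_rate: 3.0e-4
    network_settings:
      hidden_units: 128
      num_layers: 2
    reward_signals:
      extrinsic:
        gamma: 0.99
        strength: 1.0
    max_steps: 500000
    time_horizon: 64
    summary_freq: 10000
```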

31
docs/Learning-Environment-Examples.md


- Set-up: Physics-based Humanoid agents with 26 degrees of freedom. These DOFs
correspond to articulation of the following body-parts: hips, chest, spine,
head, thighs, shins, feet, arms, forearms and hands.
- Goal: The agent must move its body toward the goal direction as quickly as
possible without falling.
- `WalkerStatic` - Goal direction is always forward.
- Goal: The agent must move its body toward the goal direction without falling.
- `WalkerDynamicVariableSpeed` - Goal direction and walking speed are randomized.
- `WalkerStatic` - Goal direction is always forward.
- `WalkerStaticVariableSpeed` - Goal direction is always forward. Walking
speed is randomized.
- +0.02 times body velocity in the goal direction. (run towards target)
- +0.01 times head direction alignment with goal direction. (face towards target)
- +0.005 times head y position - left foot y position. (encourage head height)
- +0.005 times head y position - right foot y position. (encourage head height)
The reward function is now geometric, meaning the reward each step is the product
of all the reward terms rather than their sum; this encourages the agent to keep
every term high instead of maximizing only the easiest rewards (see the short
formula after this list).
- Body velocity matches goal velocity. (normalized between (0,1))
- Head direction alignment with goal direction. (normalized between (0,1))
- Vector Observation space: 236 variables corresponding to position, rotation,
- Vector Observation space: 243 variables corresponding to position, rotation,
velocity, and angular velocities of each limb, along with goal direction.
- Vector Action space: (Continuous) Size of 39, corresponding to target
rotations and strength applicable to the joints.
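A hedged restatement of the "geometric" reward above in formula form; the symbols are introduced here for illustration and do not appear in the source. Assuming each listed term (velocity match, head alignment) is first normalized to (0, 1):

$$
r_t = \prod_i \hat{r}_{i,t}, \qquad \hat{r}_{i,t} \in (0, 1)
$$

Because any single term near zero pulls the whole product toward zero, the agent cannot trade one term off against another, which is the stated intent.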

- Recommended Minimum:
- Recommended Maximum:
- hip_mass: Mass of the hip component of the walker
- Default: 15
- Default: 8
- Recommended Minimum: 7
- Recommended Maximum: 28
- chest_mass: Mass of the chest component of the walker

- spine_mass: Mass of the spine component of the walker
- Default: 10
- Default: 8
- Benchmark Mean Reward for `WalkerStatic`: 1500
- Benchmark Mean Reward for `WalkerDynamic`: 700
- Benchmark Mean Reward for `WalkerDynamic`: 2500
- Benchmark Mean Reward for `WalkerDynamicVariableSpeed`: 2500
- Benchmark Mean Reward for `WalkerStatic`: 3500
- Benchmark Mean Reward for `WalkerStaticVariableSpeed`: 3500
## Pyramids

2
gym-unity/gym_unity/__init__.py


# Version of the library that will be used to upload to pypi
__version__ = "0.19.0"
__version__ = "0.20.0.dev0"
# Git tag that will be checked to determine whether to trigger upload to pypi
__release_tag__ = "release_6"

2
ml-agents-envs/mlagents_envs/__init__.py


# Version of the library that will be used to upload to pypi
__version__ = "0.19.0"
__version__ = "0.20.0.dev0"
# Git tag that will be checked to determine whether to trigger upload to pypi
__release_tag__ = "release_6"

8
ml-agents-envs/mlagents_envs/exception.py


def __init__(self, worker_id):
message = self.MESSAGE_TEMPLATE.format(str(worker_id))
super().__init__(message)
class UnityPolicyException(UnityException):
"""
Related to errors with the Trainer.
"""
pass

2
ml-agents/mlagents/trainers/__init__.py


# Version of the library that will be used to upload to pypi
__version__ = "0.19.0"
__version__ = "0.20.0.dev0"
# Git tag that will be checked to determine whether to trigger upload to pypi
__release_tag__ = "release_6"

2
ml-agents/mlagents/trainers/ghost/trainer.py


"""
policy = self.trainer.create_policy(parsed_behavior_id, behavior_spec)
policy.create_tf_graph()
policy.initialize_or_load()
self.trainer.saver.initialize_or_load(policy)
policy.init_load_weights()
team_id = parsed_behavior_id.team_id
self.controller.subscribe_team_id(team_id, self)

3
ml-agents/mlagents/trainers/optimizer/tf_optimizer.py


self.reward_signals[reward_signal.value].update_dict
)
@classmethod
self, learning_rate: tf.Tensor, name: str = "Adam"
cls, learning_rate: tf.Tensor, name: str = "Adam"
) -> tf.train.Optimizer:
return tf.train.AdamOptimizer(learning_rate=learning_rate, name=name)

15
ml-agents/mlagents/trainers/policy/policy.py


from mlagents_envs.base_env import DecisionSteps
from mlagents_envs.exception import UnityException
from mlagents.model_serialization import SerializationSettings
from mlagents.trainers.action_info import ActionInfo
from mlagents_envs.base_env import BehaviorSpec
from mlagents.trainers.settings import TrainerSettings, NetworkSettings

seed: int,
behavior_spec: BehaviorSpec,
trainer_settings: TrainerSettings,
model_path: str,
load: bool = False,
tanh_squash: bool = False,
reparameterize: bool = False,
condition_sigma_on_obs: bool = True,

self.vis_obs_size = sum(
1 for shape in behavior_spec.observation_shapes if len(shape) == 3
)
self.model_path = model_path
self.initialize_path = self.trainer_settings.init_path
self._keep_checkpoints = self.trainer_settings.keep_checkpoints
self.use_continuous_act = behavior_spec.is_action_continuous()
self.num_branches = self.behavior_spec.action_size
self.previous_action_dict: Dict[str, np.array] = {}

self.load = load
self.h_size = self.network_settings.hidden_units
num_layers = self.network_settings.num_layers
if num_layers < 1:

@abstractmethod
def get_current_step(self):
pass
@abstractmethod
def checkpoint(self, checkpoint_path: str, settings: SerializationSettings) -> None:
pass
@abstractmethod
def save(self, output_filepath: str, settings: SerializationSettings) -> None:
pass
@abstractmethod

115
ml-agents/mlagents/trainers/policy/tf_policy.py


from typing import Any, Dict, List, Optional, Tuple
from typing import Any, Dict, List, Optional, Tuple, Callable
from mlagents.model_serialization import SerializationSettings, export_policy_model
from mlagents.tf_utils import tf
from mlagents import tf_utils
from mlagents_envs.exception import UnityException

GaussianDistribution,
MultiCategoricalDistribution,
)
from mlagents.tf_utils.globals import get_rank
logger = get_logger(__name__)

functions to save/load models and create the input placeholders.
"""
# Callback function used at the start of training to synchronize weights.
# By default, this does nothing.
# If this needs to be used, it should be done from outside ml-agents.
broadcast_global_variables: Callable[[int], None] = lambda root_rank: None
model_path: str,
load: bool = False,
tanh_squash: bool = False,
reparameterize: bool = False,
condition_sigma_on_obs: bool = True,

:param seed: Random seed to use for TensorFlow.
:param brain: The corresponding Brain for this policy.
:param trainer_settings: The trainer parameters.
:param model_path: Where to load/save the model.
:param load: If True, load model from model_path. Otherwise, create new model.
model_path,
load,
tanh_squash,
reparameterize,
condition_sigma_on_obs,

self.sess = tf.Session(
config=tf_utils.generate_session_config(), graph=self.graph
)
self.saver: Optional[tf.Operation] = None
self.rank = get_rank()
if create_tf_graph:
self.create_tf_graph()

# We do an initialize to make the Policy usable out of the box. If an optimizer is needed,
# it will re-load the full graph
self._initialize_graph()
self.initialize()
def _create_encoder(
self,

ver = LooseVersion(version_string)
return tuple(map(int, ver.version[0:3]))
def _check_model_version(self, version: str) -> None:
"""
Checks whether the model being loaded was created with the same version of
ML-Agents, and throws a warning if not.
"""
if self.version_tensors is not None:
loaded_ver = tuple(
num.eval(session=self.sess) for num in self.version_tensors
)
if loaded_ver != TFPolicy._convert_version_string(version):
logger.warning(
f"The model checkpoint you are loading from was saved with ML-Agents version "
f"{loaded_ver[0]}.{loaded_ver[1]}.{loaded_ver[2]} but your current ML-Agents"
f"version is {version}. Model may not behave properly."
)
def _initialize_graph(self):
def initialize(self):
self.saver = tf.train.Saver(max_to_keep=self._keep_checkpoints)
def _load_graph(self, model_path: str, reset_global_steps: bool = False) -> None:
with self.graph.as_default():
self.saver = tf.train.Saver(max_to_keep=self._keep_checkpoints)
logger.info(f"Loading model from {model_path}.")
ckpt = tf.train.get_checkpoint_state(model_path)
if ckpt is None:
raise UnityPolicyException(
"The model {} could not be loaded. Make "
"sure you specified the right "
"--run-id and that the previous run you are loading from had the same "
"behavior names.".format(model_path)
)
try:
self.saver.restore(self.sess, ckpt.model_checkpoint_path)
except tf.errors.NotFoundError:
raise UnityPolicyException(
"The model {} was found but could not be loaded. Make "
"sure the model is from the same version of ML-Agents, has the same behavior parameters, "
"and is using the same trainer configuration as the current run.".format(
model_path
)
)
self._check_model_version(__version__)
if reset_global_steps:
self._set_step(0)
logger.info(
"Starting training from step 0 and saving to {}.".format(
self.model_path
)
)
else:
logger.info(f"Resuming training from step {self.get_current_step()}.")
def initialize_or_load(self):
# If there is an initialize path, load from that. Else, load from the set model path.
# If load is set to True, don't reset steps to 0. Else, do. This allows a user to,
# e.g., resume from an initialize path.
reset_steps = not self.load
if self.initialize_path is not None:
self._load_graph(self.initialize_path, reset_global_steps=reset_steps)
elif self.load:
self._load_graph(self.model_path, reset_global_steps=reset_steps)
else:
self._initialize_graph()
def get_weights(self):
with self.graph.as_default():
_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)

step = self.sess.run(self.global_step)
return step
def _set_step(self, step: int) -> int:
def set_step(self, step: int) -> int:
"""
Sets current model step to step without creating additional ops.
:param step: Step to set the current model step to.

:return:list of update var names
"""
return list(self.update_dict.keys())
def checkpoint(self, checkpoint_path: str, settings: SerializationSettings) -> None:
"""
Checkpoints the policy on disk.
:param checkpoint_path: filepath to write the checkpoint
:param settings: SerializationSettings for exporting the model.
"""
# Save the TF checkpoint and graph definition
with self.graph.as_default():
if self.saver:
self.saver.save(self.sess, f"{checkpoint_path}.ckpt")
tf.train.write_graph(
self.graph, self.model_path, "raw_graph_def.pb", as_text=False
)
# also save the policy so we have optimized model files for each checkpoint
self.save(checkpoint_path, settings)
def save(self, output_filepath: str, settings: SerializationSettings) -> None:
"""
Saves the serialized model, given a path and SerializationSettings
This method will save the policy graph to the given filepath. The path
should be provided without an extension as multiple serialized model formats
may be generated as a result.
:param output_filepath: path (without suffix) for the model file(s)
:param settings: SerializationSettings for how to save the model.
"""
export_policy_model(output_filepath, settings, self.graph, self.sess)
def update_normalization(self, vector_obs: np.ndarray) -> None:
"""

10
ml-agents/mlagents/trainers/ppo/optimizer.py


self.stream_names = list(self.reward_signals.keys())
self.tf_optimizer: Optional[tf.train.AdamOptimizer] = None
self.tf_optimizer_op: Optional[tf.train.Optimizer] = None
self.grads = None
self.update_batch: Optional[tf.Operation] = None

"decay_beta": self.decay_beta,
}
)
self.policy.initialize_or_load()
def _create_cc_critic(
self, h_size: int, num_layers: int, vis_encode_type: EncoderType

)
def _create_ppo_optimizer_ops(self):
self.tf_optimizer = self.create_optimizer_op(self.learning_rate)
self.grads = self.tf_optimizer.compute_gradients(self.loss)
self.update_batch = self.tf_optimizer.minimize(self.loss)
self.tf_optimizer_op = self.create_optimizer_op(self.learning_rate)
self.grads = self.tf_optimizer_op.compute_gradients(self.loss)
self.update_batch = self.tf_optimizer_op.minimize(self.loss)
@timed
def update(self, batch: AgentBuffer, num_sequences: int) -> Dict[str, float]:

17
ml-agents/mlagents/trainers/ppo/trainer.py


:param artifact_path: The directory within which to store artifacts from this trainer.
"""
super().__init__(
brain_name, trainer_settings, training, artifact_path, reward_buff_cap
brain_name, trainer_settings, training, load, artifact_path, reward_buff_cap
self.load = load
self.seed = seed
self.policy: Policy = None # type: ignore

self.seed,
behavior_spec,
self.trainer_settings,
model_path=self.artifact_path,
load=self.load,
def create_ppo_optimizer(self) -> PPOOptimizer:
return PPOOptimizer(cast(TFPolicy, self.policy), self.trainer_settings)
def add_policy(
self, parsed_behavior_id: BehaviorIdentifiers, policy: Policy

)
self.policy = policy
self.policies[parsed_behavior_id.behavior_id] = policy
self.optimizer = PPOOptimizer(
cast(TFPolicy, self.policy), self.trainer_settings
)
self.optimizer = self.create_ppo_optimizer()
self.saver.register(self.policy)
self.saver.register(self.optimizer)
self.saver.initialize_or_load()
# Needed to resume loads properly
self.step = policy.get_current_step()

2
ml-agents/mlagents/trainers/sac/optimizer.py


[self.policy.update_normalization_op, target_update_norm]
)
self.policy.initialize_or_load()
self.stats_name_to_update_name = {
"Losses/Value Loss": "value_loss",
"Losses/Policy Loss": "policy_loss",

17
ml-agents/mlagents/trainers/sac/trainer.py


:param artifact_path: The directory within which to store artifacts from this trainer.
"""
super().__init__(
brain_name, trainer_settings, training, artifact_path, reward_buff_cap
brain_name, trainer_settings, training, load, artifact_path, reward_buff_cap
self.load = load
self.seed = seed
self.policy: Policy = None # type: ignore
self.optimizer: SACOptimizer = None # type: ignore

self.seed,
behavior_spec,
self.trainer_settings,
self.artifact_path,
self.load,
tanh_squash=True,
reparameterize=True,
create_tf_graph=False,

for stat, stat_list in batch_update_stats.items():
self._stats_reporter.add_stat(stat, np.mean(stat_list))
def create_sac_optimizer(self) -> SACOptimizer:
return SACOptimizer(cast(TFPolicy, self.policy), self.trainer_settings)
def add_policy(
self, parsed_behavior_id: BehaviorIdentifiers, policy: Policy
) -> None:

)
self.policy = policy
self.policies[parsed_behavior_id.behavior_id] = policy
self.optimizer = SACOptimizer(
cast(TFPolicy, self.policy), self.trainer_settings
)
self.optimizer = self.create_sac_optimizer()
self.saver.register(self.policy)
self.saver.register(self.optimizer)
self.saver.initialize_or_load()
# Needed to resume loads properly
self.step = policy.get_current_step()
# Assume steps were updated at the correct ratio before

8
ml-agents/mlagents/trainers/settings.py


return {key: cattr.unstructure(val) for key, val in d.items()}
class SerializationSettings:
convert_to_barracuda = True
convert_to_onnx = True
onnx_opset = 9
@attr.s(auto_attribs=True)
class ExportableSettings:
def as_dict(self):

PROGRESS: str = "progress"
REWARD: str = "reward"
behavior: str
behavior: str = attr.ib(default="")
min_lesson_length: int = 0
signal_smoothing: bool = True
threshold: float = attr.ib(default=0.0)

41
ml-agents/mlagents/trainers/stats.py


from mlagents_envs.logging_util import get_logger
from mlagents_envs.timers import set_gauge
from mlagents.tf_utils import tf, generate_session_config
from mlagents.tf_utils.globals import get_rank
logger = get_logger(__name__)

class GaugeWriter(StatsWriter):
"""
Write all stats that we recieve to the timer gauges, so we can track them offline easily
Write all stats that we receive to the timer gauges, so we can track them offline easily
"""
@staticmethod

# If self-play, we want to print ELO as well as reward
self.self_play = False
self.self_play_team = -1
self.rank = get_rank()
def write_stats(
self, category: str, values: Dict[str, StatsSummary], step: int

stats_summary = stats_summary = values["Is Training"]
stats_summary = values["Is Training"]
elapsed_time = time.time() - self.training_start_time
log_info: List[str] = [category]
log_info.append(f"Step: {step}")
log_info.append(f"Time Elapsed: {elapsed_time:0.3f} s")
logger.info(
"{}: Step: {}. "
"Time Elapsed: {:0.3f} s "
"Mean "
"Reward: {:0.3f}"
". Std of Reward: {:0.3f}. {}".format(
category,
step,
time.time() - self.training_start_time,
stats_summary.mean,
stats_summary.std,
is_training,
)
)
if self.rank is not None:
log_info.append(f"Rank: {self.rank}")
log_info.append(f"Mean Reward: {stats_summary.mean:0.3f}")
log_info.append(f"Std of Reward: {stats_summary.std:0.3f}")
log_info.append(is_training)
logger.info(f"{category} ELO: {elo_stats.mean:0.3f}. ")
log_info.append(f"ELO: {elo_stats.mean:0.3f}")
logger.info(
"{}: Step: {}. No episode was completed since last summary. {}".format(
category, step, is_training
)
)
log_info.append("No episode was completed since last summary")
log_info.append(is_training)
logger.info(". ".join(log_info))
def add_property(
self, category: str, property_type: StatsPropertyType, value: Any

27
ml-agents/mlagents/trainers/tests/test_barracuda_converter.py


import os
import tempfile
import pytest
from mlagents.trainers.tests.test_nn_policy import create_policy_mock
from mlagents.trainers.settings import TrainerSettings
from mlagents.tf_utils import tf
from mlagents.model_serialization import SerializationSettings
def test_barracuda_converter():

# cleanup
os.remove(tmpfile)
@pytest.mark.parametrize("discrete", [True, False], ids=["discrete", "continuous"])
@pytest.mark.parametrize("visual", [True, False], ids=["visual", "vector"])
@pytest.mark.parametrize("rnn", [True, False], ids=["rnn", "no_rnn"])
def test_policy_conversion(tmpdir, rnn, visual, discrete):
tf.reset_default_graph()
dummy_config = TrainerSettings()
policy = create_policy_mock(
dummy_config,
use_rnn=rnn,
model_path=os.path.join(tmpdir, "test"),
use_discrete=discrete,
use_visual=visual,
)
settings = SerializationSettings(policy.model_path, "MockBrain")
checkpoint_path = f"{tmpdir}/MockBrain-1"
policy.checkpoint(checkpoint_path, settings)
# These checks taken from test_barracuda_converter
assert os.path.isfile(checkpoint_path + ".nn")
assert os.path.getsize(checkpoint_path + ".nn") > 100
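The new test above stacks three parametrize decorators so the .nn export is exercised for every rnn/visual/discrete combination. A tiny sketch of how stacked parametrization multiplies into eight collected cases:

import pytest


@pytest.mark.parametrize("discrete", [True, False], ids=["discrete", "continuous"])
@pytest.mark.parametrize("visual", [True, False], ids=["visual", "vector"])
@pytest.mark.parametrize("rnn", [True, False], ids=["rnn", "no_rnn"])
def test_combinations(rnn, visual, discrete):
    # pytest collects 2 x 2 x 2 = 8 cases, one per id combination.
    assert isinstance(rnn, bool) and isinstance(visual, bool) and isinstance(discrete, bool)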

10
ml-agents/mlagents/trainers/tests/test_bcmodule.py


NetworkSettings.MemorySettings() if use_rnn else None
)
policy = TFPolicy(
0,
mock_behavior_specs,
trainer_config,
"test",
False,
tanhresample,
tanhresample,
0, mock_behavior_specs, trainer_config, tanhresample, tanhresample
)
with policy.graph.as_default():
bc_module = BCModule(

default_num_epoch=3,
settings=bc_settings,
)
policy.initialize_or_load() # Normally the optimizer calls this after the BCModule is created
policy.initialize() # Normally the optimizer calls this after the BCModule is created
return bc_module

62
ml-agents/mlagents/trainers/tests/test_env_param_manager.py


yaml.safe_load(test_bad_curriculum_all_competion_criteria_config_yaml)
)
param_manager = EnvironmentParameterManager(
run_options.environment_parameters, 1337, False
)
assert param_manager.update_lessons(
trainer_steps={"fake_behavior": 500},
trainer_max_steps={"fake_behavior": 1000},
trainer_reward_buffer={"fake_behavior": [1000] * 101},
) == (True, True)
assert param_manager.update_lessons(
trainer_steps={"fake_behavior": 500},
trainer_max_steps={"fake_behavior": 1000},
trainer_reward_buffer={"fake_behavior": [1000] * 101},
) == (True, True)
assert param_manager.update_lessons(
trainer_steps={"fake_behavior": 500},
trainer_max_steps={"fake_behavior": 1000},
trainer_reward_buffer={"fake_behavior": [1000] * 101},
) == (False, False)
assert param_manager.get_current_lesson_number() == {"param_1": 2}
param_manager = EnvironmentParameterManager(
run_options.environment_parameters, 1337, False
)
assert param_manager.update_lessons(
trainer_steps={"fake_behavior": 500},
trainer_max_steps={"fake_behavior": 1000},
trainer_reward_buffer={"fake_behavior": [1000] * 101},
) == (True, True)
assert param_manager.update_lessons(
trainer_steps={"fake_behavior": 500},
trainer_max_steps={"fake_behavior": 1000},
trainer_reward_buffer={"fake_behavior": [1000] * 101},
) == (True, True)
assert param_manager.update_lessons(
trainer_steps={"fake_behavior": 500},
trainer_max_steps={"fake_behavior": 1000},
trainer_reward_buffer={"fake_behavior": [1000] * 101},
) == (False, False)
assert param_manager.get_current_lesson_number() == {"param_1": 2}
test_everything_config_yaml = """

"param_2": GaussianSettings(seed=1337 + 3, mean=4, st_dev=5),
"param_3": ConstantSettings(seed=1337 + 3 + 1, value=20),
}
test_curriculum_no_behavior_yaml = """
environment_parameters:
param_1:
curriculum:
- name: Lesson1
completion_criteria:
measure: reward
threshold: 30
min_lesson_length: 100
require_reset: true
value: 1
- name: Lesson2
value: 2
"""
def test_curriculum_no_behavior():
with pytest.raises(TypeError):
run_options = RunOptions.from_dict(
yaml.safe_load(test_curriculum_no_behavior_yaml)
)
EnvironmentParameterManager(run_options.environment_parameters, 1337, False)
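The curriculum tests above drive update_lessons() with fake trainer stats and expect a (lesson_updated, reset_needed) tuple back. A loose, illustrative sketch of threshold-based lesson progression; this is not the real EnvironmentParameterManager API:

def update_lesson(lesson_index, lessons, reward_buffer, min_lesson_length=100):
    """Return (updated, reset_needed) in the spirit of the asserts above."""
    if lesson_index >= len(lessons) - 1 or len(reward_buffer) < min_lesson_length:
        return False, False
    lesson = lessons[lesson_index]
    mean_reward = sum(reward_buffer) / len(reward_buffer)
    if mean_reward >= lesson["threshold"]:
        return True, lesson.get("require_reset", False)
    return False, False


lessons = [
    {"threshold": 30, "require_reset": True},
    {"threshold": 60},
    {},  # final lesson: nothing left to advance to
]
print(update_lesson(0, lessons, [1000] * 101))  # (True, True)
print(update_lesson(2, lessons, [1000] * 101))  # (False, False)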

62
ml-agents/mlagents/trainers/tests/test_nn_policy.py


import pytest
import os
import unittest
import tempfile
from mlagents.model_serialization import SerializationSettings
from mlagents.trainers.policy.tf_policy import TFPolicy
from mlagents.trainers.tf.models import ModelUtils, Tensor3DShape
from mlagents.trainers.exception import UnityTrainerException

from mlagents.trainers import __version__
VECTOR_ACTION_SPACE = 2

use_rnn: bool = False,
use_discrete: bool = True,
use_visual: bool = False,
model_path: str = "",
load: bool = False,
seed: int = 0,
) -> TFPolicy:
mock_spec = mb.setup_test_behavior_specs(

trainer_settings.network_settings.memory = (
NetworkSettings.MemorySettings() if use_rnn else None
)
policy = TFPolicy(
seed, mock_spec, trainer_settings, model_path=model_path, load=load
)
policy = TFPolicy(seed, mock_spec, trainer_settings)
def test_load_save(tmp_path):
path1 = os.path.join(tmp_path, "runid1")
path2 = os.path.join(tmp_path, "runid2")
trainer_params = TrainerSettings()
policy = create_policy_mock(trainer_params, model_path=path1)
policy.initialize_or_load()
policy._set_step(2000)
mock_brain_name = "MockBrain"
checkpoint_path = f"{policy.model_path}/{mock_brain_name}-2000"
serialization_settings = SerializationSettings(policy.model_path, mock_brain_name)
policy.checkpoint(checkpoint_path, serialization_settings)
assert len(os.listdir(tmp_path)) > 0
# Try load from this path
policy2 = create_policy_mock(trainer_params, model_path=path1, load=True, seed=1)
policy2.initialize_or_load()
_compare_two_policies(policy, policy2)
assert policy2.get_current_step() == 2000
# Try initialize from path 1
trainer_params.output_path = path2
trainer_params.init_path = path1
policy3 = create_policy_mock(trainer_params, model_path=path1, load=False, seed=2)
policy3.initialize_or_load()
_compare_two_policies(policy2, policy3)
# Assert that the steps are 0.
assert policy3.get_current_step() == 0
class ModelVersionTest(unittest.TestCase):
def test_version_compare(self):
# Test write_stats
with self.assertLogs("mlagents.trainers", level="WARNING") as cm:
path1 = tempfile.mkdtemp()
trainer_params = TrainerSettings()
policy = create_policy_mock(trainer_params, model_path=path1)
policy.initialize_or_load()
policy._check_model_version(
"0.0.0"
) # This is not the right version for sure
# Assert that 1 warning has been thrown with incorrect version
assert len(cm.output) == 1
policy._check_model_version(__version__) # This should be the right version
# Assert that no additional warnings have been thrown wth correct ver
assert len(cm.output) == 1
def _compare_two_policies(policy1: TFPolicy, policy2: TFPolicy) -> None:
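ModelVersionTest above relies on unittest's assertLogs to count the warnings emitted when the policy's model version disagrees with the trainer version. A self-contained sketch of that capture pattern; check_version and the logger name are made up for illustration:

import logging
import unittest


def check_version(model_version, trainer_version, logger):
    if model_version.split(".")[:2] != trainer_version.split(".")[:2]:
        logger.warning("Model version %s does not match trainer version %s",
                       model_version, trainer_version)


class VersionWarningTest(unittest.TestCase):
    def test_version_compare(self):
        logger = logging.getLogger("sketch.trainers")
        with self.assertLogs("sketch.trainers", level="WARNING") as cm:
            check_version("0.0.0", "1.2.0", logger)  # wrong version: one warning
            self.assertEqual(len(cm.output), 1)
            check_version("1.2.0", "1.2.0", logger)  # right version: no new warning
            self.assertEqual(len(cm.output), 1)


if __name__ == "__main__":
    unittest.main()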

8
ml-agents/mlagents/trainers/tests/test_ppo.py


import attr
from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
from mlagents.trainers.trainer.rl_trainer import RLTrainer
from mlagents.trainers.ppo.trainer import PPOTrainer, discount_rewards
from mlagents.trainers.ppo.optimizer import PPOOptimizer
from mlagents.trainers.policy.tf_policy import TFPolicy

0, mock_specs, trainer_settings, "test", False, create_tf_graph=False
)
optimizer = PPOOptimizer(policy, trainer_settings)
policy.initialize()
return optimizer

)
@mock.patch.object(RLTrainer, "create_saver")
def test_trainer_increment_step(ppo_optimizer):
def test_trainer_increment_step(ppo_optimizer, mock_create_saver):
trainer_params = PPO_CONFIG
mock_optimizer = mock.Mock()
mock_optimizer.reward_signals = {}

assert trainer.stats_reporter.get_stats_summaries("Policy/Extrinsic Reward").num > 0
@mock.patch.object(RLTrainer, "create_saver")
def test_add_get_policy(ppo_optimizer, dummy_config):
def test_add_get_policy(ppo_optimizer, mock_create_saver, dummy_config):
mock_optimizer = mock.Mock()
mock_optimizer.reward_signals = {}
ppo_optimizer.return_value = mock_optimizer

1
ml-agents/mlagents/trainers/tests/test_reward_signals.py


optimizer = SACOptimizer(policy, trainer_settings)
else:
optimizer = PPOOptimizer(policy, trainer_settings)
optimizer.policy.initialize()
return optimizer

21
ml-agents/mlagents/trainers/tests/test_rl_trainer.py


import os
from unittest import mock
import pytest
import mlagents.trainers.tests.mock_brain as mb

return self.update_policy
def add_policy(self, mock_behavior_id, mock_policy):
def checkpoint_path(brain_name, step):
return os.path.join(self.saver.model_path, f"{brain_name}-{step}")
mock_saver = mock.Mock()
mock_saver.model_path = self.artifact_path
mock_saver.save_checkpoint.side_effect = checkpoint_path
self.saver = mock_saver
def create_policy(self):
return mock.Mock()

"test_trainer",
TrainerSettings(max_steps=100, checkpoint_interval=10, summary_freq=20),
True,
False,
"mock_model_path",
0,
)
trainer.set_is_policy_updating(True)

def test_advance(mocked_clear_update_buffer, mocked_save_model):
trainer = create_rl_trainer()
mock_policy = mock.Mock()
mock_policy.model_path = "mock_model_path"
trainer.add_policy("TestBrain", mock_policy)
trajectory_queue = AgentManagerQueue("testbrain")
policy_queue = AgentManagerQueue("testbrain")

def test_summary_checkpoint(mock_add_checkpoint, mock_write_summary):
trainer = create_rl_trainer()
mock_policy = mock.Mock()
mock_policy.model_path = "mock_model_path"
trainer.add_policy("TestBrain", mock_policy)
trajectory_queue = AgentManagerQueue("testbrain")
policy_queue = AgentManagerQueue("testbrain")

checkpoint_range = range(
checkpoint_interval, num_trajectories * time_horizon, checkpoint_interval
)
calls = [
mock.call(f"{mock_policy.model_path}/{trainer.brain_name}-{step}", mock.ANY)
for step in checkpoint_range
]
mock_policy.checkpoint.assert_has_calls(calls, any_order=True)
calls = [mock.call(trainer.brain_name, step) for step in checkpoint_range]
trainer.saver.save_checkpoint.assert_has_calls(calls, any_order=True)
add_checkpoint_calls = [
mock.call(

f"{mock_policy.model_path}/{trainer.brain_name}-{step}.nn",
f"{trainer.saver.model_path}/{trainer.brain_name}-{step}.nn",
None,
mock.ANY,
),
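The rl_trainer test fixture above swaps the real saver for a Mock whose save_checkpoint side_effect fabricates the checkpoint path, so the later assert_has_calls checks can verify exact paths. A standalone sketch of that side_effect pattern:

import os
from unittest import mock

artifact_path = "mock_model_path"


def checkpoint_path(brain_name, step):
    return os.path.join(artifact_path, f"{brain_name}-{step}")


mock_saver = mock.Mock()
mock_saver.model_path = artifact_path
mock_saver.save_checkpoint.side_effect = checkpoint_path

print(mock_saver.save_checkpoint("TestBrain", 10))  # mock_model_path/TestBrain-10
mock_saver.save_checkpoint.assert_called_once_with("TestBrain", 10)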

6
ml-agents/mlagents/trainers/tests/test_sac.py


from mlagents.tf_utils import tf
from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
from mlagents.trainers.trainer.rl_trainer import RLTrainer
from mlagents.trainers.sac.trainer import SACTrainer
from mlagents.trainers.sac.optimizer import SACOptimizer
from mlagents.trainers.policy.tf_policy import TFPolicy

0, mock_brain, trainer_settings, "test", False, create_tf_graph=False
)
optimizer = SACOptimizer(policy, trainer_settings)
policy.initialize()
return optimizer

assert trainer2.update_buffer.num_experiences == buffer_len
@mock.patch.object(RLTrainer, "create_saver")
def test_add_get_policy(sac_optimizer, dummy_config):
def test_add_get_policy(sac_optimizer, mock_create_saver, dummy_config):
mock_optimizer = mock.Mock()
mock_optimizer.reward_signals = {}
sac_optimizer.return_value = mock_optimizer

policy = trainer.create_policy(behavior_id, specs)
policy.get_current_step = lambda: 200
trainer.add_policy(behavior_id, policy)
trainer.saver.initialize_or_load(policy)
trainer.optimizer.update = mock.Mock()
trainer.optimizer.update_reward_signals = mock.Mock()
trainer.optimizer.update_reward_signals.return_value = {}

8
ml-agents/mlagents/trainers/tests/test_simple_rl.py


# The reward processor is passed as an argument to _check_environment_trains.
# It is applied to the list pf all final rewards for each brain individually.
# It is applied to the list of all final rewards for each brain individually.
# Custom reward processors shuld be built within the test function and passed to _check_environment_trains
# Custom reward processors should be built within the test function and passed to _check_environment_trains
# Default is average over the last 5 final rewards
def default_reward_processor(rewards, last_n_rewards=5):
rewards_to_use = rewards[-last_n_rewards:]

@pytest.mark.parametrize("use_discrete", [True, False])
def test_recurrent_sac(use_discrete):
step_size = 0.2 if use_discrete else 1.0
step_size = 0.5 if use_discrete else 0.2
env = MemoryEnvironment(
[BRAIN_NAME], use_discrete=use_discrete, step_size=step_size
)

swap_steps=5000,
team_change=2000,
)
config = attr.evolve(PPO_CONFIG, self_play=self_play_settings, max_steps=2000)
config = attr.evolve(PPO_CONFIG, self_play=self_play_settings, max_steps=3000)
_check_environment_trains(
env, {BRAIN_NAME: config, brain_name_opp: config}, success_threshold=None
)

20
ml-agents/mlagents/trainers/tests/test_tf_policy.py


from mlagents.model_serialization import SerializationSettings
from unittest import mock
from mlagents.trainers.settings import TrainerSettings
import numpy as np

# Test dev versions
result = TFPolicy._convert_version_string("200.300.100.dev0")
assert result == (200, 300, 100)
@mock.patch("mlagents.trainers.policy.tf_policy.export_policy_model")
@mock.patch("time.time", mock.MagicMock(return_value=12345))
def test_checkpoint_writes_tf_and_nn_checkpoints(export_policy_model_mock):
mock_brain = basic_mock_brain()
test_seed = 4 # moving up in the world
policy = FakePolicy(test_seed, mock_brain, TrainerSettings(), "output")
n_steps = 5
policy.get_current_step = MagicMock(return_value=n_steps)
policy.saver = MagicMock()
serialization_settings = SerializationSettings("output", mock_brain.brain_name)
checkpoint_path = f"output/{mock_brain.brain_name}-{n_steps}"
policy.checkpoint(checkpoint_path, serialization_settings)
policy.saver.save.assert_called_once_with(policy.sess, f"{checkpoint_path}.ckpt")
export_policy_model_mock.assert_called_once_with(
checkpoint_path, serialization_settings, policy.graph, policy.sess
)

26
ml-agents/mlagents/trainers/trainer/rl_trainer.py


# # Unity ML-Agents Toolkit
import os
from mlagents.model_serialization import SerializationSettings, copy_model_files
from mlagents.trainers.policy.checkpoint_manager import (
NNCheckpoint,
NNCheckpointManager,

from mlagents_envs.timers import hierarchical_timer
from mlagents.trainers.agent_processor import AgentManagerQueue
from mlagents.trainers.trajectory import Trajectory
from mlagents.trainers.settings import TrainerSettings
from mlagents.trainers.saver.saver import BaseSaver
from mlagents.trainers.saver.tf_saver import TFSaver
RewardSignalResults = Dict[str, RewardSignalResult]

)
self._next_save_step = 0
self._next_summary_step = 0
self.saver = self.create_saver(
self.trainer_settings, self.artifact_path, self.load
)
def end_episode(self) -> None:
"""

for agent_id in rewards:
rewards[agent_id] = 0
@staticmethod
def create_saver(
trainer_settings: TrainerSettings, model_path: str, load: bool
) -> BaseSaver:
saver = TFSaver(trainer_settings, model_path, load)
return saver
def _update_end_episode_stats(self, agent_id: str, optimizer: Optimizer) -> None:
for name, rewards in self.collected_rewards.items():
if name == "environment":

logger.warning(
"Trainer has multiple policies, but default behavior only saves the first."
)
policy = list(self.policies.values())[0]
model_path = policy.model_path
settings = SerializationSettings(model_path, self.brain_name)
checkpoint_path = os.path.join(model_path, f"{self.brain_name}-{self.step}")
policy.checkpoint(checkpoint_path, settings)
checkpoint_path = self.saver.save_checkpoint(self.brain_name, self.step)
new_checkpoint = NNCheckpoint(
int(self.step),
f"{checkpoint_path}.nn",

logger.warning(
"Trainer has multiple policies, but default behavior only saves the first."
)
policy = list(self.policies.values())[0]
copy_model_files(model_checkpoint.file_path, f"{policy.model_path}.nn")
self.saver.copy_final_model(model_checkpoint.file_path)
model_checkpoint, file_path=f"{policy.model_path}.nn"
model_checkpoint, file_path=f"{self.saver.model_path}.nn"
)
NNCheckpointManager.track_final_checkpoint(self.brain_name, final_checkpoint)
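In the rl_trainer hunk above, checkpointing and final-model export are delegated to the saver: save_checkpoint returns the checkpoint path used to build the NNCheckpoint record, and copy_final_model writes the exported model next to the saver's model_path. A stand-in sketch of that call order (SketchSaver is not the real TFSaver):

import os


class SketchSaver:
    """Stand-in saver: only the call order mirrors the trainer hunk above."""
    def __init__(self, model_path):
        self.model_path = model_path

    def save_checkpoint(self, brain_name, step):
        return os.path.join(self.model_path, f"{brain_name}-{step}")

    def copy_final_model(self, source_nn_path):
        return f"{self.model_path}.nn"


saver = SketchSaver(os.path.join("results", "run_id", "fake_behavior"))
checkpoint = saver.save_checkpoint("fake_behavior", 5000)   # .../fake_behavior-5000
final_model = saver.copy_final_model(f"{checkpoint}.nn")    # .../fake_behavior.nn
print(checkpoint, final_model)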

2
ml-agents/mlagents/trainers/trainer/trainer.py


brain_name: str,
trainer_settings: TrainerSettings,
training: bool,
load: bool,
artifact_path: str,
reward_buff_cap: int = 1,
):

self._threaded = trainer_settings.threaded
self._stats_reporter = StatsReporter(brain_name)
self.is_training = training
self.load = load
self._reward_buffer: Deque[float] = deque(maxlen=reward_buff_cap)
self.policy_queues: List[AgentManagerQueue[Policy]] = []
self.trajectory_queues: List[AgentManagerQueue[Trajectory]] = []

10
ml-agents/mlagents/trainers/trainer_controller.py


from mlagents.trainers.trainer_util import TrainerFactory
from mlagents.trainers.behavior_id_utils import BehaviorIdentifiers
from mlagents.trainers.agent_processor import AgentManager
from mlagents.tf_utils.globals import get_rank
class TrainerController:

self.kill_trainers = False
np.random.seed(training_seed)
tf.set_random_seed(training_seed)
self.rank = get_rank()
@timed
def _save_models(self):

if self.rank is not None and self.rank != 0:
return
for brain_name in self.trainers.keys():
self.trainers[brain_name].save_model()
self.logger.info("Saved Model")

"""
Saves models for all trainers.
"""
if self.rank is not None and self.rank != 0:
return
for brain_name in self.trainers.keys():
self.trainers[brain_name].save_model()
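_save_models above is now guarded by the process rank so that, in a distributed run, only rank 0 writes models. A minimal sketch of that guard; get_rank here is a stub standing in for mlagents.tf_utils.globals.get_rank:

def get_rank():
    # Stand-in: assume the real helper returns the process rank in a
    # distributed run and None otherwise.
    return None


def save_models(trainers, rank=None):
    if rank is not None and rank != 0:
        return  # non-zero ranks skip saving so they do not clobber rank 0's output
    for brain_name, trainer in trainers.items():
        trainer.save_model()


class _FakeTrainer:
    def save_model(self):
        print("saved")


save_models({"fake_behavior": _FakeTrainer()}, rank=get_rank())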

) in self.param_manager.get_current_lesson_number().items():
for trainer in self.trainers.values():
trainer.stats_reporter.set_stat(
f"Environment/Lesson/{param_name}", lesson_number
f"Environment/Lesson Number/{param_name}", lesson_number
)
for trainer in self.trainers.values():

3
test_requirements.txt


pytest-cov==2.6.1
pytest-xdist
# PyTorch tests are here for the time being, before they are used in the codebase.
torch>=1.5.0
# onnx doesn't currently have a wheel for 3.8
tf2onnx>=1.5.5;python_version<'3.8'

2
utils/validate_release_links.py


ALLOW_LIST = {
# Previous release table
"README.md": re.compile(r"\*\*Release [0-9]+\*\*"),
"com.unity.ml-agents/Runtime/Actuators/IActionReceiver.cs": RELEASE_PATTERN,
"com.unity.ml-agents/Runtime/Actuators/IDiscreteActionMask.cs": RELEASE_PATTERN,
"docs/Versioning.md": None,
"com.unity.ml-agents/CHANGELOG.md": None,
"utils/make_readme_table.py": None,

21
Project/Assets/ML-Agents/Examples/SharedAssets/Prefabs/Targets/DynamicTarget.prefab


m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 3840539935788495952}
m_LocalRotation: {x: 0, y: 0, z: 0, w: 1}
m_LocalPosition: {x: 1, y: 1, z: 1}
m_LocalScale: {x: 1.2356956, y: 1.2356961, z: 1.2356961}
m_LocalPosition: {x: 0, y: 1, z: 1}
m_LocalScale: {x: 1, y: 1, z: 1}
m_Children: []
m_Father: {fileID: 0}
m_RootOrder: 0

m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 3840539935788495952}
m_Enabled: 1
m_CastShadows: 0
m_CastShadows: 1
m_ReceiveShadows: 1
m_DynamicOccludee: 1
m_MotionVectors: 1

respawnIfTouched: 1
respawnIfFallsOffPlatform: 1
fallDistance: 5
triggerIsTouching: 0
onTriggerEnterEvent:
m_PersistentCalls:
m_Calls: []

onTriggerExitEvent:
m_PersistentCalls:
m_Calls: []
colliderIsTouching: 0
m_Calls:
- m_Target: {fileID: 0}
m_MethodName: TouchedTarget
m_Mode: 1
m_Arguments:
m_ObjectArgument: {fileID: 0}
m_ObjectArgumentAssemblyTypeName: UnityEngine.Object, UnityEngine
m_IntArgument: 0
m_FloatArgument: 0
m_StringArgument:
m_BoolArgument: 0
m_CallState: 2
m_Calls: []
onCollisionStayEvent:
m_PersistentCalls:
m_Calls: []

19
Project/Assets/ML-Agents/Examples/SharedAssets/Prefabs/Targets/StaticTarget.prefab


m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 3840539935788495952}
m_LocalRotation: {x: -0, y: -0, z: -0, w: 1}
m_LocalPosition: {x: 6.2, y: 1.15, z: 3.824}
m_LocalScale: {x: 1.2356956, y: 1.2356961, z: 1.2356961}
m_LocalRotation: {x: 0, y: 0, z: 0, w: 1}
m_LocalPosition: {x: 1, y: 1, z: 1}
m_LocalScale: {x: 1, y: 1, z: 1}
m_Children: []
m_Father: {fileID: 0}
m_RootOrder: 0

m_IsKinematic: 0
m_Interpolate: 0
m_Constraints: 0
m_CollisionDetection: 0
m_CollisionDetection: 3
--- !u!114 &3631016866778687563
MonoBehaviour:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
tagToDetect: agent
moveTargetToRandomPosIfTouched: 0
targetSpawnRadius: 0
onTrtesiggerEnterEvent:
m_PersistentCalls:
m_Calls: []
triggerIsTouching: 0
spawnRadius: 0
respawnIfTouched: 0
respawnIfFallsOffPlatform: 1
fallDistance: 5
onTriggerEnterEvent:
m_PersistentCalls:
m_Calls: []

onTriggerExitEvent:
m_PersistentCalls:
m_Calls: []
colliderIsTouching: 0
onCollisionEnterEvent:
m_PersistentCalls:
m_Calls: []

82
Project/Assets/ML-Agents/Examples/Walker/Prefabs/Ragdoll/WalkerRagdollBase.prefab


m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 895268871264836243}
m_LocalRotation: {x: 0, y: 0, z: 0, w: 1}
m_LocalPosition: {x: 0, y: 0.15, z: 0}
m_LocalPosition: {x: 0, y: 0, z: 0}
m_LocalScale: {x: 1, y: 1, z: 1}
m_Children:
- {fileID: 895268873051627235}

- component: {fileID: 895268871377934302}
- component: {fileID: 895268871377934301}
m_Layer: 0
m_Name: WalkerRagdoll
m_Name: WalkerRagdollBase
m_TagString: Untagged
m_Icon: {fileID: 0}
m_NavMeshLayer: 0

m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 895268871377934275}
m_LocalRotation: {x: 0, y: 0.7071068, z: 0, w: 0.7071068}
m_LocalPosition: {x: 0, y: 3.07, z: 0}
m_LocalRotation: {x: 0, y: 0, z: 0, w: 1}
m_LocalPosition: {x: 0, y: 3, z: 0}
m_LocalScale: {x: 1, y: 1, z: 1}
m_Children:
- {fileID: 895268871264836332}

m_RootOrder: 0
m_LocalEulerAnglesHint: {x: 0, y: 90, z: 0}
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
--- !u!114 &895268871377934297
MonoBehaviour:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
m_BrainParameters:
VectorObservationSize: 236
VectorObservationSize: 243
m_Model: {fileID: 11400000, guid: 3c6170922a9ad4d9f85261699ca00f5d, type: 3}
m_Model: {fileID: 11400000, guid: f598eaeeef9f94691989a2cfaaafb565, type: 3}
m_InferenceDevice: 0
m_BehaviorType: 0
m_BehaviorName: WalkerDynamic

maxStep: 0
hasUpgradedFromAgentParameters: 1
MaxStep: 5000
maximumWalkingSpeed: 999
targetWalkingSpeed: 10
randomizeWalkSpeedEachEpisode: 1
walkDirectionMethod: 0
worldDirToWalk: {x: 1, y: 0, z: 0}
worldPosToWalkTo: {x: 0, y: 0, z: 0}
target: {fileID: 0}
hips: {fileID: 895268871264836332}
chest: {fileID: 7933235354845945071}

armR: {fileID: 7933235355057813930}
forearmR: {fileID: 7933235353195701980}
handR: {fileID: 7933235354616748502}
orientationCube: {fileID: 7559180363928843817}
--- !u!114 &895268871377934303
MonoBehaviour:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
maxJointSpring: 40000
jointDampen: 3000
maxJointForceLimit: 10000
jointDampen: 5000
maxJointForceLimit: 20000
bodyPartsList: []
--- !u!114 &895268871377934302
MonoBehaviour:

m_Script: {fileID: 11500000, guid: 1513f8a85fedd47efba089213b7c5bde, type: 3}
m_Name:
m_EditorClassIdentifier:
updatedByAgent: 0
transformToFollow: {fileID: 895268871264836332}
targetToLookAt: {fileID: 0}
heightOffset: 0

m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 7933235353030744116}
serializedVersion: 2
m_Mass: 3
m_Mass: 4
m_Drag: 0.05
m_AngularDrag: 0.05
m_UseGravity: 1

m_Anchor: {x: 0.55, y: 0, z: 0}
m_Axis: {x: 0, y: -1, z: 0}
m_AutoConfigureConnectedAnchor: 1
m_ConnectedAnchor: {x: -0.7000002, y: 0, z: 0}
m_ConnectedAnchor: {x: -0.7000001, y: 0.00000011920929, z: 0}
serializedVersion: 2
m_SecondaryAxis: {x: 0, y: 0, z: 1}
m_XMotion: 0

m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 7933235353041637840}
serializedVersion: 2
m_Mass: 1
m_Mass: 2
m_Drag: 0.05
m_AngularDrag: 0.05
m_UseGravity: 1

m_Anchor: {x: 0, y: 0, z: 0}
m_Axis: {x: 1, y: 0, z: 0}
m_AutoConfigureConnectedAnchor: 1
m_ConnectedAnchor: {x: -0.70000064, y: 0, z: 0}
m_ConnectedAnchor: {x: -0.70000017, y: 0.00000011920929, z: 0}
serializedVersion: 2
m_SecondaryAxis: {x: 0, y: 0, z: 1}
m_XMotion: 0

m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 7933235353195701956}
serializedVersion: 2
m_Mass: 2
m_Mass: 3
m_Drag: 0.05
m_AngularDrag: 0.05
m_UseGravity: 1

m_Anchor: {x: -0.5, y: 0, z: 0}
m_Axis: {x: 0, y: 1, z: 0}
m_AutoConfigureConnectedAnchor: 1
m_ConnectedAnchor: {x: 0.5000005, y: 0, z: 0}
m_ConnectedAnchor: {x: 0.5, y: 0.00000011920929, z: 0}
serializedVersion: 2
m_SecondaryAxis: {x: 0, y: 0, z: 1}
m_XMotion: 0

m_Anchor: {x: 0, y: 0.5, z: 0}
m_Axis: {x: -1, y: 0, z: 0}
m_AutoConfigureConnectedAnchor: 1
m_ConnectedAnchor: {x: -0.39999408, y: -0.29999986, z: 0}
m_ConnectedAnchor: {x: -0.39999396, y: -0.29999995, z: 0}
serializedVersion: 2
m_SecondaryAxis: {x: 0, y: 0, z: -1}
m_XMotion: 0

m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 7933235353240438151}
serializedVersion: 2
m_Mass: 2
m_Mass: 3
m_Drag: 0.05
m_AngularDrag: 0.05
m_UseGravity: 1

m_Anchor: {x: 0.5, y: 0, z: 0}
m_Axis: {x: 0, y: -1, z: 0}
m_AutoConfigureConnectedAnchor: 1
m_ConnectedAnchor: {x: -0.5000005, y: 0, z: 0}
m_ConnectedAnchor: {x: -0.5, y: 0.00000011920929, z: 0}
serializedVersion: 2
m_SecondaryAxis: {x: 0, y: 0, z: 1}
m_XMotion: 0

m_Anchor: {x: 0, y: 0.5, z: 0}
m_Axis: {x: -1, y: 0, z: 0}
m_AutoConfigureConnectedAnchor: 1
m_ConnectedAnchor: {x: -0.00000011920929, y: -0.5, z: 0}
m_ConnectedAnchor: {x: 0, y: -0.5, z: 0}
serializedVersion: 2
m_SecondaryAxis: {x: 0, y: 0, z: -1}
m_XMotion: 0

m_Anchor: {x: 0, y: 0, z: -0.1}
m_Axis: {x: 1, y: 0, z: 0}
m_AutoConfigureConnectedAnchor: 1
m_ConnectedAnchor: {x: 0.00000011920929, y: -0.60000014, z: 0}
m_ConnectedAnchor: {x: 0, y: -0.60000014, z: 0}
serializedVersion: 2
m_SecondaryAxis: {x: 0, y: 1, z: 0}
m_XMotion: 0

m_Anchor: {x: 0, y: 0.5, z: 0}
m_Axis: {x: -1, y: 0, z: 0}
m_AutoConfigureConnectedAnchor: 1
m_ConnectedAnchor: {x: 0.00000011920929, y: -0.5, z: 0}
m_ConnectedAnchor: {x: 0, y: -0.5, z: 0}
serializedVersion: 2
m_SecondaryAxis: {x: 0, y: 0, z: -1}
m_XMotion: 0

m_Anchor: {x: 0, y: 0.5, z: 0}
m_Axis: {x: -1, y: 0, z: 0}
m_AutoConfigureConnectedAnchor: 1
m_ConnectedAnchor: {x: 0.39999408, y: -0.29999986, z: 0}
m_ConnectedAnchor: {x: 0.39999396, y: -0.29999995, z: 0}
serializedVersion: 2
m_SecondaryAxis: {x: 0, y: 0, z: -1}
m_XMotion: 0

m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 7933235354074184675}
serializedVersion: 2
m_Mass: 5
m_Mass: 6
m_Drag: 0.05
m_AngularDrag: 0.05
m_UseGravity: 1

m_Anchor: {x: 0, y: -0.85, z: 0}
m_Axis: {x: 1, y: 0, z: 0}
m_AutoConfigureConnectedAnchor: 1
m_ConnectedAnchor: {x: 0, y: 0.5119996, z: 0}
m_ConnectedAnchor: {x: 0, y: 0.5119997, z: 0}
serializedVersion: 2
m_SecondaryAxis: {x: 0, y: 0, z: -1}
m_XMotion: 0

m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 7933235354616748503}
serializedVersion: 2
m_Mass: 1
m_Mass: 2
m_Drag: 0.05
m_AngularDrag: 0.05
m_UseGravity: 1

m_Anchor: {x: 0, y: 0, z: 0}
m_Axis: {x: 1, y: 0, z: 0}
m_AutoConfigureConnectedAnchor: 1
m_ConnectedAnchor: {x: 0.70000064, y: 0, z: 0}
m_ConnectedAnchor: {x: 0.70000017, y: 0.00000011920929, z: 0}
serializedVersion: 2
m_SecondaryAxis: {x: 0, y: 0, z: 1}
m_XMotion: 0

m_Anchor: {x: 0, y: -0.3, z: 0}
m_Axis: {x: 1, y: 0, z: 0}
m_AutoConfigureConnectedAnchor: 1
m_ConnectedAnchor: {x: 0, y: 0.383, z: 0}
m_ConnectedAnchor: {x: 0, y: 0.3829999, z: 0}
serializedVersion: 2
m_SecondaryAxis: {x: 0, y: 0, z: -1}
m_XMotion: 0

m_Anchor: {x: 0, y: -0.5, z: 0}
m_Axis: {x: 1, y: 0, z: 0}
m_AutoConfigureConnectedAnchor: 1
m_ConnectedAnchor: {x: 0, y: 0.3050003, z: 0}
m_ConnectedAnchor: {x: 0, y: 0.30500042, z: 0}
serializedVersion: 2
m_SecondaryAxis: {x: 0, y: 0, z: -1}
m_XMotion: 0

m_Anchor: {x: 0, y: 0, z: -0.1}
m_Axis: {x: 1, y: 0, z: 0}
m_AutoConfigureConnectedAnchor: 1
m_ConnectedAnchor: {x: -0.00000011920929, y: -0.60000014, z: 0}
m_ConnectedAnchor: {x: 0, y: -0.60000014, z: 0}
serializedVersion: 2
m_SecondaryAxis: {x: 0, y: 1, z: 0}
m_XMotion: 0

m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 7933235355057813906}
serializedVersion: 2
m_Mass: 3
m_Mass: 4
m_Drag: 0.05
m_AngularDrag: 0.05
m_UseGravity: 1

m_Anchor: {x: -0.55, y: 0, z: 0}
m_Axis: {x: 0, y: 1, z: 0}
m_AutoConfigureConnectedAnchor: 1
m_ConnectedAnchor: {x: 0.7000002, y: 0, z: 0}
m_ConnectedAnchor: {x: 0.7000001, y: 0.00000011920929, z: 0}
serializedVersion: 2
m_SecondaryAxis: {x: 0, y: 0, z: 1}
m_XMotion: 0

type: 3}
m_PrefabInstance: {fileID: 7597605653427724053}
m_PrefabAsset: {fileID: 0}
--- !u!114 &7559180363928843817 stripped
MonoBehaviour:
m_CorrespondingSourceObject: {fileID: 114705911240010044, guid: 72f745913c5a34df5aaadd5c1f0024cb,
type: 3}
m_PrefabInstance: {fileID: 7597605653427724053}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 0}
m_Enabled: 1
m_EditorHideFlags: 0
m_Script: {fileID: 11500000, guid: 771e78c5e980e440e8cd19716b55075f, type: 3}
m_Name:
m_EditorClassIdentifier:

523
Project/Assets/ML-Agents/Examples/SharedAssets/Prefabs/PlatformDynamicTarget.prefab


%YAML 1.1
%TAG !u! tag:unity3d.com,2011:
--- !u!1 &6907050159044240885
GameObject:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
serializedVersion: 6
m_Component:
- component: {fileID: 6902197503240654641}
- component: {fileID: 6894500521640151429}
- component: {fileID: 6885223417161833361}
- component: {fileID: 6859132155796343735}
m_Layer: 0
m_Name: Wall (1)
m_TagString: Untagged
m_Icon: {fileID: 0}
m_NavMeshLayer: 0
m_StaticEditorFlags: 4294967295
m_IsActive: 1
--- !u!4 &6902197503240654641
Transform:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907050159044240885}
m_LocalRotation: {x: -0, y: -0, z: -0, w: 1}
m_LocalPosition: {x: -50, y: 0, z: 0}
m_LocalScale: {x: 1, y: 5, z: 101}
m_Children: []
m_Father: {fileID: 6902102727328990095}
m_RootOrder: 1
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
--- !u!33 &6894500521640151429
MeshFilter:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907050159044240885}
m_Mesh: {fileID: 10202, guid: 0000000000000000e000000000000000, type: 0}
--- !u!23 &6885223417161833361
MeshRenderer:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907050159044240885}
m_Enabled: 1
m_CastShadows: 1
m_ReceiveShadows: 1
m_DynamicOccludee: 1
m_MotionVectors: 1
m_LightProbeUsage: 1
m_ReflectionProbeUsage: 1
m_RenderingLayerMask: 1
m_RendererPriority: 0
m_Materials:
- {fileID: 2100000, guid: 66163cf35956a4be08e801b750c26f33, type: 2}
m_StaticBatchInfo:
firstSubMesh: 0
subMeshCount: 0
m_StaticBatchRoot: {fileID: 0}
m_ProbeAnchor: {fileID: 0}
m_LightProbeVolumeOverride: {fileID: 0}
m_ScaleInLightmap: 1
m_PreserveUVs: 0
m_IgnoreNormalsForChartDetection: 0
m_ImportantGI: 0
m_StitchLightmapSeams: 0
m_SelectedEditorRenderState: 3
m_MinimumChartSize: 4
m_AutoUVMaxDistance: 0.5
m_AutoUVMaxAngle: 89
m_LightmapParameters: {fileID: 0}
m_SortingLayerID: 0
m_SortingLayer: 0
m_SortingOrder: 0
--- !u!65 &6859132155796343735
BoxCollider:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907050159044240885}
m_Material: {fileID: 0}
m_IsTrigger: 0
m_Enabled: 1
serializedVersion: 2
m_Size: {x: 1, y: 1, z: 1}
m_Center: {x: 0, y: 0, z: 0}
--- !u!1 &6907401236047902865
GameObject:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
serializedVersion: 6
m_Component:
- component: {fileID: 6902265967514060089}
- component: {fileID: 6891025662345346653}
- component: {fileID: 6859036447448677835}
- component: {fileID: 6884684845870454579}
m_Layer: 14
m_Name: Ground
m_TagString: ground
m_Icon: {fileID: 0}
m_NavMeshLayer: 0
m_StaticEditorFlags: 4294967295
m_IsActive: 1
--- !u!4 &6902265967514060089
Transform:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907401236047902865}
m_LocalRotation: {x: 0, y: 0.7071068, z: 0, w: 0.7071068}
m_LocalPosition: {x: 0, y: 0, z: 0}
m_LocalScale: {x: 100, y: 1, z: 100}
m_Children: []
m_Father: {fileID: 6902107422946006027}
m_RootOrder: 1
m_LocalEulerAnglesHint: {x: 0, y: 90, z: 0}
--- !u!33 &6891025662345346653
MeshFilter:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907401236047902865}
m_Mesh: {fileID: 10202, guid: 0000000000000000e000000000000000, type: 0}
--- !u!65 &6859036447448677835
BoxCollider:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907401236047902865}
m_Material: {fileID: 0}
m_IsTrigger: 0
m_Enabled: 1
serializedVersion: 2
m_Size: {x: 1, y: 1, z: 1}
m_Center: {x: 0, y: 0, z: 0}
--- !u!23 &6884684845870454579
MeshRenderer:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907401236047902865}
m_Enabled: 1
m_CastShadows: 1
m_ReceiveShadows: 1
m_DynamicOccludee: 1
m_MotionVectors: 1
m_LightProbeUsage: 1
m_ReflectionProbeUsage: 1
m_RenderingLayerMask: 1
m_RendererPriority: 0
m_Materials:
- {fileID: 2100000, guid: acba6bf2a290a496bb8989b42bf8698d, type: 2}
m_StaticBatchInfo:
firstSubMesh: 0
subMeshCount: 0
m_StaticBatchRoot: {fileID: 0}
m_ProbeAnchor: {fileID: 0}
m_LightProbeVolumeOverride: {fileID: 0}
m_ScaleInLightmap: 1
m_PreserveUVs: 1
m_IgnoreNormalsForChartDetection: 0
m_ImportantGI: 0
m_StitchLightmapSeams: 0
m_SelectedEditorRenderState: 3
m_MinimumChartSize: 4
m_AutoUVMaxDistance: 0.5
m_AutoUVMaxAngle: 89
m_LightmapParameters: {fileID: 0}
m_SortingLayerID: 0
m_SortingLayer: 0
m_SortingOrder: 0
--- !u!1 &6907666814270504157
GameObject:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
serializedVersion: 6
m_Component:
- component: {fileID: 6902102727328990095}
m_Layer: 0
m_Name: Walls
m_TagString: Untagged
m_Icon: {fileID: 0}
m_NavMeshLayer: 0
m_StaticEditorFlags: 4294967295
m_IsActive: 1
--- !u!4 &6902102727328990095
Transform:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907666814270504157}
m_LocalRotation: {x: -0, y: -0, z: -0, w: 1}
m_LocalPosition: {x: 0, y: 2, z: 0}
m_LocalScale: {x: 1, y: 1, z: 1}
m_Children:
- {fileID: 6901873285403999439}
- {fileID: 6902197503240654641}
- {fileID: 6901900959948323433}
- {fileID: 6905948743199606957}
m_Father: {fileID: 6902107422946006027}
m_RootOrder: 0
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
--- !u!1 &6907680617094430597
GameObject:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
serializedVersion: 6
m_Component:
- component: {fileID: 6901873285403999439}
- component: {fileID: 6894618984257886823}
- component: {fileID: 6884854148710353183}
- component: {fileID: 6863062098498978603}
m_Layer: 0
m_Name: Wall
m_TagString: Untagged
m_Icon: {fileID: 0}
m_NavMeshLayer: 0
m_StaticEditorFlags: 4294967295
m_IsActive: 1
--- !u!4 &6901873285403999439
Transform:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907680617094430597}
m_LocalRotation: {x: 0, y: 0, z: 0, w: 1}
m_LocalPosition: {x: 50, y: 0, z: 0}
m_LocalScale: {x: 1, y: 5, z: 101}
m_Children: []
m_Father: {fileID: 6902102727328990095}
m_RootOrder: 0
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
--- !u!33 &6894618984257886823
MeshFilter:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907680617094430597}
m_Mesh: {fileID: 10202, guid: 0000000000000000e000000000000000, type: 0}
--- !u!23 &6884854148710353183
MeshRenderer:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907680617094430597}
m_Enabled: 1
m_CastShadows: 1
m_ReceiveShadows: 1
m_DynamicOccludee: 1
m_MotionVectors: 1
m_LightProbeUsage: 1
m_ReflectionProbeUsage: 1
m_RenderingLayerMask: 1
m_RendererPriority: 0
m_Materials:
- {fileID: 2100000, guid: 66163cf35956a4be08e801b750c26f33, type: 2}
m_StaticBatchInfo:
firstSubMesh: 0
subMeshCount: 0
m_StaticBatchRoot: {fileID: 0}
m_ProbeAnchor: {fileID: 0}
m_LightProbeVolumeOverride: {fileID: 0}
m_ScaleInLightmap: 1
m_PreserveUVs: 0
m_IgnoreNormalsForChartDetection: 0
m_ImportantGI: 0
m_StitchLightmapSeams: 0
m_SelectedEditorRenderState: 3
m_MinimumChartSize: 4
m_AutoUVMaxDistance: 0.5
m_AutoUVMaxAngle: 89
m_LightmapParameters: {fileID: 0}
m_SortingLayerID: 0
m_SortingLayer: 0
m_SortingOrder: 0
--- !u!65 &6863062098498978603
BoxCollider:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907680617094430597}
m_Material: {fileID: 0}
m_IsTrigger: 0
m_Enabled: 1
serializedVersion: 2
m_Size: {x: 1, y: 1, z: 1}
m_Center: {x: 0, y: 0, z: 0}
--- !u!1 &6907740118844148851
GameObject:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
serializedVersion: 6
m_Component:
- component: {fileID: 6902107422946006027}
m_Layer: 0
m_Name: PlatformDynamicTarget
m_TagString: Untagged
m_Icon: {fileID: 0}
m_NavMeshLayer: 0
m_StaticEditorFlags: 0
m_IsActive: 1
--- !u!4 &6902107422946006027
Transform:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907740118844148851}
m_LocalRotation: {x: 0, y: 0, z: 0, w: 1}
m_LocalPosition: {x: 0, y: 0, z: 0}
m_LocalScale: {x: 1, y: 1, z: 1}
m_Children:
- {fileID: 6902102727328990095}
- {fileID: 6902265967514060089}
m_Father: {fileID: 0}
m_RootOrder: 0
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
--- !u!1 &6907828132384848309
GameObject:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
serializedVersion: 6
m_Component:
- component: {fileID: 6905948743199606957}
- component: {fileID: 6894463671975680535}
- component: {fileID: 6884868534516719387}
- component: {fileID: 6859048605259525735}
m_Layer: 0
m_Name: Wall (3)
m_TagString: Untagged
m_Icon: {fileID: 0}
m_NavMeshLayer: 0
m_StaticEditorFlags: 4294967295
m_IsActive: 1
--- !u!4 &6905948743199606957
Transform:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907828132384848309}
m_LocalRotation: {x: -0, y: -0, z: -0, w: 1}
m_LocalPosition: {x: 0, y: 0, z: -50}
m_LocalScale: {x: 100, y: 5, z: 1}
m_Children: []
m_Father: {fileID: 6902102727328990095}
m_RootOrder: 3
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
--- !u!33 &6894463671975680535
MeshFilter:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907828132384848309}
m_Mesh: {fileID: 10202, guid: 0000000000000000e000000000000000, type: 0}
--- !u!23 &6884868534516719387
MeshRenderer:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907828132384848309}
m_Enabled: 1
m_CastShadows: 1
m_ReceiveShadows: 1
m_DynamicOccludee: 1
m_MotionVectors: 1
m_LightProbeUsage: 1
m_ReflectionProbeUsage: 1
m_RenderingLayerMask: 1
m_RendererPriority: 0
m_Materials:
- {fileID: 2100000, guid: 66163cf35956a4be08e801b750c26f33, type: 2}
m_StaticBatchInfo:
firstSubMesh: 0
subMeshCount: 0
m_StaticBatchRoot: {fileID: 0}
m_ProbeAnchor: {fileID: 0}
m_LightProbeVolumeOverride: {fileID: 0}
m_ScaleInLightmap: 1
m_PreserveUVs: 0
m_IgnoreNormalsForChartDetection: 0
m_ImportantGI: 0
m_StitchLightmapSeams: 0
m_SelectedEditorRenderState: 3
m_MinimumChartSize: 4
m_AutoUVMaxDistance: 0.5
m_AutoUVMaxAngle: 89
m_LightmapParameters: {fileID: 0}
m_SortingLayerID: 0
m_SortingLayer: 0
m_SortingOrder: 0
--- !u!65 &6859048605259525735
BoxCollider:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907828132384848309}
m_Material: {fileID: 0}
m_IsTrigger: 0
m_Enabled: 1
serializedVersion: 2
m_Size: {x: 1, y: 1, z: 1}
m_Center: {x: 0, y: 0, z: 0}
--- !u!1 &6907860845836169157
GameObject:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
serializedVersion: 6
m_Component:
- component: {fileID: 6901900959948323433}
- component: {fileID: 6893927248293796423}
- component: {fileID: 6885176866006237333}
- component: {fileID: 6859395915623032135}
m_Layer: 0
m_Name: Wall (2)
m_TagString: Untagged
m_Icon: {fileID: 0}
m_NavMeshLayer: 0
m_StaticEditorFlags: 4294967295
m_IsActive: 1
--- !u!4 &6901900959948323433
Transform:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907860845836169157}
m_LocalRotation: {x: -0, y: -0, z: -0, w: 1}
m_LocalPosition: {x: 0, y: 0, z: 50}
m_LocalScale: {x: 100, y: 5, z: 1}
m_Children: []
m_Father: {fileID: 6902102727328990095}
m_RootOrder: 2
m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0}
--- !u!33 &6893927248293796423
MeshFilter:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907860845836169157}
m_Mesh: {fileID: 10202, guid: 0000000000000000e000000000000000, type: 0}
--- !u!23 &6885176866006237333
MeshRenderer:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907860845836169157}
m_Enabled: 1
m_CastShadows: 1
m_ReceiveShadows: 1
m_DynamicOccludee: 1
m_MotionVectors: 1
m_LightProbeUsage: 1
m_ReflectionProbeUsage: 1
m_RenderingLayerMask: 1
m_RendererPriority: 0
m_Materials:
- {fileID: 2100000, guid: 66163cf35956a4be08e801b750c26f33, type: 2}
m_StaticBatchInfo:
firstSubMesh: 0
subMeshCount: 0
m_StaticBatchRoot: {fileID: 0}
m_ProbeAnchor: {fileID: 0}
m_LightProbeVolumeOverride: {fileID: 0}
m_ScaleInLightmap: 1
m_PreserveUVs: 0
m_IgnoreNormalsForChartDetection: 0
m_ImportantGI: 0
m_StitchLightmapSeams: 0
m_SelectedEditorRenderState: 3
m_MinimumChartSize: 4
m_AutoUVMaxDistance: 0.5
m_AutoUVMaxAngle: 89
m_LightmapParameters: {fileID: 0}
m_SortingLayerID: 0
m_SortingLayer: 0
m_SortingOrder: 0
--- !u!65 &6859395915623032135
BoxCollider:
m_ObjectHideFlags: 0
m_CorrespondingSourceObject: {fileID: 0}
m_PrefabInstance: {fileID: 0}
m_PrefabAsset: {fileID: 0}
m_GameObject: {fileID: 6907860845836169157}
m_Material: {fileID: 0}
m_IsTrigger: 0
m_Enabled: 1
serializedVersion: 2
m_Size: {x: 1, y: 1, z: 1}
m_Center: {x: 0, y: 0, z: 0}

7
Project/Assets/ML-Agents/Examples/SharedAssets/Prefabs/PlatformDynamicTarget.prefab.meta


fileFormatVersion: 2
guid: f0d7741d9e06247f6843b921a206b978
PrefabImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

8
Project/Assets/ML-Agents/Examples/SharedAssets/Prefabs/Targets.meta


fileFormatVersion: 2
guid: 88818c9b63c96424aa8e0fca85552133
folderAsset: yes
DefaultImporter:
externalObjects: {}
userData:
assetBundleName:
assetBundleVariant:

10
Project/Assets/ML-Agents/Examples/Walker/Demos/ExpertWalkerDy.demo.meta


fileFormatVersion: 2
guid: 9f87b3070a0fd4a1e838131a91399c2f
ScriptedImporter:
fileIDToRecycleName:
11400000: Assets/Demonstrations/ExpertWalkerDy.demo
externalObjects: {}
userData: ' (Unity.MLAgents.Demonstrations.DemonstrationSummary)'
assetBundleName:
assetBundleVariant:
script: {fileID: 11500000, guid: 7bd65ce151aaa4a41a45312543c56be1, type: 3}

10
Project/Assets/ML-Agents/Examples/Walker/Demos/ExpertWalkerDyVS.demo.meta


fileFormatVersion: 2
guid: a4b02e2c382c247919eb63ce72e90a3b
ScriptedImporter:
fileIDToRecycleName:
11400000: Assets/Demonstrations/ExpertWalkerDyVS.demo
externalObjects: {}
userData: ' (Unity.MLAgents.Demonstrations.DemonstrationSummary)'
assetBundleName:
assetBundleVariant:
script: {fileID: 11500000, guid: 7bd65ce151aaa4a41a45312543c56be1, type: 3}

10
Project/Assets/ML-Agents/Examples/Walker/Demos/ExpertWalkerStVS.demo.meta


fileFormatVersion: 2
guid: edcbb505552464c5c829886a4a3817dd
ScriptedImporter:
fileIDToRecycleName:
11400000: Assets/Demonstrations/ExpertWalkerStVS.demo
externalObjects: {}
userData: ' (Unity.MLAgents.Demonstrations.DemonstrationSummary)'
assetBundleName:
assetBundleVariant:
script: {fileID: 11500000, guid: 7bd65ce151aaa4a41a45312543c56be1, type: 3}

10
Project/Assets/ML-Agents/Examples/Walker/Demos/ExpertWalkerSta.demo.meta


fileFormatVersion: 2
guid: 1f3a5d62e6aea4b5eb053ac33f11b06d
ScriptedImporter:
fileIDToRecycleName:
11400000: Assets/Demonstrations/ExpertWalkerSta.demo
externalObjects: {}
userData: ' (Unity.MLAgents.Demonstrations.DemonstrationSummary)'
assetBundleName:
assetBundleVariant:
script: {fileID: 11500000, guid: 7bd65ce151aaa4a41a45312543c56be1, type: 3}

Some files were not shown because too many files changed in this diff
