
Merge branch 'master' into develop-test-imitation

/test-recurrent-gail
Andrew Cohen, 4 years ago
Current commit
b1cfa74d
139 files changed, with 2,370 additions and 2,438 deletions
  1. Project/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DAgent.cs (10 changes)
  2. Project/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DHardAgent.cs (10 changes)
  3. Project/Assets/ML-Agents/Examples/Basic/Scripts/BasicController.cs (4 changes)
  4. Project/Assets/ML-Agents/Examples/Bouncer/Scripts/BouncerAgent.cs (14 changes)
  5. Project/Assets/ML-Agents/Examples/Crawler/Scripts/CrawlerAgent.cs (6 changes)
  6. Project/Assets/ML-Agents/Examples/FoodCollector/Scripts/FoodCollectorAgent.cs (7 changes)
  7. Project/Assets/ML-Agents/Examples/GridWorld/Demos/ExpertGrid.demo (1001 changes)
  8. Project/Assets/ML-Agents/Examples/GridWorld/Scripts/GridAgent.cs (12 changes)
  9. Project/Assets/ML-Agents/Examples/GridWorld/Scripts/GridArea.cs (2 changes)
  10. Project/Assets/ML-Agents/Examples/Hallway/Scripts/HallwayAgent.cs (9 changes)
  11. Project/Assets/ML-Agents/Examples/PushBlock/Scripts/PushAgentBasic.cs (9 changes)
  12. Project/Assets/ML-Agents/Examples/Pyramids/Scripts/PyramidAgent.cs (9 changes)
  13. Project/Assets/ML-Agents/Examples/Reacher/Scripts/ReacherAgent.cs (6 changes)
  14. Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/GroundContact.cs (2 changes)
  15. Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ModelOverrider.cs (5 changes)
  16. Project/Assets/ML-Agents/Examples/Soccer/Scripts/AgentSoccer.cs (11 changes)
  17. Project/Assets/ML-Agents/Examples/Soccer/Scripts/SoccerFieldArea.cs (2 changes)
  18. Project/Assets/ML-Agents/Examples/Startup/Scripts/Startup.cs (34 changes)
  19. Project/Assets/ML-Agents/Examples/Template/Scripts/TemplateAgent.cs (4 changes)
  20. Project/Assets/ML-Agents/Examples/Tennis/Scripts/HitWall.cs (4 changes)
  21. Project/Assets/ML-Agents/Examples/Tennis/Scripts/TennisAgent.cs (8 changes)
  22. Project/Assets/ML-Agents/Examples/Walker/Scripts/WalkerAgent.cs (8 changes)
  23. Project/Assets/ML-Agents/Examples/WallJump/Scripts/WallJumpAgent.cs (16 changes)
  24. README.md (108 changes)
  25. com.unity.ml-agents/CHANGELOG.md (16 changes)
  26. com.unity.ml-agents/Documentation~/com.unity.ml-agents.md (93 changes)
  27. com.unity.ml-agents/Editor/BehaviorParametersEditor.cs (71 changes)
  28. com.unity.ml-agents/Editor/RayPerceptionSensorComponentBaseEditor.cs (23 changes)
  29. com.unity.ml-agents/Runtime/Academy.cs (2 changes)
  30. com.unity.ml-agents/Runtime/Agent.cs (153 changes)
  31. com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs (12 changes)
  32. com.unity.ml-agents/Runtime/Communicator/RpcCommunicator.cs (61 changes)
  33. com.unity.ml-agents/Runtime/DecisionRequester.cs (9 changes)
  34. com.unity.ml-agents/Runtime/Inference/ModelRunner.cs (2 changes)
  35. com.unity.ml-agents/Runtime/Policies/BehaviorParameters.cs (151 changes)
  36. com.unity.ml-agents/Runtime/Policies/BrainParameters.cs (20 changes)
  37. com.unity.ml-agents/Runtime/Sensors/CameraSensor.cs (19 changes)
  38. com.unity.ml-agents/Runtime/Sensors/CameraSensorComponent.cs (78 changes)
  39. com.unity.ml-agents/Runtime/Sensors/RayPerceptionSensorComponent3D.cs (8 changes)
  40. com.unity.ml-agents/Runtime/Sensors/RayPerceptionSensorComponentBase.cs (49 changes)
  41. com.unity.ml-agents/Runtime/Sensors/RenderTextureSensor.cs (28 changes)
  42. com.unity.ml-agents/Runtime/Sensors/RenderTextureSensorComponent.cs (55 changes)
  43. com.unity.ml-agents/Runtime/SideChannels/EngineConfigurationChannel.cs (31 changes)
  44. com.unity.ml-agents/Runtime/SideChannels/FloatPropertiesChannel.cs (109 changes)
  45. com.unity.ml-agents/Runtime/SideChannels/RawBytesChannel.cs (10 changes)
  46. com.unity.ml-agents/Runtime/SideChannels/SideChannel.cs (13 changes)
  47. com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs (72 changes)
  48. com.unity.ml-agents/Tests/Editor/SideChannelTests.cs (69 changes)
  49. com.unity.ml-agents/package.json (2 changes)
  50. config/sac_trainer_config.yaml (2 changes)
  51. config/trainer_config.yaml (4 changes)
  52. docs/Getting-Started-with-Balance-Ball.md (21 changes)
  53. docs/Installation.md (160 changes)
  54. docs/Learning-Environment-Best-Practices.md (5 changes)
  55. docs/Learning-Environment-Create-New.md (38 changes)
  56. docs/Learning-Environment-Design-Agents.md (47 changes)
  57. docs/Learning-Environment-Design.md (33 changes)
  58. docs/Learning-Environment-Examples.md (4 changes)
  59. docs/Limitations.md (35 changes)
  60. docs/Migrating.md (20 changes)
  61. docs/Python-API.md (236 changes)
  62. docs/Readme.md (3 changes)
  63. docs/Training-Curriculum-Learning.md (2 changes)
  64. docs/Training-ML-Agents.md (1 change)
  65. docs/Using-Docker.md (9 changes)
  66. docs/Using-Virtual-Environment.md (12 changes)
  67. docs/images/unity_package_manager_window.png (951 changes)
  68. docs/localized/KR/docs/Installation.md (2 changes)
  69. docs/localized/zh-CN/docs/Installation.md (2 changes)
  70. ml-agents-envs/mlagents_envs/communicator.py (4 changes)
  71. ml-agents-envs/mlagents_envs/environment.py (7 changes)
  72. ml-agents-envs/mlagents_envs/exception.py (5 changes)
  73. ml-agents-envs/mlagents_envs/rpc_communicator.py (3 changes)
  74. ml-agents-envs/mlagents_envs/rpc_utils.py (3 changes)
  75. ml-agents-envs/mlagents_envs/side_channel/__init__.py (4 changes)
  76. ml-agents-envs/mlagents_envs/side_channel/engine_configuration_channel.py (23 changes)
  77. ml-agents-envs/mlagents_envs/side_channel/float_properties_channel.py (36 changes)
  78. ml-agents-envs/mlagents_envs/side_channel/raw_bytes_channel.py (10 changes)
  79. ml-agents-envs/mlagents_envs/side_channel/side_channel.py (13 changes)
  80. ml-agents-envs/mlagents_envs/tests/test_side_channel.py (68 changes)
  81. ml-agents/mlagents/trainers/brain.py (4 changes)
  82. ml-agents/mlagents/trainers/components/reward_signals/__init__.py (5 changes)
  83. ml-agents/mlagents/trainers/components/reward_signals/curiosity/signal.py (5 changes)
  84. ml-agents/mlagents/trainers/components/reward_signals/extrinsic/signal.py (3 changes)
  85. ml-agents/mlagents/trainers/components/reward_signals/gail/signal.py (19 changes)
  86. ml-agents/mlagents/trainers/components/reward_signals/reward_signal_factory.py (4 changes)
  87. ml-agents/mlagents/trainers/demo_loader.py (4 changes)
  88. ml-agents/mlagents/trainers/distributions.py (23 changes)
  89. ml-agents/mlagents/trainers/ghost/trainer.py (7 changes)
  90. ml-agents/mlagents/trainers/learn.py (155 changes)
  91. ml-agents/mlagents/trainers/models.py (39 changes)
  92. ml-agents/mlagents/trainers/policy/nn_policy.py (5 changes)
  93. ml-agents/mlagents/trainers/policy/tf_policy.py (4 changes)
  94. ml-agents/mlagents/trainers/ppo/optimizer.py (13 changes)
  95. ml-agents/mlagents/trainers/ppo/trainer.py (18 changes)
  96. ml-agents/mlagents/trainers/sac/network.py (6 changes)
  97. ml-agents/mlagents/trainers/sac/optimizer.py (56 changes)
  98. ml-agents/mlagents/trainers/sac/trainer.py (22 changes)
  99. ml-agents/mlagents/trainers/tests/simple_test_envs.py (136 changes)
  100. ml-agents/mlagents/trainers/tests/test_barracuda_converter.py (60 changes)

10
Project/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DAgent.cs


[Header("Specific to Ball3D")]
public GameObject ball;
Rigidbody m_BallRb;
IFloatProperties m_ResetParams;
FloatPropertiesChannel m_ResetParams;
public override void InitializeAgent()
public override void Initialize()
{
m_BallRb = ball.GetComponent<Rigidbody>();
m_ResetParams = Academy.Instance.FloatProperties;

sensor.AddObservation(m_BallRb.velocity);
}
public override void AgentAction(float[] vectorAction)
public override void OnActionReceived(float[] vectorAction)
{
var actionZ = 2f * Mathf.Clamp(vectorAction[0], -1f, 1f);
var actionX = 2f * Mathf.Clamp(vectorAction[1], -1f, 1f);

Mathf.Abs(ball.transform.position.z - gameObject.transform.position.z) > 3f)
{
SetReward(-1f);
Done();
EndEpisode();
}
else
{

public override void AgentReset()
public override void OnEpisodeBegin()
{
gameObject.transform.rotation = new Quaternion(0f, 0f, 0f, 0f);
gameObject.transform.Rotate(new Vector3(1, 0, 0), Random.Range(-10f, 10f));
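As a quick illustration of the `IFloatProperties` to `FloatPropertiesChannel` change above, reading a reset parameter through the new concrete type could look like the sketch below. The component name and the "ball_scale" key are hypothetical; `GetPropertyWithDefault` is the accessor this diff itself uses in WallJumpAgent, and the `MLAgents.SideChannels` using follows the namespace move noted in the CHANGELOG.

```csharp
using UnityEngine;
using MLAgents;
using MLAgents.SideChannels;  // assumed namespace for FloatPropertiesChannel on this branch

public class ResetParamsExample : MonoBehaviour
{
    FloatPropertiesChannel m_ResetParams;

    void Start()
    {
        // The concrete channel type replaces the removed IFloatProperties interface.
        m_ResetParams = Academy.Instance.FloatProperties;

        // "ball_scale" is a hypothetical key; 1.0f is used if the trainer never set it.
        var scale = m_ResetParams.GetPropertyWithDefault("ball_scale", 1.0f);
        transform.localScale = Vector3.one * scale;
    }
}
```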

10
Project/Assets/ML-Agents/Examples/3DBall/Scripts/Ball3DHardAgent.cs


[Header("Specific to Ball3DHard")]
public GameObject ball;
Rigidbody m_BallRb;
IFloatProperties m_ResetParams;
FloatPropertiesChannel m_ResetParams;
public override void InitializeAgent()
public override void Initialize()
{
m_BallRb = ball.GetComponent<Rigidbody>();
m_ResetParams = Academy.Instance.FloatProperties;

sensor.AddObservation((ball.transform.position - gameObject.transform.position));
}
public override void AgentAction(float[] vectorAction)
public override void OnActionReceived(float[] vectorAction)
{
var actionZ = 2f * Mathf.Clamp(vectorAction[0], -1f, 1f);
var actionX = 2f * Mathf.Clamp(vectorAction[1], -1f, 1f);

Mathf.Abs(ball.transform.position.z - gameObject.transform.position.z) > 3f)
{
SetReward(-1f);
Done();
EndEpisode();
}
else
{

public override void AgentReset()
public override void OnEpisodeBegin()
{
gameObject.transform.rotation = new Quaternion(0f, 0f, 0f, 0f);
gameObject.transform.Rotate(new Vector3(1, 0, 0), Random.Range(-10f, 10f));

4
Project/Assets/ML-Agents/Examples/Basic/Scripts/BasicController.cs


if (m_Position == k_SmallGoalPosition)
{
m_Agent.AddReward(0.1f);
m_Agent.Done();
m_Agent.EndEpisode();
ResetAgent();
}

m_Agent.Done();
m_Agent.EndEpisode();
ResetAgent();
}
}

14
Project/Assets/ML-Agents/Examples/Bouncer/Scripts/BouncerAgent.cs


int m_NumberJumps = 20;
int m_JumpLeft = 20;
IFloatProperties m_ResetParams;
FloatPropertiesChannel m_ResetParams;
public override void InitializeAgent()
public override void Initialize()
{
m_Rb = gameObject.GetComponent<Rigidbody>();
m_LookDir = Vector3.zero;

sensor.AddObservation(target.transform.localPosition);
}
public override void AgentAction(float[] vectorAction)
public override void OnActionReceived(float[] vectorAction)
{
for (var i = 0; i < vectorAction.Length; i++)
{

m_LookDir = new Vector3(x, y, z);
}
public override void AgentReset()
public override void OnEpisodeBegin()
{
gameObject.transform.localPosition = new Vector3(
(1 - 2 * Random.value) * 5, 2, (1 - 2 * Random.value) * 5);

if (gameObject.transform.position.y < -1)
{
AddReward(-1);
Done();
EndEpisode();
return;
}

AddReward(-1);
Done();
EndEpisode();
Done();
EndEpisode();
}
}

6
Project/Assets/ML-Agents/Examples/Crawler/Scripts/CrawlerAgent.cs


Quaternion m_LookRotation;
Matrix4x4 m_TargetDirMatrix;
public override void InitializeAgent()
public override void Initialize()
{
m_JdController = GetComponent<JointDriveController>();
m_DirToTarget = target.position - body.position;

target.position = newTargetPos + ground.position;
}
public override void AgentAction(float[] vectorAction)
public override void OnActionReceived(float[] vectorAction)
{
// The dictionary with all the body parts in it are in the jdController
var bpDict = m_JdController.bodyPartsDict;

/// <summary>
/// Loop over body parts and reset them to initial conditions.
/// </summary>
public override void AgentReset()
public override void OnEpisodeBegin()
{
if (m_DirToTarget != Vector3.zero)
{

7
Project/Assets/ML-Agents/Examples/FoodCollector/Scripts/FoodCollectorAgent.cs


public bool useVectorObs;
public override void InitializeAgent()
public override void Initialize()
base.InitializeAgent();
m_AgentRb = GetComponent<Rigidbody>();
m_MyArea = area.GetComponent<FoodCollectorArea>();
m_FoodCollecterSettings = FindObjectOfType<FoodCollectorSettings>();

gameObject.GetComponentInChildren<Renderer>().material = normalMaterial;
}
public override void AgentAction(float[] vectorAction)
public override void OnActionReceived(float[] vectorAction)
{
MoveAgent(vectorAction);
}

return action;
}
public override void AgentReset()
public override void OnEpisodeBegin()
{
Unfreeze();
Unpoison();

1001
Project/Assets/ML-Agents/Examples/GridWorld/Demos/ExpertGrid.demo
(File diff too large to display.)

12
Project/Assets/ML-Agents/Examples/GridWorld/Scripts/GridAgent.cs


const int k_Left = 3;
const int k_Right = 4;
public override void InitializeAgent()
{
}
public override void CollectDiscreteActionMasks(DiscreteActionMasker actionMasker)
{
// Mask the necessary actions if selected by the user.

}
// to be implemented by the developer
public override void AgentAction(float[] vectorAction)
public override void OnActionReceived(float[] vectorAction)
{
AddReward(-0.01f);
var action = Mathf.FloorToInt(vectorAction[0]);

if (hit.Where(col => col.gameObject.CompareTag("goal")).ToArray().Length == 1)
{
SetReward(1f);
Done();
EndEpisode();
Done();
EndEpisode();
}
}
}

}
// to be implemented by the developer
public override void AgentReset()
public override void OnEpisodeBegin()
{
area.AreaReset();
}
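For readers unfamiliar with the `CollectDiscreteActionMasks` hook shown above, a hypothetical sketch of masking a discrete action is given below. The wall check is a placeholder, and the `DiscreteActionMasker.SetMask(branch, indices)` call reflects my understanding of the masker API on this branch rather than code from this diff.

```csharp
using UnityEngine;
using MLAgents;

public class MaskingAgent : Agent
{
    const int k_Left = 3;

    public override void CollectDiscreteActionMasks(DiscreteActionMasker actionMasker)
    {
        // Branch 0 is the single movement branch; mask "left" when the agent is
        // (hypothetically) against the left wall so the policy cannot choose it.
        if (IsAgainstLeftWall())
        {
            actionMasker.SetMask(0, new[] { k_Left });
        }
    }

    bool IsAgainstLeftWall()
    {
        // Placeholder for a real position or raycast check.
        return transform.localPosition.x < -4.5f;
    }

    public override void OnActionReceived(float[] vectorAction)
    {
        // Movement logic omitted in this sketch.
    }
}
```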

2
Project/Assets/ML-Agents/Examples/GridWorld/Scripts/GridArea.cs


public GameObject trueAgent;
IFloatProperties m_ResetParameters;
FloatPropertiesChannel m_ResetParameters;
Camera m_AgentCam;

9
Project/Assets/ML-Agents/Examples/Hallway/Scripts/HallwayAgent.cs


HallwaySettings m_HallwaySettings;
int m_Selection;
public override void InitializeAgent()
public override void Initialize()
base.InitializeAgent();
m_HallwaySettings = FindObjectOfType<HallwaySettings>();
m_AgentRb = GetComponent<Rigidbody>();
m_GroundRenderer = ground.GetComponent<Renderer>();

m_AgentRb.AddForce(dirToGo * m_HallwaySettings.agentRunSpeed, ForceMode.VelocityChange);
}
public override void AgentAction(float[] vectorAction)
public override void OnActionReceived(float[] vectorAction)
{
AddReward(-1f / maxStep);
MoveAgent(vectorAction);

SetReward(-0.1f);
StartCoroutine(GoalScoredSwapGroundMaterial(m_HallwaySettings.failMaterial, 0.5f));
}
Done();
EndEpisode();
}
}

return new float[] { 0 };
}
public override void AgentReset()
public override void OnEpisodeBegin()
{
var agentOffset = -15f;
var blockOffset = 0f;

9
Project/Assets/ML-Agents/Examples/PushBlock/Scripts/PushAgentBasic.cs


m_PushBlockSettings = FindObjectOfType<PushBlockSettings>();
}
public override void InitializeAgent()
public override void Initialize()
base.InitializeAgent();
goalDetect = block.GetComponent<GoalDetect>();
goalDetect.agent = this;

AddReward(5f);
// By marking an agent as done AgentReset() will be called automatically.
Done();
EndEpisode();
// Swap ground material for a bit to indicate we scored.
StartCoroutine(GoalScoredSwapGroundMaterial(m_PushBlockSettings.goalScoredMaterial, 0.5f));

/// <summary>
/// Called every step of the engine. Here the agent takes an action.
/// </summary>
public override void AgentAction(float[] vectorAction)
public override void OnActionReceived(float[] vectorAction)
{
// Move the agent using the action.
MoveAgent(vectorAction);

/// In the editor, if "Reset On Done" is checked then AgentReset() will be
/// called automatically anytime we mark done = true in an agent script.
/// </summary>
public override void AgentReset()
public override void OnEpisodeBegin()
{
var rotation = Random.Range(0, 4);
var rotationAngle = rotation * 90f;

9
Project/Assets/ML-Agents/Examples/Pyramids/Scripts/PyramidAgent.cs


public GameObject areaSwitch;
public bool useVectorObs;
public override void InitializeAgent()
public override void Initialize()
base.InitializeAgent();
m_AgentRb = GetComponent<Rigidbody>();
m_MyArea = area.GetComponent<PyramidArea>();
m_SwitchLogic = areaSwitch.GetComponent<PyramidSwitch>();

m_AgentRb.AddForce(dirToGo * 2f, ForceMode.VelocityChange);
}
public override void AgentAction(float[] vectorAction)
public override void OnActionReceived(float[] vectorAction)
{
AddReward(-1f / maxStep);
MoveAgent(vectorAction);

return new float[] { 0 };
}
public override void AgentReset()
public override void OnEpisodeBegin()
{
var enumerable = Enumerable.Range(0, 9).OrderBy(x => Guid.NewGuid()).Take(9);
var items = enumerable.ToArray();

if (collision.gameObject.CompareTag("goal"))
{
SetReward(2f);
Done();
EndEpisode();
}
}
}

6
Project/Assets/ML-Agents/Examples/Reacher/Scripts/ReacherAgent.cs


/// Collect the rigidbodies of the reacher in order to reuse them for
/// observations and actions.
/// </summary>
public override void InitializeAgent()
public override void Initialize()
{
m_RbA = pendulumA.GetComponent<Rigidbody>();
m_RbB = pendulumB.GetComponent<Rigidbody>();

/// <summary>
/// The agent's four actions correspond to torques on each of the two joints.
/// </summary>
public override void AgentAction(float[] vectorAction)
public override void OnActionReceived(float[] vectorAction)
{
m_GoalDegree += m_GoalSpeed;
UpdateGoalPosition();

/// <summary>
/// Resets the position and velocity of the agent and the goal.
/// </summary>
public override void AgentReset()
public override void OnEpisodeBegin()
{
pendulumA.transform.position = new Vector3(0f, -4f, 0f) + transform.position;
pendulumA.transform.rotation = Quaternion.Euler(180f, 0f, 0f);

2
Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/GroundContact.cs


if (agentDoneOnGroundContact)
{
agent.Done();
agent.EndEpisode();
}
}
}

5
Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ModelOverrider.cs


void OverrideModel()
{
m_Agent.LazyInitialize();
var name = m_Agent.BehaviorName;
var bp = m_Agent.GetComponent<BehaviorParameters>();
var name = bp.behaviorName;
m_Agent.GiveModel($"Override_{name}", nnModel);
m_Agent.SetModel($"Override_{name}", nnModel);
}
}

11
Project/Assets/ML-Agents/Examples/Soccer/Scripts/AgentSoccer.cs


[HideInInspector]
public Rigidbody agentRb;
SoccerSettings m_SoccerSettings;
BehaviorParameters m_BehaviorParameters;
public override void InitializeAgent()
public override void Initialize()
base.InitializeAgent();
if (TeamId == (int)Team.Blue)
m_BehaviorParameters = gameObject.GetComponent<BehaviorParameters>();
if (m_BehaviorParameters.TeamId == (int)Team.Blue)
{
team = Team.Blue;
m_Transform = new Vector3(transform.position.x - 4f, .5f, transform.position.z);

ForceMode.VelocityChange);
}
public override void AgentAction(float[] vectorAction)
public override void OnActionReceived(float[] vectorAction)
{
// Existential penalty for strikers.
AddReward(-1f / 3000f);

}
}
public override void AgentReset()
public override void OnEpisodeBegin()
{
if (team == Team.Purple)
{

2
Project/Assets/ML-Agents/Examples/Soccer/Scripts/SoccerFieldArea.cs


{
ps.agentScript.AddReward(-1);
}
ps.agentScript.Done(); //all agents need to be reset
ps.agentScript.EndEpisode(); //all agents need to be reset
if (goalTextUI)
{

34
Project/Assets/ML-Agents/Examples/Startup/Scripts/Startup.cs


internal class Startup : MonoBehaviour
{
const string k_SceneVariableName = "SCENE_NAME";
private const string k_SceneCommandLineFlag = "--mlagents-scene-name";
var sceneName = Environment.GetEnvironmentVariable(k_SceneVariableName);
var sceneName = "";
// Check for the CLI '--scene-name' flag. This will be used if
// no scene environment variable is found.
var args = Environment.GetCommandLineArgs();
Console.WriteLine("Command line arguments passed: " + String.Join(" ", args));
for (int i = 0; i < args.Length; i++) {
if (args [i] == k_SceneCommandLineFlag && i < args.Length - 1) {
sceneName = args[i + 1];
}
}
var sceneEnvironmentVariable = Environment.GetEnvironmentVariable(k_SceneVariableName);
if (!string.IsNullOrEmpty(sceneEnvironmentVariable))
{
sceneName = sceneEnvironmentVariable;
}
SwitchScene(sceneName);
}

{
throw new ArgumentException(
$"You didn't specified the {k_SceneVariableName} environment variable");
Console.WriteLine(
$"You didn't specify the {k_SceneVariableName} environment variable or the {k_SceneCommandLineFlag} command line argument."
);
Application.Quit(22);
return;
throw new ArgumentException(
$"The scene {sceneName} doesn't exist within your build. ");
Console.WriteLine(
$"The scene {sceneName} doesn't exist within your build."
);
Application.Quit(22);
return;
}
SceneManager.LoadSceneAsync(sceneName);
}

4
Project/Assets/ML-Agents/Examples/Template/Scripts/TemplateAgent.cs


{
}
public override void AgentAction(float[] vectorAction)
public override void OnActionReceived(float[] vectorAction)
public override void AgentReset()
public override void OnEpisodeBegin()
{
}
}

4
Project/Assets/ML-Agents/Examples/Tennis/Scripts/HitWall.cs


void Reset()
{
m_AgentA.Done();
m_AgentB.Done();
m_AgentA.EndEpisode();
m_AgentB.EndEpisode();
m_Area.MatchReset();
lastFloorHit = FloorHit.Service;
net = false;

8
Project/Assets/ML-Agents/Examples/Tennis/Scripts/TennisAgent.cs


Rigidbody m_AgentRb;
Rigidbody m_BallRb;
float m_InvertMult;
IFloatProperties m_ResetParams;
FloatPropertiesChannel m_ResetParams;
// Looks for the scoreboard based on the name of the gameObjects.
// Do not modify the names of the Score GameObjects

public override void InitializeAgent()
public override void Initialize()
{
m_AgentRb = GetComponent<Rigidbody>();
m_BallRb = ball.GetComponent<Rigidbody>();

sensor.AddObservation(m_InvertMult * gameObject.transform.rotation.z);
}
public override void AgentAction(float[] vectorAction)
public override void OnActionReceived(float[] vectorAction)
{
var moveX = Mathf.Clamp(vectorAction[0], -1f, 1f) * m_InvertMult;
var moveY = Mathf.Clamp(vectorAction[1], -1f, 1f);

return action;
}
public override void AgentReset()
public override void OnEpisodeBegin()
{
m_InvertMult = invertX ? -1f : 1f;

8
Project/Assets/ML-Agents/Examples/Walker/Scripts/WalkerAgent.cs


Rigidbody m_ChestRb;
Rigidbody m_SpineRb;
IFloatProperties m_ResetParams;
FloatPropertiesChannel m_ResetParams;
public override void InitializeAgent()
public override void Initialize()
{
m_JdController = GetComponent<JointDriveController>();
m_JdController.SetupBodyPart(hips);

}
}
public override void AgentAction(float[] vectorAction)
public override void OnActionReceived(float[] vectorAction)
{
var bpDict = m_JdController.bodyPartsDict;
var i = -1;

/// <summary>
/// Loop over body parts and reset them to initial conditions.
/// </summary>
public override void AgentReset()
public override void OnEpisodeBegin()
{
if (m_DirToTarget != Vector3.zero)
{

16
Project/Assets/ML-Agents/Examples/WallJump/Scripts/WallJumpAgent.cs


Vector3 m_JumpTargetPos;
Vector3 m_JumpStartingPos;
public override void InitializeAgent()
public override void Initialize()
{
m_WallJumpSettings = FindObjectOfType<WallJumpSettings>();
m_Configuration = Random.Range(0, 5);

jumpingTime -= Time.fixedDeltaTime;
}
public override void AgentAction(float[] vectorAction)
public override void OnActionReceived(float[] vectorAction)
{
MoveAgent(vectorAction);
if ((!Physics.Raycast(m_AgentRb.position, Vector3.down, 20))

Done();
EndEpisode();
ResetBlock(m_ShortBlockRb);
StartCoroutine(
GoalScoredSwapGroundMaterial(m_WallJumpSettings.failMaterial, .5f));

if (col.gameObject.CompareTag("goal") && DoGroundCheck(true))
{
SetReward(1f);
Done();
EndEpisode();
StartCoroutine(
GoalScoredSwapGroundMaterial(m_WallJumpSettings.goalScoredMaterial, 2));
}

blockRb.angularVelocity = Vector3.zero;
}
public override void AgentReset()
public override void OnEpisodeBegin()
{
ResetBlock(m_ShortBlockRb);
transform.localPosition = new Vector3(

Academy.Instance.FloatProperties.GetPropertyWithDefault("no_wall_height", 0),
localScale.z);
wall.transform.localScale = localScale;
GiveModel("SmallWallJump", noWallBrain);
SetModel("SmallWallJump", noWallBrain);
}
else if (config == 1)
{

localScale.z);
wall.transform.localScale = localScale;
GiveModel("SmallWallJump", smallWallBrain);
SetModel("SmallWallJump", smallWallBrain);
}
else
{

height,
localScale.z);
wall.transform.localScale = localScale;
GiveModel("BigWallJump", bigWallBrain);
SetModel("BigWallJump", bigWallBrain);
}
}
}

108
README.md


used for multiple purposes, including controlling NPC behavior (in a variety of
settings such as multi-agent and adversarial), automated testing of game builds
and evaluating different game design decisions pre-release. The ML-Agents
toolkit is mutually beneficial for both game developers and AI researchers as it
Toolkit is mutually beneficial for both game developers and AI researchers as it
provides a central platform where advances in AI can be evaluated on Unity’s
rich environments and then made accessible to the wider research and game
developer communities.

* Unity environment control from Python
* 10+ sample Unity environments
* 15+ sample Unity environments
* Two deep reinforcement learning algorithms,
[Proximal Policy Optimization](https://github.com/Unity-Technologies/ml-agents/tree/latest_release/docs/Training-PPO.md)
(PPO) and [Soft Actor-Critic](https://github.com/Unity-Technologies/ml-agents/tree/latest_release/docs/Training-SAC.md)

* Built-in support for Imitation Learning
* Flexible agent control with On Demand Decision Making
* Visualizing network outputs within the environment
* Simplified set-up with Docker
## Documentation
## Releases & Documentation
**Our latest, stable release is 0.14.1. Click
[here](https://github.com/Unity-Technologies/ml-agents/tree/latest_release/docs/Readme.md) to
get started with the latest release of ML-Agents.**
The table below lists all our releases, including our `master` branch which is under active
development and may be unstable. A few helpful guidelines:
* The docs links in the table below include installation and usage instructions specific to each
release. Remember to always use the documentation that corresponds to the release version you're
using.
* See the [GitHub releases](https://github.com/Unity-Technologies/ml-agents/releases) for more
details of the changes between versions.
* If you have used an earlier version of the ML-Agents Toolkit, we strongly recommend our
[guide on migrating from earlier versions](docs/Migrating.md).
* For more information, in addition to installation and usage instructions, see
the [documentation for the latest release](https://github.com/Unity-Technologies/ml-agents/tree/latest_release/docs/Readme.md).
* If you are a researcher interested in a discussion of Unity as an AI platform, see a pre-print of our [reference paper on Unity and the ML-Agents Toolkit](https://arxiv.org/abs/1809.02627). Also, see below for instructions on citing this paper.
* If you have used an earlier version of the ML-Agents toolkit, we strongly
recommend our [guide on migrating from earlier versions](docs/Migrating.md).
| **Version** | **Release Date** | **Source** | **Documentation** | **Download** |
|:-------:|:------:|:-------------:|:-------:|:------------:|
| **master** (unstable) | -- | [source](https://github.com/Unity-Technologies/ml-agents/tree/master) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/master/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/master.zip) |
| **0.14.1** (latest stable release) | February 26, 2020 | **[source](https://github.com/Unity-Technologies/ml-agents/tree/latest_release)** | **[docs](https://github.com/Unity-Technologies/ml-agents/tree/latest_release/docs/Readme.md)** | **[download](https://github.com/Unity-Technologies/ml-agents/archive/latest_release.zip)** |
| **0.14.0** | February 13, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/0.14.0) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/0.14.0/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/0.14.0.zip) |
| **0.13.1** | January 21, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/0.13.1) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/0.13.1/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/0.13.1.zip) |
| **0.13.0** | January 8, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/0.13.0) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/0.13.0/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/0.13.0.zip) |
| **0.12.1** | December 11, 2019 | [source](https://github.com/Unity-Technologies/ml-agents/tree/0.12.1) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/0.12.1/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/0.12.1.zip) |
| **0.12.0** | December 2, 2019 | [source](https://github.com/Unity-Technologies/ml-agents/tree/0.12.0) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/0.12.0/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/0.12.0.zip) |
| **0.11.0** | November 4, 2019 | [source](https://github.com/Unity-Technologies/ml-agents/tree/0.11.0) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/0.11.0/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/0.11.0.zip) |
| **0.10.1** | October 9, 2019 | [source](https://github.com/Unity-Technologies/ml-agents/tree/0.10.1) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/0.10.1/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/0.10.1.zip) |
| **0.10.0** | September 30, 2019 | [source](https://github.com/Unity-Technologies/ml-agents/tree/0.10.0) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/0.10.0/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/0.10.0.zip) |
## Citation
If you are a researcher interested in a discussion of Unity as an AI platform, see a pre-print
of our [reference paper on Unity and the ML-Agents Toolkit](https://arxiv.org/abs/1809.02627).
If you use Unity or the ML-Agents Toolkit to conduct research, we ask that you cite the following
paper as a reference:
Juliani, A., Berges, V., Vckay, E., Gao, Y., Henry, H., Mattar, M., Lange, D. (2018). Unity: A General Platform for Intelligent Agents. *arXiv preprint arXiv:1809.02627.* https://github.com/Unity-Technologies/ml-agents.
* (February 28, 2020) [Training intelligent adversaries using self-play with ML-Agents](https://blogs.unity3d.com/2020/02/28/training-intelligent-adversaries-using-self-play-with-ml-agents/)
* (November 11, 2019) [Training your agents 7 times faster with ML-Agents](https://blogs.unity3d.com/2019/11/11/training-your-agents-7-times-faster-with-ml-agents/)
* (October 21, 2019) [The AI@Unity interns help shape the world](https://blogs.unity3d.com/2019/10/21/the-aiunity-interns-help-shape-the-world/)
* (April 15, 2019) [Unity ML-Agents Toolkit v0.8: Faster training on real games](https://blogs.unity3d.com/2019/04/15/unity-ml-agents-toolkit-v0-8-faster-training-on-real-games/)
* (March 1, 2019) [Unity ML-Agents Toolkit v0.7: A leap towards cross-platform inference](https://blogs.unity3d.com/2019/03/01/unity-ml-agents-toolkit-v0-7-a-leap-towards-cross-platform-inference/)
* (December 17, 2018) [ML-Agents Toolkit v0.6: Improved usability of Brains and Imitation Learning](https://blogs.unity3d.com/2018/12/17/ml-agents-toolkit-v0-6-improved-usability-of-brains-and-imitation-learning/)
* (October 2, 2018) [Puppo, The Corgi: Cuteness Overload with the Unity ML-Agents Toolkit](https://blogs.unity3d.com/2018/10/02/puppo-the-corgi-cuteness-overload-with-the-unity-ml-agents-toolkit/)
* (September 11, 2018) [ML-Agents Toolkit v0.5, new resources for AI researchers available now](https://blogs.unity3d.com/2018/09/11/ml-agents-toolkit-v0-5-new-resources-for-ai-researchers-available-now/)
* (June 26, 2018) [Solving sparse-reward tasks with Curiosity](https://blogs.unity3d.com/2018/06/26/solving-sparse-reward-tasks-with-curiosity/)
* (June 19, 2018) [Unity ML-Agents Toolkit v0.4 and Udacity Deep Reinforcement Learning Nanodegree](https://blogs.unity3d.com/2018/06/19/unity-ml-agents-toolkit-v0-4-and-udacity-deep-reinforcement-learning-nanodegree/)
* (May 24, 2018) [Imitation Learning in Unity: The Workflow](https://blogs.unity3d.com/2018/05/24/imitation-learning-in-unity-the-workflow/)
* (March 15, 2018) [ML-Agents Toolkit v0.3 Beta released: Imitation Learning, feedback-driven features, and more](https://blogs.unity3d.com/2018/03/15/ml-agents-v0-3-beta-released-imitation-learning-feedback-driven-features-and-more/)
* (December 11, 2017) [Using Machine Learning Agents in a real game: a beginner’s guide](https://blogs.unity3d.com/2017/12/11/using-machine-learning-agents-in-a-real-game-a-beginners-guide/)
* (December 8, 2017) [Introducing ML-Agents Toolkit v0.2: Curriculum Learning, new environments, and more](https://blogs.unity3d.com/2017/12/08/introducing-ml-agents-v0-2-curriculum-learning-new-environments-and-more/)
* (September 19, 2017) [Introducing: Unity Machine Learning Agents Toolkit](https://blogs.unity3d.com/2017/09/19/introducing-unity-machine-learning-agents/)
* [Using Machine Learning Agents in a real game: a beginner’s guide](https://blogs.unity3d.com/2017/12/11/using-machine-learning-agents-in-a-real-game-a-beginners-guide/)
* [Post](https://blogs.unity3d.com/2018/02/28/introducing-the-winners-of-the-first-ml-agents-challenge/)
announcing the winners of our
[first ML-Agents Challenge](https://connect.unity.com/challenges/ml-agents-1)
* [Post](https://blogs.unity3d.com/2018/01/23/designing-safer-cities-through-simulations/)
overviewing how Unity can be leveraged as a simulator to design safer cities.
In addition to our own documentation, here are some additional, relevant articles:

## Community and Feedback
The ML-Agents toolkit is an open-source project and we encourage and welcome
The ML-Agents Toolkit is an open-source project and we encourage and welcome
For problems with the installation and setup of the ML-Agents toolkit, or
For problems with the installation and setup of the ML-Agents Toolkit, or
If you run into any other problems using the ML-Agents toolkit, or have a specific
If you run into any other problems using the ML-Agents Toolkit, or have a specific
Your opinion matters a great deal to us. Only by hearing your thoughts on the Unity ML-Agents Toolkit can we continue
to improve and grow. Please take a few minutes to [let us know about it](https://github.com/Unity-Technologies/ml-agents/issues/1454).
Your opinion matters a great deal to us. Only by hearing your thoughts on the Unity ML-Agents
Toolkit can we continue to improve and grow. Please take a few minutes to
[let us know about it](https://github.com/Unity-Technologies/ml-agents/issues/1454).
## Releases
The latest release is 0.14.1. Previous releases can be found below:
| **Version** | **Source** | **Documentation** | **Download** |
|:-------:|:------:|:-------------:|:-------:|
| **0.14.0** | [source](https://github.com/Unity-Technologies/ml-agents/tree/0.14.0) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/0.14.0/docs) | [download](https://github.com/Unity-Technologies/ml-agents/archive/0.14.0.zip) |
| **0.13.1** | [source](https://github.com/Unity-Technologies/ml-agents/tree/0.13.1) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/0.13.1/docs) | [download](https://github.com/Unity-Technologies/ml-agents/archive/0.13.1.zip) |
| **0.13.0** | [source](https://github.com/Unity-Technologies/ml-agents/tree/0.13.0) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/0.13.0/docs) | [download](https://github.com/Unity-Technologies/ml-agents/archive/0.13.0.zip) |
| **0.12.1** | [source](https://github.com/Unity-Technologies/ml-agents/tree/0.12.1) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/0.12.1/docs) | [download](https://github.com/Unity-Technologies/ml-agents/archive/0.12.1.zip) |
| **0.12.0** | [source](https://github.com/Unity-Technologies/ml-agents/tree/0.12.0) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/0.12.0/docs) | [download](https://github.com/Unity-Technologies/ml-agents/archive/0.12.0.zip) |
| **0.11.0** | [source](https://github.com/Unity-Technologies/ml-agents/tree/0.11.0) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/0.11.0/docs) | [download](https://github.com/Unity-Technologies/ml-agents/archive/0.11.0.zip) |
| **0.10.1** | [source](https://github.com/Unity-Technologies/ml-agents/tree/0.10.1) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/0.10.1/docs) | [download](https://github.com/Unity-Technologies/ml-agents/archive/0.10.1.zip) |
| **0.10.0** | [source](https://github.com/Unity-Technologies/ml-agents/tree/0.10.0) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/0.10.0/docs) | [download](https://github.com/Unity-Technologies/ml-agents/archive/0.10.0.zip) |
See the [GitHub releases](https://github.com/Unity-Technologies/ml-agents/releases) for more details of the changes
between versions.
Please note that the `master` branch is under active development, so the documentation there may differ from the code
of a previous release. Always use the documentation that corresponds to the release version you're using.
## Citation
If you use Unity or the ML-Agents Toolkit to conduct research, we ask that you cite the following paper as a reference:
Juliani, A., Berges, V., Vckay, E., Gao, Y., Henry, H., Mattar, M., Lange, D. (2018). Unity: A General Platform for Intelligent Agents. *arXiv preprint arXiv:1809.02627.* https://github.com/Unity-Technologies/ml-agents.

16
com.unity.ml-agents/CHANGELOG.md


- All SideChannel related code has been moved to the namespace `MLAgents.SideChannels`.
- `BrainParameters` and `SpaceType` have been removed from the public API
- `BehaviorParameters` have been removed from the public API.
- The following methods in the `Agent` class have been deprecated and will be removed in a later release:
- `InitializeAgent()` was renamed to `Initialize()`
- `AgentAction()` was renamed to `OnActionReceived()`
- `AgentReset()` was renamed to `OnEpisodeBegin()`
- `Done()` was renamed to `EndEpisode()`
- `GiveModel()` was renamed to `SetModel()`
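A minimal agent written against the renamed API overrides the new entry points instead of the deprecated ones. The class below is a hypothetical sketch rather than one of the example agents; the force scale, reset position, and reward logic are placeholders, and the using directives assume the MLAgents / MLAgents.Sensors namespaces used on this branch.

```csharp
using UnityEngine;
using MLAgents;
using MLAgents.Sensors;  // assumed namespace for VectorSensor on this branch

public class MigratedAgent : Agent
{
    Rigidbody m_Rb;

    // Formerly InitializeAgent()
    public override void Initialize()
    {
        m_Rb = GetComponent<Rigidbody>();
    }

    // Formerly AgentReset()
    public override void OnEpisodeBegin()
    {
        m_Rb.velocity = Vector3.zero;
        transform.localPosition = new Vector3(0f, 0.5f, 0f);
    }

    public override void CollectObservations(VectorSensor sensor)
    {
        sensor.AddObservation(transform.localPosition);
        sensor.AddObservation(m_Rb.velocity);
    }

    // Formerly AgentAction()
    public override void OnActionReceived(float[] vectorAction)
    {
        var force = new Vector3(vectorAction[0], 0f, vectorAction[1]);
        m_Rb.AddForce(force * 10f);

        // Placeholder terminal condition; EndEpisode() replaces Done().
        if (transform.localPosition.y < 0f)
        {
            SetReward(-1f);
            EndEpisode();
        }
    }

    public override float[] Heuristic()
    {
        // Map keyboard input to the two continuous actions for manual testing.
        return new[] { Input.GetAxis("Horizontal"), Input.GetAxis("Vertical") };
    }
}
```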
### Minor Changes
- Monitor.cs was moved to Examples. (#3372)

- Academy.RegisterSideChannel and UnregisterSideChannel methods were added. (#3391)
- A tutorial on adding custom SideChannels was added (#3391)
- The stepping logic for the Agent and the Academy has been simplified (#3448)
- Update Barracuda to 0.6.0-preview
- Update Barracuda to 0.6.1-preview
* The interface for `RayPerceptionSensor.PerceiveStatic()` was changed to take an input class and write to an output class, and the method was renamed to `Perceive()`.
- The checkpoint file suffix was changed from `.cptk` to `.ckpt` (#3470)
- The command-line argument used to determine the port that an environment will listen on was changed from `--port` to `--mlagents-port`.

- The method `GetStepCount()` on the Agent class has been replaced with the property getter `StepCount`
- `RayPerceptionSensorComponent` and related classes now display the debug gizmos whenever the Agent is selected (not just Play mode).
- Most fields on `RayPerceptionSensorComponent` can now be changed while the editor is in Play mode. The exceptions to this are fields that affect the number of observations.
- Most fields on `CameraSensorComponent` and `RenderTextureSensorComponent` were changed to private and replaced by properties with the same name.
- `DecisionRequester` has been made internal (you can still use the DecisionRequesterComponent from the inspector). `RepeatAction` was renamed `TakeActionsBetweenDecisions` for clarity. (#3555)
- The `IFloatProperties` interface has been removed.
- Fix #3579.
- Improved inference performance for models with multiple action branches. (#3598)
- Fixed an issue when using GAIL with less than `batch_size` number of demonstrations. (#3591)
- The interfaces to the `SideChannel` classes (on C# and python) have changed to use new `IncomingMessage` and `OutgoingMessage` classes. These should make reading and writing data to the channel easier. (#3596)
## [0.14.1-preview] - 2020-02-25

93
com.unity.ml-agents/Documentation~/com.unity.ml-agents.md


Please see the [ML-Agents README](https://github.com/Unity-Technologies/ml-agents/blob/master/README.md)
# About ML-Agents package (`com.unity.ml-agents`)
The Unity ML-Agents package contains the C# SDK for the
[Unity ML-Agents Toolkit](https://github.com/Unity-Technologies/ml-agents).
The package provides the ability for any Unity scene to be converted into a learning
environment where character behaviors can be trained using a variety of machine learning
algorithms. Additionally, it enables any trained behavior to be embedded back into the Unity
scene. More specifically, the package provides the following core functionalities:
* Define Agents: entities whose behavior will be learned. Agents are entities
that generate observations (through sensors), take actions and receive rewards from
the environment.
* Define Behaviors: entities that specify how an agent should act. Multiple agents can
share the same Behavior and a scene may have multiple Behaviors.
* Record demonstrations of an agent within the Editor. These demonstrations can be
valuable to train a behavior for that agent.
* Embedding a trained behavior into the scene via the
[Unity Inference Engine](https://docs.unity3d.com/Packages/com.unity.barracuda@latest/index.html).
Thus an Agent can switch from a learning behavior to an inference behavior.
Note that this package does not contain the machine learning algorithms for training
behaviors. It relies on a Python package to orchestrate the training. This package
only enables instrumenting a Unity scene and setting it up for training, and then
embedding the trained model back into your Unity scene.
## Preview package
This package is available as a preview, so it is not ready for production use.
The features and documentation in this package might change before it is verified for release.
## Package contents
The following table describes the package folder structure:
|**Location**|**Description**|
|---|---|
|*Documentation~*|Contains the documentation for the Unity package.|
|*Editor*|Contains utilities for Editor windows and drawers.|
|*Plugins*|Contains third-party DLLs.|
|*Runtime*|Contains core C# APIs for integrating ML-Agents into your Unity scene. |
|*Tests*|Contains the unit tests for the package.|
<a name="Installation"></a>
## Installation
To install this package, follow the instructions in the
[Package Manager documentation](https://docs.unity3d.com/Manual/upm-ui-install.html).
To install the Python package to enable training behaviors, follow the instructions on our
[GitHub repository](https://github.com/Unity-Technologies/ml-agents/blob/latest_release/docs/Installation.md).
## Requirements
This version of the Unity ML-Agents package is compatible with the following versions of the Unity Editor:
* 2018.4 and later (recommended)
## Known limitations
### Headless Mode
If you enable Headless mode, you will not be able to collect visual observations
from your agents.
### Rendering Speed and Synchronization
Currently the speed of the game physics can only be increased to 100x real-time.
The Academy also moves in time with FixedUpdate() rather than Update(), so game
behavior implemented in Update() may be out of sync with the agent decision
making. See
[Execution Order of Event Functions](https://docs.unity3d.com/Manual/ExecutionOrder.html)
for more information.
You can control the frequency of Academy stepping by calling
`Academy.Instance.DisableAutomaticStepping()`, and then calling
`Academy.Instance.EnvironmentStep()`
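A minimal sketch of that manual-stepping pattern, assuming a plain MonoBehaviour drives the loop (the class name is hypothetical; `DisableAutomaticStepping` and `EnvironmentStep` are the calls named above):

```csharp
using UnityEngine;
using MLAgents;

public class ManualAcademyStepper : MonoBehaviour
{
    void Awake()
    {
        // Take over stepping from the Academy's FixedUpdate-driven loop.
        Academy.Instance.DisableAutomaticStepping();
    }

    void FixedUpdate()
    {
        // Advance the ML-Agents step (decision requests, actions, etc.) explicitly.
        Academy.Instance.EnvironmentStep();
    }
}
```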
### Unity Inference Engine Models
Currently, only models created with our trainers are supported for running
ML-Agents with a neural network behavior.
## Helpful links
If you are new to the Unity ML-Agents package, or have a question after reading
the documentation, you can check out our
[GitHub Repository](https://github.com/Unity-Technologies/ml-agents), which
also includes a number of ways to
[connect with us](https://github.com/Unity-Technologies/ml-agents#community-and-feedback)
including our [ML-Agents Forum](https://forum.unity.com/forums/ml-agents.453/).

71
com.unity.ml-agents/Editor/BehaviorParametersEditor.cs


using UnityEditor;
using Barracuda;
using MLAgents.Policies;
using UnityEngine;
namespace MLAgents.Editor
{

{
var so = serializedObject;
so.Update();
bool needPolicyUpdate; // Whether the name, model, inference device, or BehaviorType changed.
EditorGUI.indentLevel++;
EditorGUI.BeginChangeCheck(); // global
EditorGUI.indentLevel++;
EditorGUILayout.PropertyField(so.FindProperty("m_BehaviorName"));
EditorGUILayout.PropertyField(so.FindProperty("m_BrainParameters"), true);
EditorGUILayout.PropertyField(so.FindProperty("m_Model"), true);
EditorGUI.indentLevel++;
EditorGUILayout.PropertyField(so.FindProperty("m_InferenceDevice"), true);
EditorGUI.indentLevel--;
EditorGUILayout.PropertyField(so.FindProperty("m_BehaviorType"));
{
EditorGUILayout.PropertyField(so.FindProperty("m_BehaviorName"));
}
needPolicyUpdate = EditorGUI.EndChangeCheck();
EditorGUI.BeginDisabledGroup(Application.isPlaying);
{
EditorGUILayout.PropertyField(so.FindProperty("m_BrainParameters"), true);
}
EditorGUI.EndDisabledGroup();
EditorGUI.BeginChangeCheck();
{
EditorGUILayout.PropertyField(so.FindProperty("m_Model"), true);
EditorGUI.indentLevel++;
EditorGUILayout.PropertyField(so.FindProperty("m_InferenceDevice"), true);
EditorGUI.indentLevel--;
}
needPolicyUpdate = needPolicyUpdate || EditorGUI.EndChangeCheck();
EditorGUI.BeginChangeCheck();
{
EditorGUILayout.PropertyField(so.FindProperty("m_BehaviorType"));
}
needPolicyUpdate = needPolicyUpdate || EditorGUI.EndChangeCheck();
EditorGUILayout.PropertyField(so.FindProperty("m_UseChildSensors"), true);
// EditorGUILayout.PropertyField(serializedObject.FindProperty("m_Heuristic"), true);
EditorGUI.indentLevel--;
if (EditorGUI.EndChangeCheck())
EditorGUI.BeginDisabledGroup(Application.isPlaying);
m_RequireReload = true;
EditorGUILayout.PropertyField(so.FindProperty("m_UseChildSensors"), true);
EditorGUI.EndDisabledGroup();
EditorGUI.indentLevel--;
m_RequireReload = EditorGUI.EndChangeCheck();
if (needPolicyUpdate)
{
UpdateAgentPolicy();
}
}
/// <summary>

EditorGUILayout.HelpBox(check, MessageType.Warning);
}
}
}
}
void UpdateAgentPolicy()
{
if (Application.isPlaying)
{
var behaviorParameters = (BehaviorParameters)target;
var agent = behaviorParameters.GetComponent<Agent>();
if (agent == null)
{
return;
}
agent.ReloadPolicy();
}
}
}

23
com.unity.ml-agents/Editor/RayPerceptionSensorComponentBaseEditor.cs


EditorGUI.BeginChangeCheck();
EditorGUI.indentLevel++;
EditorGUILayout.PropertyField(so.FindProperty("m_SensorName"), true);
// Because the number of rays and the tags affect the observation shape,
// they are not editable during play mode.
// Don't allow certain fields to be modified during play mode.
// * SensorName affects the ordering of the Agent's observations
// * The number of tags and rays affects the size of the observations.
EditorGUILayout.PropertyField(so.FindProperty("m_SensorName"), true);
EditorGUILayout.PropertyField(so.FindProperty("m_DetectableTags"), true);
EditorGUILayout.PropertyField(so.FindProperty("m_RaysPerDirection"), true);
}

m_RequireSensorUpdate = true;
}
UpdateSensorIfDirty();
UpdateSensorIfDirty();
if (m_RequireSensorUpdate)
{
var sensorComponent = serializedObject.targetObject as RayPerceptionSensorComponentBase;
sensorComponent?.UpdateSensor();
m_RequireSensorUpdate = false;
}
if (m_RequireSensorUpdate)
{
var sensorComponent = serializedObject.targetObject as RayPerceptionSensorComponentBase;
sensorComponent?.UpdateSensor();
m_RequireSensorUpdate = false;
}
}
}

2
com.unity.ml-agents/Runtime/Academy.cs


/// <summary>
/// Collection of float properties (indexed by a string).
/// </summary>
public IFloatProperties FloatProperties;
public FloatPropertiesChannel FloatProperties;
// Fields not provided in the Inspector.

153
com.unity.ml-agents/Runtime/Agent.cs


public int maxStep;
}
public int TeamId {
get {
LazyInitialize();
return m_PolicyFactory.TeamId;
}
}
public string BehaviorName {
get {
LazyInitialize();
return m_PolicyFactory.behaviorName;
}
}
[SerializeField][HideInInspector]
internal AgentParameters agentParameters;
[SerializeField][HideInInspector]

/// </summary>
internal VectorSensor collectObservationsSensor;
void OnEnable()
/// <summary>
/// Called when the attached <see cref="GameObject"/> becomes enabled and active.
/// </summary>
protected virtual void OnEnable()
{
LazyInitialize();
}

Academy.Instance.AgentForceReset += _AgentReset;
m_Brain = m_PolicyFactory.GeneratePolicy(Heuristic);
ResetData();
InitializeAgent();
Initialize();
InitializeSensors();
}

Disabled,
}
void OnDisable()
/// <summary>
/// Called when the attached <see cref="GameObject"/> becomes disabled and inactive.
/// </summary>
protected virtual void OnDisable()
{
DemonstrationWriters.Clear();

m_Brain?.RequestDecision(m_Info, sensors);
// We also have to write to any DemonstrationWriters so that they get the "done" flag.
foreach(var demoWriter in DemonstrationWriters)
foreach (var demoWriter in DemonstrationWriters)
{
demoWriter.Record(m_Info, sensors);
}

m_RequestDecision = false;
}
[Obsolete("GiveModel() has been deprecated, use SetModel() instead.")]
public void GiveModel(
string behaviorName,
NNModel model,
InferenceDevice inferenceDevice = InferenceDevice.CPU)
{
SetModel(behaviorName, model, inferenceDevice);
}
/// <summary>
/// Updates the Model for the agent. Any model currently assigned to the
/// agent will be replaced with the provided one. If the arguments are

/// <param name="model"> The model to use for inference.</param>
/// <param name = "inferenceDevice"> Define on what device the model
/// will be run.</param>
public void GiveModel(
public void SetModel(
m_PolicyFactory.GiveModel(behaviorName, model, inferenceDevice);
if (behaviorName == m_PolicyFactory.behaviorName &&
model == m_PolicyFactory.model &&
inferenceDevice == m_PolicyFactory.inferenceDevice)
{
// If everything is the same, don't make any changes.
return;
}
m_PolicyFactory.model = model;
m_PolicyFactory.inferenceDevice = inferenceDevice;
m_PolicyFactory.behaviorName = behaviorName;
ReloadPolicy();
}
/// <summary>
/// Updates the type of behavior for the agent.
/// </summary>
/// <param name="behaviorType"> The new behaviorType for the Agent.</param>
public void SetBehaviorType(BehaviorType behaviorType)
{
if (m_PolicyFactory.behaviorType == behaviorType)
{
return;
}
m_PolicyFactory.behaviorType = behaviorType;
ReloadPolicy();
}
internal void ReloadPolicy()
{
}
/// <summary>

TimerStack.Instance.SetGauge(gaugeName, GetCumulativeReward());
}
[Obsolete("Done() has been deprecated, use EndEpisode() instead.")]
public void Done()
{
EndEpisode();
}
public void Done()
public void EndEpisode()
{
NotifyAgentDone(DoneReason.DoneCalled);
_AgentReset();

m_RequestAction = true;
}
/// Helper function that resets all the data structures associated with
/// the agent. Typically used when the agent is being initialized or reset
/// at the end of an episode.

// should stay the previous action before the Done(), so that it is properly recorded.
if (m_Action.vectorActions == null)
{
if (param.vectorActionSpaceType == SpaceType.Continuous)
{
m_Action.vectorActions = new float[param.vectorActionSize[0]];
m_Info.storedVectorActions = new float[param.vectorActionSize[0]];
}
else
{
m_Action.vectorActions = new float[param.vectorActionSize.Length];
m_Info.storedVectorActions = new float[param.vectorActionSize.Length];
}
m_Action.vectorActions = new float[param.numActions];
m_Info.storedVectorActions = new float[param.numActions];
}
[Obsolete("InitializeAgent() has been deprecated, use Initialize() instead.")]
public virtual void InitializeAgent()
{
}
/// <summary>

/// One sample use is to store local references to other objects in the
/// scene which would facilitate computing this agents observation.
/// </remarks>
public virtual void InitializeAgent()
public virtual void Initialize()
#pragma warning disable 0618
InitializeAgent();
#pragma warning restore 0618
}
/// <summary>

{
Debug.LogWarning("Heuristic method called but not implemented. Returning placeholder actions.");
var param = m_PolicyFactory.brainParameters;
var actionSize = param.vectorActionSpaceType == SpaceType.Continuous ?
param.vectorActionSize[0] :
param.vectorActionSize.Length;
return new float[actionSize];
return new float[param.numActions];
}
/// <summary>

/// </summary>
void SendInfoToBrain()
{
if (!m_Initialized)
{
throw new UnityAgentsException("Call to SendInfoToBrain when Agent hasn't been initialized." +
"Please ensure that you are calling 'base.OnEnable()' if you have overridden OnEnable.");
}
if (m_Brain == null)
{
return;

m_Brain.RequestDecision(m_Info, sensors);
// If we have any DemonstrationWriters, write the AgentInfo and sensors to them.
foreach(var demoWriter in DemonstrationWriters)
foreach (var demoWriter in DemonstrationWriters)
{
demoWriter.Record(m_Info, sensors);
}

sensor.Update();
}
}
/// <summary>
/// Collects the vector observations of the agent.

{
}
[Obsolete("AgentAction() has been deprecated, use OnActionReceived() instead.")]
public virtual void AgentAction(float[] vectorAction)
{
}
/// <summary>
/// Specifies the agent behavior at every step based on the provided
/// action.

/// will be of length 1.
/// </param>
public virtual void AgentAction(float[] vectorAction)
public virtual void OnActionReceived(float[] vectorAction)
{
#pragma warning disable 0618
AgentAction(m_Action.vectorActions);
#pragma warning restore 0618
}
[Obsolete("AgentReset() has been deprecated, use OnEpisodeBegin() instead.")]
public virtual void AgentReset()
{
}

/// episode).
/// </summary>
public virtual void AgentReset()
public virtual void OnEpisodeBegin()
#pragma warning disable 0618
AgentReset();
#pragma warning restore 0618
}
/// <summary>

}
/// <summary>
/// This method will forcefully reset the agent and will also reset the hasAlreadyReset flag.
/// This way, even if the agent was already in the process of resetting, it will be reset again
/// and will not send a Done flag at the next step.
/// </summary>
void ForceReset()
{
_AgentReset();
}
/// <summary>
/// An internal reset method that updates internal data structures in
/// addition to calling <see cref="AgentReset"/>.
/// </summary>

m_StepCount = 0;
AgentReset();
OnEpisodeBegin();
}
/// <summary>

if ((m_RequestAction) && (m_Brain != null))
{
m_RequestAction = false;
AgentAction(m_Action.vectorActions);
OnActionReceived(m_Action.vectorActions);
}
if ((m_StepCount >= maxStep) && (maxStep > 0))

void DecideAction()
{
m_Action.vectorActions = m_Brain?.DecideAction();
if (m_Action.vectorActions == null){
if (m_Action.vectorActions == null)
{
ResetData();
}
}
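`SetModel`, as shown above, returns early when the behavior name, model, and inference device are all unchanged, so it is safe to call repeatedly. A hypothetical runtime swap between a trained model and the heuristic might look like the sketch below; the field names and the "Frozen" behavior name are placeholders, and the `MLAgents.Policies` using for `InferenceDevice`/`BehaviorType` is an assumption based on this diff.

```csharp
using UnityEngine;
using MLAgents;
using MLAgents.Policies;  // assumed namespace for InferenceDevice and BehaviorType on this branch
using Barracuda;          // NNModel

public class ModelSwapper : MonoBehaviour
{
    public Agent agent;          // assigned in the Inspector (hypothetical wiring)
    public NNModel frozenModel;  // a previously trained model asset

    public void UseFrozenModel()
    {
        // Replaces the deprecated GiveModel(); identical arguments make this a no-op.
        agent.SetModel("Frozen", frozenModel, InferenceDevice.CPU);
    }

    public void UseHeuristic()
    {
        // Force the agent to use its Heuristic() regardless of any attached model.
        agent.SetBehaviorType(BehaviorType.HeuristicOnly);
    }
}
```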

12
com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs


}
else
{
var compressedObs = sensor.GetCompressedObservation();
if (compressedObs == null)
{
throw new UnityAgentsException(
$"GetCompressedObservation() returned null data for sensor named {sensor.GetName()}. " +
"You must return a byte[]. If you don't want to use compressed observations, " +
"return SensorCompressionType.None from GetCompressionType()."
);
}
CompressedData = ByteString.CopyFrom(sensor.GetCompressedObservation()),
CompressedData = ByteString.CopyFrom(compressedObs),
CompressionType = (CompressionTypeProto)sensor.GetCompressionType(),
};
}

61
com.unity.ml-agents/Runtime/Communicator/RpcCommunicator.cs


"Python library version: {2}.",
pythonCommunicationVersion, initParameters.unityCommunicationVersion,
pythonPackageVersion
);
);
}
else
{

"A side channel with type index {0} is already registered. You cannot register multiple " +
"side channels of the same id.", channelId));
}
// Process any messages that we've already received for this channel ID.
var numMessages = m_CachedMessages.Count;
for (int i = 0; i < numMessages; i++)
{
var cachedMessage = m_CachedMessages.Dequeue();
if (channelId == cachedMessage.ChannelId)
{
using (var incomingMsg = new IncomingMessage(cachedMessage.Message))
{
sideChannel.OnMessageReceived(incomingMsg);
}
}
else
{
m_CachedMessages.Enqueue(cachedMessage);
}
}
m_SideChannels.Add(channelId, sideChannel);
}

}
}
private struct CachedSideChannelMessage
{
public Guid ChannelId;
public byte[] Message;
}
private static Queue<CachedSideChannelMessage> m_CachedMessages = new Queue<CachedSideChannelMessage>();
/// <summary>
/// Separates the data received from Python into individual messages for each registered side channel.
/// </summary>

{
while (m_CachedMessages.Count != 0)
{
var cachedMessage = m_CachedMessages.Dequeue();
if (sideChannels.ContainsKey(cachedMessage.ChannelId))
{
using (var incomingMsg = new IncomingMessage(cachedMessage.Message))
{
sideChannels[cachedMessage.ChannelId].OnMessageReceived(incomingMsg);
}
}
else
{
Debug.Log(string.Format(
"Unknown side channel data received. Channel Id is "
+ ": {0}", cachedMessage.ChannelId));
}
}
if (dataReceived.Length == 0)
{
return;

}
if (sideChannels.ContainsKey(channelId))
{
sideChannels[channelId].OnMessageReceived(message);
using (var incomingMsg = new IncomingMessage(message))
{
sideChannels[channelId].OnMessageReceived(incomingMsg);
}
Debug.Log(string.Format(
"Unknown side channel data received. Channel Id is "
+ ": {0}", channelId));
// Don't recognize this ID, but cache it in case the SideChannel that can handle
// it is registered before the next call to ProcessSideChannelData.
m_CachedMessages.Enqueue(new CachedSideChannelMessage
{
ChannelId = channelId,
Message = message
});
}
}
}
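The caching above lets messages that arrive before a channel is registered be replayed once it is. A hypothetical custom channel registered through the Academy (per the changelog's Academy.RegisterSideChannel entry) could look like the sketch below; the GUID, class names, and the `ReadString()` accessor are assumptions on my part, not part of this diff.

```csharp
using System;
using UnityEngine;
using MLAgents;
using MLAgents.SideChannels;  // assumed namespace after the SideChannel move noted in the CHANGELOG

public class StringLogChannel : SideChannel
{
    public StringLogChannel()
    {
        // A fixed, hypothetical GUID identifying this channel on both the C# and Python sides.
        ChannelId = new Guid("12345678-9abc-def0-1234-56789abcdef0");
    }

    public override void OnMessageReceived(IncomingMessage msg)
    {
        // Assumes IncomingMessage exposes ReadString(), as the built-in channels do.
        Debug.Log("Python says: " + msg.ReadString());
    }
}

public class StringLogRegistration : MonoBehaviour
{
    StringLogChannel m_Channel;

    void Awake()
    {
        m_Channel = new StringLogChannel();
        Academy.Instance.RegisterSideChannel(m_Channel);
    }

    void OnDestroy()
    {
        if (Academy.IsInitialized)
        {
            Academy.Instance.UnregisterSideChannel(m_Channel);
        }
    }
}
```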

9
com.unity.ml-agents/Runtime/DecisionRequester.cs


using System.Runtime.CompilerServices;
using UnityEngine.Serialization;
namespace MLAgents
{

/// </summary>
[AddComponentMenu("ML Agents/Decision Requester", (int)MenuGroup.Default)]
public class DecisionRequester : MonoBehaviour
internal class DecisionRequester : MonoBehaviour
{
/// <summary>
/// The frequency with which the agent requests a decision. A DecisionPeriod of 5 means

[Tooltip("Indicates whether or not the agent will take an action during the Academy " +
"steps where it does not request a decision. Has no effect when DecisionPeriod " +
"is set to 1.")]
public bool RepeatAction = true;
[FormerlySerializedAs("RepeatAction")]
public bool TakeActionsBetweenDecisions = true;
/// <summary>
/// Whether or not the Agent decisions should start at an offset (different for each agent).

{
m_Agent?.RequestDecision();
}
if (RepeatAction)
if (TakeActionsBetweenDecisions)
{
m_Agent?.RequestAction();
}

2
com.unity.ml-agents/Runtime/Inference/ModelRunner.cs


m_VisualObservationsInitialized = true;
}
Profiler.BeginSample("LearningBrain.DecideAction");
Profiler.BeginSample("ModelRunner.DecideAction");
Profiler.BeginSample($"MLAgents.{m_Model.name}.GenerateTensors");
// Prepare the input tensors to be fed into the engine

151
com.unity.ml-agents/Runtime/Policies/BehaviorParameters.cs


namespace MLAgents.Policies
{
/// <summary>
/// Defines what type of behavior the Agent will be using
/// </summary>
[Serializable]
public enum BehaviorType
{
/// <summary>
/// The Agent will use the remote process for decision making. If unavailable,
/// it will use inference, and if no model is provided, it will use the heuristic.
/// </summary>
Default,
/// <summary>
/// The Agent will always use its heuristic
/// </summary>
HeuristicOnly,
/// <summary>
/// The Agent will always use inference with the provided
/// neural network model.
/// </summary>
InferenceOnly
}
/// <summary>
internal class BehaviorParameters : MonoBehaviour
public class BehaviorParameters : MonoBehaviour
[Serializable]
enum BehaviorType
[HideInInspector, SerializeField]
BrainParameters m_BrainParameters = new BrainParameters();
/// <summary>
/// The associated <see cref="BrainParameters"/> for this behavior.
/// </summary>
public BrainParameters brainParameters
Default,
HeuristicOnly,
InferenceOnly
get { return m_BrainParameters; }
internal set { m_BrainParameters = value; }
[HideInInspector]
[SerializeField]
BrainParameters m_BrainParameters = new BrainParameters();
[HideInInspector]
[SerializeField]
[HideInInspector, SerializeField]
[HideInInspector]
[SerializeField]
/// <summary>
/// The neural network model used when in inference mode.
/// This cannot be set directly; use <see cref="Agent.GiveModel(string,NNModel,InferenceDevice)"/>
/// to set it.
/// </summary>
public NNModel model
{
get { return m_Model; }
internal set { m_Model = value; }
}
[HideInInspector, SerializeField]
[HideInInspector]
[SerializeField]
/// <summary>
/// How inference is performed for this Agent's model.
/// This cannot be set directly; use <see cref="Agent.GiveModel(string,NNModel,InferenceDevice)"/>
/// to set it.
/// </summary>
public InferenceDevice inferenceDevice
{
get { return m_InferenceDevice; }
internal set { m_InferenceDevice = value; }
}
// Disable warning /com.unity.ml-agents/Runtime/Policy/BehaviorParameters.cs(...):
// warning CS0649: Field 'BehaviorParameters.m_BehaviorType' is never assigned to,
// and will always have its default value
// This field is set in the custom editor.
#pragma warning disable 0649
[HideInInspector, SerializeField]
#pragma warning restore 0649
[HideInInspector]
[SerializeField]
/// <summary>
/// The BehaviorType for the Agent.
/// This cannot be set directly; use <see cref="Agent.SetBehaviorType(BehaviorType)"/>
/// to set it.
/// </summary>
public BehaviorType behaviorType
{
get { return m_BehaviorType; }
internal set { m_BehaviorType = value; }
}
[HideInInspector, SerializeField]
/// The name of this behavior, which is used as a base name. See
/// <see cref="fullyQualifiedBehaviorName"/> for the full name.
/// This cannot be set directly; use <see cref="Agent.GiveModel(string,NNModel,InferenceDevice)"/>
/// to set it.
/// </summary>
public string behaviorName
{
get { return m_BehaviorName; }
internal set { m_BehaviorName = value; }
}
/// <summary>
[HideInInspector]
[SerializeField]
[FormerlySerializedAs("m_TeamID")]
[HideInInspector, SerializeField, FormerlySerializedAs("m_TeamID")]
// TODO properties here instead of Agent
[FormerlySerializedAs("m_useChildSensors")]
[HideInInspector]

/// <summary>
/// The associated <see cref="BrainParameters"/> for this behavior.
/// </summary>
internal BrainParameters brainParameters
{
get { return m_BrainParameters; }
}
/// <summary>
}
/// <summary>
/// The name of this behavior, which is used as a base name. See
/// <see cref="fullyQualifiedBehaviorName"/> for the full name.
/// </summary>
public string behaviorName
{
get { return m_BehaviorName; }
internal set { m_UseChildSensors = value; } // TODO make public, don't allow changes at runtime
}
/// <summary>

get { return m_BehaviorName + "?team=" + TeamId; }
}
public IPolicy GeneratePolicy(Func<float[]> heuristic)
internal IPolicy GeneratePolicy(Func<float[]> heuristic)
{
switch (m_BehaviorType)
{

default:
return new HeuristicPolicy(heuristic);
}
}
/// <summary>
/// Updates the model and related details for this behavior.
/// </summary>
/// <param name="newBehaviorName">New name for the behavior.</param>
/// <param name="model">New neural network model for this behavior.</param>
/// <param name="inferenceDevice">New inference device for this behavior.</param>
public void GiveModel(
string newBehaviorName,
NNModel model,
InferenceDevice inferenceDevice = InferenceDevice.CPU)
{
m_Model = model;
m_InferenceDevice = inferenceDevice;
m_BehaviorName = newBehaviorName;
}
}
}

20
com.unity.ml-agents/Runtime/Policies/BrainParameters.cs


/// <summary>
/// Whether the action space is discrete or continuous.
/// </summary>
internal enum SpaceType
public enum SpaceType
{
/// <summary>
/// Discrete action space: a fixed number of options are available.

/// decision process.
/// </summary>
[Serializable]
internal class BrainParameters
public class BrainParameters
{
/// <summary>
/// If continuous : The length of the float vector that represents the state.

/// Defines if the action is discrete or continuous.
/// </summary>
public SpaceType vectorActionSpaceType = SpaceType.Discrete;
public int numActions
{
get
{
switch (vectorActionSpaceType)
{
case SpaceType.Discrete:
return vectorActionSize.Length;
case SpaceType.Continuous:
return vectorActionSize[0];
default:
return 0;
}
}
}
/// <summary>
/// Deep clones the BrainParameter object.

19
com.unity.ml-agents/Runtime/Sensors/CameraSensor.cs


SensorCompressionType m_CompressionType;
/// <summary>
/// The Camera used for rendering the sensor observations.
/// </summary>
public Camera camera
{
get { return m_Camera; }
set { m_Camera = value; }
}
/// <summary>
/// The compression type used by the sensor.
/// </summary>
public SensorCompressionType compressionType
{
get { return m_CompressionType; }
set { m_CompressionType = value; }
}
/// <summary>
/// Creates and returns the camera sensor.
/// </summary>
/// <param name="camera">Camera object to capture images from.</param>

78
com.unity.ml-agents/Runtime/Sensors/CameraSensorComponent.cs


using UnityEngine;
using UnityEngine.Serialization;
namespace MLAgents.Sensors
{

[AddComponentMenu("ML Agents/Camera Sensor", (int)MenuGroup.Sensors)]
public class CameraSensorComponent : SensorComponent
{
[HideInInspector, SerializeField, FormerlySerializedAs("camera")]
Camera m_Camera;
CameraSensor m_Sensor;
public new Camera camera;
public new Camera camera
{
get { return m_Camera; }
set { m_Camera = value; UpdateSensor(); }
}
[HideInInspector, SerializeField, FormerlySerializedAs("sensorName")]
string m_SensorName = "CameraSensor";
public string sensorName = "CameraSensor";
public string sensorName
{
get { return m_SensorName; }
internal set { m_SensorName = value; }
}
[HideInInspector, SerializeField, FormerlySerializedAs("width")]
int m_Width = 84;
/// Width of the generated image.
/// Width of the generated observation.
public int width = 84;
public int width
{
get { return m_Width; }
internal set { m_Width = value; }
}
[HideInInspector, SerializeField, FormerlySerializedAs("height")]
int m_Height = 84;
/// Height of the generated image.
/// Height of the generated observation.
public int height = 84;
public int height
{
get { return m_Height; }
internal set { m_Height = value; }
}
[HideInInspector, SerializeField, FormerlySerializedAs("grayscale")]
public bool m_Grayscale;
public bool grayscale;
public bool grayscale
{
get { return m_Grayscale; }
internal set { m_Grayscale = value; }
}
[HideInInspector, SerializeField, FormerlySerializedAs("compression")]
SensorCompressionType m_Compression = SensorCompressionType.PNG;
public SensorCompressionType compression = SensorCompressionType.PNG;
public SensorCompressionType compression
{
get { return m_Compression; }
set { m_Compression = value; UpdateSensor(); }
}
/// <summary>
/// Creates the <see cref="CameraSensor"/>

{
return new CameraSensor(camera, width, height, grayscale, sensorName, compression);
m_Sensor = new CameraSensor(m_Camera, m_Width, m_Height, grayscale, m_SensorName, compression);
return m_Sensor;
}
/// <summary>

public override int[] GetObservationShape()
{
return CameraSensor.GenerateShape(width, height, grayscale);
return CameraSensor.GenerateShape(m_Width, m_Height, grayscale);
}
/// <summary>
/// Update fields that are safe to change on the Sensor at runtime.
/// </summary>
internal void UpdateSensor()
{
if (m_Sensor != null)
{
m_Sensor.camera = m_Camera;
m_Sensor.compressionType = m_Compression;
}
}
}
}

8
com.unity.ml-agents/Runtime/Sensors/RayPerceptionSensorComponent3D.cs


[AddComponentMenu("ML Agents/Ray Perception Sensor 3D", (int)MenuGroup.Sensors)]
public class RayPerceptionSensorComponent3D : RayPerceptionSensorComponentBase
{
[HideInInspector]
[SerializeField]
[FormerlySerializedAs("startVerticalOffset")]
[HideInInspector, SerializeField, FormerlySerializedAs("startVerticalOffset")]
[Range(-10f, 10f)]
[Tooltip("Ray start is offset up or down by this amount.")]
float m_StartVerticalOffset;

set { m_StartVerticalOffset = value; UpdateSensor(); }
}
[HideInInspector]
[SerializeField]
[FormerlySerializedAs("endVerticalOffset")]
[HideInInspector, SerializeField, FormerlySerializedAs("endVerticalOffset")]
[Range(-10f, 10f)]
[Tooltip("Ray end is offset up or down by this amount.")]
float m_EndVerticalOffset;

49
com.unity.ml-agents/Runtime/Sensors/RayPerceptionSensorComponentBase.cs


/// </summary>
public abstract class RayPerceptionSensorComponentBase : SensorComponent
{
[HideInInspector]
[SerializeField]
[FormerlySerializedAs("sensorName")]
[HideInInspector, SerializeField, FormerlySerializedAs("sensorName")]
string m_SensorName = "RayPerceptionSensor";
/// <summary>

internal set => m_SensorName = value;
}
[SerializeField]
[FormerlySerializedAs("detectableTags")]
[SerializeField, FormerlySerializedAs("detectableTags")]
[Tooltip("List of tags in the scene to compare against.")]
List<string> m_DetectableTags;

internal set => m_DetectableTags = value;
}
[HideInInspector]
[SerializeField]
[FormerlySerializedAs("raysPerDirection")]
[HideInInspector, SerializeField, FormerlySerializedAs("raysPerDirection")]
[Range(0, 50)]
[Tooltip("Number of rays to the left and right of center.")]
int m_RaysPerDirection = 3;

internal set => m_RaysPerDirection = value;
}
[HideInInspector]
[SerializeField]
[FormerlySerializedAs("maxRayDegrees")]
[HideInInspector, SerializeField, FormerlySerializedAs("maxRayDegrees")]
"Greater than 90 degrees will go backwards.")]
"Greater than 90 degrees will go backwards.")]
float m_MaxRayDegrees = 70;
/// <summary>

set { m_MaxRayDegrees = value; UpdateSensor(); }
}
[HideInInspector]
[SerializeField]
[FormerlySerializedAs("sphereCastRadius")]
[HideInInspector, SerializeField, FormerlySerializedAs("sphereCastRadius")]
[Range(0f, 10f)]
[Tooltip("Radius of sphere to cast. Set to zero for raycasts.")]
float m_SphereCastRadius = 0.5f;

set { m_SphereCastRadius = value; UpdateSensor(); }
}
[HideInInspector]
[SerializeField]
[FormerlySerializedAs("rayLength")]
[HideInInspector, SerializeField, FormerlySerializedAs("rayLength")]
[Range(1, 1000)]
[Tooltip("Length of the rays to cast.")]
float m_RayLength = 20f;

set { m_RayLength = value; UpdateSensor(); }
}
[HideInInspector]
[SerializeField]
[FormerlySerializedAs("rayLayerMask")]
[HideInInspector, SerializeField, FormerlySerializedAs("rayLayerMask")]
[Tooltip("Controls which layers the rays can hit.")]
LayerMask m_RayLayerMask = Physics.DefaultRaycastLayers;

set { m_RayLayerMask = value; UpdateSensor();}
}
[HideInInspector]
[SerializeField]
[FormerlySerializedAs("observationStacks")]
[HideInInspector, SerializeField, FormerlySerializedAs("observationStacks")]
[Range(1, 50)]
[Tooltip("Whether to stack previous observations. Using 1 means no previous observations.")]
int m_ObservationStacks = 1;

RayPerceptionSensor m_RaySensor;
/// <summary>
/// Get the RayPerceptionSensor that was created.
/// </summary>
public RayPerceptionSensor raySensor
{
get => m_RaySensor;
}
/// <summary>
/// Returns the <see cref="RayPerceptionCastType"/> for the associated raycast sensor.
/// </summary>
/// <returns></returns>

return new[] { obsSize * stacks };
}
RayPerceptionInput GetRayPerceptionInput()
/// <summary>
/// Get the RayPerceptionInput that is used by the <see cref="RayPerceptionSensor"/>.
/// </summary>
/// <returns></returns>
public RayPerceptionInput GetRayPerceptionInput()
{
var rayAngles = GetRayAngles(raysPerDirection, maxRayDegrees);

/// <summary>
/// Draw the debug information from the sensor (if available).
/// </summary>
void DrawRaycastGizmos(DebugDisplayInfo.RayInfo rayInfo, float alpha=1.0f)
void DrawRaycastGizmos(DebugDisplayInfo.RayInfo rayInfo, float alpha = 1.0f)
{
var startPositionWorld = rayInfo.worldStart;
var endPositionWorld = rayInfo.worldEnd;

28
com.unity.ml-agents/Runtime/Sensors/RenderTextureSensor.cs


SensorCompressionType m_CompressionType;
/// <summary>
/// The compression type used by the sensor.
/// </summary>
public SensorCompressionType compressionType
{
get { return m_CompressionType; }
set { m_CompressionType = value; }
}
/// <summary>
/// Initializes the sensor.
/// </summary>
/// <param name="renderTexture">The <see cref="RenderTexture"/> instance to wrap.</param>

var texture = ObservationToTexture(m_RenderTexture);
// TODO support more types here, e.g. JPG
var compressed = texture.EncodeToPNG();
UnityEngine.Object.Destroy(texture);
DestroyTexture(texture);
return compressed;
}
}

{
var texture = ObservationToTexture(m_RenderTexture);
var numWritten = Utilities.TextureToTensorProxy(texture, adapter, m_Grayscale);
UnityEngine.Object.Destroy(texture);
DestroyTexture(texture);
return numWritten;
}
}

texture2D.Apply();
RenderTexture.active = prevActiveRt;
return texture2D;
}
static void DestroyTexture(Texture2D texture)
{
if (Application.isEditor)
{
// Edit Mode tests complain if we use Destroy()
// TODO move to extension methods for UnityEngine.Object?
UnityEngine.Object.DestroyImmediate(texture);
}
else
{
UnityEngine.Object.Destroy(texture);
}
}
}
}

55
com.unity.ml-agents/Runtime/Sensors/RenderTextureSensorComponent.cs


using UnityEngine;
using UnityEngine.Serialization;
namespace MLAgents.Sensors
{

[AddComponentMenu("ML Agents/Render Texture Sensor", (int)MenuGroup.Sensors)]
public class RenderTextureSensorComponent : SensorComponent
{
RenderTextureSensor m_Sensor;
public RenderTexture renderTexture;
[HideInInspector, SerializeField, FormerlySerializedAs("renderTexture")]
RenderTexture m_RenderTexture;
public RenderTexture renderTexture
{
get { return m_RenderTexture; }
set { m_RenderTexture = value; }
}
[HideInInspector, SerializeField, FormerlySerializedAs("sensorName")]
string m_SensorName = "RenderTextureSensor";
/// Name of the sensor.
/// Name of the generated <see cref="RenderTextureSensor"/>.
public string sensorName = "RenderTextureSensor";
public string sensorName
{
get { return m_SensorName; }
internal set { m_SensorName = value; }
}
[HideInInspector, SerializeField, FormerlySerializedAs("grayscale")]
public bool m_Grayscale;
public bool grayscale;
public bool grayscale
{
get { return m_Grayscale; }
internal set { m_Grayscale = value; }
}
[HideInInspector, SerializeField, FormerlySerializedAs("compression")]
SensorCompressionType m_Compression = SensorCompressionType.PNG;
public SensorCompressionType compression = SensorCompressionType.PNG;
public SensorCompressionType compression
{
get { return m_Compression; }
set { m_Compression = value; UpdateSensor(); }
}
return new RenderTextureSensor(renderTexture, grayscale, sensorName, compression);
m_Sensor = new RenderTextureSensor(renderTexture, grayscale, sensorName, compression);
return m_Sensor;
}
/// <inheritdoc/>

var height = renderTexture != null ? renderTexture.height : 0;
return new[] { height, width, grayscale ? 1 : 3 };
}
/// <summary>
/// Update fields that are safe to change on the Sensor at runtime.
/// </summary>
internal void UpdateSensor()
{
if (m_Sensor != null)
{
m_Sensor.compressionType = m_Compression;
}
}
}
}

31
com.unity.ml-agents/Runtime/SideChannels/EngineConfigurationChannel.cs


using System.IO;
using System;
using UnityEngine;

}
/// <inheritdoc/>
public override void OnMessageReceived(byte[] data)
public override void OnMessageReceived(IncomingMessage msg)
using (var memStream = new MemoryStream(data))
{
using (var binaryReader = new BinaryReader(memStream))
{
var width = binaryReader.ReadInt32();
var height = binaryReader.ReadInt32();
var qualityLevel = binaryReader.ReadInt32();
var timeScale = binaryReader.ReadSingle();
var targetFrameRate = binaryReader.ReadInt32();
var width = msg.ReadInt32();
var height = msg.ReadInt32();
var qualityLevel = msg.ReadInt32();
var timeScale = msg.ReadFloat32();
var targetFrameRate = msg.ReadInt32();
timeScale = Mathf.Clamp(timeScale, 1, 100);
timeScale = Mathf.Clamp(timeScale, 1, 100);
Screen.SetResolution(width, height, false);
QualitySettings.SetQualityLevel(qualityLevel, true);
Time.timeScale = timeScale;
Time.captureFramerate = 60;
Application.targetFrameRate = targetFrameRate;
}
}
Screen.SetResolution(width, height, false);
QualitySettings.SetQualityLevel(qualityLevel, true);
Time.timeScale = timeScale;
Time.captureFramerate = 60;
Application.targetFrameRate = targetFrameRate;
}
}
}

109
com.unity.ml-agents/Runtime/SideChannels/FloatPropertiesChannel.cs


using System.Collections.Generic;
using System.IO;
using System.Text;
/// Interface for managing a collection of float properties keyed by a string variable.
/// </summary>
public interface IFloatProperties
{
/// <summary>
/// Sets one of the float properties of the environment. This data will be sent to Python.
/// </summary>
/// <param name="key"> The string identifier of the property.</param>
/// <param name="value"> The float value of the property.</param>
void SetProperty(string key, float value);
/// <summary>
/// Get an Environment property with a default value. If there is a value for this property,
/// it will be returned, otherwise, the default value will be returned.
/// </summary>
/// <param name="key"> The string identifier of the property.</param>
/// <param name="defaultValue"> The default value of the property.</param>
/// <returns></returns>
float GetPropertyWithDefault(string key, float defaultValue);
/// <summary>
/// Registers an action to be performed every time the property is changed.
/// </summary>
/// <param name="key"> The string identifier of the property.</param>
/// <param name="action"> The action that ill be performed. Takes a float as input.</param>
void RegisterCallback(string key, Action<float> action);
/// <summary>
/// Returns a list of all the string identifiers of the properties currently present.
/// </summary>
/// <returns> The list of string identifiers </returns>
IList<string> ListProperties();
}
/// <summary>
public class FloatPropertiesChannel : SideChannel, IFloatProperties
public class FloatPropertiesChannel : SideChannel
{
Dictionary<string, float> m_FloatProperties = new Dictionary<string, float>();
Dictionary<string, Action<float>> m_RegisteredActions = new Dictionary<string, Action<float>>();

{
ChannelId = new Guid(k_FloatPropertiesDefaultId);
}
else{
else
{
public override void OnMessageReceived(byte[] data)
public override void OnMessageReceived(IncomingMessage msg)
var kv = DeserializeMessage(data);
m_FloatProperties[kv.Key] = kv.Value;
if (m_RegisteredActions.ContainsKey(kv.Key))
{
m_RegisteredActions[kv.Key].Invoke(kv.Value);
}
var key = msg.ReadString();
var value = msg.ReadFloat32();
m_FloatProperties[key] = value;
Action<float> action;
m_RegisteredActions.TryGetValue(key, out action);
action?.Invoke(value);
}
/// <inheritdoc/>

QueueMessageToSend(SerializeMessage(key, value));
if (m_RegisteredActions.ContainsKey(key))
using (var msgOut = new OutgoingMessage())
m_RegisteredActions[key].Invoke(value);
msgOut.WriteString(key);
msgOut.WriteFloat32(value);
QueueMessageToSend(msgOut);
Action<float> action;
m_RegisteredActions.TryGetValue(key, out action);
action?.Invoke(value);
if (m_FloatProperties.ContainsKey(key))
{
return m_FloatProperties[key];
}
else
{
return defaultValue;
}
float valueOut;
bool hasKey = m_FloatProperties.TryGetValue(key, out valueOut);
return hasKey ? valueOut : defaultValue;
}
/// <inheritdoc/>

public IList<string> ListProperties()
{
return new List<string>(m_FloatProperties.Keys);
}
static KeyValuePair<string, float> DeserializeMessage(byte[] data)
{
using (var memStream = new MemoryStream(data))
{
using (var binaryReader = new BinaryReader(memStream))
{
var keyLength = binaryReader.ReadInt32();
var key = Encoding.ASCII.GetString(binaryReader.ReadBytes(keyLength));
var value = binaryReader.ReadSingle();
return new KeyValuePair<string, float>(key, value);
}
}
}
static byte[] SerializeMessage(string key, float value)
{
using (var memStream = new MemoryStream())
{
using (var binaryWriter = new BinaryWriter(memStream))
{
var stringEncoded = Encoding.ASCII.GetBytes(key);
binaryWriter.Write(stringEncoded.Length);
binaryWriter.Write(stringEncoded);
binaryWriter.Write(value);
return memStream.ToArray();
}
}
}
}
}

10
com.unity.ml-agents/Runtime/SideChannels/RawBytesChannel.cs


}
/// <inheritdoc/>
public override void OnMessageReceived(byte[] data)
public override void OnMessageReceived(IncomingMessage msg)
m_MessagesReceived.Add(data);
m_MessagesReceived.Add(msg.GetRawBytes());
}
/// <summary>

/// <param name="data"> The byte array of data to send to Python.</param>
public void SendRawBytes(byte[] data)
{
QueueMessageToSend(data);
using (var msg = new OutgoingMessage())
{
msg.SetRawBytes(data);
QueueMessageToSend(msg);
}
}
/// <summary>

13
com.unity.ml-agents/Runtime/SideChannels/SideChannel.cs


using System.Collections.Generic;
using System;
using System.IO;
using System.Text;
namespace MLAgents.SideChannels
{

/// of each type. Ensure the Unity side channels will be linked to their Python equivalent.
/// </summary>
/// <returns> The integer identifier of the SideChannel.</returns>
public Guid ChannelId{
public Guid ChannelId
{
get;
protected set;
}

/// Can be called multiple times per simulation step if multiple messages were sent.
/// </summary>
/// <param name="data"> the payload of the message.</param>
public abstract void OnMessageReceived(byte[] data);
/// <param name="msg">The incoming message.</param>
public abstract void OnMessageReceived(IncomingMessage msg);
protected void QueueMessageToSend(byte[] data)
protected void QueueMessageToSend(OutgoingMessage msg)
MessageQueue.Add(data);
MessageQueue.Add(msg.ToByteArray());
}
}
}

72
com.unity.ml-agents/Tests/Editor/MLAgentsEditModeTest.cs


using System.CodeDom;
using UnityEngine;
using NUnit.Framework;
using System.Reflection;

internal IPolicy GetPolicy()
{
return (IPolicy) typeof(Agent).GetField("m_Brain", BindingFlags.Instance | BindingFlags.NonPublic).GetValue(this);
return (IPolicy)typeof(Agent).GetField("m_Brain", BindingFlags.Instance | BindingFlags.NonPublic).GetValue(this);
}
public int initializeAgentCalls;

public TestSensor sensor1;
public TestSensor sensor2;
public override void InitializeAgent()
public override void Initialize()
{
initializeAgentCalls += 1;

sensor.AddObservation(0f);
}
public override void AgentAction(float[] vectorAction)
public override void OnActionReceived(float[] vectorAction)
{
agentActionCalls += 1;
agentActionCallsSinceLastReset += 1;

public override void AgentReset()
public override void OnEpisodeBegin()
{
agentResetCalls += 1;
collectObservationsCallsSinceLastReset = 0;

public byte[] GetCompressedObservation()
{
numCompressedCalls++;
return null;
return new byte[] { 0 };
}
public SensorCompressionType GetCompressionType()

// Set agent 1 to done every 11 steps to test behavior
if (i % 11 == 5)
{
agent1.Done();
agent1.EndEpisode();
agent2.Done();
agent2.EndEpisode();
numberAgent2Reset += 1;
agent2StepSinceReset = 0;
}

Assert.AreEqual(numSteps, agent1.heuristicCalls);
Assert.AreEqual(numSteps, agent1.sensor1.numWriteCalls);
Assert.AreEqual(numSteps, agent1.sensor2.numCompressedCalls);
}
}
[TestFixture]
public class TestOnEnableOverride
{
public class OnEnableAgent : Agent
{
public bool callBase;
protected override void OnEnable()
{
if (callBase)
base.OnEnable();
}
}
static void _InnerAgentTestOnEnableOverride(bool callBase = false)
{
var go = new GameObject();
var agent = go.AddComponent<OnEnableAgent>();
agent.callBase = callBase;
var onEnable = typeof(OnEnableAgent).GetMethod("OnEnable", BindingFlags.NonPublic | BindingFlags.Instance);
var sendInfo = typeof(Agent).GetMethod("SendInfoToBrain", BindingFlags.NonPublic | BindingFlags.Instance);
Assert.NotNull(onEnable);
onEnable.Invoke(agent, null);
Assert.NotNull(sendInfo);
if (agent.callBase)
{
Assert.DoesNotThrow(() => sendInfo.Invoke(agent, null));
}
else
{
Assert.Throws<UnityAgentsException>(() =>
{
try
{
sendInfo.Invoke(agent, null);
}
catch (TargetInvocationException e)
{
throw e.GetBaseException();
}
});
}
}
[Test]
public void TestAgentCallBaseOnEnable()
{
_InnerAgentTestOnEnableOverride(true);
}
[Test]
public void TestAgentDontCallBaseOnEnable()
{
_InnerAgentTestOnEnableOverride();
}
}
}

69
com.unity.ml-agents/Tests/Editor/SideChannelTests.cs


{
public List<int> messagesReceived = new List<int>();
public TestSideChannel() {
public TestSideChannel()
{
public override void OnMessageReceived(byte[] data)
public override void OnMessageReceived(IncomingMessage msg)
messagesReceived.Add(BitConverter.ToInt32(data, 0));
messagesReceived.Add(msg.ReadInt32());
public void SendInt(int data)
public void SendInt(int value)
QueueMessageToSend(BitConverter.GetBytes(data));
using (var msg = new OutgoingMessage())
{
msg.WriteInt32(value);
QueueMessageToSend(msg);
}
}
}

fakeData = RpcCommunicator.GetSideChannelMessage(dictSender);
RpcCommunicator.ProcessSideChannelData(dictReceiver, fakeData);
Assert.AreEqual(wasCalled, 1);
var keysA = propA.ListProperties();
Assert.AreEqual(2, keysA.Count);
Assert.IsTrue(keysA.Contains(k1));
Assert.IsTrue(keysA.Contains(k2));
var keysB = propA.ListProperties();
Assert.AreEqual(2, keysB.Count);
Assert.IsTrue(keysB.Contains(k1));
Assert.IsTrue(keysB.Contains(k2));
}
[Test]
public void TestOutgoingMessageRawBytes()
{
// Make sure that SetRawBytes resets the buffer correctly.
// Write 8 bytes (an int and float) then call SetRawBytes with 4 bytes
var msg = new OutgoingMessage();
msg.WriteInt32(42);
msg.WriteFloat32(1.0f);
var data = new byte[] { 1, 2, 3, 4 };
msg.SetRawBytes(data);
var result = msg.ToByteArray();
Assert.AreEqual(data, result);
}
[Test]
public void TestMessageReadWrites()
{
var boolVal = true;
var intVal = 1337;
var floatVal = 4.2f;
var floatListVal = new float[] { 1001, 1002 };
var stringVal = "mlagents!";
IncomingMessage incomingMsg;
using (var outgoingMsg = new OutgoingMessage())
{
outgoingMsg.WriteBoolean(boolVal);
outgoingMsg.WriteInt32(intVal);
outgoingMsg.WriteFloat32(floatVal);
outgoingMsg.WriteString(stringVal);
outgoingMsg.WriteFloatList(floatListVal);
incomingMsg = new IncomingMessage(outgoingMsg.ToByteArray());
}
Assert.AreEqual(boolVal, incomingMsg.ReadBoolean());
Assert.AreEqual(intVal, incomingMsg.ReadInt32());
Assert.AreEqual(floatVal, incomingMsg.ReadFloat32());
Assert.AreEqual(stringVal, incomingMsg.ReadString());
Assert.AreEqual(floatListVal, incomingMsg.ReadFloatList());
}
}
}

2
com.unity.ml-agents/package.json


"unity": "2018.4",
"description": "Add interactivity to your game with Machine Learning Agents trained using Deep Reinforcement Learning.",
"dependencies": {
"com.unity.barracuda": "0.6.0-preview"
"com.unity.barracuda": "0.6.1-preview"
}
}

2
config/sac_trainer_config.yaml


summary_freq: 20000
PushBlock:
max_steps: 1.5e7
max_steps: 2e6
init_entcoef: 0.05
hidden_units: 256
summary_freq: 60000

4
config/trainer_config.yaml


Bouncer:
normalize: true
max_steps: 7.0e6
max_steps: 4.0e6
max_steps: 1.5e7
max_steps: 2.0e6
batch_size: 128
buffer_size: 2048
beta: 1.0e-2

21
docs/Getting-Started-with-Balance-Ball.md


* **Behavior Parameters** — Every Agent must have a Behavior. The Behavior
determines how an Agent makes decisions. More on Behavior Parameters in
the next section.
* **Max Step** — Defines how many simulation steps can occur before the Agent
decides it is done. In 3D Balance Ball, an Agent restarts after 5000 steps.
* **Max Step** — Defines how many simulation steps can occur before the Agent's
episode ends. In 3D Balance Ball, an Agent restarts after 5000 steps.
Perhaps the more interesting aspect of an agent is the Agent subclass
implementation. When you create an Agent, you must extend the base Agent class.
When you create an Agent, you must extend the base Agent class.
* agent.AgentReset() — Called when the Agent resets, including at the beginning
of a session. The Ball3DAgent class uses the reset function to reset the
* `Agent.OnEpisodeBegin()` — Called when the Agent resets, including at the beginning
of the simulation. The Ball3DAgent class uses the reset function to reset the
* agent.CollectObservations(VectorSensor sensor) — Called every simulation step. Responsible for
* `Agent.CollectObservations(VectorSensor sensor)` — Called every simulation step. Responsible for
* agent.AgentAction() — Called every simulation step. Receives the action chosen
* `Agent.OnActionReceived()` — Called every time the Agent receives an action to take. Receives the action chosen
small change in the agent cube's rotation at each step. The `AgentAction()` function
small change in the agent cube's rotation at each step. The `OnActionReceived()` method
negative reward for dropping the ball. An Agent is also marked as done when it
negative reward for dropping the ball. An Agent's episode is also ended when it
* agent.Heuristic() - When the `Use Heuristic` checkbox is checked in the Behavior
* `Agent.Heuristic()` - When the `Behavior Type` is set to `Heuristic Only` in the Behavior
Parameters of the Agent, the Agent will use the `Heuristic()` method to generate
the actions of the Agent. As such, the `Heuristic()` method returns an array of
floats. In the case of the Ball 3D Agent, the `Heuristic()` method converts the
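For reference, a hedged sketch of a minimal Agent written against the methods listed above (the class name, the single observation, and the reward values are illustrative only and are not taken from the Ball3DAgent):

```csharp
using MLAgents;
using MLAgents.Sensors;

public class MinimalAgent : Agent
{
    // Hypothetical piece of state used only for this sketch.
    float m_BallHeight;

    public override void OnEpisodeBegin()
    {
        // Reset episode-specific state at the start of each episode.
        m_BallHeight = 1f;
    }

    public override void CollectObservations(VectorSensor sensor)
    {
        // Observations are added through the VectorSensor passed in.
        sensor.AddObservation(m_BallHeight);
    }

    public override void OnActionReceived(float[] vectorAction)
    {
        // Apply the (hypothetical) action and reward the agent for keeping the ball up.
        m_BallHeight += vectorAction[0] * 0.01f;
        AddReward(0.1f);
        if (m_BallHeight < 0f)
        {
            SetReward(-1f);
            EndEpisode();
        }
    }

    public override float[] Heuristic()
    {
        // Manual control values, used when Behavior Type is set to Heuristic Only.
        return new float[] { 0f, 0f };
    }
}
```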

160
docs/Installation.md


# Installation
To install and use ML-Agents, you need to install Unity, clone this repository and
install Python with additional dependencies. The subsections below walk through
each of these steps, in addition to a Docker set-up.
The ML-Agents Toolkit contains several components:
* Unity package ([`com.unity.ml-agents`](../com.unity.ml-agents/)) contains the Unity C#
SDK that will be integrated into your Unity scene.
* Three Python packages:
* [`mlagents`](../ml-agents/) contains the machine learning algorithms that enable you
to train behaviors in your Unity scene. Most users of ML-Agents will only need to
directly install `mlagents`.
* [`mlagents_envs`](../ml-agents-envs/) contains a Python API to interact with a Unity
scene. It is a foundational layer that facilitates data messaging between the Unity scene
and the Python machine learning algorithms. Consequently, `mlagents` depends on `mlagents_envs`.
* [`gym_unity`](../gym-unity/) provides a Python-wrapper for your Unity scene that
supports the OpenAI Gym interface.
* Unity [Project](../Project/) that contains several
[example environments](Learning-Environment-Examples.md) that highlight the various features
of the toolkit to help you get started.
## Install **Unity 2018.4** or Later
Consequently, to install and use ML-Agents you will need to:
* Install Unity (2018.4 or later)
* Install Python (3.6.1 or higher)
* Clone this repository
* Install the `com.unity.ml-agents` Unity package
* Install the `mlagents` Python package
[Download](https://store.unity.com/download) and install Unity. If you would
like to use our Docker set-up (introduced later), make sure to select the _Linux
Build Support_ component when installing Unity.
### Install **Unity 2018.4** or Later
<p align="center">
<img src="images/unity_linux_build_support.png"
alt="Linux Build Support"
width="500" border="10" />
</p>
[Download](https://unity3d.com/get-unity/download) and install Unity. We strongly recommend
that you install Unity through the Unity Hub as it will enable you to manage multiple Unity
versions.
## Environment Setup
We now support a single mechanism for installing ML-Agents on Mac/Windows/Linux using Virtual
Environments. For more information on Virtual Environments and installation instructions,
follow this [guide](Using-Virtual-Environment.md).
### Install **Python 3.6.1** or Higher
Although we don't support the Anaconda installation path of ML-Agents for Windows, the previous guide
is still in the docs folder. Please refer to [Windows Installation (Deprecated)](Installation-Windows.md).
We recommend [installing](https://www.python.org/downloads/) Python 3.6 or 3.7. If your Python
environment doesn't include `pip3`, see these
[instructions](https://packaging.python.org/guides/installing-using-linux-tools/#installing-pip-setuptools-wheel-with-linux-package-managers)
on installing it.
Although we do not provide support for Anaconda installation on Windows, the previous
[Windows Anaconda Installation (Deprecated) guide](Installation-Anaconda-Windows.md)
is still available.
Once installed, you will want to clone the ML-Agents Toolkit GitHub repository.
Now that you have installed Unity and Python, you will need to clone the
ML-Agents Toolkit GitHub repository to install the Unity package (the Python
packages can be installed directly from PyPi - a Python package registry).
```sh
git clone --branch latest_release https://github.com/Unity-Technologies/ml-agents.git

The `com.unity.ml-agents` subdirectory contains the core code to add to your projects.
The `Project` subdirectory contains many [example environments](Learning-Environment-Examples.md)
to help you get started.
### Package Installation
ML-Agents C# SDK is transitioning to a Unity Package. While we are working on getting into the
official packages list, you can add the `com.unity.ml-agents` package to your project by
navigating to the menu `Window` -> `Package Manager`. In the package manager window click
on the `+` button.
<p align="center">
<img src="images/unity_package_manager_window.png"
alt="Linux Build Support"
width="500" border="10" />
</p>
### Install the `com.unity.ml-agents` Unity package
**NOTE:** In Unity 2018.4 it's on the bottom right of the packages list, and in Unity 2019.3 it's on the top left of the packages list.
Select `Add package from disk...` and navigate into the
The Unity ML-Agents C# SDK is a Unity Package. We are working on getting it added to the
official Unity package registry which will enable you to install the `com.unity.ml-agents` package
[directly from the registry](https://docs.unity3d.com/Manual/upm-ui-install.html) without cloning
this repository. Until then, you will need to
[install it from the local package](https://docs.unity3d.com/Manual/upm-ui-local.html) that you
just cloned. You can add the `com.unity.ml-agents` package to
your project by navigating to the menu `Window` -> `Package Manager`. In the package manager
window click on the `+` button. Select `Add package from disk...` and navigate into the
**NOTE:** In Unity 2018.4 it's on the bottom right of the packages list, and in Unity 2019.3 it's
on the top left of the packages list.
<img src="images/unity_package_manager_window.png"
alt="Unity Package Manager Window"
height="340" border="10" />
alt="Linux Build Support"
width="500" border="10" />
alt="package.json"
height="340" border="10" />
The `ml-agents` subdirectory contains a Python package which provides deep reinforcement
learning trainers to use with Unity environments.
The `ml-agents-envs` subdirectory contains a Python API to interface with Unity, which
the `ml-agents` package depends on.
The `gym-unity` subdirectory contains a package to interface with OpenAI Gym.
### Install Python and mlagents Package
In order to use ML-Agents toolkit, you need Python 3.6.1 or higher.
[Download](https://www.python.org/downloads/) and install the latest version of Python if you do not already have it.
### Install the `mlagents` Python package
If your Python environment doesn't include `pip3`, see these
[instructions](https://packaging.python.org/guides/installing-using-linux-tools/#installing-pip-setuptools-wheel-with-linux-package-managers)
on installing it.
Installing the `mlagents` Python package involves installing other Python packages
that `mlagents` depends on. So you may run into installation issues if your machine
has older versions of any of those dependencies already installed. Consequently, our
supported path for installing `mlagents` is to leverage Python Virtual Environments.
Virtual Environments provide a mechanism for isolating the dependencies for each project
and are supported on Mac / Windows / Linux. We offer a dedicated
[guide on Virtual Environments](Using-Virtual-Environment.md).
To install the `mlagents` Python package, run from the command line:
To install the `mlagents` Python package, activate your virtual environment and run from the
command line:
Note that this will install `ml-agents` from PyPi, _not_ from the cloned repo.
Note that this will install `mlagents` from PyPi, _not_ from the cloned repo.
By installing the `mlagents` package, the dependencies listed in the [setup.py file](../ml-agents/setup.py) are also installed.
Some of the primary dependencies include:
- [TensorFlow](Background-TensorFlow.md) (Requires a CPU w/ AVX support)
- [Jupyter](Background-Jupyter.md)
**Notes:**
- We do not currently support Python 3.5 or lower.
- If you are using Anaconda and are having trouble with TensorFlow, please see
the following
[link](https://www.tensorflow.org/install/pip)
on how to install TensorFlow in an Anaconda environment.
By installing the `mlagents` package, the dependencies listed in the
[setup.py file](../ml-agents/setup.py) are also installed. These include
[TensorFlow](Background-TensorFlow.md) (Requires a CPU w/ AVX support) and
[Jupyter](Background-Jupyter.md).
### Installing for Development
#### Advanced: Installing for Development
If you intend to make modifications to `ml-agents` or `ml-agents-envs`, you should install
If you intend to make modifications to `mlagents` or `mlagents_envs`, you should install
`ml-agents` and `ml-agents-envs` separately. From the repo's root directory, run:
`mlagents` and `mlagents_envs` separately. From the repo's root directory, run:
cd ml-agents-envs
pip3 install -e ./
cd ..
cd ml-agents
pip3 install -e ./
pip3 install -e ./ml-agents-envs
pip3 install -e ./ml-agents
Running pip with the `-e` flag will let you make changes to the Python files directly and have those
reflected when you run `mlagents-learn`. It is important to install these packages in this order as the
`mlagents` package depends on `mlagents_envs`, and installing it in the other
Running pip with the `-e` flag will let you make changes to the Python files directly and have
those reflected when you run `mlagents-learn`. It is important to install these packages in this
order as the `mlagents` package depends on `mlagents_envs`, and installing it in the other
setting up the ML-Agents toolkit within Unity, running a pre-trained model, in
setting up the ML-Agents Toolkit within Unity, running a pre-trained model, in
addition to building and training environments.
## Help

5
docs/Learning-Environment-Best-Practices.md


lessons which progressively increase in difficulty are presented to the agent
([learn more here](Training-Curriculum-Learning.md)).
* When possible, it is often helpful to ensure that you can complete the task by
using a heuristic to control the agent. To do so, check the `Use Heuristic`
checkbox on the Agent and implement the `Heuristic()` method on the Agent.
using a heuristic to control the agent. To do so, set the `Behavior Type`
to `Heuristic Only` on the Agent's Behavior Parameters, and implement the
`Heuristic()` method on the Agent.
* It is often helpful to make many copies of the agent, and give them the same
`Behavior Name`. In this way the learning process can get more feedback
information from all of these agents, which helps it train faster.

38
docs/Learning-Environment-Create-New.md


### Initialization and Resetting the Agent
When the Agent reaches its target, it marks itself done and its Agent reset
function moves the target to a random location. In addition, if the Agent rolls
off the platform, the reset function puts it back onto the floor.
When the Agent reaches its target, its episode ends and the `OnEpisodeBegin()`
method moves the target to a random location. In addition, if the Agent rolls
off the platform, the `OnEpisodeBegin()` method puts it back onto the floor.
To move the target GameObject, we need a reference to its Transform (which
stores a GameObject's position, orientation and scale in the 3D world). To get

}
public Transform Target;
public override void AgentReset()
public override void OnEpisodeBegin()
{
if (this.transform.position.y < 0)
{

}
```
The final part of the Agent code is the `Agent.AgentAction()` method, which
receives the decision from the Brain and assigns the reward.
The final part of the Agent code is the `Agent.OnActionReceived()` method, which
receives the actions from the Brain and assigns the reward.
`AgentAction()` function. The number of elements in this array is determined by
`OnActionReceived()` function. The number of elements in this array is determined by
the `Vector Action` `Space Type` and `Space Size` settings of the
agent's Brain. The RollerAgent uses the continuous vector action space and needs
two continuous control signals from the Brain. Thus, we will set the Brain

### Rewards
Reinforcement learning requires rewards. Assign rewards in the `AgentAction()`
Reinforcement learning requires rewards. Assign rewards in the `OnActionReceived()`
function. The learning algorithm uses the rewards assigned to the Agent during
the simulation and learning process to determine whether it is giving
the Agent the optimal actions. You want to reward an Agent for completing the

The RollerAgent calculates the distance to detect when it reaches the target.
When it does, the code calls the `Agent.SetReward()` method to assign a
reward of 1.0 and marks the agent as finished by calling the `Done()` method
reward of 1.0 and marks the agent as finished by calling the `EndEpisode()` method
on the Agent.
```csharp

if (distanceToTarget < 1.42f)
{
SetReward(1.0f);
Done();
EndEpisode();
Finally, if the Agent falls off the platform, set the Agent to done so that it can reset itself:
Finally, if the Agent falls off the platform, end the episode so that it can reset itself:
Done();
EndEpisode();
### AgentAction()
### OnActionReceived()
`AgentAction()` function looks like:
`OnActionReceived()` function looks like:
public override void AgentAction(float[] vectorAction)
public override void OnActionReceived(float[] vectorAction)
{
// Actions, size = 2
Vector3 controlSignal = Vector3.zero;

if (distanceToTarget < 1.42f)
{
SetReward(1.0f);
Done();
EndEpisode();
Done();
EndEpisode();
}
}

to the values of the "Horizontal" and "Vertical" input axis (which correspond to
the keyboard arrow keys).
In order for the Agent to use the Heuristic, you will need to check the `Use Heuristic`
checkbox in the `Behavior Parameters` of the RollerAgent.
In order for the Agent to use the Heuristic, you will need to set the `Behavior Type`
to `Heuristic Only` in the `Behavior Parameters` of the RollerAgent.
Press **Play** to run the scene and use the arrow keys to move the Agent around
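A sketch of such a `Heuristic()` method, assuming the two continuous actions described above (the array layout is an assumption of this sketch, not quoted from the tutorial):

```csharp
public override float[] Heuristic()
{
    // Map the "Horizontal" and "Vertical" input axes (the arrow keys) onto the
    // two continuous actions consumed by OnActionReceived().
    var action = new float[2];
    action[0] = Input.GetAxis("Horizontal");
    action[1] = Input.GetAxis("Vertical");
    return action;
}
```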

47
docs/Learning-Environment-Design-Agents.md


The Policy class abstracts out the decision making logic from the Agent itself so
that you can use the same Policy in multiple Agents. How a Policy makes its
decisions depends on the kind of Policy it is. You can change the Policy of an
Agent by changing its `Behavior Parameters`. If you check `Use Heuristic`, the
Agent will use its `Heuristic()` method to make decisions which can allow you to
control the Agent manually or write your own Policy. If the Agent has a `Model`
file, its Policy will use the neural network `Model` to take decisions.
Agent by changing its `Behavior Parameters`. If you set `Behavior Type` to
`Heuristic Only`, the Agent will use its `Heuristic()` method to make decisions
which can allow you to control the Agent manually or write your own Policy. If
the Agent has a `Model` file, its Policy will use the neural network `Model` to
take decisions.
## Decisions

An action is an instruction from the Policy that the agent carries out. The
action is passed to the Agent as a parameter when the Academy invokes the
agent's `AgentAction()` function. When you specify that the vector action space
agent's `OnActionReceived()` function. When you specify that the vector action space
is **Continuous**, the action parameter passed to the Agent is an array of
control signals with length equal to the `Vector Action Space Size` property.
When you specify a **Discrete** vector action space type, the action parameter

values themselves mean. The training algorithm simply tries different values for
the action list and observes the effect on the accumulated rewards over time and
many training episodes. Thus, the only place actions are defined for an Agent is
in the `AgentAction()` function. You simply specify the type of vector action
space, and, for the continuous vector action space, the number of values, and
then apply the received values appropriately (and consistently) in
`ActionAct()`.
in the `OnActionReceived()` function.
For example, if you designed an agent to move in two dimensions, you could use
either continuous or the discrete vector actions. In the continuous case, you

### Continuous Action Space
When an Agent uses a Policy set to the **Continuous** vector action space, the
action parameter passed to the Agent's `AgentAction()` function is an array with
action parameter passed to the Agent's `OnActionReceived()` function is an array with
length equal to the `Vector Action Space Size` property value.
The individual values in the array have whatever meanings that you ascribe to
them. If you assign an element in the array as the speed of an Agent, for

These control values are applied as torques to the bodies making up the arm:
```csharp
public override void AgentAction(float[] act)
public override void OnActionReceived(float[] act)
{
float torque_x = Mathf.Clamp(act[0], -1, 1) * 100f;
float torque_z = Mathf.Clamp(act[1], -1, 1) * 100f;

### Discrete Action Space
When an Agent uses a **Discrete** vector action space, the
action parameter passed to the Agent's `AgentAction()` function is an array
action parameter passed to the Agent's `OnActionReceived()` function is an array
containing indices. With the discrete vector action space, `Branches` is an
array of integers, each value corresponds to the number of possibilities for
each branch.

agent be able to move __and__ jump concurrently. We define the first branch to
have 5 possible actions (don't move, go left, go right, go backward, go forward)
and the second one to have 2 possible actions (don't jump, jump). The
AgentAction method would look something like:
`OnActionReceived()` method would look something like:
```csharp
// Get the action index for movement
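// (Hedged sketch of how this example might continue; "act" and the branch
// sizes follow the description above and are not quoted from this page.)
int movement = Mathf.FloorToInt(act[0]);

// Get the action index for jumping
int jump = Mathf.FloorToInt(act[1]);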

Agent's Heuristic to control the Agent while watching how it accumulates rewards.
Allocate rewards to an Agent by calling the `AddReward()` method in the
`AgentAction()` function. The reward assigned between each decision
`OnActionReceived()` function. The reward assigned between each decision
should be in the range [-1,1]. Values outside this range can lead to
unstable training. The `reward` value is reset to zero when the agent receives a
new decision. If there are multiple calls to `AddReward()` for a single agent

### Examples
You can examine the `AgentAction()` functions defined in the [example
You can examine the `OnActionReceived()` functions defined in the [example
environments](Learning-Environment-Examples.md) to see how those projects
allocate rewards.

if (hitObjects.Where(col => col.gameObject.tag == "goal").ToArray().Length == 1)
{
AddReward(1.0f);
Done();
EndEpisode();
Done();
EndEpisode();
}
```

Mathf.Abs(gameObject.transform.position.x - area.transform.position.x) > 8f ||
Mathf.Abs(gameObject.transform.position.z + 5 - area.transform.position.z) > 8)
{
Done();
EndEpisode();
AddReward(-1f);
}
```

platform:
```csharp
if (IsDone() == false)
{
SetReward(0.1f);
}
SetReward(0.1f);
// When ball falls mark Agent as done and give a negative penalty
// When ball falls mark Agent as finished and give a negative penalty
Done();
EndEpisode();
}
```

Note that all of these environments make use of the `Done()` method, which manually
Note that all of these environments make use of the `EndEpisode()` method, which manually
terminates an episode when a termination condition is reached. This can be
called independently of the `Max Step` property.

33
docs/Learning-Environment-Design.md


Training and simulation proceed in steps orchestrated by the ML-Agents Academy
class. The Academy works with Agent objects in the scene to step
through the simulation. When all Agents in the scene are _done_,
one training episode is finished.
through the simulation.
During training, the external Python training process communicates with the
Academy to run a series of episodes while it collects data and optimizes its

The ML-Agents Academy class orchestrates the agent simulation loop as follows:
1. Calls your Academy's `OnEnvironmentReset` delegate.
2. Calls the `AgentReset()` function for each Agent in the scene.
2. Calls the `OnEpisodeBegin()` function for each Agent in the scene.
5. Calls the `AgentAction()` function for each Agent in the scene, passing in
the action chosen by the Agent's Policy. (This function is not called if the
Agent is done.)
6. Calls the Agent's `AgentReset()` function if the Agent has reached its `Max
Step` count or has otherwise marked itself as `done`.
5. Calls the `OnActionReceived()` function for each Agent in the scene, passing in
the action chosen by the Agent's Policy.
6. Calls the Agent's `OnEpisodeBegin()` function if the Agent has reached its `Max
Step` count or has otherwise marked itself as `EndEpisode()`.
implement the above methods. The `Agent.CollectObservations(VectorSensor sensor)` and
`Agent.AgentAction()` functions are required; the other methods are optional —
whether you need to implement them or not depends on your specific scenario.
implement the above methods; whether you need to implement them or not depends on
your specific scenario.
**Note:** The API used by the Python training process to communicate with
and control the Academy during training can be used for other purposes as well.

have appropriate `Behavior Parameters`.
To create an Agent, extend the Agent class and implement the essential
`CollectObservations(VectorSensor sensor)` and `AgentAction()` methods:
`CollectObservations(VectorSensor sensor)` and `OnActionReceived()` methods:
* `AgentAction()` — Carries out the action chosen by the Agent's Policy and
* `OnActionReceived()` — Carries out the action chosen by the Agent's Policy and
assigns a reward to the current state.
Your implementations of these functions determine how the Behavior Parameters

manually set an Agent to done in your `AgentAction()` function when the Agent
has finished (or irrevocably failed) its task by calling the `Done()` function.
manually terminate an Agent episode in your `OnActionReceived()` function when the Agent
has finished (or irrevocably failed) its task by calling the `EndEpisode()` function.
Agent will consider itself done after it has taken that many steps. You can
use the `Agent.AgentReset()` function to prepare the Agent to start again.
Agent will consider the episode over after it has taken that many steps. You can
use the `Agent.OnEpisodeBegin()` function to prepare the Agent to start again.
See [Agents](Learning-Environment-Design-Agents.md) for detailed information
about programming your own Agents.

* The Academy must reset the scene to a valid starting point for each episode of
training.
* A training episode must have a definite end — either using `Max Steps` or by
each Agent setting itself to `done`.
each Agent ending its episode manually with `EndEpisode()`.

4
docs/Learning-Environment-Examples.md


* Goal: The agents must hit the ball so that the opponent cannot hit a valid
return.
* Agents: The environment contains two agents with the same Behavior Parameters.
After training you can check the `Use Heuristic` checkbox on one of the Agents
to play against your trained model.
After training you can set the `Behavior Type` to `Heuristic Only` on one of the Agent's
Behavior Parameters to play against your trained model.
* Agent Reward Function (independent):
* +1.0 To the agent that wins the point. An agent wins a point by preventing
the opponent from hitting a valid return.

35
docs/Limitations.md


# Limitations
## Unity SDK
### Headless Mode
If you enable Headless mode, you will not be able to collect visual observations
from your agents.
### Rendering Speed and Synchronization
Currently the speed of the game physics can only be increased to 100x real-time.
The Academy also moves in time with FixedUpdate() rather than Update(), so game
behavior implemented in Update() may be out of sync with the agent decision
making. See
[Execution Order of Event Functions](https://docs.unity3d.com/Manual/ExecutionOrder.html)
for more information.
You can control the frequency of Academy stepping by calling
`Academy.Instance.DisableAutomaticStepping()`, and then calling
`Academy.Instance.EnvironmentStep()`
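A minimal sketch of that manual-stepping pattern, assuming a MonoBehaviour drives the calls (the component name here is made up):

```csharp
using MLAgents;
using UnityEngine;

public class ManualAcademyStepper : MonoBehaviour
{
    void Awake()
    {
        // Stop the Academy from stepping automatically on FixedUpdate().
        Academy.Instance.DisableAutomaticStepping();
    }

    void FixedUpdate()
    {
        // Step the ML-Agents simulation exactly once per physics update.
        Academy.Instance.EnvironmentStep();
    }
}
```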
### Unity Inference Engine Models
Currently, only models created with our trainers are supported for running
ML-Agents with a neural network behavior.
## Python API
### Python version
As of version 0.3, we no longer support Python 2.
See the package-specific Limitations pages:
* [Unity `com.unity.mlagents` package](../com.unity.ml-agents/Documentation~/com.unity.ml-agents.md)
* [`mlagents` Python package](../ml-agents/README.md)
* [`mlagents_envs` Python package](../ml-agents-envs/README.md)
* [`gym_unity` Python package](../gym-unity/README.md)

20
docs/Migrating.md


* `AgentInfo.actionMasks` has been renamed to `AgentInfo.discreteActionMasks`.
* `BrainParameters` and `SpaceType` have been removed from the public API
* `BehaviorParameters` have been removed from the public API.
* `DecisionRequester` has been made internal (you can still use the DecisionRequesterComponent from the inspector). `RepeatAction` was renamed `TakeActionsBetweenDecisions` for clarity.
* The following methods in the `Agent` class have been renamed. The original method names will be removed in a later release:
* `InitializeAgent()` was renamed to `Initialize()`
* `AgentAction()` was renamed to `OnActionReceived()`
* `AgentReset()` was renamed to `OnEpisodeBegin()`
* `Done()` was renamed to `EndEpisode()`
* `GiveModel()` was renamed to `SetModel()`
* The `IFloatProperties` interface has been removed.
* The interface for SideChannels was changed:
* In C#, `OnMessageReceived` now takes a `IncomingMessage` argument, and `QueueMessageToSend` takes an `OutgoingMessage` argument.
* In python, `on_message_received` now takes a `IncomingMessage` argument, and `queue_message_to_send` takes an `OutgoingMessage` argument.
### Steps to Migrate
* Add the `using MLAgents.Sensors;` in addition to `using MLAgents;` on top of your Agent's script.

iterate through `RayPerceptionOutput.rayOutputs` and call `RayPerceptionOutput.RayOutput.ToFloatArray()`.
* Re-import all of your `*.NN` files to work with the updated Barracuda package.
* We strongly recommend replacing the following methods with their new equivalent as they will be removed in a later release:
* `InitializeAgent()` to `Initialize()`
* `AgentAction()` to `OnActionReceived()`
* `AgentReset()` to `OnEpisodeBegin()`
* `Done()` to `EndEpisode()`
* `GiveModel()` to `SetModel()`
* Replace `IFloatProperties` variables with `FloatPropertiesChannel` variables.
* If you implemented custom `SideChannels`, update the signatures of your methods, and add your data to the `OutgoingMessage` or read it from the `IncomingMessage`.
## Migrating from 0.13 to 0.14

236
docs/Python-API.md


allows you to interact directly with a Unity Environment (`mlagents_envs`) and
an entry point to train (`mlagents-learn`) which allows you to train agents in
Unity Environments using our implementations of reinforcement learning or
imitation learning.
imitation learning. This document describes how to use the `mlagents_envs` API.
For information on using `mlagents-learn`, see [here](Training-ML-Agents.md).
You can use the Python Low Level API to interact directly with your learning
environment, and use it to develop new learning algorithms.
The Python Low Level API can be used to interact directly with your Unity learning environment.
As such, it can serve as the basis for developing and evaluating new learning algorithms.
## mlagents_envs

Python-side communication happens through `UnityEnvironment` which is located in
[`environment.py`](../ml-agents-envs/mlagents_envs/environment.py). To load
a Unity environment from a built binary file, put the file in the same directory
as `envs`. For example, if the filename of your Unity environment is 3DBall.app, in python, run:
as `envs`. For example, if the filename of your Unity environment is `3DBall`, in python, run:
```python
from mlagents_envs.environment import UnityEnvironment
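# Hypothetical usage sketch: load the 3DBall build placed next to the `envs`
# directory by passing its file name to the constructor.
env = UnityEnvironment(file_name="3DBall")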

`discrete_action_branches = (3,2,)`)
### Modifying the environment from Python
The Environment can be modified by using side channels to send data to the
environment. When creating the environment, pass a list of side channels as
`side_channels` argument to the constructor.
### Communicating additional information with the Environment
In addition to the means of communicating between Unity and python described above,
we also provide methods for sharing agent-agnostic information. These
additional methods are referred to as side channels. ML-Agents includes two ready-made
side channels, described below. It is also possible to create custom side channels to
communicate any additional data between a Unity environment and Python. Instructions for
creating custom side channels can be found [here](Custom-SideChannels.md).
Side channels exist as separate classes which are instantiated, and then passed as list to the `side_channels` argument of the constructor of the `UnityEnvironment` class.
```python
channel = MyChannel()
env = UnityEnvironment(side_channels = [channel])
```
__Note__ : A side channel will only send/receive messages when `env.step` is
__Note__ : A side channel will only send/receive messages when `env.step` or `env.reset()` is
An `EngineConfiguration` will allow you to modify the time scale and graphics quality of the Unity engine.
The `EngineConfiguration` side channel allows you to modify the time-scale, resolution, and graphics quality of the environment. This can be useful for adjusting the environment to perform better during training, or be more interpretable during inference.
* `set_configuration_parameters` with arguments
* width: Defines the width of the display. Default 80.
* height: Defines the height of the display. Default 80.
* quality_level: Defines the quality level of the simulation. Default 1.
* time_scale: Defines the multiplier for the deltatime in the simulation. If set to a higher value, time will pass faster in the simulation but the physics might break. Default 20.
* target_frame_rate: Instructs simulation to try to render at a specified frame rate. Default -1.
* `set_configuration_parameters` which takes the following arguments:
* `width`: Defines the width of the display. Default 80.
* `height`: Defines the height of the display. Default 80.
* `quality_level`: Defines the quality level of the simulation. Default 1.
* `time_scale`: Defines the multiplier for the deltatime in the simulation. If set to a higher value, time will pass faster in the simulation but the physics may perform unpredictably. Default 20.
* `target_frame_rate`: Instructs simulation to try to render at a specified frame rate. Default -1.
For example :
For example, the following code would adjust the time-scale of the simulation to be 2x realtime.
```python
from mlagents_envs.environment import UnityEnvironment
from mlagents_envs.side_channel.engine_configuration_channel import EngineConfigurationChannel
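# Minimal sketch of the usage described above: create the channel, pass it to
# the environment, and run the simulation at 2x real-time.
channel = EngineConfigurationChannel()
env = UnityEnvironment(side_channels=[channel])
channel.set_configuration_parameters(time_scale=2.0)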

```
#### FloatPropertiesChannel
A `FloatPropertiesChannel` will allow you to get and set float properties
in the environment. You can call get_property and set_property on the
side channel to read and write properties.
The `FloatPropertiesChannel` will allow you to get and set pre-defined numerical values in the environment. This can be useful for adjusting environment-specific settings, or for reading non-agent related information from the environment. You can call `get_property` and `set_property` on the side channel to read and write properties.
`FloatPropertiesChannel` has three methods:
* `set_property` Sets a property in the Unity Environment.

channel.set_property("parameter_1", 2.0)
i = env.reset()
readout_value = channel.get_property("parameter_2")
...
```

float property1 = sharedProperties.GetPropertyWithDefault("parameter_1", 0.0f);
```
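As a minimal end-to-end sketch of the Python side (assuming the Unity environment reads `parameter_1` and writes `parameter_2`, as in the snippets above), the flow might look like this:

```python
from mlagents_envs.environment import UnityEnvironment
from mlagents_envs.side_channel.float_properties_channel import FloatPropertiesChannel

# Create the channel and pass it to the environment at construction time.
channel = FloatPropertiesChannel()
env = UnityEnvironment(side_channels=[channel])

# Write a property; it is sent to Unity on the next env.reset() or env.step().
channel.set_property("parameter_1", 2.0)
env.reset()

# Read a property written by the Unity side (None if it was never set).
readout_value = channel.get_property("parameter_2")
print(readout_value)

env.close()
```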
#### [Advanced] Create your own SideChannel
You can create your own `SideChannel` in C# and Python and use it to communicate data between the two.
##### Unity side
The side channel will have to implement the `SideChannel` abstract class and the following method.
* `OnMessageReceived(byte[] data)` : You must implement this method to specify what the side channel will be doing
with the data received from Python. The data is a `byte[]` argument.
The side channel must also assign a `ChannelId` property in the constructor. The `ChannelId` is a Guid
(or UUID in Python) used to uniquely identify a side channel. This Guid must be the same on C# and Python.
There can only be one side channel of a certain id during communication.
To send a byte array from C# to Python, call the `base.QueueMessageToSend(data)` method inside the side channel.
The `data` argument must be a `byte[]`.
To register a side channel on the Unity side, call `Academy.Instance.RegisterSideChannel` with the side channel
as its only argument.
##### Python side
The side channel will have to implement the `SideChannel` abstract class. You must implement:
* `on_message_received(self, data: bytes) -> None` : You must implement this method to specify what the
side channel will be doing with the data received from Unity. The data is a `bytes` object.
The side channel must also assign a `channel_id` property in the constructor. The `channel_id` is a UUID
(referred to in C# as a Guid) used to uniquely identify a side channel. This number must be the same on C# and
Python. There can only be one side channel of a certain id during communication.
To assign the `channel_id` call the abstract class constructor with the appropriate `channel_id` as follows:
```python
super().__init__(my_channel_id)
```
To send a byte array from Python to C#, call the `super().queue_message_to_send(bytes_data)` method inside the
side channel. The `bytes_data` argument must be a `bytes` object.
To register a side channel on the Python side, pass the side channel as argument when creating the
`UnityEnvironment` object. One of the arguments of the constructor (`side_channels`) is a list of side channels.
##### Example implementation
Here is a simple implementation of a Side Channel that will exchange strings between C# and Python
(encoded as ASCII).
On the C# side:
Here is an implementation of a `StringLogSideChannel` that will listen to the `UnityEngine.Debug.LogError` calls in
the game:
```csharp
using UnityEngine;
using MLAgents;
using System.Text;
using System;
public class StringLogSideChannel : SideChannel
{
public StringLogSideChannel()
{
ChannelId = new Guid("621f0a70-4f87-11ea-a6bf-784f4387d1f7");
}
public override void OnMessageReceived(byte[] data)
{
var receivedString = Encoding.ASCII.GetString(data);
Debug.Log("From Python : " + receivedString);
}
public void SendDebugStatementToPython(string logString, string stackTrace, LogType type)
{
if (type == LogType.Error)
{
var stringToSend = type.ToString() + ": " + logString + "\n" + stackTrace;
var encodedString = Encoding.ASCII.GetBytes(stringToSend);
base.QueueMessageToSend(encodedString);
}
}
}
```
We also need to register this side channel to the Academy and to the `Application.logMessageReceived` events,
so we write a simple MonoBehaviour for this. (Do not forget to attach it to a GameObject in the scene).
```csharp
using UnityEngine;
using MLAgents;
public class RegisterStringLogSideChannel : MonoBehaviour
{
StringLogSideChannel stringChannel;
public void Awake()
{
// We create the Side Channel
stringChannel = new StringLogSideChannel();
// When a Debug.Log message is created, we send it to the stringChannel
Application.logMessageReceived += stringChannel.SendDebugStatementToPython;
// Just in case the Academy has not yet initialized
Academy.Instance.RegisterSideChannel(stringChannel);
}
public void OnDestroy()
{
// De-register the Debug.Log callback
Application.logMessageReceived -= stringChannel.SendDebugStatementToPython;
if (Academy.IsInitialized){
Academy.Instance.UnregisterSideChannel(stringChannel);
}
}
public void Update()
{
// Optional : If the space bar is pressed, raise an error !
if (Input.GetKeyDown(KeyCode.Space))
{
Debug.LogError("This is a fake error. Space bar was pressed in Unity.");
}
}
}
```
And here is the script on the Python side. This script creates a new Side channel type (`StringLogChannel`) and
launches a `UnityEnvironment` with that side channel.
```python
from mlagents_envs.environment import UnityEnvironment
from mlagents_envs.side_channel.side_channel import SideChannel
import uuid
import numpy as np
# Create the StringLogChannel class
class StringLogChannel(SideChannel):
def __init__(self) -> None:
super().__init__(uuid.UUID("621f0a70-4f87-11ea-a6bf-784f4387d1f7"))
def on_message_received(self, data: bytes) -> None:
"""
Note: We must implement this method of the SideChannel interface to
receive messages from Unity
"""
# We simply print the data received interpreted as ascii
print(data.decode("ascii"))
def send_string(self, data: str) -> None:
# Convert the string to ascii
bytes_data = data.encode("ascii")
# We call this method to queue the data we want to send
super().queue_message_to_send(bytes_data)
# Create the channel
string_log = StringLogChannel()
# We start the communication with the Unity Editor and pass the string_log side channel as input
env = UnityEnvironment(base_port=UnityEnvironment.DEFAULT_EDITOR_PORT, side_channels=[string_log])
env.reset()
string_log.send_string("The environment was reset")
group_name = env.get_agent_groups()[0] # Get the first group_name
for i in range(1000):
step_data = env.get_step_result(group_name)
n_agents = step_data.n_agents() # Get the number of agents
# We send data to Unity: a string with the number of agents at each step
string_log.send_string(
"Step " + str(i) + " occurred with " + str(n_agents) + " agents."
)
env.step() # Move the simulation forward
env.close()
```
Now, if you run this script and press `Play` in the Unity Editor when prompted, the console in the Unity Editor will
display a message at every Python step. Additionally, if you press the Space Bar in the Unity Editor, a message will
appear in the terminal.
For information on how to make custom side channels for sending additional data types, see the documentation [here](Custom-SideChannels.md).

3
docs/Readme.md


* [Using the Monitor](Feature-Monitor.md)
* [Using the Video Recorder](https://github.com/Unity-Technologies/video-recorder)
* [Using an Executable Environment](Learning-Environment-Executable.md)
* [Creating Custom Side Channels](Custom-SideChannels.md)
## Training

* [Training on the Cloud with Amazon Web Services](Training-on-Amazon-Web-Service.md)
* [Training on the Cloud with Microsoft Azure](Training-on-Microsoft-Azure.md)
* [Using Docker](Using-Docker.md)
* [Installation-Windows](Installation-Windows.md)
* [Windows Anaconda Installation](Installation-Anaconda-Windows.md)

2
docs/Training-Curriculum-Learning.md


greater than number of thresholds.
Once our curriculum is defined, we have to use the reset parameters we defined
and modify the environment from the Agent's `AgentReset()` function. See
and modify the environment from the Agent's `OnEpisodeBegin()` function. See
[WallJumpAgent.cs](https://github.com/Unity-Technologies/ml-agents/blob/master/Project/Assets/ML-Agents/Examples/WallJump/Scripts/WallJumpAgent.cs)
for an example.

1
docs/Training-ML-Agents.md


| init_entcoef | How much the agent should explore in the beginning of training. | SAC |
| lambd | The regularization parameter. | PPO |
| learning_rate | The initial learning rate for gradient descent. | PPO, SAC |
| learning_rate_schedule | Determines how learning rate changes over time. | PPO, SAC |
| max_steps | The maximum number of simulation steps to run during a training session. | PPO, SAC |
| memory_size | The size of the memory an agent must keep. Used for training with a recurrent neural network. See [Using Recurrent Neural Networks](Feature-Memory.md). | PPO, SAC |
| normalize | Whether to automatically normalize observations. | PPO, SAC |

9
docs/Using-Docker.md


## Requirements
- Unity _Linux Build Support_ Component
- Unity _Linux Build Support_ Component. Make sure to select the _Linux
Build Support_ component when installing Unity.
<p align="center">
<img src="images/unity_linux_build_support.png"
alt="Linux Build Support"
width="500" border="10" />
</p>
## Setup

12
docs/Using-Virtual-Environment.md


spinning up a new environment and verifying the compatibility of the code with the
different version.
Requirement - Python 3.6 must be installed on the machine you would like
to run ML-Agents on (either local laptop/desktop or remote server). Python 3.6 can be
installed from [here](https://www.python.org/downloads/).
## Python Version Requirement (Required)
This guide has been tested with Python 3.6 and 3.7. Python 3.8 is not supported at this time.

1. To activate the environment execute `$ source ~/python-envs/sample-env/bin/activate`
1. Verify pip version is the same as in the __Installing Pip__ section. In case it is not the latest, upgrade to
the latest pip version using `$ pip3 install --upgrade pip`
1. Install ML-Agents package using `$ pip3 install mlagents`
1. To deactivate the environment execute `$ deactivate`
1. To deactivate the environment execute `$ deactivate` (you can reactivate the environment
using the same `activate` command listed above)
## Ubuntu Setup

1. To activate the environment execute `python-envs\sample-env\Scripts\activate`
1. Verify pip version is the same as in the __Installing Pip__ section. In case it is not the
latest, upgrade to the latest pip version using `pip install --upgrade pip`
1. Install ML-Agents package using `pip install mlagents`
1. To deactivate the environment execute `deactivate`
1. To deactivate the environment execute `deactivate` (you can reactivate the environment
using the same `activate` command listed above)
Note:
- Verify that you are using Python 3.6 or Python 3.7. Launch a command prompt using `cmd` and

951
docs/images/unity_package_manager_window.png


2
docs/localized/KR/docs/Installation.md


</p>
## Windows Users
To set up your environment on Windows, we have written a [detailed guide](Installation-Windows.md) describing the setup steps.
To set up your environment on Windows, we have written a [detailed guide](Installation-Anaconda-Windows.md) describing the setup steps.
For Mac and Linux, please check the following guide.
## Mac or Unix Users

2
docs/localized/zh-CN/docs/Installation.md


### Windows Users
If you are a Windows user new to Python and TensorFlow, please follow [this guide](/docs/Installation-Windows.md) to set up your Python environment.
If you are a Windows user new to Python and TensorFlow, please follow [this guide](/docs/Installation-Anaconda-Windows.md) to set up your Python environment.
### Mac and Unix Users

4
ml-agents-envs/mlagents_envs/communicator.py


import logging
logger = logging.getLogger("mlagents_envs")
class Communicator(object):

7
ml-agents-envs/mlagents_envs/environment.py


from typing import Dict, List, Optional, Any
import mlagents_envs
from mlagents_envs.side_channel.side_channel import SideChannel
from mlagents_envs.side_channel.side_channel import SideChannel, IncomingMessage
from mlagents_envs.base_env import (
BaseEnv,

import signal
import struct
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("mlagents_envs")

"sending side channel data properly.".format(channel_id)
)
if channel_id in side_channels:
side_channels[channel_id].on_message_received(message_data)
incoming_message = IncomingMessage(message_data)
side_channels[channel_id].on_message_received(incoming_message)
else:
logger.warning(
"Unknown side channel data received. Channel type "

5
ml-agents-envs/mlagents_envs/exception.py


import logging
logger = logging.getLogger("mlagents_envs")
class UnityException(Exception):
"""
Any error related to ml-agents environment.

3
ml-agents-envs/mlagents_envs/rpc_communicator.py


import logging
import grpc
from typing import Optional

from mlagents_envs.communicator_objects.unity_input_pb2 import UnityInputProto
from mlagents_envs.communicator_objects.unity_output_pb2 import UnityOutputProto
from .exception import UnityTimeOutException, UnityWorkerInUseException
logger = logging.getLogger("mlagents_envs")
class UnityToExternalServicerImplementation(UnityToExternalProtoServicer):

3
ml-agents-envs/mlagents_envs/rpc_utils.py


NONE as COMPRESSION_NONE,
)
from mlagents_envs.communicator_objects.brain_parameters_pb2 import BrainParametersProto
import logging
logger = logging.getLogger("mlagents_envs")
def agent_group_spec_from_proto(

4
ml-agents-envs/mlagents_envs/side_channel/__init__.py


from mlagents_envs.side_channel.incoming_message import IncomingMessage # noqa
from mlagents_envs.side_channel.outgoing_message import OutgoingMessage # noqa
from mlagents_envs.side_channel.side_channel import SideChannel # noqa

23
ml-agents-envs/mlagents_envs/side_channel/engine_configuration_channel.py


from mlagents_envs.side_channel.side_channel import SideChannel
from mlagents_envs.side_channel import SideChannel, OutgoingMessage, IncomingMessage
import struct
import uuid
from typing import NamedTuple

def __init__(self) -> None:
super().__init__(uuid.UUID("e951342c-4f7e-11ea-b238-784f4387d1f7"))
def on_message_received(self, data: bytes) -> None:
def on_message_received(self, msg: IncomingMessage) -> None:
"""
Is called by the environment to the side channel. Can be called
multiple times per step if multiple messages are meant for that

:param target_frame_rate: Instructs simulation to try to render at a
specified frame rate. Default -1.
"""
data = bytearray()
data += struct.pack("<i", width)
data += struct.pack("<i", height)
data += struct.pack("<i", quality_level)
data += struct.pack("<f", time_scale)
data += struct.pack("<i", target_frame_rate)
super().queue_message_to_send(data)
msg = OutgoingMessage()
msg.write_int32(width)
msg.write_int32(height)
msg.write_int32(quality_level)
msg.write_float32(time_scale)
msg.write_int32(target_frame_rate)
super().queue_message_to_send(msg)
data = bytearray()
data += struct.pack("<iiifi", *config)
super().queue_message_to_send(data)
self.set_configuration_parameters(**config._asdict())

36
ml-agents-envs/mlagents_envs/side_channel/float_properties_channel.py


from mlagents_envs.side_channel.side_channel import SideChannel
import struct
from mlagents_envs.side_channel import SideChannel, IncomingMessage, OutgoingMessage
from typing import Dict, Tuple, Optional, List
from typing import Dict, Optional, List
class FloatPropertiesChannel(SideChannel):

channel_id = uuid.UUID(("60ccf7d0-4f7e-11ea-b238-784f4387d1f7"))
super().__init__(channel_id)
def on_message_received(self, data: bytes) -> None:
def on_message_received(self, msg: IncomingMessage) -> None:
Note that Python should never receive an engine configuration from
Unity
k, v = self.deserialize_float_prop(data)
k = msg.read_string()
v = msg.read_float32()
self._float_properties[k] = v
def set_property(self, key: str, value: float) -> None:

:param value: The float value of the property.
"""
self._float_properties[key] = value
super().queue_message_to_send(self.serialize_float_prop(key, value))
msg = OutgoingMessage()
msg.write_string(key)
msg.write_float32(value)
super().queue_message_to_send(msg)
def get_property(self, key: str) -> Optional[float]:
"""

:return:
"""
return dict(self._float_properties)
@staticmethod
def serialize_float_prop(key: str, value: float) -> bytearray:
result = bytearray()
encoded_key = key.encode("ascii")
result += struct.pack("<i", len(encoded_key))
result += encoded_key
result += struct.pack("<f", value)
return result
@staticmethod
def deserialize_float_prop(data: bytes) -> Tuple[str, float]:
offset = 0
encoded_key_len = struct.unpack_from("<i", data, offset)[0]
offset = offset + 4
key = data[offset : offset + encoded_key_len].decode("ascii")
offset = offset + encoded_key_len
value = struct.unpack_from("<f", data, offset)[0]
return key, value

10
ml-agents-envs/mlagents_envs/side_channel/raw_bytes_channel.py


from mlagents_envs.side_channel.side_channel import SideChannel
from mlagents_envs.side_channel import SideChannel, IncomingMessage, OutgoingMessage
from typing import List
import uuid

self._received_messages: List[bytes] = []
super().__init__(channel_id)
def on_message_received(self, data: bytes) -> None:
def on_message_received(self, msg: IncomingMessage) -> None:
self._received_messages.append(data)
self._received_messages.append(msg.get_raw_bytes())
def get_and_clear_received_messages(self) -> List[bytes]:
"""

Queues a message to be sent by the environment at the next call to
step.
"""
super().queue_message_to_send(data)
msg = OutgoingMessage()
msg.set_raw_bytes(data)
super().queue_message_to_send(msg)

13
ml-agents-envs/mlagents_envs/side_channel/side_channel.py


from abc import ABC, abstractmethod
from typing import List
import uuid
import logging
from mlagents_envs.side_channel import IncomingMessage, OutgoingMessage
logger = logging.getLogger(__name__)
class SideChannel(ABC):

to the Env object at construction.
"""
def __init__(self, channel_id):
def __init__(self, channel_id: uuid.UUID):
def queue_message_to_send(self, data: bytearray) -> None:
def queue_message_to_send(self, msg: OutgoingMessage) -> None:
self.message_queue.append(data)
self.message_queue.append(msg.buffer)
def on_message_received(self, data: bytes) -> None:
def on_message_received(self, msg: IncomingMessage) -> None:
"""
Is called by the environment to the side channel. Can be called
multiple times per step if multiple messages are meant for that

68
ml-agents-envs/mlagents_envs/tests/test_side_channel.py


import struct
from mlagents_envs.side_channel.side_channel import SideChannel
from mlagents_envs.side_channel import SideChannel, IncomingMessage, OutgoingMessage
from mlagents_envs.side_channel.float_properties_channel import FloatPropertiesChannel
from mlagents_envs.side_channel.raw_bytes_channel import RawBytesChannel
from mlagents_envs.environment import UnityEnvironment

self.list_int = []
super().__init__(uuid.UUID("a85ba5c0-4f87-11ea-a517-784f4387d1f7"))
def on_message_received(self, data):
val = struct.unpack_from("<i", data, 0)[0]
def on_message_received(self, msg: IncomingMessage) -> None:
val = msg.read_int32()
data = bytearray()
data += struct.pack("<i", value)
super().queue_message_to_send(data)
msg = OutgoingMessage()
msg.write_int32(value)
super().queue_message_to_send(msg)
def test_int_channel():

messages = receiver.get_and_clear_received_messages()
assert len(messages) == 0
def test_message_bool():
vals = [True, False]
msg_out = OutgoingMessage()
for v in vals:
msg_out.write_bool(v)
msg_in = IncomingMessage(msg_out.buffer)
read_vals = []
for _ in range(len(vals)):
read_vals.append(msg_in.read_bool())
assert vals == read_vals
def test_message_int32():
val = 1337
msg_out = OutgoingMessage()
msg_out.write_int32(val)
msg_in = IncomingMessage(msg_out.buffer)
read_val = msg_in.read_int32()
assert val == read_val
def test_message_float32():
val = 42.0
msg_out = OutgoingMessage()
msg_out.write_float32(val)
msg_in = IncomingMessage(msg_out.buffer)
read_val = msg_in.read_float32()
# These won't be exactly equal in general, since python floats are 64-bit.
assert val == read_val
def test_message_string():
val = "mlagents!"
msg_out = OutgoingMessage()
msg_out.write_string(val)
msg_in = IncomingMessage(msg_out.buffer)
read_val = msg_in.read_string()
assert val == read_val
def test_message_float_list():
val = [1.0, 3.0, 9.0]
msg_out = OutgoingMessage()
msg_out.write_float32_list(val)
msg_in = IncomingMessage(msg_out.buffer)
read_val = msg_in.read_float32_list()
# These won't be exactly equal in general, since python floats are 64-bit.
assert val == read_val

4
ml-agents/mlagents/trainers/brain.py


import logging
logger = logging.getLogger("mlagents.trainers")
class CameraResolution(NamedTuple):

5
ml-agents/mlagents/trainers/components/reward_signals/__init__.py


from mlagents.trainers.exception import UnityTrainerException
from mlagents.trainers.policy.tf_policy import TFPolicy
from mlagents.trainers.buffer import AgentBuffer
logger = logging.getLogger("mlagents.trainers")

self.strength = strength
self.stats_name_to_update_name: Dict[str, str] = {}
def evaluate_batch(self, mini_batch: Dict[str, np.array]) -> RewardSignalResult:
def evaluate_batch(self, mini_batch: AgentBuffer) -> RewardSignalResult:
"""
Evaluates the reward for the data present in the Dict mini_batch. Use this when evaluating a reward
function drawn straight from a Buffer.

)
def prepare_update(
self, policy: TFPolicy, mini_batch: Dict[str, np.ndarray], num_sequences: int
self, policy: TFPolicy, mini_batch: AgentBuffer, num_sequences: int
) -> Dict[tf.Tensor, Any]:
"""
If the reward signal has an internal model (e.g. GAIL or Curiosity), get the feed_dict

5
ml-agents/mlagents/trainers/components/reward_signals/curiosity/signal.py


from mlagents.trainers.components.reward_signals import RewardSignal, RewardSignalResult
from mlagents.trainers.components.reward_signals.curiosity.model import CuriosityModel
from mlagents.trainers.policy.tf_policy import TFPolicy
from mlagents.trainers.buffer import AgentBuffer
class CuriosityRewardSignal(RewardSignal):

}
self.has_updated = False
def evaluate_batch(self, mini_batch: Dict[str, np.array]) -> RewardSignalResult:
def evaluate_batch(self, mini_batch: AgentBuffer) -> RewardSignalResult:
feed_dict: Dict[tf.Tensor, Any] = {
self.policy.batch_size_ph: len(mini_batch["actions"]),
self.policy.sequence_length_ph: self.policy.sequence_length,

super().check_config(config_dict, param_keys)
def prepare_update(
self, policy: TFPolicy, mini_batch: Dict[str, np.ndarray], num_sequences: int
self, policy: TFPolicy, mini_batch: AgentBuffer, num_sequences: int
) -> Dict[tf.Tensor, Any]:
"""
Prepare for update and get feed_dict.

3
ml-agents/mlagents/trainers/components/reward_signals/extrinsic/signal.py


import numpy as np
from mlagents.trainers.components.reward_signals import RewardSignal, RewardSignalResult
from mlagents.trainers.buffer import AgentBuffer
class ExtrinsicRewardSignal(RewardSignal):

param_keys = ["strength", "gamma"]
super().check_config(config_dict, param_keys)
def evaluate_batch(self, mini_batch: Dict[str, np.array]) -> RewardSignalResult:
def evaluate_batch(self, mini_batch: AgentBuffer) -> RewardSignalResult:
env_rews = np.array(mini_batch["environment_rewards"], dtype=np.float32)
return RewardSignalResult(self.strength * env_rews, env_rews)

19
ml-agents/mlagents/trainers/components/reward_signals/gail/signal.py


from typing import Any, Dict, List
import logging
import numpy as np
from mlagents.tf_utils import tf

from mlagents.trainers.demo_loader import demo_to_buffer
LOGGER = logging.getLogger("mlagents.trainers")
from mlagents.trainers.buffer import AgentBuffer
class GAILRewardSignal(RewardSignal):

"Policy/GAIL Expert Estimate": "gail_expert_estimate",
}
def evaluate_batch(self, mini_batch: Dict[str, np.array]) -> RewardSignalResult:
def evaluate_batch(self, mini_batch: AgentBuffer) -> RewardSignalResult:
feed_dict: Dict[tf.Tensor, Any] = {
self.policy.batch_size_ph: len(mini_batch["actions"]),
self.policy.sequence_length_ph: self.policy.sequence_length,

super().check_config(config_dict, param_keys)
def prepare_update(
self, policy: TFPolicy, mini_batch: Dict[str, np.ndarray], num_sequences: int
self, policy: TFPolicy, mini_batch: AgentBuffer, num_sequences: int
) -> Dict[tf.Tensor, Any]:
"""
Prepare inputs for update.

"""
max_num_experiences = min(
len(mini_batch["actions"]), self.demonstration_buffer.num_experiences
)
# If num_sequences is less, we need to shorten the input batch.
for key, element in mini_batch.items():
mini_batch[key] = element[:max_num_experiences]
# Get batch from demo buffer
# Get batch from demo buffer. Even if demo buffer is smaller, we sample with replacement
len(mini_batch["actions"]), 1
mini_batch.num_experiences, 1
)
feed_dict: Dict[tf.Tensor, Any] = {

4
ml-agents/mlagents/trainers/components/reward_signals/reward_signal_factory.py


import logging
from mlagents.trainers.exception import UnityTrainerException
from mlagents.trainers.components.reward_signals import RewardSignal
from mlagents.trainers.components.reward_signals.extrinsic.signal import (

CuriosityRewardSignal,
)
from mlagents.trainers.policy.tf_policy import TFPolicy
logger = logging.getLogger("mlagents.trainers")
NAME_TO_CLASS: Dict[str, Type[RewardSignal]] = {

4
ml-agents/mlagents/trainers/demo_loader.py


import logging
import os
from typing import List, Tuple
import numpy as np

)
from mlagents_envs.timers import timed, hierarchical_timer
from google.protobuf.internal.decoder import _DecodeVarint32 # type: ignore
logger = logging.getLogger("mlagents.trainers")
@timed

23
ml-agents/mlagents/trainers/distributions.py


and 1 for unmasked.
"""
unmasked_log_probs = self._create_policy_branches(logits, act_size)
self._sampled_policy, self._all_probs, action_index = self._get_masked_actions_probs(
unmasked_log_probs, act_size, action_masks
)
(
self._sampled_policy,
self._all_probs,
action_index,
) = self._get_masked_actions_probs(unmasked_log_probs, act_size, action_masks)
self._entropy = self._create_entropy(
self._sampled_onehot, self._all_probs, action_index, act_size
)
self._entropy = self._create_entropy(self._all_probs, action_index, act_size)
self._total_prob = self._get_log_probs(
self._sampled_onehot, self._all_probs, action_index, act_size
)

kernel_initializer=ModelUtils.scaled_init(0.01),
)
)
unmasked_log_probs = tf.concat(policy_branches, axis=1)
return unmasked_log_probs
return policy_branches
unmasked_log_probs: tf.Tensor,
unmasked_log_probs: List[tf.Tensor],
act_size: List[int],
action_masks: tf.Tensor,
) -> Tuple[tf.Tensor, tf.Tensor, np.ndarray]:

return log_probs
def _create_entropy(
self,
all_log_probs: tf.Tensor,
sample_onehot: tf.Tensor,
action_idx: List[int],
act_size: List[int],
self, all_log_probs: tf.Tensor, action_idx: List[int], act_size: List[int]
) -> tf.Tensor:
entropy = tf.reduce_sum(
(

7
ml-agents/mlagents/trainers/ghost/trainer.py


# # Unity ML-Agents Toolkit
# ## ML-Agent Learning (Ghost Trainer)
# import logging
from typing import Deque, Dict, List, Any, cast
import numpy as np

from mlagents.trainers.trajectory import Trajectory
from mlagents.trainers.agent_processor import AgentManagerQueue
LOGGER = logging.getLogger("mlagents.trainers")
logger = logging.getLogger("mlagents.trainers")
class GhostTrainer(Trainer):

Saves training statistics to Tensorboard.
"""
opponents = np.array(self.policy_elos, dtype=np.float32)
LOGGER.info(
logger.info(
" Learning brain {} ELO: {:0.3f}\n"
"Mean Opponent ELO: {:0.3f}"
" Std Opponent ELO: {:0.3f}".format(

x = "current"
self.policy_elos[-1] = self.current_elo
self.current_opponent = -1 if x == "current" else x
LOGGER.debug(
logger.debug(
"Step {}: Swapping snapshot {} to id {} with {} learning".format(
self.get_step, x, name_behavior_id, self.learning_behavior_name
)

155
ml-agents/mlagents/trainers/learn.py


from mlagents_envs.side_channel.side_channel import SideChannel
from mlagents_envs.side_channel.engine_configuration_channel import EngineConfig
from mlagents_envs.exception import UnityEnvironmentException
from mlagents_envs.timers import hierarchical_timer
from mlagents.logging_util import create_logger
def _create_parser():

:param run_seed: Random seed used for training.
:param run_options: Command line arguments for training.
"""
# Recognize and use docker volume if one is passed as an argument
if not options.docker_target_name:
model_path = f"./models/{options.run_id}"
summaries_dir = "./summaries"
else:
model_path = f"/{options.docker_target_name}/models/{options.run_id}"
summaries_dir = f"/{options.docker_target_name}/summaries"
port = options.base_port
with hierarchical_timer("run_training.setup"):
# Recognize and use docker volume if one is passed as an argument
if not options.docker_target_name:
model_path = f"./models/{options.run_id}"
summaries_dir = "./summaries"
else:
model_path = f"/{options.docker_target_name}/models/{options.run_id}"
summaries_dir = f"/{options.docker_target_name}/summaries"
port = options.base_port
# Configure CSV, Tensorboard Writers and StatsReporter
# We assume reward and episode length are needed in the CSV.
csv_writer = CSVWriter(
summaries_dir,
required_fields=["Environment/Cumulative Reward", "Environment/Episode Length"],
)
tb_writer = TensorboardWriter(summaries_dir)
gauge_write = GaugeWriter()
StatsReporter.add_writer(tb_writer)
StatsReporter.add_writer(csv_writer)
StatsReporter.add_writer(gauge_write)
# Configure CSV, Tensorboard Writers and StatsReporter
# We assume reward and episode length are needed in the CSV.
csv_writer = CSVWriter(
summaries_dir,
required_fields=[
"Environment/Cumulative Reward",
"Environment/Episode Length",
],
)
tb_writer = TensorboardWriter(summaries_dir)
gauge_write = GaugeWriter()
StatsReporter.add_writer(tb_writer)
StatsReporter.add_writer(csv_writer)
StatsReporter.add_writer(gauge_write)
if options.env_path is None:
port = UnityEnvironment.DEFAULT_EDITOR_PORT
env_factory = create_environment_factory(
options.env_path,
options.docker_target_name,
options.no_graphics,
run_seed,
port,
options.env_args,
)
engine_config = EngineConfig(
options.width,
options.height,
options.quality_level,
options.time_scale,
options.target_frame_rate,
)
env_manager = SubprocessEnvManager(env_factory, engine_config, options.num_envs)
maybe_meta_curriculum = try_create_meta_curriculum(
options.curriculum_config, env_manager, options.lesson
)
sampler_manager, resampling_interval = create_sampler_manager(
options.sampler_config, run_seed
)
trainer_factory = TrainerFactory(
options.trainer_config,
summaries_dir,
options.run_id,
model_path,
options.keep_checkpoints,
options.train_model,
options.load_model,
run_seed,
maybe_meta_curriculum,
options.multi_gpu,
)
# Create controller and begin training.
tc = TrainerController(
trainer_factory,
model_path,
summaries_dir,
options.run_id,
options.save_freq,
maybe_meta_curriculum,
options.train_model,
run_seed,
sampler_manager,
resampling_interval,
)
if options.env_path is None:
port = UnityEnvironment.DEFAULT_EDITOR_PORT
env_factory = create_environment_factory(
options.env_path,
options.docker_target_name,
options.no_graphics,
run_seed,
port,
options.env_args,
)
engine_config = EngineConfig(
options.width,
options.height,
options.quality_level,
options.time_scale,
options.target_frame_rate,
)
env_manager = SubprocessEnvManager(env_factory, engine_config, options.num_envs)
maybe_meta_curriculum = try_create_meta_curriculum(
options.curriculum_config, env_manager, options.lesson
)
sampler_manager, resampling_interval = create_sampler_manager(
options.sampler_config, run_seed
)
trainer_factory = TrainerFactory(
options.trainer_config,
summaries_dir,
options.run_id,
model_path,
options.keep_checkpoints,
options.train_model,
options.load_model,
run_seed,
maybe_meta_curriculum,
options.multi_gpu,
)
# Create controller and begin training.
tc = TrainerController(
trainer_factory,
model_path,
summaries_dir,
options.run_id,
options.save_freq,
maybe_meta_curriculum,
options.train_model,
run_seed,
sampler_manager,
resampling_interval,
)
# Begin training
try:
tc.start_learning(env_manager)

except Exception:
print("\n\n\tUnity Technologies\n")
print(get_version_string())
trainer_logger = logging.getLogger("mlagents.trainers")
env_logger = logging.getLogger("mlagents_envs")
trainer_logger.setLevel("DEBUG")
env_logger.setLevel("DEBUG")
log_level = logging.DEBUG
trainer_logger.setLevel("INFO")
env_logger.setLevel("INFO")
# disable noisy warnings from tensorflow.
log_level = logging.INFO
# disable noisy warnings from tensorflow
trainer_logger = create_logger("mlagents.trainers", log_level)
trainer_logger.debug("Configuration for this run:")
trainer_logger.debug(json.dumps(options._asdict(), indent=4))

39
ml-agents/mlagents/trainers/models.py


import logging
from enum import Enum
from typing import Callable, Dict, List, Tuple, NamedTuple

from mlagents.trainers.exception import UnityTrainerException
from mlagents.trainers.brain import CameraResolution
logger = logging.getLogger("mlagents.trainers")
ActivationFunction = Callable[[tf.Tensor], tf.Tensor]
EncoderFunction = Callable[

)
@staticmethod
def create_discrete_action_masking_layer(all_logits, action_masks, action_size):
def break_into_branches(
concatenated_logits: tf.Tensor, action_size: List[int]
) -> List[tf.Tensor]:
"""
Takes a concatenated set of logits that represent multiple discrete action branches
and breaks it up into one Tensor per branch.
:param concatenated_logits: Tensor that represents the concatenated action branches
:param action_size: List of ints containing the number of possible actions for each branch.
:return: A List of Tensors containing one tensor per branch.
"""
action_idx = [0] + list(np.cumsum(action_size))
branched_logits = [
concatenated_logits[:, action_idx[i] : action_idx[i + 1]]
for i in range(len(action_size))
]
return branched_logits
@staticmethod
def create_discrete_action_masking_layer(
branches_logits: List[tf.Tensor],
action_masks: tf.Tensor,
action_size: List[int],
) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]:
:param all_logits: The concatenated unnormalized action probabilities for all branches
:param branches_logits: A List of the unnormalized action probabilities for each branch
:param action_masks: The mask for the logits. Must be of dimension [None x total_number_of_action]
:param action_size: A list containing the number of possible actions for each branch
:return: The action output dimension [batch_size, num_branches], the concatenated

action_idx = [0] + list(np.cumsum(action_size))
branches_logits = [
all_logits[:, action_idx[i] : action_idx[i + 1]]
for i in range(len(action_size))
]
branch_masks = [
action_masks[:, action_idx[i] : action_idx[i + 1]]
for i in range(len(action_size))
]
branch_masks = ModelUtils.break_into_branches(action_masks, action_size)
raw_probs = [
tf.multiply(tf.nn.softmax(branches_logits[k]) + EPSILON, branch_masks[k])
for k in range(len(action_size))

5
ml-agents/mlagents/trainers/policy/nn_policy.py


import logging
from mlagents_envs.timers import timed
from mlagents_envs.base_env import BatchedStepResult
from mlagents.trainers.brain import BrainParameters

GaussianDistribution,
MultiCategoricalDistribution,
)
logger = logging.getLogger("mlagents.trainers")
EPSILON = 1e-6 # Small value to avoid divide by zero

4
ml-agents/mlagents/trainers/policy/tf_policy.py


import logging
from typing import Any, Dict, List, Optional
from mlagents_envs.exception import UnityException
from mlagents.trainers.policy import Policy
from mlagents.trainers.action_info import ActionInfo

self.use_recurrent = trainer_parameters["use_recurrent"]
self.memory_dict: Dict[str, np.ndarray] = {}
self.reward_signals: Dict[str, "RewardSignal"] = {}
self.num_branches = len(self.brain.vector_action_space_size)
self.previous_action_dict: Dict[str, np.array] = {}
self.normalize = trainer_parameters.get("normalize", False)

13
ml-agents/mlagents/trainers/ppo/optimizer.py


import logging
import numpy as np
from mlagents.tf_utils import tf
from mlagents_envs.timers import timed

from mlagents.trainers.buffer import AgentBuffer
logger = logging.getLogger("mlagents.trainers")
class PPOOptimizer(TFOptimizer):

dtype=tf.float32,
name="old_probabilities",
)
# Break old log probs into separate branches
old_log_prob_branches = ModelUtils.break_into_branches(
self.all_old_log_probs, self.policy.act_size
)
self.all_old_log_probs, self.policy.action_masks, self.policy.act_size
old_log_prob_branches, self.policy.action_masks, self.policy.act_size
)
action_idx = [0] + list(np.cumsum(self.policy.act_size))

18
ml-agents/mlagents/trainers/ppo/trainer.py


from mlagents.trainers.policy.tf_policy import TFPolicy
from mlagents.trainers.ppo.optimizer import PPOOptimizer
from mlagents.trainers.trajectory import Trajectory
from mlagents.trainers.exception import UnityTrainerException
logger = logging.getLogger("mlagents.trainers")

self.load = load
self.seed = seed
self.policy: NNPolicy = None # type: ignore
def _check_param_keys(self):
super()._check_param_keys()
# Check that batch size is greater than sequence length. Else, throw
# an exception.
if (
self.trainer_parameters["sequence_length"]
> self.trainer_parameters["batch_size"]
and self.trainer_parameters["use_recurrent"]
):
raise UnityTrainerException(
"batch_size must be greater than or equal to sequence_length when use_recurrent is True."
)
def _process_trajectory(self, trajectory: Trajectory) -> None:
"""

"""
if self.policy:
logger.warning(
"add_policy has been called twice. {} is not a multi-agent trainer".format(
"Your environment contains multiple teams, but {} doesn't support adversarial games. Enable self-play to \
train adversarial games.".format(
self.__class__.__name__
)
)

6
ml-agents/mlagents/trainers/sac/network.py


import logging
from mlagents.trainers.models import ModelUtils, EncoderType
LOG_STD_MAX = 2

CONTINUOUS_TARGET_ENTROPY_SCALE = 1.0 # TODO: Make these an optional hyperparam.
LOGGER = logging.getLogger("mlagents.trainers")
POLICY_SCOPE = ""
TARGET_SCOPE = "target_network"

56
ml-agents/mlagents/trainers/sac/optimizer.py


EPSILON = 1e-6 # Small value to avoid divide by zero
LOGGER = logging.getLogger("mlagents.trainers")
logger = logging.getLogger("mlagents.trainers")
POLICY_SCOPE = ""
TARGET_SCOPE = "target_network"

for name in stream_names:
if discrete:
_branched_mpq1 = self._apply_as_branches(
self.policy_network.q1_pheads[name] * discrete_action_probs
_branched_mpq1 = ModelUtils.break_into_branches(
self.policy_network.q1_pheads[name] * discrete_action_probs,
self.act_size,
)
branched_mpq1 = tf.stack(
[

)
_q1_p_mean = tf.reduce_mean(branched_mpq1, axis=0)
_branched_mpq2 = self._apply_as_branches(
self.policy_network.q2_pheads[name] * discrete_action_probs
_branched_mpq2 = ModelUtils.break_into_branches(
self.policy_network.q2_pheads[name] * discrete_action_probs,
self.act_size,
)
branched_mpq2 = tf.stack(
[

if discrete:
# We need to break up the Q functions by branch, and update them individually.
branched_q1_stream = self._apply_as_branches(
self.policy.selected_actions * q1_streams[name]
branched_q1_stream = ModelUtils.break_into_branches(
self.policy.selected_actions * q1_streams[name], self.act_size
branched_q2_stream = self._apply_as_branches(
self.policy.selected_actions * q2_streams[name]
branched_q2_stream = ModelUtils.break_into_branches(
self.policy.selected_actions * q2_streams[name], self.act_size
)
# Reduce each branch into scalar

self.ent_coef = tf.exp(self.log_ent_coef)
if discrete:
# We also have to do a different entropy and target_entropy per branch.
branched_per_action_ent = self._apply_as_branches(per_action_entropy)
branched_per_action_ent = ModelUtils.break_into_branches(
per_action_entropy, self.act_size
)
branched_ent_sums = tf.stack(
[
tf.reduce_sum(_lp, axis=1, keep_dims=True) + _te

# Same with policy loss, we have to do the loss per branch and average them,
# so that larger branches don't get more weight.
# The equivalent KL divergence from Eq 10 of Haarnoja et al. is also pi*log(pi) - Q
branched_q_term = self._apply_as_branches(
discrete_action_probs * self.policy_network.q1_p
branched_q_term = ModelUtils.break_into_branches(
discrete_action_probs * self.policy_network.q1_p, self.act_size
)
branched_policy_loss = tf.stack(

self.entropy = self.policy_network.entropy
def _apply_as_branches(self, concat_logits: tf.Tensor) -> List[tf.Tensor]:
"""
Takes in a concatenated set of logits and breaks it up into a list of non-concatenated logits, one per
action branch
"""
action_idx = [0] + list(np.cumsum(self.act_size))
branches_logits = [
concat_logits[:, action_idx[i] : action_idx[i + 1]]
for i in range(len(self.act_size))
]
return branches_logits
def _create_sac_optimizer_ops(self) -> None:
"""
Creates the Adam optimizers and update ops for SAC, including

self.target_network.value_vars, self.policy_network.value_vars
)
]
LOGGER.debug("value_vars")
logger.debug("value_vars")
LOGGER.debug("targvalue_vars")
logger.debug("targvalue_vars")
LOGGER.debug("critic_vars")
logger.debug("critic_vars")
LOGGER.debug("q_vars")
logger.debug("q_vars")
LOGGER.debug("policy_vars")
logger.debug("policy_vars")
policy_vars = self.policy.get_trainable_variables()
self.print_all_vars(policy_vars)

def print_all_vars(self, variables):
for _var in variables:
LOGGER.debug(_var)
logger.debug(_var)
@timed
def update(self, batch: AgentBuffer, num_sequences: int) -> Dict[str, float]:

return update_stats
def update_reward_signals(
self, reward_signal_minibatches: Mapping[str, Dict], num_sequences: int
self, reward_signal_minibatches: Mapping[str, AgentBuffer], num_sequences: int
) -> Dict[str, float]:
"""
Only update the reward signals.

feed_dict: Dict[tf.Tensor, Any],
update_dict: Dict[str, tf.Tensor],
stats_needed: Dict[str, str],
reward_signal_minibatches: Mapping[str, Dict],
reward_signal_minibatches: Mapping[str, AgentBuffer],
num_sequences: int,
) -> None:
"""

22
ml-agents/mlagents/trainers/sac/trainer.py


from mlagents.trainers.trainer.rl_trainer import RLTrainer
from mlagents.trainers.trajectory import Trajectory, SplitObservations
from mlagents.trainers.brain import BrainParameters
from mlagents.trainers.exception import UnityTrainerException
logger = logging.getLogger("mlagents.trainers")

else False
)
def _check_param_keys(self):
super()._check_param_keys()
# Check that batch size is greater than sequence length. Else, throw
# an exception.
if (
self.trainer_parameters["sequence_length"]
> self.trainer_parameters["batch_size"]
and self.trainer_parameters["use_recurrent"]
):
raise UnityTrainerException(
"batch_size must be greater than or equal to sequence_length when use_recurrent is True."
)
def save_model(self, name_behavior_id: str) -> None:
"""
Saves the model. Overrides the default save_model since we want to save

self.collected_rewards["environment"][agent_id] += np.sum(
agent_buffer_trajectory["environment_rewards"]
)
for name, reward_signal in self.policy.reward_signals.items():
for name, reward_signal in self.optimizer.reward_signals.items():
evaluate_result = reward_signal.evaluate_batch(
agent_buffer_trajectory
).scaled_reward

reparameterize=True,
create_tf_graph=False,
)
for _reward_signal in policy.reward_signals.keys():
self.collected_rewards[_reward_signal] = defaultdict(lambda: 0)
# Load the replay buffer if load
if self.load and self.checkpoint_replay_buffer:
try:

"""
if self.policy:
logger.warning(
"add_policy has been called twice. {} is not a multi-agent trainer".format(
"Your environment contains multiple teams, but {} doesn't support adversarial games. Enable self-play to \
train adversarial games.".format(
self.__class__.__name__
)
)

136
ml-agents/mlagents/trainers/tests/simple_test_envs.py


import random
from typing import Dict, List
from typing import Dict, List, Any, Tuple
import numpy as np
from mlagents_envs.base_env import (

from mlagents_envs.rpc_utils import proto_from_batched_step_result
OBS_SIZE = 1
VIS_OBS_SIZE = (20, 20, 3)
STEP_SIZE = 0.1
TIME_PENALTY = 0.001

it reaches -1. The position is incremented by the action amount (clamped to [-step_size, step_size]).
"""
def __init__(self, brain_names, use_discrete, step_size=STEP_SIZE):
def __init__(
self,
brain_names,
use_discrete,
step_size=STEP_SIZE,
num_visual=0,
num_vector=1,
vis_obs_size=VIS_OBS_SIZE,
vec_obs_size=OBS_SIZE,
):
self.num_visual = num_visual
self.num_vector = num_vector
self.vis_obs_size = vis_obs_size
self.vec_obs_size = vec_obs_size
[(OBS_SIZE,)], action_type, (2,) if use_discrete else 1
self._make_obs_spec(), action_type, (2,) if use_discrete else 1
)
self.names = brain_names
self.position: Dict[str, float] = {}

self.rewards: Dict[str, float] = {}
self.final_rewards: Dict[str, List[float]] = {}
self.step_result: Dict[str, BatchedStepResult] = {}
self.agent_id: Dict[str, int] = {}
self.agent_id[name] = 0
self.goal[name] = self.random.choice([-1, 1])
self.rewards[name] = 0
self.final_rewards[name] = []

def _make_obs_spec(self) -> List[Any]:
obs_spec: List[Any] = []
for _ in range(self.num_vector):
obs_spec.append((self.vec_obs_size,))
for _ in range(self.num_visual):
obs_spec.append(self.vis_obs_size)
return obs_spec
def _make_obs(self, value: float) -> List[np.ndarray]:
obs = []
for _ in range(self.num_vector):
obs.append(np.ones((1, self.vec_obs_size), dtype=np.float32) * value)
for _ in range(self.num_visual):
obs.append(np.ones((1,) + self.vis_obs_size, dtype=np.float32) * value)
return obs
def get_agent_groups(self):
return self.names

delta = 1 if act else -1
else:
delta = self.action[name][0][0]
delta = clamp(delta, -self.step_size, self.step_size)
self.position[name] += delta
self.position[name] = clamp(self.position[name], -1, 1)

def _make_batched_step(
self, name: str, done: bool, reward: float
) -> BatchedStepResult:
m_vector_obs = [np.ones((1, OBS_SIZE), dtype=np.float32) * self.goal[name]]
m_vector_obs = self._make_obs(self.goal[name])
m_agent_id = np.array([0], dtype=np.int32)
m_agent_id = np.array([self.agent_id[name]], dtype=np.int32)
if done:
self._reset_agent(name)
new_vector_obs = self._make_obs(self.goal[name])
(
m_vector_obs,
m_reward,
m_done,
m_agent_id,
action_mask,
) = self._construct_reset_step(
m_vector_obs,
new_vector_obs,
m_reward,
m_done,
m_agent_id,
action_mask,
name,
)
m_vector_obs, m_reward, m_done, m_done, m_agent_id, action_mask
m_vector_obs,
m_reward,
m_done,
np.zeros(m_done.shape, dtype=bool),
m_agent_id,
action_mask,
def _construct_reset_step(
self,
vector_obs: List[np.ndarray],
new_vector_obs: List[np.ndarray],
reward: np.ndarray,
done: np.ndarray,
agent_id: np.ndarray,
action_mask: List[np.ndarray],
name: str,
) -> Tuple[List[np.ndarray], np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
new_reward = np.array([0.0], dtype=np.float32)
new_done = np.array([False], dtype=np.bool)
new_agent_id = np.array([self.agent_id[name]], dtype=np.int32)
new_action_mask = self._generate_mask()
m_vector_obs = [
np.concatenate((old, new), axis=0)
for old, new in zip(vector_obs, new_vector_obs)
]
m_reward = np.concatenate((reward, new_reward), axis=0)
m_done = np.concatenate((done, new_done), axis=0)
m_agent_id = np.concatenate((agent_id, new_agent_id), axis=0)
if action_mask is not None:
action_mask = [
np.concatenate((old, new), axis=0)
for old, new in zip(action_mask, new_action_mask)
]
return m_vector_obs, m_reward, m_done, m_agent_id, action_mask
done = self._take_action(name)
done = self._take_action(name)
if done:
self._reset_agent(name)
def _generate_mask(self):
if self.discrete:

self.step_count[name] = 0
self.final_rewards[name].append(self.rewards[name])
self.rewards[name] = 0
self.agent_id[name] = self.agent_id[name] + 1
def reset(self) -> None: # type: ignore
for name in self.names:

recurrent_obs_val = (
self.goal[name] if self.step_count[name] <= self.num_show_steps else 0
)
m_vector_obs = [np.ones((1, OBS_SIZE), dtype=np.float32) * recurrent_obs_val]
m_vector_obs = self._make_obs(recurrent_obs_val)
m_agent_id = np.array([0], dtype=np.int32)
m_agent_id = np.array([self.agent_id[name]], dtype=np.int32)
if done:
self._reset_agent(name)
recurrent_obs_val = (
self.goal[name] if self.step_count[name] <= self.num_show_steps else 0
)
new_vector_obs = self._make_obs(recurrent_obs_val)
(
m_vector_obs,
m_reward,
m_done,
m_agent_id,
action_mask,
) = self._construct_reset_step(
m_vector_obs,
new_vector_obs,
m_reward,
m_done,
m_agent_id,
action_mask,
name,
)
m_vector_obs, m_reward, m_done, m_done, m_agent_id, action_mask
m_vector_obs,
m_reward,
m_done,
np.zeros(m_done.shape, dtype=bool),
m_agent_id,
action_mask,
)

60
ml-agents/mlagents/trainers/tests/test_barracuda_converter.py


import os
import tempfile
import pytest
import yaml
from mlagents.trainers.tests.test_nn_policy import create_policy_mock
from mlagents.tf_utils import tf
from mlagents.model_serialization import SerializationSettings, export_policy_model
def test_barracuda_converter():

# cleanup
os.remove(tmpfile)
@pytest.fixture
def dummy_config():
return yaml.safe_load(
"""
trainer: ppo
batch_size: 32
beta: 5.0e-3
buffer_size: 512
epsilon: 0.2
hidden_units: 128
lambd: 0.95
learning_rate: 3.0e-4
max_steps: 5.0e4
normalize: true
num_epoch: 5
num_layers: 2
time_horizon: 64
sequence_length: 64
summary_freq: 1000
use_recurrent: false
normalize: true
memory_size: 8
curiosity_strength: 0.0
curiosity_enc_size: 1
summary_path: test
model_path: test
reward_signals:
extrinsic:
strength: 1.0
gamma: 0.99
"""
)
@pytest.mark.parametrize("discrete", [True, False], ids=["discrete", "continuous"])
@pytest.mark.parametrize("visual", [True, False], ids=["visual", "vector"])
@pytest.mark.parametrize("rnn", [True, False], ids=["rnn", "no_rnn"])
def test_policy_conversion(dummy_config, tmpdir, rnn, visual, discrete):
tf.reset_default_graph()
dummy_config["summary_path"] = str(tmpdir)
dummy_config["model_path"] = os.path.join(tmpdir, "test")
policy = create_policy_mock(
dummy_config, use_rnn=rnn, use_discrete=discrete, use_visual=visual
)
policy.save_model(1000)
settings = SerializationSettings(
policy.model_path, os.path.join(tmpdir, policy.brain.brain_name)
)
export_policy_model(settings, policy.graph, policy.sess)
# These checks taken from test_barracuda_converter
assert os.path.isfile(os.path.join(tmpdir, "test.nn"))
assert os.path.getsize(os.path.join(tmpdir, "test.nn")) > 100

Some files were not shown because too many files changed in this diff
