
Merge remote-tracking branch 'origin/master' into r5-master

/release_5_branch
Arthur Juliani, 4 years ago
Commit 1a123641
135 files changed, with 3380 insertions and 662 deletions
  1. .pre-commit-config.yaml (12 changes)
  2. .yamato/com.unity.ml-agents-test.yml (46 changes)
  3. Project/Assets/ML-Agents/Examples/Crawler/Demos/ExpertCrawlerDyn.demo (1001 changes)
  4. Project/Assets/ML-Agents/Examples/Crawler/Demos/ExpertCrawlerDyn.demo.meta (2 changes)
  5. Project/Assets/ML-Agents/Examples/Crawler/Demos/ExpertCrawlerSta.demo.meta (2 changes)
  6. Project/Assets/ML-Agents/Examples/Crawler/Scripts/CrawlerAgent.cs (25 changes)
  7. Project/Assets/ML-Agents/Examples/FoodCollector/Prefabs/FoodCollectorArea.prefab (73 changes)
  8. Project/Assets/ML-Agents/Examples/Walker/Demos/ExpertWalkerDyna.demo (1001 changes)
  9. Project/Assets/ML-Agents/Examples/Walker/Demos/ExpertWalkerDyna.demo.meta (2 changes)
  10. Project/Assets/ML-Agents/Examples/Walker/Demos/ExpertWalkerStat.demo.meta (2 changes)
  11. Project/Assets/ML-Agents/Examples/Walker/Scripts/WalkerAgent.cs (33 changes)
  12. com.unity.ml-agents.extensions/Runtime/Sensors/ArticulationBodyPoseExtractor.cs (28 changes)
  13. com.unity.ml-agents.extensions/Runtime/Sensors/PhysicsSensorSettings.cs (68 changes)
  14. com.unity.ml-agents.extensions/Runtime/Sensors/PoseExtractor.cs (103 changes)
  15. com.unity.ml-agents.extensions/Runtime/Sensors/RigidBodyPoseExtractor.cs (28 changes)
  16. com.unity.ml-agents.extensions/Tests/Editor/Sensors/PoseExtractorTests.cs (11 changes)
  17. com.unity.ml-agents.extensions/Tests/Editor/Sensors/RigidBodyPoseExtractorTests.cs (1 change)
  18. com.unity.ml-agents.extensions/Tests/Editor/Unity.ML-Agents.Extensions.EditorTests.asmdef (8 changes)
  19. com.unity.ml-agents/CHANGELOG.md (27 changes)
  20. com.unity.ml-agents/Runtime/Academy.cs (69 changes)
  21. com.unity.ml-agents/Runtime/StatsRecorder.cs (1 change)
  22. com.unity.ml-agents/Tests/Editor/AcademyTests.cs (32 changes)
  23. com.unity.ml-agents/Tests/Editor/Sensor/VectorSensorTests.cs (22 changes)
  24. docs/Getting-Started.md (2 changes)
  25. docs/Learning-Environment-Create-New.md (4 changes)
  26. docs/ML-Agents-Overview.md (2 changes)
  27. docs/Python-API.md (2 changes)
  28. docs/Training-Configuration-File.md (4 changes)
  29. docs/Training-ML-Agents.md (4 changes)
  30. docs/Training-on-Microsoft-Azure.md (2 changes)
  31. docs/Using-Docker.md (4 changes)
  32. docs/Using-Tensorboard.md (2 changes)
  33. docs/localized/zh-CN/docs/Getting-Started-with-Balance-Ball.md (4 changes)
  34. gym-unity/README.md (4 changes)
  35. gym-unity/gym_unity/__init__.py (4 changes)
  36. gym-unity/setup.py (4 changes)
  37. ml-agents-envs/mlagents_envs/__init__.py (4 changes)
  38. ml-agents-envs/mlagents_envs/base_env.py (8 changes)
  39. ml-agents-envs/mlagents_envs/communicator.py (2 changes)
  40. ml-agents-envs/mlagents_envs/env_utils.py (4 changes)
  41. ml-agents-envs/mlagents_envs/environment.py (17 changes)
  42. ml-agents-envs/mlagents_envs/exception.py (2 changes)
  43. ml-agents-envs/mlagents_envs/rpc_communicator.py (2 changes)
  44. ml-agents-envs/mlagents_envs/side_channel/environment_parameters_channel.py (2 changes)
  45. ml-agents-envs/mlagents_envs/side_channel/float_properties_channel.py (2 changes)
  46. ml-agents-envs/mlagents_envs/side_channel/side_channel_manager.py (2 changes)
  47. ml-agents-envs/mlagents_envs/side_channel/stats_side_channel.py (18 changes)
  48. ml-agents-envs/mlagents_envs/tests/test_side_channel.py (6 changes)
  49. ml-agents-envs/setup.py (4 changes)
  50. ml-agents/mlagents/model_serialization.py (22 changes)
  51. ml-agents/mlagents/trainers/__init__.py (4 changes)
  52. ml-agents/mlagents/trainers/agent_processor.py (29 changes)
  53. ml-agents/mlagents/trainers/buffer.py (4 changes)
  54. ml-agents/mlagents/trainers/cli_utils.py (2 changes)
  55. ml-agents/mlagents/trainers/components/bc/model.py (2 changes)
  56. ml-agents/mlagents/trainers/components/reward_signals/__init__.py (2 changes)
  57. ml-agents/mlagents/trainers/components/reward_signals/curiosity/model.py (8 changes)
  58. ml-agents/mlagents/trainers/components/reward_signals/gail/model.py (8 changes)
  59. ml-agents/mlagents/trainers/components/reward_signals/reward_signal_factory.py (2 changes)
  60. ml-agents/mlagents/trainers/env_manager.py (33 changes)
  61. ml-agents/mlagents/trainers/ghost/controller.py (4 changes)
  62. ml-agents/mlagents/trainers/ghost/trainer.py (46 changes)
  63. ml-agents/mlagents/trainers/learn.py (3 changes)
  64. ml-agents/mlagents/trainers/optimizer/optimizer.py (3 changes)
  65. ml-agents/mlagents/trainers/optimizer/tf_optimizer.py (2 changes)
  66. ml-agents/mlagents/trainers/policy/policy.py (166 changes)
  67. ml-agents/mlagents/trainers/policy/tf_policy.py (370 changes)
  68. ml-agents/mlagents/trainers/ppo/optimizer.py (14 changes)
  69. ml-agents/mlagents/trainers/ppo/trainer.py (38 changes)
  70. ml-agents/mlagents/trainers/sac/network.py (10 changes)
  71. ml-agents/mlagents/trainers/sac/optimizer.py (10 changes)
  72. ml-agents/mlagents/trainers/sac/trainer.py (53 changes)
  73. ml-agents/mlagents/trainers/settings.py (12 changes)
  74. ml-agents/mlagents/trainers/stats.py (12 changes)
  75. ml-agents/mlagents/trainers/subprocess_env_manager.py (6 changes)
  76. ml-agents/mlagents/trainers/tests/test_agent_processor.py (8 changes)
  77. ml-agents/mlagents/trainers/tests/test_barracuda_converter.py (14 changes)
  78. ml-agents/mlagents/trainers/tests/test_bcmodule.py (7 changes)
  79. ml-agents/mlagents/trainers/tests/test_config_conversion.py (4 changes)
  80. ml-agents/mlagents/trainers/tests/test_distributions.py (2 changes)
  81. ml-agents/mlagents/trainers/tests/test_models.py (2 changes)
  82. ml-agents/mlagents/trainers/tests/test_nn_policy.py (24 changes)
  83. ml-agents/mlagents/trainers/tests/test_ppo.py (40 changes)
  84. ml-agents/mlagents/trainers/tests/test_reward_signals.py (6 changes)
  85. ml-agents/mlagents/trainers/tests/test_rl_trainer.py (43 changes)
  86. ml-agents/mlagents/trainers/tests/test_sac.py (42 changes)
  87. ml-agents/mlagents/trainers/tests/test_simple_rl.py (5 changes)
  88. ml-agents/mlagents/trainers/tests/test_subprocess_env_manager.py (2 changes)
  89. ml-agents/mlagents/trainers/tests/test_trainer_controller.py (11 changes)
  90. ml-agents/mlagents/trainers/tests/test_training_status.py (66 changes)
  91. ml-agents/mlagents/trainers/trainer/rl_trainer.py (72 changes)
  92. ml-agents/mlagents/trainers/trainer/trainer.py (29 changes)
  93. ml-agents/mlagents/trainers/trainer_controller.py (57 changes)
  94. ml-agents/mlagents/trainers/trainer_util.py (5 changes)
  95. ml-agents/mlagents/trainers/training_status.py (4 changes)
  96. ml-agents/setup.py (5 changes)
  97. ml-agents/tests/yamato/check_coverage_percent.py (13 changes)
  98. ml-agents/tests/yamato/scripts/run_gym.py (4 changes)

.pre-commit-config.yaml (12 changes)


# flake8-tidy-imports is used for banned-modules, not actually tidying
additional_dependencies: [flake8-comprehensions==3.2.2, flake8-tidy-imports==4.1.0, flake8-bugbear==20.1.4]
- repo: https://github.com/asottile/pyupgrade
rev: v2.7.0
hooks:
- id: pyupgrade
args: [--py3-plus, --py36-plus]
exclude: >
(?x)^(
.*barracuda.py|
.*_pb2.py|
.*_pb2_grpc.py
)$
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v2.5.0
hooks:

.yamato/com.unity.ml-agents-test.yml (46 changes)


test_editors:
- version: 2018.4
# 2018.4 doesn't support code-coverage
coverageOptions:
minCoveragePct: 0
enableCodeCoverage: !!bool false
coverageOptions: --enable-code-coverage --code-coverage-options 'generateHtmlReport;assemblyFilters:+Unity.ML-Agents'
minCoveragePct: 72
enableCodeCoverage: !!bool true
coverageOptions: --enable-code-coverage --code-coverage-options 'generateHtmlReport;assemblyFilters:+Unity.ML-Agents'
minCoveragePct: 72
enableCodeCoverage: !!bool true
coverageOptions: --enable-code-coverage --code-coverage-options 'generateHtmlReport;assemblyFilters:+Unity.ML-Agents'
minCoveragePct: 72
enableCodeCoverage: !!bool true
coverageOptions: --enable-code-coverage --code-coverage-options 'generateHtmlReport;assemblyFilters:+Unity.ML-Agents'
minCoveragePct: 72
enableCodeCoverage: !!bool true
test_platforms:
- name: win
type: Unity::VM

flavor: b1.medium
packages:
- name: com.unity.ml-agents
assembly: Unity.ML-Agents
minCoveragePct: 72
assembly: Unity.ML-Agents.Extensions
minCoveragePct: 75
---
all_package_tests:

{% for package in packages %}
{% for editor in test_editors %}
{% for platform in test_platforms %}
{% if editor.enableCodeCoverage %}
{% capture coverageOptions %} --enable-code-coverage --code-coverage-options 'generateHtmlReport;assemblyFilters:+{{ package.assembly }}'{% endcapture %}
{% else %}
{% assign coverageOptions = "" %}
{% endif %}
test_{{ package.name }}_{{ platform.name }}_{{ editor.version }}:
name : {{ package.name }} test {{ editor.version }} on {{ platform.name }}
agent:

commands:
- npm install upm-ci-utils@stable -g --registry https://artifactory.prd.cds.internal.unity3d.com/artifactory/api/npm/upm-npm
- upm-ci project test -u {{ editor.version }} --project-path Project --package-filter {{ package.name }} {{ editor.coverageOptions }}
{% if package.name == "com.unity.ml-agents" %}
# TODO get coverage tests running for extensions too
- python ml-agents/tests/yamato/check_coverage_percent.py upm-ci~/test-results/ {{ editor.minCoveragePct }}
- upm-ci project test -u {{ editor.version }} --project-path Project --package-filter {{ package.name }} {{ coverageOptions }}
{% if editor.enableCodeCoverage %}
- python3 ml-agents/tests/yamato/check_coverage_percent.py upm-ci~/test-results/ {{ package.minCoveragePct }}
{% endif %}
artifacts:
logs:

{% for package in packages %}
{% for editor in trunk_editor %}
{% for platform in test_platforms %}
{% if editor.enableCodeCoverage %}
{% capture coverageOptions %} --enable-code-coverage --code-coverage-options 'generateHtmlReport;assemblyFilters:+{{ package.assembly }}'{% endcapture %}
{% else %}
{% assign coverageOptions = "" %}
{% endif %}
test_{{ package.name }}_{{ platform.name }}_trunk:
name : {{ package.name }} test {{ editor.version }} on {{ platform.name }}
agent:

- python -m pip install unity-downloader-cli --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple --upgrade
- unity-downloader-cli -u trunk -c editor --wait --fast
- npm install upm-ci-utils@stable -g --registry https://artifactory.prd.cds.internal.unity3d.com/artifactory/api/npm/upm-npm
- upm-ci project test -u {{ editor.version }} --project-path Project --package-filter {{ package.name }} {{ editor.coverageOptions }}
{% if package.name == "com.unity.ml-agents" %}
# TODO get coverage tests running for extensions too
- python ml-agents/tests/yamato/check_coverage_percent.py upm-ci~/test-results/ {{ editor.minCoveragePct }}
- upm-ci project test -u {{ editor.version }} --project-path Project --package-filter {{ package.name }} {{ coverageOptions }}
{% if editor.enableCodeCoverage %}
- python3 ml-agents/tests/yamato/check_coverage_percent.py upm-ci~/test-results/ {{ package.minCoveragePct }}
{% endif %}
artifacts:
logs:

Project/Assets/ML-Agents/Examples/Crawler/Demos/ExpertCrawlerDyn.demo (1001 changes)
File diff suppressed because it is too large.

Project/Assets/ML-Agents/Examples/Crawler/Demos/ExpertCrawlerDyn.demo.meta (2 changes)


fileFormatVersion: 2
- guid: 0b45b2f0ee5e548babcc58c9adcda117
+ guid: 0d65dafbeef46458f9793deb48a2cc1b
ScriptedImporter:
fileIDToRecycleName:
11400002: Assets/ML-Agents/Examples/Crawler/Demos/ExpertCrawlerDyn.demo

Project/Assets/ML-Agents/Examples/Crawler/Demos/ExpertCrawlerSta.demo.meta (2 changes)


fileFormatVersion: 2
- guid: f19829d1024204357b30822fc9adbfc9
+ guid: 9c4423ab9e9844fd88c4ca8508337330
ScriptedImporter:
fileIDToRecycleName:
11400002: Assets/ML-Agents/Examples/Crawler/Demos/ExpertCrawlerSta.demo

Project/Assets/ML-Agents/Examples/Crawler/Scripts/CrawlerAgent.cs (25 changes)


using System;
using Random = UnityEngine.Random;
[RequireComponent(typeof(JointDriveController))] // Required to set joint forces
public class CrawlerAgent : Agent

{
//Add body rotation delta relative to orientation cube
sensor.AddObservation(Quaternion.FromToRotation(body.forward, orientationCube.transform.forward));
//Add pos of target relative to orientation cube
sensor.AddObservation(orientationCube.transform.InverseTransformPoint(target.transform.position));

{
var movingTowardsDot = Vector3.Dot(orientationCube.transform.forward,
Vector3.ClampMagnitude(m_JdController.bodyPartsDict[body].rb.velocity, maximumWalkingSpeed));
if (float.IsNaN(movingTowardsDot))
{
throw new ArgumentException(
"NaN in movingTowardsDot.\n" +
$" orientationCube.transform.forward: {orientationCube.transform.forward}\n"+
$" body.velocity: {m_JdController.bodyPartsDict[body].rb.velocity}\n"+
$" maximumWalkingSpeed: {maximumWalkingSpeed}"
);
}
AddReward(0.03f * movingTowardsDot);
}

void RewardFunctionFacingTarget()
{
- AddReward(0.01f * Vector3.Dot(orientationCube.transform.forward, body.forward));
+ var facingReward = Vector3.Dot(orientationCube.transform.forward, body.forward);
+ if (float.IsNaN(facingReward))
+ {
+ throw new ArgumentException(
+ "NaN in movingTowardsDot.\n" +
+ $" orientationCube.transform.forward: {orientationCube.transform.forward}\n"+
+ $" body.forward: {body.forward}"
+ );
+ }
+ AddReward(0.01f * facingReward);
}
/// <summary>
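Both reward hunks in this file repeat one pattern: compute a dot-product reward term, throw if it is NaN (with enough physics state in the message to debug), then scale and apply it. A minimal sketch of that pattern as a helper, assuming it lives in the same `Agent` subclass; `AddRewardChecked` and `debugContext` are illustrative names, not part of this merge:

```csharp
// Hypothetical helper inside the same Agent subclass (requires `using System;`).
void AddRewardChecked(float scale, float rawReward, string debugContext)
{
    if (float.IsNaN(rawReward))
    {
        // Fail fast with context, mirroring the ArgumentException messages above.
        throw new ArgumentException($"NaN reward term.\n{debugContext}");
    }
    AddReward(scale * rawReward);
}

// Usage corresponding to the diff's reward terms:
// AddRewardChecked(0.03f, movingTowardsDot, $"forward: {orientationCube.transform.forward}");
// AddRewardChecked(0.01f, facingReward, $"body.forward: {body.forward}");
```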

Project/Assets/ML-Agents/Examples/FoodCollector/Prefabs/FoodCollectorArea.prefab (73 changes)


m_Name:
m_EditorClassIdentifier:
m_BrainParameters:
- vectorObservationSize: 4
- numStackedVectorObservations: 1
- vectorActionSize: 03000000030000000300000002000000
- vectorActionDescriptions: []
- vectorActionSpaceType: 0
+ VectorObservationSize: 4
+ NumStackedVectorObservations: 1
+ VectorActionSize: 03000000030000000300000002000000
+ VectorActionDescriptions: []
+ VectorActionSpaceType: 0
m_Model: {fileID: 11400000, guid: 36ab3e93020504f48858d0856f939685, type: 3}
m_InferenceDevice: 0
m_BehaviorType: 0

m_ObservableAttributeHandling: 0
--- !u!114 &114176228333253036
MonoBehaviour:
m_ObjectHideFlags: 0

agentParameters:
maxStep: 0
hasUpgradedFromAgentParameters: 1
maxStep: 5000
MaxStep: 5000
area: {fileID: 1819751139121548}
turnSpeed: 300
moveSpeed: 2

frozenMaterial: {fileID: 2100000, guid: 66163cf35956a4be08e801b750c26f33, type: 2}
myLaser: {fileID: 1081721624670010}
- contribute: 0
+ contribute: 1
useVectorObs: 1
--- !u!114 &114725457980523372
MonoBehaviour:

m_EditorClassIdentifier:
DecisionPeriod: 5
TakeActionsBetweenDecisions: 1
offsetStep: 0
--- !u!114 &1222199865870203693
MonoBehaviour:
m_ObjectHideFlags: 0

m_Script: {fileID: 11500000, guid: 3a6da8f78a394c6ab027688eab81e04d, type: 3}
m_Name:
m_EditorClassIdentifier:
debugCommandLineOverride:
--- !u!1 &1482701732800114
GameObject:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
m_BrainParameters:
- vectorObservationSize: 4
- numStackedVectorObservations: 1
- vectorActionSize: 03000000030000000300000002000000
- vectorActionDescriptions: []
- vectorActionSpaceType: 0
+ VectorObservationSize: 4
+ NumStackedVectorObservations: 1
+ VectorActionSize: 03000000030000000300000002000000
+ VectorActionDescriptions: []
+ VectorActionSpaceType: 0
m_Model: {fileID: 11400000, guid: 36ab3e93020504f48858d0856f939685, type: 3}
m_InferenceDevice: 0
m_BehaviorType: 0

m_ObservableAttributeHandling: 0
--- !u!114 &114711827726849508
MonoBehaviour:
m_ObjectHideFlags: 0

agentParameters:
maxStep: 0
hasUpgradedFromAgentParameters: 1
maxStep: 5000
MaxStep: 5000
area: {fileID: 1819751139121548}
turnSpeed: 300
moveSpeed: 2

m_EditorClassIdentifier:
DecisionPeriod: 5
TakeActionsBetweenDecisions: 1
offsetStep: 0
--- !u!1 &1528397385587768
GameObject:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
m_BrainParameters:
- vectorObservationSize: 4
- numStackedVectorObservations: 1
- vectorActionSize: 03000000030000000300000002000000
- vectorActionDescriptions: []
- vectorActionSpaceType: 0
+ VectorObservationSize: 4
+ NumStackedVectorObservations: 1
+ VectorActionSize: 03000000030000000300000002000000
+ VectorActionDescriptions: []
+ VectorActionSpaceType: 0
m_Model: {fileID: 11400000, guid: 36ab3e93020504f48858d0856f939685, type: 3}
m_InferenceDevice: 0
m_BehaviorType: 0

m_ObservableAttributeHandling: 0
--- !u!114 &114542632553128056
MonoBehaviour:
m_ObjectHideFlags: 0

agentParameters:
maxStep: 0
hasUpgradedFromAgentParameters: 1
maxStep: 5000
MaxStep: 5000
area: {fileID: 1819751139121548}
turnSpeed: 300
moveSpeed: 2

m_EditorClassIdentifier:
DecisionPeriod: 5
TakeActionsBetweenDecisions: 1
offsetStep: 0
--- !u!1 &1617924810425504
GameObject:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
m_BrainParameters:
- vectorObservationSize: 4
- numStackedVectorObservations: 1
- vectorActionSize: 03000000030000000300000002000000
- vectorActionDescriptions: []
- vectorActionSpaceType: 0
+ VectorObservationSize: 4
+ NumStackedVectorObservations: 1
+ VectorActionSize: 03000000030000000300000002000000
+ VectorActionDescriptions: []
+ VectorActionSpaceType: 0
m_Model: {fileID: 11400000, guid: 36ab3e93020504f48858d0856f939685, type: 3}
m_InferenceDevice: 0
m_BehaviorType: 0

m_ObservableAttributeHandling: 0
--- !u!114 &114189751434580810
MonoBehaviour:
m_ObjectHideFlags: 0

agentParameters:
maxStep: 0
hasUpgradedFromAgentParameters: 1
maxStep: 5000
MaxStep: 5000
area: {fileID: 1819751139121548}
turnSpeed: 300
moveSpeed: 2

m_EditorClassIdentifier:
DecisionPeriod: 5
TakeActionsBetweenDecisions: 1
offsetStep: 0
--- !u!1 &1688105343773098
GameObject:
m_ObjectHideFlags: 0

m_Name:
m_EditorClassIdentifier:
m_BrainParameters:
vectorObservationSize: 4
numStackedVectorObservations: 1
vectorActionSize: 03000000030000000300000002000000
vectorActionDescriptions: []
vectorActionSpaceType: 0
VectorObservationSize: 4
NumStackedVectorObservations: 1
VectorActionSize: 03000000030000000300000002000000
VectorActionDescriptions: []
VectorActionSpaceType: 0
m_Model: {fileID: 11400000, guid: 36ab3e93020504f48858d0856f939685, type: 3}
m_InferenceDevice: 0
m_BehaviorType: 0

m_ObservableAttributeHandling: 0
--- !u!114 &114235147148547996
MonoBehaviour:
m_ObjectHideFlags: 0

agentParameters:
maxStep: 0
hasUpgradedFromAgentParameters: 1
maxStep: 5000
MaxStep: 5000
area: {fileID: 1819751139121548}
turnSpeed: 300
moveSpeed: 2

m_EditorClassIdentifier:
DecisionPeriod: 5
TakeActionsBetweenDecisions: 1
offsetStep: 0
--- !u!1 &1729825611722018
GameObject:
m_ObjectHideFlags: 0

Project/Assets/ML-Agents/Examples/Walker/Demos/ExpertWalkerDyna.demo (1001 changes)
File diff suppressed because it is too large.

Project/Assets/ML-Agents/Examples/Walker/Demos/ExpertWalkerDyna.demo.meta (2 changes)


fileFormatVersion: 2
- guid: 1ea82869060c54bb48fed5b95baaf53c
+ guid: 31ae3a2f4d53f4e9b9c6096c37a2284c
ScriptedImporter:
fileIDToRecycleName:
11400002: Assets/ML-Agents/Examples/Walker/Demos/ExpertWalkerDyna.demo

Project/Assets/ML-Agents/Examples/Walker/Demos/ExpertWalkerStat.demo.meta (2 changes)


fileFormatVersion: 2
- guid: 720007cd6923e410abaa4ba800400cb0
+ guid: 30f91fdf8e3294d249031613855f5992
ScriptedImporter:
fileIDToRecycleName:
11400002: Assets/ML-Agents/Examples/Walker/Demos/ExpertWalkerStat.demo

Project/Assets/ML-Agents/Examples/Walker/Scripts/WalkerAgent.cs (33 changes)


using System;
using MLAgentsExamples;
using UnityEngine;
using Unity.MLAgents;

using Random = UnityEngine.Random;
public class WalkerAgent : Agent
{

// a. Velocity alignment with goal direction.
var moveTowardsTargetReward = Vector3.Dot(cubeForward,
Vector3.ClampMagnitude(m_JdController.bodyPartsDict[hips].rb.velocity, maximumWalkingSpeed));
if (float.IsNaN(moveTowardsTargetReward))
{
throw new ArgumentException(
"NaN in moveTowardsTargetReward.\n" +
$" cubeForward: {cubeForward}\n"+
$" hips.velocity: {m_JdController.bodyPartsDict[hips].rb.velocity}\n"+
$" maximumWalkingSpeed: {maximumWalkingSpeed}"
);
}
if (float.IsNaN(lookAtTargetReward))
{
throw new ArgumentException(
"NaN in lookAtTargetReward.\n" +
$" cubeForward: {cubeForward}\n"+
$" head.forward: {head.forward}"
);
}
- var headHeightOverFeetReward =
+ var headHeightOverFeetReward =
if (float.IsNaN(headHeightOverFeetReward))
{
throw new ArgumentException(
"NaN in headHeightOverFeetReward.\n" +
$" head.position: {head.position}\n"+
$" footL.position: {footL.position}\n"+
$" footR.position: {footR.position}"
);
}
AddReward(
+ 0.02f * moveTowardsTargetReward
+ 0.02f * lookAtTargetReward

com.unity.ml-agents.extensions/Runtime/Sensors/ArticulationBodyPoseExtractor.cs (28 changes)


namespace Unity.MLAgents.Extensions.Sensors
{
/// <summary>
/// Utility class to track a hierarchy of ArticulationBodies.
/// </summary>
public class ArticulationBodyPoseExtractor : PoseExtractor
{
ArticulationBody[] m_Bodies;

if (rootBody == null)
{
return;
}
if (!rootBody.isRoot)
{
Debug.Log("Must pass ArticulationBody.isRoot");

for (var i = 1; i < numBodies; i++)
{
- var body = m_Bodies[i];
- var parent = body.GetComponentInParent<ArticulationBody>();
- parentIndices[i] = bodyToIndex[parent];
+ var currentArticBody = m_Bodies[i];
+ // Component.GetComponentInParent will consider the provided object as well.
+ // So start looking from the parent.
+ var currentGameObject = currentArticBody.gameObject;
+ var parentGameObject = currentGameObject.transform.parent;
+ var parentArticBody = parentGameObject.GetComponentInParent<ArticulationBody>();
+ parentIndices[i] = bodyToIndex[parentArticBody];
/// <inheritdoc/>
protected override Vector3 GetLinearVelocityAt(int index)
{
return m_Bodies[index].velocity;
}
/// <inheritdoc/>
protected override Pose GetPoseAt(int index)
{
var body = m_Bodies[index];

}
internal ArticulationBody[] Bodies => m_Bodies;
}
}
#endif // UNITY_2020_1_OR_NEWER
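The hunk above fixes a classic Unity pitfall: `GetComponentInParent` starts its search at the object it is called on, so calling it on a body returns that same body rather than its ancestor. A minimal sketch of the corrected lookup, assuming Unity 2020.1+ (the file is gated behind `UNITY_2020_1_OR_NEWER`); `FindParentBody` is an illustrative name:

```csharp
using UnityEngine;

// Illustrative sketch: find the nearest *ancestor* ArticulationBody.
static ArticulationBody FindParentBody(ArticulationBody body)
{
    var parentTransform = body.gameObject.transform.parent;
    // GetComponentInParent would match `body` itself, so start from the parent.
    return parentTransform == null
        ? null
        : parentTransform.GetComponentInParent<ArticulationBody>();
}
```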

com.unity.ml-agents.extensions/Runtime/Sensors/PhysicsSensorSettings.cs (68 changes)


namespace Unity.MLAgents.Extensions.Sensors
{
/// <summary>
/// Settings that define the observations generated for physics-based sensors.
/// </summary>
[Serializable]
public struct PhysicsSensorSettings
{

public bool UseModelSpaceTranslations;
/// <summary>
- /// Whether to use model space (relative to the root body) rotatoins as observations.
+ /// Whether to use model space (relative to the root body) rotations as observations.
/// </summary>
public bool UseModelSpaceRotations;

public bool UseLocalSpaceRotations;
/// <summary>
/// Whether to use model space (relative to the root body) linear velocities as observations.
/// </summary>
public bool UseModelSpaceLinearVelocity;
/// <summary>
/// Whether to use local space (relative to the parent body) linear velocities as observations.
/// </summary>
public bool UseLocalSpaceLinearVelocity;
/// <summary>
/// Whether to use joint-specific positions and angles as observations.
/// </summary>
public bool UseJointPositionsAndAngles;
/// <summary>
/// Whether to use the joint forces and torques that are applied by the solver as observations.
/// </summary>
public bool UseJointForces;
/// <summary>
/// Creates a PhysicsSensorSettings with reasonable default values.
/// </summary>
/// <returns></returns>

/// </summary>
public bool UseModelSpace
{
- get { return UseModelSpaceTranslations || UseModelSpaceRotations; }
+ get { return UseModelSpaceTranslations || UseModelSpaceRotations || UseModelSpaceLinearVelocity; }
}
/// <summary>

{
- get { return UseLocalSpaceTranslations || UseLocalSpaceRotations; }
}
/// <summary>
/// The number of floats needed to represent a given number of transforms.
/// </summary>
/// <param name="numTransforms"></param>
/// <returns></returns>
public int TransformSize(int numTransforms)
{
int obsPerTransform = 0;
obsPerTransform += UseModelSpaceTranslations ? 3 : 0;
obsPerTransform += UseModelSpaceRotations ? 4 : 0;
obsPerTransform += UseLocalSpaceTranslations ? 3 : 0;
obsPerTransform += UseLocalSpaceRotations ? 4 : 0;
return numTransforms * obsPerTransform;
+ get { return UseLocalSpaceTranslations || UseLocalSpaceRotations || UseLocalSpaceLinearVelocity; }
}
}

var offset = baseOffset;
if (settings.UseModelSpace)
{
foreach (var pose in poseExtractor.ModelSpacePoses)
var poses = poseExtractor.ModelSpacePoses;
var vels = poseExtractor.ModelSpaceVelocities;
for(var i=0; i<poseExtractor.NumPoses; i++)
var pose = poses[i];
if(settings.UseModelSpaceTranslations)
{
writer.Add(pose.position, offset);

writer.Add(pose.rotation, offset);
offset += 4;
}
if (settings.UseModelSpaceLinearVelocity)
{
writer.Add(vels[i], offset);
offset += 3;
}
foreach (var pose in poseExtractor.LocalSpacePoses)
var poses = poseExtractor.LocalSpacePoses;
var vels = poseExtractor.LocalSpaceVelocities;
for(var i=0; i<poseExtractor.NumPoses; i++)
var pose = poses[i];
if(settings.UseLocalSpaceTranslations)
{
writer.Add(pose.position, offset);

{
writer.Add(pose.rotation, offset);
offset += 4;
}
if (settings.UseLocalSpaceLinearVelocity)
{
writer.Add(vels[i], offset);
offset += 3;
}
}
}
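With the two new linear-velocity flags, each enabled option contributes a fixed number of floats per pose: 3 for a translation, 4 for a rotation (quaternion), and 3 for a linear velocity. A standalone sketch of the size arithmetic implied by this diff (the real counting logic now lives in `PoseExtractor.GetNumPoseObservations`, shown in the next file):

```csharp
// Sketch: floats per pose for a given combination of settings flags.
static int ObsPerPose(
    bool modelSpaceTranslations, bool modelSpaceRotations, bool modelSpaceLinearVelocity,
    bool localSpaceTranslations, bool localSpaceRotations, bool localSpaceLinearVelocity)
{
    int n = 0;
    n += modelSpaceTranslations ? 3 : 0;   // position (x, y, z)
    n += modelSpaceRotations ? 4 : 0;      // quaternion (x, y, z, w)
    n += modelSpaceLinearVelocity ? 3 : 0; // velocity, new in this merge
    n += localSpaceTranslations ? 3 : 0;
    n += localSpaceRotations ? 4 : 0;
    n += localSpaceLinearVelocity ? 3 : 0; // new in this merge
    return n;
}

// Example: model-space translations + rotations + velocities
// => 3 + 4 + 3 = 10 floats per pose, times the number of poses.
```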

com.unity.ml-agents.extensions/Runtime/Sensors/PoseExtractor.cs (103 changes)


Pose[] m_ModelSpacePoses;
Pose[] m_LocalSpacePoses;
Vector3[] m_ModelSpaceLinearVelocities;
Vector3[] m_LocalSpaceLinearVelocities;
/// <summary>
/// Read access to the model space transforms.
/// </summary>

}
/// <summary>
- /// Number of transforms in the hierarchy (read-only).
+ /// Read access to the model space linear velocities.
/// </summary>
public IList<Vector3> ModelSpaceVelocities
{
get { return m_ModelSpaceLinearVelocities; }
}
/// <summary>
/// Read access to the local space linear velocities.
/// </summary>
public IList<Vector3> LocalSpaceVelocities
{
get { return m_LocalSpaceLinearVelocities; }
}
/// <summary>
/// Number of poses in the hierarchy (read-only).
/// </summary>
public int NumPoses
{

/// <summary>
/// Get the parent index of the body at the specified index.
/// </summary>
/// <param name="index"></param>
/// <returns></returns>
public int GetParentIndex(int index)
{
if (m_ParentIndices == null)
{
return -1;
}
return m_ParentIndices[index];
}
/// <summary>
/// Initialize with the mapping of parent indices.
/// The 0th element is assumed to be -1, indicating that it's the root.
/// </summary>

var numTransforms = parentIndices.Length;
m_ModelSpacePoses = new Pose[numTransforms];
m_LocalSpacePoses = new Pose[numTransforms];
m_ModelSpaceLinearVelocities = new Vector3[numTransforms];
m_LocalSpaceLinearVelocities = new Vector3[numTransforms];
}
/// <summary>

protected abstract Pose GetPoseAt(int index);
/// <summary>
/// Return the world space linear velocity of the i'th object.
/// </summary>
/// <param name="index"></param>
/// <returns></returns>
protected abstract Vector3 GetLinearVelocityAt(int index);
/// <summary>
/// Update the internal model space transform storage based on the underlying system.
/// </summary>
public void UpdateModelSpacePoses()

return;
}
- var worldTransform = GetPoseAt(0);
- var worldToModel = worldTransform.Inverse();
+ var rootWorldTransform = GetPoseAt(0);
+ var worldToModel = rootWorldTransform.Inverse();
+ var rootLinearVel = GetLinearVelocityAt(0);
- var currentTransform = GetPoseAt(i);
- m_ModelSpacePoses[i] = worldToModel.Multiply(currentTransform);
+ var currentWorldSpacePose = GetPoseAt(i);
+ var currentModelSpacePose = worldToModel.Multiply(currentWorldSpacePose);
+ m_ModelSpacePoses[i] = currentModelSpacePose;
+ var currentBodyLinearVel = GetLinearVelocityAt(i);
+ var relativeVelocity = currentBodyLinearVel - rootLinearVel;
+ m_ModelSpaceLinearVelocities[i] = worldToModel.rotation * relativeVelocity;
}
}

var invParent = parentTransform.Inverse();
var currentTransform = GetPoseAt(i);
m_LocalSpacePoses[i] = invParent.Multiply(currentTransform);
var parentLinearVel = GetLinearVelocityAt(m_ParentIndices[i]);
var currentLinearVel = GetLinearVelocityAt(i);
m_LocalSpaceLinearVelocities[i] = invParent.rotation * (currentLinearVel - parentLinearVel);
m_LocalSpaceLinearVelocities[i] = Vector3.zero;
/// <summary>
/// Compute the number of floats needed to represent the poses for the given PhysicsSensorSettings.
/// </summary>
/// <param name="settings"></param>
/// <returns></returns>
public int GetNumPoseObservations(PhysicsSensorSettings settings)
{
int obsPerPose = 0;
obsPerPose += settings.UseModelSpaceTranslations ? 3 : 0;
obsPerPose += settings.UseModelSpaceRotations ? 4 : 0;
obsPerPose += settings.UseLocalSpaceTranslations ? 3 : 0;
obsPerPose += settings.UseLocalSpaceRotations ? 4 : 0;
obsPerPose += settings.UseModelSpaceLinearVelocity ? 3 : 0;
obsPerPose += settings.UseLocalSpaceLinearVelocity ? 3 : 0;
return NumPoses * obsPerPose;
}
- public void DrawModelSpace(Vector3 offset)
+ internal void DrawModelSpace(Vector3 offset)
{
UpdateLocalSpacePoses();
UpdateModelSpacePoses();

}
}
/// <summary>
/// Extension methods for the Pose struct, in order to improve the readability of some math.
/// </summary>
public static class PoseExtensions
{
/// <summary>

public static Pose Multiply(this Pose pose, Pose rhs)
{
return rhs.GetTransformedBy(pose);
}
/// <summary>
/// Transform the vector by the pose. Conceptually this is equivalent to treating the Pose
/// as a 4x4 matrix and multiplying the augmented vector.
/// See https://en.wikipedia.org/wiki/Affine_transformation#Augmented_matrix for more details.
/// </summary>
/// <param name="pose"></param>
/// <param name="rhs"></param>
/// <returns></returns>
public static Vector3 Multiply(this Pose pose, Vector3 rhs)
{
return pose.rotation * rhs + pose.position;
}
// TODO optimize inv(A)*B?
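The model-space update above expresses every body relative to the root: poses are premultiplied by the inverse of the root's world pose, and velocities are taken relative to the root's velocity and then rotated into the root frame. A small sketch of that velocity math, assuming Unity's `Pose` type and the `Inverse()` extension defined in this file:

```csharp
using UnityEngine;

// Sketch of the model-space velocity computation in UpdateModelSpacePoses.
static Vector3 ModelSpaceVelocity(
    Pose rootWorldPose, Vector3 rootLinearVel, Vector3 bodyLinearVel)
{
    // worldToModel undoes the root's world-space transform.
    var worldToModel = rootWorldPose.Inverse();
    // Subtract the root's velocity, then rotate into the root (model) frame.
    return worldToModel.rotation * (bodyLinearVel - rootLinearVel);
}
```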

com.unity.ml-agents.extensions/Runtime/Sensors/RigidBodyPoseExtractor.cs (28 changes)


/// Initialize given a root RigidBody.
/// </summary>
/// <param name="rootBody"></param>
- public RigidBodyPoseExtractor(Rigidbody rootBody)
+ public RigidBodyPoseExtractor(Rigidbody rootBody, GameObject rootGameObject = null)
- var rbs = rootBody.GetComponentsInChildren <Rigidbody>();
+ Rigidbody[] rbs;
+ if (rootGameObject == null)
+ {
+ rbs = rootBody.GetComponentsInChildren<Rigidbody>();
+ }
+ else
+ {
+ rbs = rootGameObject.GetComponentsInChildren<Rigidbody>();
+ }
var bodyToIndex = new Dictionary<Rigidbody, int>(rbs.Length);
var parentIndices = new int[rbs.Length];

SetParentIndices(parentIndices);
}
- /// <summary>
- /// Get the pose of the i'th RigidBody.
- /// </summary>
- /// <param name="index"></param>
- /// <returns></returns>
+ /// <inheritdoc/>
+ protected override Vector3 GetLinearVelocityAt(int index)
+ {
+ return m_Bodies[index].velocity;
+ }
+ /// <inheritdoc/>
protected override Pose GetPoseAt(int index)
{
var body = m_Bodies[index];

internal Rigidbody[] Bodies => m_Bodies;
}
}
}
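The constructor change above lets callers search a different hierarchy than the root body's own GameObject for Rigidbodies, while keeping the old single-argument behavior. A hypothetical usage sketch; `ragdollRoot` and `agentObject` are illustrative names:

```csharp
// Old behavior: walk the hierarchy under the root body's own GameObject.
var rootRb = ragdollRoot.GetComponent<Rigidbody>();
var extractor = new RigidBodyPoseExtractor(rootRb);

// New optional argument: collect Rigidbodies under a different root object,
// e.g. the whole agent, while still treating rootRb as the root body.
var wideExtractor = new RigidBodyPoseExtractor(rootRb, agentObject);
```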

com.unity.ml-agents.extensions/Tests/Editor/Sensors/PoseExtractorTests.cs (11 changes)


return Pose.identity;
}
protected override Vector3 GetLinearVelocityAt(int index)
{
return Vector3.zero;
}
public void Init(int[] parentIndices)
{
SetParentIndices(parentIndices);

position = translation
};
}
protected override Vector3 GetLinearVelocityAt(int index)
{
return Vector3.zero;
}
}
[Test]

com.unity.ml-agents.extensions/Tests/Editor/Sensors/RigidBodyPoseExtractorTests.cs (1 change)


using System.Collections.Generic;
using UnityEngine;
using NUnit.Framework;
using Unity.MLAgents.Extensions.Sensors;

com.unity.ml-agents.extensions/Tests/Editor/Unity.ML-Agents.Extensions.EditorTests.asmdef (8 changes)

"name": "Unity.ML-Agents.Extensions.EditorTests",
"references": [
"Unity.ML-Agents.Extensions.Editor",
- "Unity.ML-Agents.Extensions"
+ "Unity.ML-Agents.Extensions",
+ "Unity.ML-Agents"
],
"optionalUnityReferences": [
"TestAssemblies"

],
- "excludePlatforms": []
+ "excludePlatforms": [],
+ "defineConstraints": [
+ "UNITY_INCLUDE_TESTS"
+ ]
}
}

com.unity.ml-agents/CHANGELOG.md (27 changes)


and this project adheres to
[Semantic Versioning](http://semver.org/spec/v2.0.0.html).
## [Unreleased]
### Major Changes
#### com.unity.ml-agents (C#)
#### ml-agents / ml-agents-envs / gym-unity (Python)
- The minimum supported python version for ml-agents-envs was changed to 3.6.1. (#4244)
- The interaction between EnvManager and TrainerController was changed; EnvManager.advance() was split into two stages,
and TrainerController now uses the results from the first stage to handle new behavior names. This change speeds up
Python training by approximately 5-10%. (#4259)
### Minor Changes
#### com.unity.ml-agents (C#)
#### ml-agents / ml-agents-envs / gym-unity (Python)
- StatsSideChannel now stores multiple values per key. This means that multiple
calls to `StatsRecorder.Add()` with the same key in the same step will no
longer overwrite each other. (#4236)
- Model checkpoints are now also saved as .nn files during training. (#4127)
- Model checkpoint info is saved in TrainingStatus.json after training is concluded (#4127)
### Bug Fixes
#### com.unity.ml-agents (C#)
- Academy.EnvironmentStep() will now throw an exception if it is called
recursively (for example, by an Agent's CollectObservations method).
Previously, this would result in an infinite loop and cause the editor to hang.
(#4226)
#### ml-agents / ml-agents-envs / gym-unity (Python)
## [1.2.0-preview] - 2020-07-15
### Major Changes

com.unity.ml-agents/Runtime/Academy.cs (69 changes)


// Flag used to keep track of the first time the Academy is reset.
bool m_HadFirstReset;
// Whether the Academy is in the middle of a step. This is used to detect an Academy
// step called by user code that is itself called by the Academy.
bool m_IsStepping;
// Random seed used for inference.
int m_InferenceSeed;

/// </summary>
public void EnvironmentStep()
{
if (!m_HadFirstReset)
// Check whether we're already in the middle of a step.
// This shouldn't happen generally, but could happen if user code (e.g. CollectObservations)
// that is called by EnvironmentStep() also calls EnvironmentStep(). This would result
// in an infinite loop and/or stack overflow, so stop it before it happens.
if (m_IsStepping)
ForcedFullReset();
throw new UnityAgentsException(
"Academy.EnvironmentStep() called recursively. " +
"This might happen if you call EnvironmentStep() from custom code such as " +
"CollectObservations() or OnActionReceived()."
);
AgentPreStep?.Invoke(m_StepCount);
m_IsStepping = true;
try
{
if (!m_HadFirstReset)
{
ForcedFullReset();
}
m_StepCount += 1;
m_TotalStepCount += 1;
AgentIncrementStep?.Invoke();
AgentPreStep?.Invoke(m_StepCount);
m_StepCount += 1;
m_TotalStepCount += 1;
AgentIncrementStep?.Invoke();
using (TimerStack.Instance.Scoped("AgentSendState"))
{
AgentSendState?.Invoke();
}
using (TimerStack.Instance.Scoped("AgentSendState"))
{
AgentSendState?.Invoke();
}
using (TimerStack.Instance.Scoped("DecideAction"))
{
DecideAction?.Invoke();
}
using (TimerStack.Instance.Scoped("DecideAction"))
{
DecideAction?.Invoke();
}
// If the communicator is not on, we need to clear the SideChannel sending queue
if (!IsCommunicatorOn)
{
SideChannelManager.GetSideChannelMessage();
}
// If the communicator is not on, we need to clear the SideChannel sending queue
if (!IsCommunicatorOn)
{
SideChannelManager.GetSideChannelMessage();
using (TimerStack.Instance.Scoped("AgentAct"))
{
AgentAct?.Invoke();
}
using (TimerStack.Instance.Scoped("AgentAct"))
finally
AgentAct?.Invoke();
// Reset m_IsStepping when we're done (or if an exception occurred).
m_IsStepping = false;
}
}
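The hunk above interleaves the removed and added bodies of `EnvironmentStep()`, which makes the control flow hard to follow. Reassembled from the added lines, the new structure is a reentrancy guard around the whole step, with `try`/`finally` ensuring the flag is cleared even when user code throws. This is a sketch, not the verbatim merged file; the elided middle is the unchanged step logic:

```csharp
public void EnvironmentStep()
{
    // Detect EnvironmentStep() being called from inside EnvironmentStep(),
    // e.g. from CollectObservations() or OnActionReceived().
    if (m_IsStepping)
    {
        throw new UnityAgentsException(
            "Academy.EnvironmentStep() called recursively.");
    }
    m_IsStepping = true;
    try
    {
        if (!m_HadFirstReset)
        {
            ForcedFullReset();
        }
        AgentPreStep?.Invoke(m_StepCount);
        m_StepCount += 1;
        m_TotalStepCount += 1;
        AgentIncrementStep?.Invoke();
        // ... AgentSendState, DecideAction, side-channel flush, AgentAct ...
    }
    finally
    {
        // Reset m_IsStepping when we're done (or if an exception occurred).
        m_IsStepping = false;
    }
}
```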

com.unity.ml-agents/Runtime/StatsRecorder.cs (1 change)


{
/// <summary>
/// Values within the summary period are averaged before reporting.
/// Note that values from the same C# environment in the same step may replace each other.
/// </summary>
Average = 0,
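Combined with the CHANGELOG entry above (multiple stat values per key are now kept on the trainer side instead of overwriting each other), typical usage looks like the following. A short sketch assuming the public `Academy.Instance.StatsRecorder` API:

```csharp
var stats = Academy.Instance.StatsRecorder;

// With the default StatAggregationMethod.Average, values reported within a
// summary period are averaged before being written out.
stats.Add("MyBehavior/Obstacle Hits", 1.0f);
stats.Add("MyBehavior/Obstacle Hits", 0.0f);

// MostRecent reports only the last value seen in the summary period.
stats.Add("MyBehavior/Level", 3.0f, StatAggregationMethod.MostRecent);
```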

com.unity.ml-agents/Tests/Editor/AcademyTests.cs (32 changes)


using NUnit.Framework;
using Unity.MLAgents.Sensors;
using UnityEngine;
#if UNITY_2019_3_OR_NEWER
using System.Reflection;

Assert.AreEqual("com.unity.ml-agents", packageInfo.name);
Assert.AreEqual(Academy.k_PackageVersion, packageInfo.version);
#endif
}
class RecursiveAgent : Agent
{
int m_collectObsCount;
public override void CollectObservations(VectorSensor sensor)
{
m_collectObsCount++;
if (m_collectObsCount == 1)
{
// NEVER DO THIS IN REAL CODE!
Academy.Instance.EnvironmentStep();
}
}
}
[Test]
public void TestRecursiveStepThrows()
{
var gameObj = new GameObject();
var agent = gameObj.AddComponent<RecursiveAgent>();
agent.LazyInitialize();
agent.RequestDecision();
Assert.Throws<UnityAgentsException>(() =>
{
Academy.Instance.EnvironmentStep();
});
// Make sure the Academy reset to a good state and is still steppable.
Academy.Instance.EnvironmentStep();
}

com.unity.ml-agents/Tests/Editor/Sensor/VectorSensorTests.cs (22 changes)


namespace Unity.MLAgents.Tests
{
- public class SensorTestHelper
+ public static class SensorTestHelper
var numExpected = expected.Length;
const float fill = -1337f;
var output = new float[numExpected];
for (var i = 0; i < numExpected; i++)
{
output[i] = fill;
}
Assert.AreEqual(fill, output[0]);
ObservationWriter writer = new ObservationWriter();
writer.SetTarget(output, sensor.GetObservationShape(), 0);
// Make sure ObservationWriter didn't touch anything
Assert.AreEqual(fill, output[0]);
sensor.Write(writer);
Assert.AreEqual(expected, output);
string errorMessage;
bool isOK = SensorHelper.CompareObservation(sensor, expected, out errorMessage);
Assert.IsTrue(isOK, errorMessage);
}
}

docs/Getting-Started.md (2 changes)


TensorBoard. From the command line run:
```sh
- tensorboard --logdir=results
+ tensorboard --logdir results
```
Then navigate to `localhost:6006` in your browser to view the TensorBoard

docs/Learning-Environment-Create-New.md (4 changes)


1. Right click in Hierarchy window, select 3D Object > Cube.
1. Name the GameObject "Target"
1. Select the Target Cube to view its properties in the Inspector window.
- 1. Set Transform to Position = `3, 0.5, 3)`, Rotation = `(0, 0, 0)`, Scale =
+ 1. Set Transform to Position = `(3, 0.5, 3)`, Rotation = `(0, 0, 0)`, Scale =
`(1, 1, 1)`.
<p align="left">

1. In the Unity Project window, double-click the `RollerAgent` script to open it
in your code editor.
1. In the editor, add the `using Unity.MLAgents;` and
- `using Unity.MLAgents.Sensors` statements and then change the base class from
+ `using Unity.MLAgents.Sensors;` statements and then change the base class from
`MonoBehaviour` to `Agent`.
1. Delete the `Update()` method, but we will use the `Start()` function, so
leave it alone for now.

docs/ML-Agents-Overview.md (2 changes)


In reinforcement learning, the end goal for the Agent is to discover a behavior
(a Policy) that maximizes a reward. You will need to provide the agent one or
- more reward signals to use during training.Typically, a reward is defined by
+ more reward signals to use during training. Typically, a reward is defined by
your environment, and corresponds to reaching some goal. These are what we refer
to as _extrinsic_ rewards, as they are defined external of the learning
algorithm.

docs/Python-API.md (2 changes)


from mlagents_envs.environment import UnityEnvironment
# This is a non-blocking call that only loads the environment.
env = UnityEnvironment(file_name="3DBall", seed=1, side_channels=[])
- # Start interacting with the evironment.
+ # Start interacting with the environment.
env.reset()
behavior_names = env.behavior_specs.keys()
...

docs/Training-Configuration-File.md (4 changes)


| `threaded` | (default = `true`) By default, model updates can happen while the environment is being stepped. This violates the [on-policy](https://spinningup.openai.com/en/latest/user/algorithms.html#the-on-policy-algorithms) assumption of PPO slightly in exchange for a training speedup. To maintain the strict on-policyness of PPO, you can disable parallel updates by setting `threaded` to `false`. There is usually no reason to turn `threaded` off for SAC. |
| `hyperparameters -> learning_rate` | (default = `3e-4`) Initial learning rate for gradient descent. Corresponds to the strength of each gradient descent update step. This should typically be decreased if training is unstable, and the reward does not consistently increase. <br><br>Typical range: `1e-5` - `1e-3` |
| `hyperparameters -> batch_size` | Number of experiences in each iteration of gradient descent. **This should always be multiple times smaller than `buffer_size`**. If you are using a continuous action space, this value should be large (in the order of 1000s). If you are using a discrete action space, this value should be smaller (in order of 10s). <br><br> Typical range: (Continuous - PPO): `512` - `5120`; (Continuous - SAC): `128` - `1024`; (Discrete, PPO & SAC): `32` - `512`. |
- | `hyperparameters -> buffer_size` | (default = `10240` for PPO and `50000` for SAC) Number of experiences to collect before updating the policy model. Corresponds to how many experiences should be collected before we do any learning or updating of the model. **This should be multiple times larger than `batch_size`**. Typically a larger `buffer_size` corresponds to more stable training updates. In SAC, the max size of the experience buffer - on the order of thousands of times longer than your episodes, so that SAC can learn from old as well as new experiences. <br><br>Typical range: PPO: `2048` - `409600`; SAC: `50000` - `1000000` |
+ | `hyperparameters -> buffer_size` | (default = `10240` for PPO and `50000` for SAC)<br> **PPO:** Number of experiences to collect before updating the policy model. Corresponds to how many experiences should be collected before we do any learning or updating of the model. **This should be multiple times larger than `batch_size`**. Typically a larger `buffer_size` corresponds to more stable training updates. <br> **SAC:** The max size of the experience buffer - on the order of thousands of times longer than your episodes, so that SAC can learn from old as well as new experiences. <br><br>Typical range: PPO: `2048` - `409600`; SAC: `50000` - `1000000` |
| `hyperparameters -> learning_rate_schedule` | (default = `linear` for PPO and `constant` for SAC) Determines how learning rate changes over time. For PPO, we recommend decaying learning rate until max_steps so learning converges more stably. However, for some cases (e.g. training for an unknown amount of time) this feature can be disabled. For SAC, we recommend holding learning rate constant so that the agent can continue to learn until its Q function converges naturally. <br><br>`linear` decays the learning_rate linearly, reaching 0 at max_steps, while `constant` keeps the learning rate constant for the entire training run. |
| `network_settings -> hidden_units` | (default = `128`) Number of units in the hidden layers of the neural network. Correspond to how many units are in each fully connected layer of the neural network. For simple problems where the correct action is a straightforward combination of the observation inputs, this should be small. For problems where the action is a very complex interaction between the observation variables, this should be larger. <br><br> Typical range: `32` - `512` |
| `network_settings -> num_layers` | (default = `2`) The number of hidden layers in the neural network. Corresponds to how many hidden layers are present after the observation input, or after the CNN encoding of the visual observation. For simple problems, fewer layers are likely to train faster and more efficiently. More layers may be necessary for more complex control problems. <br><br> Typical range: `1` - `3` |

| `hyperparameters -> beta` | (default = `5.0e-3`) Strength of the entropy regularization, which makes the policy "more random." This ensures that agents properly explore the action space during training. Increasing this will ensure more random actions are taken. This should be adjusted such that the entropy (measurable from TensorBoard) slowly decreases alongside increases in reward. If entropy drops too quickly, increase beta. If entropy drops too slowly, decrease `beta`. <br><br>Typical range: `1e-4` - `1e-2` |
| `hyperparameters -> epsilon` | (default = `0.2`) Influences how rapidly the policy can evolve during training. Corresponds to the acceptable threshold of divergence between the old and new policies during gradient descent updating. Setting this value small will result in more stable updates, but will also slow the training process. <br><br>Typical range: `0.1` - `0.3` |
| `hyperparameters -> lambd` | (default = `0.95`) Regularization parameter (lambda) used when calculating the Generalized Advantage Estimate ([GAE](https://arxiv.org/abs/1506.02438)). This can be thought of as how much the agent relies on its current value estimate when calculating an updated value estimate. Low values correspond to relying more on the current value estimate (which can be high bias), and high values correspond to relying more on the actual rewards received in the environment (which can be high variance). The parameter provides a trade-off between the two, and the right value can lead to a more stable training process. <br><br>Typical range: `0.9` - `0.95` |
- | `hyperparameters -> num_epoch` | Number of passes to make through the experience buffer when performing gradient descent optimization.The larger the batch_size, the larger it is acceptable to make this. Decreasing this will ensure more stable updates, at the cost of slower learning. <br><br>Typical range: `3` - `10` |
+ | `hyperparameters -> num_epoch` | (default = `3`) Number of passes to make through the experience buffer when performing gradient descent optimization.The larger the batch_size, the larger it is acceptable to make this. Decreasing this will ensure more stable updates, at the cost of slower learning. <br><br>Typical range: `3` - `10` |
### SAC-specific Configurations

docs/Training-ML-Agents.md (4 changes)


blocks. See [Profiling in Python](Profiling-Python.md) for more information
on the timers generated.
- These artifacts (except the `.nn` file) are updated throughout the training
- process and finalized when training completes or is interrupted.
+ These artifacts are updated throughout the training
+ process and finalized when training is completed or is interrupted.
#### Stopping and Resuming Training

docs/Training-on-Microsoft-Azure.md (2 changes)


2. Unless you started the training as a background process, connect to your VM
from another terminal instance.
3. Run the following command from your terminal
- `tensorboard --logdir=summaries --host 0.0.0.0`
+ `tensorboard --logdir results --host 0.0.0.0`
4. You should now be able to open a browser and navigate to
`<Your_VM_IP_Address>:6060` to view the TensorBoard report.

docs/Using-Docker.md (4 changes)


http://localhost:6006:
```sh
- docker exec -it <container-name> tensorboard --logdir=/unity-volume/summaries --host=0.0.0.0
+ docker exec -it <container-name> tensorboard --logdir /unity-volume/results --host 0.0.0.0
- docker exec -it 3DBallContainer.first.trial tensorboard --logdir=/unity-volume/summaries --host=0.0.0.0
+ docker exec -it 3DBallContainer.first.trial tensorboard --logdir /unity-volume/results --host 0.0.0.0
```
For more details on Tensorboard, check out the documentation about

docs/Using-Tensorboard.md (2 changes)


1. Open a terminal or console window:
1. Navigate to the directory where the ML-Agents Toolkit is installed.
- 1. From the command line run: `tensorboard --logdir=results --port=6006`
+ 1. From the command line run: `tensorboard --logdir results --port 6006`
1. Open a browser window and navigate to
[localhost:6006](http://localhost:6006).

docs/localized/zh-CN/docs/Getting-Started-with-Balance-Ball.md (4 changes)


### 观测训练进度
开始使用 `learn.py` 按照前面部分所述的方式进行训练后,`ml-agents` 文件夹将
- 包含一个 `summaries` 目录。为了更详细地观测训练过程,
+ 包含一个 `results` 目录。为了更详细地观测训练过程,
- `tensorboard --logdir=summaries`
+ `tensorboard --logdir results`
然后导航至 `localhost:6006`

gym-unity/README.md (4 changes)


```python
from gym_unity.envs import UnityToGymWrapper
- env = UnityToGymWrapper(unity_environment, uint8_visual, allow_multiple_obs)
+ env = UnityToGymWrapper(unity_environment, uint8_visual, flatten_branched, allow_multiple_obs)
```
- `unity_environment` refers to the Unity environment to be wrapped.

- `allow_multiple_obs` will return a list of observations. The first elements contain the visual observations and the
last element contains the array of vector observations. If False the environment returns a single array (containing
- a single visual observations, if present, otherwise the vector observation)
+ a single visual observations, if present, otherwise the vector observation). Defaults to `False`.
The returned environment `env` will function as a gym.

gym-unity/gym_unity/__init__.py (4 changes)


# Version of the library that will be used to upload to pypi
- __version__ = "0.18.1"
+ __version__ = "0.19.0.dev0"
- __release_tag__ = "release_5"
+ __release_tag__ = None

gym-unity/setup.py (4 changes)


tag = os.getenv("CIRCLE_TAG")
if tag != EXPECTED_TAG:
- info = "Git tag: {0} does not match the expected tag of this app: {1}".format(
+ info = "Git tag: {} does not match the expected tag of this app: {}".format(
tag, EXPECTED_TAG
)
sys.exit(info)

author_email="ML-Agents@unity3d.com",
url="https://github.com/Unity-Technologies/ml-agents",
packages=find_packages(),
- install_requires=["gym", "mlagents_envs=={}".format(VERSION)],
+ install_requires=["gym", f"mlagents_envs=={VERSION}"],
cmdclass={"verify": VerifyVersionCommand},
)

ml-agents-envs/mlagents_envs/__init__.py (4 changes)


# Version of the library that will be used to upload to pypi
- __version__ = "0.18.1"
+ __version__ = "0.19.0.dev0"
- __release_tag__ = "release_5"
+ __release_tag__ = None

ml-agents-envs/mlagents_envs/base_env.py (8 changes)


:returns: The DecisionStep
"""
if agent_id not in self.agent_id_to_index:
- raise KeyError(
- "agent_id {} is not present in the DecisionSteps".format(agent_id)
- )
+ raise KeyError(f"agent_id {agent_id} is not present in the DecisionSteps")
agent_index = self._agent_id_to_index[agent_id] # type: ignore
agent_obs = []
for batched_obs in self.obs:

specific agent
"""
if agent_id not in self.agent_id_to_index:
- raise KeyError(
- "agent_id {} is not present in the TerminalSteps".format(agent_id)
- )
+ raise KeyError(f"agent_id {agent_id} is not present in the TerminalSteps")
agent_index = self._agent_id_to_index[agent_id] # type: ignore
agent_obs = []
for batched_obs in self.obs:

ml-agents-envs/mlagents_envs/communicator.py (2 changes)


from mlagents_envs.communicator_objects.unity_input_pb2 import UnityInputProto
- class Communicator(object):
+ class Communicator:
def __init__(self, worker_id=0, base_port=5005):
"""
Python side of the communication. Must be used in pair with the right Unity Communicator equivalent.

ml-agents-envs/mlagents_envs/env_utils.py (4 changes)


.replace(".x86", "")
)
true_filename = os.path.basename(os.path.normpath(env_path))
- get_logger(__name__).debug("The true file name is {}".format(true_filename))
+ get_logger(__name__).debug(f"The true file name is {true_filename}")
if not (glob.glob(env_path) or glob.glob(env_path + ".*")):
return None

f"Couldn't launch the {file_name} environment. Provided filename does not match any environments."
)
else:
- get_logger(__name__).debug("This is the launch string {}".format(launch_string))
+ get_logger(__name__).debug(f"This is the launch string {launch_string}")
# Launch Unity environment
subprocess_args = [launch_string] + args
try:

ml-agents-envs/mlagents_envs/environment.py (17 changes)


def _assert_behavior_exists(self, behavior_name: str) -> None:
if behavior_name not in self._env_specs:
raise UnityActionException(
- "The group {0} does not correspond to an existing agent group "
- "in the environment".format(behavior_name)
+ f"The group {behavior_name} does not correspond to an existing "
+ f"agent group in the environment"
)
def set_actions(self, behavior_name: BehaviorName, action: np.ndarray) -> None:

expected_shape = (len(self._env_state[behavior_name][0]), spec.action_size)
if action.shape != expected_shape:
raise UnityActionException(
"The behavior {0} needs an input of dimension {1} for "