
Merge branch 'main' into develop-coma2-trainer

/develop/action-slice
Ervin Teng, 3 years ago
Current commit
fd0dd35c
54 files changed, with 636 insertions and 329 deletions
  1. .github/PULL_REQUEST_TEMPLATE.md (6)
  2. .github/workflows/pre-commit.yml (2)
  3. .github/workflows/publish_pypi.yaml (2)
  4. .github/workflows/pytest.yml (2)
  5. .yamato/com.unity.ml-agents-performance.yml (2)
  6. .yamato/com.unity.ml-agents-test.yml (4)
  7. .yamato/compressed-sensor-test.yml (2)
  8. .yamato/gym-interface-test.yml (2)
  9. .yamato/protobuf-generation-test.yml (2)
  10. .yamato/pytest-gpu.yml (2)
  11. .yamato/python-ll-api-test.yml (2)
  12. .yamato/standalone-build-test.yml (2)
  13. .yamato/standalone-build-webgl-test.yml (2)
  14. .yamato/training-backcompat-tests.yml (2)
  15. .yamato/training-int-tests.yml (2)
  16. ML-Agents-Input-Example/Packages/packages-lock.json (4)
  17. Project/Assets/ML-Agents/Examples/FoodCollector/Scenes/GridFoodCollector.unity (68)
  18. Project/Assets/ML-Agents/Examples/Sorter/Prefabs/Area.prefab (5)
  19. README.md (10)
  20. com.unity.ml-agents.extensions/Runtime/Sensors/CountingGridSensor.cs (8)
  21. com.unity.ml-agents.extensions/Runtime/Sensors/GridSensor.cs (264)
  22. com.unity.ml-agents.extensions/Runtime/Unity.ML-Agents.Extensions.asmdef (4)
  23. com.unity.ml-agents/CHANGELOG.md (15)
  24. com.unity.ml-agents/CONTRIBUTING.md (4)
  25. com.unity.ml-agents/Runtime/Inference/BarracudaModelParamLoader.cs (8)
  26. com.unity.ml-agents/Runtime/Inference/TensorGenerator.cs (4)
  27. com.unity.ml-agents/Runtime/Inference/TensorNames.cs (20)
  28. com.unity.ml-agents/Runtime/Sensors/BufferSensor.cs (8)
  29. com.unity.ml-agents/Runtime/Sensors/BufferSensorComponent.cs (31)
  30. com.unity.ml-agents/Tests/Editor/Sensor/BufferSensorTest.cs (5)
  31. docs/Installation-Anaconda-Windows.md (2)
  32. docs/Installation.md (4)
  33. docs/Migrating.md (2)
  34. docs/localized/KR/docs/Installation-Anaconda-Windows.md (2)
  35. docs/localized/RU/README.md (6)
  36. docs/localized/RU/docs/Начало работы.md (30)
  37. docs/localized/RU/docs/Установка.md (32)
  38. ml-agents-envs/mlagents_envs/base_env.py (3)
  39. ml-agents-envs/mlagents_envs/tests/test_envs.py (2)
  40. ml-agents-envs/mlagents_envs/tests/test_rpc_utils.py (11)
  41. ml-agents/mlagents/trainers/agent_processor.py (146)
  42. ml-agents/mlagents/trainers/behavior_id_utils.py (8)
  43. ml-agents/mlagents/trainers/optimizer/torch_optimizer.py (4)
  44. ml-agents/mlagents/trainers/policy/policy.py (15)
  45. ml-agents/mlagents/trainers/tests/mock_brain.py (4)
  46. ml-agents/mlagents/trainers/tests/test_agent_processor.py (36)
  47. ml-agents/mlagents/trainers/torch/model_serialization.py (81)
  48. ml-agents/mlagents/trainers/torch/networks.py (2)
  49. ml-agents/mlagents/trainers/trajectory.py (13)
  50. ml-agents/setup.py (8)
  51. utils/make_readme_table.py (16)
  52. utils/validate_release_links.py (2)
  53. com.unity.ml-agents/Editor/BufferSensorComponentEditor.cs (31)
  54. com.unity.ml-agents/Editor/BufferSensorComponentEditor.cs.meta (11)

6
.github/PULL_REQUEST_TEMPLATE.md


### Checklist
- [ ] Added tests that prove my fix is effective or that my feature works
- [ ] Updated the [changelog](https://github.com/Unity-Technologies/ml-agents/blob/master/com.unity.ml-agents/CHANGELOG.md) (if applicable)
- [ ] Updated the [documentation](https://github.com/Unity-Technologies/ml-agents/tree/master/docs) (if applicable)
- [ ] Updated the [migration guide](https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Migrating.md) (if applicable)
- [ ] Updated the [changelog](https://github.com/Unity-Technologies/ml-agents/blob/main/com.unity.ml-agents/CHANGELOG.md) (if applicable)
- [ ] Updated the [documentation](https://github.com/Unity-Technologies/ml-agents/tree/main/docs) (if applicable)
- [ ] Updated the [migration guide](https://github.com/Unity-Technologies/ml-agents/blob/main/docs/Migrating.md) (if applicable)
### Other comments

2
.github/workflows/pre-commit.yml


on:
pull_request:
push:
branches: [master]
branches: [main]
jobs:
pre-commit:

2
.github/workflows/publish_pypi.yaml


package-path: [ml-agents, ml-agents-envs, gym-unity]
steps:
- uses: actions/checkout@master
- uses: actions/checkout@main
- name: Set up Python 3.7
uses: actions/setup-python@v1
with:

2
.github/workflows/pytest.yml


- 'test_requirements.txt'
- '.github/workflows/pytest.yml'
push:
branches: [master]
branches: [main]
jobs:
pytest:

2
.yamato/com.unity.ml-agents-performance.yml


triggers:
cancel_old_ci: true
recurring:
- branch: master
- branch: main
frequency: daily
artifacts:
logs:

4
.yamato/com.unity.ml-agents-test.yml


triggers:
cancel_old_ci: true
recurring:
- branch: master
- branch: main
frequency: daily
{% for package in packages %}

cancel_old_ci: true
{% if platform.name == "linux" %}
expression: |
(pull_request.target eq "master" OR
(pull_request.target eq "main" OR
pull_request.target match "release.+") AND
NOT pull_request.draft AND
(pull_request.changes.any match "com.unity.ml-agents/**" OR

2
.yamato/compressed-sensor-test.yml


cancel_old_ci: true
{% if editor.extra_test == "sensor" %}
expression: |
(pull_request.target eq "master" OR
(pull_request.target eq "main" OR
pull_request.target match "release.+") AND
NOT pull_request.draft AND
(pull_request.changes.any match "com.unity.ml-agents/**" OR

2
.yamato/gym-interface-test.yml


cancel_old_ci: true
{% if editor.extra_test == "gym" %}
expression: |
(pull_request.target eq "master" OR
(pull_request.target eq "main" OR
pull_request.target match "release.+") AND
NOT pull_request.draft AND
(pull_request.changes.any match "com.unity.ml-agents/**" OR

2
.yamato/protobuf-generation-test.yml


triggers:
cancel_old_ci: true
expression: |
(pull_request.target eq "master" OR
(pull_request.target eq "main" OR
pull_request.target match "release.+") AND
NOT pull_request.draft AND
(pull_request.changes.any match "protobuf-definitions/**" OR

2
.yamato/pytest-gpu.yml


triggers:
cancel_old_ci: true
recurring:
- branch: master
- branch: main
frequency: daily
artifacts:
logs:

2
.yamato/python-ll-api-test.yml


cancel_old_ci: true
{% if editor.extra_test == "llapi" %}
expression: |
(pull_request.target eq "master" OR
(pull_request.target eq "main" OR
pull_request.target match "release.+") AND
NOT pull_request.draft AND
(pull_request.changes.any match "com.unity.ml-agents/**" OR

2
.yamato/standalone-build-test.yml


triggers:
cancel_old_ci: true
expression: |
(pull_request.target eq "master" OR
(pull_request.target eq "main" OR
pull_request.target match "release.+") AND
NOT pull_request.draft AND
(pull_request.changes.any match "com.unity.ml-agents/**" OR

2
.yamato/standalone-build-webgl-test.yml


triggers:
cancel_old_ci: true
recurring:
- branch: master
- branch: main
frequency: weekly
artifacts:
logs:

2
.yamato/training-backcompat-tests.yml


triggers:
cancel_old_ci: true
recurring:
- branch: master
- branch: main
frequency: daily
artifacts:
logs:

2
.yamato/training-int-tests.yml


triggers:
cancel_old_ci: true
expression: |
(pull_request.target eq "master" OR
(pull_request.target eq "main" OR
pull_request.target match "release.+") AND
NOT pull_request.draft AND
(pull_request.changes.any match "com.unity.ml-agents/**" OR

4
ML-Agents-Input-Example/Packages/packages-lock.json


"url": "https://packages.unity.com"
},
"com.unity.barracuda": {
"version": "1.3.0-preview",
"version": "1.3.1-preview",
"depth": 1,
"source": "registry",
"dependencies": {

"depth": 0,
"source": "local",
"dependencies": {
"com.unity.barracuda": "1.3.0-preview",
"com.unity.barracuda": "1.3.1-preview",
"com.unity.modules.imageconversion": "1.0.0",
"com.unity.modules.jsonserialize": "1.0.0",
"com.unity.modules.physics": "1.0.0",

68
Project/Assets/ML-Agents/Examples/FoodCollector/Scenes/GridFoodCollector.unity


m_ReflectionIntensity: 1
m_CustomReflection: {fileID: 0}
m_Sun: {fileID: 0}
m_IndirectSpecularColor: {r: 0.4497121, g: 0.49977785, b: 0.57563704, a: 1}
m_IndirectSpecularColor: {r: 0.44971168, g: 0.4997775, b: 0.57563686, a: 1}
m_UseRadianceAmbientProbe: 0
--- !u!157 &3
LightmapSettings:

propertyPath: m_Name
value: GridFoodCollectorArea
objectReference: {fileID: 0}
- target: {fileID: 4137908820211030, guid: b5339e4b990ade14f992aadf3bf8591b, type: 3}
propertyPath: m_LocalPosition.x
value: -17.2
objectReference: {fileID: 0}
- target: {fileID: 4259834826122778, guid: b5339e4b990ade14f992aadf3bf8591b, type: 3}
propertyPath: m_LocalPosition.x
value: -23.9
objectReference: {fileID: 0}
- target: {fileID: 4419274671784554, guid: b5339e4b990ade14f992aadf3bf8591b, type: 3}
propertyPath: m_LocalPosition.x
value: -8.9
objectReference: {fileID: 0}
- target: {fileID: 4688212428263696, guid: b5339e4b990ade14f992aadf3bf8591b, type: 3}
propertyPath: m_LocalPosition.x
value: 0

propertyPath: m_LocalEulerAnglesHint.z
value: 0
objectReference: {fileID: 0}
- target: {fileID: 4756368533889646, guid: b5339e4b990ade14f992aadf3bf8591b, type: 3}
propertyPath: m_LocalPosition.x
value: -30.4
objectReference: {fileID: 0}
- target: {fileID: 4756368533889646, guid: b5339e4b990ade14f992aadf3bf8591b, type: 3}
propertyPath: m_LocalPosition.z
value: -9.9
objectReference: {fileID: 0}
- target: {fileID: 3067525015186813280, guid: b5339e4b990ade14f992aadf3bf8591b,
type: 3}
propertyPath: NumCollidersPerCell
value: 1
objectReference: {fileID: 0}
- target: {fileID: 3067525015186813280, guid: b5339e4b990ade14f992aadf3bf8591b,
type: 3}
propertyPath: EstimatedMaxCollidersPerCell
value: 4
objectReference: {fileID: 0}
- target: {fileID: 5837508007780682603, guid: b5339e4b990ade14f992aadf3bf8591b,
type: 3}
propertyPath: ChannelOffsets.Array.size
value: 1
objectReference: {fileID: 0}
- target: {fileID: 5837508007780682603, guid: b5339e4b990ade14f992aadf3bf8591b,
type: 3}
propertyPath: ShowGizmos
value: 0
objectReference: {fileID: 0}
- target: {fileID: 5837508007780682603, guid: b5339e4b990ade14f992aadf3bf8591b,
type: 3}
propertyPath: ObservationPerCell
value: 6
objectReference: {fileID: 0}
- target: {fileID: 5837508007780682603, guid: b5339e4b990ade14f992aadf3bf8591b,
type: 3}
propertyPath: NumberOfObservations
value: 9600
objectReference: {fileID: 0}
- target: {fileID: 5837508007780682603, guid: b5339e4b990ade14f992aadf3bf8591b,
type: 3}
propertyPath: m_Enabled
value: 1
objectReference: {fileID: 0}
- target: {fileID: 5837508007780682603, guid: b5339e4b990ade14f992aadf3bf8591b,
type: 3}
propertyPath: rootReference
value:
objectReference: {fileID: 190823801}
--- !u!1 &190823801 stripped
GameObject:
m_CorrespondingSourceObject: {fileID: 1706274796045088, guid: b5339e4b990ade14f992aadf3bf8591b,
type: 3}
m_PrefabInstance: {fileID: 190823800}
m_PrefabAsset: {fileID: 0}
--- !u!1001 &392794583
PrefabInstance:
m_ObjectHideFlags: 0

5
Project/Assets/ML-Agents/Examples/Sorter/Prefabs/Area.prefab


m_Script: {fileID: 11500000, guid: dd8012d5925524537b27131fef517017, type: 3}
m_Name:
m_EditorClassIdentifier:
ObservableSize: 23
MaxNumObservables: 20
m_SensorName: BufferSensor
m_ObservableSize: 23
m_MaxNumObservables: 20
--- !u!1 &6000518840957865293
GameObject:
m_ObjectHideFlags: 0

10
README.md


[here](https://github.com/Unity-Technologies/ml-agents/tree/release_13_docs/docs/Readme.md)
to get started with the latest release of ML-Agents.**
The table below lists all our releases, including our `master` branch which is
The table below lists all our releases, including our `main` branch which is
under active development and may be unstable. A few helpful guidelines:
- The [Versioning page](docs/Versioning.md) overviews how we manage our GitHub
releases and the versioning process for each of the ML-Agents components.

| **Version** | **Release Date** | **Source** | **Documentation** | **Download** | **Python Package** | **Unity Package** |
|:-------:|:------:|:-------------:|:-------:|:------------:|:------------:|:------------:|
| **master (unstable)** | -- | [source](https://github.com/Unity-Technologies/ml-agents/tree/master) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/master/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/master.zip) | -- | -- |
| **main (unstable)** | -- | [source](https://github.com/Unity-Technologies/ml-agents/tree/main) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/main/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/main.zip) | -- | -- |
| **Release 13** | **February 17, 2021** | **[source](https://github.com/Unity-Technologies/ml-agents/tree/release_13)** | **[docs](https://github.com/Unity-Technologies/ml-agents/tree/release_13_docs/docs/Readme.md)** | **[download](https://github.com/Unity-Technologies/ml-agents/archive/release_13.zip)** | **[0.24.0](https://pypi.org/project/mlagents/0.24.0/)** | **[1.8.0](https://docs.unity3d.com/Packages/com.unity.ml-agents@1.8/manual/index.html)** |
| **Release 12** | December 22, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/release_12) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/release_12_docs/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/release_12.zip) | [0.23.0](https://pypi.org/project/mlagents/0.23.0/) | [1.7.2](https://docs.unity3d.com/Packages/com.unity.ml-agents@1.7/manual/index.html) |
| **Release 11** | December 21, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/release_11) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/release_11_docs/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/release_11.zip) | [0.23.0](https://pypi.org/project/mlagents/0.23.0/) | [1.7.0](https://docs.unity3d.com/Packages/com.unity.ml-agents@1.7/manual/index.html) |

([multi-armed bandit](https://blogs.unity3d.com/2017/06/26/unity-ai-themed-blog-entries/)
and
[Q-learning](https://blogs.unity3d.com/2017/08/22/unity-ai-reinforcement-learning-with-q-learning/))
### More from Unity
- [Unity Robotics](https://github.com/Unity-Technologies/Unity-Robotics-Hub)
- [Unity Computer Vision](https://unity.com/computer-vision)
- [Unity Game Simulation](https://unity.com/products/game-simulation)
## Community and Feedback

8
com.unity.ml-agents.extensions/Runtime/Sensors/CountingGridSensor.cs


this.ChannelDepth = channelDepth;
if (DetectableObjects.Length != ChannelDepth.Length)
throw new UnityAgentsException("The channels of a CountingGridSensor is equal to the number of detectableObjects");
this.gridDepthType = GridDepthType.Channel;
this.gridDepthType = gridDepthType;
this.CellScaleX = cellScaleX;
this.CellScaleZ = cellScaleZ;
this.GridNumSideX = gridWidth;

/// <param name="foundColliders">The array of colliders</param>
/// <param name="cellIndex">The cell index the collider is in</param>
/// <param name="cellCenter">the center of the cell the collider is in</param>
protected override void ParseColliders(Collider[] foundColliders, int cellIndex, Vector3 cellCenter)
protected override void ParseColliders(Collider[] foundColliders, int numFound, int cellIndex, Vector3 cellCenter)
for (int i = 0; i < foundColliders.Length; i++)
for (int i = 0; i < numFound; i++)
{
currentColliderGo = foundColliders[i].gameObject;

closestColliderPoint = foundColliders[i].ClosestPointOnBounds(cellCenter);
LoadObjectData(currentColliderGo, cellIndex,
Vector3.Distance(closestColliderPoint, transform.position) / SphereRadius);
Vector3.Distance(closestColliderPoint, transform.position) * InverseSphereRadius);
}
}

264
com.unity.ml-agents.extensions/Runtime/Sensors/GridSensor.cs


using UnityEngine;
using UnityEngine.Assertions;
using Unity.MLAgents.Sensors;
using UnityEngine.Profiling;
namespace Unity.MLAgents.Extensions.Sensors
{

[Tooltip("The reference of the root of the agent. This is used to disambiguate objects with the same tag as the agent. Defaults to current GameObject")]
public GameObject rootReference;
[Header("Collider Buffer Properties")]
[Tooltip("The absolute max size of the Collider buffer used in the non-allocating Physics calls. In other words" +
" the Collider buffer will never grow beyond this number even if there are more Colliders in the Grid Cell.")]
public int MaxColliderBufferSize = 500;
[Tooltip(
"The Estimated Max Number of Colliders to expect per cell. This number is used to " +
"pre-allocate an array of Colliders in order to take advantage of the OverlapBoxNonAlloc " +
"Physics API. If the number of colliders found is >= InitialColliderBufferSize the array " +
"will be resized to double its current size. The hard coded absolute size is 500.")]
public int InitialColliderBufferSize = 4;
Collider[] m_ColliderBuffer;
float[] m_ChannelBuffer;
//
// Hidden Parameters
//

/// <summary>
/// Radius of grid, used for normalizing the distance.
/// </summary>
protected float SphereRadius;
protected float InverseSphereRadius;
/// <summary>
/// Total Number of cells (width*height)

NumCells = GridNumSideX * GridNumSideZ;
float sphereRadiusX = (CellScaleX * GridNumSideX) / Mathf.Sqrt(2);
float sphereRadiusZ = (CellScaleZ * GridNumSideZ) / Mathf.Sqrt(2);
SphereRadius = Mathf.Max(sphereRadiusX, sphereRadiusZ);
InverseSphereRadius = 1.0f / Mathf.Max(sphereRadiusX, sphereRadiusZ);
ChannelOffsets = new int[ChannelDepth.Length];
DiffNumSideZX = (GridNumSideZ - GridNumSideX);
OffsetGridNumSide = (GridNumSideZ - 1f) / 2f;

InitDepthType();
InitCellPoints();
InitPerceptionBuffer();
m_ColliderBuffer = new Collider[Math.Min(MaxColliderBufferSize, InitialColliderBufferSize)];
// Default root reference to current game object
if (rootReference == null)
rootReference = gameObject;

m_perceptionTexture2D = new Texture2D(GridNumSideX, GridNumSideZ, TextureFormat.RGB24, false);
}
/// <inheritdoc cref="ISensor.Reset"/>
void ISensor.Reset() { }
public void Reset()
public void ClearPerceptionBuffer()
{
if (m_PerceptionBuffer != null)
{

else
{
m_PerceptionBuffer = new float[NumberOfObservations];
m_ColliderBuffer = new Collider[Math.Min(MaxColliderBufferSize, InitialColliderBufferSize)];
}
if (ShowGizmos)

/// <returns>A float[] containing all of the information collected from the gridsensor</returns>
public float[] Perceive()
{
Reset();
if (m_ColliderBuffer == null)
{
return Array.Empty<float>();
}
ClearPerceptionBuffer();
// TODO: make these part of the class
Collider[] foundColliders = null;
Vector3 cellCenter = Vector3.zero;
var halfCellScale = new Vector3(CellScaleX / 2f, CellScaleY, CellScaleZ / 2f);
Vector3 halfCellScale = new Vector3(CellScaleX / 2f, CellScaleY, CellScaleZ / 2f);
for (int cellIndex = 0; cellIndex < NumCells; cellIndex++)
for (var cellIndex = 0; cellIndex < NumCells; cellIndex++)
int numFound;
Vector3 cellCenter;
cellCenter = transform.TransformPoint(CellPoints[cellIndex]);
foundColliders = Physics.OverlapBox(cellCenter, halfCellScale, transform.rotation, ObserveMask);
Transform transform1;
cellCenter = (transform1 = transform).TransformPoint(CellPoints[cellIndex]);
numFound = BufferResizingOverlapBoxNonAlloc(cellCenter, halfCellScale, transform1.rotation);
foundColliders = Physics.OverlapBox(cellCenter, halfCellScale, Quaternion.identity, ObserveMask);
numFound = BufferResizingOverlapBoxNonAlloc(cellCenter, halfCellScale, Quaternion.identity);
if (foundColliders != null && foundColliders.Length > 0)
if (numFound > 0)
ParseColliders(foundColliders, cellIndex, cellCenter);
ParseColliders(m_ColliderBuffer, numFound, cellIndex, cellCenter);
/// This method attempts to perform the Physics.OverlapBoxNonAlloc and will double the size of the Collider buffer
/// if the number of Colliders in the buffer after the call is equal to the length of the buffer.
/// </summary>
/// <param name="cellCenter"></param>
/// <param name="halfCellScale"></param>
/// <param name="rotation"></param>
/// <returns></returns>
int BufferResizingOverlapBoxNonAlloc(Vector3 cellCenter, Vector3 halfCellScale, Quaternion rotation)
{
int numFound;
// Since we can only get a fixed number of results, requery
// until we're sure we can hold them all (or until we hit the max size).
while (true)
{
numFound = Physics.OverlapBoxNonAlloc(cellCenter, halfCellScale, m_ColliderBuffer, rotation, ObserveMask);
if (numFound == m_ColliderBuffer.Length && m_ColliderBuffer.Length < MaxColliderBufferSize)
{
m_ColliderBuffer = new Collider[Math.Min(MaxColliderBufferSize, m_ColliderBuffer.Length * 2)];
InitialColliderBufferSize = m_ColliderBuffer.Length;
}
else
{
break;
}
}
return numFound;
}
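For reference, the resizing policy above grows the collider buffer geometrically: whenever a query fills the buffer completely, its size is doubled (capped at `MaxColliderBufferSize`) and the cell is queried again. A minimal standalone sketch of that growth rule, assuming the defaults shown earlier (`InitialColliderBufferSize = 4`, `MaxColliderBufferSize = 500`) and no Unity APIs:

```csharp
using System;

static class ColliderBufferGrowthSketch
{
    // Illustrative only: mirrors the doubling rule in BufferResizingOverlapBoxNonAlloc.
    // The buffer doubles whenever a query fills it completely and never grows past maxSize.
    static int NextBufferSize(int currentSize, int numFound, int maxSize)
    {
        if (numFound == currentSize && currentSize < maxSize)
        {
            return Math.Min(maxSize, currentSize * 2);
        }
        return currentSize; // already large enough, or at the cap
    }

    static void Main()
    {
        // A hypothetical cell containing 40 colliders triggers: 4 -> 8 -> 16 -> 32 -> 64.
        int size = 4;
        const int collidersInCell = 40;
        while (true)
        {
            int numFound = Math.Min(collidersInCell, size); // what the non-alloc query would report
            int next = NextBufferSize(size, numFound, 500);
            if (next == size) break;
            size = next;
        }
        Console.WriteLine(size); // prints 64
    }
}
```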
/// <summary>
/// <param name="numFound">Number of colliders found.</param>
protected virtual void ParseColliders(Collider[] foundColliders, int cellIndex, Vector3 cellCenter)
protected virtual void ParseColliders(Collider[] foundColliders, int numFound, int cellIndex, Vector3 cellCenter)
GameObject currentColliderGo = null;
Profiler.BeginSample("GridSensor.ParseColliders");
Vector3 closestColliderPoint = Vector3.zero;
float distance = float.MaxValue;
float currentDistance = 0f;
var minDistanceSquared = float.MaxValue;
for (int i = 0; i < foundColliders.Length; i++)
for (var i = 0; i < numFound; i++)
currentColliderGo = foundColliders[i].gameObject;
var currentColliderGo = foundColliders[i].gameObject;
if (currentColliderGo == rootReference)
if (ReferenceEquals(currentColliderGo, rootReference))
closestColliderPoint = foundColliders[i].ClosestPointOnBounds(cellCenter);
currentDistance = Vector3.Distance(closestColliderPoint, rootReference.transform.position);
var closestColliderPoint = foundColliders[i].ClosestPointOnBounds(cellCenter);
var currentDistanceSquared = (closestColliderPoint - rootReference.transform.position).sqrMagnitude;
if ((Array.IndexOf(DetectableObjects, currentColliderGo.tag) > -1) && (currentDistance < distance))
var index = -1;
for (var ii = 0; ii < DetectableObjects.Length; ii++)
{
if (currentColliderGo.CompareTag(DetectableObjects[ii]))
{
index = ii;
break;
}
}
if (index > -1 && currentDistanceSquared < minDistanceSquared)
distance = currentDistance;
minDistanceSquared = currentDistanceSquared;
if (closestColliderGo != null)
LoadObjectData(closestColliderGo, cellIndex, distance / SphereRadius);
if (!ReferenceEquals(closestColliderGo, null))
LoadObjectData(closestColliderGo, cellIndex, (float)Math.Sqrt(minDistanceSquared) * InverseSphereRadius);
Profiler.EndSample();
}
/// <summary>

/// </example>
protected virtual float[] GetObjectData(GameObject currentColliderGo, float typeIndex, float normalizedDistance)
{
float[] channelValues = new float[ChannelDepth.Length];
channelValues[0] = typeIndex;
return channelValues;
if (m_ChannelBuffer == null)
{
m_ChannelBuffer = new float[ChannelDepth.Length];
}
Array.Clear(m_ChannelBuffer, 0, m_ChannelBuffer.Length);
m_ChannelBuffer[0] = typeIndex;
return m_ChannelBuffer;
}
/// <summary>

/// </summary>
/// <param name="currentColliderGo">The game object that was found colliding with a certain cell</param>
/// <param name="cellIndex">The index of the current cell</param>
/// <param name="normalized_distance">A float between 0 and 1 describing the ratio of
/// <param name="normalizedDistance">A float between 0 and 1 describing the ratio of
protected virtual void LoadObjectData(GameObject currentColliderGo, int cellIndex, float normalized_distance)
protected virtual void LoadObjectData(GameObject currentColliderGo, int cellIndex, float normalizedDistance)
for (int i = 0; i < DetectableObjects.Length; i++)
Profiler.BeginSample("GridSensor.LoadObjectData");
var channelHotVals = new ArraySegment<float>(m_PerceptionBuffer, cellIndex * ObservationPerCell, ObservationPerCell);
for (var i = 0; i < DetectableObjects.Length; i++)
if (currentColliderGo != null && currentColliderGo.CompareTag(DetectableObjects[i]))
for (var ii = 0; ii < channelHotVals.Count; ii++)
{
m_PerceptionBuffer[channelHotVals.Offset + ii] = 0f;
}
if (!ReferenceEquals(currentColliderGo, null) && currentColliderGo.CompareTag(DetectableObjects[i]))
float[] channelValues = GetObjectData(currentColliderGo, (float)i + 1, normalized_distance);
float[] channelValues = GetObjectData(currentColliderGo, (float)i + 1, normalizedDistance);
if (ShowGizmos)
{
Color debugRayColor = Color.white;

}
CellActivity[cellIndex] = new Color(debugRayColor.r, debugRayColor.g, debugRayColor.b, .5f);
}

/// <remarks>
/// The observations are "channel based" so each grid is WxHxC where C is the number of channels
/// This typically means that each channel value is normalized between 0 and 1
/// If channelDepth is 1, the value is assumed normalized, else the value is normalized by the channelDepth
/// The channels are then stored consecutively in PerceptionBuffer.
/// NOTE: This is the only grid type that uses floating point values
/// For example, if a cell contains the 3rd type of 5 possible on the 2nd team of 3 possible teams:
/// channelValues = {2, 1}
/// ObservationPerCell = channelValues.Length
/// channelValues = {2f/5f, 1f/3f} = {.4, .33..}
/// Array.Copy(channelValues, 0, PerceptionBuffer, cell_id*ObservationPerCell, ObservationPerCell);
/// </remarks>
for (int j = 0; j < channelValues.Length; j++)
channelValues[j] /= ChannelDepth[j];
// The observations are "channel based" so each grid is WxHxC where C is the number of channels
// This typically means that each channel value is normalized between 0 and 1
// If channelDepth is 1, the value is assumed normalized, else the value is normalized by the channelDepth
// The channels are then stored consecutively in PerceptionBuffer.
// NOTE: This is the only grid type that uses floating point values
// For example, if a cell contains the 3rd type of 5 possible on the 2nd team of 3 possible teams:
// channelValues = {2, 1}
// ObservationPerCell = channelValues.Length
// channelValues = {2f/5f, 1f/3f} = {.4, .33..}
// Array.Copy(channelValues, 0, PerceptionBuffer, cell_id*ObservationPerCell, ObservationPerCell);
for (int j = 0; j < channelValues.Length; j++)
{
channelValues[j] /= ChannelDepth[j];
}
Array.Copy(channelValues, 0, m_PerceptionBuffer, cellIndex * ObservationPerCell, ObservationPerCell);
break;
Array.Copy(channelValues, 0, m_PerceptionBuffer, cellIndex * ObservationPerCell, ObservationPerCell);
break;
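As a worked example of the "channel based" case (not part of the commit), the following sketch reproduces the values from the remarks above: a cell containing the 3rd of 5 possible types on the 2nd of 3 possible teams, i.e. `channelValues = {2, 1}` with `ChannelDepth = {5, 3}`:

```csharp
using System;

static class ChannelBasedEncodingSketch
{
    static void Main()
    {
        // Hypothetical cell contents, matching the example in the remarks.
        int[] channelDepth = { 5, 3 };
        float[] channelValues = { 2f, 1f };

        // "Channel based": each value is normalized by its channel depth,
        // then stored consecutively in the cell's slice of the perception buffer.
        var cellSlice = new float[channelValues.Length]; // ObservationPerCell == 2
        for (int j = 0; j < channelValues.Length; j++)
        {
            cellSlice[j] = channelValues[j] / channelDepth[j];
        }
        Console.WriteLine(string.Join(", ", cellSlice)); // ~0.4, ~0.33
    }
}
```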
/// <remarks>
/// The observations are "channel hot" so each grid is WxHxD where D is the sum of all of the channel depths
/// The opposite of the "channel based" case, the channel values are represented as one hot vector per channel and then concatenated together
/// Thus channelDepth is assumed to be greater than 1.
/// For example, if a cell contains the 3rd type of 5 possible on the 2nd team of 3 possible teams,
/// channelValues = {2, 1}
/// channelOffsets = {5, 3}
/// ObservationPerCell = 5 + 3 = 8
/// channelHotVals = {0, 0, 1, 0, 0, 0, 1, 0}
/// Array.Copy(channelHotVals, 0, PerceptionBuffer, cell_id*ObservationPerCell, ObservationPerCell);
/// </remarks>
float[] channelHotVals = new float[ObservationPerCell];
for (int j = 0; j < channelValues.Length; j++)
if (ChannelDepth[j] > 1)
{
channelHotVals[(int)channelValues[j] + ChannelOffsets[j]] = 1f;
}
else
// The observations are "channel hot" so each grid is WxHxD where D is the sum of all of the channel depths
// The opposite of the "channel based" case, the channel values are represented as one hot vector per channel and then concatenated together
// Thus channelDepth is assumed to be greater than 1.
// For example, if a cell contains the 3rd type of 5 possible on the 2nd team of 3 possible teams,
// channelValues = {2, 1}
// channelOffsets = {5, 3}
// ObservationPerCell = 5 + 3 = 8
// channelHotVals = {0, 0, 1, 0, 0, 0, 1, 0}
// Array.Copy(channelHotVals, 0, PerceptionBuffer, cell_id*ObservationPerCell, ObservationPerCell);
for (int j = 0; j < channelValues.Length; j++)
channelHotVals[ChannelOffsets[j]] = channelValues[j];
if (ChannelDepth[j] > 1)
{
m_PerceptionBuffer[channelHotVals.Offset + (int)channelValues[j] + ChannelOffsets[j]] = 1f;
}
else
{
m_PerceptionBuffer[channelHotVals.Offset + ChannelOffsets[j]] = channelValues[j];
}
break;
}
Array.Copy(channelHotVals, 0, m_PerceptionBuffer, cellIndex * ObservationPerCell, ObservationPerCell);
break;
}
Profiler.EndSample();
}
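The "channel hot" case writes one one-hot sub-vector per channel into the cell's slice of the perception buffer. A standalone sketch (not part of the commit) reproducing the `{0, 0, 1, 0, 0, 0, 1, 0}` example above; note that, given the indexing used in the code, the per-channel offsets are treated here as the cumulative sum of the preceding channel depths (`{0, 5}` for depths `{5, 3}`), which is the assumption needed to reproduce that example:

```csharp
using System;

static class ChannelHotEncodingSketch
{
    static void Main()
    {
        // Same hypothetical cell as before: value 2 in a channel of depth 5,
        // value 1 in a channel of depth 3.
        int[] channelDepth = { 5, 3 };
        float[] channelValues = { 2f, 1f };

        // Offsets assumed to be the cumulative sum of the preceding depths: {0, 5}.
        int observationPerCell = 0;
        var channelOffsets = new int[channelDepth.Length];
        for (int j = 0; j < channelDepth.Length; j++)
        {
            channelOffsets[j] = observationPerCell;
            observationPerCell += channelDepth[j];
        }

        // "Channel hot": one one-hot sub-vector per channel, concatenated per cell.
        var channelHotVals = new float[observationPerCell]; // 5 + 3 = 8
        for (int j = 0; j < channelValues.Length; j++)
        {
            if (channelDepth[j] > 1)
            {
                channelHotVals[(int)channelValues[j] + channelOffsets[j]] = 1f;
            }
            else
            {
                channelHotVals[channelOffsets[j]] = channelValues[j];
            }
        }
        Console.WriteLine(string.Join(", ", channelHotVals)); // 0, 0, 1, 0, 0, 0, 1, 0
    }
}
```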
/// <summary>Converts the index of the cell to the 3D point (y is zero)</summary>

CellActivity[toCellID] = CellActivity[fromCellID];
}
/// <summary>Creates a copy of a float array</summary>
/// <returns>float[] of the original data</returns>
/// <param name="array">The array to copy from</parma>
private static float[] CreateCopy(float[] array)
{
float[] b = new float[array.Length];
System.Buffer.BlockCopy(array, 0, b, 0, array.Length * sizeof(float));
return b;
}
/// <summary>Utility method to find the index of a tag</summary>
/// <returns>Index of the tag in DetectableObjects, if it is in there</returns>
/// <param name="tag">The tag to search for</param>
public int IndexOfTag(string tag)
{
return Array.IndexOf(DetectableObjects, tag);
}
void OnDrawGizmos()
{
if (ShowGizmos)

Perceive();
Vector3 scale = new Vector3(CellScaleX, 1, CellScaleZ);
Vector3 offset = new Vector3(0, GizmoYOffset, 0);
Matrix4x4 oldGizmoMatrix = Gizmos.matrix;
Matrix4x4 cubeTransform = Gizmos.matrix;
for (int i = 0; i < NumCells; i++)
var scale = new Vector3(CellScaleX, 1, CellScaleZ);
var offset = new Vector3(0, GizmoYOffset, 0);
var oldGizmoMatrix = Gizmos.matrix;
for (var i = 0; i < NumCells; i++)
Matrix4x4 cubeTransform;
if (RotateToAgent)
{
cubeTransform = Matrix4x4.TRS(CellToPoint(i) + offset, transform.rotation, scale);

}
/// <inheritdoc/>
void ISensor.Update() { }
void ISensor.Update()
{
using (TimerStack.Instance.Scoped("GridSensor.Update"))
{
Perceive();
}
}
/// <summary>Gets the observation shape</summary>
/// <returns>int[] of the observation shape</returns>

{
using (TimerStack.Instance.Scoped("GridSensor.WriteToTensor"))
{
Perceive();
int index = 0;
for (var h = GridNumSideZ - 1; h >= 0; h--) // height
{

4
com.unity.ml-agents.extensions/Runtime/Unity.ML-Agents.Extensions.asmdef


"Unity.Barracuda",
"Unity.ML-Agents",
"Unity.ML-Agents.Extensions.Input"
],
"includePlatforms": [],
"excludePlatforms": []
]
}

15
com.unity.ml-agents/CHANGELOG.md


## [Unreleased]
### Major Changes
#### com.unity.ml-agents (C#)
- The `BufferSensor` and `BufferSensorComponent` have been added. They allow the Agent to observe a variable number of entities. (#4909)
#### ml-agents / ml-agents-envs / gym-unity (Python)
### Minor Changes

- The `cattrs` version dependency was updated to allow `>=1.1.0` on Python 3.8 or higher. (#4821)
### Bug Fixes
#### com.unity.ml-agents (C#)
#### ml-agents / ml-agents-envs / gym-unity (Python)

- Added a `--torch-device` commandline option to `mlagents-learn`, which sets the default
[`torch.device`](https://pytorch.org/docs/stable/tensor_attributes.html#torch.torch.device) used for training. (#4888)
- The `--cpu` commandline option had no effect and was removed. Use `--torch-device=cpu` to force CPU training. (#4888)
- The `mlagents_env` API has changed, `BehaviorSpec` now has a `observation_specs` property containing a list of `ObservationSpec`. For more information on `ObservationSpec` see [here](https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Python-API.md#behaviorspec). (#4763, #4825)
- The `mlagents_env` API has changed, `BehaviorSpec` now has a `observation_specs` property containing a list of `ObservationSpec`. For more information on `ObservationSpec` see [here](https://github.com/Unity-Technologies/ml-agents/blob/main/docs/Python-API.md#behaviorspec). (#4763, #4825)
### Bug Fixes
#### com.unity.ml-agents (C#)

#### com.unity.ml-agents (C#)
#### ml-agents / ml-agents-envs / gym-unity (Python)
- PyTorch trainers are now the default. See the
[installation docs](https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Installation.md) for
[installation docs](https://github.com/Unity-Technologies/ml-agents/blob/main/docs/Installation.md) for
more information on installing PyTorch. For the time being, TensorFlow is still available;
you can use the TensorFlow backend by adding `--tensorflow` to the CLI, or
adding `framework: tensorflow` in the configuration YAML. (#4517)

- The Barracuda dependency was upgraded to 1.1.2 (#4571)
- Utilities were added to `com.unity.ml-agents.extensions` to make it easier to
integrate with match-3 games. See the [readme](https://github.com/Unity-Technologies/ml-agents/blob/master/com.unity.ml-agents.extensions/Documentation~/Match3.md)
integrate with match-3 games. See the [readme](https://github.com/Unity-Technologies/ml-agents/blob/main/com.unity.ml-agents.extensions/Documentation~/Match3.md)
for more details. (#4515)
#### ml-agents / ml-agents-envs / gym-unity (Python)
- The `action_probs` node is no longer listed as an output in TensorFlow models (#4613).

#### ml-agents / ml-agents-envs / gym-unity (Python)
- Added the Random Network Distillation (RND) intrinsic reward signal to the Pytorch
trainers. To use RND, add a `rnd` section to the `reward_signals` section of your
yaml configuration file. [More information here](https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Training-Configuration-File.md#rnd-intrinsic-reward) (#4473)
yaml configuration file. [More information here](https://github.com/Unity-Technologies/ml-agents/blob/main/docs/Training-Configuration-File.md#rnd-intrinsic-reward) (#4473)
### Minor Changes
#### com.unity.ml-agents (C#)
- Stacking for compressed observations is now supported. An additional setting

### Major Changes
#### ml-agents / ml-agents-envs / gym-unity (Python)
- The Parameter Randomization feature has been refactored to enable sampling of new parameters per episode to improve robustness. The
`resampling-interval` parameter has been removed and the config structure updated. More information [here](https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Training-ML-Agents.md). (#4065)
`resampling-interval` parameter has been removed and the config structure updated. More information [here](https://github.com/Unity-Technologies/ml-agents/blob/main/docs/Training-ML-Agents.md). (#4065)
[here](https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Training-ML-Agents.md).(#4160)
[here](https://github.com/Unity-Technologies/ml-agents/blob/main/docs/Training-ML-Agents.md).(#4160)
### Minor Changes
#### com.unity.ml-agents (C#)

4
com.unity.ml-agents/CONTRIBUTING.md


## Communication
First, please read through our
[code of conduct](https://github.com/Unity-Technologies/ml-agents/blob/master/CODE_OF_CONDUCT.md),
[code of conduct](https://github.com/Unity-Technologies/ml-agents/blob/main/CODE_OF_CONDUCT.md),
as we expect all our contributors to follow it.
Second, before starting on a project that you intend to contribute to the

## Git Branches
The master branch corresponds to the most recent version of the project. Note
The main branch corresponds to the most recent version of the project. Note
that this may be newer than the
[latest release](https://github.com/Unity-Technologies/ml-agents/releases/tag/latest_release).

8
com.unity.ml-agents/Runtime/Inference/BarracudaModelParamLoader.cs


if (sensor.GetObservationShape().Length == 3)
{
if (!tensorsNames.Contains(
TensorNames.VisualObservationPlaceholderPrefix + visObsIndex))
TensorNames.GetVisualObservationName(visObsIndex)))
{
failedModelChecks.Add(
"The model does not contain a Visual Observation Placeholder Input " +

if (sensor.GetObservationShape().Length == 2)
{
if (!tensorsNames.Contains(
TensorNames.ObservationPlaceholderPrefix + sensorIndex))
TensorNames.GetObservationName(sensorIndex)))
{
failedModelChecks.Add(
"The model does not contain an Observation Placeholder Input " +

if (sens.GetObservationShape().Length == 3)
{
tensorTester[TensorNames.VisualObservationPlaceholderPrefix + visObsIndex] =
tensorTester[TensorNames.GetVisualObservationName(visObsIndex)] =
tensorTester[TensorNames.ObservationPlaceholderPrefix + sensorIndex] =
tensorTester[TensorNames.GetObservationName(sensorIndex)] =
(bp, tensor, scs, i) => CheckRankTwoObsShape(tensor, sens);
}
}

4
com.unity.ml-agents/Runtime/Inference/TensorGenerator.cs


// If the tensor is of rank 2, we use the index of the sensor
// to create the name
obsGen = new ObservationGenerator(allocator);
obsGenName = TensorNames.ObservationPlaceholderPrefix + sensorIndex;
obsGenName = TensorNames.GetObservationName(sensorIndex);
obsGenName = TensorNames.VisualObservationPlaceholderPrefix + visIndex;
obsGenName = TensorNames.GetVisualObservationName(visIndex);
visIndex++;
break;
default:

20
com.unity.ml-agents/Runtime/Inference/TensorNames.cs


public const string SequenceLengthPlaceholder = "sequence_length";
public const string VectorObservationPlaceholder = "vector_observation";
public const string RecurrentInPlaceholder = "recurrent_in";
public const string recurrentInPlaceholderH = "recurrent_in_h";
public const string recurrentInPlaceholderC = "recurrent_in_c";
public const string VisualObservationPlaceholderPrefix = "visual_observation_";
public const string ObservationPlaceholderPrefix = "obs_";
public const string PreviousActionPlaceholder = "prev_action";

public const string ValueEstimateOutput = "value_estimate";
public const string RecurrentOutput = "recurrent_out";
public const string recurrentOutputH = "recurrent_out_h";
public const string recurrentOutputC = "recurrent_out_c";
public const string MemorySize = "memory_size";
public const string VersionNumber = "version_number";
public const string ContinuousActionOutputShape = "continuous_action_output_shape";

public const string IsContinuousControlDeprecated = "is_continuous_control";
public const string ActionOutputDeprecated = "action";
public const string ActionOutputShapeDeprecated = "action_output_shape";
/// <summary>
/// Returns the name of the visual observation with a given index
/// </summary>
public static string GetVisualObservationName(int index)
{
return VisualObservationPlaceholderPrefix + index;
}
/// <summary>
/// Returns the name of the observation with a given index
/// </summary>
public static string GetObservationName(int index)
{
return ObservationPlaceholderPrefix + index;
}
}
}
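As a quick illustration of the new name helpers (a self-contained sketch mirroring the constants above, not the package source):

```csharp
using System;

static class TensorNameSketch
{
    // Illustrative copies of the prefixes defined in TensorNames.
    const string VisualObservationPlaceholderPrefix = "visual_observation_";
    const string ObservationPlaceholderPrefix = "obs_";

    // The helpers simply append the sensor index to the prefix.
    static string GetVisualObservationName(int index) => VisualObservationPlaceholderPrefix + index;
    static string GetObservationName(int index) => ObservationPlaceholderPrefix + index;

    static void Main()
    {
        Console.WriteLine(GetVisualObservationName(1)); // visual_observation_1
        Console.WriteLine(GetObservationName(0));       // obs_0
    }
}
```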

8
com.unity.ml-agents/Runtime/Sensors/BufferSensor.cs


/// </summary>
public class BufferSensor : ISensor, IDimensionPropertiesSensor, IBuiltInSensor
{
private string m_Name;
private int m_MaxNumObs;
private int m_ObsSize;
float[] m_ObservationBuffer;

DimensionProperty.None
};
public BufferSensor(int maxNumberObs, int obsSize)
public BufferSensor(int maxNumberObs, int obsSize, string name)
m_Name = name;
m_MaxNumObs = maxNumberObs;
m_ObsSize = obsSize;
m_ObservationBuffer = new float[m_ObsSize * m_MaxNumObs];

Array.Clear(m_ObservationBuffer, 0, m_ObservationBuffer.Length);
}
/// <inheritdoc/>
/// <inheritdoc/>
return "BufferSensor";
return m_Name;
}
/// <inheritdoc/>

31
com.unity.ml-agents/Runtime/Sensors/BufferSensorComponent.cs


[AddComponentMenu("ML Agents/Buffer Sensor", (int)MenuGroup.Sensors)]
public class BufferSensorComponent : SensorComponent
{
/// <summary>
/// Name of the generated <see cref="bufferSensor"/> object.
/// Note that changing this at runtime does not affect how the Agent sorts the sensors.
/// </summary>
public string SensorName
{
get { return m_SensorName; }
set { m_SensorName = value; }
}
[HideInInspector, SerializeField]
private string m_SensorName = "BufferSensor";
public int ObservableSize;
public int ObservableSize
{
get { return m_ObservableSize; }
set { m_ObservableSize = value; }
}
[HideInInspector, SerializeField]
private int m_ObservableSize;
public int MaxNumObservables;
public int MaxNumObservables
{
get { return m_MaxNumObservables; }
set { m_MaxNumObservables = value; }
}
[HideInInspector, SerializeField]
private int m_MaxNumObservables;
private BufferSensor m_Sensor;

m_Sensor = new BufferSensor(MaxNumObservables, ObservableSize);
m_Sensor = new BufferSensor(MaxNumObservables, ObservableSize, m_SensorName);
return m_Sensor;
}

5
com.unity.ml-agents/Tests/Editor/Sensor/BufferSensorTest.cs


public void TestBufferSensor()
{
var bufferSensor = new BufferSensor(20, 4);
var bufferSensor = new BufferSensor(20, 4, "testName");
var shape = bufferSensor.GetObservationShape();
var dimProp = bufferSensor.GetDimensionProperties();
Assert.AreEqual(shape[0], 20);

var bufferComponent = agentGameObj.AddComponent<BufferSensorComponent>();
bufferComponent.MaxNumObservables = 20;
bufferComponent.ObservableSize = 4;
bufferComponent.SensorName = "TestName";
var sensor = bufferComponent.CreateSensor();
var shape = bufferComponent.GetObservationShape();

Assert.AreEqual(shape, obs.Shape);
Assert.AreEqual(obs.DimensionProperties.Count, 2);
Assert.AreEqual(sensor.GetName(), "TestName");
for (int i = 0; i < 8; i++)
{

2
docs/Installation-Anaconda-Windows.md


```
The `--branch release_13` option will switch to the tag of the latest stable
release. Omitting that will get the `master` branch which is potentially
release. Omitting that will get the `main` branch which is potentially
unstable.
If you don't want to use Git, you can find download links on the

4
docs/Installation.md


```
The `--branch release_13` option will switch to the tag of the latest stable
release. Omitting that will get the `master` branch which is potentially
release. Omitting that will get the `main` branch which is potentially
unstable.
#### Advanced: Local Installation for Development

back, make sure to clone the `master` branch (by omitting `--branch release_13`
back, make sure to clone the `main` branch (by omitting `--branch release_13`
from the command above). See our
[Contributions Guidelines](../com.unity.ml-agents/CONTRIBUTING.md) for more
information on contributing to the ML-Agents Toolkit.

2
docs/Migrating.md


- The Parameter Randomization feature has been merged with the Curriculum feature. It is now possible to specify a sampler
in the lesson of a Curriculum. Curriculum has been refactored and is now specified at the level of the parameter, not the
behavior. More information
[here](https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Training-ML-Agents.md).(#4160)
[here](https://github.com/Unity-Technologies/ml-agents/blob/main/docs/Training-ML-Agents.md).(#4160)
### Steps to Migrate
- The configuration format for curriculum and parameter randomization has changed. To upgrade your configuration files,

2
docs/localized/KR/docs/Installation-Anaconda-Windows.md


git clone https://github.com/Unity-Technologies/ml-agents.git
```
If you do not want to use Git, you can download all the files from this [link](https://github.com/Unity-Technologies/ml-agents/archive/master.zip).
If you do not want to use Git, you can download all the files from this [link](https://github.com/Unity-Technologies/ml-agents/archive/main.zip).
The `UnitySDK` subdirectory contains the Unity assets to add to your project. It also includes many [example environments](Learning-Environment-Examples.md) to help you get started.

6
docs/localized/RU/README.md


<img src="https://github.com/Unity-Technologies/ml-agents/blob/master/docs/images/image-banner.png" align="middle" width="3000"/>
<img src="https://github.com/Unity-Technologies/ml-agents/blob/main/docs/images/image-banner.png" align="middle" width="3000"/>
# Unity ML-Agents Toolkit Version Release 7

See [here](https://github.com/Unity-Technologies/ml-agents/tree/release_7_docs/docs/Readme.md)
to get started with the latest release of ML-Agents.**
The table below lists all our releases, including the master branch, on which we are actively working
The table below lists all our releases, including the main branch, on which we are actively working
and which may be unstable. Helpful information:
[Versioning](docs/Versioning.md) describes how we manage our GitHub releases.

| **Version** | **Release Date** | **Source** | **Documentation** | **Download** |
|:-------:|:------:|:-------------:|:-------:|:------------:|
| **master (unstable)** | -- | [source](https://github.com/Unity-Technologies/ml-agents/tree/master) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/master/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/master.zip) |
| **main (unstable)** | -- | [source](https://github.com/Unity-Technologies/ml-agents/tree/main) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/main/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/main.zip) |
| **Release 7** | **September 16, 2020** | **[source](https://github.com/Unity-Technologies/ml-agents/tree/release_7)** | **[docs](https://github.com/Unity-Technologies/ml-agents/tree/release_7_docs/docs/Readme.md)** | **[download](https://github.com/Unity-Technologies/ml-agents/archive/release_7.zip)** |
| **Release 6** | August 12, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/release_6) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/release_6_docs/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/release_6.zip) |
| **Release 5** | July 31, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/release_5) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/release_5_docs/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/release_5.zip) |

30
docs/localized/RU/docs/Начало работы.md


# Getting Started
In this article we will walk step by step through one of [our examples](https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Learning-Environment-Examples.md),
In this article we will walk step by step through one of [our examples](https://github.com/Unity-Technologies/ml-agents/blob/main/docs/Learning-Environment-Examples.md),
[Background: Unity](https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Background-Unity.md).
[Background: Unity](https://github.com/Unity-Technologies/ml-agents/blob/main/docs/Background-Unity.md).
[Background: Machine Learning](https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Background-Machine-Learning.md).
[Background: Machine Learning](https://github.com/Unity-Technologies/ml-agents/blob/main/docs/Background-Machine-Learning.md).
![Heads with balls](https://github.com/Unity-Technologies/ml-agents/blob/master/docs/images/balance.png)
![Heads with balls](https://github.com/Unity-Technologies/ml-agents/blob/main/docs/images/balance.png)
Our scene is **3D Balance Ball**. Its agents are blue platform cubes, each with
a ball on its head. They are all copies of one another. Each cube agent tries

## Installation
If you have not installed ML-Agents yet, follow this
installation guide (https://github.com/Unity-Technologies/ml-agents/tree/master/docs/localized/RU/docs/Установка.md).
installation guide (https://github.com/Unity-Technologies/ml-agents/tree/main/docs/localized/RU/docs/Установка.md).
1. Next, open the Unity Project that contains the examples:
1. Launch Unity Hub

with the _environment_, gathering data about it. In Unity, an environment is a scene that contains one
or more Agent objects and, of course, the other objects the agent interacts with.
![Unity-Editor](https://raw.githubusercontent.com/Unity-Technologies/ml-agents/master/docs/images/mlagents-3DBallHierarchy.png)
![Unity-Editor](https://raw.githubusercontent.com/Unity-Technologies/ml-agents/main/docs/images/mlagents-3DBallHierarchy.png)
Note: in Unity every object in a scene is a GameObject. A GameObject is
essentially a container for everything related to the object: its physics, graphics, behavior, and so on,

## Running a pre-trained model
We include pre-trained models (files with the `.nn` extension) in our examples
and use the [Unity Inference Engine](https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Unity-Inference-Engine.md)
and use the [Unity Inference Engine](https://github.com/Unity-Technologies/ml-agents/blob/main/docs/Unity-Inference-Engine.md)
to run them inside Unity. In this section we will use one
of these models for 3D Ball.

So if you need to make a change to every platform,
you can simply edit the prefab instead.
![Platform Prefab](https://raw.githubusercontent.com/Unity-Technologies/ml-agents/master/docs/images/platform_prefab.png)
![Platform Prefab](https://raw.githubusercontent.com/Unity-Technologies/ml-agents/main/docs/images/platform_prefab.png)
![3dball learning brain](https://raw.githubusercontent.com/Unity-Technologies/ml-agents/master/docs/images/3dball_learning_brain.png)
![3dball learning brain](https://raw.githubusercontent.com/Unity-Technologies/ml-agents/main/docs/images/3dball_learning_brain.png)
1. Now every `Agent` on every `3DBall` platform in the **Hierarchy** window
should have the **3DBall** behavior model in its `Behavior Parameters`.

one training phase to the next. This is a sign that the training process is going well.
**Note:** You can train an agent using executables instead of the Editor.
See the [Using an Executable](https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Learning-Environment-Executable.md) guide.
See the [Using an Executable](https://github.com/Unity-Technologies/ml-agents/blob/main/docs/Learning-Environment-Executable.md) guide.
### Observing the training process

(environment/cumulative reward per episode), which should increase during training,
approaching 100, the maximum value the agent can reach.
![Example TensorBoard Run](https://github.com/Unity-Technologies/ml-agents/blob/master/docs/images/mlagents-TensorBoard.png)
![Example TensorBoard Run](https://github.com/Unity-Technologies/ml-agents/blob/main/docs/images/mlagents-TensorBoard.png)
## Embedding the model in a Unity environment

### Next steps
- For more information about the ML-Agents Toolkit,
see the [ML-Agents Toolkit Overview](https://github.com/Unity-Technologies/ml-agents/blob/master/docs/ML-Agents-Overview.md).
- [Creating your own scenes](https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Learning-Environment-Create-New.md)
see the [ML-Agents Toolkit Overview](https://github.com/Unity-Technologies/ml-agents/blob/main/docs/ML-Agents-Overview.md).
- [Creating your own scenes](https://github.com/Unity-Technologies/ml-agents/blob/main/docs/Learning-Environment-Create-New.md)
example in [ML-Agents - Example Environments](https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Learning-Environment-Examples.md)
- Information about the various training options: [Training ML-Agents](https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Training-ML-Agents.md)
example in [ML-Agents - Example Environments](https://github.com/Unity-Technologies/ml-agents/blob/main/docs/Learning-Environment-Examples.md)
- Information about the various training options: [Training ML-Agents](https://github.com/Unity-Technologies/ml-agents/blob/main/docs/Training-ML-Agents.md)

32
docs/localized/RU/docs/Установка.md


The ML-Agents Toolkit consists of several components:
- A dedicated Unity package ([`com.unity.ml-agents`](https://github.com/Unity-Technologies/ml-agents/tree/master/com.unity.ml-agents))
- A dedicated Unity package ([`com.unity.ml-agents`](https://github.com/Unity-Technologies/ml-agents/tree/main/com.unity.ml-agents))
- ([`mlagents`](https://github.com/Unity-Technologies/ml-agents/tree/master/ml-agents)) is the package with the machine learning algorithms,
- ([`mlagents`](https://github.com/Unity-Technologies/ml-agents/tree/main/ml-agents)) is the package with the machine learning algorithms,
- ([`mlagents_envs`](https://github.com/Unity-Technologies/ml-agents/tree/master/ml-agents-envs) contains the Python
- ([`mlagents_envs`](https://github.com/Unity-Technologies/ml-agents/tree/main/ml-agents-envs) contains the Python
- ([`gym_unity`](https://github.com/Unity-Technologies/ml-agents/tree/master/gym-unity)) lets you wrap your scene
- ([`gym_unity`](https://github.com/Unity-Technologies/ml-agents/tree/main/gym-unity)) lets you wrap your scene
- A Unity [Project](https://github.com/Unity-Technologies/ml-agents/tree/master/Project),
containing [example scenes](https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Learning-Environment-Examples.md),
- A Unity [Project](https://github.com/Unity-Technologies/ml-agents/tree/main/Project),
containing [example scenes](https://github.com/Unity-Technologies/ml-agents/blob/main/docs/Learning-Environment-Examples.md),
which demonstrate the various ML-Agents features.
In short, to install and use ML-Agents you need to:

You will need to clone the repository if you plan to make changes
or add something to ML-Agents for your own purposes. If you plan to make
these changes publicly available, please clone the master branch.
these changes publicly available, please clone the main branch.
Then see the [contribution guide](https://github.com/Unity-Technologies/ml-agents/blob/master/com.unity.ml-agents/CONTRIBUTING.md).
Then see the [contribution guide](https://github.com/Unity-Technologies/ml-agents/blob/main/com.unity.ml-agents/CONTRIBUTING.md).
### Installing the `com.unity.ml-agents` Unity package
The Unity ML-Agents C# SDK is a Unity package. You can install it directly from the Package Manager,

In Unity 2019.3 it is at the top left.
<p align="center">
<img src="https://raw.githubusercontent.com/Unity-Technologies/ml-agents/master/docs/images/unity_package_manager_window.png"
<img src="https://raw.githubusercontent.com/Unity-Technologies/ml-agents/main/docs/images/unity_package_manager_window.png"
<img src="https://raw.githubusercontent.com/Unity-Technologies/ml-agents/master/docs/images/unity_package_json.png"
<img src="https://raw.githubusercontent.com/Unity-Technologies/ml-agents/main/docs/images/unity_package_json.png"
alt="package.json"
height="300"
border="10" />

Virtual Environments let you manage previously installed versions so
that one project has one set of dependencies and another project has a different set, on each
of the operating systems: Mac / Windows / Linux.
See the [Virtual Environments](https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Using-Virtual-Environment.md) guide.
See the [Virtual Environments](https://github.com/Unity-Technologies/ml-agents/blob/main/docs/Using-Virtual-Environment.md) guide.
To install the `mlagents` Python package, activate your virtual environment
and run the following on the command line:

the `mlagents-learn --help` command; after
running it you will see the set of available `mlagents-learn` commands.
By installing mlagents you also install everything it is built on;
see the [setup.py file](https://github.com/Unity-Technologies/ml-agents/blob/master/ml-agents/setup.py).
This includes [TensorFlow](https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Background-TensorFlow.md)
see the [setup.py file](https://github.com/Unity-Technologies/ml-agents/blob/main/ml-agents/setup.py).
This includes [TensorFlow](https://github.com/Unity-Technologies/ml-agents/blob/main/docs/Background-TensorFlow.md)
(requires CPU support w/ AVX).
#### Advanced: local installation for development

### Next steps
The ["Getting Started"](https://github.com/Unity-Technologies/ml-agents/tree/master/docs/localized/RU/docs/Начало-работы.md) guide
The ["Getting Started"](https://github.com/Unity-Technologies/ml-agents/tree/main/docs/localized/RU/docs/Начало-работы.md) guide
contains a series of short tutorials on setting up ML-Agents inside Unity
and running a pre-trained model, as well as tutorials on creating and extending the scenes
in which your agent will be trained.

If you run into difficulties with ML-Agents, please consult the [FAQ](https://github.com/Unity-Technologies/ml-agents/blob/master/docs/FAQ.md)
and the [Limitations](https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Limitations.md) page.
If you run into difficulties with ML-Agents, please consult the [FAQ](https://github.com/Unity-Technologies/ml-agents/blob/main/docs/FAQ.md)
and the [Limitations](https://github.com/Unity-Technologies/ml-agents/blob/main/docs/Limitations.md) page.
If that does not solve your problem, open an issue [here](https://github.com/Unity-Technologies/ml-agents/issues),
providing your operating system, Python version, and the error message (if any).

3
ml-agents-envs/mlagents_envs/base_env.py


from mlagents_envs.exception import UnityActionException
AgentId = int
GroupId = int
BehaviorName = str

reward: float
interrupted: bool
agent_id: AgentId
group_id: int
group_id: GroupId
group_reward: float

2
ml-agents-envs/mlagents_envs/tests/test_envs.py


)
env = UnityEnvironment(file_name=file_name, worker_id=0, base_port=base_port)
assert expected == env._port
env.close()
@mock.patch("mlagents_envs.env_utils.launch_executable")

args = env._executable_args()
log_file_index = args.index("-logFile")
assert args[log_file_index + 1] == "./some-log-folder-path/Player-0.log"
env.close()
@mock.patch("mlagents_envs.env_utils.launch_executable")

11
ml-agents-envs/mlagents_envs/tests/test_rpc_utils.py


import io
import numpy as np
import pytest
from typing import List, Tuple
from typing import List, Tuple, Any
from mlagents_envs.communicator_objects.agent_info_pb2 import AgentInfoProto
from mlagents_envs.communicator_objects.observation_pb2 import (

reward = decision_steps.reward[agent_id_index]
done = False
max_step_reached = False
agent_mask = None
agent_mask: Any = None
agent_mask = [] # type: ignore
agent_mask = []
agent_mask = agent_mask.astype(np.bool).tolist()
observations: List[ObservationProto] = []
for all_observations_of_type in decision_steps.obs:
observation = all_observations_of_type[agent_id_index]

reward=reward,
done=done,
id=agent_id,
max_step_reached=max_step_reached,
max_step_reached=bool(max_step_reached),
action_mask=agent_mask,
observations=observations,
)

reward=reward,
done=done,
id=agent_id,
max_step_reached=max_step_reached,
max_step_reached=bool(max_step_reached),
action_mask=None,
observations=final_observations,
)

146
ml-agents/mlagents/trainers/agent_processor.py


StatsAggregationMethod,
EnvironmentStats,
)
from mlagents.trainers.trajectory import GroupmateStatus, Trajectory, AgentExperience
from mlagents.trainers.trajectory import AgentStatus, Trajectory, AgentExperience
from mlagents.trainers.behavior_id_utils import get_global_agent_id, get_global_group_id
from mlagents.trainers.behavior_id_utils import (
get_global_agent_id,
get_global_group_id,
GlobalAgentId,
GlobalGroupId,
)
T = TypeVar("T")

:param max_trajectory_length: Maximum length of a trajectory before it is added to the trainer.
:param stats_category: The category under which to write the stats. Usually, this comes from the Trainer.
"""
self.experience_buffers: Dict[str, List[AgentExperience]] = defaultdict(list)
self.last_step_result: Dict[str, Tuple[DecisionStep, int]] = {}
# current_group_obs is used to collect the current, most recently seen
self._experience_buffers: Dict[
GlobalAgentId, List[AgentExperience]
] = defaultdict(list)
self._last_step_result: Dict[GlobalAgentId, Tuple[DecisionStep, int]] = {}
# current_group_obs is used to collect the current (i.e. the most recently seen)
self.current_group_obs: Dict[str, Dict[str, List[np.ndarray]]] = defaultdict(
lambda: defaultdict(list)
)
# It is a dictionary of GlobalGroupId to dictionaries of GlobalAgentId to observation.
self._current_group_obs: Dict[
GlobalGroupId, Dict[GlobalAgentId, List[np.ndarray]]
] = defaultdict(lambda: defaultdict(list))
# group status of all the agents in the same group, and assemble the group obs.
self.group_status: Dict[str, Dict[str, GroupmateStatus]] = defaultdict(
lambda: defaultdict(None)
)
# group status of all the agents in the same group, and assemble the group's status.
# It is a dictionary of GlobalGroupId to dictionaries of GlobalAgentId to AgentStatus.
self._group_status: Dict[
GlobalGroupId, Dict[GlobalAgentId, AgentStatus]
] = defaultdict(lambda: defaultdict(None))
self.last_take_action_outputs: Dict[str, ActionInfoOutputs] = {}
self._last_take_action_outputs: Dict[GlobalAgentId, ActionInfoOutputs] = {}
self._episode_steps: Counter = Counter()
self._episode_rewards: Dict[GlobalAgentId, float] = defaultdict(float)
self._stats_reporter = stats_reporter
self._max_trajectory_length = max_trajectory_length
self._trajectory_queues: List[AgentManagerQueue[Trajectory]] = []
self._behavior_id = behavior_id
self.episode_steps: Counter = Counter()
self.episode_rewards: Dict[str, float] = defaultdict(float)
self.stats_reporter = stats_reporter
self.max_trajectory_length = max_trajectory_length
self.trajectory_queues: List[AgentManagerQueue[Trajectory]] = []
self.behavior_id = behavior_id
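The comments above describe two levels of nesting: a GlobalGroupId maps to a dictionary keyed by GlobalAgentId. A minimal sketch of how the group-observation mapping is laid out and queried (the ids below are hypothetical, chosen only for illustration):

```python
from collections import defaultdict
from typing import Dict, List
import numpy as np

GlobalAgentId = str
GlobalGroupId = str

# GlobalGroupId -> GlobalAgentId -> most recently seen observations.
current_group_obs: Dict[
    GlobalGroupId, Dict[GlobalAgentId, List[np.ndarray]]
] = defaultdict(lambda: defaultdict(list))

# Hypothetical ids, for illustration only.
current_group_obs["group-0"]["agent-0-1"] = [np.zeros(4, dtype=np.float32)]
current_group_obs["group-0"]["agent-0-2"] = [np.ones(4, dtype=np.float32)]

# When assembling agent-0-1's trajectory, its group mates' observations are
# every entry under the same group key except its own.
mates = {
    aid: obs
    for aid, obs in current_group_obs["group-0"].items()
    if aid != "agent-0-1"
}
print(list(mates))  # ['agent-0-2']
```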
def add_experiences(
self,

take_action_outputs = previous_action.outputs
if take_action_outputs:
for _entropy in take_action_outputs["entropy"]:
self.stats_reporter.add_stat("Policy/Entropy", _entropy)
self._stats_reporter.add_stat("Policy/Entropy", _entropy)
# Make unique agent_ids that are global across workers
action_global_agent_ids = [

if global_id in self.last_step_result: # Don't store if agent just reset
self.last_take_action_outputs[global_id] = take_action_outputs
if global_id in self._last_step_result: # Don't store if agent just reset
self._last_take_action_outputs[global_id] = take_action_outputs
self._add_to_group_status(terminal_step, worker_id)
self._add_group_status_and_obs(terminal_step, worker_id)
for terminal_step in terminal_steps.values():
local_id = terminal_step.agent_id
global_id = get_global_agent_id(worker_id, local_id)

# Clear the last seen group obs when agents die.
self._clear_group_obs(global_id)
self._clear_group_status_and_obs(global_id)
self._add_to_group_status(ongoing_step, worker_id)
self._add_group_status_and_obs(ongoing_step, worker_id)
for ongoing_step in decision_steps.values():
local_id = ongoing_step.agent_id
self._process_step(

for _gid in action_global_agent_ids:
# If the ID doesn't have a last step result, the agent just reset,
# don't store the action.
if _gid in self.last_step_result:
if _gid in self._last_step_result:
def _add_to_group_status(
def _add_group_status_and_obs(
when constructing trajectories to get the status of group mates.
when constructing trajectories to get the status of group mates. Also stores the current
observation into current_group_obs, to be used to get the next group observations
for bootstrapping.
stored_decision_step, idx = self.last_step_result.get(
stored_decision_step, idx = self._last_step_result.get(
stored_take_action_outputs = self.last_take_action_outputs.get(
stored_take_action_outputs = self._last_take_action_outputs.get(
global_agent_id, None
)
if stored_decision_step is not None and stored_take_action_outputs is not None:

continuous=stored_actions.continuous[idx],
discrete=stored_actions.discrete[idx],
)
group_status = GroupmateStatus(
group_status = AgentStatus(
self.group_status[global_group_id][global_agent_id] = group_status
self.current_group_obs[global_group_id][global_agent_id] = step.obs
self._group_status[global_group_id][global_agent_id] = group_status
self._current_group_obs[global_group_id][global_agent_id] = step.obs
def _clear_group_obs(self, global_id: str) -> None:
self._delete_in_nested_dict(self.current_group_obs, global_id)
self._delete_in_nested_dict(self.group_status, global_id)
def _clear_group_status_and_obs(self, global_id: GlobalAgentId) -> None:
"""
Clears an agent from self._group_status and self._current_group_obs.
"""
self._delete_in_nested_dict(self._current_group_obs, global_id)
self._delete_in_nested_dict(self._group_status, global_id)
def _delete_in_nested_dict(self, nested_dict: Dict[str, Any], key: str) -> None:
for _manager_id in list(nested_dict.keys()):

terminated = isinstance(step, TerminalStep)
global_agent_id = get_global_agent_id(worker_id, step.agent_id)
global_group_id = get_global_group_id(worker_id, step.group_id)
stored_decision_step, idx = self.last_step_result.get(
stored_decision_step, idx = self._last_step_result.get(
stored_take_action_outputs = self.last_take_action_outputs.get(
stored_take_action_outputs = self._last_take_action_outputs.get(
self.last_step_result[global_agent_id] = (step, index)
self._last_step_result[global_agent_id] = (step, index)
# This state is the consequence of a past action
if stored_decision_step is not None and stored_take_action_outputs is not None:

# Assemble teammate_obs. If none saved, then it will be an empty list.
group_statuses = []
for _id, _mate_status in self.group_status[global_group_id].items():
for _id, _mate_status in self._group_status[global_group_id].items():
if _id != global_agent_id:
group_statuses.append(_mate_status)

group_reward=step.group_reward,
)
# Add the value outputs if needed
self.experience_buffers[global_agent_id].append(experience)
self.episode_rewards[global_agent_id] += step.reward
self._experience_buffers[global_agent_id].append(experience)
self._episode_rewards[global_agent_id] += step.reward
self.episode_steps[global_agent_id] += 1
self._episode_steps[global_agent_id] += 1
len(self.experience_buffers[global_agent_id])
>= self.max_trajectory_length
len(self._experience_buffers[global_agent_id])
>= self._max_trajectory_length
for _id, _obs in self.current_group_obs[global_group_id].items():
for _id, _obs in self._current_group_obs[global_group_id].items():
steps=self.experience_buffers[global_agent_id],
steps=self._experience_buffers[global_agent_id],
behavior_id=self.behavior_id,
behavior_id=self._behavior_id,
for traj_queue in self.trajectory_queues:
for traj_queue in self._trajectory_queues:
self.experience_buffers[global_agent_id] = []
self._experience_buffers[global_agent_id] = []
self.stats_reporter.add_stat(
self._stats_reporter.add_stat(
self.episode_steps.get(global_agent_id, 0),
self._episode_steps.get(global_agent_id, 0),
def _clean_agent_data(self, global_id: str) -> None:
def _clean_agent_data(self, global_id: GlobalAgentId) -> None:
self._safe_delete(self.experience_buffers, global_id)
self._safe_delete(self.last_take_action_outputs, global_id)
self._safe_delete(self.last_step_result, global_id)
self._safe_delete(self.episode_steps, global_id)
self._safe_delete(self.episode_rewards, global_id)
self._safe_delete(self._experience_buffers, global_id)
self._safe_delete(self._last_take_action_outputs, global_id)
self._safe_delete(self._last_step_result, global_id)
self._safe_delete(self._episode_steps, global_id)
self._safe_delete(self._episode_rewards, global_id)
self.policy.remove_previous_action([global_id])
self.policy.remove_memories([global_id])

assembles a Trajectory
:param trajectory_queue: Trajectory queue to publish to.
"""
self.trajectory_queues.append(trajectory_queue)
self._trajectory_queues.append(trajectory_queue)
def end_episode(self) -> None:
"""

all_gids = list(self.experience_buffers.keys()) # Need to make copy
all_gids = list(self._experience_buffers.keys()) # Need to make copy
for _gid in all_gids:
self._clean_agent_data(_gid)

super().__init__(policy, behavior_id, stats_reporter, max_trajectory_length)
trajectory_queue_len = 20 if threaded else 0
self.trajectory_queue: AgentManagerQueue[Trajectory] = AgentManagerQueue(
self.behavior_id, maxlen=trajectory_queue_len
self._behavior_id, maxlen=trajectory_queue_len
self.behavior_id, maxlen=0
self._behavior_id, maxlen=0
)
self.publish_trajectory_queue(self.trajectory_queue)

for stat_name, value_list in env_stats.items():
for val, agg_type in value_list:
if agg_type == StatsAggregationMethod.AVERAGE:
self.stats_reporter.add_stat(stat_name, val, agg_type)
self._stats_reporter.add_stat(stat_name, val, agg_type)
self.stats_reporter.add_stat(stat_name, val, agg_type)
self._stats_reporter.add_stat(stat_name, val, agg_type)
self.stats_reporter.set_stat(stat_name, val)
self._stats_reporter.set_stat(stat_name, val)
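The `AgentManager` above wires its trajectory queue into the processor with `publish_trajectory_queue`, and trajectories are cut once they reach `max_trajectory_length`. A self-contained sketch of that publish/subscribe handoff, using simplified stand-ins rather than the library's own classes:

```python
from collections import deque
from typing import Deque, List


class TrajectoryQueueSketch:
    """Stand-in for AgentManagerQueue: a FIFO keyed by behavior id."""

    def __init__(self, behavior_id: str, maxlen: int = 0) -> None:
        self.behavior_id = behavior_id
        # maxlen=0 is treated as unbounded in this sketch.
        self.queue: Deque[list] = deque(maxlen=maxlen or None)

    def put(self, trajectory: list) -> None:
        self.queue.append(trajectory)


class ProcessorSketch:
    """Stand-in for AgentProcessor: cuts trajectories and publishes them."""

    def __init__(self, max_trajectory_length: int) -> None:
        self.max_trajectory_length = max_trajectory_length
        self.experiences: list = []
        self.trajectory_queues: List[TrajectoryQueueSketch] = []

    def publish_trajectory_queue(self, queue: TrajectoryQueueSketch) -> None:
        self.trajectory_queues.append(queue)

    def add_experience(self, exp) -> None:
        self.experiences.append(exp)
        # Once the buffer reaches the cutoff, push a copy to every subscriber.
        if len(self.experiences) >= self.max_trajectory_length:
            for q in self.trajectory_queues:
                q.put(list(self.experiences))
            self.experiences = []


processor = ProcessorSketch(max_trajectory_length=3)
queue = TrajectoryQueueSketch("behavior?team=0", maxlen=20)
processor.publish_trajectory_queue(queue)
for step in range(7):
    processor.add_experience({"step": step})
print(len(queue.queue))  # 2 full trajectories of length 3 were published
```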

8
ml-agents/mlagents/trainers/behavior_id_utils.py


from typing import NamedTuple
from urllib.parse import urlparse, parse_qs
from mlagents_envs.base_env import AgentId, GroupId
GlobalGroupId = str
GlobalAgentId = str
class BehaviorIdentifiers(NamedTuple):

return name + "?team=" + str(team_id)
def get_global_agent_id(worker_id: int, agent_id: int) -> str:
def get_global_agent_id(worker_id: int, agent_id: AgentId) -> GlobalAgentId:
"""
Create an agent id that is unique across environment workers using the worker_id.
"""

def get_global_group_id(worker_id: int, group_id: int) -> str:
def get_global_group_id(worker_id: int, group_id: GroupId) -> GlobalGroupId:
"""
Create a group id that is unique across environment workers when using the worker_id.
"""

4
ml-agents/mlagents/trainers/optimizer/torch_optimizer.py


# For the first sequence, the initial memory should be the one at the
# beginning of this trajectory.
for _ in range(first_seq_len):
all_next_memories.append(initial_memory.squeeze().detach().numpy())
all_next_memories.append(ModelUtils.to_numpy(initial_memory.squeeze()))
init_values, _mem = self.critic.critic_pass(
seq_obs, initial_memory, sequence_length=first_seq_len

):
seq_obs = []
for _ in range(self.policy.sequence_length):
all_next_memories.append(_mem.squeeze().detach().numpy())
all_next_memories.append(ModelUtils.to_numpy(_mem.squeeze()))
for _obs in tensor_obs:
start = seq_num * self.policy.sequence_length - (
self.policy.sequence_length - leftover

15
ml-agents/mlagents/trainers/policy/policy.py


from mlagents.trainers.action_info import ActionInfo
from mlagents.trainers.settings import TrainerSettings, NetworkSettings
from mlagents.trainers.buffer import AgentBuffer
from mlagents.trainers.behavior_id_utils import GlobalAgentId
class UnityPolicyException(UnityException):

return np.zeros((num_agents, self.m_size), dtype=np.float32)
def save_memories(
self, agent_ids: List[str], memory_matrix: Optional[np.ndarray]
self, agent_ids: List[GlobalAgentId], memory_matrix: Optional[np.ndarray]
) -> None:
if memory_matrix is None:
return

for index, agent_id in enumerate(agent_ids):
self.memory_dict[agent_id] = memory_matrix[index, :]
def retrieve_memories(self, agent_ids: List[str]) -> np.ndarray:
def retrieve_memories(self, agent_ids: List[GlobalAgentId]) -> np.ndarray:
memory_matrix = np.zeros((len(agent_ids), self.m_size), dtype=np.float32)
for index, agent_id in enumerate(agent_ids):
if agent_id in self.memory_dict:

def retrieve_previous_memories(self, agent_ids: List[str]) -> np.ndarray:
def retrieve_previous_memories(self, agent_ids: List[GlobalAgentId]) -> np.ndarray:
memory_matrix = np.zeros((len(agent_ids), self.m_size), dtype=np.float32)
for index, agent_id in enumerate(agent_ids):
if agent_id in self.previous_memory_dict:

def remove_memories(self, agent_ids):
def remove_memories(self, agent_ids: List[GlobalAgentId]) -> None:
for agent_id in agent_ids:
if agent_id in self.memory_dict:
self.memory_dict.pop(agent_id)

)
def save_previous_action(
self, agent_ids: List[str], action_tuple: ActionTuple
self, agent_ids: List[GlobalAgentId], action_tuple: ActionTuple
def retrieve_previous_action(self, agent_ids: List[str]) -> np.ndarray:
def retrieve_previous_action(self, agent_ids: List[GlobalAgentId]) -> np.ndarray:
action_matrix = self.make_empty_previous_action(len(agent_ids))
for index, agent_id in enumerate(agent_ids):
if agent_id in self.previous_action_dict:

def remove_previous_action(self, agent_ids):
def remove_previous_action(self, agent_ids: List[GlobalAgentId]) -> None:
for agent_id in agent_ids:
if agent_id in self.previous_action_dict:
self.previous_action_dict.pop(agent_id)
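The methods above keep one recurrent-memory row per global agent id and fall back to zeros for unknown agents. A stand-alone sketch of the same pattern (not the `Policy` class itself; `M_SIZE` is an assumed memory width):

```python
from typing import Dict, List, Optional
import numpy as np

GlobalAgentId = str
M_SIZE = 4  # assumed memory width for this sketch

memory_dict: Dict[GlobalAgentId, np.ndarray] = {}


def save_memories(
    agent_ids: List[GlobalAgentId], memory_matrix: Optional[np.ndarray]
) -> None:
    if memory_matrix is None:
        return
    # Each agent owns one row of the batch's memory matrix.
    for index, agent_id in enumerate(agent_ids):
        memory_dict[agent_id] = memory_matrix[index, :]


def retrieve_memories(agent_ids: List[GlobalAgentId]) -> np.ndarray:
    # Agents without a stored memory fall back to a zero row.
    memory_matrix = np.zeros((len(agent_ids), M_SIZE), dtype=np.float32)
    for index, agent_id in enumerate(agent_ids):
        if agent_id in memory_dict:
            memory_matrix[index, :] = memory_dict[agent_id]
    return memory_matrix


save_memories(["agent_0-1", "agent_0-2"], np.ones((2, M_SIZE), dtype=np.float32))
print(retrieve_memories(["agent_0-2", "agent_0-9"]))  # ones row, then zeros row
```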

4
ml-agents/mlagents/trainers/tests/mock_brain.py


from mlagents.trainers.buffer import AgentBuffer, AgentBufferKey
from mlagents.trainers.torch.action_log_probs import LogProbsTuple
from mlagents.trainers.trajectory import GroupmateStatus, Trajectory, AgentExperience
from mlagents.trainers.trajectory import AgentStatus, Trajectory, AgentExperience
from mlagents_envs.base_env import (
DecisionSteps,
TerminalSteps,

behavior_id = "test_brain"
group_status = []
for _ in range(num_other_agents_in_group):
group_status.append(GroupmateStatus(obs, reward, action, done))
group_status.append(AgentStatus(obs, reward, action, done))
experience = AgentExperience(
obs=obs,
reward=reward,

36
ml-agents/mlagents/trainers/tests/test_agent_processor.py


assert len(step.group_status) == 0
# Assert that the AgentProcessor is empty
assert len(processor.experience_buffers[0]) == 0
assert len(processor._experience_buffers[0]) == 0
# Test empty steps
mock_decision_steps, mock_terminal_steps = mb.create_mock_steps(

mock_decision_steps, mock_terminal_steps, 0, ActionInfo.empty()
)
# Assert that the AgentProcessor is still empty
assert len(processor.experience_buffers[0]) == 0
assert len(processor._experience_buffers[0]) == 0
def test_group_statuses():

policy.save_previous_action.assert_has_calls(add_calls)
policy.remove_previous_action.assert_has_calls(remove_calls)
# Check that there are no experiences left
assert len(processor.experience_buffers.keys()) == 0
assert len(processor.last_take_action_outputs.keys()) == 0
assert len(processor.episode_steps.keys()) == 0
assert len(processor.episode_rewards.keys()) == 0
assert len(processor.last_step_result.keys()) == 0
assert len(processor._experience_buffers.keys()) == 0
assert len(processor._last_take_action_outputs.keys()) == 0
assert len(processor._episode_steps.keys()) == 0
assert len(processor._episode_rewards.keys()) == 0
assert len(processor._last_step_result.keys()) == 0
assert len(processor.experience_buffers.keys()) == 0
assert len(processor.last_take_action_outputs.keys()) == 0
assert len(processor.episode_steps.keys()) == 0
assert len(processor.episode_rewards.keys()) == 0
assert len(processor.last_step_result.keys()) == 0
assert len(processor._experience_buffers.keys()) == 0
assert len(processor._last_take_action_outputs.keys()) == 0
assert len(processor._episode_steps.keys()) == 0
assert len(processor._episode_rewards.keys()) == 0
assert len(processor._last_step_result.keys()) == 0
def test_end_episode():

# Check that we removed every agent
policy.remove_previous_action.assert_has_calls(remove_calls)
# Check that there are no experiences left
assert len(processor.experience_buffers.keys()) == 0
assert len(processor.last_take_action_outputs.keys()) == 0
assert len(processor.episode_steps.keys()) == 0
assert len(processor.episode_rewards.keys()) == 0
assert len(processor._experience_buffers.keys()) == 0
assert len(processor._last_take_action_outputs.keys()) == 0
assert len(processor._episode_steps.keys()) == 0
assert len(processor._episode_rewards.keys()) == 0
def test_agent_manager():

max_trajectory_length=5,
stats_reporter=StatsReporter("testcat"),
)
assert len(manager.trajectory_queues) == 1
assert isinstance(manager.trajectory_queues[0], AgentManagerQueue)
assert len(manager._trajectory_queues) == 1
assert isinstance(manager._trajectory_queues[0], AgentManagerQueue)
def test_agent_manager_queue():

81
ml-agents/mlagents/trainers/torch/model_serialization.py


return exporting_to_onnx._local_data._is_exporting
class TensorNames:
batch_size_placeholder = "batch_size"
sequence_length_placeholder = "sequence_length"
vector_observation_placeholder = "vector_observation"
recurrent_in_placeholder = "recurrent_in"
visual_observation_placeholder_prefix = "visual_observation_"
observation_placeholder_prefix = "obs_"
previous_action_placeholder = "prev_action"
action_mask_placeholder = "action_masks"
random_normal_epsilon_placeholder = "epsilon"
value_estimate_output = "value_estimate"
recurrent_output = "recurrent_out"
memory_size = "memory_size"
version_number = "version_number"
continuous_action_output_shape = "continuous_action_output_shape"
discrete_action_output_shape = "discrete_action_output_shape"
continuous_action_output = "continuous_actions"
discrete_action_output = "discrete_actions"
# Deprecated TensorNames entries for backward compatibility
is_continuous_control_deprecated = "is_continuous_control"
action_output_deprecated = "action"
action_output_shape_deprecated = "action_output_shape"
@staticmethod
def get_visual_observation_name(index: int) -> str:
"""
Returns the name of the visual observation with a given index
"""
return TensorNames.visual_observation_placeholder_prefix + str(index)
@staticmethod
def get_observation_name(index: int) -> str:
"""
Returns the name of the observation with a given index
"""
return TensorNames.observation_placeholder_prefix + str(index)
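Both helpers simply append the index to the corresponding prefix, so exported observation inputs get deterministic names. For example (assuming the class lives in `mlagents.trainers.torch.model_serialization`, as this diff indicates):

```python
from mlagents.trainers.torch.model_serialization import TensorNames

# The index is appended to the corresponding placeholder prefix.
assert TensorNames.get_observation_name(0) == "obs_0"
assert TensorNames.get_visual_observation_name(2) == "visual_observation_2"
```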
class ModelSerializer:
def __init__(self, policy):
# ONNX only supports input in NCHW (channel first) format.

dummy_memories,
)
self.input_names = ["vector_observation"]
self.input_names = [TensorNames.vector_observation_placeholder]
self.input_names.append(f"visual_observation_{i}")
self.input_names.append(TensorNames.get_visual_observation_name(i))
self.input_names.append(f"obs_{i}")
self.input_names += ["action_masks", "memories"]
self.input_names.append(TensorNames.get_observation_name(i))
self.input_names += [
TensorNames.action_mask_placeholder,
TensorNames.recurrent_in_placeholder,
]
self.output_names = ["version_number", "memory_size"]
self.output_names = [TensorNames.version_number, TensorNames.memory_size]
"continuous_actions",
"continuous_action_output_shape",
TensorNames.continuous_action_output,
TensorNames.continuous_action_output_shape,
self.dynamic_axes.update({"continuous_actions": {0: "batch"}})
self.dynamic_axes.update(
{TensorNames.continuous_action_output: {0: "batch"}}
)
self.output_names += ["discrete_actions", "discrete_action_output_shape"]
self.dynamic_axes.update({"discrete_actions": {0: "batch"}})
self.output_names += [
TensorNames.discrete_action_output,
TensorNames.discrete_action_output_shape,
]
self.dynamic_axes.update({TensorNames.discrete_action_output: {0: "batch"}})
"action",
"is_continuous_control",
"action_output_shape",
TensorNames.action_output_deprecated,
TensorNames.is_continuous_control_deprecated,
TensorNames.action_output_shape_deprecated,
self.dynamic_axes.update({"action": {0: "batch"}})
self.dynamic_axes.update(
{TensorNames.action_output_deprecated: {0: "batch"}}
)
if self.policy.export_memory_size > 0:
self.output_names += [TensorNames.recurrent_output]
def export_policy_model(self, output_filepath: str) -> None:
"""

2
ml-agents/mlagents/trainers/torch/networks.py


self.is_continuous_int_deprecated,
self.act_size_vector_deprecated,
]
if self.network_body.memory_size > 0:
export_out += [memories_out]
return tuple(export_out)

13
ml-agents/mlagents/trainers/trajectory.py


from mlagents.trainers.torch.action_log_probs import LogProbsTuple
class GroupmateStatus(NamedTuple):
class AgentStatus(NamedTuple):
Stores data related to an agent's teammate.
Stores observation, action, and reward for an agent. Does not have additional
fields that are present in AgentExperience.
"""
obs: List[np.ndarray]

class AgentExperience(NamedTuple):
"""
Stores the full set of data for an agent at one timestep. Includes
the statuses of its group mates and the group reward, as well as the probabilities
output by the policy.
"""
obs: List[np.ndarray]
reward: float
done: bool

prev_action: np.ndarray
interrupted: bool
memory: np.ndarray
group_status: List[GroupmateStatus]
group_status: List[AgentStatus]
group_reward: float
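As the positional call in `mock_brain.py` above suggests, an `AgentStatus` is built from an agent's observations, reward, action, and done flag, in that order. A minimal example with dummy data (shapes chosen only for illustration; the field order is assumed from that call):

```python
import numpy as np
from mlagents_envs.base_env import ActionTuple
from mlagents.trainers.trajectory import AgentStatus

# Dummy per-agent data; shapes are illustrative only.
obs = [np.zeros((6,), dtype=np.float32)]
action = ActionTuple(
    continuous=np.zeros((1, 2), dtype=np.float32),
    discrete=np.zeros((1, 0), dtype=np.int32),
)
mate_status = AgentStatus(obs, 0.0, action, False)
print(mate_status)
```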

8
ml-agents/setup.py


"protobuf>=3.6",
"pyyaml>=3.1.0",
# Windows ver. of PyTorch doesn't work from PyPi. Installation:
# https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Installation.md#windows-installing-pytorch
# https://github.com/Unity-Technologies/ml-agents/blob/main/docs/Installation.md#windows-installing-pytorch
# cattrs 1.1.0 dropped support for python 3.6.
"cattrs>=1.0.0,<1.1.0",
# cattrs 1.1.0 dropped support for python 3.6, but 1.0.0 doesn't work for python 3.9
# Since there's no version that supports both, we have to draw the line somewhere.
"cattrs<1.1.0; python_version<'3.8'",
"cattrs>=1.1.0; python_version>='3.8'",
"attrs>=19.3.0",
'pypiwin32==223;platform_system=="Windows"',
"importlib_metadata; python_version<'3.8'",

16
utils/make_readme_table.py


f"{bold_str}[docs]({version_info.doc_link}){bold_str}",
f"{bold_str}[download]({version_info.download_link}){bold_str}",
]
if version_info.is_master:
if version_info.is_main:
cells.append("--") # python
cells.append("--") # Unity
else:

return LooseVersion(self.python_verion)
@property
def is_master(self) -> bool:
return self.release_tag == "master"
def is_main(self) -> bool:
return self.release_tag == "main"
if self.is_master:
if self.is_main:
return datetime.today()
return datetime.strptime(self.release_date, "%B %d, %Y")

"""
if self.is_verified:
return f"Verified Package {self.csharp_version}"
elif self.is_master:
return "master (unstable)"
elif self.is_main:
return "main (unstable)"
else:
return self.release_tag.replace("_", " ").title()

versions = [
ReleaseInfo("master", "master", "master", "--"),
ReleaseInfo("main", "main", "main", "--"),
ReleaseInfo("release_1", "1.0.0", "0.16.0", "April 30, 2020"),
ReleaseInfo("release_2", "1.0.2", "0.16.1", "May 20, 2020"),
ReleaseInfo("release_3", "1.1.0", "0.17.0", "June 10, 2020"),

highlight_versions.add([v for v in sorted_versions if v.is_verified][0])
# Highlight the most recent regular version
highlight_versions.add(
[v for v in sorted_versions if (not v.is_verified and not v.is_master)][0]
[v for v in sorted_versions if (not v.is_verified and not v.is_main)][0]
)
count_by_verified = Counter()

2
utils/validate_release_links.py


def get_release_tag() -> Optional[str]:
"""
Returns the release tag for the mlagents python package.
This will be None on the master branch.
This will be None on the main branch.
:return:
"""
with open(TRAINER_INIT_FILE) as f:

31
com.unity.ml-agents/Editor/BufferSensorComponentEditor.cs


using UnityEditor;
using Unity.MLAgents.Sensors;
namespace Unity.MLAgents.Editor
{
[CustomEditor(typeof(BufferSensorComponent))]
[CanEditMultipleObjects]
internal class BufferSensorComponentEditor : UnityEditor.Editor
{
public override void OnInspectorGUI()
{
var so = serializedObject;
so.Update();
// Drawing the BufferSensorComponent
EditorGUI.BeginDisabledGroup(!EditorUtilities.CanUpdateModelProperties());
{
// These fields affect the sensor order or observation size,
// so they can't be changed at runtime.
EditorGUILayout.PropertyField(so.FindProperty("m_SensorName"), true);
EditorGUILayout.PropertyField(so.FindProperty("m_ObservableSize"), true);
EditorGUILayout.PropertyField(so.FindProperty("m_MaxNumObservables"), true);
}
EditorGUI.EndDisabledGroup();
so.ApplyModifiedProperties();
}
}
}

11
com.unity.ml-agents/Editor/BufferSensorComponentEditor.cs.meta


fileFormatVersion: 2
guid: b042fe65027f94c1eb38a2ee1362d38d
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant: