浏览代码

Merge branch 'master' into reward-dist

/reward-dist
GitHub 4 年前
当前提交
2fb87e4f
共有 94 个文件被更改,包括 2966 次插入328 次删除
  1. 3
      Project/Assets/ML-Agents/Examples/GridWorld/Scripts/GridAgent.cs
  2. 2
      Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/SensorBase.cs
  3. 2
      Project/ProjectSettings/UnityConnectSettings.asset
  4. 26
      README.md
  5. 8
      com.unity.ml-agents.extensions/Runtime/Match3/Match3Sensor.cs
  6. 9
      com.unity.ml-agents.extensions/Runtime/Sensors/GridSensor.cs
  7. 9
      com.unity.ml-agents.extensions/Runtime/Sensors/PhysicsBodySensor.cs
  8. 29
      com.unity.ml-agents/CHANGELOG.md
  9. 7
      com.unity.ml-agents/Runtime/Academy.cs
  10. 13
      com.unity.ml-agents/Runtime/Actuators/ActionSpec.cs
  11. 7
      com.unity.ml-agents/Runtime/Actuators/ActuatorManager.cs
  12. 68
      com.unity.ml-agents/Runtime/Analytics/Events.cs
  13. 14
      com.unity.ml-agents/Runtime/Analytics/InferenceAnalytics.cs
  14. 52
      com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs
  15. 5
      com.unity.ml-agents/Runtime/Communicator/RpcCommunicator.cs
  16. 5
      com.unity.ml-agents/Runtime/Communicator/UnityRLCapabilities.cs
  17. 39
      com.unity.ml-agents/Runtime/Grpc/CommunicatorObjects/Capabilities.cs
  18. 21
      com.unity.ml-agents/Runtime/Inference/ApplierImpl.cs
  19. 36
      com.unity.ml-agents/Runtime/Inference/GeneratorImpl.cs
  20. 46
      com.unity.ml-agents/Runtime/Inference/ModelRunner.cs
  21. 7
      com.unity.ml-agents/Runtime/Inference/TensorApplier.cs
  22. 7
      com.unity.ml-agents/Runtime/Inference/TensorGenerator.cs
  23. 14
      com.unity.ml-agents/Runtime/Policies/RemotePolicy.cs
  24. 8
      com.unity.ml-agents/Runtime/Sensors/BufferSensor.cs
  25. 15
      com.unity.ml-agents/Runtime/Sensors/CameraSensor.cs
  26. 24
      com.unity.ml-agents/Runtime/Sensors/ObservationWriter.cs
  27. 10
      com.unity.ml-agents/Runtime/Sensors/RayPerceptionSensor.cs
  28. 9
      com.unity.ml-agents/Runtime/Sensors/Reflection/ReflectionSensorBase.cs
  29. 9
      com.unity.ml-agents/Runtime/Sensors/RenderTextureSensor.cs
  30. 7
      com.unity.ml-agents/Runtime/Sensors/SensorShapeValidator.cs
  31. 11
      com.unity.ml-agents/Runtime/Sensors/StackingSensor.cs
  32. 24
      com.unity.ml-agents/Runtime/Sensors/VectorSensor.cs
  33. 26
      com.unity.ml-agents/Runtime/SideChannels/SideChannelManager.cs
  34. 19
      com.unity.ml-agents/Tests/Editor/Analytics/InferenceAnalyticsTests.cs
  35. 32
      com.unity.ml-agents/Tests/Editor/Communicator/GrpcExtensionsTests.cs
  36. 9
      com.unity.ml-agents/Tests/Editor/ParameterLoaderTest.cs
  37. 12
      com.unity.ml-agents/Tests/Editor/Sensor/ObservationWriterTests.cs
  38. 15
      com.unity.ml-agents/Tests/Editor/Sensor/StackingSensorTests.cs
  39. 4
      com.unity.ml-agents/Tests/Runtime/RuntimeAPITest.cs
  40. 5
      docs/Migrating.md
  41. 10
      docs/Training-ML-Agents.md
  42. 17
      ml-agents-envs/mlagents_envs/communicator.py
  43. 11
      ml-agents-envs/mlagents_envs/communicator_objects/capabilities_pb2.py
  44. 6
      ml-agents-envs/mlagents_envs/communicator_objects/capabilities_pb2.pyi
  45. 8
      ml-agents-envs/mlagents_envs/env_utils.py
  46. 55
      ml-agents-envs/mlagents_envs/environment.py
  47. 12
      ml-agents-envs/mlagents_envs/mock_communicator.py
  48. 47
      ml-agents-envs/mlagents_envs/rpc_communicator.py
  49. 2
      ml-agents-envs/mlagents_envs/side_channel/engine_configuration_channel.py
  50. 2
      ml-agents-envs/mlagents_envs/side_channel/environment_parameters_channel.py
  51. 54
      ml-agents-envs/mlagents_envs/tests/test_rpc_communicator.py
  52. 1
      ml-agents/mlagents/torch_utils/__init__.py
  53. 37
      ml-agents/mlagents/torch_utils/torch.py
  54. 15
      ml-agents/mlagents/trainers/cli_utils.py
  55. 12
      ml-agents/mlagents/trainers/env_manager.py
  56. 15
      ml-agents/mlagents/trainers/learn.py
  57. 9
      ml-agents/mlagents/trainers/settings.py
  58. 81
      ml-agents/mlagents/trainers/subprocess_env_manager.py
  59. 2
      ml-agents/mlagents/trainers/tests/simple_test_envs.py
  60. 66
      ml-agents/mlagents/trainers/tests/test_subprocess_env_manager.py
  61. 8
      ml-agents/mlagents/trainers/tests/torch/saver/test_saver.py
  62. 4
      ml-agents/mlagents/trainers/tests/torch/test_action_model.py
  63. 10
      ml-agents/mlagents/trainers/tests/torch/test_distributions.py
  64. 4
      ml-agents/mlagents/trainers/tests/torch/test_encoders.py
  65. 6
      ml-agents/mlagents/trainers/tests/torch/test_hybrid.py
  66. 8
      ml-agents/mlagents/trainers/tests/torch/test_networks.py
  67. 3
      ml-agents/mlagents/trainers/tests/torch/test_simple_rl.py
  68. 4
      ml-agents/mlagents/trainers/torch/encoders.py
  69. 3
      ml-agents/mlagents/trainers/trainer_controller.py
  70. 3
      protobuf-definitions/proto/mlagents_envs/communicator_objects/capabilities.proto
  71. 150
      utils/make_readme_table.py
  72. 23
      .github/workflows/lock.yml
  73. 24
      .yamato/pytest-gpu.yml
  74. 40
      com.unity.ml-agents/Runtime/Analytics/AnalyticsUtils.cs
  75. 3
      com.unity.ml-agents/Runtime/Analytics/AnalyticsUtils.cs.meta
  76. 246
      com.unity.ml-agents/Runtime/Analytics/TrainingAnalytics.cs
  77. 3
      com.unity.ml-agents/Runtime/Analytics/TrainingAnalytics.cs.meta
  78. 850
      com.unity.ml-agents/Runtime/Grpc/CommunicatorObjects/TrainingAnalytics.cs
  79. 11
      com.unity.ml-agents/Runtime/Grpc/CommunicatorObjects/TrainingAnalytics.cs.meta
  80. 39
      com.unity.ml-agents/Runtime/Sensors/IBuiltInSensor.cs
  81. 3
      com.unity.ml-agents/Runtime/Sensors/IBuiltInSensor.cs.meta
  82. 50
      com.unity.ml-agents/Runtime/SideChannels/TrainingAnalyticsSideChannel.cs
  83. 3
      com.unity.ml-agents/Runtime/SideChannels/TrainingAnalyticsSideChannel.cs.meta
  84. 42
      com.unity.ml-agents/Tests/Editor/Analytics/TrainingAnalyticsTest.cs
  85. 3
      com.unity.ml-agents/Tests/Editor/Analytics/TrainingAnalyticsTest.cs.meta
  86. 65
      com.unity.ml-agents/Tests/Editor/TrainingAnalyticsSideChannelTests.cs
  87. 3
      com.unity.ml-agents/Tests/Editor/TrainingAnalyticsSideChannelTests.cs.meta
  88. 243
      ml-agents-envs/mlagents_envs/communicator_objects/training_analytics_pb2.py
  89. 97
      ml-agents-envs/mlagents_envs/communicator_objects/training_analytics_pb2.pyi
  90. 41
      ml-agents/mlagents/trainers/tests/test_torch_utils.py
  91. 99
      ml-agents/mlagents/training_analytics_side_channel.py
  92. 31
      protobuf-definitions/proto/mlagents_envs/communicator_objects/training_analytics.proto
  93. 4
      pytest.ini
  94. 38
      .github/lock.yml

3
Project/Assets/ML-Agents/Examples/GridWorld/Scripts/GridAgent.cs


using System.Linq;
using Unity.MLAgents;
using Unity.MLAgents.Actuators;
using UnityEngine.Rendering;
using UnityEngine.Serialization;
public class GridAgent : Agent

void WaitTimeInference()
{
if (renderCamera != null)
if (renderCamera != null && SystemInfo.graphicsDeviceType != GraphicsDeviceType.Null)
{
renderCamera.Render();
}

2
Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/SensorBase.cs


float[] buffer = new float[numFloats];
WriteObservation(buffer);
writer.AddRange(buffer);
writer.AddList(buffer);
return numFloats;
}

2
Project/ProjectSettings/UnityConnectSettings.asset


UnityConnectSettings:
m_ObjectHideFlags: 0
serializedVersion: 1
m_Enabled: 1
m_Enabled: 0
m_TestMode: 0
m_EventOldUrl: https://api.uca.cloud.unity3d.com/v1/events
m_EventUrl: https://cdp.cloud.unity3d.com/v1/events

26
README.md


- The **Documentation** links in the table below include installation and usage
instructions specific to each release. Remember to always use the
documentation that corresponds to the release version you're using.
| **Version** | **Release Date** | **Source** | **Documentation** | **Download** |
|:-------:|:------:|:-------------:|:-------:|:------------:|
| **master (unstable)** | -- | [source](https://github.com/Unity-Technologies/ml-agents/tree/master) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/master/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/master.zip) |
| **Release 12** | **December 22, 2020** | **[source](https://github.com/Unity-Technologies/ml-agents/tree/release_12)** | **[docs](https://github.com/Unity-Technologies/ml-agents/tree/release_12_docs/docs/Readme.md)** | **[download](https://github.com/Unity-Technologies/ml-agents/archive/release_12.zip)** |
| **Release 11** | December 21, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/release_11) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/release_11_docs/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/release_11.zip) |
| **Release 10** | November 18, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/release_10) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/release_10_docs/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/release_10.zip) |
| **Release 9** | November 4, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/release_9) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/release_9_docs/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/release_9.zip) |
| **Release 8** | October 14, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/release_8) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/release_8_docs/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/release_8.zip) |
| **Release 7** | September 16, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/release_7) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/release_7_docs/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/release_7.zip) |
| **Release 6** | August 12, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/release_6) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/release_6_docs/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/release_6.zip) |
| **Release 5** | July 31, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/release_5) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/release_5_docs/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/release_5.zip) |
- The `com.unity.ml-agents` package is [verified](https://docs.unity3d.com/2020.1/Documentation/Manual/pack-safe.html)
for Unity 2020.1 and later. Verified packages releases are numbered 1.0.x.
## Citation
| **Version** | **Release Date** | **Source** | **Documentation** | **Download** | **Python Package** | **Unity Package** |
|:-------:|:------:|:-------------:|:-------:|:------------:|:------------:|:------------:|
| **master (unstable)** | -- | [source](https://github.com/Unity-Technologies/ml-agents/tree/master) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/master/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/master.zip) | -- | -- |
| **Release 12** | **December 22, 2020** | **[source](https://github.com/Unity-Technologies/ml-agents/tree/release_12)** | **[docs](https://github.com/Unity-Technologies/ml-agents/tree/release_12_docs/docs/Readme.md)** | **[download](https://github.com/Unity-Technologies/ml-agents/archive/release_12.zip)** | **[0.23.0](https://pypi.org/project/mlagents/0.23.0/)** | **[1.7.2](https://docs.unity3d.com/Packages/com.unity.ml-agents@1.7/manual/index.html)** |
| **Release 11** | December 21, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/release_11) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/release_11_docs/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/release_11.zip) | [0.23.0](https://pypi.org/project/mlagents/0.23.0/) | [1.7.0](https://docs.unity3d.com/Packages/com.unity.ml-agents@1.7/manual/index.html) |
| **Release 10** | November 18, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/release_10) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/release_10_docs/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/release_10.zip) | [0.22.0](https://pypi.org/project/mlagents/0.22.0/) | [1.6.0](https://docs.unity3d.com/Packages/com.unity.ml-agents@1.6/manual/index.html) |
| **Verified Package 1.0.6** | **November 16, 2020** | **[source](https://github.com/Unity-Technologies/ml-agents/tree/com.unity.ml-agents_1.0.6)** | **[docs](https://github.com/Unity-Technologies/ml-agents/blob/release_2_verified_docs/docs/Readme.md)** | **[download](https://github.com/Unity-Technologies/ml-agents/archive/com.unity.ml-agents_1.0.6.zip)** | **[0.16.1](https://pypi.org/project/mlagents/0.16.1/)** | **[1.0.6](https://docs.unity3d.com/Packages/com.unity.ml-agents@1.0/manual/index.html)** |
| **Release 9** | November 4, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/release_9) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/release_9_docs/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/release_9.zip) | [0.21.1](https://pypi.org/project/mlagents/0.21.1/) | [1.5.0](https://docs.unity3d.com/Packages/com.unity.ml-agents@1.5/manual/index.html) |
| **Release 8** | October 14, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/release_8) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/release_8_docs/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/release_8.zip) | [0.21.0](https://pypi.org/project/mlagents/0.21.0/) | [1.5.0](https://docs.unity3d.com/Packages/com.unity.ml-agents@1.5/manual/index.html) |
| **Verified Package 1.0.5** | September 23, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/com.unity.ml-agents_1.0.5) | [docs](https://github.com/Unity-Technologies/ml-agents/blob/release_2_verified_docs/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/com.unity.ml-agents_1.0.5.zip) | [0.16.1](https://pypi.org/project/mlagents/0.16.1/) | [1.0.5](https://docs.unity3d.com/Packages/com.unity.ml-agents@1.0/manual/index.html) |
| **Release 7** | September 16, 2020 | [source](https://github.com/Unity-Technologies/ml-agents/tree/release_7) | [docs](https://github.com/Unity-Technologies/ml-agents/tree/release_7_docs/docs/Readme.md) | [download](https://github.com/Unity-Technologies/ml-agents/archive/release_7.zip) | [0.20.0](https://pypi.org/project/mlagents/0.20.0/) | [1.4.0](https://docs.unity3d.com/Packages/com.unity.ml-agents@1.4/manual/index.html) |
If you are a researcher interested in a discussion of Unity as an AI platform,
see a pre-print of our

8
com.unity.ml-agents.extensions/Runtime/Match3/Match3Sensor.cs


/// or uncompressed visual observations. Uses AbstractBoard.GetCellType()
/// and AbstractBoard.GetSpecialType() to determine the observation values.
/// </summary>
public class Match3Sensor : ISparseChannelSensor
public class Match3Sensor : ISparseChannelSensor, IBuiltInSensor
{
private Match3ObservationType m_ObservationType;
private AbstractBoard m_Board;

public int[] GetCompressedChannelMapping()
{
return m_SparseChannelMapping;
}
/// <inheritdoc/>
public BuiltInSensorType GetBuiltInSensorType()
{
return BuiltInSensorType.Match3Sensor;
}
static void DestroyTexture(Texture2D texture)

9
com.unity.ml-agents.extensions/Runtime/Sensors/GridSensor.cs


/// <summary>
/// Grid-based sensor.
/// </summary>
public class GridSensor : SensorComponent, ISensor
public class GridSensor : SensorComponent, ISensor, IBuiltInSensor
{
/// <summary>
/// Name of this grid sensor.

{
return CompressionType;
}
/// <inheritdoc/>
public BuiltInSensorType GetBuiltInSensorType()
{
return BuiltInSensorType.GridSensor;
}
/// <summary>
/// GetCompressedObservation - Calls Perceive then puts the data stored on the perception buffer

9
com.unity.ml-agents.extensions/Runtime/Sensors/PhysicsBodySensor.cs


/// <summary>
/// ISensor implementation that generates observations for a group of Rigidbodies or ArticulationBodies.
/// </summary>
public class PhysicsBodySensor : ISensor
public class PhysicsBodySensor : ISensor, IBuiltInSensor
{
int[] m_Shape;
string m_SensorName;

{
return m_SensorName;
}
/// <inheritdoc/>
public BuiltInSensorType GetBuiltInSensorType()
{
return BuiltInSensorType.PhysicsBodySensor;
}
}
}

29
com.unity.ml-agents/CHANGELOG.md


### Minor Changes
#### com.unity.ml-agents / com.unity.ml-agents.extensions (C#)
- The `ActionSpec` constructor is now public. Previously, it was not possible to create an
ActionSpec with both continuous and discrete actions from code. (#4896)
will result in the values being summed (instead of averaged) when written to
TensorBoard. Thanks to @brccabral for the contribution! (#4816)
will result in the values being summed (instead of averaged) when written to
TensorBoard. Thanks to @brccabral for the contribution! (#4816)
- Added `VectorSensor.AddObservation(IList<float>)`. `VectorSensor.AddObservation(IEnumerable<float>)`
is deprecated. The `IList` version is recommended, as it does not generate any
additional memory allocations. (#4887)
- Added `ObservationWriter.AddList()` and deprecated `ObservationWriter.AddRange()`.
`AddList()` is recommended, as it does not generate any additional memory allocations. (#4887)
- Added a `--torch-device` commandline option to `mlagents-learn`, which sets the default
[`torch.device`](https://pytorch.org/docs/stable/tensor_attributes.html#torch.torch.device) used for training. (#4888)
- The `--cpu` commandline option had no effect and was removed. Use `--torch-device=cpu` to force CPU training. (#4888)
- CameraSensor now logs an error if the GraphicsDevice is null. (#4880)
- Removed unnecessary memory allocations in `ActuatorManager.UpdateActionArray()` (#4877)
- Removed unnecessary memory allocations in `SensorShapeValidator.ValidateSensors()` (#4879)
- Removed unnecessary memory allocations in `SideChannelManager.GetSideChannelMessage()` (#4886)
- Removed several memory allocations that happened during inference. On a test scene, this
reduced the amount of memory allocated by approximately 25%. (#4887)
- Fixed a bug that can cause a crash if a behavior can appear during training in multi-environment training. (#4872)
- Fixed a bug that would cause `UnityEnvironment` to wait the full timeout
period and report a misleading error message if the executable crashed
without closing the connection. It now periodically checks the process status
while waiting for a connection, and raises a better error message if it crashes. (#4880)
- Passing a `-logfile` option in the `--env-args` option to `mlagents-learn` is
no longer overwritten. (#4880)
## [1.7.2-preview] - 2020-12-22

7
com.unity.ml-agents/Runtime/Academy.cs


/// <term>1.3.0</term>
/// <description>Support both continuous and discrete actions.</description>
/// </item>
/// <item>
/// <term>1.4.0</term>
/// <description>Support training analytics sent from python trainer to the editor.</description>
/// </item>
const string k_ApiVersion = "1.3.0";
const string k_ApiVersion = "1.4.0";
/// <summary>
/// Unity package version of com.unity.ml-agents.

EnableAutomaticStepping();
SideChannelManager.RegisterSideChannel(new EngineConfigurationChannel());
SideChannelManager.RegisterSideChannel(new TrainingAnalyticsSideChannel());
m_EnvironmentParameters = new EnvironmentParameters();
m_StatsRecorder = new StatsRecorder();

13
com.unity.ml-agents/Runtime/Actuators/ActionSpec.cs


/// <summary>
/// Creates a Continuous <see cref="ActionSpec"/> with the number of actions available.
/// </summary>
/// <param name="numActions">The number of actions available.</param>
/// <param name="numActions">The number of continuous actions available.</param>
/// <returns>An Continuous ActionSpec initialized with the number of actions available.</returns>
public static ActionSpec MakeContinuous(int numActions)
{

return actuatorSpace;
}
internal ActionSpec(int numContinuousActions, int[] branchSizes = null)
/// <summary>
/// Create an ActionSpec initialized with the specified action sizes.
/// </summary>
/// <param name="numContinuousActions">The number of continuous actions available.</param>
/// <param name="discreteBranchSizes">The array of branch sizes for the discrete actions. Each index
/// contains the number of actions available for that branch.</param>
/// <returns>An ActionSpec initialized with the specified action sizes.</returns>
public ActionSpec(int numContinuousActions = 0, int[] discreteBranchSizes = null)
BranchSizes = branchSizes;
BranchSizes = discreteBranchSizes;
}
/// <summary>

7
com.unity.ml-agents/Runtime/Actuators/ActuatorManager.cs


}
else
{
Debug.Assert(sourceActionBuffer.Length == destination.Length,
$"sourceActionBuffer:{sourceActionBuffer.Length} is a different" +
$" size than destination: {destination.Length}.");
Debug.AssertFormat(sourceActionBuffer.Length == destination.Length,
"sourceActionBuffer: {0} is a different size than destination: {1}.",
sourceActionBuffer.Length,
destination.Length);
Array.Copy(sourceActionBuffer.Array,
sourceActionBuffer.Offset,

68
com.unity.ml-agents/Runtime/Analytics/Events.cs


{
public string SensorName;
public string CompressionType;
public int BuiltInSensorType;
public EventObservationDimensionInfo[] DimensionInfos;
public static EventObservationSpec FromSensor(ISensor sensor)

// TODO copy flags when we have them
}
var builtInSensorType =
(sensor as IBuiltInSensor)?.GetBuiltInSensorType() ?? Sensors.BuiltInSensorType.Unknown;
BuiltInSensorType = (int)builtInSensorType,
}
internal struct RemotePolicyInitializedEvent
{
public string TrainingSessionGuid;
/// <summary>
/// Hash of the BehaviorName.
/// </summary>
public string BehaviorName;
public List<EventObservationSpec> ObservationSpecs;
public EventActionSpec ActionSpec;
/// <summary>
/// This will be the same as TrainingEnvironmentInitializedEvent if available, but
/// TrainingEnvironmentInitializedEvent maybe not always be available with older trainers.
/// </summary>
public string MLAgentsEnvsVersion;
public string TrainerCommunicationVersion;
}
internal struct TrainingEnvironmentInitializedEvent
{
public string TrainingSessionGuid;
public string TrainerPythonVersion;
public string MLAgentsVersion;
public string MLAgentsEnvsVersion;
public string TorchVersion;
public string TorchDeviceType;
public int NumEnvironments;
public int NumEnvironmentParameters;
}
[Flags]
internal enum RewardSignals
{
Extrinsic = 1 << 0,
Gail = 1 << 1,
Curiosity = 1 << 2,
Rnd = 1 << 3,
}
[Flags]
internal enum TrainingFeatures
{
BehavioralCloning = 1 << 0,
Recurrent = 1 << 1,
Threaded = 1 << 2,
SelfPlay = 1 << 3,
Curriculum = 1 << 4,
}
internal struct TrainingBehaviorInitializedEvent
{
public string TrainingSessionGuid;
public string BehaviorName;
public string TrainerType;
public RewardSignals RewardSignalFlags;
public TrainingFeatures TrainingFeatureFlags;
public string VisualEncoder;
public int NumNetworkLayers;
public int NumNetworkHiddenUnits;
}
}

14
com.unity.ml-agents/Runtime/Analytics/InferenceAnalytics.cs


{
const string k_VendorKey = "unity.ml-agents";
const string k_EventName = "ml_agents_inferencemodelset";
const int k_EventVersion = 1;
/// <summary>
/// Whether or not we've registered this particular event yet

/// </summary>
const int k_MaxNumberOfElements = 1000;
/// <summary>
/// Models that we've already sent events for.
/// </summary>

}
#if UNITY_EDITOR
AnalyticsResult result = EditorAnalytics.RegisterEventWithLimit(k_EventName, k_MaxEventsPerHour, k_MaxNumberOfElements, k_VendorKey);
AnalyticsResult result = EditorAnalytics.RegisterEventWithLimit(k_EventName, k_MaxEventsPerHour, k_MaxNumberOfElements, k_VendorKey, k_EventVersion);
#else
AnalyticsResult result = AnalyticsResult.UnsupportedPlatform;
#endif

var data = GetEventForModel(nnModel, behaviorName, inferenceDevice, sensors, actionSpec);
// Note - to debug, use JsonUtility.ToJson on the event.
// Debug.Log(JsonUtility.ToJson(data, true));
//Debug.Log(JsonUtility.ToJson(data, true));
EditorAnalytics.SendEventWithLimit(k_EventName, data);
if (AnalyticsUtils.s_SendEditorAnalytics)
{
EditorAnalytics.SendEventWithLimit(k_EventName, data, k_EventVersion);
}
#else
return;
#endif

var inferenceEvent = new InferenceEvent();
// Hash the behavior name so that there's no concern about PII or "secret" data being leaked.
var behaviorNameHash = Hash128.Compute(behaviorName);
inferenceEvent.BehaviorName = behaviorNameHash.ToString();
inferenceEvent.BehaviorName = AnalyticsUtils.Hash(behaviorName);
inferenceEvent.BarracudaModelSource = barracudaModel.IrSource;
inferenceEvent.BarracudaModelVersion = barracudaModel.IrVersion;

52
com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs


using UnityEngine;
using System.Runtime.CompilerServices;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Analytics;
using Unity.MLAgents.Sensors;
using Unity.MLAgents.Demonstrations;
using Unity.MLAgents.Policies;

ConcatenatedPngObservations = proto.ConcatenatedPngObservations,
CompressedChannelMapping = proto.CompressedChannelMapping,
HybridActions = proto.HybridActions,
TrainingAnalytics = proto.TrainingAnalytics,
};
}

ConcatenatedPngObservations = rlCaps.ConcatenatedPngObservations,
CompressedChannelMapping = rlCaps.CompressedChannelMapping,
HybridActions = rlCaps.HybridActions,
TrainingAnalytics = rlCaps.TrainingAnalytics,
};
}

}
return true;
}
#region Analytics
internal static TrainingEnvironmentInitializedEvent ToTrainingEnvironmentInitializedEvent(
this TrainingEnvironmentInitialized inputProto)
{
return new TrainingEnvironmentInitializedEvent
{
TrainerPythonVersion = inputProto.PythonVersion,
MLAgentsVersion = inputProto.MlagentsVersion,
MLAgentsEnvsVersion = inputProto.MlagentsEnvsVersion,
TorchVersion = inputProto.TorchVersion,
TorchDeviceType = inputProto.TorchDeviceType,
NumEnvironments = inputProto.NumEnvs,
NumEnvironmentParameters = inputProto.NumEnvironmentParameters,
};
}
internal static TrainingBehaviorInitializedEvent ToTrainingBehaviorInitializedEvent(
this TrainingBehaviorInitialized inputProto)
{
RewardSignals rewardSignals = 0;
rewardSignals |= inputProto.ExtrinsicRewardEnabled ? RewardSignals.Extrinsic : 0;
rewardSignals |= inputProto.GailRewardEnabled ? RewardSignals.Gail : 0;
rewardSignals |= inputProto.CuriosityRewardEnabled ? RewardSignals.Curiosity : 0;
rewardSignals |= inputProto.RndRewardEnabled ? RewardSignals.Rnd : 0;
TrainingFeatures trainingFeatures = 0;
trainingFeatures |= inputProto.BehavioralCloningEnabled ? TrainingFeatures.BehavioralCloning : 0;
trainingFeatures |= inputProto.RecurrentEnabled ? TrainingFeatures.Recurrent : 0;
trainingFeatures |= inputProto.TrainerThreaded ? TrainingFeatures.Threaded : 0;
trainingFeatures |= inputProto.SelfPlayEnabled ? TrainingFeatures.SelfPlay : 0;
trainingFeatures |= inputProto.CurriculumEnabled ? TrainingFeatures.Curriculum : 0;
return new TrainingBehaviorInitializedEvent
{
BehaviorName = inputProto.BehaviorName,
TrainerType = inputProto.TrainerType,
RewardSignalFlags = rewardSignals,
TrainingFeatureFlags = trainingFeatures,
VisualEncoder = inputProto.VisualEncoder,
NumNetworkLayers = inputProto.NumNetworkLayers,
NumNetworkHiddenUnits = inputProto.NumNetworkHiddenUnits,
};
}
#endregion
}
}

5
com.unity.ml-agents/Runtime/Communicator/RpcCommunicator.cs


using System.Linq;
using UnityEngine;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Analytics;
using Unity.MLAgents.CommunicatorObjects;
using Unity.MLAgents.Sensors;
using Unity.MLAgents.SideChannels;

},
out input);
var pythonCommunicationVersion = initializationInput.RlInitializationInput.CommunicationVersion;
var pythonCommunicationVersion = initializationInput.RlInitializationInput.CommunicationVersion;
TrainingAnalytics.SetTrainerInformation(pythonPackageVersion, pythonCommunicationVersion);
var communicationIsCompatible = CheckCommunicationVersionsAreCompatible(unityCommunicationVersion,
pythonCommunicationVersion,

5
com.unity.ml-agents/Runtime/Communicator/UnityRLCapabilities.cs


public bool ConcatenatedPngObservations;
public bool CompressedChannelMapping;
public bool HybridActions;
public bool TrainingAnalytics;
/// <summary>
/// A class holding the capabilities flags for Reinforcement Learning across C# and the Trainer codebase. This

bool baseRlCapabilities = true,
bool concatenatedPngObservations = true,
bool compressedChannelMapping = true,
bool hybridActions = true)
bool hybridActions = true,
bool trainingAnalytics = true)
TrainingAnalytics = trainingAnalytics;
}
/// <summary>

39
com.unity.ml-agents/Runtime/Grpc/CommunicatorObjects/Capabilities.cs


byte[] descriptorData = global::System.Convert.FromBase64String(
string.Concat(
"CjVtbGFnZW50c19lbnZzL2NvbW11bmljYXRvcl9vYmplY3RzL2NhcGFiaWxp",
"dGllcy5wcm90bxIUY29tbXVuaWNhdG9yX29iamVjdHMilAEKGFVuaXR5UkxD",
"dGllcy5wcm90bxIUY29tbXVuaWNhdG9yX29iamVjdHMirwEKGFVuaXR5UkxD",
"ASgIQiWqAiJVbml0eS5NTEFnZW50cy5Db21tdW5pY2F0b3JPYmplY3RzYgZw",
"cm90bzM="));
"ASgIEhkKEXRyYWluaW5nQW5hbHl0aWNzGAUgASgIQiWqAiJVbml0eS5NTEFn",
"ZW50cy5Db21tdW5pY2F0b3JPYmplY3RzYgZwcm90bzM="));
new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.UnityRLCapabilitiesProto), global::Unity.MLAgents.CommunicatorObjects.UnityRLCapabilitiesProto.Parser, new[]{ "BaseRLCapabilities", "ConcatenatedPngObservations", "CompressedChannelMapping", "HybridActions" }, null, null, null)
new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.UnityRLCapabilitiesProto), global::Unity.MLAgents.CommunicatorObjects.UnityRLCapabilitiesProto.Parser, new[]{ "BaseRLCapabilities", "ConcatenatedPngObservations", "CompressedChannelMapping", "HybridActions", "TrainingAnalytics" }, null, null, null)
}));
}
#endregion

concatenatedPngObservations_ = other.concatenatedPngObservations_;
compressedChannelMapping_ = other.compressedChannelMapping_;
hybridActions_ = other.hybridActions_;
trainingAnalytics_ = other.trainingAnalytics_;
_unknownFields = pb::UnknownFieldSet.Clone(other._unknownFields);
}

}
}
/// <summary>Field number for the "trainingAnalytics" field.</summary>
public const int TrainingAnalyticsFieldNumber = 5;
private bool trainingAnalytics_;
/// <summary>
/// support for training analytics
/// </summary>
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public bool TrainingAnalytics {
get { return trainingAnalytics_; }
set {
trainingAnalytics_ = value;
}
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public override bool Equals(object other) {
return Equals(other as UnityRLCapabilitiesProto);

if (ConcatenatedPngObservations != other.ConcatenatedPngObservations) return false;
if (CompressedChannelMapping != other.CompressedChannelMapping) return false;
if (HybridActions != other.HybridActions) return false;
if (TrainingAnalytics != other.TrainingAnalytics) return false;
return Equals(_unknownFields, other._unknownFields);
}

if (ConcatenatedPngObservations != false) hash ^= ConcatenatedPngObservations.GetHashCode();
if (CompressedChannelMapping != false) hash ^= CompressedChannelMapping.GetHashCode();
if (HybridActions != false) hash ^= HybridActions.GetHashCode();
if (TrainingAnalytics != false) hash ^= TrainingAnalytics.GetHashCode();
if (_unknownFields != null) {
hash ^= _unknownFields.GetHashCode();
}

output.WriteRawTag(32);
output.WriteBool(HybridActions);
}
if (TrainingAnalytics != false) {
output.WriteRawTag(40);
output.WriteBool(TrainingAnalytics);
}
if (_unknownFields != null) {
_unknownFields.WriteTo(output);
}

if (HybridActions != false) {
size += 1 + 1;
}
if (TrainingAnalytics != false) {
size += 1 + 1;
}
if (_unknownFields != null) {
size += _unknownFields.CalculateSize();
}

}
if (other.HybridActions != false) {
HybridActions = other.HybridActions;
}
if (other.TrainingAnalytics != false) {
TrainingAnalytics = other.TrainingAnalytics;
}
_unknownFields = pb::UnknownFieldSet.MergeFrom(_unknownFields, other._unknownFields);
}

}
case 32: {
HybridActions = input.ReadBool();
break;
}
case 40: {
TrainingAnalytics = input.ReadBool();
break;
}
}

21
com.unity.ml-agents/Runtime/Inference/ApplierImpl.cs


m_ActionSpec = actionSpec;
}
public void Apply(TensorProxy tensorProxy, IEnumerable<int> actionIds, Dictionary<int, ActionBuffers> lastActions)
public void Apply(TensorProxy tensorProxy, IList<int> actionIds, Dictionary<int, ActionBuffers> lastActions)
foreach (int agentId in actionIds)
for (var i = 0; i < actionIds.Count; i++)
var agentId = actionIds[i];
if (lastActions.ContainsKey(agentId))
{
var actionBuffer = lastActions[agentId];

m_ActionSpec = actionSpec;
}
public void Apply(TensorProxy tensorProxy, IEnumerable<int> actionIds, Dictionary<int, ActionBuffers> lastActions)
public void Apply(TensorProxy tensorProxy, IList<int> actionIds, Dictionary<int, ActionBuffers> lastActions)
{
//var tensorDataProbabilities = tensorProxy.Data as float[,];
var idActionPairList = actionIds as List<int> ?? actionIds.ToList();

actionProbs.data.Dispose();
outputTensor.data.Dispose();
}
foreach (int agentId in actionIds)
for (var i = 0; i < actionIds.Count; i++)
var agentId = actionIds[i];
if (lastActions.ContainsKey(agentId))
{
var actionBuffer = lastActions[agentId];

m_Memories = memories;
}
public void Apply(TensorProxy tensorProxy, IEnumerable<int> actionIds, Dictionary<int, ActionBuffers> lastActions)
public void Apply(TensorProxy tensorProxy, IList<int> actionIds, Dictionary<int, ActionBuffers> lastActions)
foreach (int agentId in actionIds)
for (var i = 0; i < actionIds.Count; i++)
var agentId = actionIds[i];
List<float> memory;
if (!m_Memories.TryGetValue(agentId, out memory)
|| memory.Count < memorySize)

m_Memories = memories;
}
public void Apply(TensorProxy tensorProxy, IEnumerable<int> actionIds, Dictionary<int, ActionBuffers> lastActions)
public void Apply(TensorProxy tensorProxy, IList<int> actionIds, Dictionary<int, ActionBuffers> lastActions)
foreach (int agentId in actionIds)
for (var i = 0; i < actionIds.Count; i++)
var agentId = actionIds[i];
List<float> memory;
if (!m_Memories.TryGetValue(agentId, out memory)
|| memory.Count < memorySize * m_MemoriesCount)

36
com.unity.ml-agents/Runtime/Inference/GeneratorImpl.cs


m_Allocator = allocator;
}
public void Generate(TensorProxy tensorProxy, int batchSize, IEnumerable<AgentInfoSensorsPair> infos)
public void Generate(TensorProxy tensorProxy, int batchSize, IList<AgentInfoSensorsPair> infos)
{
TensorUtils.ResizeTensor(tensorProxy, batchSize, m_Allocator);
}

m_Allocator = allocator;
}
public void Generate(TensorProxy tensorProxy, int batchSize, IEnumerable<AgentInfoSensorsPair> infos)
public void Generate(TensorProxy tensorProxy, int batchSize, IList<AgentInfoSensorsPair> infos)
{
tensorProxy.data?.Dispose();
tensorProxy.data = m_Allocator.Alloc(new TensorShape(1, 1));

m_Allocator = allocator;
}
public void Generate(TensorProxy tensorProxy, int batchSize, IEnumerable<AgentInfoSensorsPair> infos)
public void Generate(TensorProxy tensorProxy, int batchSize, IList<AgentInfoSensorsPair> infos)
{
tensorProxy.shape = new long[0];
tensorProxy.data?.Dispose();

}
public void Generate(
TensorProxy tensorProxy, int batchSize, IEnumerable<AgentInfoSensorsPair> infos)
TensorProxy tensorProxy, int batchSize, IList<AgentInfoSensorsPair> infos)
foreach (var infoSensorPair in infos)
for (var infoIndex = 0; infoIndex < infos.Count; infoIndex++)
var infoSensorPair = infos[infoIndex];
var info = infoSensorPair.agentInfo;
List<float> memory;

m_Memories = memories;
}
public void Generate(TensorProxy tensorProxy, int batchSize, IEnumerable<AgentInfoSensorsPair> infos)
public void Generate(TensorProxy tensorProxy, int batchSize, IList<AgentInfoSensorsPair> infos)
foreach (var infoSensorPair in infos)
for (var infoIndex = 0; infoIndex < infos.Count; infoIndex++)
var infoSensorPair = infos[infoIndex];
var info = infoSensorPair.agentInfo;
var offset = memorySize * m_MemoryIndex;
List<float> memory;

m_Allocator = allocator;
}
public void Generate(TensorProxy tensorProxy, int batchSize, IEnumerable<AgentInfoSensorsPair> infos)
public void Generate(TensorProxy tensorProxy, int batchSize, IList<AgentInfoSensorsPair> infos)
foreach (var infoSensorPair in infos)
for (var infoIndex = 0; infoIndex < infos.Count; infoIndex++)
var infoSensorPair = infos[infoIndex];
var info = infoSensorPair.agentInfo;
var pastAction = info.storedActions.DiscreteActions;
if (!pastAction.IsEmpty())

m_Allocator = allocator;
}
public void Generate(TensorProxy tensorProxy, int batchSize, IEnumerable<AgentInfoSensorsPair> infos)
public void Generate(TensorProxy tensorProxy, int batchSize, IList<AgentInfoSensorsPair> infos)
foreach (var infoSensorPair in infos)
for (var infoIndex = 0; infoIndex < infos.Count; infoIndex++)
var infoSensorPair = infos[infoIndex];
var agentInfo = infoSensorPair.agentInfo;
var maskList = agentInfo.discreteActionMasks;
for (var j = 0; j < maskSize; j++)

m_Allocator = allocator;
}
public void Generate(TensorProxy tensorProxy, int batchSize, IEnumerable<AgentInfoSensorsPair> infos)
public void Generate(TensorProxy tensorProxy, int batchSize, IList<AgentInfoSensorsPair> infos)
{
TensorUtils.ResizeTensor(tensorProxy, batchSize, m_Allocator);
TensorUtils.FillTensorWithRandomNormal(tensorProxy, m_RandomNormal);

m_SensorIndices.Add(sensorIndex);
}
public void Generate(TensorProxy tensorProxy, int batchSize, IEnumerable<AgentInfoSensorsPair> infos)
public void Generate(TensorProxy tensorProxy, int batchSize, IList<AgentInfoSensorsPair> infos)
foreach (var info in infos)
for (var infoIndex = 0; infoIndex < infos.Count; infoIndex++)
var info = infos[infoIndex];
if (info.agentInfo.done)
{
// If the agent is done, we might have a stale reference to the sensors

{
var tensorOffset = 0;
// Write each sensor consecutively to the tensor
foreach (var sensorIndex in m_SensorIndices)
for (var sensorIndexIndex = 0; sensorIndexIndex < m_SensorIndices.Count; sensorIndexIndex++)
var sensorIndex = m_SensorIndices[sensorIndexIndex];
var sensor = info.sensors[sensorIndex];
m_ObservationWriter.SetTarget(tensorProxy, agentIndex, tensorOffset);
var numWritten = sensor.Write(m_ObservationWriter);

46
com.unity.ml-agents/Runtime/Inference/ModelRunner.cs


TensorApplier m_TensorApplier;
NNModel m_Model;
string m_ModelName;
IReadOnlyList<TensorProxy> m_InferenceOutputs;
List<TensorProxy> m_InferenceOutputs;
Dictionary<string, Tensor> m_InputsByName;
Dictionary<int, List<float>> m_Memories = new Dictionary<int, List<float>>();
SensorShapeValidator m_SensorShapeValidator = new SensorShapeValidator();

{
Model barracudaModel;
m_Model = model;
m_ModelName = model.name;
m_InferenceDevice = inferenceDevice;
m_TensorAllocator = new TensorCachingAllocator();
if (model != null)

seed, m_TensorAllocator, m_Memories, barracudaModel);
m_TensorApplier = new TensorApplier(
actionSpec, seed, m_TensorAllocator, m_Memories, barracudaModel);
m_InputsByName = new Dictionary<string, Tensor>();
m_InferenceOutputs = new List<TensorProxy>();
}
public InferenceDevice InferenceDevice

get { return m_Model; }
}
static Dictionary<string, Tensor> PrepareBarracudaInputs(IEnumerable<TensorProxy> infInputs)
void PrepareBarracudaInputs(IReadOnlyList<TensorProxy> infInputs)
var inputs = new Dictionary<string, Tensor>();
foreach (var inp in infInputs)
m_InputsByName.Clear();
for (var i = 0; i < infInputs.Count; i++)
inputs[inp.name] = inp.data;
var inp = infInputs[i];
m_InputsByName[inp.name] = inp.data;
return inputs;
}
public void Dispose()

m_TensorAllocator?.Reset(false);
}
List<TensorProxy> FetchBarracudaOutputs(string[] names)
void FetchBarracudaOutputs(string[] names)
var outputs = new List<TensorProxy>();
m_InferenceOutputs.Clear();
outputs.Add(TensorUtils.TensorProxyFromBarracuda(output, n));
m_InferenceOutputs.Add(TensorUtils.TensorProxyFromBarracuda(output, n));
return outputs;
}
public void PutObservations(AgentInfo info, List<ISensor> sensors)

}
Profiler.BeginSample("ModelRunner.DecideAction");
Profiler.BeginSample(m_ModelName);
Profiler.BeginSample($"MLAgents.{m_Model.name}.GenerateTensors");
Profiler.BeginSample($"GenerateTensors");
Profiler.BeginSample($"MLAgents.{m_Model.name}.PrepareBarracudaInputs");
var inputs = PrepareBarracudaInputs(m_InferenceInputs);
Profiler.BeginSample($"PrepareBarracudaInputs");
PrepareBarracudaInputs(m_InferenceInputs);
Profiler.BeginSample($"MLAgents.{m_Model.name}.ExecuteGraph");
m_Engine.Execute(inputs);
Profiler.BeginSample($"ExecuteGraph");
m_Engine.Execute(m_InputsByName);
Profiler.BeginSample($"MLAgents.{m_Model.name}.FetchBarracudaOutputs");
m_InferenceOutputs = FetchBarracudaOutputs(m_OutputNames);
Profiler.BeginSample($"FetchBarracudaOutputs");
FetchBarracudaOutputs(m_OutputNames);
Profiler.BeginSample($"MLAgents.{m_Model.name}.ApplyTensors");
Profiler.BeginSample($"ApplyTensors");
Profiler.EndSample();
Profiler.EndSample(); // end name
Profiler.EndSample(); // end ModelRunner.DecideAction
m_Infos.Clear();

7
com.unity.ml-agents/Runtime/Inference/TensorApplier.cs


/// </param>
/// <param name="actionIds"> List of Agents Ids that will be updated using the tensor's data</param>
/// <param name="lastActions"> Dictionary of AgentId to Actions to be updated</param>
void Apply(TensorProxy tensorProxy, IEnumerable<int> actionIds, Dictionary<int, ActionBuffers> lastActions);
void Apply(TensorProxy tensorProxy, IList<int> actionIds, Dictionary<int, ActionBuffers> lastActions);
}
readonly Dictionary<string, IApplier> m_Dict = new Dictionary<string, IApplier>();

/// <exception cref="UnityAgentsException"> One of the tensor does not have an
/// associated applier.</exception>
public void ApplyTensors(
IEnumerable<TensorProxy> tensors, IEnumerable<int> actionIds, Dictionary<int, ActionBuffers> lastActions)
IReadOnlyList<TensorProxy> tensors, IList<int> actionIds, Dictionary<int, ActionBuffers> lastActions)
foreach (var tensor in tensors)
for (var tensorIndex = 0; tensorIndex < tensors.Count; tensorIndex++)
var tensor = tensors[tensorIndex];
if (!m_Dict.ContainsKey(tensor.name))
{
throw new UnityAgentsException(

7
com.unity.ml-agents/Runtime/Inference/TensorGenerator.cs


/// the tensor's data.
/// </param>
void Generate(
TensorProxy tensorProxy, int batchSize, IEnumerable<AgentInfoSensorsPair> infos);
TensorProxy tensorProxy, int batchSize, IList<AgentInfoSensorsPair> infos);
}
readonly Dictionary<string, IGenerator> m_Dict = new Dictionary<string, IGenerator>();

/// <exception cref="UnityAgentsException"> One of the tensor does not have an
/// associated generator.</exception>
public void GenerateTensors(
IEnumerable<TensorProxy> tensors, int currentBatchSize, IEnumerable<AgentInfoSensorsPair> infos)
IReadOnlyList<TensorProxy> tensors, int currentBatchSize, IList<AgentInfoSensorsPair> infos)
foreach (var tensor in tensors)
for (var tensorIndex = 0; tensorIndex < tensors.Count; tensorIndex++)
var tensor = tensors[tensorIndex];
if (!m_Dict.ContainsKey(tensor.name))
{
throw new UnityAgentsException(

14
com.unity.ml-agents/Runtime/Policies/RemotePolicy.cs


using System.Collections.Generic;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Analytics;
namespace Unity.MLAgents.Policies
{

string m_FullyQualifiedBehaviorName;
ActionSpec m_ActionSpec;
ActionBuffers m_LastActionBuffer;
private bool m_AnalyticsSent = false;
internal ICommunicator m_Communicator;

{
m_FullyQualifiedBehaviorName = fullyQualifiedBehaviorName;
m_Communicator = Academy.Instance.Communicator;
m_Communicator.SubscribeBrain(m_FullyQualifiedBehaviorName, actionSpec);
m_Communicator?.SubscribeBrain(m_FullyQualifiedBehaviorName, actionSpec);
m_ActionSpec = actionSpec;
}

if (!m_AnalyticsSent)
{
m_AnalyticsSent = true;
TrainingAnalytics.RemotePolicyInitialized(
m_FullyQualifiedBehaviorName,
sensors,
m_ActionSpec
);
}
m_AgentId = info.episodeId;
m_Communicator?.PutObservations(m_FullyQualifiedBehaviorName, info, sensors);
}

8
com.unity.ml-agents/Runtime/Sensors/BufferSensor.cs


namespace Unity.MLAgents.Sensors
{
internal class BufferSensor : ISensor, IDimensionPropertiesSensor
internal class BufferSensor : ISensor, IDimensionPropertiesSensor, IBuiltInSensor
{
private int m_MaxNumObs;
private int m_ObsSize;

public string GetName()
{
return "BufferSensor";
}
/// <inheritdoc/>
public BuiltInSensorType GetBuiltInSensorType()
{
return BuiltInSensorType.BufferSensor;
}
}

15
com.unity.ml-agents/Runtime/Sensors/CameraSensor.cs


using UnityEngine;
using UnityEngine.Rendering;
namespace Unity.MLAgents.Sensors
{

public class CameraSensor : ISensor
public class CameraSensor : ISensor, IBuiltInSensor
{
Camera m_Camera;
int m_Width;

/// <returns name="texture2D">Texture2D to render to.</returns>
public static Texture2D ObservationToTexture(Camera obsCamera, int width, int height)
{
if (SystemInfo.graphicsDeviceType == GraphicsDeviceType.Null)
{
Debug.LogError("GraphicsDeviceType is Null. This will likely crash when trying to render.");
}
var texture2D = new Texture2D(width, height, TextureFormat.RGB24, false);
var oldRec = obsCamera.rect;
obsCamera.rect = new Rect(0f, 0f, 1f, 1f);

Object.Destroy(texture);
}
}
/// <inheritdoc/>
public BuiltInSensorType GetBuiltInSensorType()
{
return BuiltInSensorType.CameraSensor;
}
}
}

24
com.unity.ml-agents/Runtime/Sensors/ObservationWriter.cs


}
/// <summary>
/// 1D write access at a specified index. Use AddRange if possible instead.
/// 1D write access at a specified index. Use AddList if possible instead.
/// </summary>
/// <param name="index">Index to write to.</param>
public float this[int index]

/// </summary>
/// <param name="data"></param>
/// <param name="writeOffset">Optional write offset.</param>
[Obsolete("Use AddList() for better performance")]
public void AddRange(IEnumerable<float> data, int writeOffset = 0)
{
if (m_Data != null)

{
m_Proxy.data[m_Batch, index + m_Offset + writeOffset] = val;
index++;
}
}
}
public void AddList(IList<float> data, int writeOffset = 0)
{
if (m_Data != null)
{
for (var index = 0; index < data.Count; index++)
{
var val = data[index];
m_Data[index + m_Offset + writeOffset] = val;
}
}
else
{
for (var index = 0; index < data.Count; index++)
{
var val = data[index];
m_Proxy.data[m_Batch, index + m_Offset + writeOffset] = val;
}
}
}

10
com.unity.ml-agents/Runtime/Sensors/RayPerceptionSensor.cs


/// <summary>
/// A sensor implementation that supports ray cast-based observations.
/// </summary>
public class RayPerceptionSensor : ISensor
public class RayPerceptionSensor : ISensor, IBuiltInSensor
{
float[] m_Observations;
int[] m_Shape;

rayOutput.ToFloatArray(numDetectableTags, rayIndex, m_Observations);
}
// Finally, add the observations to the ObservationWriter
writer.AddRange(m_Observations);
writer.AddList(m_Observations);
}
return m_Observations.Length;
}

public virtual SensorCompressionType GetCompressionType()
{
return SensorCompressionType.None;
}
/// <inheritdoc/>
public BuiltInSensorType GetBuiltInSensorType()
{
return BuiltInSensorType.RayPerceptionSensor;
}
/// <summary>

9
com.unity.ml-agents/Runtime/Sensors/Reflection/ReflectionSensorBase.cs


/// <summary>
/// Abstract base class for reflection-based sensors.
/// </summary>
internal abstract class ReflectionSensorBase : ISensor
internal abstract class ReflectionSensorBase : ISensor, IBuiltInSensor
{
protected object m_Object;

{
return m_SensorName;
}
/// <inheritdoc/>
public BuiltInSensorType GetBuiltInSensorType()
{
return BuiltInSensorType.ReflectionSensor;
}
}
}

9
com.unity.ml-agents/Runtime/Sensors/RenderTextureSensor.cs


/// <summary>
/// Sensor class that wraps a [RenderTexture](https://docs.unity3d.com/ScriptReference/RenderTexture.html) instance.
/// </summary>
public class RenderTextureSensor : ISensor
public class RenderTextureSensor : ISensor, IBuiltInSensor
{
RenderTexture m_RenderTexture;
bool m_Grayscale;

{
return m_CompressionType;
}
/// <inheritdoc/>
public BuiltInSensorType GetBuiltInSensorType()
{
return BuiltInSensorType.RenderTextureSensor;
}
/// <summary>
/// Converts a RenderTexture to a 2D texture.

7
com.unity.ml-agents/Runtime/Sensors/SensorShapeValidator.cs


{
// Check for compatibility with the other Agents' Sensors
// TODO make sure this only checks once per agent
Debug.Assert(m_SensorShapes.Count == sensors.Count, $"Number of Sensors must match. {m_SensorShapes.Count} != {sensors.Count}");
Debug.AssertFormat(
m_SensorShapes.Count == sensors.Count,
"Number of Sensors must match. {0} != {1}",
m_SensorShapes.Count,
sensors.Count
);
for (var i = 0; i < Mathf.Min(m_SensorShapes.Count, sensors.Count); i++)
{
var cachedShape = m_SensorShapes[i];

11
com.unity.ml-agents/Runtime/Sensors/StackingSensor.cs


/// Internally, a circular buffer of arrays is used. The m_CurrentIndex represents the most recent observation.
/// Currently, observations are stacked on the last dimension.
/// </summary>
public class StackingSensor : ISparseChannelSensor
public class StackingSensor : ISparseChannelSensor, IBuiltInSensor
{
/// <summary>
/// The wrapped sensor.

for (var i = 0; i < m_NumStackedObservations; i++)
{
var obsIndex = (m_CurrentIndex + 1 + i) % m_NumStackedObservations;
writer.AddRange(m_StackedObservations[obsIndex], numWritten);
writer.AddList(m_StackedObservations[obsIndex], numWritten);
numWritten += m_UnstackedObservationSize;
}
}

}
}
return compressionMapping;
}
/// <inheritdoc/>
public BuiltInSensorType GetBuiltInSensorType()
{
IBuiltInSensor wrappedBuiltInSensor = m_WrappedSensor as IBuiltInSensor;
return wrappedBuiltInSensor?.GetBuiltInSensorType() ?? BuiltInSensorType.Unknown;
}
}
}

24
com.unity.ml-agents/Runtime/Sensors/VectorSensor.cs


using System;
using System.Collections.Generic;
using System.Collections.ObjectModel;
using UnityEngine;

/// <summary>
/// A sensor implementation for vector observations.
/// </summary>
public class VectorSensor : ISensor
public class VectorSensor : ISensor, IBuiltInSensor
{
// TODO use float[] instead
// TODO allow setting float[]

m_Observations.Add(0);
}
}
writer.AddRange(m_Observations);
writer.AddList(m_Observations);
return expectedObservations;
}

return SensorCompressionType.None;
}
/// <inheritdoc/>
public BuiltInSensorType GetBuiltInSensorType()
{
return BuiltInSensorType.VectorSensor;
}
void Clear()
{
m_Observations.Clear();

/// Adds a collection of float observations to the vector observations of the agent.
/// </summary>
/// <param name="observation">Observation.</param>
[Obsolete("Use AddObservation(IList<float>) for better performance.")]
}
}
/// <summary>
/// Adds a list or array of float observations to the vector observations of the agent.
/// </summary>
/// <param name="observation">Observation.</param>
public void AddObservation(IList<float> observation)
{
for (var i = 0; i < observation.Count; i++)
{
AddFloatObs(observation[i]);
}
}

26
com.unity.ml-agents/Runtime/SideChannels/SideChannelManager.cs


/// <returns></returns>
internal static byte[] GetSideChannelMessage(Dictionary<Guid, SideChannel> sideChannels)
{
if (!HasOutgoingMessages(sideChannels))
{
// Early out so that we don't create the MemoryStream or BinaryWriter.
// This is the most common case.
return Array.Empty<byte>();
}
using (var memStream = new MemoryStream())
{
using (var binaryWriter = new BinaryWriter(memStream))

return memStream.ToArray();
}
}
}
/// <summary>
/// Check whether any of the sidechannels have queued messages.
/// </summary>
/// <param name="sideChannels"></param>
/// <returns></returns>
static bool HasOutgoingMessages(Dictionary<Guid, SideChannel> sideChannels)
{
foreach (var sideChannel in sideChannels.Values)
{
var messageList = sideChannel.MessageQueue;
if (messageList.Count > 0)
{
return true;
}
}
return false;
}
/// <summary>

19
com.unity.ml-agents/Tests/Editor/Analytics/InferenceAnalyticsTests.cs


[SetUp]
public void SetUp()
{
if (Academy.IsInitialized)
{
Academy.Instance.Dispose();
}
continuousONNXModel = (NNModel)AssetDatabase.LoadAssetAtPath(k_continuousONNXPath, typeof(NNModel));
var go = new GameObject("SensorA");
sensor_21_20_3 = go.AddComponent<Test3DSensorComponent>();

Assert.AreEqual(3, continuousEvent.ObservationSpecs[0].DimensionInfos.Length);
Assert.AreEqual(20, continuousEvent.ObservationSpecs[0].DimensionInfos[0].Size);
Assert.AreEqual("None", continuousEvent.ObservationSpecs[0].CompressionType);
Assert.AreEqual(Test3DSensor.k_BuiltInSensorType, continuousEvent.ObservationSpecs[0].BuiltInSensorType);
Assert.AreNotEqual(null, continuousEvent.ModelHash);
// Make sure nested fields get serialized

Assert.IsTrue(jsonString.Contains("NumDiscreteActions"));
Assert.IsTrue(jsonString.Contains("SensorName"));
Assert.IsTrue(jsonString.Contains("Flags"));
}
[Test]
public void TestBarracudaPolicy()
{
// Explicitly request decisions for a policy so we get code coverage on the event sending
using (new AnalyticsUtils.DisableAnalyticsSending())
{
var sensors = new List<ISensor> { sensor_21_20_3.Sensor, sensor_20_22_3.Sensor };
var policy = new BarracudaPolicy(GetContinuous2vis8vec2actionActionSpec(), continuousONNXModel, InferenceDevice.CPU, "testBehavior");
policy.RequestDecision(new AgentInfo(), sensors);
}
Academy.Instance.Dispose();
}
}
}

32
com.unity.ml-agents/Tests/Editor/Communicator/GrpcExtensionsTests.cs


using NUnit.Framework;
using Unity.MLAgents.Policies;
using Unity.MLAgents.Demonstrations;
using Unity.MLAgents.Analytics;
using Unity.MLAgents.CommunicatorObjects;
using Unity.MLAgents.Demonstrations;
using Unity.MLAgents.Policies;
using Unity.MLAgents.Sensors;
namespace Unity.MLAgents.Tests

Assert.AreEqual(GrpcExtensions.IsTrivialMapping(sparseChannelSensor), false);
sparseChannelSensor.Mapping = new[] { 0, 0, 0, 1, 1, 1 };
Assert.AreEqual(GrpcExtensions.IsTrivialMapping(sparseChannelSensor), false);
}
[Test]
public void TestDefaultTrainingEvents()
{
var trainingEnvInit = new TrainingEnvironmentInitialized
{
PythonVersion = "test",
};
var trainingEnvInitEvent = trainingEnvInit.ToTrainingEnvironmentInitializedEvent();
Assert.AreEqual(trainingEnvInit.PythonVersion, trainingEnvInitEvent.TrainerPythonVersion);
var trainingBehavInit = new TrainingBehaviorInitialized
{
BehaviorName = "testBehavior",
ExtrinsicRewardEnabled = true,
CuriosityRewardEnabled = true,
RecurrentEnabled = true,
SelfPlayEnabled = true,
};
var trainingBehavInitEvent = trainingBehavInit.ToTrainingBehaviorInitializedEvent();
Assert.AreEqual(trainingBehavInit.BehaviorName, trainingBehavInitEvent.BehaviorName);
Assert.AreEqual(RewardSignals.Extrinsic | RewardSignals.Curiosity, trainingBehavInitEvent.RewardSignalFlags);
Assert.AreEqual(TrainingFeatures.Recurrent | TrainingFeatures.SelfPlay, trainingBehavInitEvent.TrainingFeatureFlags);
}
}
}

9
com.unity.ml-agents/Tests/Editor/ParameterLoaderTest.cs


return Sensor.GetObservationShape();
}
}
public class Test3DSensor : ISensor
public class Test3DSensor : ISensor, IBuiltInSensor
// Dummy value for the IBuiltInSensor interface
public const int k_BuiltInSensorType = -42;
public Test3DSensor(string name, int width, int height, int channels)
{

public string GetName()
{
return m_Name;
}
public BuiltInSensorType GetBuiltInSensorType()
{
return (BuiltInSensorType)k_BuiltInSensorType;
}
}

12
com.unity.ml-agents/Tests/Editor/Sensor/ObservationWriterTests.cs


writer[0] = 3f;
Assert.AreEqual(new[] { 1f, 3f, 2f }, buffer);
// AddRange
// AddList
writer.AddRange(new[] { 4f, 5f });
writer.AddList(new[] { 4f, 5f });
// AddRange with offset
// AddList with offset
writer.AddRange(new[] { 6f, 7f });
writer.AddList(new[] { 6f, 7f });
Assert.AreEqual(new[] { 4f, 6f, 7f }, buffer);
}

Assert.AreEqual(2f, t.data[1, 1]);
Assert.AreEqual(3f, t.data[1, 2]);
// AddRange
// AddList
t = new TensorProxy
{
valueType = TensorProxy.TensorType.FloatingPoint,

writer.SetTarget(t, 1, 1);
writer.AddRange(new[] { -1f, -2f });
writer.AddList(new[] { -1f, -2f });
Assert.AreEqual(0f, t.data[0, 0]);
Assert.AreEqual(0f, t.data[0, 1]);
Assert.AreEqual(0f, t.data[0, 2]);

15
com.unity.ml-agents/Tests/Editor/Sensor/StackingSensorTests.cs


{
return Mapping;
}
}
[Test]

var expected4 = sensor.CreateEmptyPNG();
expected4 = expected4.Concat(Array.ConvertAll(new[] { 10f, 11f, 12f }, (z) => (byte)z)).ToArray();
Assert.AreEqual(sensor.GetCompressedObservation(), expected4);
}
[Test]
public void TestStackingSensorBuiltInSensorType()
{
var dummySensor = new Dummy3DSensor();
dummySensor.Shape = new[] { 2, 2, 4 };
dummySensor.Mapping = new[] { 0, 1, 2, 3 };
var stackedDummySensor = new StackingSensor(dummySensor, 2);
Assert.AreEqual(stackedDummySensor.GetBuiltInSensorType(), BuiltInSensorType.Unknown);
var vectorSensor = new VectorSensor(4);
var stackedVectorSensor = new StackingSensor(vectorSensor, 4);
Assert.AreEqual(stackedVectorSensor.GetBuiltInSensorType(), BuiltInSensorType.VectorSensor);
}
}
}

4
com.unity.ml-agents/Tests/Runtime/RuntimeAPITest.cs


var behaviorParams = gameObject.AddComponent<BehaviorParameters>();
behaviorParams.BrainParameters.VectorObservationSize = 3;
behaviorParams.BrainParameters.NumStackedVectorObservations = 2;
behaviorParams.BrainParameters.VectorActionDescriptions = new[] { "TestActionA", "TestActionB" };
behaviorParams.BrainParameters.ActionSpec = ActionSpec.MakeDiscrete(2, 2);
behaviorParams.BrainParameters.VectorActionDescriptions = new[] { "Continuous1", "TestActionA", "TestActionB" };
behaviorParams.BrainParameters.ActionSpec = new ActionSpec(1, new []{2, 2});
behaviorParams.BehaviorName = "TestBehavior";
behaviorParams.TeamId = 42;
behaviorParams.UseChildSensors = true;

5
docs/Migrating.md


## Migrating to Release 13
### Implementing IHeuristic in your IActuator implementations
- If you have any custom actuators, you can now implement the `IHeuristicProvider` interface to have your actuator
handle the generation of actions when an Agent is running in heuristic mode.
handle the generation of actions when an Agent is running in heuristic mode.
- `VectorSensor.AddObservation(IEnumerable<float>)` is deprecated. Use `VectorSensor.AddObservation(IList<float>)`
instead.
- `ObservationWriter.AddRange()` is deprecated. Use `ObservationWriter.AddList()` instead.
# Migrating

10
docs/Training-ML-Agents.md


mlagents-learn --help
```
These additional CLI arguments are grouped into environment, engine and checkpoint. The available settings and example values are shown below.
These additional CLI arguments are grouped into environment, engine, checkpoint and torch.
The available settings and example values are shown below.
#### Environment settings

force: true
train_model: false
inference: false
```
#### Torch settings:
```yaml
torch_settings:
device: cpu
```
### Behavior Configurations

17
ml-agents-envs/mlagents_envs/communicator.py


from typing import Optional
from typing import Callable, Optional
# Function to call while waiting for a connection timeout.
# This should raise an exception if it needs to break from waiting for the timeout.
PollCallback = Callable[[], None]
class Communicator:

:int base_port: Baseline port number to connect to Unity environment over. worker_id increments over this.
"""
def initialize(self, inputs: UnityInputProto) -> UnityOutputProto:
def initialize(
self, inputs: UnityInputProto, poll_callback: Optional[PollCallback] = None
) -> UnityOutputProto:
:param poll_callback: Optional callback to be used while polling the connection.
def exchange(self, inputs: UnityInputProto) -> Optional[UnityOutputProto]:
def exchange(
self, inputs: UnityInputProto, poll_callback: Optional[PollCallback] = None
) -> Optional[UnityOutputProto]:
:param poll_callback: Optional callback to be used while polling the connection.
:return: The UnityOutputs generated by the Environment
"""

11
ml-agents-envs/mlagents_envs/communicator_objects/capabilities_pb2.py


name='mlagents_envs/communicator_objects/capabilities.proto',
package='communicator_objects',
syntax='proto3',
serialized_pb=_b('\n5mlagents_envs/communicator_objects/capabilities.proto\x12\x14\x63ommunicator_objects\"\x94\x01\n\x18UnityRLCapabilitiesProto\x12\x1a\n\x12\x62\x61seRLCapabilities\x18\x01 \x01(\x08\x12#\n\x1b\x63oncatenatedPngObservations\x18\x02 \x01(\x08\x12 \n\x18\x63ompressedChannelMapping\x18\x03 \x01(\x08\x12\x15\n\rhybridActions\x18\x04 \x01(\x08\x42%\xaa\x02\"Unity.MLAgents.CommunicatorObjectsb\x06proto3')
serialized_pb=_b('\n5mlagents_envs/communicator_objects/capabilities.proto\x12\x14\x63ommunicator_objects\"\xaf\x01\n\x18UnityRLCapabilitiesProto\x12\x1a\n\x12\x62\x61seRLCapabilities\x18\x01 \x01(\x08\x12#\n\x1b\x63oncatenatedPngObservations\x18\x02 \x01(\x08\x12 \n\x18\x63ompressedChannelMapping\x18\x03 \x01(\x08\x12\x15\n\rhybridActions\x18\x04 \x01(\x08\x12\x19\n\x11trainingAnalytics\x18\x05 \x01(\x08\x42%\xaa\x02\"Unity.MLAgents.CommunicatorObjectsb\x06proto3')
)

message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='trainingAnalytics', full_name='communicator_objects.UnityRLCapabilitiesProto.trainingAnalytics', index=4,
number=5, type=8, cpp_type=7, label=1,
has_default_value=False, default_value=False,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
],
extensions=[
],

oneofs=[
],
serialized_start=80,
serialized_end=228,
serialized_end=255,
)
DESCRIPTOR.message_types_by_name['UnityRLCapabilitiesProto'] = _UNITYRLCAPABILITIESPROTO

6
ml-agents-envs/mlagents_envs/communicator_objects/capabilities_pb2.pyi


concatenatedPngObservations = ... # type: builtin___bool
compressedChannelMapping = ... # type: builtin___bool
hybridActions = ... # type: builtin___bool
trainingAnalytics = ... # type: builtin___bool
def __init__(self,
*,

hybridActions : typing___Optional[builtin___bool] = None,
trainingAnalytics : typing___Optional[builtin___bool] = None,
) -> None: ...
@classmethod
def FromString(cls, s: builtin___bytes) -> UnityRLCapabilitiesProto: ...

def ClearField(self, field_name: typing_extensions___Literal[u"baseRLCapabilities",u"compressedChannelMapping",u"concatenatedPngObservations",u"hybridActions"]) -> None: ...
def ClearField(self, field_name: typing_extensions___Literal[u"baseRLCapabilities",u"compressedChannelMapping",u"concatenatedPngObservations",u"hybridActions",u"trainingAnalytics"]) -> None: ...
def ClearField(self, field_name: typing_extensions___Literal[u"baseRLCapabilities",b"baseRLCapabilities",u"compressedChannelMapping",b"compressedChannelMapping",u"concatenatedPngObservations",b"concatenatedPngObservations",u"hybridActions",b"hybridActions"]) -> None: ...
def ClearField(self, field_name: typing_extensions___Literal[u"baseRLCapabilities",b"baseRLCapabilities",u"compressedChannelMapping",b"compressedChannelMapping",u"concatenatedPngObservations",b"concatenatedPngObservations",u"hybridActions",b"hybridActions",u"trainingAnalytics",b"trainingAnalytics"]) -> None: ...

8
ml-agents-envs/mlagents_envs/env_utils.py


from mlagents_envs.exception import UnityEnvironmentException
logger = get_logger(__name__)
def get_platform():
"""
returns the platform of the operating system : linux, darwin or win32

.replace(".x86", "")
)
true_filename = os.path.basename(os.path.normpath(env_path))
get_logger(__name__).debug(f"The true file name is {true_filename}")
logger.debug(f"The true file name is {true_filename}")
if not (glob.glob(env_path) or glob.glob(env_path + ".*")):
return None

f"Couldn't launch the {file_name} environment. Provided filename does not match any environments."
)
else:
get_logger(__name__).debug(f"This is the launch string {launch_string}")
logger.debug(f"The launch string is {launch_string}")
logger.debug(f"Running with args {args}")
# Launch Unity environment
subprocess_args = [launch_string] + args
try:

55
ml-agents-envs/mlagents_envs/environment.py


# * 1.1.0 - support concatenated PNGs for compressed observations.
# * 1.2.0 - support compression mapping for stacked compressed observations.
# * 1.3.0 - support action spaces with both continuous and discrete actions.
API_VERSION = "1.3.0"
# * 1.4.0 - support training analytics sent from python trainer to the editor.
API_VERSION = "1.4.0"
# Default port that the editor listens on. If an environment executable
# isn't specified, this port will be used.

capabilities.concatenatedPngObservations = True
capabilities.compressedChannelMapping = True
capabilities.hybridActions = True
capabilities.trainingAnalytics = True
return capabilities
@staticmethod

# If true, this means the environment was successfully loaded
self._loaded = False
# The process that is started. If None, no process was started
self._proc1 = None
self._process: Optional[subprocess.Popen] = None
self.academy_capabilities: UnityRLCapabilitiesProto = None # type: ignore
# If the environment name is None, a new environment will not be launched
# and the communicator will directly try to connect to an existing unity environment.

)
if file_name is not None:
try:
self._proc1 = env_utils.launch_executable(
self._process = env_utils.launch_executable(
file_name, self._executable_args()
)
except UnityEnvironmentException:

self._env_actions: Dict[str, ActionTuple] = {}
self._is_first_message = True
self._update_behavior_specs(aca_output)
self.academy_capabilities = aca_params.capabilities
@staticmethod
def _get_communicator(worker_id, base_port, timeout_wait):

if self._no_graphics:
args += ["-nographics", "-batchmode"]
args += [UnityEnvironment._PORT_COMMAND_LINE_ARG, str(self._port)]
if self._log_folder:
# If the logfile arg isn't already set in the env args,
# try to set it to an output directory
logfile_set = "-logfile" in (arg.lower() for arg in self._additional_args)
if self._log_folder and not logfile_set:
log_file_path = os.path.join(
self._log_folder, f"Player-{self._worker_id}.log"
)

def reset(self) -> None:
if self._loaded:
outputs = self._communicator.exchange(self._generate_reset_input())
outputs = self._communicator.exchange(
self._generate_reset_input(), self._poll_process
)
if outputs is None:
raise UnityCommunicatorStoppedException("Communicator has exited.")
self._update_behavior_specs(outputs)

].action_spec.empty_action(n_agents)
step_input = self._generate_step_input(self._env_actions)
with hierarchical_timer("communicator.exchange"):
outputs = self._communicator.exchange(step_input)
outputs = self._communicator.exchange(step_input, self._poll_process)
if outputs is None:
raise UnityCommunicatorStoppedException("Communicator has exited.")
self._update_behavior_specs(outputs)

self._assert_behavior_exists(behavior_name)
return self._env_state[behavior_name]
def _poll_process(self) -> None:
    """
    Raise a UnityEnvironmentException if the launched Unity subprocess has exited.

    No-op when no subprocess was started or when it is still running.
    :return: None
    """
    process = self._process
    if not process:
        # Nothing was launched (e.g. attached to an already-running editor).
        return
    if process.poll() is None:
        # poll() returning None means the process is still alive.
        return
    raise UnityEnvironmentException(
        self._returncode_to_env_message(process.returncode)
    )
def close(self):
"""
Sends a shutdown signal to the unity environment, and closes the socket connection.

timeout = self._timeout_wait
self._loaded = False
self._communicator.close()
if self._proc1 is not None:
if self._process is not None:
self._proc1.wait(timeout=timeout)
signal_name = self._returncode_to_signal_name(self._proc1.returncode)
signal_name = f" ({signal_name})" if signal_name else ""
return_info = f"Environment shut down with return code {self._proc1.returncode}{signal_name}."
logger.info(return_info)
self._process.wait(timeout=timeout)
logger.info(self._returncode_to_env_message(self._process.returncode))
self._proc1.kill()
self._process.kill()
self._proc1 = None
self._process = None
@timed
def _generate_step_input(

) -> UnityOutputProto:
inputs = UnityInputProto()
inputs.rl_initialization_input.CopyFrom(init_parameters)
return self._communicator.initialize(inputs)
return self._communicator.initialize(inputs, self._poll_process)
@staticmethod
def _wrap_unity_input(rl_input: UnityRLInputProto) -> UnityInputProto:

except Exception:
# Should generally be a ValueError, but catch everything just in case.
return None
@staticmethod
def _returncode_to_env_message(returncode: int) -> str:
    """
    Build the human-readable shutdown message for a process return code,
    including the signal name (if the code maps to one).
    """
    signal_name = UnityEnvironment._returncode_to_signal_name(returncode)
    if signal_name:
        signal_name = f" ({signal_name})"
    else:
        signal_name = ""
    return f"Environment shut down with return code {returncode}{signal_name}."

12
ml-agents-envs/mlagents_envs/mock_communicator.py


from .communicator import Communicator
from typing import Optional
from .communicator import Communicator, PollCallback
from .environment import UnityEnvironment
from mlagents_envs.communicator_objects.unity_rl_output_pb2 import UnityRLOutputProto
from mlagents_envs.communicator_objects.brain_parameters_pb2 import (

self.brain_name = brain_name
self.vec_obs_size = vec_obs_size
def initialize(self, inputs: UnityInputProto) -> UnityOutputProto:
def initialize(
self, inputs: UnityInputProto, poll_callback: Optional[PollCallback] = None
) -> UnityOutputProto:
if self.is_discrete:
action_spec = ActionSpecProto(
num_discrete_actions=2, discrete_branch_sizes=[3, 2]

)
return dict_agent_info
def exchange(self, inputs: UnityInputProto) -> UnityOutputProto:
def exchange(
self, inputs: UnityInputProto, poll_callback: Optional[PollCallback] = None
) -> UnityOutputProto:
result = UnityRLOutputProto(agentInfos=self._get_agent_infos())
return UnityOutputProto(rl_output=result)

47
ml-agents-envs/mlagents_envs/rpc_communicator.py


import grpc
from typing import Optional
from multiprocessing import Pipe
from multiprocessing import Pipe
import time
from .communicator import Communicator
from .communicator import Communicator, PollCallback
from mlagents_envs.communicator_objects.unity_to_external_pb2_grpc import (
UnityToExternalProtoServicer,
add_UnityToExternalProtoServicer_to_server,

finally:
s.close()
def poll_for_timeout(self):
def poll_for_timeout(self, poll_callback: Optional[PollCallback] = None) -> None:
Additionally, a callback can be passed to periodically check the state of the environment.
This is used to detect the case when the environment dies without cleaning up the connection,
so that we can stop sooner and raise a more appropriate error.
if not self.unity_to_external.parent_conn.poll(self.timeout_wait):
raise UnityTimeOutException(
"The Unity environment took too long to respond. Make sure that :\n"
"\t The environment does not need user interaction to launch\n"
'\t The Agents\' Behavior Parameters > Behavior Type is set to "Default"\n'
"\t The environment and the Python interface have compatible versions."
)
deadline = time.monotonic() + self.timeout_wait
callback_timeout_wait = self.timeout_wait // 10
while time.monotonic() < deadline:
if self.unity_to_external.parent_conn.poll(callback_timeout_wait):
# Got an acknowledgment from the connection
return
if poll_callback:
# Fire the callback - if it detects something wrong, it should raise an exception.
poll_callback()
def initialize(self, inputs: UnityInputProto) -> UnityOutputProto:
self.poll_for_timeout()
# Got this far without reading any data from the connection, so it must be dead.
raise UnityTimeOutException(
"The Unity environment took too long to respond. Make sure that :\n"
"\t The environment does not need user interaction to launch\n"
'\t The Agents\' Behavior Parameters > Behavior Type is set to "Default"\n'
"\t The environment and the Python interface have compatible versions."
)
def initialize(
self, inputs: UnityInputProto, poll_callback: Optional[PollCallback] = None
) -> UnityOutputProto:
self.poll_for_timeout(poll_callback)
aca_param = self.unity_to_external.parent_conn.recv().unity_output
message = UnityMessageProto()
message.header.status = 200

return aca_param
def exchange(self, inputs: UnityInputProto) -> Optional[UnityOutputProto]:
def exchange(
self, inputs: UnityInputProto, poll_callback: Optional[PollCallback] = None
) -> Optional[UnityOutputProto]:
self.poll_for_timeout()
self.poll_for_timeout(poll_callback)
output = self.unity_to_external.parent_conn.recv()
if output.header.status != 200:
return None

2
ml-agents-envs/mlagents_envs/side_channel/engine_configuration_channel.py


"""
raise UnityCommunicationException(
"The EngineConfigurationChannel received a message from Unity, "
+ "this should not have happend."
+ "this should not have happened."
)
def set_configuration_parameters(

2
ml-agents-envs/mlagents_envs/side_channel/environment_parameters_channel.py


def on_message_received(self, msg: IncomingMessage) -> None:
raise UnityCommunicationException(
"The EnvironmentParametersChannel received a message from Unity, "
+ "this should not have happend."
+ "this should not have happened."
)
def set_float_parameter(self, key: str, value: float) -> None:

54
ml-agents-envs/mlagents_envs/tests/test_rpc_communicator.py


import pytest
from unittest import mock
import grpc
import mlagents_envs.rpc_communicator
from mlagents_envs.exception import UnityWorkerInUseException
from mlagents_envs.exception import (
UnityWorkerInUseException,
UnityTimeOutException,
UnityEnvironmentException,
)
from mlagents_envs.communicator_objects.unity_input_pb2 import UnityInputProto
def test_rpc_communicator_checks_port_on_create():

second_comm = RpcCommunicator(worker_id=1)
first_comm.close()
second_comm.close()
@mock.patch.object(grpc, "server")
@mock.patch.object(
    mlagents_envs.rpc_communicator, "UnityToExternalServicerImplementation"
)
def test_rpc_communicator_initialize_OK(mock_impl, mock_grpc_server):
    # Happy path: the mocked connection reports data immediately available,
    # so initialize() should complete without raising.
    comm = RpcCommunicator(timeout_wait=0.25)
    comm.unity_to_external.parent_conn.poll.return_value = True
    input = UnityInputProto()
    comm.initialize(input)
    # initialize() must have polled the connection at least once.
    comm.unity_to_external.parent_conn.poll.assert_called()
@mock.patch.object(grpc, "server")
@mock.patch.object(
    mlagents_envs.rpc_communicator, "UnityToExternalServicerImplementation"
)
def test_rpc_communicator_initialize_timeout(mock_impl, mock_grpc_server):
    # The mocked connection never reports data (poll() returns None),
    # so initialize() should give up after timeout_wait seconds.
    comm = RpcCommunicator(timeout_wait=0.25)
    comm.unity_to_external.parent_conn.poll.return_value = None
    input = UnityInputProto()
    # Expect a timeout
    with pytest.raises(UnityTimeOutException):
        comm.initialize(input)
    comm.unity_to_external.parent_conn.poll.assert_called()
@mock.patch.object(grpc, "server")
@mock.patch.object(
    mlagents_envs.rpc_communicator, "UnityToExternalServicerImplementation"
)
def test_rpc_communicator_initialize_callback(mock_impl, mock_grpc_server):
    """An exception raised by the poll callback must propagate out of initialize()."""

    def failing_callback():
        raise UnityEnvironmentException

    communicator = RpcCommunicator(timeout_wait=0.25)
    # The connection never reports data, so the callback keeps getting fired.
    communicator.unity_to_external.parent_conn.poll.return_value = None
    proto_input = UnityInputProto()
    with pytest.raises(UnityEnvironmentException):
        communicator.initialize(proto_input, poll_callback=failing_callback)
    communicator.unity_to_external.parent_conn.poll.assert_called()

1
ml-agents/mlagents/torch_utils/__init__.py


from mlagents.torch_utils.torch import torch as torch # noqa
from mlagents.torch_utils.torch import nn # noqa
from mlagents.torch_utils.torch import set_torch_config # noqa
from mlagents.torch_utils.torch import default_device # noqa

37
ml-agents/mlagents/torch_utils/torch.py


from distutils.version import LooseVersion
import pkg_resources
from mlagents.torch_utils import cpu_utils
from mlagents.trainers.settings import TorchSettings
from mlagents_envs.logging_util import get_logger
logger = get_logger(__name__)
def assert_torch_installed():

torch.set_num_threads(cpu_utils.get_num_threads_to_use())
os.environ["KMP_BLOCKTIME"] = "0"
if torch.cuda.is_available():
torch.set_default_tensor_type(torch.cuda.FloatTensor)
device = torch.device("cuda")
else:
torch.set_default_tensor_type(torch.FloatTensor)
device = torch.device("cpu")
_device = torch.device("cpu")
def set_torch_config(torch_settings: TorchSettings) -> None:
    """
    Set the module-level default torch device (and default tensor type) from
    the given settings.

    :param torch_settings: Settings whose `device` field names the device to
        use; when None, CUDA is selected if available, otherwise CPU.
    """
    global _device
    device_str = torch_settings.device
    if device_str is None:
        # No explicit request: prefer CUDA whenever the runtime reports it.
        device_str = "cuda" if torch.cuda.is_available() else "cpu"
    _device = torch.device(device_str)
    if _device.type == "cuda":
        torch.set_default_tensor_type(torch.cuda.FloatTensor)
    else:
        torch.set_default_tensor_type(torch.FloatTensor)
    logger.info(f"default Torch device: {_device}")
# Initialize to default settings
set_torch_config(TorchSettings(device=None))
return device
return _device

15
ml-agents/mlagents/trainers/cli_utils.py


action=DetectDefault,
)
argparser.add_argument(
"--cpu",
default=False,
action=DetectDefaultStoreTrue,
help="Forces training using CPU only",
)
argparser.add_argument(
"--torch",
default=False,
action=RaiseRemovedWarning,

action=DetectDefaultStoreTrue,
help="Whether to run the Unity executable in no-graphics mode (i.e. without initializing "
"the graphics driver. Use this only if your agents don't use visual observations.",
)
torch_conf = argparser.add_argument_group(title="Torch Configuration")
torch_conf.add_argument(
"--torch-device",
default=None,
dest="device",
action=DetectDefault,
help='Settings for the default torch.device used in training, for example, "cpu", "cuda", or "cuda:0"',
)
return argparser

12
ml-agents/mlagents/trainers/env_manager.py


from mlagents.trainers.policy import Policy
from mlagents.trainers.agent_processor import AgentManager, AgentManagerQueue
from mlagents.trainers.action_info import ActionInfo
from mlagents.trainers.settings import TrainerSettings
from mlagents_envs.logging_util import get_logger
AllStepResult = Dict[BehaviorName, Tuple[DecisionSteps, TerminalSteps]]

Sends environment parameter settings to C# via the
EnvironmentParametersSideChannel.
:param config: Dict of environment parameter keys and values
"""
pass
def on_training_started(
    self, behavior_name: str, trainer_settings: TrainerSettings
) -> None:
    """
    Handle training starting for a new behavior type. Generally nothing is
    necessary here; subclasses may override to react to the event.
    :param behavior_name: Name of the behavior whose training is starting.
    :param trainer_settings: Settings of the trainer assigned to that behavior.
    :return: None
    """
    pass

15
ml-agents/mlagents/trainers/learn.py


from mlagents_envs.base_env import BaseEnv
from mlagents.trainers.subprocess_env_manager import SubprocessEnvManager
from mlagents_envs.side_channel.side_channel import SideChannel
from mlagents_envs.side_channel.engine_configuration_channel import EngineConfig
from mlagents_envs.timers import (
hierarchical_timer,
get_timer_tree,

:param run_options: Command line arguments for training.
"""
with hierarchical_timer("run_training.setup"):
torch_utils.set_torch_config(options.torch_settings)
checkpoint_settings = options.checkpoint_settings
env_settings = options.env_settings
engine_settings = options.engine_settings

env_settings.env_args,
os.path.abspath(run_logs_dir), # Unity environment requires absolute path
)
engine_config = EngineConfig(
width=engine_settings.width,
height=engine_settings.height,
quality_level=engine_settings.quality_level,
time_scale=engine_settings.time_scale,
target_frame_rate=engine_settings.target_frame_rate,
capture_frame_rate=engine_settings.capture_frame_rate,
)
env_manager = SubprocessEnvManager(
env_factory, engine_config, env_settings.num_envs
)
env_manager = SubprocessEnvManager(env_factory, options, env_settings.num_envs)
env_parameter_manager = EnvironmentParameterManager(
options.environment_parameters, run_seed, restore=checkpoint_settings.resume
)

9
ml-agents/mlagents/trainers/settings.py


@attr.s(auto_attribs=True)
class TorchSettings:
device: Optional[str] = parser.get_default("torch_device")
@attr.s(auto_attribs=True)
class RunOptions(ExportableSettings):
default_settings: Optional[TrainerSettings] = None
behaviors: DefaultDict[str, TrainerSettings] = attr.ib(

engine_settings: EngineSettings = attr.ib(factory=EngineSettings)
environment_parameters: Optional[Dict[str, EnvironmentParameterSettings]] = None
checkpoint_settings: CheckpointSettings = attr.ib(factory=CheckpointSettings)
torch_settings: TorchSettings = attr.ib(factory=TorchSettings)
# These are options that are relevant to the run itself, and not the engine or environment.
# They will be left here.

"checkpoint_settings": {},
"env_settings": {},
"engine_settings": {},
"torch_settings": {},
}
if config_path is not None:
configured_dict.update(load_config(config_path))

configured_dict["env_settings"][key] = val
elif key in attr.fields_dict(EngineSettings):
configured_dict["engine_settings"][key] = val
elif key in attr.fields_dict(TorchSettings):
configured_dict["torch_settings"][key] = val
else: # Base options
configured_dict[key] = val

81
ml-agents/mlagents/trainers/subprocess_env_manager.py


from mlagents_envs.base_env import BaseEnv, BehaviorName, BehaviorSpec
from mlagents_envs import logging_util
from mlagents.trainers.env_manager import EnvManager, EnvironmentStep, AllStepResult
from mlagents.trainers.settings import TrainerSettings
from mlagents_envs.timers import (
TimerNode,
timed,

)
from mlagents.trainers.settings import ParameterRandomizationSettings
from mlagents.trainers.settings import ParameterRandomizationSettings, RunOptions
from mlagents.trainers.action_info import ActionInfo
from mlagents_envs.side_channel.environment_parameters_channel import (
EnvironmentParametersChannel,

EngineConfig,
)
from mlagents_envs.side_channel.stats_side_channel import (
EnvironmentStats,
EnvironmentStats,
from mlagents.training_analytics_side_channel import TrainingAnalyticsSideChannel
from mlagents_envs.side_channel.side_channel import SideChannel

CLOSE = 5
ENV_EXITED = 6
CLOSED = 7
TRAINING_STARTED = 8
class EnvironmentRequest(NamedTuple):

step_queue: Queue,
pickled_env_factory: str,
worker_id: int,
engine_configuration: EngineConfig,
run_options: RunOptions,
log_level: int = logging_util.INFO,
) -> None:
env_factory: Callable[

engine_config = EngineConfig(
width=run_options.engine_settings.width,
height=run_options.engine_settings.height,
quality_level=run_options.engine_settings.quality_level,
time_scale=run_options.engine_settings.time_scale,
target_frame_rate=run_options.engine_settings.target_frame_rate,
capture_frame_rate=run_options.engine_settings.capture_frame_rate,
)
engine_configuration_channel.set_configuration(engine_configuration)
engine_configuration_channel.set_configuration(engine_config)
env: BaseEnv = None
training_analytics_channel: Optional[TrainingAnalyticsSideChannel] = None
if worker_id == 0:
training_analytics_channel = TrainingAnalyticsSideChannel()
env: UnityEnvironment = None
# Set log level. On some platforms, the logger isn't common with the
# main process, so we need to set it again.
logging_util.set_log_level(log_level)

return all_step_result
try:
env = env_factory(
worker_id, [env_parameters, engine_configuration_channel, stats_channel]
)
side_channels = [env_parameters, engine_configuration_channel, stats_channel]
if training_analytics_channel is not None:
side_channels.append(training_analytics_channel)
env = env_factory(worker_id, side_channels)
if (
not env.academy_capabilities
or not env.academy_capabilities.trainingAnalytics
):
# Make sure we don't try to send training analytics if the environment doesn't know how to process
# them. This wouldn't be catastrophic, but would result in unknown SideChannel UUIDs being used.
training_analytics_channel = None
if training_analytics_channel:
training_analytics_channel.environment_initialized(run_options)
while True:
req: EnvironmentRequest = parent_conn.recv()
if req.cmd == EnvironmentCommand.STEP:

for k, v in req.payload.items():
if isinstance(v, ParameterRandomizationSettings):
v.apply(k, env_parameters)
elif req.cmd == EnvironmentCommand.TRAINING_STARTED:
behavior_name, trainer_config = req.payload
if training_analytics_channel:
training_analytics_channel.training_started(
behavior_name, trainer_config
)
elif req.cmd == EnvironmentCommand.RESET:
env.reset()
all_step_result = _generate_all_results()

)
_send_response(EnvironmentCommand.ENV_EXITED, ex)
except Exception as ex:
logger.error(
logger.exception(
f"UnityEnvironment worker {worker_id}: environment raised an unexpected exception."
)
step_queue.put(

def __init__(
self,
env_factory: Callable[[int, List[SideChannel]], BaseEnv],
engine_configuration: EngineConfig,
run_options: RunOptions,
n_env: int = 1,
):
super().__init__()

for worker_idx in range(n_env):
self.env_workers.append(
self.create_worker(
worker_idx, self.step_queue, env_factory, engine_configuration
worker_idx, self.step_queue, env_factory, run_options
)
)
self.workers_alive += 1

worker_id: int,
step_queue: Queue,
env_factory: Callable[[int, List[SideChannel]], BaseEnv],
engine_configuration: EngineConfig,
run_options: RunOptions,
) -> UnityEnvWorker:
parent_conn, child_conn = Pipe()

step_queue,
pickled_env_factory,
worker_id,
engine_configuration,
run_options,
logger.level,
),
)

for ew in self.env_workers:
ew.send(EnvironmentCommand.ENVIRONMENT_PARAMETERS, config)
def on_training_started(
    self, behavior_name: str, trainer_settings: TrainerSettings
) -> None:
    """
    Broadcast a TRAINING_STARTED command (with the behavior name and its
    trainer settings) to every environment worker.
    :param behavior_name: Name of the behavior whose training is starting.
    :param trainer_settings: Settings of the trainer assigned to that behavior.
    :return: None
    """
    payload = (behavior_name, trainer_settings)
    for worker in self.env_workers:
        worker.send(EnvironmentCommand.TRAINING_STARTED, payload)
self.env_workers[0].send(EnvironmentCommand.BEHAVIOR_SPECS)
return self.env_workers[0].recv().payload
result: Dict[BehaviorName, BehaviorSpec] = {}
for worker in self.env_workers:
worker.send(EnvironmentCommand.BEHAVIOR_SPECS)
result.update(worker.recv().payload)
return result
def close(self) -> None:
logger.debug("SubprocessEnvManager closing.")

2
ml-agents/mlagents/trainers/tests/simple_test_envs.py


self.step_result: Dict[str, Tuple[DecisionSteps, TerminalSteps]] = {}
self.agent_id: Dict[str, int] = {}
self.step_size = step_size # defines the difficulty of the test
# Allow to be used as a UnityEnvironment during tests
self.academy_capabilities = None
for name in self.names:
self.agent_id[name] = 0

66
ml-agents/mlagents/trainers/tests/test_subprocess_env_manager.py


import pytest
from queue import Empty as EmptyQueue
from mlagents.trainers.settings import RunOptions
from mlagents.trainers.subprocess_env_manager import (
SubprocessEnvManager,
EnvironmentResponse,

from mlagents.trainers.env_manager import EnvironmentStep
from mlagents_envs.base_env import BaseEnv
from mlagents_envs.side_channel.engine_configuration_channel import EngineConfig
from mlagents_envs.side_channel.stats_side_channel import StatsAggregationMethod
from mlagents_envs.exception import UnityEnvironmentException
from mlagents.trainers.tests.simple_test_envs import (

)
def test_environments_are_created(self, mock_create_worker):
mock_create_worker.side_effect = create_worker_mock
env = SubprocessEnvManager(mock_env_factory, EngineConfig.default_config(), 2)
run_options = RunOptions()
env = SubprocessEnvManager(mock_env_factory, run_options, 2)
mock.call(
0, env.step_queue, mock_env_factory, EngineConfig.default_config()
),
mock.call(
1, env.step_queue, mock_env_factory, EngineConfig.default_config()
),
mock.call(0, env.step_queue, mock_env_factory, run_options),
mock.call(1, env.step_queue, mock_env_factory, run_options),
]
)
self.assertEqual(len(env.env_workers), 2)

)
def test_reset_passes_reset_params(self, mock_create_worker):
mock_create_worker.side_effect = create_worker_mock
manager = SubprocessEnvManager(
mock_env_factory, EngineConfig.default_config(), 1
)
manager = SubprocessEnvManager(mock_env_factory, RunOptions(), 1)
params = {"test": "params"}
manager._reset_env(params)
manager.env_workers[0].send.assert_called_with(

)
def test_reset_collects_results_from_all_envs(self, mock_create_worker):
mock_create_worker.side_effect = create_worker_mock
manager = SubprocessEnvManager(
mock_env_factory, EngineConfig.default_config(), 4
)
manager = SubprocessEnvManager(mock_env_factory, RunOptions(), 4)
params = {"test": "params"}
res = manager._reset_env(params)

@mock.patch(
    "mlagents.trainers.subprocess_env_manager.SubprocessEnvManager.create_worker"
)
def test_training_behaviors_collects_results_from_all_envs(
    self, mock_create_worker
):
    """training_behaviors must merge the specs reported by every worker."""

    def fake_worker(worker_id, step_queue, env_factory, run_options):
        # Each worker answers with a unique key so merging can be verified.
        response = EnvironmentResponse(
            EnvironmentCommand.RESET, worker_id, {f"key{worker_id}": worker_id}
        )
        return MockEnvWorker(worker_id, response)

    mock_create_worker.side_effect = fake_worker
    manager = SubprocessEnvManager(mock_env_factory, RunOptions(), 4)
    result = manager.training_behaviors
    for worker in manager.env_workers:
        worker.send.assert_called_with(EnvironmentCommand.BEHAVIOR_SPECS)
        worker.recv.assert_called()
    for worker_id in range(4):
        key = f"key{worker_id}"
        assert key in result
        assert result[key] == worker_id
@mock.patch(
"mlagents.trainers.subprocess_env_manager.SubprocessEnvManager.create_worker"
)
manager = SubprocessEnvManager(
mock_env_factory, EngineConfig.default_config(), 3
)
manager = SubprocessEnvManager(mock_env_factory, RunOptions(), 3)
manager.step_queue = Mock()
manager.step_queue.get_nowait.side_effect = [
EnvironmentResponse(EnvironmentCommand.STEP, 0, StepResponse(0, None, {})),

brain_name = "testbrain"
action_info_dict = {brain_name: MagicMock()}
mock_create_worker.side_effect = create_worker_mock
env_manager = SubprocessEnvManager(
mock_env_factory, EngineConfig.default_config(), 3
)
env_manager = SubprocessEnvManager(mock_env_factory, RunOptions(), 3)
training_behaviors_mock.return_value = [brain_name]
agent_manager_mock = mock.Mock()
mock_policy = mock.Mock()

env = SimpleEnvironment(["1D"], action_sizes=(0, 1))
return env
env_manager = SubprocessEnvManager(
simple_env_factory, EngineConfig.default_config(), num_envs
)
env_manager = SubprocessEnvManager(simple_env_factory, RunOptions(), num_envs)
# Run PPO using env_manager
check_environment_trains(
simple_env_factory(0, []),

)
return env
env_manager = SubprocessEnvManager(
failing_step_env_factory, EngineConfig.default_config()
)
env_manager = SubprocessEnvManager(failing_step_env_factory, RunOptions())
# Expect the exception raised to be routed back up to the top level.
with pytest.raises(CustomTestOnlyException):
check_environment_trains(

time.sleep(0.5)
raise UnityEnvironmentException()
env_manager = SubprocessEnvManager(
failing_env_factory, EngineConfig.default_config(), num_envs
)
env_manager = SubprocessEnvManager(failing_env_factory, RunOptions(), num_envs)
with pytest.raises(UnityEnvironmentException):
env_manager.reset()
env_manager.close()

8
ml-agents/mlagents/trainers/tests/torch/saver/test_saver.py


import os
import numpy as np
from mlagents.torch_utils import torch
from mlagents.torch_utils import torch, default_device
from mlagents.trainers.policy.torch_policy import TorchPolicy
from mlagents.trainers.ppo.optimizer_torch import TorchPPOOptimizer
from mlagents.trainers.model_saver.torch_model_saver import TorchModelSaver

"""
Make sure two policies have the same output for the same input.
"""
policy1.actor_critic = policy1.actor_critic.to(default_device())
policy2.actor_critic = policy2.actor_critic.to(default_device())
decision_step, _ = mb.create_steps_from_behavior_spec(
policy1.behavior_spec, num_agents=1
)

tensor_obs, masks=masks, memories=memories
)
np.testing.assert_array_equal(
log_probs1.all_discrete_tensor, log_probs2.all_discrete_tensor
ModelUtils.to_numpy(log_probs1.all_discrete_tensor),
ModelUtils.to_numpy(log_probs2.all_discrete_tensor),
)

4
ml-agents/mlagents/trainers/tests/torch/test_action_model.py


for _disc in log_probs.all_discrete_list:
assert _disc.shape == (1, 2)
for clp in log_probs.continuous_tensor[0]:
for clp in log_probs.continuous_tensor[0].tolist():
for ent, val in zip(entropies[0], [1.4189, 0.6191, 0.6191]):
for ent, val in zip(entropies[0].tolist(), [1.4189, 0.6191, 0.6191]):
assert ent == pytest.approx(val, abs=0.01)

10
ml-agents/mlagents/trainers/tests/torch/test_distributions.py


optimizer.zero_grad()
loss.backward()
optimizer.step()
for prob in log_prob.flatten():
for prob in log_prob.flatten().tolist():
assert prob == pytest.approx(-2, abs=0.1)

dist_insts = gauss_dist(sample_embedding, masks=masks)
for dist_inst in dist_insts:
log_prob = dist_inst.all_log_prob()
assert log_prob.flatten()[-1] == pytest.approx(0, abs=0.001)
assert log_prob.flatten()[-1].tolist() == pytest.approx(0, abs=0.001)
def test_gaussian_dist_instance():

)
action = dist_instance.sample()
assert action.shape == (1, act_size)
for log_prob in dist_instance.log_prob(torch.zeros((1, act_size))).flatten():
for log_prob in (
dist_instance.log_prob(torch.zeros((1, act_size))).flatten().tolist()
):
for ent in dist_instance.entropy().flatten():
for ent in dist_instance.entropy().flatten().tolist():
# entropy of standard normal at 0, based on 1/2 + ln(sqrt(2pi)sigma)
assert ent == pytest.approx(1.42, abs=0.01)

4
ml-agents/mlagents/trainers/tests/torch/test_encoders.py


norm.update(vec_input3)
# Test normalization
for val in norm(vec_input1)[0]:
for val in norm(vec_input1)[0].tolist():
assert val == pytest.approx(0.707, abs=0.001)
# Test copy normalization

assert compare_models(norm, norm2)
for val in norm2(vec_input1)[0]:
for val in norm2(vec_input1)[0].tolist():
assert val == pytest.approx(0.707, abs=0.001)

6
ml-agents/mlagents/trainers/tests/torch/test_hybrid.py


SAC_TORCH_CONFIG = sac_dummy_config()
@pytest.mark.check_environment_trains
@pytest.mark.parametrize("action_size", [(1, 1), (2, 2), (1, 2), (2, 1)])
def test_hybrid_ppo(action_size):
env = SimpleEnvironment([BRAIN_NAME], action_sizes=action_size, step_size=0.8)

check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
@pytest.mark.check_environment_trains
@pytest.mark.parametrize("num_visual", [1, 2])
def test_hybrid_visual_ppo(num_visual):
env = SimpleEnvironment(

check_environment_trains(env, {BRAIN_NAME: config}, training_seed=1336)
@pytest.mark.check_environment_trains
def test_hybrid_recurrent_ppo():
env = MemoryEnvironment([BRAIN_NAME], action_sizes=(1, 1), step_size=0.5)
new_network_settings = attr.evolve(

check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
@pytest.mark.check_environment_trains
@pytest.mark.parametrize("action_size", [(1, 1), (2, 2), (1, 2), (2, 1)])
def test_hybrid_sac(action_size):
env = SimpleEnvironment([BRAIN_NAME], action_sizes=action_size, step_size=0.8)

)
@pytest.mark.check_environment_trains
@pytest.mark.parametrize("num_visual", [1, 2])
def test_hybrid_visual_sac(num_visual):
env = SimpleEnvironment(

check_environment_trains(env, {BRAIN_NAME: config})
@pytest.mark.check_environment_trains
def test_hybrid_recurrent_sac():
env = MemoryEnvironment([BRAIN_NAME], action_sizes=(1, 1), step_size=0.5)
new_networksettings = attr.evolve(

8
ml-agents/mlagents/trainers/tests/torch/test_networks.py


loss.backward()
optimizer.step()
# In the last step, values should be close to 1
for _enc in encoded.flatten():
for _enc in encoded.flatten().tolist():
assert _enc == pytest.approx(1.0, abs=0.1)

loss.backward()
optimizer.step()
# In the last step, values should be close to 1
for _enc in encoded.flatten():
for _enc in encoded.flatten().tolist():
assert _enc == pytest.approx(1.0, abs=0.1)

loss.backward()
optimizer.step()
# In the last step, values should be close to 1
for _enc in encoded.flatten():
for _enc in encoded.flatten().tolist():
assert _enc == pytest.approx(1.0, abs=0.1)

optimizer.step()
# In the last step, values should be close to 1
for value in values.values():
for _out in value:
for _out in value.tolist():
assert _out[0] == pytest.approx(1.0, abs=0.1)

3
ml-agents/mlagents/trainers/tests/torch/test_simple_rl.py


PPO_TORCH_CONFIG = ppo_dummy_config()
SAC_TORCH_CONFIG = sac_dummy_config()
# tests in this file won't be tested on GPU machine
pytestmark = pytest.mark.check_environment_trains
@pytest.mark.parametrize("action_sizes", [(0, 1), (1, 0)])
def test_simple_ppo(action_sizes):

4
ml-agents/mlagents/trainers/torch/encoders.py


if not exporting_to_onnx.is_exporting():
visual_obs = visual_obs.permute([0, 3, 1, 2])
hidden = self.conv_layers(visual_obs)
hidden = torch.reshape(hidden, (-1, self.final_flat))
hidden = hidden.reshape(-1, self.final_flat)
return self.dense(hidden)

if not exporting_to_onnx.is_exporting():
visual_obs = visual_obs.permute([0, 3, 1, 2])
hidden = self.conv_layers(visual_obs)
hidden = torch.reshape(hidden, (-1, self.final_flat))
hidden = hidden.reshape(-1, self.final_flat)
return self.dense(hidden)

3
ml-agents/mlagents/trainers/trainer_controller.py


target=self.trainer_update_func, args=(trainer,), daemon=True
)
self.trainer_threads.append(trainerthread)
env_manager.on_training_started(
brain_name, self.trainer_factory.trainer_config[brain_name]
)
policy = trainer.create_policy(
parsed_behavior_id,

3
protobuf-definitions/proto/mlagents_envs/communicator_objects/capabilities.proto


// support for hybrid action spaces (discrete + continuous)
bool hybridActions = 4;
// support for training analytics
bool trainingAnalytics = 5;
}

150
utils/make_readme_table.py


Generate the "Releases" table on the main readme. Update the versions lists, run this script, and copy the output
into the markdown file.
"""
from distutils.version import LooseVersion
from distutils.version import LooseVersion, StrictVersion
from collections import Counter
MAX_DAYS = 150 # do not print releases older than this many days
def table_line(display_name, name, date, bold=False):
def table_line(version_info, bold=False):
# For release_X branches, docs are on a separate tag.
if name.startswith("release"):
docs_name = name + "_docs"
cells = [
f"**{version_info.display_name}**",
f"{bold_str}{version_info.release_date}{bold_str}",
f"{bold_str}[source]({version_info.source_link}){bold_str}",
f"{bold_str}[docs]({version_info.doc_link}){bold_str}",
f"{bold_str}[download]({version_info.download_link}){bold_str}",
]
if version_info.is_master:
cells.append("--") # python
cells.append("--") # Unity
docs_name = name
return f"| **{display_name}** | {bold_str}{date}{bold_str} | {bold_str}[source](https://github.com/Unity-Technologies/ml-agents/tree/{name}){bold_str} | {bold_str}[docs](https://github.com/Unity-Technologies/ml-agents/tree/{docs_name}/docs/Readme.md){bold_str} | {bold_str}[download](https://github.com/Unity-Technologies/ml-agents/archive/{name}.zip){bold_str} |" # noqa
cells.append(
f"{bold_str}[{version_info.python_verion}]({version_info.pypi_link}){bold_str}"
)
cells.append(
f"{bold_str}[{version_info.csharp_version}]({version_info.package_link}){bold_str}"
)
joined_cells = " | ".join(cells)
return f"| {joined_cells} |"
class ReleaseInfo(NamedTuple):

release_date: str
@staticmethod
def from_simple_tag(release_tag: str, release_date: str) -> "ReleaseInfo":
"""
Generate the ReleaseInfo for "old style" releases, where the tag and versions
were all the same.
"""
return ReleaseInfo(release_tag, release_tag, release_tag, release_date)
is_verified: bool = False
@property
def loose_version(self) -> LooseVersion:

def is_master(self) -> bool:
return self.release_tag == "master"
@property
def release_datetime(self) -> datetime:
if self.is_master:
return datetime.today()
return datetime.strptime(self.release_date, "%B %d, %Y")
@property
return (
datetime.today() - datetime.strptime(self.release_date, "%B %d, %Y")
).days
return (datetime.today() - self.release_datetime).days
@property
def display_name(self) -> str:

"""
return self.release_tag.replace("_", " ").title()
if self.is_verified:
return f"Verified Package {self.csharp_version}"
elif self.is_master:
return "master (unstable)"
else:
return self.release_tag.replace("_", " ").title()
@property
def source_link(self):
if self.is_verified:
return f"https://github.com/Unity-Technologies/ml-agents/tree/com.unity.ml-agents_{self.csharp_version}"
else:
return f"https://github.com/Unity-Technologies/ml-agents/tree/{self.release_tag}"
@property
def download_link(self):
if self.is_verified:
tag = f"com.unity.ml-agents_{self.csharp_version}"
else:
tag = self.release_tag
return f"https://github.com/Unity-Technologies/ml-agents/archive/{tag}.zip"
@property
def doc_link(self):
if self.is_verified:
return "https://github.com/Unity-Technologies/ml-agents/blob/release_2_verified_docs/docs/Readme.md"
# For release_X branches, docs are on a separate tag.
if self.release_tag.startswith("release"):
docs_name = self.release_tag + "_docs"
else:
docs_name = self.release_tag
return f"https://github.com/Unity-Technologies/ml-agents/tree/{docs_name}/docs/Readme.md"
@property
def package_link(self):
try:
v = StrictVersion(self.csharp_version).version
return f"https://docs.unity3d.com/Packages/com.unity.ml-agents@{v[0]}.{v[1]}/manual/index.html"
except ValueError:
return "--"
@property
def pypi_link(self):
return f"https://pypi.org/project/mlagents/{self.python_verion}/"
ReleaseInfo.from_simple_tag("0.10.0", "September 30, 2019"),
ReleaseInfo.from_simple_tag("0.10.1", "October 9, 2019"),
ReleaseInfo.from_simple_tag("0.11.0", "November 4, 2019"),
ReleaseInfo.from_simple_tag("0.12.0", "December 2, 2019"),
ReleaseInfo.from_simple_tag("0.12.1", "December 11, 2019"),
ReleaseInfo.from_simple_tag("0.13.0", "January 8, 2020"),
ReleaseInfo.from_simple_tag("0.13.1", "January 21, 2020"),
ReleaseInfo.from_simple_tag("0.14.0", "February 13, 2020"),
ReleaseInfo.from_simple_tag("0.14.1", "February 26, 2020"),
ReleaseInfo.from_simple_tag("0.15.0", "March 18, 2020"),
ReleaseInfo.from_simple_tag("0.15.1", "March 30, 2020"),
ReleaseInfo("master", "master", "master", "--"),
ReleaseInfo("release_1", "1.0.0", "0.16.0", "April 30, 2020"),
ReleaseInfo("release_2", "1.0.2", "0.16.1", "May 20, 2020"),
ReleaseInfo("release_3", "1.1.0", "0.17.0", "June 10, 2020"),

ReleaseInfo("release_10", "1.6.0", "0.22.0", "November 18, 2020"),
ReleaseInfo("release_11", "1.7.0", "0.23.0", "December 21, 2020"),
ReleaseInfo("release_12", "1.7.2", "0.23.0", "December 22, 2020"),
# Verified releases
ReleaseInfo("", "1.0.6", "0.16.1", "November 16, 2020", is_verified=True),
ReleaseInfo("", "1.0.5", "0.16.1", "September 23, 2020", is_verified=True),
ReleaseInfo("", "1.0.4", "0.16.1", "August 20, 2020", is_verified=True),
MAX_DAYS = 150 # do not print releases older than this many days
sorted_versions = sorted(
versions, key=lambda x: (x.loose_version, x.csharp_version), reverse=True
sorted_versions = sorted(versions, key=lambda x: x.release_datetime, reverse=True)
highlight_versions = set()
# Highlight the most recent verified version
highlight_versions.add([v for v in sorted_versions if v.is_verified][0])
# Highlight the most recent regular version
highlight_versions.add(
[v for v in sorted_versions if (not v.is_verified and not v.is_master)][0]
print(table_line("master (unstable)", "master", "--"))
highlight = True # whether to bold the line or not
count_by_verified = Counter()
if version_info.elapsed_days <= MAX_DAYS:
print(
table_line(
version_info.display_name,
version_info.release_tag,
version_info.release_date,
highlight,
)
)
highlight = False # only bold the first stable release
highlight = version_info in highlight_versions
if version_info.elapsed_days > MAX_DAYS:
# Make sure we always have at least regular and one verified entry
if count_by_verified[version_info.is_verified] > 0:
continue
print(table_line(version_info, highlight))
count_by_verified[version_info.is_verified] += 1
print("\n\n")

23
.github/workflows/lock.yml


# GitHub Actions workflow: automatically lock closed, inactive issue threads
# so stale discussions stop accumulating new comments.
name: 'Lock Threads'

on:
  schedule:
    # Runs every 4 hours.
    - cron: '0 0/4 * * *'

jobs:
  lock:
    runs-on: ubuntu-latest
    steps:
      - uses: dessant/lock-threads@v2
        with:
          github-token: ${{ github.token }}
          # Lock issues that have had no activity for 30 days after being closed.
          issue-lock-inactive-days: '30'
          # No creation-date or label-based filtering is configured.
          issue-exclude-created-before: ''
          issue-exclude-labels: ''
          issue-lock-labels: ''
          issue-lock-comment: >
            This thread has been automatically locked since there has not been
            any recent activity after it was closed. Please open a new issue for
            related bugs.
          issue-lock-reason: 'resolved'
          # Only issues are processed; pull requests are left alone.
          process-only: 'issues'

24
.yamato/pytest-gpu.yml


# Yamato CI job: run the ml-agents pytest suite on a GPU VM with CUDA-enabled torch.
pytest_gpu:
  name: Pytest GPU
  agent:
    type: Unity::VM::GPU
    image: package-ci/ubuntu:stable
    flavor: b1.large
  commands:
    - |
      sudo apt-get update && sudo apt-get install -y python3-venv
      python3 -m venv venv && source venv/bin/activate
      python3 -m pip install pyyaml --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
      python3 -u -m ml-agents.tests.yamato.setup_venv
      python3 -m pip install --progress-bar=off -r test_requirements.txt --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
      # CUDA 10.1 builds of torch/torchvision/torchaudio, pulled from the PyTorch wheel index.
      python3 -m pip install torch==1.7.1+cu101 torchvision==0.8.2+cu101 torchaudio==0.7.2 -f https://download.pytorch.org/whl/torch_stable.html --index-url https://artifactory.prd.it.unity3d.com/artifactory/api/pypi/pypi/simple
      # Tests marked "check_environment_trains" are excluded on this machine.
      python3 -m pytest -m "not check_environment_trains" --junitxml=junit/test-results.xml -p no:warnings
  triggers:
    cancel_old_ci: true
    recurring:
      - branch: master
        frequency: daily
  artifacts:
    logs:
      paths:
        # NOTE(review): pytest writes junit/test-results.xml but only
        # artifacts/standalone_build.txt is collected — confirm this is intended.
        - "artifacts/standalone_build.txt"

40
com.unity.ml-agents/Runtime/Analytics/AnalyticsUtils.cs


using System;
using UnityEngine;
namespace Unity.MLAgents.Analytics
{
    /// <summary>
    /// Shared helpers for the ML-Agents analytics events.
    /// </summary>
    internal static class AnalyticsUtils
    {
        /// <summary>
        /// Hash a string to remove PII or secret info before sending to analytics
        /// </summary>
        /// <param name="s">Raw string to anonymize.</param>
        /// <returns>A string containing the Hash128 of the input string.</returns>
        public static string Hash(string s) => Hash128.Compute(s).ToString();

        // Global switch consulted by the analytics senders; unit tests flip it off
        // via DisableAnalyticsSending below.
        internal static bool s_SendEditorAnalytics = true;

        /// <summary>
        /// Helper class to temporarily disable sending analytics from unit tests.
        /// Restores the previous value on Dispose, so scopes can nest safely.
        /// </summary>
        internal class DisableAnalyticsSending : IDisposable
        {
            private readonly bool m_PreviousSendEditorAnalytics;

            public DisableAnalyticsSending()
            {
                m_PreviousSendEditorAnalytics = s_SendEditorAnalytics;
                s_SendEditorAnalytics = false;
            }

            public void Dispose() => s_SendEditorAnalytics = m_PreviousSendEditorAnalytics;
        }
    }
}

3
com.unity.ml-agents/Runtime/Analytics/AnalyticsUtils.cs.meta


fileFormatVersion: 2
guid: af1ef3e70f1242938d7b39284b1a892b
timeCreated: 1610575760

246
com.unity.ml-agents/Runtime/Analytics/TrainingAnalytics.cs


using System;
using System.Collections.Generic;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Sensors;
using UnityEngine;
using UnityEngine.Analytics;
#if UNITY_EDITOR
using UnityEditor;
using UnityEditor.Analytics;
#endif
namespace Unity.MLAgents.Analytics
{
    /// <summary>
    /// Sends one-time analytics events about a training session (environment setup,
    /// per-behavior training configuration, and remote policy initialization).
    /// Events are only delivered from the Unity Editor; in player builds every
    /// entry point is effectively a no-op.
    /// </summary>
    internal class TrainingAnalytics
    {
        const string k_VendorKey = "unity.ml-agents";
        const string k_TrainingEnvironmentInitializedEventName = "ml_agents_training_environment_initialized";
        const string k_TrainingBehaviorInitializedEventName = "ml_agents_training_behavior_initialized";
        const string k_RemotePolicyInitializedEventName = "ml_agents_remote_policy_initialized";

        // Every event name that must be registered with the analytics backend.
        private static readonly string[] s_EventNames =
        {
            k_TrainingEnvironmentInitializedEventName,
            k_TrainingBehaviorInitializedEventName,
            k_RemotePolicyInitializedEventName
        };

        /// <summary>
        /// Whether or not we've registered this particular event yet
        /// </summary>
        static bool s_EventsRegistered = false;

        /// <summary>
        /// Hourly limit for this event name
        /// </summary>
        const int k_MaxEventsPerHour = 1000;

        /// <summary>
        /// Maximum number of items in this event.
        /// </summary>
        const int k_MaxNumberOfElements = 1000;

        // Whether the once-per-process environment-initialized event was already sent.
        private static bool s_SentEnvironmentInitialized;

        /// <summary>
        /// Behaviors that we've already sent events for.
        /// </summary>
        private static HashSet<string> s_SentRemotePolicyInitialized;
        private static HashSet<string> s_SentTrainingBehaviorInitialized;

        // Identifies this training session; attached to every event payload.
        private static Guid s_TrainingSessionGuid;

        // These are set when the RpcCommunicator connects
        private static string s_TrainerPackageVersion = "";
        private static string s_TrainerCommunicationVersion = "";

        /// <summary>
        /// Register the analytics events (Editor only) and lazily initialize the
        /// per-session dedup state. Safe to call repeatedly; work happens once.
        /// </summary>
        /// <returns>True if all events are registered and sending is possible.</returns>
        static bool EnableAnalytics()
        {
            if (s_EventsRegistered)
            {
                return true;
            }
            foreach (var eventName in s_EventNames)
            {
#if UNITY_EDITOR
                AnalyticsResult result = EditorAnalytics.RegisterEventWithLimit(eventName, k_MaxEventsPerHour, k_MaxNumberOfElements, k_VendorKey);
#else
                // Outside the Editor, registration is unsupported, so analytics stay disabled.
                AnalyticsResult result = AnalyticsResult.UnsupportedPlatform;
#endif
                if (result != AnalyticsResult.Ok)
                {
                    return false;
                }
            }
            s_EventsRegistered = true;

            // First successful registration: create the dedup sets and session id.
            if (s_SentRemotePolicyInitialized == null)
            {
                s_SentRemotePolicyInitialized = new HashSet<string>();
                s_SentTrainingBehaviorInitialized = new HashSet<string>();
                s_TrainingSessionGuid = Guid.NewGuid();
            }
            return s_EventsRegistered;
        }

        /// <summary>
        /// Cache information about the trainer when it becomes available in the RpcCommunicator.
        /// </summary>
        /// <param name="communicationVersion"></param>
        /// <param name="packageVersion"></param>
        public static void SetTrainerInformation(string packageVersion, string communicationVersion)
        {
            s_TrainerPackageVersion = packageVersion;
            s_TrainerCommunicationVersion = communicationVersion;
        }

        /// <summary>
        /// Whether analytics can be sent at all: Editor analytics enabled in the
        /// Editor, always false in player builds.
        /// </summary>
        public static bool IsAnalyticsEnabled()
        {
#if UNITY_EDITOR
            return EditorAnalytics.enabled;
#else
            return false;
#endif
        }

        /// <summary>
        /// Send the environment-initialized event, at most once per process.
        /// Fills in the event's TrainingSessionGuid before sending.
        /// </summary>
        public static void TrainingEnvironmentInitialized(TrainingEnvironmentInitializedEvent tbiEvent)
        {
            if (!IsAnalyticsEnabled())
                return;

            if (!EnableAnalytics())
                return;

            if (s_SentEnvironmentInitialized)
            {
                // We already sent an TrainingEnvironmentInitializedEvent. Exit so we don't resend.
                return;
            }

            s_SentEnvironmentInitialized = true;
            tbiEvent.TrainingSessionGuid = s_TrainingSessionGuid.ToString();

            // Note - to debug, use JsonUtility.ToJson on the event.
            // Debug.Log(
            //     $"Would send event {k_TrainingEnvironmentInitializedEventName} with body {JsonUtility.ToJson(tbiEvent, true)}"
            // );
#if UNITY_EDITOR
            if (AnalyticsUtils.s_SendEditorAnalytics)
            {
                EditorAnalytics.SendEventWithLimit(k_TrainingEnvironmentInitializedEventName, tbiEvent);
            }
#else
            return;
#endif
        }

        /// <summary>
        /// Send the remote-policy-initialized event, at most once per base behavior
        /// name (any "?..." suffix is stripped first).
        /// </summary>
        /// <param name="fullyQualifiedBehaviorName">Behavior name, possibly with a "?" suffix (team id).</param>
        /// <param name="sensors">Sensors on the agent; summarized into the event payload.</param>
        /// <param name="actionSpec">Action spec for the behavior.</param>
        public static void RemotePolicyInitialized(
            string fullyQualifiedBehaviorName,
            IList<ISensor> sensors,
            ActionSpec actionSpec
        )
        {
            if (!IsAnalyticsEnabled())
                return;

            if (!EnableAnalytics())
                return;

            // Extract base behavior name (no team ID)
            var behaviorName = ParseBehaviorName(fullyQualifiedBehaviorName);
            var added = s_SentRemotePolicyInitialized.Add(behaviorName);
            if (!added)
            {
                // We previously added this model. Exit so we don't resend.
                return;
            }

            var data = GetEventForRemotePolicy(behaviorName, sensors, actionSpec);
            // Note - to debug, use JsonUtility.ToJson on the event.
            // Debug.Log(
            //     $"Would send event {k_RemotePolicyInitializedEventName} with body {JsonUtility.ToJson(data, true)}"
            // );
#if UNITY_EDITOR
            if (AnalyticsUtils.s_SendEditorAnalytics)
            {
                EditorAnalytics.SendEventWithLimit(k_RemotePolicyInitializedEventName, data);
            }
#else
            return;
#endif
        }

        /// <summary>
        /// Strip everything from the last '?' onward from a fully qualified
        /// behavior name (presumably the team-id suffix — see callers).
        /// Returns the input unchanged when no '?' is present.
        /// </summary>
        internal static string ParseBehaviorName(string fullyQualifiedBehaviorName)
        {
            var lastQuestionIndex = fullyQualifiedBehaviorName.LastIndexOf("?");
            if (lastQuestionIndex < 0)
            {
                // Nothing to remove
                return fullyQualifiedBehaviorName;
            }
            return fullyQualifiedBehaviorName.Substring(0, lastQuestionIndex);
        }

        /// <summary>
        /// Send the training-behavior-initialized event, at most once per behavior
        /// name. The behavior name is hashed before sending.
        /// </summary>
        public static void TrainingBehaviorInitialized(TrainingBehaviorInitializedEvent tbiEvent)
        {
            if (!IsAnalyticsEnabled())
                return;

            if (!EnableAnalytics())
                return;

            // Dedup on the raw (pre-hash) behavior name.
            var behaviorName = tbiEvent.BehaviorName;
            var added = s_SentTrainingBehaviorInitialized.Add(behaviorName);
            if (!added)
            {
                // We previously added this model. Exit so we don't resend.
                return;
            }

            // Hash the behavior name so that there's no concern about PII or "secret" data being leaked.
            tbiEvent.TrainingSessionGuid = s_TrainingSessionGuid.ToString();
            tbiEvent.BehaviorName = AnalyticsUtils.Hash(tbiEvent.BehaviorName);

            // Note - to debug, use JsonUtility.ToJson on the event.
            // Debug.Log(
            //     $"Would send event {k_TrainingBehaviorInitializedEventName} with body {JsonUtility.ToJson(tbiEvent, true)}"
            // );
#if UNITY_EDITOR
            if (AnalyticsUtils.s_SendEditorAnalytics)
            {
                EditorAnalytics.SendEventWithLimit(k_TrainingBehaviorInitializedEventName, tbiEvent);
            }
#else
            return;
#endif
        }

        /// <summary>
        /// Build the remote-policy event payload: hashed behavior name, session id,
        /// action spec summary, one observation spec per sensor, and the trainer
        /// versions cached by SetTrainerInformation.
        /// </summary>
        static RemotePolicyInitializedEvent GetEventForRemotePolicy(
            string behaviorName,
            IList<ISensor> sensors,
            ActionSpec actionSpec)
        {
            var remotePolicyEvent = new RemotePolicyInitializedEvent();

            // Hash the behavior name so that there's no concern about PII or "secret" data being leaked.
            remotePolicyEvent.BehaviorName = AnalyticsUtils.Hash(behaviorName);

            remotePolicyEvent.TrainingSessionGuid = s_TrainingSessionGuid.ToString();
            remotePolicyEvent.ActionSpec = EventActionSpec.FromActionSpec(actionSpec);
            remotePolicyEvent.ObservationSpecs = new List<EventObservationSpec>(sensors.Count);
            foreach (var sensor in sensors)
            {
                remotePolicyEvent.ObservationSpecs.Add(EventObservationSpec.FromSensor(sensor));
            }

            remotePolicyEvent.MLAgentsEnvsVersion = s_TrainerPackageVersion;
            remotePolicyEvent.TrainerCommunicationVersion = s_TrainerCommunicationVersion;
            return remotePolicyEvent;
        }
    }
}

3
com.unity.ml-agents/Runtime/Analytics/TrainingAnalytics.cs.meta


fileFormatVersion: 2
guid: 5ad0bc6b45614bb7929d25dd59d5ac38
timeCreated: 1608168600

850
com.unity.ml-agents/Runtime/Grpc/CommunicatorObjects/TrainingAnalytics.cs


// <auto-generated>
// Generated by the protocol buffer compiler. DO NOT EDIT!
// source: mlagents_envs/communicator_objects/training_analytics.proto
// </auto-generated>
#pragma warning disable 1591, 0612, 3021
#region Designer generated code
using pb = global::Google.Protobuf;
using pbc = global::Google.Protobuf.Collections;
using pbr = global::Google.Protobuf.Reflection;
using scg = global::System.Collections.Generic;
namespace Unity.MLAgents.CommunicatorObjects {
/// <summary>Holder for reflection information generated from mlagents_envs/communicator_objects/training_analytics.proto</summary>
internal static partial class TrainingAnalyticsReflection {
#region Descriptor
/// <summary>File descriptor for mlagents_envs/communicator_objects/training_analytics.proto</summary>
public static pbr::FileDescriptor Descriptor {
get { return descriptor; }
}
private static pbr::FileDescriptor descriptor;
static TrainingAnalyticsReflection() {
byte[] descriptorData = global::System.Convert.FromBase64String(
string.Concat(
"CjttbGFnZW50c19lbnZzL2NvbW11bmljYXRvcl9vYmplY3RzL3RyYWluaW5n",
"X2FuYWx5dGljcy5wcm90bxIUY29tbXVuaWNhdG9yX29iamVjdHMi2QEKHlRy",
"YWluaW5nRW52aXJvbm1lbnRJbml0aWFsaXplZBIYChBtbGFnZW50c192ZXJz",
"aW9uGAEgASgJEh0KFW1sYWdlbnRzX2VudnNfdmVyc2lvbhgCIAEoCRIWCg5w",
"eXRob25fdmVyc2lvbhgDIAEoCRIVCg10b3JjaF92ZXJzaW9uGAQgASgJEhkK",
"EXRvcmNoX2RldmljZV90eXBlGAUgASgJEhAKCG51bV9lbnZzGAYgASgFEiIK",
"Gm51bV9lbnZpcm9ubWVudF9wYXJhbWV0ZXJzGAcgASgFIq0DChtUcmFpbmlu",
"Z0JlaGF2aW9ySW5pdGlhbGl6ZWQSFQoNYmVoYXZpb3JfbmFtZRgBIAEoCRIU",
"Cgx0cmFpbmVyX3R5cGUYAiABKAkSIAoYZXh0cmluc2ljX3Jld2FyZF9lbmFi",
"bGVkGAMgASgIEhsKE2dhaWxfcmV3YXJkX2VuYWJsZWQYBCABKAgSIAoYY3Vy",
"aW9zaXR5X3Jld2FyZF9lbmFibGVkGAUgASgIEhoKEnJuZF9yZXdhcmRfZW5h",
"YmxlZBgGIAEoCBIiChpiZWhhdmlvcmFsX2Nsb25pbmdfZW5hYmxlZBgHIAEo",
"CBIZChFyZWN1cnJlbnRfZW5hYmxlZBgIIAEoCBIWCg52aXN1YWxfZW5jb2Rl",
"chgJIAEoCRIaChJudW1fbmV0d29ya19sYXllcnMYCiABKAUSIAoYbnVtX25l",
"dHdvcmtfaGlkZGVuX3VuaXRzGAsgASgFEhgKEHRyYWluZXJfdGhyZWFkZWQY",
"DCABKAgSGQoRc2VsZl9wbGF5X2VuYWJsZWQYDSABKAgSGgoSY3VycmljdWx1",
"bV9lbmFibGVkGA4gASgIQiWqAiJVbml0eS5NTEFnZW50cy5Db21tdW5pY2F0",
"b3JPYmplY3RzYgZwcm90bzM="));
descriptor = pbr::FileDescriptor.FromGeneratedCode(descriptorData,
new pbr::FileDescriptor[] { },
new pbr::GeneratedClrTypeInfo(null, new pbr::GeneratedClrTypeInfo[] {
new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.TrainingEnvironmentInitialized), global::Unity.MLAgents.CommunicatorObjects.TrainingEnvironmentInitialized.Parser, new[]{ "MlagentsVersion", "MlagentsEnvsVersion", "PythonVersion", "TorchVersion", "TorchDeviceType", "NumEnvs", "NumEnvironmentParameters" }, null, null, null),
new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.TrainingBehaviorInitialized), global::Unity.MLAgents.CommunicatorObjects.TrainingBehaviorInitialized.Parser, new[]{ "BehaviorName", "TrainerType", "ExtrinsicRewardEnabled", "GailRewardEnabled", "CuriosityRewardEnabled", "RndRewardEnabled", "BehavioralCloningEnabled", "RecurrentEnabled", "VisualEncoder", "NumNetworkLayers", "NumNetworkHiddenUnits", "TrainerThreaded", "SelfPlayEnabled", "CurriculumEnabled" }, null, null, null)
}));
}
#endregion
}
#region Messages
internal sealed partial class TrainingEnvironmentInitialized : pb::IMessage<TrainingEnvironmentInitialized> {
private static readonly pb::MessageParser<TrainingEnvironmentInitialized> _parser = new pb::MessageParser<TrainingEnvironmentInitialized>(() => new TrainingEnvironmentInitialized());
private pb::UnknownFieldSet _unknownFields;
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public static pb::MessageParser<TrainingEnvironmentInitialized> Parser { get { return _parser; } }
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public static pbr::MessageDescriptor Descriptor {
get { return global::Unity.MLAgents.CommunicatorObjects.TrainingAnalyticsReflection.Descriptor.MessageTypes[0]; }
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
pbr::MessageDescriptor pb::IMessage.Descriptor {
get { return Descriptor; }
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public TrainingEnvironmentInitialized() {
OnConstruction();
}
partial void OnConstruction();
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public TrainingEnvironmentInitialized(TrainingEnvironmentInitialized other) : this() {
mlagentsVersion_ = other.mlagentsVersion_;
mlagentsEnvsVersion_ = other.mlagentsEnvsVersion_;
pythonVersion_ = other.pythonVersion_;
torchVersion_ = other.torchVersion_;
torchDeviceType_ = other.torchDeviceType_;
numEnvs_ = other.numEnvs_;
numEnvironmentParameters_ = other.numEnvironmentParameters_;
_unknownFields = pb::UnknownFieldSet.Clone(other._unknownFields);
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public TrainingEnvironmentInitialized Clone() {
return new TrainingEnvironmentInitialized(this);
}
/// <summary>Field number for the "mlagents_version" field.</summary>
public const int MlagentsVersionFieldNumber = 1;
private string mlagentsVersion_ = "";
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public string MlagentsVersion {
get { return mlagentsVersion_; }
set {
mlagentsVersion_ = pb::ProtoPreconditions.CheckNotNull(value, "value");
}
}
/// <summary>Field number for the "mlagents_envs_version" field.</summary>
public const int MlagentsEnvsVersionFieldNumber = 2;
private string mlagentsEnvsVersion_ = "";
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public string MlagentsEnvsVersion {
get { return mlagentsEnvsVersion_; }
set {
mlagentsEnvsVersion_ = pb::ProtoPreconditions.CheckNotNull(value, "value");
}
}
/// <summary>Field number for the "python_version" field.</summary>
public const int PythonVersionFieldNumber = 3;
private string pythonVersion_ = "";
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public string PythonVersion {
get { return pythonVersion_; }
set {
pythonVersion_ = pb::ProtoPreconditions.CheckNotNull(value, "value");
}
}
/// <summary>Field number for the "torch_version" field.</summary>
public const int TorchVersionFieldNumber = 4;
private string torchVersion_ = "";
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public string TorchVersion {
get { return torchVersion_; }
set {
torchVersion_ = pb::ProtoPreconditions.CheckNotNull(value, "value");
}
}
/// <summary>Field number for the "torch_device_type" field.</summary>
public const int TorchDeviceTypeFieldNumber = 5;
private string torchDeviceType_ = "";
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public string TorchDeviceType {
get { return torchDeviceType_; }
set {
torchDeviceType_ = pb::ProtoPreconditions.CheckNotNull(value, "value");
}
}
/// <summary>Field number for the "num_envs" field.</summary>
public const int NumEnvsFieldNumber = 6;
private int numEnvs_;
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public int NumEnvs {
get { return numEnvs_; }
set {
numEnvs_ = value;
}
}
/// <summary>Field number for the "num_environment_parameters" field.</summary>
public const int NumEnvironmentParametersFieldNumber = 7;
private int numEnvironmentParameters_;
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public int NumEnvironmentParameters {
get { return numEnvironmentParameters_; }
set {
numEnvironmentParameters_ = value;
}
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public override bool Equals(object other) {
return Equals(other as TrainingEnvironmentInitialized);
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public bool Equals(TrainingEnvironmentInitialized other) {
if (ReferenceEquals(other, null)) {
return false;
}
if (ReferenceEquals(other, this)) {
return true;
}
if (MlagentsVersion != other.MlagentsVersion) return false;
if (MlagentsEnvsVersion != other.MlagentsEnvsVersion) return false;
if (PythonVersion != other.PythonVersion) return false;
if (TorchVersion != other.TorchVersion) return false;
if (TorchDeviceType != other.TorchDeviceType) return false;
if (NumEnvs != other.NumEnvs) return false;
if (NumEnvironmentParameters != other.NumEnvironmentParameters) return false;
return Equals(_unknownFields, other._unknownFields);
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public override int GetHashCode() {
int hash = 1;
if (MlagentsVersion.Length != 0) hash ^= MlagentsVersion.GetHashCode();
if (MlagentsEnvsVersion.Length != 0) hash ^= MlagentsEnvsVersion.GetHashCode();
if (PythonVersion.Length != 0) hash ^= PythonVersion.GetHashCode();
if (TorchVersion.Length != 0) hash ^= TorchVersion.GetHashCode();
if (TorchDeviceType.Length != 0) hash ^= TorchDeviceType.GetHashCode();
if (NumEnvs != 0) hash ^= NumEnvs.GetHashCode();
if (NumEnvironmentParameters != 0) hash ^= NumEnvironmentParameters.GetHashCode();
if (_unknownFields != null) {
hash ^= _unknownFields.GetHashCode();
}
return hash;
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public override string ToString() {
return pb::JsonFormatter.ToDiagnosticString(this);
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public void WriteTo(pb::CodedOutputStream output) {
if (MlagentsVersion.Length != 0) {
output.WriteRawTag(10);
output.WriteString(MlagentsVersion);
}
if (MlagentsEnvsVersion.Length != 0) {
output.WriteRawTag(18);
output.WriteString(MlagentsEnvsVersion);
}
if (PythonVersion.Length != 0) {
output.WriteRawTag(26);
output.WriteString(PythonVersion);
}
if (TorchVersion.Length != 0) {
output.WriteRawTag(34);
output.WriteString(TorchVersion);
}
if (TorchDeviceType.Length != 0) {
output.WriteRawTag(42);
output.WriteString(TorchDeviceType);
}
if (NumEnvs != 0) {
output.WriteRawTag(48);
output.WriteInt32(NumEnvs);
}
if (NumEnvironmentParameters != 0) {
output.WriteRawTag(56);
output.WriteInt32(NumEnvironmentParameters);
}
if (_unknownFields != null) {
_unknownFields.WriteTo(output);
}
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public int CalculateSize() {
int size = 0;
if (MlagentsVersion.Length != 0) {
size += 1 + pb::CodedOutputStream.ComputeStringSize(MlagentsVersion);
}
if (MlagentsEnvsVersion.Length != 0) {
size += 1 + pb::CodedOutputStream.ComputeStringSize(MlagentsEnvsVersion);
}
if (PythonVersion.Length != 0) {
size += 1 + pb::CodedOutputStream.ComputeStringSize(PythonVersion);
}
if (TorchVersion.Length != 0) {
size += 1 + pb::CodedOutputStream.ComputeStringSize(TorchVersion);
}
if (TorchDeviceType.Length != 0) {
size += 1 + pb::CodedOutputStream.ComputeStringSize(TorchDeviceType);
}
if (NumEnvs != 0) {
size += 1 + pb::CodedOutputStream.ComputeInt32Size(NumEnvs);
}
if (NumEnvironmentParameters != 0) {
size += 1 + pb::CodedOutputStream.ComputeInt32Size(NumEnvironmentParameters);
}
if (_unknownFields != null) {
size += _unknownFields.CalculateSize();
}
return size;
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public void MergeFrom(TrainingEnvironmentInitialized other) {
if (other == null) {
return;
}
if (other.MlagentsVersion.Length != 0) {
MlagentsVersion = other.MlagentsVersion;
}
if (other.MlagentsEnvsVersion.Length != 0) {
MlagentsEnvsVersion = other.MlagentsEnvsVersion;
}
if (other.PythonVersion.Length != 0) {
PythonVersion = other.PythonVersion;
}
if (other.TorchVersion.Length != 0) {
TorchVersion = other.TorchVersion;
}
if (other.TorchDeviceType.Length != 0) {
TorchDeviceType = other.TorchDeviceType;
}
if (other.NumEnvs != 0) {
NumEnvs = other.NumEnvs;
}
if (other.NumEnvironmentParameters != 0) {
NumEnvironmentParameters = other.NumEnvironmentParameters;
}
_unknownFields = pb::UnknownFieldSet.MergeFrom(_unknownFields, other._unknownFields);
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public void MergeFrom(pb::CodedInputStream input) {
uint tag;
while ((tag = input.ReadTag()) != 0) {
switch(tag) {
default:
_unknownFields = pb::UnknownFieldSet.MergeFieldFrom(_unknownFields, input);
break;
case 10: {
MlagentsVersion = input.ReadString();
break;
}
case 18: {
MlagentsEnvsVersion = input.ReadString();
break;
}
case 26: {
PythonVersion = input.ReadString();
break;
}
case 34: {
TorchVersion = input.ReadString();
break;
}
case 42: {
TorchDeviceType = input.ReadString();
break;
}
case 48: {
NumEnvs = input.ReadInt32();
break;
}
case 56: {
NumEnvironmentParameters = input.ReadInt32();
break;
}
}
}
}
}
internal sealed partial class TrainingBehaviorInitialized : pb::IMessage<TrainingBehaviorInitialized> {
private static readonly pb::MessageParser<TrainingBehaviorInitialized> _parser = new pb::MessageParser<TrainingBehaviorInitialized>(() => new TrainingBehaviorInitialized());
private pb::UnknownFieldSet _unknownFields;
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public static pb::MessageParser<TrainingBehaviorInitialized> Parser { get { return _parser; } }
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public static pbr::MessageDescriptor Descriptor {
get { return global::Unity.MLAgents.CommunicatorObjects.TrainingAnalyticsReflection.Descriptor.MessageTypes[1]; }
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
pbr::MessageDescriptor pb::IMessage.Descriptor {
get { return Descriptor; }
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public TrainingBehaviorInitialized() {
OnConstruction();
}
partial void OnConstruction();
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public TrainingBehaviorInitialized(TrainingBehaviorInitialized other) : this() {
behaviorName_ = other.behaviorName_;
trainerType_ = other.trainerType_;
extrinsicRewardEnabled_ = other.extrinsicRewardEnabled_;
gailRewardEnabled_ = other.gailRewardEnabled_;
curiosityRewardEnabled_ = other.curiosityRewardEnabled_;
rndRewardEnabled_ = other.rndRewardEnabled_;
behavioralCloningEnabled_ = other.behavioralCloningEnabled_;
recurrentEnabled_ = other.recurrentEnabled_;
visualEncoder_ = other.visualEncoder_;
numNetworkLayers_ = other.numNetworkLayers_;
numNetworkHiddenUnits_ = other.numNetworkHiddenUnits_;
trainerThreaded_ = other.trainerThreaded_;
selfPlayEnabled_ = other.selfPlayEnabled_;
curriculumEnabled_ = other.curriculumEnabled_;
_unknownFields = pb::UnknownFieldSet.Clone(other._unknownFields);
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public TrainingBehaviorInitialized Clone() {
return new TrainingBehaviorInitialized(this);
}
// Proto3 scalar fields of TrainingBehaviorInitialized (field numbers 1-14).
// String-typed setters reject null via ProtoPreconditions.CheckNotNull;
// bool/int fields carry plain proto3 defaults (false / 0).
/// <summary>Field number for the "behavior_name" field.</summary>
public const int BehaviorNameFieldNumber = 1;
private string behaviorName_ = "";
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public string BehaviorName {
get { return behaviorName_; }
set {
behaviorName_ = pb::ProtoPreconditions.CheckNotNull(value, "value");
}
}
/// <summary>Field number for the "trainer_type" field.</summary>
public const int TrainerTypeFieldNumber = 2;
private string trainerType_ = "";
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public string TrainerType {
get { return trainerType_; }
set {
trainerType_ = pb::ProtoPreconditions.CheckNotNull(value, "value");
}
}
// Reward-signal flags (fields 3-6).
/// <summary>Field number for the "extrinsic_reward_enabled" field.</summary>
public const int ExtrinsicRewardEnabledFieldNumber = 3;
private bool extrinsicRewardEnabled_;
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public bool ExtrinsicRewardEnabled {
get { return extrinsicRewardEnabled_; }
set {
extrinsicRewardEnabled_ = value;
}
}
/// <summary>Field number for the "gail_reward_enabled" field.</summary>
public const int GailRewardEnabledFieldNumber = 4;
private bool gailRewardEnabled_;
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public bool GailRewardEnabled {
get { return gailRewardEnabled_; }
set {
gailRewardEnabled_ = value;
}
}
/// <summary>Field number for the "curiosity_reward_enabled" field.</summary>
public const int CuriosityRewardEnabledFieldNumber = 5;
private bool curiosityRewardEnabled_;
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public bool CuriosityRewardEnabled {
get { return curiosityRewardEnabled_; }
set {
curiosityRewardEnabled_ = value;
}
}
/// <summary>Field number for the "rnd_reward_enabled" field.</summary>
public const int RndRewardEnabledFieldNumber = 6;
private bool rndRewardEnabled_;
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public bool RndRewardEnabled {
get { return rndRewardEnabled_; }
set {
rndRewardEnabled_ = value;
}
}
/// <summary>Field number for the "behavioral_cloning_enabled" field.</summary>
public const int BehavioralCloningEnabledFieldNumber = 7;
private bool behavioralCloningEnabled_;
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public bool BehavioralCloningEnabled {
get { return behavioralCloningEnabled_; }
set {
behavioralCloningEnabled_ = value;
}
}
// Network-architecture settings (fields 8-11).
/// <summary>Field number for the "recurrent_enabled" field.</summary>
public const int RecurrentEnabledFieldNumber = 8;
private bool recurrentEnabled_;
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public bool RecurrentEnabled {
get { return recurrentEnabled_; }
set {
recurrentEnabled_ = value;
}
}
/// <summary>Field number for the "visual_encoder" field.</summary>
public const int VisualEncoderFieldNumber = 9;
private string visualEncoder_ = "";
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public string VisualEncoder {
get { return visualEncoder_; }
set {
visualEncoder_ = pb::ProtoPreconditions.CheckNotNull(value, "value");
}
}
/// <summary>Field number for the "num_network_layers" field.</summary>
public const int NumNetworkLayersFieldNumber = 10;
private int numNetworkLayers_;
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public int NumNetworkLayers {
get { return numNetworkLayers_; }
set {
numNetworkLayers_ = value;
}
}
/// <summary>Field number for the "num_network_hidden_units" field.</summary>
public const int NumNetworkHiddenUnitsFieldNumber = 11;
private int numNetworkHiddenUnits_;
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public int NumNetworkHiddenUnits {
get { return numNetworkHiddenUnits_; }
set {
numNetworkHiddenUnits_ = value;
}
}
// Trainer configuration flags (fields 12-14).
/// <summary>Field number for the "trainer_threaded" field.</summary>
public const int TrainerThreadedFieldNumber = 12;
private bool trainerThreaded_;
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public bool TrainerThreaded {
get { return trainerThreaded_; }
set {
trainerThreaded_ = value;
}
}
/// <summary>Field number for the "self_play_enabled" field.</summary>
public const int SelfPlayEnabledFieldNumber = 13;
private bool selfPlayEnabled_;
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public bool SelfPlayEnabled {
get { return selfPlayEnabled_; }
set {
selfPlayEnabled_ = value;
}
}
/// <summary>Field number for the "curriculum_enabled" field.</summary>
public const int CurriculumEnabledFieldNumber = 14;
private bool curriculumEnabled_;
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public bool CurriculumEnabled {
get { return curriculumEnabled_; }
set {
curriculumEnabled_ = value;
}
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
// Object.Equals override; delegates to the strongly-typed Equals. The `as`
// cast yields null (-> false) when `other` is not a TrainingBehaviorInitialized.
public override bool Equals(object other) {
return Equals(other as TrainingBehaviorInitialized);
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
// Value equality: compares all 14 declared fields plus the unknown-field set.
public bool Equals(TrainingBehaviorInitialized other) {
if (ReferenceEquals(other, null)) {
return false;
}
if (ReferenceEquals(other, this)) {
return true;
}
if (BehaviorName != other.BehaviorName) return false;
if (TrainerType != other.TrainerType) return false;
if (ExtrinsicRewardEnabled != other.ExtrinsicRewardEnabled) return false;
if (GailRewardEnabled != other.GailRewardEnabled) return false;
if (CuriosityRewardEnabled != other.CuriosityRewardEnabled) return false;
if (RndRewardEnabled != other.RndRewardEnabled) return false;
if (BehavioralCloningEnabled != other.BehavioralCloningEnabled) return false;
if (RecurrentEnabled != other.RecurrentEnabled) return false;
if (VisualEncoder != other.VisualEncoder) return false;
if (NumNetworkLayers != other.NumNetworkLayers) return false;
if (NumNetworkHiddenUnits != other.NumNetworkHiddenUnits) return false;
if (TrainerThreaded != other.TrainerThreaded) return false;
if (SelfPlayEnabled != other.SelfPlayEnabled) return false;
if (CurriculumEnabled != other.CurriculumEnabled) return false;
return Equals(_unknownFields, other._unknownFields);
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
// XOR-folds the hash codes of every field that differs from its proto3
// default (plus any unknown fields), so messages that compare Equals
// produce the same hash.
public override int GetHashCode() {
int hash = 1;
if (BehaviorName.Length != 0) hash ^= BehaviorName.GetHashCode();
if (TrainerType.Length != 0) hash ^= TrainerType.GetHashCode();
if (ExtrinsicRewardEnabled != false) hash ^= ExtrinsicRewardEnabled.GetHashCode();
if (GailRewardEnabled != false) hash ^= GailRewardEnabled.GetHashCode();
if (CuriosityRewardEnabled != false) hash ^= CuriosityRewardEnabled.GetHashCode();
if (RndRewardEnabled != false) hash ^= RndRewardEnabled.GetHashCode();
if (BehavioralCloningEnabled != false) hash ^= BehavioralCloningEnabled.GetHashCode();
if (RecurrentEnabled != false) hash ^= RecurrentEnabled.GetHashCode();
if (VisualEncoder.Length != 0) hash ^= VisualEncoder.GetHashCode();
if (NumNetworkLayers != 0) hash ^= NumNetworkLayers.GetHashCode();
if (NumNetworkHiddenUnits != 0) hash ^= NumNetworkHiddenUnits.GetHashCode();
if (TrainerThreaded != false) hash ^= TrainerThreaded.GetHashCode();
if (SelfPlayEnabled != false) hash ^= SelfPlayEnabled.GetHashCode();
if (CurriculumEnabled != false) hash ^= CurriculumEnabled.GetHashCode();
if (_unknownFields != null) {
hash ^= _unknownFields.GetHashCode();
}
return hash;
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
// Diagnostic JSON rendering of the message, for logging/debugging.
public override string ToString() {
return pb::JsonFormatter.ToDiagnosticString(this);
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
// Serializes all non-default fields to `output` in ascending field-number
// order. Each raw tag byte encodes (field_number << 3) | wire_type
// (e.g. 10 = field 1, length-delimited; 24 = field 3, varint). Fields at
// their proto3 default ("" / false / 0) are skipped entirely, mirroring
// CalculateSize and MergeFrom below.
public void WriteTo(pb::CodedOutputStream output) {
if (BehaviorName.Length != 0) {
output.WriteRawTag(10);
output.WriteString(BehaviorName);
}
if (TrainerType.Length != 0) {
output.WriteRawTag(18);
output.WriteString(TrainerType);
}
if (ExtrinsicRewardEnabled != false) {
output.WriteRawTag(24);
output.WriteBool(ExtrinsicRewardEnabled);
}
if (GailRewardEnabled != false) {
output.WriteRawTag(32);
output.WriteBool(GailRewardEnabled);
}
if (CuriosityRewardEnabled != false) {
output.WriteRawTag(40);
output.WriteBool(CuriosityRewardEnabled);
}
if (RndRewardEnabled != false) {
output.WriteRawTag(48);
output.WriteBool(RndRewardEnabled);
}
if (BehavioralCloningEnabled != false) {
output.WriteRawTag(56);
output.WriteBool(BehavioralCloningEnabled);
}
if (RecurrentEnabled != false) {
output.WriteRawTag(64);
output.WriteBool(RecurrentEnabled);
}
if (VisualEncoder.Length != 0) {
output.WriteRawTag(74);
output.WriteString(VisualEncoder);
}
if (NumNetworkLayers != 0) {
output.WriteRawTag(80);
output.WriteInt32(NumNetworkLayers);
}
if (NumNetworkHiddenUnits != 0) {
output.WriteRawTag(88);
output.WriteInt32(NumNetworkHiddenUnits);
}
if (TrainerThreaded != false) {
output.WriteRawTag(96);
output.WriteBool(TrainerThreaded);
}
if (SelfPlayEnabled != false) {
output.WriteRawTag(104);
output.WriteBool(SelfPlayEnabled);
}
if (CurriculumEnabled != false) {
output.WriteRawTag(112);
output.WriteBool(CurriculumEnabled);
}
// Round-trip any fields this version of the schema does not know about.
if (_unknownFields != null) {
_unknownFields.WriteTo(output);
}
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
// Returns the exact number of bytes WriteTo will emit: one tag byte per
// present field plus its payload (bools are always one byte; strings and
// ints use their computed encoded sizes). Default-valued fields cost nothing.
public int CalculateSize() {
int size = 0;
if (BehaviorName.Length != 0) {
size += 1 + pb::CodedOutputStream.ComputeStringSize(BehaviorName);
}
if (TrainerType.Length != 0) {
size += 1 + pb::CodedOutputStream.ComputeStringSize(TrainerType);
}
if (ExtrinsicRewardEnabled != false) {
size += 1 + 1;
}
if (GailRewardEnabled != false) {
size += 1 + 1;
}
if (CuriosityRewardEnabled != false) {
size += 1 + 1;
}
if (RndRewardEnabled != false) {
size += 1 + 1;
}
if (BehavioralCloningEnabled != false) {
size += 1 + 1;
}
if (RecurrentEnabled != false) {
size += 1 + 1;
}
if (VisualEncoder.Length != 0) {
size += 1 + pb::CodedOutputStream.ComputeStringSize(VisualEncoder);
}
if (NumNetworkLayers != 0) {
size += 1 + pb::CodedOutputStream.ComputeInt32Size(NumNetworkLayers);
}
if (NumNetworkHiddenUnits != 0) {
size += 1 + pb::CodedOutputStream.ComputeInt32Size(NumNetworkHiddenUnits);
}
if (TrainerThreaded != false) {
size += 1 + 1;
}
if (SelfPlayEnabled != false) {
size += 1 + 1;
}
if (CurriculumEnabled != false) {
size += 1 + 1;
}
if (_unknownFields != null) {
size += _unknownFields.CalculateSize();
}
return size;
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
// Presence-aware merge: only fields of `other` that differ from their proto3
// default overwrite this instance's values; default-valued fields in `other`
// leave the existing values untouched. Unknown fields are merged as well.
// A null `other` is a no-op.
public void MergeFrom(TrainingBehaviorInitialized other) {
if (other == null) {
return;
}
if (other.BehaviorName.Length != 0) {
BehaviorName = other.BehaviorName;
}
if (other.TrainerType.Length != 0) {
TrainerType = other.TrainerType;
}
if (other.ExtrinsicRewardEnabled != false) {
ExtrinsicRewardEnabled = other.ExtrinsicRewardEnabled;
}
if (other.GailRewardEnabled != false) {
GailRewardEnabled = other.GailRewardEnabled;
}
if (other.CuriosityRewardEnabled != false) {
CuriosityRewardEnabled = other.CuriosityRewardEnabled;
}
if (other.RndRewardEnabled != false) {
RndRewardEnabled = other.RndRewardEnabled;
}
if (other.BehavioralCloningEnabled != false) {
BehavioralCloningEnabled = other.BehavioralCloningEnabled;
}
if (other.RecurrentEnabled != false) {
RecurrentEnabled = other.RecurrentEnabled;
}
if (other.VisualEncoder.Length != 0) {
VisualEncoder = other.VisualEncoder;
}
if (other.NumNetworkLayers != 0) {
NumNetworkLayers = other.NumNetworkLayers;
}
if (other.NumNetworkHiddenUnits != 0) {
NumNetworkHiddenUnits = other.NumNetworkHiddenUnits;
}
if (other.TrainerThreaded != false) {
TrainerThreaded = other.TrainerThreaded;
}
if (other.SelfPlayEnabled != false) {
SelfPlayEnabled = other.SelfPlayEnabled;
}
if (other.CurriculumEnabled != false) {
CurriculumEnabled = other.CurriculumEnabled;
}
_unknownFields = pb::UnknownFieldSet.MergeFrom(_unknownFields, other._unknownFields);
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
// Parses tag/value pairs from the wire until end of stream. Tags matching
// WriteTo's encoding update the corresponding field; unrecognized tags are
// preserved in _unknownFields so they survive a re-serialization round trip.
public void MergeFrom(pb::CodedInputStream input) {
uint tag;
while ((tag = input.ReadTag()) != 0) {
switch(tag) {
default:
// Not a field this schema version knows; keep the raw bytes.
_unknownFields = pb::UnknownFieldSet.MergeFieldFrom(_unknownFields, input);
break;
case 10: {
BehaviorName = input.ReadString();
break;
}
case 18: {
TrainerType = input.ReadString();
break;
}
case 24: {
ExtrinsicRewardEnabled = input.ReadBool();
break;
}
case 32: {
GailRewardEnabled = input.ReadBool();
break;
}
case 40: {
CuriosityRewardEnabled = input.ReadBool();
break;
}
case 48: {
RndRewardEnabled = input.ReadBool();
break;
}
case 56: {
BehavioralCloningEnabled = input.ReadBool();
break;
}
case 64: {
RecurrentEnabled = input.ReadBool();
break;
}
case 74: {
VisualEncoder = input.ReadString();
break;
}
case 80: {
NumNetworkLayers = input.ReadInt32();
break;
}
case 88: {
NumNetworkHiddenUnits = input.ReadInt32();
break;
}
case 96: {
TrainerThreaded = input.ReadBool();
break;
}
case 104: {
SelfPlayEnabled = input.ReadBool();
break;
}
case 112: {
CurriculumEnabled = input.ReadBool();
break;
}
}
}
}
}
#endregion
}
#endregion Designer generated code

11
com.unity.ml-agents/Runtime/Grpc/CommunicatorObjects/TrainingAnalytics.cs.meta


fileFormatVersion: 2
guid: 9e6ac06a3931742d798cf922de6b99f0
MonoImporter:
externalObjects: {}
serializedVersion: 2
defaultReferences: []
executionOrder: 0
icon: {instanceID: 0}
userData:
assetBundleName:
assetBundleVariant:

39
com.unity.ml-agents/Runtime/Sensors/IBuiltInSensor.cs


namespace Unity.MLAgents.Sensors
{
/// <summary>
/// Identifiers for "built in" sensor types.
/// These are only used for analytics, and should not be used for any runtime decisions.
///
/// NOTE: Do not renumber these, since the values are used for analytics. Renaming is allowed though.
/// </summary>
public enum BuiltInSensorType
{
/// <summary>Sensor type could not be determined (default).</summary>
Unknown = 0,
/// <summary>Plain vector observation sensor.</summary>
VectorSensor = 1,
// Note that StackingSensor actually returns the wrapped sensor's type
StackingSensor = 2,
/// <summary>Ray-cast based perception sensor.</summary>
RayPerceptionSensor = 3,
/// <summary>Sensor built via reflection over annotated fields/properties.</summary>
ReflectionSensor = 4,
/// <summary>Camera-image observation sensor.</summary>
CameraSensor = 5,
/// <summary>RenderTexture-based observation sensor.</summary>
RenderTextureSensor = 6,
/// <summary>Variable-length buffer observation sensor.</summary>
BufferSensor = 7,
/// <summary>Physics-body observation sensor (extensions package).</summary>
PhysicsBodySensor = 8,
/// <summary>Match-3 board observation sensor (extensions package).</summary>
Match3Sensor = 9,
/// <summary>Grid-based observation sensor (extensions package).</summary>
GridSensor = 10
}
/// <summary>
/// Interface for sensors that are provided as part of ML-Agents.
/// User-implemented sensors don't need to use this interface.
/// </summary>
public interface IBuiltInSensor
{
/// <summary>
/// Return the corresponding BuiltInSensorType for the sensor.
/// </summary>
/// <returns>A BuiltInSensorType corresponding to the sensor.</returns>
BuiltInSensorType GetBuiltInSensorType();
}
}

3
com.unity.ml-agents/Runtime/Sensors/IBuiltInSensor.cs.meta


fileFormatVersion: 2
guid: c0c4a98bf1c941b381917cb65209beee
timeCreated: 1611096525

50
com.unity.ml-agents/Runtime/SideChannels/TrainingAnalyticsSideChannel.cs


using System;
using UnityEngine;
using Unity.MLAgents.Analytics;
using Unity.MLAgents.CommunicatorObjects;
namespace Unity.MLAgents.SideChannels
{
    /// <summary>
    /// Side channel that receives training analytics payloads (protobuf
    /// <c>Any</c>-wrapped messages) from the trainer and forwards the known
    /// message types to <see cref="TrainingAnalytics"/>.
    /// </summary>
    public class TrainingAnalyticsSideChannel : SideChannel
    {
        const string k_TrainingAnalyticsConfigId = "b664a4a9-d86f-5a5f-95cb-e8353a7e8356";

        /// <summary>
        /// Initializes the side channel. The constructor is internal because only one instance is
        /// supported at a time, and is created by the Academy.
        /// </summary>
        internal TrainingAnalyticsSideChannel()
        {
            ChannelId = new Guid(k_TrainingAnalyticsConfigId);
        }

        /// <inheritdoc/>
        protected override void OnMessageReceived(IncomingMessage msg)
        {
            Google.Protobuf.WellKnownTypes.Any anyMessage;
            try
            {
                anyMessage = Google.Protobuf.WellKnownTypes.Any.Parser.ParseFrom(msg.GetRawBytes());
            }
            catch (Google.Protobuf.InvalidProtocolBufferException)
            {
                // Malformed payload; nothing we can do about it, so drop it silently.
                return;
            }

            if (anyMessage.Is(TrainingEnvironmentInitialized.Descriptor))
            {
                TrainingAnalytics.TrainingEnvironmentInitialized(
                    anyMessage.Unpack<TrainingEnvironmentInitialized>().ToTrainingEnvironmentInitializedEvent());
            }
            else if (anyMessage.Is(TrainingBehaviorInitialized.Descriptor))
            {
                TrainingAnalytics.TrainingBehaviorInitialized(
                    anyMessage.Unpack<TrainingBehaviorInitialized>().ToTrainingBehaviorInitializedEvent());
            }
            // Unknown message types are intentionally ignored, since the user
            // probably can't do anything about them.
        }
    }
}

3
com.unity.ml-agents/Runtime/SideChannels/TrainingAnalyticsSideChannel.cs.meta


fileFormatVersion: 2
guid: 13c87198bbd54b40a0b93308eb37933e
timeCreated: 1608337471

42
com.unity.ml-agents/Tests/Editor/Analytics/TrainingAnalyticsTest.cs


using System.Collections.Generic;
using NUnit.Framework;
using Unity.MLAgents.Sensors;
using UnityEngine;
using Unity.Barracuda;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Analytics;
using Unity.MLAgents.Policies;
using UnityEditor;
namespace Unity.MLAgents.Tests.Analytics
{
[TestFixture]
public class TrainingAnalyticsTests
{
// ParseBehaviorName strips the team suffix: everything from the last '?'
// onward is removed, and names without a suffix pass through unchanged
// (demonstrated by the "foo?bar?team=1337" -> "foo?bar" case).
[TestCase("foo?team=42", ExpectedResult = "foo")]
[TestCase("foo", ExpectedResult = "foo")]
[TestCase("foo?bar?team=1337", ExpectedResult = "foo?bar")]
public string TestParseBehaviorName(string fullyQualifiedBehaviorName)
{
return TrainingAnalytics.ParseBehaviorName(fullyQualifiedBehaviorName);
}
[Test]
public void TestRemotePolicy()
{
// Start from a clean slate so this test controls the Academy lifecycle.
if (Academy.IsInitialized)
{
Academy.Instance.Dispose();
}
// Exercise the analytics code path without actually sending events.
using (new AnalyticsUtils.DisableAnalyticsSending())
{
var actionSpec = ActionSpec.MakeContinuous(3);
var policy = new RemotePolicy(actionSpec, "TestBehavior?team=42");
policy.RequestDecision(new AgentInfo(), new List<ISensor>());
}
// NOTE(review): the unconditional Dispose suggests RequestDecision (or
// RemotePolicy construction) initializes the Academy singleton as a side
// effect — confirm against RemotePolicy's implementation.
Academy.Instance.Dispose();
}
}
}

3
com.unity.ml-agents/Tests/Editor/Analytics/TrainingAnalyticsTest.cs.meta


fileFormatVersion: 2
guid: 70b8f1544bc34b4e8f1bc1068c64f01c
timeCreated: 1610419546

65
com.unity.ml-agents/Tests/Editor/TrainingAnalyticsSideChannelTests.cs


using System;
using System.Linq;
using System.Text;
using NUnit.Framework;
using Google.Protobuf;
using Unity.MLAgents.Analytics;
using Unity.MLAgents.SideChannels;
using Unity.MLAgents.CommunicatorObjects;
namespace Unity.MLAgents.Tests
{
    /// <summary>
    /// These tests push messages through the side channel's event handling code.
    /// There's no output to assert on, so they simply verify that no exception
    /// is raised (and keep the code coverage above the minimum).
    /// </summary>
    public class TrainingAnalyticsSideChannelTests
    {
        // Routes a raw payload through a fresh side channel while analytics
        // sending is disabled.
        static void ProcessWithSendingDisabled(byte[] payload)
        {
            var channel = new TrainingAnalyticsSideChannel();
            using (new AnalyticsUtils.DisableAnalyticsSending())
            {
                channel.ProcessMessage(payload);
            }
        }

        [Test]
        public void TestTrainingEnvironmentReceived()
        {
            var packed = Google.Protobuf.WellKnownTypes.Any.Pack(new TrainingEnvironmentInitialized());
            ProcessWithSendingDisabled(packed.ToByteArray());
        }

        [Test]
        public void TestTrainingBehaviorReceived()
        {
            var packed = Google.Protobuf.WellKnownTypes.Any.Pack(new TrainingBehaviorInitialized());
            ProcessWithSendingDisabled(packed.ToByteArray());
        }

        [Test]
        public void TestInvalidProtobufMessage()
        {
            var sideChannel = new TrainingAnalyticsSideChannel();

            // A non-protobuf payload should be silently ignored.
            var badBytes = Encoding.ASCII.GetBytes("Lorem ipsum");
            using (new AnalyticsUtils.DisableAnalyticsSending())
            {
                sideChannel.ProcessMessage(badBytes);
            }

            // An almost-valid (truncated) message should likewise be silently ignored.
            var packed = Google.Protobuf.WellKnownTypes.Any.Pack(new TrainingBehaviorInitialized());
            var packedBytes = packed.ToByteArray();
            var truncatedMessage = packedBytes.Take(packedBytes.Length - 1).ToArray();
            using (new AnalyticsUtils.DisableAnalyticsSending())
            {
                sideChannel.ProcessMessage(truncatedMessage);
            }
        }
    }
}

3
com.unity.ml-agents/Tests/Editor/TrainingAnalyticsSideChannelTests.cs.meta


fileFormatVersion: 2
guid: c2a71036ddec4ba4bf83c5e8ba1b8daa
timeCreated: 1610574895

243
ml-agents-envs/mlagents_envs/communicator_objects/training_analytics_pb2.py


# Generated by the protocol buffer compiler. DO NOT EDIT!
# source: mlagents_envs/communicator_objects/training_analytics.proto
import sys
# _b: identity on Python 3, latin-1 encode on Python 2, applied to the
# serialized descriptor string below.
_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1'))
from google.protobuf import descriptor as _descriptor
from google.protobuf import message as _message
from google.protobuf import reflection as _reflection
from google.protobuf import symbol_database as _symbol_database
from google.protobuf import descriptor_pb2
# @@protoc_insertion_point(imports)
_sym_db = _symbol_database.Default()
# File-level descriptor; serialized_pb is the compiled training_analytics.proto
# (proto3 syntax, package communicator_objects).
DESCRIPTOR = _descriptor.FileDescriptor(
name='mlagents_envs/communicator_objects/training_analytics.proto',
package='communicator_objects',
syntax='proto3',
serialized_pb=_b('\n;mlagents_envs/communicator_objects/training_analytics.proto\x12\x14\x63ommunicator_objects\"\xd9\x01\n\x1eTrainingEnvironmentInitialized\x12\x18\n\x10mlagents_version\x18\x01 \x01(\t\x12\x1d\n\x15mlagents_envs_version\x18\x02 \x01(\t\x12\x16\n\x0epython_version\x18\x03 \x01(\t\x12\x15\n\rtorch_version\x18\x04 \x01(\t\x12\x19\n\x11torch_device_type\x18\x05 \x01(\t\x12\x10\n\x08num_envs\x18\x06 \x01(\x05\x12\"\n\x1anum_environment_parameters\x18\x07 \x01(\x05\"\xad\x03\n\x1bTrainingBehaviorInitialized\x12\x15\n\rbehavior_name\x18\x01 \x01(\t\x12\x14\n\x0ctrainer_type\x18\x02 \x01(\t\x12 \n\x18\x65xtrinsic_reward_enabled\x18\x03 \x01(\x08\x12\x1b\n\x13gail_reward_enabled\x18\x04 \x01(\x08\x12 \n\x18\x63uriosity_reward_enabled\x18\x05 \x01(\x08\x12\x1a\n\x12rnd_reward_enabled\x18\x06 \x01(\x08\x12\"\n\x1a\x62\x65havioral_cloning_enabled\x18\x07 \x01(\x08\x12\x19\n\x11recurrent_enabled\x18\x08 \x01(\x08\x12\x16\n\x0evisual_encoder\x18\t \x01(\t\x12\x1a\n\x12num_network_layers\x18\n \x01(\x05\x12 \n\x18num_network_hidden_units\x18\x0b \x01(\x05\x12\x18\n\x10trainer_threaded\x18\x0c \x01(\x08\x12\x19\n\x11self_play_enabled\x18\r \x01(\x08\x12\x1a\n\x12\x63urriculum_enabled\x18\x0e \x01(\x08\x42%\xaa\x02\"Unity.MLAgents.CommunicatorObjectsb\x06proto3')
)
# Descriptor for TrainingEnvironmentInitialized: five string fields
# (fields 1-5) plus two int32 fields (fields 6-7).
_TRAININGENVIRONMENTINITIALIZED = _descriptor.Descriptor(
name='TrainingEnvironmentInitialized',
full_name='communicator_objects.TrainingEnvironmentInitialized',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='mlagents_version', full_name='communicator_objects.TrainingEnvironmentInitialized.mlagents_version', index=0,
number=1, type=9, cpp_type=9, label=1,
has_default_value=False, default_value=_b("").decode('utf-8'),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='mlagents_envs_version', full_name='communicator_objects.TrainingEnvironmentInitialized.mlagents_envs_version', index=1,
number=2, type=9, cpp_type=9, label=1,
has_default_value=False, default_value=_b("").decode('utf-8'),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='python_version', full_name='communicator_objects.TrainingEnvironmentInitialized.python_version', index=2,
number=3, type=9, cpp_type=9, label=1,
has_default_value=False, default_value=_b("").decode('utf-8'),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='torch_version', full_name='communicator_objects.TrainingEnvironmentInitialized.torch_version', index=3,
number=4, type=9, cpp_type=9, label=1,
has_default_value=False, default_value=_b("").decode('utf-8'),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='torch_device_type', full_name='communicator_objects.TrainingEnvironmentInitialized.torch_device_type', index=4,
number=5, type=9, cpp_type=9, label=1,
has_default_value=False, default_value=_b("").decode('utf-8'),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='num_envs', full_name='communicator_objects.TrainingEnvironmentInitialized.num_envs', index=5,
number=6, type=5, cpp_type=1, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='num_environment_parameters', full_name='communicator_objects.TrainingEnvironmentInitialized.num_environment_parameters', index=6,
number=7, type=5, cpp_type=1, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
],
extensions=[
],
nested_types=[],
enum_types=[
],
options=None,
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[
],
serialized_start=86,
serialized_end=303,
)
# Descriptor for TrainingBehaviorInitialized: the 14 scalar fields mirrored by
# the C# message of the same name (strings, bools, and int32s).
_TRAININGBEHAVIORINITIALIZED = _descriptor.Descriptor(
name='TrainingBehaviorInitialized',
full_name='communicator_objects.TrainingBehaviorInitialized',
filename=None,
file=DESCRIPTOR,
containing_type=None,
fields=[
_descriptor.FieldDescriptor(
name='behavior_name', full_name='communicator_objects.TrainingBehaviorInitialized.behavior_name', index=0,
number=1, type=9, cpp_type=9, label=1,
has_default_value=False, default_value=_b("").decode('utf-8'),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='trainer_type', full_name='communicator_objects.TrainingBehaviorInitialized.trainer_type', index=1,
number=2, type=9, cpp_type=9, label=1,
has_default_value=False, default_value=_b("").decode('utf-8'),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='extrinsic_reward_enabled', full_name='communicator_objects.TrainingBehaviorInitialized.extrinsic_reward_enabled', index=2,
number=3, type=8, cpp_type=7, label=1,
has_default_value=False, default_value=False,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='gail_reward_enabled', full_name='communicator_objects.TrainingBehaviorInitialized.gail_reward_enabled', index=3,
number=4, type=8, cpp_type=7, label=1,
has_default_value=False, default_value=False,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='curiosity_reward_enabled', full_name='communicator_objects.TrainingBehaviorInitialized.curiosity_reward_enabled', index=4,
number=5, type=8, cpp_type=7, label=1,
has_default_value=False, default_value=False,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='rnd_reward_enabled', full_name='communicator_objects.TrainingBehaviorInitialized.rnd_reward_enabled', index=5,
number=6, type=8, cpp_type=7, label=1,
has_default_value=False, default_value=False,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='behavioral_cloning_enabled', full_name='communicator_objects.TrainingBehaviorInitialized.behavioral_cloning_enabled', index=6,
number=7, type=8, cpp_type=7, label=1,
has_default_value=False, default_value=False,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='recurrent_enabled', full_name='communicator_objects.TrainingBehaviorInitialized.recurrent_enabled', index=7,
number=8, type=8, cpp_type=7, label=1,
has_default_value=False, default_value=False,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='visual_encoder', full_name='communicator_objects.TrainingBehaviorInitialized.visual_encoder', index=8,
number=9, type=9, cpp_type=9, label=1,
has_default_value=False, default_value=_b("").decode('utf-8'),
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='num_network_layers', full_name='communicator_objects.TrainingBehaviorInitialized.num_network_layers', index=9,
number=10, type=5, cpp_type=1, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='num_network_hidden_units', full_name='communicator_objects.TrainingBehaviorInitialized.num_network_hidden_units', index=10,
number=11, type=5, cpp_type=1, label=1,
has_default_value=False, default_value=0,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='trainer_threaded', full_name='communicator_objects.TrainingBehaviorInitialized.trainer_threaded', index=11,
number=12, type=8, cpp_type=7, label=1,
has_default_value=False, default_value=False,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='self_play_enabled', full_name='communicator_objects.TrainingBehaviorInitialized.self_play_enabled', index=12,
number=13, type=8, cpp_type=7, label=1,
has_default_value=False, default_value=False,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='curriculum_enabled', full_name='communicator_objects.TrainingBehaviorInitialized.curriculum_enabled', index=13,
number=14, type=8, cpp_type=7, label=1,
has_default_value=False, default_value=False,
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
],
extensions=[
],
nested_types=[],
enum_types=[
],
options=None,
is_extendable=False,
syntax='proto3',
extension_ranges=[],
oneofs=[
],
serialized_start=306,
serialized_end=735,
)
# Register the message descriptors and build the concrete message classes
# via the reflection-based metaclass.
DESCRIPTOR.message_types_by_name['TrainingEnvironmentInitialized'] = _TRAININGENVIRONMENTINITIALIZED
DESCRIPTOR.message_types_by_name['TrainingBehaviorInitialized'] = _TRAININGBEHAVIORINITIALIZED
_sym_db.RegisterFileDescriptor(DESCRIPTOR)
TrainingEnvironmentInitialized = _reflection.GeneratedProtocolMessageType('TrainingEnvironmentInitialized', (_message.Message,), dict(
DESCRIPTOR = _TRAININGENVIRONMENTINITIALIZED,
__module__ = 'mlagents_envs.communicator_objects.training_analytics_pb2'
# @@protoc_insertion_point(class_scope:communicator_objects.TrainingEnvironmentInitialized)
))
_sym_db.RegisterMessage(TrainingEnvironmentInitialized)
TrainingBehaviorInitialized = _reflection.GeneratedProtocolMessageType('TrainingBehaviorInitialized', (_message.Message,), dict(
DESCRIPTOR = _TRAININGBEHAVIORINITIALIZED,
__module__ = 'mlagents_envs.communicator_objects.training_analytics_pb2'
# @@protoc_insertion_point(class_scope:communicator_objects.TrainingBehaviorInitialized)
))
_sym_db.RegisterMessage(TrainingBehaviorInitialized)
# File option: csharp_namespace = "Unity.MLAgents.CommunicatorObjects".
DESCRIPTOR.has_options = True
DESCRIPTOR._options = _descriptor._ParseOptions(descriptor_pb2.FileOptions(), _b('\252\002\"Unity.MLAgents.CommunicatorObjects'))
# @@protoc_insertion_point(module_scope)

97
ml-agents-envs/mlagents_envs/communicator_objects/training_analytics_pb2.pyi


# @generated by generate_proto_mypy_stubs.py. Do not edit!
import sys
from google.protobuf.descriptor import (
Descriptor as google___protobuf___descriptor___Descriptor,
)
from google.protobuf.message import (
Message as google___protobuf___message___Message,
)
from typing import (
Optional as typing___Optional,
Text as typing___Text,
)
from typing_extensions import (
Literal as typing_extensions___Literal,
)
# Aliases to the builtins used in the annotations below.
# NOTE(review): presumably these guard against proto field names shadowing the
# builtins in generated stubs — confirm against generate_proto_mypy_stubs.py.
builtin___bool = bool
builtin___bytes = bytes
builtin___float = float
builtin___int = int
# Typing stub for the TrainingEnvironmentInitialized analytics message:
# five text fields (versions/device) plus two int fields (env counts).
class TrainingEnvironmentInitialized(google___protobuf___message___Message):
DESCRIPTOR: google___protobuf___descriptor___Descriptor = ...
mlagents_version = ... # type: typing___Text
mlagents_envs_version = ... # type: typing___Text
python_version = ... # type: typing___Text
torch_version = ... # type: typing___Text
torch_device_type = ... # type: typing___Text
num_envs = ... # type: builtin___int
num_environment_parameters = ... # type: builtin___int
def __init__(self,
*,
mlagents_version : typing___Optional[typing___Text] = None,
mlagents_envs_version : typing___Optional[typing___Text] = None,
python_version : typing___Optional[typing___Text] = None,
torch_version : typing___Optional[typing___Text] = None,
torch_device_type : typing___Optional[typing___Text] = None,
num_envs : typing___Optional[builtin___int] = None,
num_environment_parameters : typing___Optional[builtin___int] = None,
) -> None: ...
@classmethod
def FromString(cls, s: builtin___bytes) -> TrainingEnvironmentInitialized: ...
def MergeFrom(self, other_msg: google___protobuf___message___Message) -> None: ...
def CopyFrom(self, other_msg: google___protobuf___message___Message) -> None: ...
# ClearField accepts bytes field names only on Python 2.
if sys.version_info >= (3,):
def ClearField(self, field_name: typing_extensions___Literal[u"mlagents_envs_version",u"mlagents_version",u"num_environment_parameters",u"num_envs",u"python_version",u"torch_device_type",u"torch_version"]) -> None: ...
else:
def ClearField(self, field_name: typing_extensions___Literal[u"mlagents_envs_version",b"mlagents_envs_version",u"mlagents_version",b"mlagents_version",u"num_environment_parameters",b"num_environment_parameters",u"num_envs",b"num_envs",u"python_version",b"python_version",u"torch_device_type",b"torch_device_type",u"torch_version",b"torch_version"]) -> None: ...
class TrainingBehaviorInitialized(google___protobuf___message___Message):
    """
    Generated type stub for the ``TrainingBehaviorInitialized`` protobuf
    message (defined in training_analytics.proto): per-behavior training
    configuration analytics (trainer type, enabled reward signals, network
    shape, and feature flags). Auto-generated by mypy-protobuf; do not edit
    by hand.
    """
    DESCRIPTOR: google___protobuf___descriptor___Descriptor = ...
    behavior_name = ... # type: typing___Text
    trainer_type = ... # type: typing___Text
    # Which reward signals are configured for this behavior.
    extrinsic_reward_enabled = ... # type: builtin___bool
    gail_reward_enabled = ... # type: builtin___bool
    curiosity_reward_enabled = ... # type: builtin___bool
    rnd_reward_enabled = ... # type: builtin___bool
    behavioral_cloning_enabled = ... # type: builtin___bool
    # Network architecture settings.
    recurrent_enabled = ... # type: builtin___bool
    visual_encoder = ... # type: typing___Text
    num_network_layers = ... # type: builtin___int
    num_network_hidden_units = ... # type: builtin___int
    # Trainer feature flags.
    trainer_threaded = ... # type: builtin___bool
    self_play_enabled = ... # type: builtin___bool
    curriculum_enabled = ... # type: builtin___bool
    def __init__(self,
        *,
        behavior_name : typing___Optional[typing___Text] = None,
        trainer_type : typing___Optional[typing___Text] = None,
        extrinsic_reward_enabled : typing___Optional[builtin___bool] = None,
        gail_reward_enabled : typing___Optional[builtin___bool] = None,
        curiosity_reward_enabled : typing___Optional[builtin___bool] = None,
        rnd_reward_enabled : typing___Optional[builtin___bool] = None,
        behavioral_cloning_enabled : typing___Optional[builtin___bool] = None,
        recurrent_enabled : typing___Optional[builtin___bool] = None,
        visual_encoder : typing___Optional[typing___Text] = None,
        num_network_layers : typing___Optional[builtin___int] = None,
        num_network_hidden_units : typing___Optional[builtin___int] = None,
        trainer_threaded : typing___Optional[builtin___bool] = None,
        self_play_enabled : typing___Optional[builtin___bool] = None,
        curriculum_enabled : typing___Optional[builtin___bool] = None,
        ) -> None: ...
    @classmethod
    def FromString(cls, s: builtin___bytes) -> TrainingBehaviorInitialized: ...
    def MergeFrom(self, other_msg: google___protobuf___message___Message) -> None: ...
    def CopyFrom(self, other_msg: google___protobuf___message___Message) -> None: ...
    # Python 3 stubs accept only str field names; the Python 2 branch also
    # accepts bytes. Both lists are generated and sorted alphabetically.
    if sys.version_info >= (3,):
        def ClearField(self, field_name: typing_extensions___Literal[u"behavior_name",u"behavioral_cloning_enabled",u"curiosity_reward_enabled",u"curriculum_enabled",u"extrinsic_reward_enabled",u"gail_reward_enabled",u"num_network_hidden_units",u"num_network_layers",u"recurrent_enabled",u"rnd_reward_enabled",u"self_play_enabled",u"trainer_threaded",u"trainer_type",u"visual_encoder"]) -> None: ...
    else:
        def ClearField(self, field_name: typing_extensions___Literal[u"behavior_name",b"behavior_name",u"behavioral_cloning_enabled",b"behavioral_cloning_enabled",u"curiosity_reward_enabled",b"curiosity_reward_enabled",u"curriculum_enabled",b"curriculum_enabled",u"extrinsic_reward_enabled",b"extrinsic_reward_enabled",u"gail_reward_enabled",b"gail_reward_enabled",u"num_network_hidden_units",b"num_network_hidden_units",u"num_network_layers",b"num_network_layers",u"recurrent_enabled",b"recurrent_enabled",u"rnd_reward_enabled",b"rnd_reward_enabled",u"self_play_enabled",b"self_play_enabled",u"trainer_threaded",b"trainer_threaded",u"trainer_type",b"trainer_type",u"visual_encoder",b"visual_encoder"]) -> None: ...

41
ml-agents/mlagents/trainers/tests/test_torch_utils.py


import pytest
from unittest import mock
import torch # noqa I201
from mlagents.torch_utils import set_torch_config, default_device
from mlagents.trainers.settings import TorchSettings
@pytest.mark.parametrize(
    "device_str, expected_type, expected_index, expected_tensor_type",
    [
        ("cpu", "cpu", None, torch.FloatTensor),
        ("cuda", "cuda", None, torch.cuda.FloatTensor),
        ("cuda:42", "cuda", 42, torch.cuda.FloatTensor),
        ("opengl", "opengl", None, torch.FloatTensor),
    ],
)
@mock.patch.object(torch, "set_default_tensor_type")
def test_set_torch_device(
    mock_set_default_tensor_type,
    device_str,
    expected_type,
    expected_index,
    expected_tensor_type,
):
    """
    Verify that set_torch_config() parses the configured device string into
    the expected torch device type/index and selects the matching default
    tensor type (FloatTensor vs cuda.FloatTensor).

    ``set_default_tensor_type`` is mocked so the test never requires an
    actual CUDA runtime.
    """
    # The original wrapped this in `except Exception: raise`, which is a
    # redundant no-op; try/finally alone gives the same cleanup guarantee.
    try:
        torch_settings = TorchSettings(device=device_str)
        set_torch_config(torch_settings)
        assert default_device().type == expected_type
        # torch splits "cuda:42" into type "cuda" and index 42; plain device
        # strings have no index.
        if expected_index is None:
            assert default_device().index is None
        else:
            assert default_device().index == expected_index
        mock_set_default_tensor_type.assert_called_once_with(expected_tensor_type)
    finally:
        # Restore the default device so later tests are unaffected.
        torch_settings = TorchSettings(device=None)
        set_torch_config(torch_settings)

99
ml-agents/mlagents/training_analytics_side_channel.py


import sys
from typing import Optional
import uuid
import mlagents_envs
import mlagents.trainers
from mlagents import torch_utils
from mlagents.trainers.settings import RewardSignalType
from mlagents_envs.exception import UnityCommunicationException
from mlagents_envs.side_channel import SideChannel, IncomingMessage, OutgoingMessage
from mlagents_envs.communicator_objects.training_analytics_pb2 import (
TrainingEnvironmentInitialized,
TrainingBehaviorInitialized,
)
from google.protobuf.any_pb2 import Any
from mlagents.trainers.settings import TrainerSettings, RunOptions
class TrainingAnalyticsSideChannel(SideChannel):
    """
    Side channel that sends information about the training to the Unity
    environment so it can be logged.
    """

    # Deterministic channel id, derived once via:
    #   uuid.uuid5(uuid.NAMESPACE_URL, "com.unity.ml-agents/TrainingAnalyticsSideChannel")
    _CHANNEL_ID = uuid.UUID("b664a4a9-d86f-5a5f-95cb-e8353a7e8356")

    def __init__(self) -> None:
        super().__init__(TrainingAnalyticsSideChannel._CHANNEL_ID)
        # Populated by environment_initialized(); later consulted by
        # _behavior_uses_curriculum().
        self.run_options: Optional[RunOptions] = None

    def on_message_received(self, msg: IncomingMessage) -> None:
        """This channel is send-only; any inbound message is a protocol error."""
        raise UnityCommunicationException(
            "The TrainingAnalyticsSideChannel received a message from Unity, "
            + "this should not have happened."
        )

    def _queue_protobuf(self, proto_message) -> None:
        """Pack proto_message into a protobuf Any and queue it for Unity."""
        wrapper = Any()
        wrapper.Pack(proto_message)
        outgoing = OutgoingMessage()
        outgoing.set_raw_bytes(wrapper.SerializeToString())
        super().queue_message_to_send(outgoing)

    def environment_initialized(self, run_options: RunOptions) -> None:
        """Send a one-time TrainingEnvironmentInitialized message for this run."""
        self.run_options = run_options
        version = sys.version_info  # named fields: major, minor, micro, ...
        parameters = run_options.environment_parameters
        message = TrainingEnvironmentInitialized(
            python_version="%d.%d.%d" % (version.major, version.minor, version.micro),
            mlagents_version=mlagents.trainers.__version__,
            mlagents_envs_version=mlagents_envs.__version__,
            torch_version=torch_utils.torch.__version__,
            torch_device_type=torch_utils.default_device().type,
            num_envs=run_options.env_settings.num_envs,
            num_environment_parameters=len(parameters) if parameters else 0,
        )
        self._queue_protobuf(message)

    def training_started(self, behavior_name: str, config: TrainerSettings) -> None:
        """Send a TrainingBehaviorInitialized message describing one behavior's config."""
        reward_signals = config.reward_signals
        network = config.network_settings
        message = TrainingBehaviorInitialized(
            behavior_name=behavior_name,
            trainer_type=config.trainer_type.value,
            extrinsic_reward_enabled=RewardSignalType.EXTRINSIC in reward_signals,
            gail_reward_enabled=RewardSignalType.GAIL in reward_signals,
            curiosity_reward_enabled=RewardSignalType.CURIOSITY in reward_signals,
            rnd_reward_enabled=RewardSignalType.RND in reward_signals,
            behavioral_cloning_enabled=config.behavioral_cloning is not None,
            recurrent_enabled=network.memory is not None,
            visual_encoder=network.vis_encode_type.value,
            num_network_layers=network.num_layers,
            num_network_hidden_units=network.hidden_units,
            trainer_threaded=config.threaded,
            self_play_enabled=config.self_play is not None,
            curriculum_enabled=self._behavior_uses_curriculum(behavior_name),
        )
        self._queue_protobuf(message)

    def _behavior_uses_curriculum(self, behavior_name: str) -> bool:
        """Return True if any curriculum lesson's completion criteria targets behavior_name."""
        if not self.run_options or not self.run_options.environment_parameters:
            return False
        lessons = (
            lesson
            for settings in self.run_options.environment_parameters.values()
            for lesson in settings.curriculum
        )
        return any(
            lesson.completion_criteria
            and lesson.completion_criteria.behavior == behavior_name
            for lesson in lessons
        )

31
protobuf-definitions/proto/mlagents_envs/communicator_objects/training_analytics.proto


syntax = "proto3";
option csharp_namespace = "Unity.MLAgents.CommunicatorObjects";
package communicator_objects;
// One-time analytics sent from the Python trainer to Unity when the training
// environment is initialized (see TrainingAnalyticsSideChannel).
// Field numbers are part of the wire contract; never renumber or reuse them.
message TrainingEnvironmentInitialized {
  string mlagents_version = 1;           // mlagents.trainers package version
  string mlagents_envs_version = 2;      // mlagents_envs package version
  string python_version = 3;             // "major.minor.micro"
  string torch_version = 4;              // torch package version
  string torch_device_type = 5;          // torch device type, e.g. "cpu"/"cuda"
  int32 num_envs = 6;                    // number of parallel Unity environments
  int32 num_environment_parameters = 7;  // 0 when no environment parameters are set
}
// Per-behavior training configuration analytics, sent when training starts
// for a behavior (see TrainingAnalyticsSideChannel.training_started).
// Field numbers are part of the wire contract; never renumber or reuse them.
message TrainingBehaviorInitialized {
  string behavior_name = 1;
  string trainer_type = 2;
  // Which reward signals are configured for this behavior.
  bool extrinsic_reward_enabled = 3;
  bool gail_reward_enabled = 4;
  bool curiosity_reward_enabled = 5;
  bool rnd_reward_enabled = 6;
  bool behavioral_cloning_enabled = 7;
  // Network architecture settings.
  bool recurrent_enabled = 8;
  string visual_encoder = 9;
  int32 num_network_layers = 10;
  int32 num_network_hidden_units = 11;
  // Trainer feature flags.
  bool trainer_threaded = 12;
  bool self_play_enabled = 13;
  bool curriculum_enabled = 14;
}

4
pytest.ini


[pytest]
addopts = --strict-markers
markers =
check_environment_trains: Slow training tests, do not run on yamato

38
.github/lock.yml


# Configuration for Lock Threads - https://github.com/dessant/lock-threads
# Number of days of inactivity before a closed issue or pull request is locked
daysUntilLock: 365
# Skip issues and pull requests created before a given timestamp. Timestamp must
# follow ISO 8601 (`YYYY-MM-DD`). Set to `false` to disable
skipCreatedBefore: false
# Issues and pull requests with these labels will be ignored. Set to `[]` to disable
exemptLabels: []
# Label to add before locking, such as `outdated`. Set to `false` to disable
lockLabel: false
# Comment to post before locking. Set to `false` to disable
lockComment: >
This thread has been automatically locked since there has not been
any recent activity after it was closed. Please open a new issue for
related bugs.
# Assign `resolved` as the reason for locking. Set to `false` to disable
setLockReason: true
# Limit to only `issues` or `pulls`
only: issues
# Optionally, specify configuration settings just for `issues` or `pulls`
# issues:
# exemptLabels:
# - help-wanted
# lockLabel: outdated
# pulls:
# daysUntilLock: 30
# Repository to extend settings from
# _extends: repo
正在加载...
取消
保存