
Merge master -> release_13_branch-to-master

/release_13_branch
Christopher Goy, 4 years ago
Current commit 9cadfa7a
106 files changed, with 6,585 insertions and 1,404 deletions
  1. .github/workflows/pytest.yml (3)
  2. .pre-commit-config.yaml (3)
  3. Project/Assets/ML-Agents/Examples/Soccer/Prefabs/SoccerFieldTwos.prefab (929)
  4. Project/Assets/ML-Agents/Examples/Soccer/Prefabs/StrikersVsGoalieField.prefab (935)
  5. Project/Assets/ML-Agents/Examples/Soccer/Scenes/SoccerTwos.unity (22)
  6. Project/Assets/ML-Agents/Examples/Soccer/Scenes/StrikersVsGoalie.unity (1)
  7. Project/ProjectSettings/TagManager.asset (1)
  8. README.md (6)
  9. com.unity.ml-agents.extensions/Documentation~/Grid-Sensor.md (115)
  10. com.unity.ml-agents.extensions/Documentation~/com.unity.ml-agents.extensions.md (1)
  11. com.unity.ml-agents/CHANGELOG.md (15)
  12. com.unity.ml-agents/Editor/BehaviorParametersEditor.cs (16)
  13. com.unity.ml-agents/Runtime/Academy.cs (14)
  14. com.unity.ml-agents/Runtime/Agent.cs (69)
  15. com.unity.ml-agents/Runtime/Analytics/Events.cs (3)
  16. com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs (17)
  17. com.unity.ml-agents/Runtime/Communicator/RpcCommunicator.cs (43)
  18. com.unity.ml-agents/Runtime/Communicator/UnityRLCapabilities.cs (5)
  19. com.unity.ml-agents/Runtime/Grpc/CommunicatorObjects/AgentInfo.cs (67)
  20. com.unity.ml-agents/Runtime/Grpc/CommunicatorObjects/Capabilities.cs (40)
  21. com.unity.ml-agents/Runtime/Inference/BarracudaModelParamLoader.cs (70)
  22. com.unity.ml-agents/Runtime/Inference/ModelRunner.cs (6)
  23. com.unity.ml-agents/Runtime/Sensors/BufferSensor.cs (21)
  24. com.unity.ml-agents/Runtime/Sensors/BufferSensorComponent.cs (14)
  25. com.unity.ml-agents/Runtime/Sensors/CameraSensor.cs (17)
  26. com.unity.ml-agents/Runtime/Sensors/IDimensionPropertiesSensor.cs (2)
  27. com.unity.ml-agents/Tests/Editor/Analytics/InferenceAnalyticsTests.cs (2)
  28. com.unity.ml-agents/Tests/Editor/ParameterLoaderTest.cs (42)
  29. config/imitation/CrawlerStatic.yaml (6)
  30. config/imitation/FoodCollector.yaml (6)
  31. config/imitation/Hallway.yaml (3)
  32. config/imitation/PushBlock.yaml (18)
  33. config/imitation/Pyramids.yaml (4)
  34. config/ppo/Pyramids.yaml (3)
  35. config/ppo/PyramidsRND.yaml (4)
  36. config/ppo/VisualPyramids.yaml (3)
  37. config/sac/Pyramids.yaml (1)
  38. config/sac/VisualPyramids.yaml (1)
  39. docs/Learning-Environment-Design-Agents.md (51)
  40. docs/Learning-Environment-Examples.md (27)
  41. docs/ML-Agents-Overview.md (48)
  42. docs/Training-Configuration-File.md (6)
  43. gym-unity/README.md (2)
  44. gym-unity/gym_unity/__init__.py (4)
  45. gym-unity/gym_unity/envs/__init__.py (4)
  46. gym-unity/gym_unity/tests/test_gym.py (7)
  47. ml-agents-envs/mlagents_envs/__init__.py (4)
  48. ml-agents-envs/mlagents_envs/base_env.py (22)
  49. ml-agents-envs/mlagents_envs/communicator_objects/agent_info_pb2.py (18)
  50. ml-agents-envs/mlagents_envs/communicator_objects/agent_info_pb2.pyi (8)
  51. ml-agents-envs/mlagents_envs/communicator_objects/capabilities_pb2.py (11)
  52. ml-agents-envs/mlagents_envs/communicator_objects/capabilities_pb2.pyi (6)
  53. ml-agents-envs/mlagents_envs/environment.py (4)
  54. ml-agents-envs/mlagents_envs/rpc_utils.py (46)
  55. ml-agents-envs/mlagents_envs/tests/test_rpc_utils.py (14)
  56. ml-agents-envs/mlagents_envs/tests/test_steps.py (4)
  57. ml-agents/mlagents/trainers/__init__.py (4)
  58. ml-agents/mlagents/trainers/settings.py (20)
  59. ml-agents/mlagents/trainers/tests/mock_brain.py (10)
  60. ml-agents/mlagents/trainers/tests/simple_test_envs.py (65)
  61. ml-agents/mlagents/trainers/torch/attention.py (49)
  62. ml-agents/mlagents/trainers/torch/components/reward_providers/curiosity_reward_provider.py (28)
  63. ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py (27)
  64. ml-agents/mlagents/trainers/torch/components/reward_providers/rnd_reward_provider.py (18)
  65. ml-agents/tests/yamato/standalone_build_tests.py (4)
  66. ml-agents/tests/yamato/yamato_utils.py (11)
  67. protobuf-definitions/proto/mlagents_envs/communicator_objects/agent_info.proto (2)
  68. protobuf-definitions/proto/mlagents_envs/communicator_objects/capabilities.proto (3)
  69. .yamato/standalone-build-webgl-test.yml (30)
  70. Project/Assets/ML-Agents/Examples/Sorter.meta (8)
  71. com.unity.ml-agents/Runtime/Communicator/CommunicatorFactory.cs (35)
  72. com.unity.ml-agents/Runtime/Communicator/CommunicatorFactory.cs.meta (3)
  73. com.unity.ml-agents/Runtime/IMultiAgentGroup.cs (26)
  74. com.unity.ml-agents/Runtime/IMultiAgentGroup.cs.meta (11)
  75. com.unity.ml-agents/Runtime/MultiAgentGroupIdCounter.cs (13)
  76. com.unity.ml-agents/Runtime/MultiAgentGroupIdCounter.cs.meta (11)
  77. com.unity.ml-agents/Runtime/SimpleMultiAgentGroup.cs (143)
  78. com.unity.ml-agents/Runtime/SimpleMultiAgentGroup.cs.meta (11)
  79. com.unity.ml-agents/Tests/Editor/MultiAgentGroupTests.cs (120)
  80. com.unity.ml-agents/Tests/Editor/MultiAgentGroupTests.cs.meta (11)
  81. config/ppo/Sorter_curriculum.yaml (105)
  82. docs/images/sorter.png (1001)
  83. docs/images/variable-length-observation-illustrated.png (1001)
  84. Project/Assets/ML-Agents/Examples/Sorter/Meshes.meta (8)
  85. Project/Assets/ML-Agents/Examples/Sorter/Meshes/ArenaWalls.fbx (63)
  86. Project/Assets/ML-Agents/Examples/Sorter/Meshes/ArenaWalls.fbx.meta (247)
  87. Project/Assets/ML-Agents/Examples/Sorter/Prefabs.meta (8)
  88. Project/Assets/ML-Agents/Examples/Sorter/Prefabs/Area.prefab (1001)
  89. Project/Assets/ML-Agents/Examples/Sorter/Prefabs/Area.prefab.meta (7)
  90. Project/Assets/ML-Agents/Examples/Sorter/Scenes.meta (8)
  91. Project/Assets/ML-Agents/Examples/Sorter/Scenes/Sorter.unity.meta (9)
  92. Project/Assets/ML-Agents/Examples/Sorter/Scenes/Sorter.unity (1001)
  93. Project/Assets/ML-Agents/Examples/Sorter/Scripts.meta (8)
  94. Project/Assets/ML-Agents/Examples/Sorter/Scripts/NumberTile.cs (34)
  95. Project/Assets/ML-Agents/Examples/Sorter/Scripts/NumberTile.cs.meta (11)

.github/workflows/pytest.yml (3)


run: python -c "import sys; print(sys.version)"
- name: Install dependencies
run: |
# pin pip to workaround https://github.com/pypa/pip/issues/9180
python -m pip install pip==20.2
python -m pip install --upgrade pip
python -m pip install --upgrade setuptools
python -m pip install --progress-bar=off -e ./ml-agents-envs
python -m pip install --progress-bar=off -e ./ml-agents

.pre-commit-config.yaml (3)


args: [--py3-plus, --py36-plus]
exclude: >
(?x)^(
.*barracuda.py|
.*_pb2.py|
.*_pb2_grpc.py
)$

args: [--assume-in-merge]
- id: check-yaml
# Won't handle the templating in yamato
exclude: \.yamato/*
exclude: \.yamato/.*
- repo: https://github.com/pre-commit/pygrep-hooks
rev: v1.4.2

Project/Assets/ML-Agents/Examples/Soccer/Prefabs/SoccerFieldTwos.prefab (929)
Diff too large to display
View file

Project/Assets/ML-Agents/Examples/Soccer/Prefabs/StrikersVsGoalieField.prefab (935)
Diff too large to display
View file

Project/Assets/ML-Agents/Examples/Soccer/Scenes/SoccerTwos.unity (22)


m_Modification:
m_TransformParent: {fileID: 0}
m_Modifications:
- target: {fileID: 1141134673700168, guid: 54f3340298537426e96a6cc530e2d5d8, type: 3}
propertyPath: m_Name
value: SoccerFieldTwos
objectReference: {fileID: 0}
- target: {fileID: 4558743310993102, guid: 54f3340298537426e96a6cc530e2d5d8, type: 3}
propertyPath: m_LocalPosition.x
value: 0

- target: {fileID: 4558743310993102, guid: 54f3340298537426e96a6cc530e2d5d8, type: 3}
propertyPath: m_RootOrder
value: 4
objectReference: {fileID: 0}
- target: {fileID: 4558743310993102, guid: 54f3340298537426e96a6cc530e2d5d8, type: 3}
propertyPath: m_LocalEulerAnglesHint.x
value: 0
objectReference: {fileID: 0}
- target: {fileID: 4558743310993102, guid: 54f3340298537426e96a6cc530e2d5d8, type: 3}
propertyPath: m_LocalEulerAnglesHint.y
value: 0
objectReference: {fileID: 0}
- target: {fileID: 4558743310993102, guid: 54f3340298537426e96a6cc530e2d5d8, type: 3}
propertyPath: m_LocalEulerAnglesHint.z
value: 0
objectReference: {fileID: 0}
m_RemovedComponents: []
m_SourcePrefab: {fileID: 100100000, guid: 54f3340298537426e96a6cc530e2d5d8, type: 3}

m_Name:
m_EditorClassIdentifier:
gravityMultiplier: 1
monitorVerticalOffset: 0
reuseCollisionCallbacks: 1
--- !u!114 &1574236051
MonoBehaviour:
m_ObjectHideFlags: 0

blueMaterial: {fileID: 2100000, guid: c9fa44c2c3f8ce74ca39a3355ea42631, type: 2}
randomizePlayersTeamForTraining: 0
agentRunSpeed: 2
strikerPunish: -0.1
strikerReward: 1
goaliePunish: -1
goalieReward: 0.1
--- !u!1001 &1606160104
PrefabInstance:
m_ObjectHideFlags: 0

Project/Assets/ML-Agents/Examples/Soccer/Scenes/StrikersVsGoalie.unity (1)


maximumDeltaTime: 0.33333334
solverIterations: 6
solverVelocityIterations: 1
reuseCollisionCallbacks: 1
--- !u!114 &1574236051
MonoBehaviour:
m_ObjectHideFlags: 0

Project/ProjectSettings/TagManager.asset (1)


- symbol_O_Goal
- purpleAgent
- purpleGoal
- tile
layers:
- Default
- TransparentFX

README.md (6)


## Features
- 15+ [example Unity environments](docs/Learning-Environment-Examples.md)
- 18+ [example Unity environments](docs/Learning-Environment-Examples.md)
- Built-in support for Imitation Learning through Behavioral Cloning or
Generative Adversarial Imitation Learning
- Built-in support for Imitation Learning through Behavioral Cloning (BC) or
Generative Adversarial Imitation Learning (GAIL)
- Self-play mechanism for training agents in adversarial scenarios
- Easily definable Curriculum Learning scenarios for complex tasks
- Train robust agents using environment randomization

com.unity.ml-agents.extensions/Documentation~/Grid-Sensor.md (115)


# Summary
The Grid Sensor combines the generality of data extraction from Raycasts with the image processing power of Convolutional Neural Networks. The Grid Sensor can be used to collect data in the general form of a "Width x Height x Channel" matrix which can be used for training Reinforcement Learning agents or for data analysis.
The Grid Sensor is an alternative method for collecting observations which combines the generality of data extraction from Raycasts with the image processing power of Convolutional Neural Networks. The Grid Sensor can be used to collect data in the general form of a "Width x Height x Channel" matrix which can be used for training agent policies or for data analysis.
In MLAgents there are 2 main sensors for observing information that is "physically" around the agent.
In ML-Agents there are two main sensors for observing information that is "physically" around the agent.
This is simple to implement and provides enough information for most simple games. When few are used, they are computationally fast. However, there are multiple limiting factors:
* The rays need to be at the same height as the things the agent should observe
* Objects can remain hidden by line of sight and if the knowledge of those objects is crucial to the success of the agent, then this limitation must be compensated for by the agents networks capacity (i.e., need a bigger brain with memory)
Raycasts are simple to implement and provide enough information for most simple games. When few are used, they are also computationally lightweight. However, there are multiple limiting factors:
* The rays need to be at the same height as the things the agent should observe.
* Objects can remain hidden by line of sight, and if knowledge of those objects is crucial to the success of the agent, then this limitation must be compensated for by the agent's network capacity (i.e., it may need a bigger brain with memory).
* Typically the length of the raycasts is limited because the agent need not know about objects that are at the other side of the level. Combined with few raycasts for computational efficiency, this means that an agent may not observe objects that fall between these rays and the issue becomes worse as the objects reduce in size.
* Typically, the length of the raycasts is limited because the agent need not know about objects that are at the other side of the level. Combined with few raycasts for computational efficiency, this means that an agent may not observe objects that fall between these rays and the issue becomes worse as the objects reduce in size.
The Camera provides the agent with either a grayscale or an RGB image of the game environment. It goes without saying that there non-linear relationships between nearby pixels in an image. It is this intuition that helps form the basis of Convolutional Neural Networks (CNNs) and established the literature of designing networks that take advantage of these relationships between pixels. Following this established literature of CNNs on image based data, the MLAgent's Camera Sensor provides a means by which the agent can include high dimensional inputs (images) into its observation stream.
The Camera provides the agent with either a grayscale or an RGB image of the game environment. In many cases, what we want to extract from a set of pixels is invariant to the location of those pixels in the image. It is this intuition that helps form the basis of Convolutional Neural Networks (CNNs) and established the literature of designing networks that take advantage of these relationships between pixels. Following this established literature of CNNs on image-based data, the ML-Agents Camera Sensor provides a means by which the agent can include high-dimensional inputs (images) into its observation stream.
* It requires render the scene and thus is computationally slower than alternatives that do not use rendering
* It has not yet been shown that the Camera Sensor can be used on a headless machine, which means it is not yet possible (if at all) to train an agent on headless infrastructure.
* It requires rendering the scene and thus is computationally slower than alternatives that do not use rendering.
* The RGB of the camera only provides a maximum of 3 channels to the agent.
* The RGB of the camera only provides a maximum of three channels to the agent.
These limitations provided the motivation towards the development of the Grid Sensor and Grid Observations as described below.

Before jumping into the details of the Grid Sensor, an important thing to note is the agent's performance and qualitatively different behavior compared to raycasts. Unity ML-Agents comes with a suite of example environments. One in particular, the [Food Collector](https://github.com/Unity-Technologies/ml-agents/tree/release_13_docs/docs/Learning-Environment-Examples.md#food-collector), has been the focus of the Grid Sensor development.
Before jumping into the details of the Grid Sensor, an important thing to note is the agent's performance and qualitatively different behavior compared to raycasts. Unity ML-Agents comes with a suite of example environments. One in particular, the [Food Collector](https://github.com/Unity-Technologies/ml-agents/blob/release_13_docs/docs/Learning-Environment-Examples.md#food-collector), has been the focus of the Grid Sensor development.
The Food Collector environment can be described as:
* Set-up: A multi-agent environment where agents compete to collect food.

## Overview
There are 3 main phases to the Grid Sensor:
There are three main phases to the observation process of the Grid Sensor:
1. **Collection** - data is extracted from observed objects
2. **Encoding** - the extracted data is encoded into a grid observation
3. **Communication** - the grid observation is sent to python or used by a trained model

## Collection
A Grid Sensor is the Grid Observation analog of a Unity Camera but with some notable differences. The sensor is made up of a grid of identical Box Colliders which designate the "cells" of the grid. The Grid Sensor also has a list of "detectable objects" in the form of Unity gameobject tags. When an object that is tagged as a detectable object is present within a cell's Box Collider, that cell is "activated" and a method on the Grid Sensor extracts data from said object and associates that data with the position of the activated cell. Thus the Grid Sensor is always orthographic:
A Grid Sensor is the Grid Observation analog of a Unity Camera but with some notable differences. The sensor is made up of a grid of identical Box Colliders which designate the "cells" of the grid. The Grid Sensor also has a list of "detectable objects" in the form of Unity GameObject tags. When an object that is tagged as a detectable object is present within a cell's Box Collider, that cell is "activated" and a method on the Grid Sensor extracts data from said object and associates that data with the position of the activated cell. Thus the Grid Sensor is always orthographic:
<img src="images/persp_ortho_proj.png" width="500">
<cite><a href="https://www.geofx.com/graphics/nehe-three-js/lessons17-24/lesson21/lesson21.html">geofx.com</a></cite>
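As a rough illustration of that collection step, the check inside each cell might look like the sketch below. This is not the extension's actual implementation; `CellActivationSketch`, `CollectCell`, and the tag values are hypothetical, while `Physics.OverlapBox` is a real Unity overlap query standing in for whatever the Grid Sensor does internally.

```csharp
using UnityEngine;

public class CellActivationSketch : MonoBehaviour
{
    // Tags the sensor treats as "detectable objects" (hypothetical values).
    public string[] detectableObjects = { "weapon", "enemy" };

    void CollectCell(BoxCollider cell)
    {
        // Query everything overlapping this cell's Box Collider.
        foreach (var hit in Physics.OverlapBox(cell.bounds.center, cell.bounds.extents))
        {
            if (System.Array.IndexOf(detectableObjects, hit.tag) >= 0)
            {
                // The cell is "activated": the sensor extracts data from the
                // detected object and stores it at this cell's grid position.
            }
        }
    }
}
```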

Just like the Raycasts mentioned earlier, the Grid Sensor can extract any kind of data from a detected object and just like the Camera, the Grid Sensor maintains the spacial relationship between nearby cells that allows one to take advantage of the CNN literature. Thus the Grid Sensor tries to take the best of both sensors and combines them to something that is more expressive.
Just like the Raycasts mentioned earlier, the Grid Sensor can extract any kind of data from a detected object, and just like the Camera, the Grid Sensor maintains the spatial relationship between nearby cells that allows one to take advantage of the computational properties of CNNs. Thus the Grid Sensor tries to take the best of both sensors and combines them into something more expressive.
Lets imagine a scenario where an agent is faced with 2 enemies and there are 2 "equipable" weapons somewhat behind the agent. Lets also keep in mind some important properties of the enemies and weapons that would be useful for the agent to know. For simplicity, lets assume enemies represent their health as a percentage (0-100%). Lets also assume that enemies and weapons are the only 2 kind of objects that the agent would see in the entire game.
Let's imagine a scenario where an agent is faced with two enemies and there are two "equippable" weapons somewhat behind the agent. It would be helpful for the agent to know the location and properties of both the enemies as well as the equippable items. For simplicity, let's assume enemies represent their health as a percentage (0-100%). Also assume that enemies and weapons are the only two kinds of objects that the agent would see in the entire game.
<img src="images/gridsensor-example.png" align="middle" width="3000"/>
<img src="images/gridsensor-example.png" align="middle" width="512"/>
If a raycast hits an object, not only could we get the distance (normalized by the maximum raycast distance) we would be able to extract its type (enemy vs weapon) and if its an enemy then we could get its health (e.g., .6).
If a raycast hits an object, not only could we get the distance (normalized by the maximum raycast distance), but we could also extract its type (enemy vs weapon) and any attribute associated with it (e.g., an enemy's health).
There are many ways in which one could encode this information but one reasonable encoding is this:
```
[isWeapon, isEnemy, health, normalizedDistance]
```

For example, if the raycast hit nothing then this would be represented by `[0, 0, 0, 1]`.
If instead the raycast hit an enemy with 60% health that is 50% of the maximum raycast distance, the data would be represented by `[0, 1, .6, .5]`.
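As a sketch of that encoding: `RaycastEncodingSketch` is a hypothetical helper, not part of ML-Agents, and the `EnemyClass` stub mirrors the one used in the `GetObjectData` example later in this document.

```csharp
using UnityEngine;

// Stub matching the EnemyClass referenced in the GetObjectData example below.
public class EnemyClass : MonoBehaviour
{
    public float health; // assumed to be stored normalized (0-1)
}

public static class RaycastEncodingSketch
{
    // Returns [isWeapon, isEnemy, health, normalizedDistance].
    public static float[] Encode(Ray ray, float maxDistance)
    {
        if (!Physics.Raycast(ray, out var hit, maxDistance))
        {
            return new float[] { 0f, 0f, 0f, 1f }; // hit nothing
        }
        var obs = new float[4];
        if (hit.collider.CompareTag("weapon")) obs[0] = 1f;
        if (hit.collider.CompareTag("enemy"))
        {
            obs[1] = 1f;
            obs[2] = hit.collider.GetComponent<EnemyClass>().health; // e.g. 0.6
        }
        obs[3] = hit.distance / maxDistance; // e.g. 0.5
        return obs;
    }
}
```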
The limitations of raycasts which were presented above are easy to visualize in the below image. The agent is unable to see where the weapons are and only sees one of the enemies. Typically in the MLAgents examples, this situation is mitigated by including previous frames of data so that the agent observes changes through time. However, in more complex games, it is not difficult to imagine scenarios where an agent would not be able to observe important information using only Raycasts.
The limitations of raycasts which were presented above are easy to visualize in the below image. The agent is unable to see where the weapons are and only sees one of the enemies. Typically in the ML-Agents examples, this situation is mitigated by including previous frames of data so that the agent observes changes through time. However, in more complex games, it is not difficult to imagine scenarios where an agent might miss important information using only Raycasts.
<img src="images/gridsensor-example-raycast.png" align="middle" width="3000"/>
<img src="images/gridsensor-example-raycast.png" align="middle" width="512"/>
<img src="images/gridsensor-example-camera.png" align="middle" width="3000"/>
<img src="images/gridsensor-example-camera.png" align="middle" width="512"/>
#### Grid Sensor

Following the same data extraction method presented in the section on raycasts, if a Grid Sensor were used instead of Raycasts or a Camera, then not only would the agent be able to extract the health value of the enemies, but it would also be able to encode the relative positions of those objects, as is done with the Camera. Additionally, as the texture of the objects is not used, this data can be collected without rendering the scene.
<img src="images/gridsensor-example-gridsensor.png" align="middle" width="3000"/>
In our example, we can collect data in the form of [objectType, health] by overriding `GetObjectData` as follows:
```csharp
protected override float[] GetObjectData(GameObject currentColliderGo, float type_index, float normalized_distance)
{
    float[] channelValues = new float[ChannelDepth.Length]; // ChannelDepth.Length = 2 in this example
    channelValues[0] = type_index; // this is the observation collected in the default implementation
    if (currentColliderGo.tag == "enemy")
    {
        var enemy = currentColliderGo.GetComponent<EnemyClass>();
        channelValues[1] = enemy.health; // the value may have to be normalized depending on the type of GridSensor encoding you use (see sections below)
    }
    return channelValues;
}
```
<img src="images/gridsensor-example-gridsensor.png" align="middle" width="512"/>
At the end of the Collection phase, each cell with an object inside of it has `GetObjectData` called and the returned values (named `channelValues`) is then processed in the Encoding phase which is described in the next section.
At the end of the Collection phase, each cell with an object inside of it has `GetObjectData` called, and the returned values are then processed in the Encoding phase, which is described in the next section.
The CountingGridSensor builds on the GridSesnor to perform the specific job of counting the number of object types that are based on the different detectable object tags. The encoding and is meant to exploit a key feature of the Grid Sensor. In both the Channel and the Channel Hot DepthTypes, the closest detectable object, in relation to the agent, that lays within a cell is used for encoding the value for that cell. In the CountingGridSensor, the number of each type of object is recorded and then normalized according to a max count, stored in the ChannelDepth.
The CountingGridSensor builds on the GridSensor to perform the specific job of counting the number of object types based on the different detectable object tags. The encoding is meant to exploit a key feature of the GridSensor: in the original GridSensor, only the closest detectable object (in relation to the agent) that lies within a cell is used for encoding the value for that cell. In the CountingGridSensor, the number of each type of object is recorded and then normalized according to a max count.
An example of the CountingGridSensor can be found below.

In order to support different ways of representing the data extracted from an object, multiple "depth types" were implemented. Each has pros and cons and, depending on the use-case of the Grid Sensor, one may be more beneficial than the others.
The data stored that is extracted during the *Collection* phase, and stored in `channelValues`, may come from different sources. For instance, going back the Enemy/Weapon example in the previous section, an enemy's health is continuous whereas the object type (enemy or weapon) is categorical data. This distinction is important as categorical data requires a different encoding mechanism than continuous data.
The stored data that is extracted during the *Collection* phase may come from different sources, and thus be of a different nature. For instance, going back to the Enemy/Weapon example in the previous section, an enemy's health is continuous whereas the object type (enemy or weapon) is categorical data. This distinction is important as categorical data requires a different encoding mechanism than continuous data.
The GridSensor handles this distinction with two user defined properties that define how this data is to be encoded:
The Grid Sensor handles this distinction with 4 properties that define how this data is to be encoded:
* ObservationPerCell - the total number of values that are in each cell of the grid observation
* ChannelDepth - int[] describing the range of each data within the `channelValues`
* ChannelOffset - int[] describing the number of encoded values that come before each data within `channelValues`
* ChannelDepth - `int[]` describing the range of each data and is used differently with different DepthType
How categorical and continuous data are treated differs between the DepthTypes, as explored in the sections below. The sections use an ongoing example similar to the one mentioned earlier where, within a cell, the sensor observes `an enemy with 60% health`. Thus the cell contains two kinds of data: categorical data (the object type) and continuous data (the health). Additionally, the order of the observed tags is important, as it allows one to encode the tag of the observed object by its index within the list of observed tags. Note that in the example, the observed tags are defined as ["weapon", "enemy"].
The ChannelDepth and the DepthType are user defined and give the developer control over how to encode their data. The ChannelDepth and ChannelOffset are both initialized and used in different ways depending on the DepthType.
### Channel Based
How categorical and continuous data is treated is different between the different DepthTypes as will be explored in the sections below. The sections will use an on-going example similar to example mentioned earlier where, within a cell, the sensor observes: `an enemy with 60% health`. Thus the cell contains 2 kinds of data: categorical data (object type) and the continuous data (health). Additionally, the order of the observed tags is important as it allows one to encode the tag of the observed object by its index within list of observed tags. Note that in the example, the observed tags is defined as ["weapon", "enemy"].
The Channel Based Grid Observations represent observations in a normalized form with values between 0 and 1. To distinguish between categorical and continuous data, one would use the ChannelDepth array to signify the ranges that the values in the `channelValues` array could take. If one sets ChannelDepth[i] to be 1, it is assumed that the value of `channelValues[i]` is already normalized. Otherwise, ChannelDepth[i] represents the total number of possible values that `channelValues[i]` can take and will be used for normalization.
### Channel Based
For continuous data, you should set `ChannelDepth[i]` to 1, and the collected data should already be normalized by its min/max range. For discrete data, you should set `ChannelDepth[i]` to the total number of possible values, and the collected data should be an integer value within the range of `ChannelDepth[i]`.
The Channel Based Grid Observations is perhaps the simplest in terms of usability and similarity with other machine learning applications. Each grid is of size WxHxC where C is the number of channels. To distinguish between categorical and continuous data, one would use the ChannelDepth array to signify the ranges that the values in the `channelValues` array could take. If one sets ChannelDepth[i] to be 1, it is assumed that the value of `channelValues[i]` is already normalized. Else ChannelDepth[i] represents the total number of possible values that `channelValues[i]` can take.
Using the example described earlier, if one was using Channel Based Grid Observations, they would have a ChannelDepth = {2, 1} to describe that there are two possible values for the first channel (ObjectType) and the 1 represents that the second channel (EnemyHealth) is continuous and should be already normalized.
Using the example described earlier, if one was using Channel Based Grid Observations, they would have a ChannelDepth = {2, 1} to describe that there are two possible values for the first channel and the 1 represents that the second channel is already normalized.
As the "enemy" is in the second position of the observed tags, its value can be normalized by:
For ObjectType, "weapon", "enemy" will be represented respectively as:
num = detectableObjects.IndexOfTag("enemy")/ChannelDepth[0] = 2/2 = 1;
weapon = DetectableObjects.IndexOfTag("weapon")/ChannelDepth[0] = 1/2 = 0.5;
enemy = DetectableObjects.IndexOfTag("enemy")/ChannelDepth[0] = 2/2 = 1;
`[1, .6]`
`[1, .6]`. If the health in the game is not represented in a normalized form, for example if the health is represented as an integer ranging from -100 to 100, you'll need to manually normalize it during collection. That is, if you get the value 50, you need to normalize it by `50/(100 - (-100)) = 0.25` and collect 0.25 instead of 50.
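To make the rule concrete, here is a minimal sketch of the normalization described above; `ChannelBasedSketch` is a hypothetical helper, not the extension's API:

```csharp
public static class ChannelBasedSketch
{
    // Depth 1 means "already normalized"; depth > 1 means "divide by the
    // number of possible values".
    public static float NormalizeChannel(float value, int depth)
    {
        return depth == 1 ? value : value / depth;
    }
}

// With ChannelDepth = {2, 1} and an observed "enemy" (tag index 2) at 60% health:
//   NormalizeChannel(2f, 2)   -> 1.0f
//   NormalizeChannel(0.6f, 1) -> 0.6f   => encoded cell: [1, 0.6]
```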
The Channel Hot DepthType generalizes the classic OneHot encoding to differentiate combinations of different data. Rather than normalizing the data like in the Channel Based section, each element of `channelValues` is represented by an encoding based on the ChannelDepth. If ChannelDepth[i] = 1, then this represents that `channelValues[i]` is already normalized (between 0-1) and will be used directly within the encoding. However if ChannelDepth[i] is an integer greater than 1, then the value in `channelValues[i]` will be converted into a OneHot encoding based on the following:
The Channel Hot DepthType generalizes the classic OneHot encoding to differentiate combinations of different data. Rather than normalizing the data as in the Channel Based section, each element of `channelValues` is represented by an encoding based on the ChannelDepth. If ChannelDepth[i] = 1, then this represents that `channelValues[i]` is already normalized (between 0-1) and will be used directly within the encoding, which is the same as with Channel Based. However, if ChannelDepth[i] is an integer greater than 1, then the value in `channelValues[i]` will be converted into a OneHot encoding based on the following:
```
float[] arr = new float[ChannelDepth[i] + 1];

The encoding of each channel is then concatenated together. Clearly, this setup allows the developer to encode values using the classic OneHot encoding. Below are some different variations of the ChannelDepth which create different encodings of the example:
##### ChannelDepth = {3, 1}
The first element, 3, signifies that there are 3 possibilities for the first channel and as the "enemy" is 2nd in the detected objects list, the "enemy" in the example is encoded as `[0, 0, 1]` where the first index represents "no object". The second element, 1, signifies that the health is already normalized and, following the table, is used directly. The resulting encoding is thus:
The first element, 3, signifies that there are three possibilities for the first channel and as the "enemy" is 2nd in the detected objects list, the "enemy" in the example is encoded as `[0, 0, 1]` where the first index represents "no object". The second element, 1, signifies that the health is already normalized and, following the table, is used directly. The resulting encoding is thus:
```
[0, 0, 1, 0.6]
```
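A minimal sketch of that rule, written to match the worked example. Note the truncated snippet above allocates `ChannelDepth[i] + 1` slots; this sketch produces the three-slot output shown in the example instead, so treat the details as illustrative rather than as the extension's implementation.

```csharp
public static class ChannelHotSketch
{
    // Depth 1 passes the value through; depth > 1 one-hot encodes it, with
    // index 0 reserved for "no object".
    public static float[] Encode(float value, int depth)
    {
        if (depth == 1)
        {
            return new[] { value }; // already normalized, used directly
        }
        var arr = new float[depth];
        arr[(int)value] = 1f;
        return arr;
    }
}

// ChannelDepth = {3, 1}, "enemy" has tag index 2:
//   Encode(2f, 3)   -> [0, 0, 1]
//   Encode(0.6f, 1) -> [0.6]      => concatenated: [0, 0, 1, 0.6]
```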

### CountingGridSensor
As introduced above, the CountingGridSensor inherits from the GridSensor for the sole purpose of counting the different objects that lay within a cell. In order to normalize the counts so that the grid can be properly encoded as PNG, the ChannelDepth is used to represent the "maximum count" of each type. For the working example, if the ChannelDepth is set as {50, 10}, which represents that the maximum count for objects with the "weapon" and "enemy" tag is 50 and 10, respectively, then the resulting data would be:
As mentioned above, the CountingGridSensor inherits from the GridSensor for the sole purpose of counting the different objects that lie within a cell. In order to normalize the counts so that the grid can be properly encoded as PNG, the ChannelDepth is used to represent the "maximum count" of each type. For the working example, if the ChannelDepth is set as {50, 10}, which represents that the maximum count for objects with the "weapon" and "enemy" tag is 50 and 10, respectively, then the resulting data would be:
```
encoding = [0 weapons/ 50 weapons, 1 enemy / 10 enemies] = [0, .1]
```
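A sketch of that normalization; `CountingSketch` is a hypothetical helper, while `Mathf.Clamp01` is the real Unity clamp function:

```csharp
using UnityEngine;

public static class CountingSketch
{
    // Per-tag counts divided by the per-tag maxima stored in ChannelDepth.
    public static float[] NormalizeCounts(int[] counts, int[] channelDepth)
    {
        var encoding = new float[counts.Length];
        for (var i = 0; i < counts.Length; i++)
        {
            // Clamp so a count above the configured maximum still encodes as 1.
            encoding[i] = Mathf.Clamp01((float)counts[i] / channelDepth[i]);
        }
        return encoding;
    }
}

// counts = {0 weapons, 1 enemy}, ChannelDepth = {50, 10}:
//   NormalizeCounts(new[] { 0, 1 }, new[] { 50, 10 }) -> [0, 0.1f]
```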

At the end of the Encoding phase, all of the data for a Grid Observation is placed into a float[] referred to as the perception buffer. Now the data is ready to be sent to either the python side for training or to be used by a trained model within Unity. This is where the Grid Sensor takes advantage of 2D textures and the PNG encoding schema to reduce the number of bytes that are being sent.
The 2D texture is a Unity class that encodes the colors of an image. It is used in many ways throughout Unity, but it has two specific methods that the Grid Sensor takes advantage of:
`SetPixels` takes a 2D array of Colors and assigns the color values to the texture.
`EncodeToPNG` returns a byte[] containing the PNG encoding of the colors of the texture.
Together these two functions allow one to "push" a WxHx3 normalized array to a PNG byte[]. Indeed, this is how the Camera Sensor in Unity ML-Agents sends its data to Python. However, the Grid Sensor can have N channels, so there needs to be a more generic way to send the data.
The core idea behind how a Grid Observation is encoded is the following (a code sketch follows the list):
1. split the channels of a Grid Observation into groups of 3
2. encode each of these groups as a PNG byte[]
3. concatenate all byte[] and send the combined array to python
4. reconstruct the Grid Observation by splitting up the array and decoding the sections
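A minimal sketch of steps 1 and 2, assuming a `W x H x N` grid packed into ceil(N/3) RGB textures; `GridPngSketch` is illustrative, while `SetPixels` and `EncodeToPNG` are the real Unity methods named above:

```csharp
using System.Collections.Generic;
using UnityEngine;

public static class GridPngSketch
{
    public static List<byte[]> Encode(float[,,] grid) // grid[w, h, channel]
    {
        int w = grid.GetLength(0), h = grid.GetLength(1), n = grid.GetLength(2);
        var pngs = new List<byte[]>();
        for (var c = 0; c < n; c += 3) // step 1: split channels into groups of 3
        {
            var tex = new Texture2D(w, h, TextureFormat.RGB24, false);
            var colors = new Color[w * h];
            for (var x = 0; x < w; x++)
            {
                for (var y = 0; y < h; y++)
                {
                    colors[y * w + x] = new Color(
                        grid[x, y, c],
                        c + 1 < n ? grid[x, y, c + 1] : 0f,
                        c + 2 < n ? grid[x, y, c + 2] : 0f);
                }
            }
            tex.SetPixels(colors);
            pngs.Add(tex.EncodeToPNG()); // step 2: one PNG byte[] per group
        }
        return pngs; // step 3 concatenates these; step 4 reverses it on the Python side
    }
}
```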
At the end of the Encoding phase, all the Grid Observations will be sent either to the Python side for training or to be used by a trained model within Unity. Since the data format is similar to images collected by Camera Sensors, Grid Observations also have the CompressionType option to specify whether to send the data directly or in PNG-compressed form for better communication efficiency.
Once the bytes are sent to python, they are then decoded and used as a tensor of the correct shape within the mlagents python codebase.
Once the bytes are sent to Python, they are then decoded and provided as a tensor of the correct shape.

com.unity.ml-agents.extensions/Documentation~/com.unity.ml-agents.extensions.md (1)


In Unity 2019.4 or later, open the Package Manager, hit the "+" button, and select "Add package from git URL".
![Package Manager git URL](https://github.com/Unity-Technologies/ml-agents/blob/release_13_docs/docs/images/unity_package_manager_git_url.png)
In the dialog that appears, enter
```
git+https://github.com/Unity-Technologies/ml-agents.git?path=com.unity.ml-agents.extensions

com.unity.ml-agents/CHANGELOG.md (15)


and this project adheres to
[Semantic Versioning](http://semver.org/spec/v2.0.0.html).
## [Unreleased]
### Major Changes
#### com.unity.ml-agents (C#)
#### ml-agents / ml-agents-envs / gym-unity (Python)
### Minor Changes
#### com.unity.ml-agents / com.unity.ml-agents.extensions (C#)
#### ml-agents / ml-agents-envs / gym-unity (Python)
- The `encoding_size` setting for RewardSignals has been deprecated. Please use `network_settings` instead. (#4982)
### Bug Fixes
#### com.unity.ml-agents (C#)
#### ml-agents / ml-agents-envs / gym-unity (Python)
- An issue that caused `GAIL` to fail for environments where agents can terminate episodes by self-sacrifice has been fixed. (#4971)
## [1.8.0-preview] - 2021-02-17
### Major Changes
#### com.unity.ml-agents (C#)

com.unity.ml-agents/Editor/BehaviorParametersEditor.cs (16)


// Grab the sensor components, since we need them to determine the observation sizes.
// TODO make these methods of BehaviorParameters
SensorComponent[] sensorComponents;
if (behaviorParameters.UseChildSensors)
{
sensorComponents = behaviorParameters.GetComponentsInChildren<SensorComponent>();
}
else
{
sensorComponents = behaviorParameters.GetComponents<SensorComponent>();
}
var agent = behaviorParameters.gameObject.GetComponent<Agent>();
agent.sensors = new List<ISensor>();
agent.InitializeSensors();
var sensors = agent.sensors.ToArray();
ActuatorComponent[] actuatorComponents;
if (behaviorParameters.UseChildActuators)

// Get the total size of the sensors generated by ObservableAttributes.
// If there are any errors (e.g. unsupported type, write-only properties), display them too.
int observableAttributeSensorTotalSize = 0;
var agent = behaviorParameters.GetComponent<Agent>();
if (agent != null && behaviorParameters.ObservableAttributeHandling != ObservableAttributeOptions.Ignore)
{
List<string> observableErrors = new List<string>();

if (brainParameters != null)
{
var failedChecks = Inference.BarracudaModelParamLoader.CheckModel(
barracudaModel, brainParameters, sensorComponents, actuatorComponents,
barracudaModel, brainParameters, sensors, actuatorComponents,
observableAttributeSensorTotalSize, behaviorParameters.BehaviorType
);
foreach (var check in failedChecks)

com.unity.ml-agents/Runtime/Academy.cs (14)


/// <term>1.4.0</term>
/// <description>Support training analytics sent from python trainer to the editor.</description>
/// </item>
/// <item>
/// <term>1.5.0</term>
/// <description>Support variable length observation training.</description>
/// </item>
const string k_ApiVersion = "1.4.0";
const string k_ApiVersion = "1.5.0";
/// <summary>
/// Unity package version of com.unity.ml-agents.

var port = ReadPortFromArgs();
if (port > 0)
{
Communicator = new RpcCommunicator(
new CommunicatorInitParameters
{
port = port
}
);
Communicator = CommunicatorFactory.Create();
}
if (Communicator != null)

bool initSuccessful = false;
var communicatorInitParams = new CommunicatorInitParameters
{
port = port,
unityCommunicationVersion = k_ApiVersion,
unityPackageVersion = k_PackageVersion,
name = "AcademySingleton",

com.unity.ml-agents/Runtime/Agent.cs (69)


public float reward;
/// <summary>
/// The current group reward received by the agent.
/// </summary>
public float groupReward;
/// <summary>
/// Whether the agent is done or not.
/// </summary>
public bool done;

/// to separate between different agents in the environment.
/// </summary>
public int episodeId;
/// <summary>
/// MultiAgentGroup identifier.
/// </summary>
public int groupId;
public void ClearActions()
{

/// Additionally, the magnitude of the reward should not exceed 1.0
float m_Reward;
/// Represents the group reward the agent accumulated during the current step.
float m_GroupReward;
/// Keeps track of the cumulative reward in this episode.
float m_CumulativeReward;

/// </summary>
float[] m_LegacyHeuristicCache;
/// Currect MultiAgentGroup ID. Default to 0 (meaning no group)
int m_GroupId;
/// Delegate for the agent to unregister itself from the MultiAgentGroup without cyclic reference
/// between agent and the group
internal event Action<Agent> OnAgentDisabled;
/// <summary>
/// Called when the attached [GameObject] becomes enabled and active.
/// [GameObject]: https://docs.unity3d.com/Manual/GameObjects.html

new int[m_ActuatorManager.NumDiscreteActions]
);
m_Info.groupId = m_GroupId;
// The first time the Academy resets, all Agents in the scene will be
// forced to reset through the <see cref="AgentForceReset"/> event.
// To avoid the Agent resetting twice, the Agents will not begin their

NotifyAgentDone(DoneReason.Disabled);
}
m_Brain?.Dispose();
OnAgentDisabled?.Invoke(this);
m_Initialized = false;
}

}
m_Info.episodeId = m_EpisodeId;
m_Info.reward = m_Reward;
m_Info.groupReward = m_GroupReward;
m_Info.groupId = m_GroupId;
if (collectObservationsSensor != null)
{
// Make sure the latest observations are being passed to training.

}
m_Reward = 0f;
m_GroupReward = 0f;
m_CumulativeReward = 0f;
m_RequestAction = false;
m_RequestDecision = false;

m_CumulativeReward += increment;
}
internal void SetGroupReward(float reward)
{
#if DEBUG
    Utilities.DebugCheckNanAndInfinity(reward, nameof(reward), nameof(SetGroupReward));
#endif
    m_GroupReward = reward;
}

internal void AddGroupReward(float increment)
{
#if DEBUG
    Utilities.DebugCheckNanAndInfinity(increment, nameof(increment), nameof(AddGroupReward));
#endif
    m_GroupReward += increment;
}
/// <summary>
/// Retrieves the episode reward for the Agent.
/// </summary>

/// </summary>
internal void InitializeSensors()
{
if (m_PolicyFactory == null)
{
m_PolicyFactory = GetComponent<BehaviorParameters>();
}
if (m_PolicyFactory.ObservableAttributeHandling != ObservableAttributeOptions.Ignore)
{
var excludeInherited =

m_Info.discreteActionMasks = m_ActuatorManager.DiscreteActionMask?.GetMask();
m_Info.reward = m_Reward;
m_Info.groupReward = m_GroupReward;
m_Info.groupId = m_GroupId;
using (TimerStack.Instance.Scoped("RequestDecision"))
{

{
SendInfoToBrain();
m_Reward = 0f;
m_GroupReward = 0f;
m_RequestDecision = false;
}
}

var actions = m_Brain?.DecideAction() ?? new ActionBuffers();
m_Info.CopyActions(actions);
m_ActuatorManager.UpdateActions(actions);
}
internal void SetMultiAgentGroup(IMultiAgentGroup multiAgentGroup)
{
    if (multiAgentGroup == null)
    {
        m_GroupId = 0;
    }
    else
    {
        var newGroupId = multiAgentGroup.GetId();
        if (m_GroupId == 0 || m_GroupId == newGroupId)
        {
            m_GroupId = newGroupId;
        }
        else
        {
            throw new UnityAgentsException("Agent is already registered with a group. Unregister it first.");
        }
    }
}
}
}
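For context, here is a sketch of how a game script might drive this group machinery through the `SimpleMultiAgentGroup` class this merge adds under Runtime/; the controller class and reward values are illustrative, not part of the package:

```csharp
using Unity.MLAgents;
using UnityEngine;

public class TeamController : MonoBehaviour
{
    readonly SimpleMultiAgentGroup m_Group = new SimpleMultiAgentGroup();

    public void AddTeammate(Agent agent)
    {
        // Internally calls agent.SetMultiAgentGroup, assigning the groupId
        // that now travels in AgentInfo alongside groupReward.
        m_Group.RegisterAgent(agent);
    }

    public void OnTeamScored()
    {
        m_Group.AddGroupReward(1.0f); // shared reward, distinct from Agent.AddReward
        m_Group.EndGroupEpisode();    // ends the episode for every registered agent
    }
}
```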

com.unity.ml-agents/Runtime/Analytics/Events.cs (3)


public static EventObservationSpec FromSensor(ISensor sensor)
{
var shape = sensor.GetObservationShape();
var dimProps = (sensor as IDimensionPropertiesSensor)?.GetDimensionProperties();
// TODO copy flags when we have them
dimInfos[i].Flags = dimProps != null ? (int)dimProps[i] : 0;
}
var builtInSensorType =

com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs (17)


var agentInfoProto = new AgentInfoProto
{
Reward = ai.reward,
GroupReward = ai.groupReward,
GroupId = ai.groupId,
};
if (ai.discreteActionMasks != null)

{
observationProto.DimensionProperties.Add((int)dimensionProperties[i]);
}
// Checking trainer compatibility with variable length observations
if (dimensionProperties.Length == 2)
{
if (dimensionProperties[0] == DimensionProperty.VariableSize &&
dimensionProperties[1] == DimensionProperty.None)
{
var trainerCanHandleVarLenObs = Academy.Instance.TrainerCapabilities == null || Academy.Instance.TrainerCapabilities.VariableLengthObservation;
if (!trainerCanHandleVarLenObs)
{
throw new UnityAgentsException("Variable Length Observations are not supported by the trainer");
}
}
}
}
observationProto.Shape.AddRange(shape);

CompressedChannelMapping = proto.CompressedChannelMapping,
HybridActions = proto.HybridActions,
TrainingAnalytics = proto.TrainingAnalytics,
VariableLengthObservation = proto.VariableLengthObservation,
};
}

CompressedChannelMapping = rlCaps.CompressedChannelMapping,
HybridActions = rlCaps.HybridActions,
TrainingAnalytics = rlCaps.TrainingAnalytics,
VariableLengthObservation = rlCaps.VariableLengthObservation,
};
}

com.unity.ml-agents/Runtime/Communicator/RpcCommunicator.cs (43)


# if MLA_SUPPORTED_TRAINING_PLATFORM
using Grpc.Core;
#endif
#if UNITY_EDITOR
using UnityEditor;
#endif

#if MLA_SUPPORTED_TRAINING_PLATFORM
/// The Unity to External client.
UnityToExternalProto.UnityToExternalProtoClient m_Client;
#endif
/// The communicator parameters sent at construction
CommunicatorInitParameters m_CommunicatorInitParameters;
/// <param name="communicatorInitParameters">Communicator parameters.</param>
public RpcCommunicator(CommunicatorInitParameters communicatorInitParameters)
public RpcCommunicator()
m_CommunicatorInitParameters = communicatorInitParameters;
#region Initialization
internal static bool CheckCommunicationVersionsAreCompatible(
string unityCommunicationVersion,

try
{
initializationInput = Initialize(
initParameters.port,
new UnityOutputProto
{
RlInitializationOutput = academyParameters

SendCommandEvent(rlInput.Command);
}
UnityInputProto Initialize(UnityOutputProto unityOutput, out UnityInputProto unityInput)
UnityInputProto Initialize(int port, UnityOutputProto unityOutput, out UnityInputProto unityInput)
var channel = new Channel(
"localhost:" + m_CommunicatorInitParameters.port,
ChannelCredentials.Insecure);
var channel = new Channel($"localhost:{port}", ChannelCredentials.Insecure);
m_Client = new UnityToExternalProto.UnityToExternalProtoClient(channel);
var result = m_Client.Exchange(WrapMessage(unityOutput, 200));

QuitCommandReceived?.Invoke();
}
return result.UnityInput;
#else
throw new UnityAgentsException("You cannot perform training on this platform.");
#endif
#endregion
#region Destruction
/// <summary>
/// Close the communicator gracefully on both sides of the communication.

{
// ignored
}
#else
throw new UnityAgentsException(
"You cannot perform training on this platform.");
#endif
#endregion
#region Sending Events
void SendCommandEvent(CommandProto command)
{

}
}
#endregion
#region Sending and retrieving data
public void DecideBatch()
{

QuitCommandReceived?.Invoke();
return null;
}
#else
throw new UnityAgentsException(
"You cannot perform training on this platform.");
#endif
}
/// <summary>

}
}
#endregion
#if UNITY_EDITOR
/// <summary>

#endif
}
}
#endif // UNITY_EDITOR || UNITY_STANDALONE_WIN || UNITY_STANDALONE_OSX || UNITY_STANDALONE_LINUX

com.unity.ml-agents/Runtime/Communicator/UnityRLCapabilities.cs (5)


public bool CompressedChannelMapping;
public bool HybridActions;
public bool TrainingAnalytics;
public bool VariableLengthObservation;
/// <summary>
/// A class holding the capabilities flags for Reinforcement Learning across C# and the Trainer codebase. This

bool concatenatedPngObservations = true,
bool compressedChannelMapping = true,
bool hybridActions = true,
bool trainingAnalytics = true)
bool trainingAnalytics = true,
bool variableLengthObservation = true)
{
BaseRLCapabilities = baseRlCapabilities;
ConcatenatedPngObservations = concatenatedPngObservations;

VariableLengthObservation = variableLengthObservation;
}
/// <summary>

com.unity.ml-agents/Runtime/Grpc/CommunicatorObjects/AgentInfo.cs (67)


string.Concat(
"CjNtbGFnZW50c19lbnZzL2NvbW11bmljYXRvcl9vYmplY3RzL2FnZW50X2lu",
"Zm8ucHJvdG8SFGNvbW11bmljYXRvcl9vYmplY3RzGjRtbGFnZW50c19lbnZz",
"L2NvbW11bmljYXRvcl9vYmplY3RzL29ic2VydmF0aW9uLnByb3RvItEBCg5B",
"L2NvbW11bmljYXRvcl9vYmplY3RzL29ic2VydmF0aW9uLnByb3RvIvkBCg5B",
"YXRvcl9vYmplY3RzLk9ic2VydmF0aW9uUHJvdG9KBAgBEAJKBAgCEANKBAgD",
"EARKBAgEEAVKBAgFEAZKBAgGEAdKBAgMEA1CJaoCIlVuaXR5Lk1MQWdlbnRz",
"LkNvbW11bmljYXRvck9iamVjdHNiBnByb3RvMw=="));
"YXRvcl9vYmplY3RzLk9ic2VydmF0aW9uUHJvdG8SEAoIZ3JvdXBfaWQYDiAB",
"KAUSFAoMZ3JvdXBfcmV3YXJkGA8gASgCSgQIARACSgQIAhADSgQIAxAESgQI",
"BBAFSgQIBRAGSgQIBhAHSgQIDBANQiWqAiJVbml0eS5NTEFnZW50cy5Db21t",
"dW5pY2F0b3JPYmplY3RzYgZwcm90bzM="));
new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.AgentInfoProto), global::Unity.MLAgents.CommunicatorObjects.AgentInfoProto.Parser, new[]{ "Reward", "Done", "MaxStepReached", "Id", "ActionMask", "Observations" }, null, null, null)
new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.AgentInfoProto), global::Unity.MLAgents.CommunicatorObjects.AgentInfoProto.Parser, new[]{ "Reward", "Done", "MaxStepReached", "Id", "ActionMask", "Observations", "GroupId", "GroupReward" }, null, null, null)
}));
}
#endregion

id_ = other.id_;
actionMask_ = other.actionMask_.Clone();
observations_ = other.observations_.Clone();
groupId_ = other.groupId_;
groupReward_ = other.groupReward_;
_unknownFields = pb::UnknownFieldSet.Clone(other._unknownFields);
}

get { return observations_; }
}
/// <summary>Field number for the "group_id" field.</summary>
public const int GroupIdFieldNumber = 14;
private int groupId_;
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public int GroupId {
get { return groupId_; }
set {
groupId_ = value;
}
}
/// <summary>Field number for the "group_reward" field.</summary>
public const int GroupRewardFieldNumber = 15;
private float groupReward_;
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public float GroupReward {
get { return groupReward_; }
set {
groupReward_ = value;
}
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public override bool Equals(object other) {
return Equals(other as AgentInfoProto);

if (Id != other.Id) return false;
if(!actionMask_.Equals(other.actionMask_)) return false;
if(!observations_.Equals(other.observations_)) return false;
if (GroupId != other.GroupId) return false;
if (!pbc::ProtobufEqualityComparers.BitwiseSingleEqualityComparer.Equals(GroupReward, other.GroupReward)) return false;
return Equals(_unknownFields, other._unknownFields);
}

if (Id != 0) hash ^= Id.GetHashCode();
hash ^= actionMask_.GetHashCode();
hash ^= observations_.GetHashCode();
if (GroupId != 0) hash ^= GroupId.GetHashCode();
if (GroupReward != 0F) hash ^= pbc::ProtobufEqualityComparers.BitwiseSingleEqualityComparer.GetHashCode(GroupReward);
if (_unknownFields != null) {
hash ^= _unknownFields.GetHashCode();
}

}
actionMask_.WriteTo(output, _repeated_actionMask_codec);
observations_.WriteTo(output, _repeated_observations_codec);
if (GroupId != 0) {
output.WriteRawTag(112);
output.WriteInt32(GroupId);
}
if (GroupReward != 0F) {
output.WriteRawTag(125);
output.WriteFloat(GroupReward);
}
if (_unknownFields != null) {
_unknownFields.WriteTo(output);
}
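A side note on the raw tag constants in this generated writer: protobuf encodes each field tag as `(field_number << 3) | wire_type`, which is where 112 and 125 come from.

```csharp
// Field numbers come from GroupIdFieldNumber = 14 and GroupRewardFieldNumber = 15 above.
int groupIdTag = (14 << 3) | 0;     // wire type 0 (varint)  -> 112
int groupRewardTag = (15 << 3) | 5; // wire type 5 (fixed32) -> 125
```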

}
size += actionMask_.CalculateSize(_repeated_actionMask_codec);
size += observations_.CalculateSize(_repeated_observations_codec);
if (GroupId != 0) {
size += 1 + pb::CodedOutputStream.ComputeInt32Size(GroupId);
}
if (GroupReward != 0F) {
size += 1 + 4;
}
if (_unknownFields != null) {
size += _unknownFields.CalculateSize();
}

}
actionMask_.Add(other.actionMask_);
observations_.Add(other.observations_);
if (other.GroupId != 0) {
GroupId = other.GroupId;
}
if (other.GroupReward != 0F) {
GroupReward = other.GroupReward;
}
_unknownFields = pb::UnknownFieldSet.MergeFrom(_unknownFields, other._unknownFields);
}

}
case 106: {
observations_.AddEntriesFrom(input, _repeated_observations_codec);
break;
}
case 112: {
GroupId = input.ReadInt32();
break;
}
case 125: {
GroupReward = input.ReadFloat();
break;
}
}

com.unity.ml-agents/Runtime/Grpc/CommunicatorObjects/Capabilities.cs (40)


byte[] descriptorData = global::System.Convert.FromBase64String(
string.Concat(
"CjVtbGFnZW50c19lbnZzL2NvbW11bmljYXRvcl9vYmplY3RzL2NhcGFiaWxp",
"dGllcy5wcm90bxIUY29tbXVuaWNhdG9yX29iamVjdHMirwEKGFVuaXR5UkxD",
"dGllcy5wcm90bxIUY29tbXVuaWNhdG9yX29iamVjdHMi0gEKGFVuaXR5UkxD",
"ASgIEhkKEXRyYWluaW5nQW5hbHl0aWNzGAUgASgIQiWqAiJVbml0eS5NTEFn",
"ZW50cy5Db21tdW5pY2F0b3JPYmplY3RzYgZwcm90bzM="));
"ASgIEhkKEXRyYWluaW5nQW5hbHl0aWNzGAUgASgIEiEKGXZhcmlhYmxlTGVu",
"Z3RoT2JzZXJ2YXRpb24YBiABKAhCJaoCIlVuaXR5Lk1MQWdlbnRzLkNvbW11",
"bmljYXRvck9iamVjdHNiBnByb3RvMw=="));
new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.UnityRLCapabilitiesProto), global::Unity.MLAgents.CommunicatorObjects.UnityRLCapabilitiesProto.Parser, new[]{ "BaseRLCapabilities", "ConcatenatedPngObservations", "CompressedChannelMapping", "HybridActions", "TrainingAnalytics" }, null, null, null)
new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.UnityRLCapabilitiesProto), global::Unity.MLAgents.CommunicatorObjects.UnityRLCapabilitiesProto.Parser, new[]{ "BaseRLCapabilities", "ConcatenatedPngObservations", "CompressedChannelMapping", "HybridActions", "TrainingAnalytics", "VariableLengthObservation" }, null, null, null)
}));
}
#endregion

compressedChannelMapping_ = other.compressedChannelMapping_;
hybridActions_ = other.hybridActions_;
trainingAnalytics_ = other.trainingAnalytics_;
variableLengthObservation_ = other.variableLengthObservation_;
_unknownFields = pb::UnknownFieldSet.Clone(other._unknownFields);
}

}
}
/// <summary>Field number for the "variableLengthObservation" field.</summary>
public const int VariableLengthObservationFieldNumber = 6;
private bool variableLengthObservation_;
/// <summary>
/// Support for variable length observations of rank 2
/// </summary>
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public bool VariableLengthObservation {
get { return variableLengthObservation_; }
set {
variableLengthObservation_ = value;
}
}
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public override bool Equals(object other) {
return Equals(other as UnityRLCapabilitiesProto);

if (CompressedChannelMapping != other.CompressedChannelMapping) return false;
if (HybridActions != other.HybridActions) return false;
if (TrainingAnalytics != other.TrainingAnalytics) return false;
if (VariableLengthObservation != other.VariableLengthObservation) return false;
return Equals(_unknownFields, other._unknownFields);
}

if (CompressedChannelMapping != false) hash ^= CompressedChannelMapping.GetHashCode();
if (HybridActions != false) hash ^= HybridActions.GetHashCode();
if (TrainingAnalytics != false) hash ^= TrainingAnalytics.GetHashCode();
if (VariableLengthObservation != false) hash ^= VariableLengthObservation.GetHashCode();
if (_unknownFields != null) {
hash ^= _unknownFields.GetHashCode();
}

if (TrainingAnalytics != false) {
output.WriteRawTag(40);
output.WriteBool(TrainingAnalytics);
}
if (VariableLengthObservation != false) {
output.WriteRawTag(48);
output.WriteBool(VariableLengthObservation);
}
if (_unknownFields != null) {
_unknownFields.WriteTo(output);

if (TrainingAnalytics != false) {
size += 1 + 1;
}
if (VariableLengthObservation != false) {
size += 1 + 1;
}
if (_unknownFields != null) {
size += _unknownFields.CalculateSize();
}

if (other.TrainingAnalytics != false) {
TrainingAnalytics = other.TrainingAnalytics;
}
if (other.VariableLengthObservation != false) {
VariableLengthObservation = other.VariableLengthObservation;
}
_unknownFields = pb::UnknownFieldSet.MergeFrom(_unknownFields, other._unknownFields);
}

}
case 40: {
TrainingAnalytics = input.ReadBool();
break;
}
case 48: {
VariableLengthObservation = input.ReadBool();
break;
}
}

com.unity.ml-agents/Runtime/Inference/BarracudaModelParamLoader.cs (70)


/// <param name="brainParameters">
/// The BrainParameters that are used to verify the compatibility with the InferenceEngine
/// </param>
/// <param name="sensorComponents">Attached sensor components</param>
/// <param name="sensors">Attached sensor components</param>
SensorComponent[] sensorComponents, ActuatorComponent[] actuatorComponents,
ISensor[] sensors, ActuatorComponent[] actuatorComponents,
int observableAttributeTotalSize = 0,
BehaviorType behaviorType = BehaviorType.Default)
{

}
failedModelChecks.AddRange(
CheckInputTensorPresence(model, brainParameters, memorySize, sensorComponents)
CheckInputTensorPresence(model, brainParameters, memorySize, sensors)
CheckInputTensorShape(model, brainParameters, sensorComponents, observableAttributeTotalSize)
CheckInputTensorShape(model, brainParameters, sensors, observableAttributeTotalSize)
);
failedModelChecks.AddRange(
CheckOutputTensorShape(model, brainParameters, actuatorComponents)

/// <param name="memory">
/// The memory size that the model is expecting.
/// </param>
/// <param name="sensorComponents">Array of attached sensor components</param>
/// <param name="sensors">Array of attached sensor components</param>
/// <returns>
/// A IEnumerable of string corresponding to the failed input presence checks.
/// </returns>

int memory,
SensorComponent[] sensorComponents
ISensor[] sensors
)
{
var failedModelChecks = new List<string>();

// If there are not enough visual observation inputs compared to what the
// sensors expect.
var visObsIndex = 0;
- for (var sensorIndex = 0; sensorIndex < sensorComponents.Length; sensorIndex++)
+ for (var sensorIndex = 0; sensorIndex < sensors.Length; sensorIndex++)
-     var sensor = sensorComponents[sensorIndex];
+     var sensor = sensors[sensorIndex];
      if (sensor.GetObservationShape().Length == 3)
      {
          if (!tensorsNames.Contains(

/// Checks that the shape of the visual observation input placeholder is the same as the corresponding sensor.
/// </summary>
/// <param name="tensorProxy">The tensor that is expected by the model</param>
/// <param name="sensorComponent">The sensor that produces the visual observation.</param>
/// <param name="sensor">The sensor that produces the visual observation.</param>
TensorProxy tensorProxy, SensorComponent sensorComponent)
TensorProxy tensorProxy, ISensor sensor)
var shape = sensorComponent.GetObservationShape();
var shape = sensor.GetObservationShape();
var heightBp = shape[0];
var widthBp = shape[1];
var pixelBp = shape[2];

/// Checks that the shape of the rank 2 observation input placeholder is the same as the corresponding sensor.
/// </summary>
/// <param name="tensorProxy">The tensor that is expected by the model</param>
/// <param name="sensorComponent">The sensor that produces the visual observation.</param>
/// <param name="sensor">The sensor that produces the visual observation.</param>
TensorProxy tensorProxy, SensorComponent sensorComponent)
TensorProxy tensorProxy, ISensor sensor)
var shape = sensorComponent.GetObservationShape();
var shape = sensor.GetObservationShape();
var dim1Bp = shape[0];
var dim2Bp = shape[1];
var dim1T = tensorProxy.Channels;

/// <param name="brainParameters">
/// The BrainParameters that are used to verify the compatibility with the InferenceEngine
/// </param>
/// <param name="sensorComponents">Attached sensors</param>
/// <param name="sensors">Attached sensors</param>
Model model, BrainParameters brainParameters, SensorComponent[] sensorComponents,
Model model, BrainParameters brainParameters, ISensor[] sensors,
new Dictionary<string, Func<BrainParameters, TensorProxy, SensorComponent[], int, string>>()
new Dictionary<string, Func<BrainParameters, TensorProxy, ISensor[], int, string>>()
{
{TensorNames.VectorObservationPlaceholder, CheckVectorObsShape},
{TensorNames.PreviousActionPlaceholder, CheckPreviousActionShape},

}
var visObsIndex = 0;
- for (var sensorIndex = 0; sensorIndex < sensorComponents.Length; sensorIndex++)
+ for (var sensorIndex = 0; sensorIndex < sensors.Length; sensorIndex++)
-     var sensorComponent = sensorComponents[sensorIndex];
-     if (sensorComponent.GetObservationShape().Length == 3)
+     var sens = sensors[sensorIndex];
+     if (sens.GetObservationShape().Length == 3)
-         (bp, tensor, scs, i) => CheckVisualObsShape(tensor, sensorComponent);
+         (bp, tensor, scs, i) => CheckVisualObsShape(tensor, sens);
-     if (sensorComponent.GetObservationShape().Length == 2)
+     if (sens.GetObservationShape().Length == 2)
-         (bp, tensor, scs, i) => CheckRankTwoObsShape(tensor, sensorComponent);
+         (bp, tensor, scs, i) => CheckRankTwoObsShape(tensor, sens);
}
}

else
{
var tester = tensorTester[tensor.name];
- var error = tester.Invoke(brainParameters, tensor, sensorComponents, observableAttributeTotalSize);
+ var error = tester.Invoke(brainParameters, tensor, sensors, observableAttributeTotalSize);
if (error != null)
{
failedModelChecks.Add(error);

/// The BrainParameters that are used to verify the compatibility with the InferenceEngine
/// </param>
/// <param name="tensorProxy">The tensor that is expected by the model</param>
/// <param name="sensorComponents">Array of attached sensor components</param>
/// <param name="sensors">Array of attached sensor components</param>
/// <param name="observableAttributeTotalSize">Sum of the sizes of all ObservableAttributes.</param>
/// <returns>
/// If the Check failed, returns a string containing information about why the

- BrainParameters brainParameters, TensorProxy tensorProxy, SensorComponent[] sensorComponents,
+ BrainParameters brainParameters, TensorProxy tensorProxy, ISensor[] sensors,
int observableAttributeTotalSize)
{
var vecObsSizeBp = brainParameters.VectorObservationSize;

var totalVectorSensorSize = 0;
- foreach (var sensorComp in sensorComponents)
+ foreach (var sens in sensors)
-     if (sensorComp.GetObservationShape().Length == 1)
+     if (sens.GetObservationShape().Length == 1)
-         totalVectorSensorSize += sensorComp.GetObservationShape()[0];
+         totalVectorSensorSize += sens.GetObservationShape()[0];
  totalVectorSensorSize += observableAttributeTotalSize;
- if (vecObsSizeBp * numStackedVector + totalVectorSensorSize != totalVecObsSizeT)
+ if (totalVectorSensorSize != totalVecObsSizeT)
- foreach (var sensorComp in sensorComponents)
+ foreach (var sensorComp in sensors)
{
if (sensorComp.GetObservationShape().Length == 1)
{

$"but received: \n" +
$"Vector observations: {vecObsSizeBp} x {numStackedVector}\n" +
$"Total [Observable] attributes: {observableAttributeTotalSize}\n" +
$"SensorComponent sizes: {sensorSizes}.";
$"Sensor sizes: {sensorSizes}.";
}
return null;
}

/// The BrainParameters that are used to verify the compatibility with the InferenceEngine
/// </param>
/// <param name="tensorProxy"> The tensor that is expected by the model</param>
/// <param name="sensorComponents">Array of attached sensor components (unused).</param>
/// <param name="sensors">Array of attached sensor components (unused).</param>
SensorComponent[] sensorComponents, int observableAttributeTotalSize)
ISensor[] sensors, int observableAttributeTotalSize)
{
var numberActionsBp = brainParameters.ActionSpec.NumDiscreteActions;
var numberActionsT = tensorProxy.shape[tensorProxy.shape.Length - 1];
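
The checks above now validate against the runtime ISensor list rather than editor-time SensorComponents, so the model is compared with exactly the observations the agent will step with. A condensed sketch of the rank-1 size check under that API; CheckVectorObsSize and totalVecObsSizeT are illustrative names, not the exact BarracudaModelParamLoader API:

using Unity.MLAgents.Sensors;

static class VectorObsCheckSketch
{
    // Returns an error string when the flat sizes disagree, null otherwise.
    public static string CheckVectorObsSize(
        ISensor[] sensors, int observableAttributeTotalSize, int totalVecObsSizeT)
    {
        var totalVectorSensorSize = 0;
        foreach (var sens in sensors)
        {
            // Only rank-1 (vector) observations feed the vector placeholder.
            if (sens.GetObservationShape().Length == 1)
            {
                totalVectorSensorSize += sens.GetObservationShape()[0];
            }
        }
        totalVectorSensorSize += observableAttributeTotalSize;
        return totalVectorSensorSize != totalVecObsSizeT
            ? $"Vector observation size mismatch: sensors provide {totalVectorSensorSize} " +
              $"floats but the model expects {totalVecObsSizeT}."
            : null;
    }
}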

6
com.unity.ml-agents/Runtime/Inference/ModelRunner.cs


SensorShapeValidator m_SensorShapeValidator = new SensorShapeValidator();
- bool m_VisualObservationsInitialized;
+ bool m_ObservationsInitialized;
/// <summary>
/// Initializes the Brain with the Model that it will use when selecting actions for

{
return;
}
- if (!m_VisualObservationsInitialized)
+ if (!m_ObservationsInitialized)
-     m_VisualObservationsInitialized = true;
+     m_ObservationsInitialized = true;
}
Profiler.BeginSample("ModelRunner.DecideAction");

21
com.unity.ml-agents/Runtime/Sensors/BufferSensor.cs


namespace Unity.MLAgents.Sensors
{
- internal class BufferSensor : ISensor, IDimensionPropertiesSensor, IBuiltInSensor
+ /// <summary>
+ /// A Sensor that allows observing a variable number of entities.
+ /// </summary>
+ public class BufferSensor : ISensor, IDimensionPropertiesSensor, IBuiltInSensor
+ static DimensionProperty[] s_DimensionProperties = new DimensionProperty[]{
+     DimensionProperty.VariableSize,
+     DimensionProperty.None
+ };
public BufferSensor(int maxNumberObs, int obsSize)
{
m_MaxNumObs = maxNumberObs;

/// <inheritdoc/>
public DimensionProperty[] GetDimensionProperties()
{
-     return new DimensionProperty[]{
-         DimensionProperty.VariableSize,
-         DimensionProperty.None
-     };
+     return s_DimensionProperties;
}
/// <summary>

/// <param name="obs"> The float array observation</param>
public void AppendObservation(float[] obs)
{
if (obs.Length != m_ObsSize)
{
throw new UnityAgentsException(
"The BufferSensor was expecting an observation of size " +
$"{m_ObsSize} but received {obs.Length} observations instead."
);
}
if (m_CurrentNumObservables >= m_MaxNumObs)
{
return;
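
Since BufferSensor is now public, it can be constructed and fed directly, e.g. from a custom SensorComponent. A hedged sketch (sizes are arbitrary; it relies only on the constructor and AppendObservation shown above):

using Unity.MLAgents.Sensors;

// One entry per entity, obsSize floats each; up to maxNumberObs entries.
var sensor = new BufferSensor(maxNumberObs: 20, obsSize: 4);
sensor.AppendObservation(new float[] { 1f, 0f, 0f, 0.5f });
// A wrong-sized entry throws UnityAgentsException; entries past the
// 20th are silently ignored, as the guard above shows.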

14
com.unity.ml-agents/Runtime/Sensors/BufferSensorComponent.cs


{
/// <summary>
- /// A component for BufferSensor.
+ /// A SensorComponent that creates a <see cref="BufferSensor"/>.
- internal class BufferSensorComponent : SensorComponent
+ public class BufferSensorComponent : SensorComponent
+ /// <summary>
+ /// This is how many floats each entity will be represented with. This number
+ /// is fixed and all entities must have the same representation.
+ /// </summary>
+ /// <summary>
+ /// This is the maximum number of entities the `BufferSensor` will be able to
+ /// collect.
+ /// </summary>
private BufferSensor m_Sensor;
/// <inheritdoc/>
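
With the component public, a typical variable-length setup attaches a BufferSensorComponent next to the Agent and appends one fixed-size entry per entity each step. An illustrative sketch: SwarmAgent, the targets field, and the 4-float layout are hypothetical, and it assumes the component forwards AppendObservation to its BufferSensor:

using Unity.MLAgents;
using Unity.MLAgents.Sensors;
using UnityEngine;

public class SwarmAgent : Agent
{
    BufferSensorComponent m_Buffer;
    public Rigidbody[] targets;  // populated in the editor

    public override void Initialize()
    {
        // The BufferSensorComponent lives on the same GameObject as the Agent.
        m_Buffer = GetComponent<BufferSensorComponent>();
    }

    public override void CollectObservations(VectorSensor sensor)
    {
        foreach (var target in targets)
        {
            var toTarget = target.position - transform.position;
            // One entry per entity; all entries share the same 4-float layout.
            m_Buffer.AppendObservation(new float[]
            {
                toTarget.x, toTarget.y, toTarget.z, target.velocity.magnitude
            });
        }
    }
}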

17
com.unity.ml-agents/Runtime/Sensors/CameraSensor.cs


/// <summary>
/// A sensor that wraps a Camera object to generate visual observations for an agent.
/// </summary>
- public class CameraSensor : ISensor, IBuiltInSensor
+ public class CameraSensor : ISensor, IBuiltInSensor, IDimensionPropertiesSensor
{
Camera m_Camera;
int m_Width;

int[] m_Shape;
SensorCompressionType m_CompressionType;
+ static DimensionProperty[] s_DimensionProperties = new DimensionProperty[] {
+     DimensionProperty.TranslationalEquivariance,
+     DimensionProperty.TranslationalEquivariance,
+     DimensionProperty.None };
/// <summary>
/// The Camera used for rendering the sensor observations.

public int[] GetObservationShape()
{
return m_Shape;
}
+ /// <summary>
+ /// Accessor for the dimension properties of a camera sensor. A camera sensor
+ /// has translational equivariance along width and height and no property along
+ /// the channels dimension.
+ /// </summary>
+ /// <returns>The DimensionProperty array of the camera sensor.</returns>
+ public DimensionProperty[] GetDimensionProperties()
+ {
+     return s_DimensionProperties;
+ }
/// <summary>
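
The dimension properties advertise to the trainer that convolutions may be applied along the height and width axes. Any sensor can now be queried through IDimensionPropertiesSensor; a hedged sketch (the constructor arguments are illustrative, not a recommended configuration):

using Unity.MLAgents.Sensors;
using UnityEngine;

var camSensor = new CameraSensor(Camera.main, 84, 84, false, "CamSensor",
    SensorCompressionType.PNG);
foreach (var prop in camSensor.GetDimensionProperties())
{
    // Logs TranslationalEquivariance, TranslationalEquivariance, None.
    Debug.Log(prop);
}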

2
com.unity.ml-agents/Runtime/Sensors/IDimensionPropertiesSensor.cs


/// The Dimension property flags of the observations
/// </summary>
[System.Flags]
- internal enum DimensionProperty
+ public enum DimensionProperty
{
/// <summary>
/// No properties specified.
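
Because DimensionProperty is a [Flags] enum, a single observation dimension can carry several properties at once. A small illustrative check:

using Unity.MLAgents.Sensors;

var prop = DimensionProperty.VariableSize | DimensionProperty.TranslationalEquivariance;
// Test membership with a bitwise AND, as with any [Flags] enum.
bool isVariable = (prop & DimensionProperty.VariableSize) != 0;  // true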

2
com.unity.ml-agents/Tests/Editor/Analytics/InferenceAnalyticsTests.cs


Assert.AreEqual(2, continuousEvent.ObservationSpecs.Count);
Assert.AreEqual(3, continuousEvent.ObservationSpecs[0].DimensionInfos.Length);
Assert.AreEqual(20, continuousEvent.ObservationSpecs[0].DimensionInfos[0].Size);
Assert.AreEqual((int)DimensionProperty.TranslationalEquivariance, continuousEvent.ObservationSpecs[0].DimensionInfos[0].Flags);
Assert.AreEqual((int)DimensionProperty.None, continuousEvent.ObservationSpecs[0].DimensionInfos[2].Flags);
Assert.AreEqual("None", continuousEvent.ObservationSpecs[0].CompressionType);
Assert.AreEqual(Test3DSensor.k_BuiltInSensorType, continuousEvent.