Merge pull request #4763 from Unity-Technologies/develop-att
WIP Made initial changes to enable dimension properties and added attention module/MLA-1734-demo-provider
GitHub
4 years ago
Current commit
458fee17
56 files changed, 956 insertions(+), 217 deletions(-)
 11  com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs
 33  com.unity.ml-agents/Runtime/Grpc/CommunicatorObjects/Observation.cs
  9  docs/Python-API.md
 16  gym-unity/gym_unity/envs/__init__.py
  4  gym-unity/gym_unity/tests/test_gym.py
 55  ml-agents-envs/mlagents_envs/base_env.py
 19  ml-agents-envs/mlagents_envs/communicator_objects/observation_pb2.py
  6  ml-agents-envs/mlagents_envs/communicator_objects/observation_pb2.pyi
 24  ml-agents-envs/mlagents_envs/rpc_utils.py
 20  ml-agents-envs/mlagents_envs/tests/test_envs.py
  2  ml-agents-envs/mlagents_envs/tests/test_registry.py
 31  ml-agents-envs/mlagents_envs/tests/test_rpc_utils.py
  7  ml-agents-envs/mlagents_envs/tests/test_steps.py
 11  ml-agents/mlagents/trainers/demo_loader.py
  2  ml-agents/mlagents/trainers/optimizer/torch_optimizer.py
  6  ml-agents/mlagents/trainers/policy/policy.py
  2  ml-agents/mlagents/trainers/policy/torch_policy.py
  2  ml-agents/mlagents/trainers/ppo/optimizer_torch.py
 18  ml-agents/mlagents/trainers/sac/optimizer_torch.py
  3  ml-agents/mlagents/trainers/tests/check_env_trains.py
 11  ml-agents/mlagents/trainers/tests/dummy_config.py
 32  ml-agents/mlagents/trainers/tests/mock_brain.py
 15  ml-agents/mlagents/trainers/tests/simple_test_envs.py
 16  ml-agents/mlagents/trainers/tests/test_agent_processor.py
  4  ml-agents/mlagents/trainers/tests/test_demo_loader.py
  6  ml-agents/mlagents/trainers/tests/test_rl_trainer.py
  4  ml-agents/mlagents/trainers/tests/test_trajectory.py
  3  ml-agents/mlagents/trainers/tests/torch/test_ghost.py
 12  ml-agents/mlagents/trainers/tests/torch/test_hybrid.py
 23  ml-agents/mlagents/trainers/tests/torch/test_networks.py
  4  ml-agents/mlagents/trainers/tests/torch/test_policy.py
  2  ml-agents/mlagents/trainers/tests/torch/test_ppo.py
 40  ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_curiosity.py
 13  ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_extrinsic.py
 27  ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_gail.py
 28  ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_rnd.py
  8  ml-agents/mlagents/trainers/tests/torch/test_reward_providers/utils.py
  2  ml-agents/mlagents/trainers/tests/torch/test_simple_rl.py
  4  ml-agents/mlagents/trainers/tests/torch/test_utils.py
  2  ml-agents/mlagents/trainers/torch/components/bc/module.py
  4  ml-agents/mlagents/trainers/torch/components/reward_providers/curiosity_reward_provider.py
  4  ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py
  2  ml-agents/mlagents/trainers/torch/components/reward_providers/rnd_reward_provider.py
 10  ml-agents/mlagents/trainers/torch/model_serialization.py
 32  ml-agents/mlagents/trainers/torch/networks.py
 10  ml-agents/mlagents/trainers/torch/utils.py
  4  ml-agents/tests/yamato/scripts/run_llapi.py
  1  protobuf-definitions/proto/mlagents_envs/communicator_objects/observation.proto
 95  com.unity.ml-agents/Runtime/Sensors/BufferSensor.cs
 11  com.unity.ml-agents/Runtime/Sensors/BufferSensor.cs.meta
 41  com.unity.ml-agents/Runtime/Sensors/BufferSensorComponent.cs
 11  com.unity.ml-agents/Runtime/Sensors/BufferSensorComponent.cs.meta
 47  com.unity.ml-agents/Runtime/Sensors/IDimensionPropertiesSensor.cs
 11  com.unity.ml-agents/Runtime/Sensors/IDimensionPropertiesSensor.cs.meta
162  ml-agents/mlagents/trainers/tests/torch/test_attention.py
191  ml-agents/mlagents/trainers/torch/attention.py
com.unity.ml-agents/Runtime/Sensors/BufferSensor.cs

using System;

namespace Unity.MLAgents.Sensors
{
    public class BufferSensor : ISensor, IDimensionPropertiesSensor
    {
        private int m_MaxNumObs;
        private int m_ObsSize;
        float[] m_ObservationBuffer;
        int m_CurrentNumObservables;

        public BufferSensor(int maxNumberObs, int obsSize)
        {
            m_MaxNumObs = maxNumberObs;
            m_ObsSize = obsSize;
            m_ObservationBuffer = new float[m_ObsSize * m_MaxNumObs];
            m_CurrentNumObservables = 0;
        }

        /// <inheritdoc/>
        public int[] GetObservationShape()
        {
            return new int[] { m_MaxNumObs, m_ObsSize };
        }

        /// <inheritdoc/>
        public DimensionProperty[] GetDimensionProperties()
        {
            return new DimensionProperty[]{
                DimensionProperty.VariableSize,
                DimensionProperty.None
            };
        }

        /// <summary>
        /// Appends an observation to the buffer. If the buffer is full (the maximum
        /// number of observations has been reached), the observation is ignored. The
        /// length of the provided observation array must be equal to the observation
        /// size of the buffer sensor.
        /// </summary>
        /// <param name="obs">The float array observation</param>
        public void AppendObservation(float[] obs)
        {
            if (m_CurrentNumObservables >= m_MaxNumObs)
            {
                return;
            }
            for (int i = 0; i < obs.Length; i++)
            {
                m_ObservationBuffer[m_CurrentNumObservables * m_ObsSize + i] = obs[i];
            }
            m_CurrentNumObservables++;
        }

        /// <inheritdoc/>
        public int Write(ObservationWriter writer)
        {
            for (int i = 0; i < m_ObsSize * m_MaxNumObs; i++)
            {
                writer[i] = m_ObservationBuffer[i];
            }
            return m_ObsSize * m_MaxNumObs;
        }

        /// <inheritdoc/>
        public virtual byte[] GetCompressedObservation()
        {
            return null;
        }

        /// <inheritdoc/>
        public void Update()
        {
            Reset();
        }

        /// <inheritdoc/>
        public void Reset()
        {
            m_CurrentNumObservables = 0;
            Array.Clear(m_ObservationBuffer, 0, m_ObservationBuffer.Length);
        }

        public SensorCompressionType GetCompressionType()
        {
            return SensorCompressionType.None;
        }

        public string GetName()
        {
            return "BufferSensor";
        }
    }
}
com.unity.ml-agents/Runtime/Sensors/BufferSensor.cs.meta

fileFormatVersion: 2
guid: 034f05c858e684e5498d9a548c9d1fc5
MonoImporter:
  externalObjects: {}
  serializedVersion: 2
  defaultReferences: []
  executionOrder: 0
  icon: {instanceID: 0}
  userData:
  assetBundleName:
  assetBundleVariant:
com.unity.ml-agents/Runtime/Sensors/BufferSensorComponent.cs

using UnityEngine;

namespace Unity.MLAgents.Sensors
{
    /// <summary>
    /// A component for BufferSensor.
    /// </summary>
    [AddComponentMenu("ML Agents/Buffer Sensor", (int)MenuGroup.Sensors)]
    public class BufferSensorComponent : SensorComponent
    {
        /// <summary>The size of each observation appended to the buffer.</summary>
        public int ObservableSize;

        /// <summary>The maximum number of observations the buffer can hold.</summary>
        public int MaxNumObservables;

        private BufferSensor m_Sensor;

        /// <inheritdoc/>
        public override ISensor CreateSensor()
        {
            m_Sensor = new BufferSensor(MaxNumObservables, ObservableSize);
            return m_Sensor;
        }

        /// <inheritdoc/>
        public override int[] GetObservationShape()
        {
            return new[] { MaxNumObservables, ObservableSize };
        }

        /// <summary>
        /// Appends an observation to the buffer. If the buffer is full (the maximum
        /// number of observations has been reached), the observation is ignored. The
        /// length of the provided observation array must be equal to the observation
        /// size of the buffer sensor.
        /// </summary>
        /// <param name="obs">The float array observation</param>
        public void AppendObservation(float[] obs)
        {
            m_Sensor.AppendObservation(obs);
        }
    }
}
com.unity.ml-agents/Runtime/Sensors/BufferSensorComponent.cs.meta

fileFormatVersion: 2
guid: dd8012d5925524537b27131fef517017
MonoImporter:
  externalObjects: {}
  serializedVersion: 2
  defaultReferences: []
  executionOrder: 0
  icon: {instanceID: 0}
  userData:
  assetBundleName:
  assetBundleVariant:
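Because BufferSensor zero-fills unused slots (see Reset and the partial writes in AppendObservation), padding can be recovered on the trainer side by checking for all-zero rows. A minimal PyTorch sketch of that convention follows; the shapes and variable names are illustrative, not part of this diff, and the mask rule mirrors SimpleTransformer.get_masks in the attention module further down.

import torch

# Illustrative shapes: a BufferSensorComponent with MaxNumObservables = 4
# and ObservableSize = 3, observed over a batch of 2 agents.
batch_size, max_num_obs, obs_size = 2, 4, 3

# The sensor zero-fills unused slots, so padding rows arrive as all zeros.
buffer_obs = torch.zeros((batch_size, max_num_obs, obs_size))
buffer_obs[:, :2, :] = 1.0  # pretend two real entities were appended

# Mask is 1 for all-zero (padding) rows and 0 for real entities -- the same
# convention used by SimpleTransformer.get_masks below.
mask = (torch.sum(buffer_obs ** 2, dim=2) < 0.01).float()
assert mask[0].tolist() == [0.0, 0.0, 1.0, 1.0]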
com.unity.ml-agents/Runtime/Sensors/IDimensionPropertiesSensor.cs

namespace Unity.MLAgents.Sensors
{
    /// <summary>
    /// The dimension property flags of the observations.
    /// </summary>
    [System.Flags]
    public enum DimensionProperty
    {
        /// <summary>
        /// No properties specified.
        /// </summary>
        Unspecified = 0,

        /// <summary>
        /// No special property for the observation in this dimension. The observation
        /// can be processed with fully connected networks.
        /// </summary>
        None = 1,

        /// <summary>
        /// Means it is suitable to do a convolution in this dimension.
        /// </summary>
        TranslationalEquivariance = 2,

        /// <summary>
        /// Means that there can be a variable number of observations in this dimension.
        /// The observations are unordered.
        /// </summary>
        VariableSize = 4,
    }

    /// <summary>
    /// Sensor interface for sensors with special dimension properties.
    /// </summary>
    public interface IDimensionPropertiesSensor
    {
        /// <summary>
        /// Returns the array containing the properties of each dimension of the
        /// observation. The length of the array must be equal to the rank of the
        /// observation tensor.
        /// </summary>
        /// <returns>The array of DimensionProperty</returns>
        DimensionProperty[] GetDimensionProperties();
    }
}
com.unity.ml-agents/Runtime/Sensors/IDimensionPropertiesSensor.cs.meta

fileFormatVersion: 2
guid: 297e9ec12d6de45adbcf6dea1a9de019
MonoImporter:
  externalObjects: {}
  serializedVersion: 2
  defaultReferences: []
  executionOrder: 0
  icon: {instanceID: 0}
  userData:
  assetBundleName:
  assetBundleVariant:
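DimensionProperty is a flags enum, so per-dimension properties can in principle be combined. A rough Python mirror of the C# values, just to illustrate how they line up (illustrative only; the canonical Python-side definition touched by this PR lives in ml-agents-envs/mlagents_envs/base_env.py):

from enum import IntFlag

class DimensionProperty(IntFlag):
    UNSPECIFIED = 0
    NONE = 1
    TRANSLATIONAL_EQUIVARIANCE = 2
    VARIABLE_SIZE = 4

# The BufferSensor above reports (VariableSize, None): a variable number of
# unordered entities, each a plain fixed-size vector.
buffer_props = (DimensionProperty.VARIABLE_SIZE, DimensionProperty.NONE)

# Flags can combine, e.g. a dimension both variable in size and
# convolution-friendly (purely hypothetical here):
combined = DimensionProperty.VARIABLE_SIZE | DimensionProperty.TRANSLATIONAL_EQUIVARIANCE
assert combined & DimensionProperty.VARIABLE_SIZE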
ml-agents/mlagents/trainers/tests/torch/test_attention.py

from mlagents.torch_utils import torch
import numpy as np

from mlagents.trainers.torch.layers import linear_layer
from mlagents.trainers.torch.attention import MultiHeadAttention, SimpleTransformer


def test_multi_head_attention_initialization():
    q_size, k_size, v_size, o_size, n_h, emb_size = 7, 8, 9, 10, 11, 12
    n_k, n_q, b = 13, 14, 15
    mha = MultiHeadAttention(q_size, k_size, v_size, o_size, n_h, emb_size)

    query = torch.ones((b, n_q, q_size))
    key = torch.ones((b, n_k, k_size))
    value = torch.ones((b, n_k, v_size))

    output, attention = mha.forward(query, key, value)

    assert output.shape == (b, n_q, o_size)
    assert attention.shape == (b, n_h, n_q, n_k)


def test_multi_head_attention_masking():
    epsilon = 0.0001
    q_size, k_size, v_size, o_size, n_h, emb_size = 7, 8, 9, 10, 11, 12
    n_k, n_q, b = 13, 14, 15
    mha = MultiHeadAttention(q_size, k_size, v_size, o_size, n_h, emb_size)

    # Create a key input in which every third key is all zeros and masked out
    key = torch.ones((b, n_k, k_size))
    mask = torch.zeros((b, n_k))
    for i in range(n_k):
        if i % 3 == 0:
            key[:, i, :] = 0
            mask[:, i] = 1

    query = torch.ones((b, n_q, q_size))
    value = torch.ones((b, n_k, v_size))

    _, attention = mha.forward(query, key, value, mask)
    # Masked keys should receive (near) zero attention; unmasked keys should not
    for i in range(n_k):
        if i % 3 == 0:
            assert torch.sum(attention[:, :, :, i] ** 2) < epsilon
        else:
            assert torch.sum(attention[:, :, :, i] ** 2) > epsilon


def test_multi_head_attention_training():
    np.random.seed(1336)
    torch.manual_seed(1336)
    size, n_h, n_k, n_q = 3, 10, 5, 1
    embedding_size = 64
    mha = MultiHeadAttention(size, size, size, size, n_h, embedding_size)
    optimizer = torch.optim.Adam(mha.parameters(), lr=0.001)
    batch_size = 200
    point_range = 3
    init_error = -1.0
    for _ in range(50):
        query = torch.rand((batch_size, n_q, size)) * point_range * 2 - point_range
        key = torch.rand((batch_size, n_k, size)) * point_range * 2 - point_range
        value = key
        with torch.no_grad():
            # Create the target: the key closest to the query in Euclidean distance
            distance = torch.sum((query - key) ** 2, dim=2)
            argmin = torch.argmin(distance, dim=1)
            target = []
            for i in range(batch_size):
                target += [key[i, argmin[i], :]]
            target = torch.stack(target, dim=0)
            target = target.detach()

        prediction, _ = mha.forward(query, key, value)
        prediction = prediction.reshape((batch_size, size))
        error = torch.mean((prediction - target) ** 2, dim=1)
        error = torch.mean(error) / 2
        if init_error == -1.0:
            init_error = error.item()
        else:
            assert error.item() < init_error
        print(error.item())
        optimizer.zero_grad()
        error.backward()
        optimizer.step()
    assert error.item() < 0.5


def test_zero_mask_layer():
    batch_size, size = 10, 30

    def generate_input_helper(pattern):
        # Alternate random (even index) and all-zero (odd index) segments
        _input = torch.zeros((batch_size, 0, size))
        for i in range(len(pattern)):
            if i % 2 == 0:
                _input = torch.cat(
                    [_input, torch.rand((batch_size, pattern[i], size))], dim=1
                )
            else:
                _input = torch.cat(
                    [_input, torch.zeros((batch_size, pattern[i], size))], dim=1
                )
        return _input

    masking_pattern_1 = [3, 2, 3, 4]
    masking_pattern_2 = [5, 7, 8, 2]
    input_1 = generate_input_helper(masking_pattern_1)
    input_2 = generate_input_helper(masking_pattern_2)

    masks = SimpleTransformer.get_masks([input_1, input_2])
    assert len(masks) == 2
    masks_1 = masks[0]
    masks_2 = masks[1]
    assert masks_1.shape == (batch_size, sum(masking_pattern_1))
    assert masks_2.shape == (batch_size, sum(masking_pattern_2))

    def check_mask(mask, pattern):
        # Random segments (even index) must be unmasked (0); zero-padding
        # segments (odd index) must be masked (1)
        offset = 0
        for j, length in enumerate(pattern):
            expected = 0 if j % 2 == 0 else 1
            assert torch.all(mask[:, offset : offset + length] == expected)
            offset += length

    check_mask(masks_1, masking_pattern_1)
    check_mask(masks_2, masking_pattern_2)


def test_simple_transformer_training():
    np.random.seed(1336)
    torch.manual_seed(1336)
    size, n_k = 3, 5
    embedding_size = 64
    transformer = SimpleTransformer(size, [size], embedding_size)
    l_layer = linear_layer(embedding_size, size)
    optimizer = torch.optim.Adam(
        list(transformer.parameters()) + list(l_layer.parameters()), lr=0.001
    )
    batch_size = 200
    point_range = 3
    init_error = -1.0
    for _ in range(100):
        center = torch.rand((batch_size, size)) * point_range * 2 - point_range
        key = torch.rand((batch_size, n_k, size)) * point_range * 2 - point_range
        with torch.no_grad():
            # Create the target: the key closest to the center in Euclidean distance
            distance = torch.sum(
                (center.reshape((batch_size, 1, size)) - key) ** 2, dim=2
            )
            argmin = torch.argmin(distance, dim=1)
            target = []
            for i in range(batch_size):
                target += [key[i, argmin[i], :]]
            target = torch.stack(target, dim=0)
            target = target.detach()

        masks = SimpleTransformer.get_masks([key])
        prediction = transformer.forward(center, [key], masks)
        prediction = l_layer(prediction)
        prediction = prediction.reshape((batch_size, size))
        error = torch.mean((prediction - target) ** 2, dim=1)
        error = torch.mean(error) / 2
        if init_error == -1.0:
            init_error = error.item()
        else:
            assert error.item() < init_error
        print(error.item())
        optimizer.zero_grad()
        error.backward()
        optimizer.step()
    assert error.item() < 0.3
ml-agents/mlagents/trainers/torch/attention.py

from mlagents.torch_utils import torch
from typing import Tuple, Optional, List
from mlagents.trainers.torch.layers import LinearEncoder


class MultiHeadAttention(torch.nn.Module):
    """
    Multi Head Attention module. We do not use the regular Torch implementation since
    Barracuda does not support some operators it uses.
    Takes as input to the forward method 3 tensors:
    - query: of dimensions (batch_size, number_of_queries, query_size)
    - key: of dimensions (batch_size, number_of_keys, key_size)
    - value: of dimensions (batch_size, number_of_keys, value_size)
    The forward method will return 2 tensors:
    - The output: (batch_size, number_of_queries, output_size)
    - The attention matrix: (batch_size, num_heads, number_of_queries, number_of_keys)
    """

    NEG_INF = -1e6

    def __init__(
        self,
        query_size: int,
        key_size: int,
        value_size: int,
        output_size: int,
        num_heads: int,
        embedding_size: int,
    ):
        super().__init__()
        self.n_heads, self.embedding_size = num_heads, embedding_size
        self.output_size = output_size
        self.fc_q = torch.nn.Linear(query_size, self.n_heads * self.embedding_size)
        self.fc_k = torch.nn.Linear(key_size, self.n_heads * self.embedding_size)
        self.fc_v = torch.nn.Linear(value_size, self.n_heads * self.embedding_size)
        # self.fc_q = LinearEncoder(query_size, 2, self.n_heads * self.embedding_size)
        # self.fc_k = LinearEncoder(key_size, 2, self.n_heads * self.embedding_size)
        # self.fc_v = LinearEncoder(value_size, 2, self.n_heads * self.embedding_size)
        self.fc_out = torch.nn.Linear(
            self.n_heads * self.embedding_size, self.output_size
        )

    def forward(
        self,
        query: torch.Tensor,
        key: torch.Tensor,
        value: torch.Tensor,
        key_mask: Optional[torch.Tensor] = None,
        number_of_keys: int = -1,
        number_of_queries: int = -1,
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        b = -1  # the batch size
        # Use the explicit counts when provided: Barracuda does not support .size(),
        # so it is only used as a fallback
        n_q = number_of_queries if number_of_queries != -1 else query.size(1)
        n_k = number_of_keys if number_of_keys != -1 else key.size(1)

        query = self.fc_q(query)  # (b, n_q, h*emb)
        key = self.fc_k(key)  # (b, n_k, h*emb)
        value = self.fc_v(value)  # (b, n_k, h*emb)

        query = query.reshape(b, n_q, self.n_heads, self.embedding_size)
        key = key.reshape(b, n_k, self.n_heads, self.embedding_size)
        value = value.reshape(b, n_k, self.n_heads, self.embedding_size)

        query = query.permute([0, 2, 1, 3])  # (b, h, n_q, emb)
        # The next few lines are equivalent to: key.permute([0, 2, 3, 1])
        # This is a hack: ONNX will fuse two consecutive permute operations and
        # Barracuda will not accept the resulting permute([0, 2, 3, 1])
        key = key.permute([0, 2, 1, 3])  # (b, h, n_k, emb)
        key -= 1
        key += 1
        key = key.permute([0, 1, 3, 2])  # (b, h, emb, n_k)

        qk = torch.matmul(query, key)  # (b, h, n_q, n_k)

        if key_mask is None:
            qk = qk / (self.embedding_size ** 0.5)
        else:
            key_mask = key_mask.reshape(b, 1, 1, n_k)
            qk = (1 - key_mask) * qk / (
                self.embedding_size ** 0.5
            ) + key_mask * self.NEG_INF

        att = torch.softmax(qk, dim=3)  # (b, h, n_q, n_k)

        value = value.permute([0, 2, 1, 3])  # (b, h, n_k, emb)
        value_attention = torch.matmul(att, value)  # (b, h, n_q, emb)

        value_attention = value_attention.permute([0, 2, 1, 3])  # (b, n_q, h, emb)
        value_attention = value_attention.reshape(
            b, n_q, self.n_heads * self.embedding_size
        )  # (b, n_q, h*emb)

        out = self.fc_out(value_attention)  # (b, n_q, output_size)
        return out, att


class SimpleTransformer(torch.nn.Module):
    """
    A simple architecture inspired from https://arxiv.org/pdf/1909.07528.pdf that uses
    multi head self attention to encode information about a "Self" and a list of
    relevant "Entities".
    """

    EPSILON = 1e-7

    def __init__(
        self,
        x_self_size: int,
        entities_sizes: List[int],
        embedding_size: int,
        output_size: Optional[int] = None,
    ):
        super().__init__()
        self.self_size = x_self_size
        self.entities_sizes = entities_sizes
        self.entities_num_max_elements: Optional[List[int]] = None
        self.ent_encoders = torch.nn.ModuleList(
            [
                LinearEncoder(self.self_size + ent_size, 2, embedding_size)
                for ent_size in self.entities_sizes
            ]
        )
        self.attention = MultiHeadAttention(
            query_size=embedding_size,
            key_size=embedding_size,
            value_size=embedding_size,
            output_size=embedding_size,
            num_heads=4,
            embedding_size=embedding_size,
        )
        self.residual_layer = LinearEncoder(embedding_size, 1, embedding_size)
        if output_size is None:
            output_size = embedding_size
        self.x_self_residual_layer = LinearEncoder(
            embedding_size + x_self_size, 1, output_size
        )

    def forward(
        self,
        x_self: torch.Tensor,
        entities: List[torch.Tensor],
        key_masks: List[torch.Tensor],
    ) -> torch.Tensor:
        # Gather the maximum number of entities information
        if self.entities_num_max_elements is None:
            self.entities_num_max_elements = []
            for ent in entities:
                self.entities_num_max_elements.append(ent.shape[1])
        # Concatenate all observations with self
        self_and_ent: List[torch.Tensor] = []
        for num_entities, ent in zip(self.entities_num_max_elements, entities):
            expanded_self = x_self.reshape(-1, 1, self.self_size)
            # torch.cat is used here in place of .repeat(1, num_entities, 1)
            expanded_self = torch.cat([expanded_self] * num_entities, dim=1)
            self_and_ent.append(torch.cat([expanded_self, ent], dim=2))
        # Generate the tensor that will serve as query, key and value to self attention
        qkv = torch.cat(
            [ent_encoder(x) for ent_encoder, x in zip(self.ent_encoders, self_and_ent)],
            dim=1,
        )
        mask = torch.cat(key_masks, dim=1)
        # Feed to self attention
        max_num_ent = sum(self.entities_num_max_elements)
        output, _ = self.attention(qkv, qkv, qkv, mask, max_num_ent, max_num_ent)
        # Residual
        output = self.residual_layer(output) + qkv
        # Average pooling over the unmasked entities
        numerator = torch.sum(output * (1 - mask).reshape(-1, max_num_ent, 1), dim=1)
        denominator = torch.sum(1 - mask, dim=1, keepdim=True) + self.EPSILON
        output = numerator / denominator
        # Residual between x_self and the output of the module
        output = self.x_self_residual_layer(torch.cat([output, x_self], dim=1))
        return output

    @staticmethod
    def get_masks(observations: List[torch.Tensor]) -> List[torch.Tensor]:
        """
        Takes a List of Tensors and returns a List of mask Tensors with 1 if the
        input was all zeros (on dimension 2) and 0 otherwise. This is used in the
        attention layer to mask the padding observations.
        """
        with torch.no_grad():
            # Generate the masking tensors for each entities tensor (mask only if all zeros)
            key_masks: List[torch.Tensor] = [
                (torch.sum(ent ** 2, dim=2) < 0.01).type(torch.FloatTensor)
                for ent in observations
            ]
        return key_masks
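For reference, per head the forward pass above computes masked scaled dot-product attention. With key mask m_j in {0, 1}, embedding size d, and H heads, the scores, attention weights, and output are (notation ours, written to match the code, where -10^6 plays the role of negative infinity):

s_{ij} = (1 - m_j)\,\frac{q_i^{\top} k_j}{\sqrt{d}} - 10^{6}\, m_j,
\qquad
A_{ij} = \frac{e^{s_{ij}}}{\sum_{j'} e^{s_{ij'}}},
\qquad
\mathrm{out}_i = W_{\mathrm{out}} \left[ \big\Vert_{h=1}^{H} \sum_{j} A^{(h)}_{ij} v^{(h)}_j \right]

where \Vert denotes concatenation of the H heads, so a masked key (m_j = 1) contributes a score of roughly -10^6 and receives near-zero attention weight after the softmax.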