
WIP Made initial changes to enable dimension properties and added attention module

/layernorm
vincentpierre, 3 years ago
Commit 8cb050ef
38 files changed, with 777 insertions and 119 deletions
  1. com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs (6 lines changed)
  2. com.unity.ml-agents/Runtime/Grpc/CommunicatorObjects/Observation.cs (33 lines changed)
  3. gym-unity/gym_unity/envs/__init__.py (6 lines changed)
  4. gym-unity/gym_unity/tests/test_gym.py (4 lines changed)
  5. ml-agents-envs/mlagents_envs/base_env.py (57 lines changed)
  6. ml-agents-envs/mlagents_envs/communicator_objects/observation_pb2.py (19 lines changed)
  7. ml-agents-envs/mlagents_envs/communicator_objects/observation_pb2.pyi (6 lines changed)
  8. ml-agents-envs/mlagents_envs/rpc_utils.py (11 lines changed)
  9. ml-agents-envs/mlagents_envs/tests/test_envs.py (14 lines changed)
  10. ml-agents-envs/mlagents_envs/tests/test_registry.py (2 lines changed)
  11. ml-agents-envs/mlagents_envs/tests/test_rpc_utils.py (31 lines changed)
  12. ml-agents-envs/mlagents_envs/tests/test_steps.py (7 lines changed)
  13. ml-agents/mlagents/trainers/demo_loader.py (8 lines changed)
  14. ml-agents/mlagents/trainers/policy/policy.py (6 lines changed)
  15. ml-agents/mlagents/trainers/policy/torch_policy.py (2 lines changed)
  16. ml-agents/mlagents/trainers/sac/optimizer_torch.py (4 lines changed)
  17. ml-agents/mlagents/trainers/tests/mock_brain.py (14 lines changed)
  18. ml-agents/mlagents/trainers/tests/simple_test_envs.py (10 lines changed)
  19. ml-agents/mlagents/trainers/tests/test_demo_loader.py (4 lines changed)
  20. ml-agents/mlagents/trainers/tests/torch/test_hybrid.py (6 lines changed)
  21. ml-agents/mlagents/trainers/tests/torch/test_ppo.py (2 lines changed)
  22. ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_curiosity.py (40 lines changed)
  23. ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_extrinsic.py (14 lines changed)
  24. ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_gail.py (28 lines changed)
  25. ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_rnd.py (28 lines changed)
  26. ml-agents/mlagents/trainers/tests/torch/test_reward_providers/utils.py (4 lines changed)
  27. ml-agents/mlagents/trainers/tests/torch/test_simple_rl.py (2 lines changed)
  28. ml-agents/mlagents/trainers/torch/components/reward_providers/curiosity_reward_provider.py (2 lines changed)
  29. ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py (2 lines changed)
  30. ml-agents/mlagents/trainers/torch/components/reward_providers/rnd_reward_provider.py (4 lines changed)
  31. ml-agents/mlagents/trainers/torch/model_serialization.py (4 lines changed)
  32. ml-agents/tests/yamato/scripts/run_llapi.py (4 lines changed)
  33. protobuf-definitions/proto/mlagents_envs/communicator_objects/observation.proto (1 line changed)
  34. com.unity.ml-agents/Runtime/Sensors/BufferSensor.cs (84 lines changed)
  35. com.unity.ml-agents/Runtime/Sensors/BufferSensorComponent.cs (27 lines changed)
  36. com.unity.ml-agents/Runtime/Sensors/IDimensionPropertiesSensor.cs (47 lines changed)
  37. ml-agents/mlagents/trainers/tests/torch/test_attention.py (162 lines changed)
  38. ml-agents/mlagents/trainers/torch/attention.py (191 lines changed)

6
com.unity.ml-agents/Runtime/Communicator/GrpcExtensions.cs


{
observationProto.CompressedChannelMapping.AddRange(compressibleSensor.GetCompressedChannelMapping());
}
var dimensionPropertySensor = sensor as IDimensionPropertiesSensor;
if (dimensionPropertySensor != null)
{
observationProto.DimensionProperties.AddRange(dimensionPropertySensor.GetDimensionProperties());
}
}
observationProto.Shape.AddRange(shape);
return observationProto;

33
com.unity.ml-agents/Runtime/Grpc/CommunicatorObjects/Observation.cs


byte[] descriptorData = global::System.Convert.FromBase64String(
string.Concat(
"CjRtbGFnZW50c19lbnZzL2NvbW11bmljYXRvcl9vYmplY3RzL29ic2VydmF0",
"aW9uLnByb3RvEhRjb21tdW5pY2F0b3Jfb2JqZWN0cyKdAgoQT2JzZXJ2YXRp",
"aW9uLnByb3RvEhRjb21tdW5pY2F0b3Jfb2JqZWN0cyK7AgoQT2JzZXJ2YXRp",
"KAUaGQoJRmxvYXREYXRhEgwKBGRhdGEYASADKAJCEgoQb2JzZXJ2YXRpb25f",
"ZGF0YSopChRDb21wcmVzc2lvblR5cGVQcm90bxIICgROT05FEAASBwoDUE5H",
"EAFCJaoCIlVuaXR5Lk1MQWdlbnRzLkNvbW11bmljYXRvck9iamVjdHNiBnBy",
"b3RvMw=="));
"KAUSHAoUZGltZW5zaW9uX3Byb3BlcnRpZXMYBiADKAUaGQoJRmxvYXREYXRh",
"EgwKBGRhdGEYASADKAJCEgoQb2JzZXJ2YXRpb25fZGF0YSopChRDb21wcmVz",
"c2lvblR5cGVQcm90bxIICgROT05FEAASBwoDUE5HEAFCJaoCIlVuaXR5Lk1M",
"QWdlbnRzLkNvbW11bmljYXRvck9iamVjdHNiBnByb3RvMw=="));
new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.ObservationProto), global::Unity.MLAgents.CommunicatorObjects.ObservationProto.Parser, new[]{ "Shape", "CompressionType", "CompressedData", "FloatData", "CompressedChannelMapping" }, new[]{ "ObservationData" }, null, new pbr::GeneratedClrTypeInfo[] { new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.ObservationProto.Types.FloatData), global::Unity.MLAgents.CommunicatorObjects.ObservationProto.Types.FloatData.Parser, new[]{ "Data" }, null, null, null)})
new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.ObservationProto), global::Unity.MLAgents.CommunicatorObjects.ObservationProto.Parser, new[]{ "Shape", "CompressionType", "CompressedData", "FloatData", "CompressedChannelMapping", "DimensionProperties" }, new[]{ "ObservationData" }, null, new pbr::GeneratedClrTypeInfo[] { new pbr::GeneratedClrTypeInfo(typeof(global::Unity.MLAgents.CommunicatorObjects.ObservationProto.Types.FloatData), global::Unity.MLAgents.CommunicatorObjects.ObservationProto.Types.FloatData.Parser, new[]{ "Data" }, null, null, null)})
}));
}
#endregion

shape_ = other.shape_.Clone();
compressionType_ = other.compressionType_;
compressedChannelMapping_ = other.compressedChannelMapping_.Clone();
dimensionProperties_ = other.dimensionProperties_.Clone();
switch (other.ObservationDataCase) {
case ObservationDataOneofCase.CompressedData:
CompressedData = other.CompressedData;

get { return compressedChannelMapping_; }
}
/// <summary>Field number for the "dimension_properties" field.</summary>
public const int DimensionPropertiesFieldNumber = 6;
private static readonly pb::FieldCodec<int> _repeated_dimensionProperties_codec
= pb::FieldCodec.ForInt32(50);
private readonly pbc::RepeatedField<int> dimensionProperties_ = new pbc::RepeatedField<int>();
[global::System.Diagnostics.DebuggerNonUserCodeAttribute]
public pbc::RepeatedField<int> DimensionProperties {
get { return dimensionProperties_; }
}
private object observationData_;
/// <summary>Enum of possible cases for the "observation_data" oneof.</summary>
public enum ObservationDataOneofCase {

if (CompressedData != other.CompressedData) return false;
if (!object.Equals(FloatData, other.FloatData)) return false;
if(!compressedChannelMapping_.Equals(other.compressedChannelMapping_)) return false;
if(!dimensionProperties_.Equals(other.dimensionProperties_)) return false;
if (ObservationDataCase != other.ObservationDataCase) return false;
return Equals(_unknownFields, other._unknownFields);
}

if (observationDataCase_ == ObservationDataOneofCase.CompressedData) hash ^= CompressedData.GetHashCode();
if (observationDataCase_ == ObservationDataOneofCase.FloatData) hash ^= FloatData.GetHashCode();
hash ^= compressedChannelMapping_.GetHashCode();
hash ^= dimensionProperties_.GetHashCode();
hash ^= (int) observationDataCase_;
if (_unknownFields != null) {
hash ^= _unknownFields.GetHashCode();

output.WriteMessage(FloatData);
}
compressedChannelMapping_.WriteTo(output, _repeated_compressedChannelMapping_codec);
dimensionProperties_.WriteTo(output, _repeated_dimensionProperties_codec);
if (_unknownFields != null) {
_unknownFields.WriteTo(output);
}

size += 1 + pb::CodedOutputStream.ComputeMessageSize(FloatData);
}
size += compressedChannelMapping_.CalculateSize(_repeated_compressedChannelMapping_codec);
size += dimensionProperties_.CalculateSize(_repeated_dimensionProperties_codec);
if (_unknownFields != null) {
size += _unknownFields.CalculateSize();
}

CompressionType = other.CompressionType;
}
compressedChannelMapping_.Add(other.compressedChannelMapping_);
dimensionProperties_.Add(other.dimensionProperties_);
switch (other.ObservationDataCase) {
case ObservationDataOneofCase.CompressedData:
CompressedData = other.CompressedData;

case 42:
case 40: {
compressedChannelMapping_.AddEntriesFrom(input, _repeated_compressedChannelMapping_codec);
break;
}
case 50:
case 48: {
dimensionProperties_.AddEntriesFrom(input, _repeated_dimensionProperties_codec);
break;
}
}

6
gym-unity/gym_unity/envs/__init__.py


def _get_n_vis_obs(self) -> int:
result = 0
for shape in self.group_spec.observation_shapes:
for shape in self.group_spec.observation_spec.shapes:
if len(shape) == 3:
result += 1
return result

for shape in self.group_spec.observation_shapes:
for shape in self.group_spec.observation_spec.shapes:
if len(shape) == 3:
result.append(shape)
return result

def _get_vec_obs_size(self) -> int:
result = 0
for shape in self.group_spec.observation_shapes:
for shape in self.group_spec.observation_spec.shapes:
if len(shape) == 1:
result += shape[0]
return result

4
gym-unity/gym_unity/tests/test_gym.py


from gym_unity.envs import UnityToGymWrapper
from mlagents_envs.base_env import (
BehaviorSpec,
ObservationSpec,
ActionSpec,
DecisionSteps,
TerminalSteps,

obs_shapes = [(vector_observation_space_size,)]
for _ in range(number_visual_observations):
obs_shapes += [(8, 8, 3)]
return BehaviorSpec(obs_shapes, action_spec)
obs_spec = ObservationSpec.create_simple(obs_shapes)
return BehaviorSpec(obs_spec, action_spec)
def create_mock_vector_steps(specs, num_agents=1, number_visual_observations=0):

57
ml-agents-envs/mlagents_envs/base_env.py


Any,
Mapping as MappingType,
)
from enum import IntFlag
import numpy as np
from mlagents_envs.exception import UnityActionException

:param spec: The BehaviorSpec for the DecisionSteps
"""
obs: List[np.ndarray] = []
for shape in spec.observation_shapes:
for shape in spec.observation_spec.shapes:
obs += [np.zeros((0,) + shape, dtype=np.float32)]
return DecisionSteps(
obs=obs,

:param spec: The BehaviorSpec for the TerminalSteps
"""
obs: List[np.ndarray] = []
for shape in spec.observation_shapes:
for shape in spec.observation_spec.shapes:
obs += [np.zeros((0,) + shape, dtype=np.float32)]
return TerminalSteps(
obs=obs,

return ActionSpec(0, discrete_branches)
class DimensionProperty(IntFlag):
"""
No properties specified.
"""
UNSPECIFIED = 0
"""
No property of the observation in that dimension. Observations can be processed with
fully connected networks.
"""
NONE = 1
"""
Means it is possible to do a convolution in this dimension.
"""
TRANSLATIONAL_EQUIVARIANCE = 2
"""
Means that there can be a variable number of observations in this dimension.
The observations are unordered.
"""
VARIABLE_SIZE = 3
class ObservationSpec(NamedTuple):
"""
A NamedTuple containing information about the observation of Agents under the
same behavior.
- shapes is a List of Tuples of ints: Each Tuple corresponds
to an observation's dimensions. The shape tuples have the same ordering as
the ordering of the DecisionSteps and TerminalSteps.
- dimension_properties is a List of Tuples of DimensionProperty flags. Each Tuple
corresponds to an observation's properties. The tuples have the same ordering as
the ordering of the DecisionSteps and TerminalSteps.
"""
shapes: List[Tuple[int, ...]]
dimension_properties: List[Tuple[DimensionProperty, ...]]
@staticmethod
def create_simple(shapes: List[Tuple[int, ...]]) -> "ObservationSpec":
dim_prop: List[Tuple[DimensionProperty, ...]] = []
for shape in shapes:
dim_prop += [(DimensionProperty.UNSPECIFIED,) * len(shape)]
return ObservationSpec(shapes, dim_prop)
- observation_shapes is a List of Tuples of int : Each Tuple corresponds
to an observation's dimensions. The shape tuples have the same ordering as
the ordering of the DecisionSteps and TerminalSteps.
- observation_spec is an ObservationSpec NamedTuple containing information about
the Agent's observations, such as their shapes.
observation_shapes: List[Tuple]
observation_spec: ObservationSpec
action_spec: ActionSpec
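
For reference, a minimal sketch (not part of the diff) of how the new ObservationSpec is meant to be built from these definitions, either through create_simple (every dimension marked UNSPECIFIED) or with explicit DimensionProperty tuples. The shapes and property choices below are illustrative only.

from mlagents_envs.base_env import (
    ActionSpec,
    BehaviorSpec,
    DimensionProperty,
    ObservationSpec,
)

# One vector observation and one buffer-style observation of up to 20 entities
# with 6 floats each (illustrative values, not from the commit).
shapes = [(10,), (20, 6)]

# create_simple marks every dimension of every observation as UNSPECIFIED.
simple_spec = ObservationSpec.create_simple(shapes)
assert simple_spec.dimension_properties[0] == (DimensionProperty.UNSPECIFIED,)

# Explicit properties: the second observation is variable-size along its first
# dimension and plain (fully connected) along its second.
spec = ObservationSpec(
    shapes=shapes,
    dimension_properties=[
        (DimensionProperty.NONE,),
        (DimensionProperty.VARIABLE_SIZE, DimensionProperty.NONE),
    ],
)
behavior_spec = BehaviorSpec(spec, ActionSpec.create_continuous(2))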

19
ml-agents-envs/mlagents_envs/communicator_objects/observation_pb2.py


name='mlagents_envs/communicator_objects/observation.proto',
package='communicator_objects',
syntax='proto3',
serialized_pb=_b('\n4mlagents_envs/communicator_objects/observation.proto\x12\x14\x63ommunicator_objects\"\x9d\x02\n\x10ObservationProto\x12\r\n\x05shape\x18\x01 \x03(\x05\x12\x44\n\x10\x63ompression_type\x18\x02 \x01(\x0e\x32*.communicator_objects.CompressionTypeProto\x12\x19\n\x0f\x63ompressed_data\x18\x03 \x01(\x0cH\x00\x12\x46\n\nfloat_data\x18\x04 \x01(\x0b\x32\x30.communicator_objects.ObservationProto.FloatDataH\x00\x12\"\n\x1a\x63ompressed_channel_mapping\x18\x05 \x03(\x05\x1a\x19\n\tFloatData\x12\x0c\n\x04\x64\x61ta\x18\x01 \x03(\x02\x42\x12\n\x10observation_data*)\n\x14\x43ompressionTypeProto\x12\x08\n\x04NONE\x10\x00\x12\x07\n\x03PNG\x10\x01\x42%\xaa\x02\"Unity.MLAgents.CommunicatorObjectsb\x06proto3')
serialized_pb=_b('\n4mlagents_envs/communicator_objects/observation.proto\x12\x14\x63ommunicator_objects\"\xbb\x02\n\x10ObservationProto\x12\r\n\x05shape\x18\x01 \x03(\x05\x12\x44\n\x10\x63ompression_type\x18\x02 \x01(\x0e\x32*.communicator_objects.CompressionTypeProto\x12\x19\n\x0f\x63ompressed_data\x18\x03 \x01(\x0cH\x00\x12\x46\n\nfloat_data\x18\x04 \x01(\x0b\x32\x30.communicator_objects.ObservationProto.FloatDataH\x00\x12\"\n\x1a\x63ompressed_channel_mapping\x18\x05 \x03(\x05\x12\x1c\n\x14\x64imension_properties\x18\x06 \x03(\x05\x1a\x19\n\tFloatData\x12\x0c\n\x04\x64\x61ta\x18\x01 \x03(\x02\x42\x12\n\x10observation_data*)\n\x14\x43ompressionTypeProto\x12\x08\n\x04NONE\x10\x00\x12\x07\n\x03PNG\x10\x01\x42%\xaa\x02\"Unity.MLAgents.CommunicatorObjectsb\x06proto3')
)
_COMPRESSIONTYPEPROTO = _descriptor.EnumDescriptor(

],
containing_type=None,
options=None,
serialized_start=366,
serialized_end=407,
serialized_start=396,
serialized_end=437,
)
_sym_db.RegisterEnumDescriptor(_COMPRESSIONTYPEPROTO)

extension_ranges=[],
oneofs=[
],
serialized_start=319,
serialized_end=344,
serialized_start=349,
serialized_end=374,
)
_OBSERVATIONPROTO = _descriptor.Descriptor(

message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
_descriptor.FieldDescriptor(
name='dimension_properties', full_name='communicator_objects.ObservationProto.dimension_properties', index=5,
number=6, type=5, cpp_type=1, label=3,
has_default_value=False, default_value=[],
message_type=None, enum_type=None, containing_type=None,
is_extension=False, extension_scope=None,
options=None, file=DESCRIPTOR),
],
extensions=[
],

index=0, containing_type=None, fields=[]),
],
serialized_start=79,
serialized_end=364,
serialized_end=394,
)
_OBSERVATIONPROTO_FLOATDATA.containing_type = _OBSERVATIONPROTO

6
ml-agents-envs/mlagents_envs/communicator_objects/observation_pb2.pyi


compression_type = ... # type: CompressionTypeProto
compressed_data = ... # type: builtin___bytes
compressed_channel_mapping = ... # type: google___protobuf___internal___containers___RepeatedScalarFieldContainer[builtin___int]
dimension_properties = ... # type: google___protobuf___internal___containers___RepeatedScalarFieldContainer[builtin___int]
@property
def float_data(self) -> ObservationProto.FloatData: ...

compressed_data : typing___Optional[builtin___bytes] = None,
float_data : typing___Optional[ObservationProto.FloatData] = None,
compressed_channel_mapping : typing___Optional[typing___Iterable[builtin___int]] = None,
dimension_properties : typing___Optional[typing___Iterable[builtin___int]] = None,
) -> None: ...
@classmethod
def FromString(cls, s: builtin___bytes) -> ObservationProto: ...

def HasField(self, field_name: typing_extensions___Literal[u"compressed_data",u"float_data",u"observation_data"]) -> builtin___bool: ...
def ClearField(self, field_name: typing_extensions___Literal[u"compressed_channel_mapping",u"compressed_data",u"compression_type",u"float_data",u"observation_data",u"shape"]) -> None: ...
def ClearField(self, field_name: typing_extensions___Literal[u"compressed_channel_mapping",u"compressed_data",u"compression_type",u"dimension_properties",u"float_data",u"observation_data",u"shape"]) -> None: ...
def ClearField(self, field_name: typing_extensions___Literal[u"compressed_channel_mapping",b"compressed_channel_mapping",u"compressed_data",b"compressed_data",u"compression_type",b"compression_type",u"float_data",b"float_data",u"observation_data",b"observation_data",u"shape",b"shape"]) -> None: ...
def ClearField(self, field_name: typing_extensions___Literal[u"compressed_channel_mapping",b"compressed_channel_mapping",u"compressed_data",b"compressed_data",u"compression_type",b"compression_type",u"dimension_properties",b"dimension_properties",u"float_data",b"float_data",u"observation_data",b"observation_data",u"shape",b"shape"]) -> None: ...
def WhichOneof(self, oneof_group: typing_extensions___Literal[u"observation_data",b"observation_data"]) -> typing_extensions___Literal["compressed_data","float_data"]: ...

11
ml-agents-envs/mlagents_envs/rpc_utils.py


from mlagents_envs.base_env import (
ActionSpec,
ObservationSpec,
DimensionProperty,
BehaviorSpec,
DecisionSteps,
TerminalSteps,

:return: BehaviorSpec object.
"""
observation_shape = [tuple(obs.shape) for obs in agent_info.observations]
dim_props = [
tuple(DimensionProperty(dim) for dim in obs.dimension_properties)
for obs in agent_info.observations
]
obs_spec = ObservationSpec(observation_shape, dim_props)
# proto from communicator < v1.3 does not set action spec, use deprecated fields instead
if (
brain_param_proto.action_spec.num_continuous_actions == 0

action_spec_proto.num_continuous_actions,
tuple(branch for branch in action_spec_proto.discrete_branch_sizes),
)
return BehaviorSpec(observation_shape, action_spec)
return BehaviorSpec(obs_spec, action_spec)
class OffsetBytesIO:

]
decision_obs_list: List[np.ndarray] = []
terminal_obs_list: List[np.ndarray] = []
for obs_index, obs_shape in enumerate(behavior_spec.observation_shapes):
for obs_index, obs_shape in enumerate(behavior_spec.observation_spec.shapes):
is_visual = len(obs_shape) == 3
if is_visual:
obs_shape = cast(Tuple[int, int, int], obs_shape)
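
As a side note (not part of the diff), the raw ints arriving in obs.dimension_properties map back onto the DimensionProperty IntFlag exactly as behavior_spec_from_proto does above; a small illustrative check:

from mlagents_envs.base_env import DimensionProperty

raw_dims = [3, 1]  # e.g. a variable-size observation as the C# side might report it
props = tuple(DimensionProperty(d) for d in raw_dims)
assert props == (DimensionProperty.VARIABLE_SIZE, DimensionProperty.NONE)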

14
ml-agents-envs/mlagents_envs/tests/test_envs.py


env.close()
assert isinstance(decision_steps, DecisionSteps)
assert isinstance(terminal_steps, TerminalSteps)
assert len(spec.observation_shapes) == len(decision_steps.obs)
assert len(spec.observation_shapes) == len(terminal_steps.obs)
assert len(spec.observation_spec.shapes) == len(decision_steps.obs)
assert len(spec.observation_spec.shapes) == len(terminal_steps.obs)
for shape, obs in zip(spec.observation_shapes, decision_steps.obs):
for shape, obs in zip(spec.observation_spec.shapes, decision_steps.obs):
for shape, obs in zip(spec.observation_shapes, terminal_steps.obs):
for shape, obs in zip(spec.observation_spec.shapes, terminal_steps.obs):
assert (n_agents,) + shape == obs.shape

env.close()
assert isinstance(decision_steps, DecisionSteps)
assert isinstance(terminal_steps, TerminalSteps)
assert len(spec.observation_shapes) == len(decision_steps.obs)
assert len(spec.observation_shapes) == len(terminal_steps.obs)
for shape, obs in zip(spec.observation_shapes, decision_steps.obs):
assert len(spec.observation_spec.shapes) == len(decision_steps.obs)
assert len(spec.observation_spec.shapes) == len(terminal_steps.obs)
for shape, obs in zip(spec.observation_spec.shapes, decision_steps.obs):
assert (n_agents,) + shape == obs.shape
assert 0 in decision_steps
assert 2 in terminal_steps

2
ml-agents-envs/mlagents_envs/tests/test_registry.py


for worker_id in range(2):
assert BASIC_ID in registry
env = registry[BASIC_ID].make(
base_port=6005, worker_id=worker_id, no_graphics=True
base_port=6002, worker_id=worker_id, no_graphics=True
)
env.reset()
env.step()

31
ml-agents-envs/mlagents_envs/tests/test_rpc_utils.py


from mlagents_envs.communicator_objects.agent_action_pb2 import AgentActionProto
from mlagents_envs.base_env import (
BehaviorSpec,
ObservationSpec,
ActionSpec,
DecisionSteps,
TerminalSteps,

def test_batched_step_result_from_proto():
n_agents = 10
shapes = [(3,), (4,)]
spec = BehaviorSpec(shapes, ActionSpec.create_continuous(3))
spec = BehaviorSpec(
ObservationSpec.create_simple(shapes), ActionSpec.create_continuous(3)
)
ap_list = generate_list_agent_proto(n_agents, shapes)
decision_steps, terminal_steps = steps_from_proto(ap_list, spec)
for agent_id in range(n_agents):

def test_action_masking_discrete():
n_agents = 10
shapes = [(3,), (4,)]
behavior_spec = BehaviorSpec(shapes, ActionSpec.create_discrete((7, 3)))
behavior_spec = BehaviorSpec(
ObservationSpec.create_simple(shapes), ActionSpec.create_discrete((7, 3))
)
ap_list = generate_list_agent_proto(n_agents, shapes)
decision_steps, terminal_steps = steps_from_proto(ap_list, behavior_spec)
masks = decision_steps.action_mask

def test_action_masking_discrete_1():
n_agents = 10
shapes = [(3,), (4,)]
behavior_spec = BehaviorSpec(shapes, ActionSpec.create_discrete((10,)))
behavior_spec = BehaviorSpec(
ObservationSpec.create_simple(shapes), ActionSpec.create_discrete((10,))
)
ap_list = generate_list_agent_proto(n_agents, shapes)
decision_steps, terminal_steps = steps_from_proto(ap_list, behavior_spec)
masks = decision_steps.action_mask

def test_action_masking_discrete_2():
n_agents = 10
shapes = [(3,), (4,)]
behavior_spec = BehaviorSpec(shapes, ActionSpec.create_discrete((2, 2, 6)))
behavior_spec = BehaviorSpec(
ObservationSpec.create_simple(shapes), ActionSpec.create_discrete((2, 2, 6))
)
ap_list = generate_list_agent_proto(n_agents, shapes)
decision_steps, terminal_steps = steps_from_proto(ap_list, behavior_spec)
masks = decision_steps.action_mask

def test_action_masking_continuous():
n_agents = 10
shapes = [(3,), (4,)]
behavior_spec = BehaviorSpec(shapes, ActionSpec.create_continuous(10))
behavior_spec = BehaviorSpec(
ObservationSpec.create_simple(shapes), ActionSpec.create_continuous(10)
)
ap_list = generate_list_agent_proto(n_agents, shapes)
decision_steps, terminal_steps = steps_from_proto(ap_list, behavior_spec)
masks = decision_steps.action_mask

behavior_spec = behavior_spec_from_proto(bp, agent_proto)
assert behavior_spec.action_spec.is_discrete()
assert not behavior_spec.action_spec.is_continuous()
assert behavior_spec.observation_shapes == [(3,), (4,)]
assert behavior_spec.observation_spec.shapes == [(3,), (4,)]
assert behavior_spec.action_spec.discrete_branches == (5, 4)
assert behavior_spec.action_spec.discrete_size == 2
bp = BrainParametersProto()

def test_batched_step_result_from_proto_raises_on_infinite():
n_agents = 10
shapes = [(3,), (4,)]
behavior_spec = BehaviorSpec(shapes, ActionSpec.create_continuous(3))
behavior_spec = BehaviorSpec(
ObservationSpec.create_simple(shapes), ActionSpec.create_continuous(3)
)
ap_list = generate_list_agent_proto(n_agents, shapes, infinite_rewards=True)
with pytest.raises(RuntimeError):
steps_from_proto(ap_list, behavior_spec)

n_agents = 10
shapes = [(3,), (4,)]
behavior_spec = BehaviorSpec(shapes, ActionSpec.create_continuous(3))
behavior_spec = BehaviorSpec(
ObservationSpec.create_simple(shapes), ActionSpec.create_continuous(3)
)
ap_list = generate_list_agent_proto(n_agents, shapes, nan_observations=True)
with pytest.raises(RuntimeError):
steps_from_proto(ap_list, behavior_spec)

7
ml-agents-envs/mlagents_envs/tests/test_steps.py


from mlagents_envs.base_env import (
DecisionSteps,
ObservationSpec,
TerminalSteps,
ActionSpec,
BehaviorSpec,

def test_empty_decision_steps():
specs = BehaviorSpec(
observation_shapes=[(3, 2), (5,)], action_spec=ActionSpec.create_continuous(3)
observation_spec=ObservationSpec.create_simple([(3, 2), (5,)]),
action_spec=ActionSpec.create_continuous(3),
)
ds = DecisionSteps.empty(specs)
assert len(ds.obs) == 2

def test_empty_terminal_steps():
specs = BehaviorSpec(
observation_shapes=[(3, 2), (5,)], action_spec=ActionSpec.create_continuous(3)
observation_spec=ObservationSpec.create_simple([(3, 2), (5,)]),
action_spec=ActionSpec.create_continuous(3),
)
ts = TerminalSteps.empty(specs)
assert len(ts.obs) == 2

8
ml-agents/mlagents/trainers/demo_loader.py


)
)
# check observations match
if len(behavior_spec.observation_shapes) != len(
expected_behavior_spec.observation_shapes
if len(behavior_spec.observation_spec.shapes) != len(
expected_behavior_spec.observation_spec.shapes
):
raise RuntimeError(
"The demonstrations do not have the same number of observations as the policy."

zip(
behavior_spec.observation_shapes,
expected_behavior_spec.observation_shapes,
behavior_spec.observation_spec.shapes,
expected_behavior_spec.observation_spec.shapes,
)
):
if demo_obs != policy_obs:

6
ml-agents/mlagents/trainers/policy/policy.py


else [self.behavior_spec.action_spec.continuous_size]
)
self.vec_obs_size = sum(
shape[0] for shape in behavior_spec.observation_shapes if len(shape) == 1
shape[0]
for shape in behavior_spec.observation_spec.shapes
if len(shape) == 1
1 for shape in behavior_spec.observation_shapes if len(shape) == 3
1 for shape in behavior_spec.observation_spec.shapes if len(shape) == 3
)
self.use_continuous_act = self.behavior_spec.action_spec.is_continuous()
self.previous_action_dict: Dict[str, np.ndarray] = {}
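
To make the bookkeeping above concrete, a small illustration (not in the diff, shapes are hypothetical): vector observations are summed into vec_obs_size, rank-3 observations are counted as visual, and a rank-2 buffer-style observation currently falls through both checks.

shapes = [(8,), (84, 84, 3), (20, 6)]
vec_obs_size = sum(shape[0] for shape in shapes if len(shape) == 1)  # 8
vis_obs_count = sum(1 for shape in shapes if len(shape) == 3)        # 1
# The rank-2 (20, 6) observation is counted by neither branch here.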

2
ml-agents/mlagents/trainers/policy/torch_policy.py


else:
ac_class = SharedActorCritic
self.actor_critic = ac_class(
observation_shapes=self.behavior_spec.observation_shapes,
observation_shapes=self.behavior_spec.observation_spec.shapes,
network_settings=trainer_settings.network_settings,
action_spec=behavior_spec.action_spec,
stream_names=reward_signal_names,

4
ml-agents/mlagents/trainers/sac/optimizer_torch.py


self.value_network = TorchSACOptimizer.PolicyValueNetwork(
self.stream_names,
self.policy.behavior_spec.observation_shapes,
self.policy.behavior_spec.observation_spec.shapes,
policy_network_settings,
self._action_spec,
)

self.policy.behavior_spec.observation_shapes,
self.policy.behavior_spec.observation_spec.shapes,
policy_network_settings,
)
ModelUtils.soft_update(

14
ml-agents/mlagents/trainers/tests/mock_brain.py


from mlagents_envs.base_env import (
DecisionSteps,
TerminalSteps,
ObservationSpec,
BehaviorSpec,
ActionSpec,
ActionTuple,

reward = np.array(num_agents * [1.0], dtype=np.float32)
interrupted = np.array(num_agents * [False], dtype=np.bool)
agent_id = np.arange(num_agents, dtype=np.int32)
behavior_spec = BehaviorSpec(observation_shapes, action_spec)
obs_spec = ObservationSpec.create_simple(observation_shapes)
behavior_spec = BehaviorSpec(obs_spec, action_spec)
if done:
return (
DecisionSteps.empty(behavior_spec),

) -> Tuple[DecisionSteps, TerminalSteps]:
return create_mock_steps(
num_agents=num_agents,
observation_shapes=behavior_spec.observation_shapes,
observation_shapes=behavior_spec.observation_spec.shapes,
action_spec=behavior_spec.action_spec,
)

) -> AgentBuffer:
trajectory = make_fake_trajectory(
length,
behavior_spec.observation_shapes,
behavior_spec.observation_spec.shapes,
action_spec=behavior_spec.action_spec,
memory_size=memory_size,
)

action_spec = ActionSpec.create_discrete(tuple(vector_action_space))
else:
action_spec = ActionSpec.create_continuous(vector_action_space)
behavior_spec = BehaviorSpec(
[(84, 84, 3)] * int(use_visual) + [(vector_obs_space,)], action_spec
)
observation_shapes = [(84, 84, 3)] * int(use_visual) + [(vector_obs_space,)]
obs_spec = ObservationSpec.create_simple(observation_shapes)
behavior_spec = BehaviorSpec(obs_spec, action_spec)
return behavior_spec

10
ml-agents/mlagents/trainers/tests/simple_test_envs.py


from mlagents_envs.base_env import (
ActionSpec,
ObservationSpec,
ActionTuple,
BaseEnv,
BehaviorSpec,

self.action[name] = None
self.step_result[name] = None
def _make_obs_spec(self) -> List[Any]:
obs_spec: List[Any] = []
def _make_obs_spec(self) -> ObservationSpec:
obs_shape: List[Any] = []
obs_spec.append((self.vec_obs_size,))
obs_shape.append((self.vec_obs_size,))
obs_spec.append(self.vis_obs_size)
obs_shape.append(self.vis_obs_size)
obs_spec = ObservationSpec.create_simple(obs_shape)
return obs_spec
def _make_obs(self, value: float) -> List[np.ndarray]:

4
ml-agents/mlagents/trainers/tests/test_demo_loader.py


behavior_spec, pair_infos, total_expected = load_demonstration(
path_prefix + "/test.demo"
)
assert np.sum(behavior_spec.observation_shapes[0]) == 8
assert np.sum(behavior_spec.observation_spec.shapes[0]) == 8
assert len(pair_infos) == total_expected
_, demo_buffer = demo_to_buffer(path_prefix + "/test.demo", 1, BEHAVIOR_SPEC)

behavior_spec, pair_infos, total_expected = load_demonstration(
path_prefix + "/test_demo_dir"
)
assert np.sum(behavior_spec.observation_shapes[0]) == 8
assert np.sum(behavior_spec.observation_spec.shapes[0]) == 8
assert len(pair_infos) == total_expected
_, demo_buffer = demo_to_buffer(path_prefix + "/test_demo_dir", 1, BEHAVIOR_SPEC)

6
ml-agents/mlagents/trainers/tests/torch/test_hybrid.py


PPO_TORCH_CONFIG,
hyperparameters=new_hyperparams,
network_settings=new_network_settings,
max_steps=3000,
max_steps=5000,
)
check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)

SAC_TORCH_CONFIG.hyperparameters,
buffer_size=50000,
batch_size=256,
buffer_init_steps=2000,
buffer_init_steps=0,
SAC_TORCH_CONFIG, hyperparameters=new_hyperparams, max_steps=6000
SAC_TORCH_CONFIG, hyperparameters=new_hyperparams, max_steps=2000
)
check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)

2
ml-agents/mlagents/trainers/tests/torch/test_ppo.py


time_horizon = 15
trajectory = make_fake_trajectory(
length=time_horizon,
observation_shapes=optimizer.policy.behavior_spec.observation_shapes,
observation_shapes=optimizer.policy.behavior_spec.observation_spec.shapes,
action_spec=DISCRETE_ACTION_SPEC if discrete else CONTINUOUS_ACTION_SPEC,
max_step_complete=True,
)

40
ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_curiosity.py


CuriosityRewardProvider,
create_reward_provider,
)
from mlagents_envs.base_env import BehaviorSpec, ActionSpec
from mlagents_envs.base_env import BehaviorSpec, ActionSpec, ObservationSpec
from mlagents.trainers.settings import CuriositySettings, RewardSignalType
from mlagents.trainers.tests.torch.test_reward_providers.utils import (
create_agent_buffer,

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec([(10,)], ACTIONSPEC_CONTINUOUS),
BehaviorSpec([(10,)], ACTIONSPEC_TWODISCRETE),
BehaviorSpec(ObservationSpec.create_simple([(10,)]), ACTIONSPEC_CONTINUOUS),
BehaviorSpec(ObservationSpec.create_simple([(10,)]), ACTIONSPEC_TWODISCRETE),
],
)
def test_construction(behavior_spec: BehaviorSpec) -> None:

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec([(10,)], ACTIONSPEC_CONTINUOUS),
BehaviorSpec([(10,), (64, 66, 3), (84, 86, 1)], ACTIONSPEC_CONTINUOUS),
BehaviorSpec([(10,), (64, 66, 1)], ACTIONSPEC_TWODISCRETE),
BehaviorSpec([(10,)], ACTIONSPEC_DISCRETE),
BehaviorSpec(ObservationSpec.create_simple([(10,)]), ACTIONSPEC_CONTINUOUS),
BehaviorSpec(
ObservationSpec.create_simple([(10,), (64, 66, 3), (84, 86, 1)]),
ACTIONSPEC_CONTINUOUS,
),
BehaviorSpec(
ObservationSpec.create_simple([(10,), (64, 66, 1)]), ACTIONSPEC_TWODISCRETE
),
BehaviorSpec(ObservationSpec.create_simple([(10,)]), ACTIONSPEC_DISCRETE),
],
)
def test_factory(behavior_spec: BehaviorSpec) -> None:

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec([(10,), (64, 66, 3), (24, 26, 1)], ACTIONSPEC_CONTINUOUS),
BehaviorSpec([(10,)], ACTIONSPEC_TWODISCRETE),
BehaviorSpec([(10,)], ACTIONSPEC_DISCRETE),
BehaviorSpec(
ObservationSpec.create_simple([(10,), (64, 66, 3), (24, 26, 1)]),
ACTIONSPEC_CONTINUOUS,
),
BehaviorSpec(ObservationSpec.create_simple([(10,)]), ACTIONSPEC_TWODISCRETE),
BehaviorSpec(ObservationSpec.create_simple([(10,)]), ACTIONSPEC_DISCRETE),
],
)
def test_reward_decreases(behavior_spec: BehaviorSpec, seed: int) -> None:

@pytest.mark.parametrize("seed", SEED)
@pytest.mark.parametrize(
"behavior_spec", [BehaviorSpec([(10,)], ACTIONSPEC_CONTINUOUS)]
"behavior_spec",
[BehaviorSpec(ObservationSpec.create_simple([(10,)]), ACTIONSPEC_CONTINUOUS)],
)
def test_continuous_action_prediction(behavior_spec: BehaviorSpec, seed: int) -> None:
np.random.seed(seed)

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec([(10,), (64, 66, 3), (24, 26, 1)], ACTIONSPEC_CONTINUOUS),
BehaviorSpec([(10,)], ACTIONSPEC_TWODISCRETE),
BehaviorSpec([(10,)], ACTIONSPEC_DISCRETE),
BehaviorSpec(
ObservationSpec.create_simple([(10,), (64, 66, 3), (24, 26, 1)]),
ACTIONSPEC_CONTINUOUS,
),
BehaviorSpec(ObservationSpec.create_simple([(10,)]), ACTIONSPEC_TWODISCRETE),
BehaviorSpec(ObservationSpec.create_simple([(10,)]), ACTIONSPEC_DISCRETE),
],
)
def test_next_state_prediction(behavior_spec: BehaviorSpec, seed: int) -> None:

14
ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_extrinsic.py


ExtrinsicRewardProvider,
create_reward_provider,
)
from mlagents_envs.base_env import BehaviorSpec, ActionSpec
from mlagents_envs.base_env import BehaviorSpec, ActionSpec, ObservationSpec
from mlagents.trainers.settings import RewardSignalSettings, RewardSignalType
from mlagents.trainers.tests.torch.test_reward_providers.utils import (
create_agent_buffer,

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec([(10,)], ACTIONSPEC_CONTINUOUS),
BehaviorSpec([(10,)], ACTIONSPEC_TWODISCRETE),
BehaviorSpec(ObservationSpec.create_simple([(10,)]), ACTIONSPEC_CONTINUOUS),
BehaviorSpec(ObservationSpec.create_simple([(10,)]), ACTIONSPEC_TWODISCRETE),
],
)
def test_construction(behavior_spec: BehaviorSpec) -> None:

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec([(10,)], ACTIONSPEC_CONTINUOUS),
BehaviorSpec([(10,)], ACTIONSPEC_TWODISCRETE),
BehaviorSpec(ObservationSpec.create_simple([(10,)]), ACTIONSPEC_CONTINUOUS),
BehaviorSpec(ObservationSpec.create_simple([(10,)]), ACTIONSPEC_TWODISCRETE),
],
)
def test_factory(behavior_spec: BehaviorSpec) -> None:

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec([(10,)], ACTIONSPEC_CONTINUOUS),
BehaviorSpec([(10,)], ACTIONSPEC_TWODISCRETE),
BehaviorSpec(ObservationSpec.create_simple([(10,)]), ACTIONSPEC_CONTINUOUS),
BehaviorSpec(ObservationSpec.create_simple([(10,)]), ACTIONSPEC_TWODISCRETE),
],
)
def test_reward(behavior_spec: BehaviorSpec, reward: float) -> None:

28
ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_gail.py


GAILRewardProvider,
create_reward_provider,
)
from mlagents_envs.base_env import BehaviorSpec, ActionSpec
from mlagents_envs.base_env import BehaviorSpec, ActionSpec, ObservationSpec
from mlagents.trainers.settings import GAILSettings, RewardSignalType
from mlagents.trainers.tests.torch.test_reward_providers.utils import (
create_agent_buffer,

ACTIONSPEC_DISCRETE = ActionSpec.create_discrete((20,))
@pytest.mark.parametrize("behavior_spec", [BehaviorSpec([(8,)], ACTIONSPEC_CONTINUOUS)])
@pytest.mark.parametrize(
"behavior_spec",
[BehaviorSpec(ObservationSpec.create_simple([(8,)]), ACTIONSPEC_CONTINUOUS)],
)
def test_construction(behavior_spec: BehaviorSpec) -> None:
gail_settings = GAILSettings(demo_path=CONTINUOUS_PATH)
gail_rp = GAILRewardProvider(behavior_spec, gail_settings)

@pytest.mark.parametrize("behavior_spec", [BehaviorSpec([(8,)], ACTIONSPEC_CONTINUOUS)])
@pytest.mark.parametrize(
"behavior_spec",
[BehaviorSpec(ObservationSpec.create_simple([(8,)]), ACTIONSPEC_CONTINUOUS)],
)
def test_factory(behavior_spec: BehaviorSpec) -> None:
gail_settings = GAILSettings(demo_path=CONTINUOUS_PATH)
gail_rp = create_reward_provider(

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec([(8,), (24, 26, 1)], ACTIONSPEC_CONTINUOUS),
BehaviorSpec([(50,)], ACTIONSPEC_FOURDISCRETE),
BehaviorSpec([(10,)], ACTIONSPEC_DISCRETE),
BehaviorSpec(
ObservationSpec.create_simple([(8,), (24, 26, 1)]), ACTIONSPEC_CONTINUOUS
),
BehaviorSpec(ObservationSpec.create_simple([(50,)]), ACTIONSPEC_FOURDISCRETE),
BehaviorSpec(ObservationSpec.create_simple([(10,)]), ACTIONSPEC_DISCRETE),
],
)
@pytest.mark.parametrize("use_actions", [False, True])

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec([(8,), (24, 26, 1)], ACTIONSPEC_CONTINUOUS),
BehaviorSpec([(50,)], ACTIONSPEC_FOURDISCRETE),
BehaviorSpec([(10,)], ACTIONSPEC_DISCRETE),
BehaviorSpec(
ObservationSpec.create_simple([(8,), (24, 26, 1)]), ACTIONSPEC_CONTINUOUS
),
BehaviorSpec(ObservationSpec.create_simple([(50,)]), ACTIONSPEC_FOURDISCRETE),
BehaviorSpec(ObservationSpec.create_simple([(10,)]), ACTIONSPEC_DISCRETE),
],
)
@pytest.mark.parametrize("use_actions", [False, True])

28
ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_rnd.py


RNDRewardProvider,
create_reward_provider,
)
from mlagents_envs.base_env import BehaviorSpec, ActionSpec
from mlagents_envs.base_env import BehaviorSpec, ActionSpec, ObservationSpec
from mlagents.trainers.settings import RNDSettings, RewardSignalType
from mlagents.trainers.tests.torch.test_reward_providers.utils import (
create_agent_buffer,

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec([(10,)], ACTIONSPEC_CONTINUOUS),
BehaviorSpec([(10,)], ACTIONSPEC_TWODISCRETE),
BehaviorSpec(ObservationSpec.create_simple([(10,)]), ACTIONSPEC_CONTINUOUS),
BehaviorSpec(ObservationSpec.create_simple([(10,)]), ACTIONSPEC_TWODISCRETE),
],
)
def test_construction(behavior_spec: BehaviorSpec) -> None:

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec([(10,)], ACTIONSPEC_CONTINUOUS),
BehaviorSpec([(10,), (64, 66, 3), (84, 86, 1)], ACTIONSPEC_CONTINUOUS),
BehaviorSpec([(10,), (64, 66, 1)], ACTIONSPEC_TWODISCRETE),
BehaviorSpec([(10,)], ACTIONSPEC_DISCRETE),
BehaviorSpec(ObservationSpec.create_simple([(10,)]), ACTIONSPEC_CONTINUOUS),
BehaviorSpec(
ObservationSpec.create_simple([(10,), (64, 66, 3), (84, 86, 1)]),
ACTIONSPEC_CONTINUOUS,
),
BehaviorSpec(
ObservationSpec.create_simple([(10,), (64, 66, 1)]), ACTIONSPEC_TWODISCRETE
),
BehaviorSpec(ObservationSpec.create_simple([(10,)]), ACTIONSPEC_DISCRETE),
],
)
def test_factory(behavior_spec: BehaviorSpec) -> None:

@pytest.mark.parametrize(
"behavior_spec",
[
BehaviorSpec([(10,), (64, 66, 3), (24, 26, 1)], ACTIONSPEC_CONTINUOUS),
BehaviorSpec([(10,)], ACTIONSPEC_TWODISCRETE),
BehaviorSpec([(10,)], ACTIONSPEC_DISCRETE),
BehaviorSpec(
ObservationSpec.create_simple([(10,), (64, 66, 3), (24, 26, 1)]),
ACTIONSPEC_CONTINUOUS,
),
BehaviorSpec(ObservationSpec.create_simple([(10,)]), ACTIONSPEC_TWODISCRETE),
BehaviorSpec(ObservationSpec.create_simple([(10,)]), ACTIONSPEC_DISCRETE),
],
)
def test_reward_decreases(behavior_spec: BehaviorSpec, seed: int) -> None:

4
ml-agents/mlagents/trainers/tests/torch/test_reward_providers/utils.py


buffer = AgentBuffer()
curr_observations = [
np.random.normal(size=shape).astype(np.float32)
for shape in behavior_spec.observation_shapes
for shape in behavior_spec.observation_spec.shapes
for shape in behavior_spec.observation_shapes
for shape in behavior_spec.observation_spec.shapes
]
action_buffer = behavior_spec.action_spec.random_action(1)
action = {}

2
ml-agents/mlagents/trainers/tests/torch/test_simple_rl.py


reward_signals = {
RewardSignalType.GAIL: GAILSettings(encoding_size=32, demo_path=demo_path)
}
hyperparams = attr.evolve(PPO_TORCH_CONFIG.hyperparameters, learning_rate=3e-4)
hyperparams = attr.evolve(PPO_TORCH_CONFIG.hyperparameters, learning_rate=5e-3)
config = attr.evolve(
PPO_TORCH_CONFIG,
reward_signals=reward_signals,

2
ml-agents/mlagents/trainers/torch/components/reward_providers/curiosity_reward_provider.py


memory=None,
)
self._state_encoder = NetworkBody(
specs.observation_shapes, state_encoder_settings
specs.observation_spec.shapes, state_encoder_settings
)
self._action_flattener = ActionFlattener(self._action_spec)

2
ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py


self._action_flattener.flattened_size + 1 if settings.use_actions else 0
) # +1 is for dones
self.encoder = NetworkBody(
specs.observation_shapes, encoder_settings, unencoded_size
specs.observation_spec.shapes, encoder_settings, unencoded_size
)
estimator_input_size = settings.encoding_size

4
ml-agents/mlagents/trainers/torch/components/reward_providers/rnd_reward_provider.py


vis_encode_type=EncoderType.SIMPLE,
memory=None,
)
self._encoder = NetworkBody(specs.observation_shapes, state_encoder_settings)
self._encoder = NetworkBody(
specs.observation_spec.shapes, state_encoder_settings
)
def forward(self, mini_batch: AgentBuffer) -> torch.Tensor:
n_vis = len(self._encoder.visual_processors)

4
ml-agents/mlagents/trainers/torch/model_serialization.py


seq_len_dim = [1]
dummy_vec_obs = [torch.zeros(batch_dim + [self.policy.vec_obs_size])]
# create input shape of NCHW
# (It's NHWC in self.policy.behavior_spec.observation_shapes)
# (It's NHWC in self.policy.behavior_spec.observation_spec.shapes)
for shape in self.policy.behavior_spec.observation_shapes
for shape in self.policy.behavior_spec.observation_spec.shapes
if len(shape) == 3
]
dummy_masks = torch.ones(

4
ml-agents/tests/yamato/scripts/run_llapi.py


decision_steps, terminal_steps = env.get_steps(group_name)
# Examine the number of observations per Agent
print("Number of observations : ", len(group_spec.observation_shapes))
print("Number of observations : ", len(group_spec.observation_spec.shapes))
vis_obs = any(len(shape) == 3 for shape in group_spec.observation_shapes)
vis_obs = any(len(shape) == 3 for shape in group_spec.observation_spec.shapes)
print("Is there a visual observation ?", vis_obs)
# Examine the state space for the first observation for the first agent

1
protobuf-definitions/proto/mlagents_envs/communicator_objects/observation.proto


FloatData float_data = 4;
}
repeated int32 compressed_channel_mapping = 5;
repeated int32 dimension_properties = 6;
}
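
For illustration (not part of the diff), the new repeated field as seen from the regenerated Python protobuf class; the values below are arbitrary:

from mlagents_envs.communicator_objects.observation_pb2 import ObservationProto

proto = ObservationProto()
proto.shape.extend([20, 6])
proto.dimension_properties.extend([3, 1])  # VARIABLE_SIZE, NONE as raw ints
assert list(proto.dimension_properties) == [3, 1]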

84
com.unity.ml-agents/Runtime/Sensors/BufferSensor.cs


namespace Unity.MLAgents.Sensors
{
public class BufferSensor : ISensor
{
private int m_MaxNumObs;
private int m_ObsSize;
float[] m_ObservationBuffer;
int m_CurrentNumObservables;
public BufferSensor(int maxNumberObs, int obsSize)
{
m_MaxNumObs = maxNumberObs;
m_ObsSize = obsSize;
m_ObservationBuffer = new float[m_ObsSize * m_MaxNumObs];
m_CurrentNumObservables = 0;
}
/// <inheritdoc/>
public int[] GetObservationShape()
{
return new int[] { m_MaxNumObs, m_ObsSize };
}
/// <summary>
/// Appends an observation to the buffer. If the buffer is full (the maximum number
/// of observations has been reached) the observation will be ignored. The length of
/// the provided observation array must be equal to the observation size of
/// the buffer sensor.
/// </summary>
/// <param name="obs"> The float array observation</param>
public void AppendObservation(float[] obs)
{
if (m_CurrentNumObservables >= m_MaxNumObs)
{
return;
}
for (int i = 0; i < obs.Length; i++)
{
m_ObservationBuffer[m_CurrentNumObservables * m_ObsSize + i] = obs[i];
}
m_CurrentNumObservables++;
}
/// <inheritdoc/>
public int Write(ObservationWriter writer)
{
for (int i = 0; i < m_ObsSize * m_MaxNumObs; i++)
{
writer[i] = m_ObservationBuffer[i];
}
return m_ObsSize * m_MaxNumObs;
}
/// <inheritdoc/>
public virtual byte[] GetCompressedObservation()
{
return null;
}
/// <inheritdoc/>
public void Update()
{
Reset();
}
/// <inheritdoc/>
public void Reset()
{
m_CurrentNumObservables = 0;
Array.Clear(m_ObservationBuffer, 0, m_ObservationBuffer.Length);
}
public SensorCompressionType GetCompressionType()
{
return SensorCompressionType.None;
}
public string GetName()
{
return "BufferSensor";
}
}
}
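
On the trainer side, a BufferSensor observation arrives as a fixed (MaxNumObservables, ObservableSize) block in which rows that were never appended stay zero. A hedged sketch (not from the commit; shapes are illustrative) of how SimpleTransformer.get_masks, defined later in this commit, recovers those padding rows:

from mlagents.torch_utils import torch
from mlagents.trainers.torch.attention import SimpleTransformer

max_num_obs, obs_size = 4, 3
buffer_obs = torch.zeros((1, max_num_obs, obs_size))  # batch of one agent
buffer_obs[0, 0] = torch.tensor([0.1, 0.2, 0.3])      # only one entity was appended
masks = SimpleTransformer.get_masks([buffer_obs])[0]
# Rows that stayed all-zero are flagged with 1 so attention can ignore them.
assert masks.tolist() == [[0.0, 1.0, 1.0, 1.0]]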

27
com.unity.ml-agents/Runtime/Sensors/BufferSensorComponent.cs


using UnityEngine;
namespace Unity.MLAgents.Sensors
{
/// <summary>
/// A component for BufferSensor.
/// </summary>
[AddComponentMenu("ML Agents/Buffer Sensor", (int)MenuGroup.Sensors)]
public class BufferSensorComponent : SensorComponent
{
public int ObservableSize;
public int MaxNumObservables;
/// <inheritdoc/>
public override ISensor CreateSensor()
{
return new BufferSensor(MaxNumObservables, ObservableSize);
}
/// <inheritdoc/>
public override int[] GetObservationShape()
{
return new[] { MaxNumObservables, ObservableSize };
}
}
}

47
com.unity.ml-agents/Runtime/Sensors/IDimensionPropertiesSensor.cs


namespace Unity.MLAgents.Sensors
{
/// <summary>
/// The Dimension property flags of the observations
/// </summary>
[System.Flags]
internal enum DimensionProperty
{
/// <summary>
/// No properties specified.
/// </summary>
Unspecified = 0,
/// <summary>
/// No property of the observation in that dimension. Observation can be processed with
/// fully connected networks.
/// </summary>
None = 1,
/// <summary>
/// Means it is possible to do a convolution in this dimension.
/// </summary>
TranslationalEquivariance = 2,
/// <summary>
/// Means that there can be a variable number of observations in this dimension.
/// The observations are unordered.
/// </summary>
VariableSize = 3,
}
/// <summary>
/// Sensor interface for sensors with special dimension properties.
/// </summary>
internal interface IDimensionPropertiesSensor : ISensor
{
/// <summary>
/// Returns the array containing the properties of each dimension of the
/// observation. The length of the array must be equal to the rank of the
/// observation tensor.
/// </summary>
/// <returns>The array of DimensionProperty</returns>
DimensionProperty[] GetDimensionProperties();
}
}

162
ml-agents/mlagents/trainers/tests/torch/test_attention.py


from mlagents.torch_utils import torch
import numpy as np
from mlagents.trainers.torch.layers import linear_layer
from mlagents.trainers.torch.attention import MultiHeadAttention, SimpleTransformer
def test_multi_head_attention_initialization():
q_size, k_size, v_size, o_size, n_h, emb_size = 7, 8, 9, 10, 11, 12
n_k, n_q, b = 13, 14, 15
mha = MultiHeadAttention(q_size, k_size, v_size, o_size, n_h, emb_size)
query = torch.ones((b, n_q, q_size))
key = torch.ones((b, n_k, k_size))
value = torch.ones((b, n_k, v_size))
output, attention = mha.forward(query, key, value)
assert output.shape == (b, n_q, o_size)
assert attention.shape == (b, n_h, n_q, n_k)
def test_multi_head_attention_masking():
epsilon = 0.0001
q_size, k_size, v_size, o_size, n_h, emb_size = 7, 8, 9, 10, 11, 12
n_k, n_q, b = 13, 14, 15
mha = MultiHeadAttention(q_size, k_size, v_size, o_size, n_h, emb_size)
# create a key input with some keys all 0
key = torch.ones((b, n_k, k_size))
mask = torch.zeros((b, n_k))
for i in range(n_k):
if i % 3 == 0:
key[:, i, :] = 0
mask[:, i] = 1
query = torch.ones((b, n_q, q_size))
value = torch.ones((b, n_k, v_size))
_, attention = mha.forward(query, key, value, mask)
for i in range(n_k):
if i % 3 == 0:
assert torch.sum(attention[:, :, :, i] ** 2) < epsilon
else:
assert torch.sum(attention[:, :, :, i] ** 2) > epsilon
def test_multi_head_attention_training():
np.random.seed(1336)
torch.manual_seed(1336)
size, n_h, n_k, n_q = 3, 10, 5, 1
embedding_size = 64
mha = MultiHeadAttention(size, size, size, size, n_h, embedding_size)
optimizer = torch.optim.Adam(mha.parameters(), lr=0.001)
batch_size = 200
point_range = 3
init_error = -1.0
for _ in range(50):
query = torch.rand((batch_size, n_q, size)) * point_range * 2 - point_range
key = torch.rand((batch_size, n_k, size)) * point_range * 2 - point_range
value = key
with torch.no_grad():
# create the target : The key closest to the query in euclidean distance
distance = torch.sum((query - key) ** 2, dim=2)
argmin = torch.argmin(distance, dim=1)
target = []
for i in range(batch_size):
target += [key[i, argmin[i], :]]
target = torch.stack(target, dim=0)
target = target.detach()
prediction, _ = mha.forward(query, key, value)
prediction = prediction.reshape((batch_size, size))
error = torch.mean((prediction - target) ** 2, dim=1)
error = torch.mean(error) / 2
if init_error == -1.0:
init_error = error.item()
else:
assert error.item() < init_error
print(error.item())
optimizer.zero_grad()
error.backward()
optimizer.step()
assert error.item() < 0.5
def test_zero_mask_layer():
batch_size, size = 10, 30
def generate_input_helper(pattern):
_input = torch.zeros((batch_size, 0, size))
for i in range(len(pattern)):
if i % 2 == 0:
_input = torch.cat(
[_input, torch.rand((batch_size, pattern[i], size))], dim=1
)
else:
_input = torch.cat(
[_input, torch.zeros((batch_size, pattern[i], size))], dim=1
)
return _input
masking_pattern_1 = [3, 2, 3, 4]
masking_pattern_2 = [5, 7, 8, 2]
input_1 = generate_input_helper(masking_pattern_1)
input_2 = generate_input_helper(masking_pattern_2)
masks = SimpleTransformer.get_masks([input_1, input_2])
assert len(masks) == 2
masks_1 = masks[0]
masks_2 = masks[1]
assert masks_1.shape == (batch_size, sum(masking_pattern_1))
assert masks_2.shape == (batch_size, sum(masking_pattern_2))
for i in range(len(masking_pattern_1)):
assert masks_1[0, sum(masking_pattern_1[:i])] == (0 if i % 2 == 0 else 1)
for i in range(len(masking_pattern_2)):
assert masks_2[0, sum(masking_pattern_2[:i])] == (0 if i % 2 == 0 else 1)
def test_simple_transformer_training():
np.random.seed(1336)
torch.manual_seed(1336)
size, n_k, = 3, 5
embedding_size = 64
transformer = SimpleTransformer(size, [size], embedding_size)
l_layer = linear_layer(embedding_size, size)
optimizer = torch.optim.Adam(
list(transformer.parameters()) + list(l_layer.parameters()), lr=0.001
)
batch_size = 200
point_range = 3
init_error = -1.0
for _ in range(100):
center = torch.rand((batch_size, size)) * point_range * 2 - point_range
key = torch.rand((batch_size, n_k, size)) * point_range * 2 - point_range
with torch.no_grad():
# create the target : The key closest to the query in euclidean distance
distance = torch.sum(
(center.reshape((batch_size, 1, size)) - key) ** 2, dim=2
)
argmin = torch.argmin(distance, dim=1)
target = []
for i in range(batch_size):
target += [key[i, argmin[i], :]]
target = torch.stack(target, dim=0)
target = target.detach()
masks = SimpleTransformer.get_masks([key])
prediction = transformer.forward(center, [key], masks)
prediction = l_layer(prediction)
prediction = prediction.reshape((batch_size, size))
error = torch.mean((prediction - target) ** 2, dim=1)
error = torch.mean(error) / 2
if init_error == -1.0:
init_error = error.item()
else:
assert error.item() < init_error
print(error.item())
optimizer.zero_grad()
error.backward()
optimizer.step()
assert error.item() < 0.3

191
ml-agents/mlagents/trainers/torch/attention.py


from mlagents.torch_utils import torch
from typing import Tuple, Optional, List
from mlagents.trainers.torch.layers import LinearEncoder
class MultiHeadAttention(torch.nn.Module):
"""
Multi Head Attention module. We do not use the regular Torch implementation since
Barracuda does not support some operators it uses.
Takes as input to the forward method 3 tensors:
- query: of dimensions (batch_size, number_of_queries, query_size)
- key: of dimensions (batch_size, number_of_keys, key_size)
- value: of dimensions (batch_size, number_of_keys, value_size)
The forward method will return 2 tensors:
- The output: (batch_size, number_of_queries, output_size)
- The attention matrix: (batch_size, num_heads, number_of_queries, number_of_keys)
"""
NEG_INF = -1e6
def __init__(
self,
query_size: int,
key_size: int,
value_size: int,
output_size: int,
num_heads: int,
embedding_size: int,
):
super().__init__()
self.n_heads, self.embedding_size = num_heads, embedding_size
self.output_size = output_size
self.fc_q = torch.nn.Linear(query_size, self.n_heads * self.embedding_size)
self.fc_k = torch.nn.Linear(key_size, self.n_heads * self.embedding_size)
self.fc_v = torch.nn.Linear(value_size, self.n_heads * self.embedding_size)
# self.fc_q = LinearEncoder(query_size, 2, self.n_heads * self.embedding_size)
# self.fc_k = LinearEncoder(key_size,2, self.n_heads * self.embedding_size)
# self.fc_v = LinearEncoder(value_size,2, self.n_heads * self.embedding_size)
self.fc_out = torch.nn.Linear(
self.n_heads * self.embedding_size, self.output_size
)
def forward(
self,
query: torch.Tensor,
key: torch.Tensor,
value: torch.Tensor,
key_mask: Optional[torch.Tensor] = None,
number_of_keys: int = -1,
number_of_queries: int = -1,
) -> Tuple[torch.Tensor, torch.Tensor]:
b = -1 # the batch size
# This is to avoid using .size() when possible, as Barracuda does not support it
n_q = number_of_queries if number_of_queries != -1 else query.size(1)
n_k = number_of_keys if number_of_keys != -1 else key.size(1)
query = self.fc_q(query) # (b, n_q, h*d)
key = self.fc_k(key) # (b, n_k, h*d)
value = self.fc_v(value) # (b, n_k, h*d)
query = query.reshape(b, n_q, self.n_heads, self.embedding_size)
key = key.reshape(b, n_k, self.n_heads, self.embedding_size)
value = value.reshape(b, n_k, self.n_heads, self.embedding_size)
query = query.permute([0, 2, 1, 3]) # (b, h, n_q, emb)
# The next few lines are equivalent to: key.permute([0, 2, 3, 1])
# This is a hack: ONNX would otherwise fuse the two permute operations into one, and
# Barracuda does not like seeing `permute([0,2,3,1])`
key = key.permute([0, 2, 1, 3]) # (b, h, n_k, emb)
key -= 1
key += 1
key = key.permute([0, 1, 3, 2]) # (b, h, emb, n_k)
qk = torch.matmul(query, key) # (b, h, n_q, n_k)
if key_mask is None:
qk = qk / (self.embedding_size ** 0.5)
else:
key_mask = key_mask.reshape(b, 1, 1, n_k)
qk = (1 - key_mask) * qk / (
self.embedding_size ** 0.5
) + key_mask * self.NEG_INF
att = torch.softmax(qk, dim=3) # (b, h, n_q, n_k)
value = value.permute([0, 2, 1, 3]) # (b, h, n_k, emb)
value_attention = torch.matmul(att, value) # (b, h, n_q, emb)
value_attention = value_attention.permute([0, 2, 1, 3]) # (b, n_q, h, emb)
value_attention = value_attention.reshape(
b, n_q, self.n_heads * self.embedding_size
) # (b, n_q, h*emb)
out = self.fc_out(value_attention) # (b, n_q, output_size)
return out, att
class SimpleTransformer(torch.nn.Module):
"""
A simple architecture inspired from https://arxiv.org/pdf/1909.07528.pdf that uses
multi head self attention to encode information about a "Self" and a list of
relevant "Entities".
"""
EPSILON = 1e-7
def __init__(
self,
x_self_size: int,
entities_sizes: List[int],
embedding_size: int,
output_size: Optional[int] = None,
):
super().__init__()
self.self_size = x_self_size
self.entities_sizes = entities_sizes
self.entities_num_max_elements: Optional[List[int]] = None
self.ent_encoders = torch.nn.ModuleList(
[
LinearEncoder(self.self_size + ent_size, 2, embedding_size)
for ent_size in self.entities_sizes
]
)
self.attention = MultiHeadAttention(
query_size=embedding_size,
key_size=embedding_size,
value_size=embedding_size,
output_size=embedding_size,
num_heads=4,
embedding_size=embedding_size,
)
self.residual_layer = LinearEncoder(embedding_size, 1, embedding_size)
if output_size is None:
output_size = embedding_size
self.x_self_residual_layer = LinearEncoder(
embedding_size + x_self_size, 1, output_size
)
def forward(
self,
x_self: torch.Tensor,
entities: List[torch.Tensor],
key_masks: List[torch.Tensor],
) -> torch.Tensor:
# Gather the maximum number of entities information
if self.entities_num_max_elements is None:
self.entities_num_max_elements = []
for ent in entities:
self.entities_num_max_elements.append(ent.shape[1])
# Concatenate all observations with self
self_and_ent: List[torch.Tensor] = []
for num_entities, ent in zip(self.entities_num_max_elements, entities):
expanded_self = x_self.reshape(-1, 1, self.self_size)
# .repeat(
# 1, num_entities, 1
# )
expanded_self = torch.cat([expanded_self] * num_entities, dim=1)
self_and_ent.append(torch.cat([expanded_self, ent], dim=2))
# Generate the tensor that will serve as query, key and value to self attention
qkv = torch.cat(
[ent_encoder(x) for ent_encoder, x in zip(self.ent_encoders, self_and_ent)],
dim=1,
)
mask = torch.cat(key_masks, dim=1)
# Feed to self attention
max_num_ent = sum(self.entities_num_max_elements)
output, _ = self.attention(qkv, qkv, qkv, mask, max_num_ent, max_num_ent)
# Residual
output = self.residual_layer(output) + qkv
# Average Pooling
numerator = torch.sum(output * (1 - mask).reshape(-1, max_num_ent, 1), dim=1)
denominator = torch.sum(1 - mask, dim=1, keepdim=True) + self.EPSILON
output = numerator / denominator
# Residual between x_self and the output of the module
output = self.x_self_residual_layer(torch.cat([output, x_self], dim=1))
return output
@staticmethod
def get_masks(observations: List[torch.Tensor]) -> List[torch.Tensor]:
"""
Takes a List of Tensors and returns a List of mask Tensors with 1 if the input was
all zeros (on dimension 2) and 0 otherwise. This is used in the Attention
layer to mask the padding observations.
"""
with torch.no_grad():
# Generate the masking tensors for each entities tensor (mask only if all zeros)
key_masks: List[torch.Tensor] = [
(torch.sum(ent ** 2, axis=2) < 0.01).type(torch.FloatTensor)
for ent in observations
]
return key_masks
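
A small numeric check (not part of the module) of the masking trick used in MultiHeadAttention.forward: padded keys get a NEG_INF logit, so their softmax weight collapses to roughly zero while the remaining weights still sum to one.

from mlagents.torch_utils import torch

NEG_INF = -1e6
emb = 4.0
qk = torch.tensor([[2.0, 1.0, 3.0]])    # raw scores of one query against 3 keys
mask = torch.tensor([[0.0, 0.0, 1.0]])  # the third key is padding
logits = (1 - mask) * qk / (emb ** 0.5) + mask * NEG_INF
att = torch.softmax(logits, dim=1)
assert att[0, 2].item() < 1e-6           # the padded key contributes nothing
assert abs(att[0].sum().item() - 1.0) < 1e-5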