
addressing some comments

/develop/singular-embeddings
vincentpierre, 4 years ago
Current commit
38fc2536
4 files changed: 38 insertions and 182 deletions
  1. ml-agents/mlagents/trainers/tests/torch/test_utils.py (85 changes)
  2. ml-agents/mlagents/trainers/torch/model_serialization.py (6 changes)
  3. ml-agents/mlagents/trainers/torch/networks.py (23 changes)
  4. ml-agents/mlagents/trainers/torch/utils.py (106 changes)

ml-agents/mlagents/trainers/tests/torch/test_utils.py (85 changes)


 from mlagents.trainers.torch.utils import ModelUtils
 from mlagents.trainers.exception import UnityTrainerException
 from mlagents.trainers.torch.encoders import VectorInput
-from mlagents_envs.base_env import ObservationSpec, DimensionProperty, ObservationType
 from mlagents.trainers.tests.dummy_config import create_observation_specs_with_shapes

     ModelUtils.soft_update(tm1, tm2, tau=1.0)
     assert torch.equal(tm2.parameter, tm1.parameter)
-
-
-def test_can_train_dim_property():
-    spec = ObservationSpec(
-        (5, 5, 3),
-        (
-            DimensionProperty.UNSPECIFIED,
-            DimensionProperty.UNSPECIFIED,
-            DimensionProperty.UNSPECIFIED,
-        ),
-        ObservationType.DEFAULT,
-    )
-    assert ModelUtils.can_encode_visual(spec)
-    assert not ModelUtils.can_encode_vector(spec)
-    assert not ModelUtils.can_encode_attention(spec)
-
-    spec = ObservationSpec(
-        (5, 5, 3),
-        (
-            DimensionProperty.TRANSLATIONAL_EQUIVARIANCE,
-            DimensionProperty.TRANSLATIONAL_EQUIVARIANCE,
-            DimensionProperty.NONE,
-        ),
-        ObservationType.DEFAULT,
-    )
-    assert ModelUtils.can_encode_visual(spec)
-    assert not ModelUtils.can_encode_vector(spec)
-    assert not ModelUtils.can_encode_attention(spec)
-
-    spec = ObservationSpec(
-        (5, 5, 3, 5),
-        (
-            DimensionProperty.UNSPECIFIED,
-            DimensionProperty.UNSPECIFIED,
-            DimensionProperty.UNSPECIFIED,
-            DimensionProperty.UNSPECIFIED,
-        ),
-        ObservationType.DEFAULT,
-    )
-    assert not ModelUtils.can_encode_visual(spec)
-    assert not ModelUtils.can_encode_vector(spec)
-    assert not ModelUtils.can_encode_attention(spec)
-
-    spec = ObservationSpec(
-        (5, 6),
-        (DimensionProperty.UNSPECIFIED, DimensionProperty.UNSPECIFIED),
-        ObservationType.DEFAULT,
-    )
-    assert not ModelUtils.can_encode_visual(spec)
-    assert not ModelUtils.can_encode_vector(spec)
-    assert not ModelUtils.can_encode_attention(spec)
-
-    spec = ObservationSpec(
-        (5, 6),
-        (
-            DimensionProperty.TRANSLATIONAL_EQUIVARIANCE,
-            DimensionProperty.TRANSLATIONAL_EQUIVARIANCE,
-        ),
-        ObservationType.DEFAULT,
-    )
-    assert not ModelUtils.can_encode_visual(spec)
-    assert not ModelUtils.can_encode_vector(spec)
-    assert not ModelUtils.can_encode_attention(spec)
-
-    spec = ObservationSpec(
-        (5, 6),
-        (DimensionProperty.VARIABLE_SIZE, DimensionProperty.NONE),
-        ObservationType.DEFAULT,
-    )
-    assert not ModelUtils.can_encode_visual(spec)
-    assert not ModelUtils.can_encode_vector(spec)
-    assert ModelUtils.can_encode_attention(spec)
-
-    spec = ObservationSpec(
-        (5,), (DimensionProperty.UNSPECIFIED,), ObservationType.DEFAULT
-    )
-    assert not ModelUtils.can_encode_visual(spec)
-    assert ModelUtils.can_encode_vector(spec)
-    assert not ModelUtils.can_encode_attention(spec)
-
-    spec = ObservationSpec((5,), (DimensionProperty.NONE,), ObservationType.DEFAULT)
-    assert not ModelUtils.can_encode_visual(spec)
-    assert ModelUtils.can_encode_vector(spec)
-    assert not ModelUtils.can_encode_attention(spec)
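
The removed test pins down how an ObservationSpec's dimension_property tuple selects an encoder family: rank-3 tensors whose first two dimensions are UNSPECIFIED or TRANSLATIONAL_EQUIVARIANCE are visual, rank-1 tensors with UNSPECIFIED or NONE dimensions are vector, and rank-2 tensors shaped (VARIABLE_SIZE, NONE) go to attention. A minimal sketch of that dispatch, assuming the branch's can_encode_* helpers are importable; encoder_family is a hypothetical helper, not repo code:

    from mlagents_envs.base_env import ObservationSpec, DimensionProperty, ObservationType
    from mlagents.trainers.torch.utils import ModelUtils

    def encoder_family(spec: ObservationSpec) -> str:
        # Hypothetical helper: name the family the three predicates select.
        if ModelUtils.can_encode_visual(spec):
            return "visual"
        if ModelUtils.can_encode_vector(spec):
            return "vector"
        if ModelUtils.can_encode_attention(spec):
            return "attention"
        return "unsupported"

    spec = ObservationSpec(
        (5, 5, 3), (DimensionProperty.UNSPECIFIED,) * 3, ObservationType.DEFAULT
    )
    print(encoder_family(spec))  # "visual"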

ml-agents/mlagents/trainers/torch/model_serialization.py (6 changes)


         ]
         dummy_var_len_obs = [
-            torch.zeros(batch_dim + [sen_spec.shape[0], sen_spec.shape[1]])
-            for sen_spec in self.policy.behavior_spec.observation_specs
-            if len(sen_spec.shape) == 2
+            torch.zeros(batch_dim + [obs_spec.shape[0], obs_spec.shape[1]])
+            for obs_spec in self.policy.behavior_spec.observation_specs
+            if len(obs_spec.shape) == 2
         ]
         dummy_masks = torch.ones(
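
The change above is a straight rename (sen_spec to obs_spec), matching the ObservationSpec naming used elsewhere in this commit. The comprehension gives the ONNX exporter a zero tensor for every rank-2 (variable-length) observation; a self-contained sketch with made-up shapes:

    import torch

    batch_dim = [1]  # assumed dummy batch size for export
    obs_shapes = [(3,), (20, 6), (84, 84, 3)]  # hypothetical behavior spec shapes
    dummy_var_len_obs = [
        torch.zeros(batch_dim + [shape[0], shape[1]])
        for shape in obs_shapes
        if len(shape) == 2  # only rank-2 observations are variable-length
    ]
    print([t.shape for t in dummy_var_len_obs])  # [torch.Size([1, 20, 6])]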

ml-agents/mlagents/trainers/torch/networks.py (23 changes)


             entities_max_len = [
                 observation_specs[idx].shape[0] for idx in var_len_indices
             ]
-            self.x_self_encoder = LinearEncoder(x_self_len, 2, self.h_size // 2)
-            self.var_len_encoders = torch.nn.ModuleList(
-                [
-                    LinearEncoder(ent_size, 2, self.h_size // 2)
-                    for ent_size in entities_sizes
-                ]
-            )
             self.entities_embeddings = EntityEmbeddings(
-                self.h_size // 2,
-                [self.h_size // 2] * len(var_len_indices),
-                entities_max_len,
-                self.h_size,
+                self.h_size, entities_sizes, entities_max_len, self.h_size, False
             )
-            total_enc_size = self.h_size // 2 + self.h_size
+            total_enc_size = x_self_len + self.h_size
+            n_layers = max(1, network_settings.num_layers - 2)
         else:

         if len(var_len_inputs) > 0:
             # Some inputs need to be processed with a variable length encoder
             masks = EntityEmbeddings.get_masks(var_len_inputs)
-            encoded_self = self.x_self_encoder(encoded_self)
-            encoded_var_len = [
-                encoder(x) for encoder, x in zip(self.var_len_encoders, var_len_inputs)
-            ]
-            qkv = self.entities_embeddings(encoded_self, encoded_var_len)
+            qkv = self.entities_embeddings(encoded_self, var_len_inputs)

-        if len(encodes) == 0:
+        if encoded_self.shape[1] == 0:
             raise Exception("No valid inputs to network.")
         # Constants don't work in Barracuda
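
With the per-entity LinearEncoders gone, the forward pass hands the raw var_len_inputs to self.entities_embeddings and only computes the attention masks beforehand. A small sketch of that masking step, assuming EntityEmbeddings lives in mlagents.trainers.torch.attention and that all-zero entity rows denote padding:

    import torch
    from mlagents.trainers.torch.attention import EntityEmbeddings

    var_len_obs = torch.zeros(1, 4, 6)      # (batch, max entities, entity size)
    var_len_obs[0, :2] = torch.rand(2, 6)   # only the first two entity slots are real
    masks = EntityEmbeddings.get_masks([var_len_obs])
    print(masks[0].shape)  # one mask value per entity slot; zero rows count as padding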

ml-agents/mlagents/trainers/torch/utils.py (106 changes)


     @staticmethod
     def get_encoder_for_obs(
-        shape: Tuple[int, ...],
+        obs_spec: ObservationSpec,
         normalize: bool,
         h_size: int,
         vis_encode_type: EncoderType,

         :param h_size: Number of hidden units per layer.
         :param vis_encode_type: Type of visual encoder to use.
         """
-        if len(shape) == 1:
-            # Case rank 1 tensor
-            return (VectorInput(shape[0], normalize), shape[0])
-        if len(shape) == 3:
-            ModelUtils._check_resolution_for_encoder(
-                shape[0], shape[1], vis_encode_type
-            )
+        shape = obs_spec.shape
+        dim_prop = obs_spec.dimension_property
+        # VISUAL
+        valid_visual = (
+            DimensionProperty.TRANSLATIONAL_EQUIVARIANCE,
+            DimensionProperty.TRANSLATIONAL_EQUIVARIANCE,
+            DimensionProperty.NONE,
+        )
+        valid_visual_unspecified = (DimensionProperty.UNSPECIFIED,) * 3
+        if dim_prop == valid_visual or dim_prop == valid_visual_unspecified:

-        raise UnityTrainerException(f"Unsupported shape of {shape} for observation")
-
-    @staticmethod
-    def can_encode_visual(sensor_spec: ObservationSpec) -> bool:
-        """
-        Returns True if it is possible to create a visual embedding for the sensor
-        """
-        if len(sensor_spec.shape) != 3:
-            return False
-        for conv_dim in [0, 1]:
-            prop = sensor_spec.dimension_property[conv_dim]
-            if (prop != DimensionProperty.UNSPECIFIED) and (
-                prop != DimensionProperty.TRANSLATIONAL_EQUIVARIANCE
-            ):
-                return False
-        prop = sensor_spec.dimension_property[2]
-        if (
-            (prop != DimensionProperty.UNSPECIFIED)
-            and (prop != DimensionProperty.TRANSLATIONAL_EQUIVARIANCE)
-            and (prop != DimensionProperty.NONE)
-        ):
-            return False
-        return True
-
-    @staticmethod
-    def can_encode_vector(sensor_spec: ObservationSpec) -> bool:
-        """
-        Returns True if it is possible to create a vector embedding for the sensor
-        """
-        if len(sensor_spec.shape) != 1:
-            return False
-        prop = sensor_spec.dimension_property[0]
-        if (prop != DimensionProperty.UNSPECIFIED) and (prop != DimensionProperty.NONE):
-            return False
-        return True
-
-    @staticmethod
-    def can_encode_attention(sensor_spec: ObservationSpec) -> bool:
-        """
-        Returns True if it is possible to create an attention embedding for the sensor
-        """
-        if len(sensor_spec.shape) != 2:
-            return False
-        if sensor_spec.dimension_property[0] != DimensionProperty.VARIABLE_SIZE:
-            return False
-        if sensor_spec.dimension_property[1] != DimensionProperty.NONE:
-            return False
-        return True
+        # VECTOR
+        valid_vector = (DimensionProperty.NONE,)
+        valid_vector_unspecified = (DimensionProperty.UNSPECIFIED,)
+        if dim_prop == valid_vector or dim_prop == valid_vector_unspecified:
+            return (VectorInput(shape[0], normalize), shape[0])
+        # VARIABLE LENGTH
+        valid_var_len = (DimensionProperty.VARIABLE_SIZE, DimensionProperty.NONE)
+        if dim_prop == valid_var_len:
+            # None means the residual self attention must be used
+            return (None, 0)
+        # OTHER
+        raise UnityTrainerException(f"Unsupported Sensor with specs {obs_spec}")
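
After this change a single get_encoder_for_obs call covers all three families and returns (None, 0) when the observation must go through the variable-length attention path. A usage sketch, with the argument order taken from the hunk above and create_observation_specs_with_shapes assumed to produce UNSPECIFIED dimension properties for rank-1 shapes:

    from mlagents.trainers.settings import EncoderType
    from mlagents.trainers.torch.utils import ModelUtils
    from mlagents.trainers.tests.dummy_config import create_observation_specs_with_shapes

    # Rank-1 spec: dispatched to the VECTOR branch above.
    vec_spec = create_observation_specs_with_shapes([(8,)])[0]
    encoder, size = ModelUtils.get_encoder_for_obs(
        vec_spec, normalize=False, h_size=128, vis_encode_type=EncoderType.SIMPLE
    )
    print(type(encoder).__name__, size)  # VectorInput 8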
     @staticmethod
     def create_input_processors(

           observation encoder)
         - A list of embedding sizes (0 if the input requires to be processed with a variable length
           observation encoder)
-        - A list of the inputs that need to be processed by a variable length observation encder.
+        - A list of the inputs that need to be processed by a variable length observation encoder.

-            if ModelUtils.can_encode_attention(obs_spec):
-                # This is a 2D tensor
-                # TODO : better if condition
+            encoder, embedding_size = ModelUtils.get_encoder_for_obs(
+                obs_spec, normalize, h_size, vis_encode_type
+            )
+            encoders.append(encoder)
+            embedding_sizes.append(embedding_size)
+            if encoder is None:
-                encoders.append(None)
-                embedding_sizes.append(0)
-            elif ModelUtils.can_encode_vector(obs_spec) or ModelUtils.can_encode_visual(
-                obs_spec
-            ):
-                encoder, embedding_size = ModelUtils.get_encoder_for_obs(
-                    obs_spec.shape, normalize, h_size, vis_encode_type
-                )
-                encoders.append(encoder)
-                embedding_sizes.append(embedding_size)
-            else:
-                raise UnityTrainerException(
-                    "The following Sensor is incompatible with the trainer {sen_spec}"
-                )
         return (nn.ModuleList(encoders), embedding_sizes, var_len_indices)

     @staticmethod
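
create_input_processors now delegates every spec to get_encoder_for_obs and records a None encoder with embedding size 0 for variable-length inputs, as the hunk above shows. A sketch of calling it, with the keyword names assumed from the surrounding code:

    from mlagents.trainers.settings import EncoderType
    from mlagents.trainers.torch.utils import ModelUtils
    from mlagents.trainers.tests.dummy_config import create_observation_specs_with_shapes

    # One vector observation and one visual observation (84x84 RGB), both encodable.
    obs_specs = create_observation_specs_with_shapes([(8,), (84, 84, 3)])
    encoders, embedding_sizes, var_len_indices = ModelUtils.create_input_processors(
        obs_specs, h_size=128, vis_encode_type=EncoderType.SIMPLE, normalize=False
    )
    print(embedding_sizes, var_len_indices)  # e.g. [8, 128] and [] (no var-len inputs)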
