import logging
from enum import Enum
from typing import Callable, List

import numpy as np
import tensorflow as tf

# These import paths assume the ml-agents package layout this module ships with.
from mlagents.trainers.exception import UnityTrainerException
from mlagents.envs.brain import CameraResolution

logger = logging.getLogger("mlagents.trainers")

ActivationFunction = Callable[[tf.Tensor], tf.Tensor]
EncoderFunction = Callable[
    [tf.Tensor, int, ActivationFunction, int, str, bool], tf.Tensor
]

EPSILON = 1e-7

class EncoderType(Enum):
    SIMPLE = "simple"
    NATURE_CNN = "nature_cnn"
    RESNET = "resnet"


class LearningRateSchedule(Enum):
    CONSTANT = "constant"
    LINEAR = "linear"


class LearningModel:
    # Minimum supported side for each encoder type. If refactoring an encoder, please
    # adjust these also.
    MIN_RESOLUTION_FOR_ENCODER = {
        EncoderType.SIMPLE: 20,
        EncoderType.NATURE_CNN: 36,
        EncoderType.RESNET: 15,
    }

    def __init__(
        self, m_size, normalize, use_recurrent, brain, seed, stream_names=None
    ):
        ...

    @staticmethod
    def create_resnet_visual_observation_encoder(
        image_input: tf.Tensor,
        h_size: int,
        activation: ActivationFunction,
        num_layers: int,
        scope: str,
        reuse: bool,
    ) -> tf.Tensor:
        # ... convolutional body elided; it produces a flat, dense encoding ...
        return hidden_flat

    @staticmethod
    def get_encoder_for_type(encoder_type: EncoderType) -> EncoderFunction:
        ENCODER_FUNCTION_BY_TYPE = {
            EncoderType.SIMPLE: LearningModel.create_visual_observation_encoder,
            EncoderType.NATURE_CNN: LearningModel.create_nature_cnn_visual_observation_encoder,
            EncoderType.RESNET: LearningModel.create_resnet_visual_observation_encoder,
        }
        return ENCODER_FUNCTION_BY_TYPE.get(
            encoder_type, LearningModel.create_visual_observation_encoder
        )
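
    # A minimal usage sketch (hypothetical values, not taken from this file): a
    # trainer would parse the encoder choice into an EncoderType, resolve the
    # encoder function, and call it positionally, matching the EncoderFunction
    # alias defined at the top of this module:
    #
    #     vis_encode_type = EncoderType("resnet")
    #     encoder_fn = LearningModel.get_encoder_for_type(vis_encode_type)
    #     encoded = encoder_fn(visual_in, 256, tf.nn.elu, 2, "stream_0_encoder_0", False)
    #
    # dict.get with a default keeps the dispatch total: an unrecognized encoder
    # type falls back to the simple encoder instead of raising a KeyError.
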
    @staticmethod
    def create_discrete_action_masking_layer(all_logits, action_masks, action_size):
        """
        Creates a masking layer for the discrete actions
        :param all_logits: The concatenated unnormalized action probabilities for all branches
        :param action_masks: The mask for the logits. Must be of dimension [None x total_number_of_action]
        :param action_size: A list containing the number of possible actions for each branch
        :return: The sampled action per branch of dimension [batch_size, num_branches], the
            concatenated normalized probabilities (after softmax) and the concatenated
            normalized log probabilities
        """
        action_idx = [0] + list(np.cumsum(action_size))
        branches_logits = [
            all_logits[:, action_idx[i] : action_idx[i + 1]]
            for i in range(len(action_size))
        ]
        branch_masks = [
            action_masks[:, action_idx[i] : action_idx[i + 1]]
            for i in range(len(action_size))
        ]
        raw_probs = [
            tf.multiply(tf.nn.softmax(branches_logits[k]) + EPSILON, branch_masks[k])
            for k in range(len(action_size))
        ]
        normalized_probs = [
            tf.divide(raw_probs[k], tf.reduce_sum(raw_probs[k], axis=1, keepdims=True))
            for k in range(len(action_size))
        ]
        output = tf.concat(
            [
                tf.multinomial(tf.log(normalized_probs[k] + EPSILON), 1)
                for k in range(len(action_size))
            ],
            axis=1,
        )
        return (
            output,
            tf.concat([normalized_probs[k] for k in range(len(action_size))], axis=1),
            tf.concat(
                [
                    tf.log(normalized_probs[k] + EPSILON)
                    for k in range(len(action_size))
                ],
                axis=1,
            ),
        )
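
    # A worked numeric sketch of the masking above (illustrative values only):
    # with branch sizes [2, 3] and the second action of branch 0 masked out,
    #
    #     all_logits   = [[0.0, 0.0, 0.0, 0.0, 0.0]]
    #     action_masks = [[1.0, 0.0, 1.0, 1.0, 1.0]]
    #
    # branch 0's softmax is [0.5, 0.5]; multiplying by the mask and
    # renormalizing gives [1.0, 0.0], while branch 1 remains uniform at
    # [1/3, 1/3, 1/3]. Each branch still sums to 1, and masked actions are
    # sampled with (near-)zero probability.
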
    @staticmethod
    def _check_resolution_for_encoder(
        camera_res: CameraResolution, vis_encoder_type: EncoderType
    ) -> None:
        min_res = LearningModel.MIN_RESOLUTION_FOR_ENCODER[vis_encoder_type]
        if camera_res.height < min_res or camera_res.width < min_res:
            raise UnityTrainerException(
                f"Visual observation resolution ({camera_res.width}x{camera_res.height}) "
                f"is too small for the provided EncoderType ({vis_encoder_type.value}). "
                f"The minimum dimension is {min_res}."
            )
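
    # For example (hypothetical camera): a 32x32 visual observation passes the
    # SIMPLE check (min side 20) but fails NATURE_CNN (min side 36):
    #
    #     LearningModel._check_resolution_for_encoder(res_32x32, EncoderType.SIMPLE)
    #     LearningModel._check_resolution_for_encoder(res_32x32, EncoderType.NATURE_CNN)  # raises UnityTrainerException
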
    def create_observation_streams(
        self,
        num_streams: int,
        h_size: int,
        num_layers: int,
        vis_encode_type: EncoderType = EncoderType.SIMPLE,
        stream_scopes: List[str] = None,
    ) -> List[tf.Tensor]:
        """
        Creates encoding stream for observations.
        :param num_streams: Number of streams to create.
        :param h_size: Size of hidden linear layers in stream.
        :param num_layers: Number of hidden linear layers in stream.
        :param vis_encode_type: Type of visual encoder to use for visual observations.
        :param stream_scopes: List of strings (length == num_streams), which contains
            the scopes for each of the streams. None if all under the same TF scope.
        :return: List of encoded streams.
        """
        brain = self.brain
        activation_fn = self.swish
        self.visual_in = []
        for i in range(brain.number_visual_observations):
            LearningModel._check_resolution_for_encoder(
                brain.camera_resolutions[i], vis_encode_type
            )
            visual_input = self.create_visual_input(
                brain.camera_resolutions[i], name="visual_observation_" + str(i)
            )
            self.visual_in.append(visual_input)

        # Pick the encoder function based on the EncoderType
        create_encoder_func = LearningModel.get_encoder_for_type(vis_encode_type)

        final_hiddens = []
        for i in range(num_streams):
            visual_encoders = []
            hidden_state, hidden_visual = None, None
            _scope_add = stream_scopes[i] if stream_scopes else ""
            if brain.number_visual_observations > 0:
                for j in range(brain.number_visual_observations):
                    encoded_visual = create_encoder_func(
                        self.visual_in[j],
                        h_size,
                        activation_fn,
                        num_layers,
                        f"{_scope_add}main_graph_{i}_encoder{j}",  # scope
                        False,  # reuse
                    )
                    visual_encoders.append(encoded_visual)
                hidden_visual = tf.concat(visual_encoders, axis=1)
|
|
|