Add different types of visual encoder (nature cnn/resnet)

Add resnet and nature cnn in addition to default visual encoder
6 年前 · be4292fb
--- a/config/trainer_config.yaml
+++ b/config/trainer_config.yaml
    sequence_length: 64
    summary_freq: 1000
    use_recurrent: false
+    vis_encode_type: default
    reward_signals: 
        extrinsic:
            strength: 1.0
--- a/ml-agents/mlagents/trainers/models.py
+++ b/ml-agents/mlagents/trainers/models.py
 import logging
+from enum import Enum
 from typing import Any, Callable, Dict

 import numpy as np
 logger = logging.getLogger("mlagents.trainers")

 ActivationFunction = Callable[[tf.Tensor], tf.Tensor]
+
+
+class EncoderType(Enum):
+    """
+    Visual encoder architectures that can be selected with the
+    ``vis_encode_type`` trainer setting. Each member's value is the string
+    expected in the trainer configuration.
+    """
+
+    RESNET = "resnet"
+    NATURE_CNN = "nature_cnn"
+    DEFAULT = "default"


 class LearningModel(object):
        reuse: bool,
    ) -> tf.Tensor:
        """
-        Builds a set of visual (CNN) encoders.
-        :param reuse: Whether to re-use the weights within the same scope.
-        :param scope: The scope of the graph within which to create the ops.
+        Builds a set of default visual (CNN) encoders.
+        :param scope: The scope of the graph within which to create the ops.
+        :param reuse: Whether to re-use the weights within the same scope.
        :return: List of hidden layer tensors.
        """
        with tf.variable_scope(scope):
            )
        return hidden_flat

+    def create_nature_cnn_visual_observation_encoder(
+        self,
+        image_input: tf.Tensor,
+        h_size: int,
+        activation: ActivationFunction,
+        num_layers: int,
+        scope: str,
+        reuse: bool,
+    ) -> tf.Tensor:
+        """
+        Builds a nature CNN visual encoder: three convolutional layers whose
+        flattened output is passed through create_vector_observation_encoder.
+        :param image_input: The placeholder for the image input to use.
+        :param h_size: Hidden layer size.
+        :param activation: What type of activation function to use for the
+            layers built after the convolutions (the convolutions themselves
+            always use ELU).
+        :param num_layers: number of hidden layers to create.
+        :param scope: The scope of the graph within which to create the ops.
+        :param reuse: Whether to re-use the weights within the same scope.
+        :return: The tensor returned by create_vector_observation_encoder for
+            the flattened convolutional features.
+        """
+        # Route the trace through the module logger instead of stdout.
+        logger.debug("creating nature cnn")
+        with tf.variable_scope(scope):
+            # 8x8 kernels with stride 4, 32 filters.
+            conv1 = tf.layers.conv2d(
+                image_input,
+                32,
+                kernel_size=[8, 8],
+                strides=[4, 4],
+                activation=tf.nn.elu,
+                reuse=reuse,
+                name="conv_1",
+            )
+            # 4x4 kernels with stride 2, 64 filters.
+            conv2 = tf.layers.conv2d(
+                conv1,
+                64,
+                kernel_size=[4, 4],
+                strides=[2, 2],
+                activation=tf.nn.elu,
+                reuse=reuse,
+                name="conv_2",
+            )
+            # 3x3 kernels with stride 1, 64 filters.
+            conv3 = tf.layers.conv2d(
+                conv2,
+                64,
+                kernel_size=[3, 3],
+                strides=[1, 1],
+                activation=tf.nn.elu,
+                reuse=reuse,
+                name="conv_3",
+            )
+            hidden = c_layers.flatten(conv3)
+
+        with tf.variable_scope(scope + "/" + "flat_encoding"):
+            hidden_flat = self.create_vector_observation_encoder(
+                hidden, h_size, activation, num_layers, scope, reuse
+            )
+        return hidden_flat
+
+    def create_resnet_visual_observation_encoder(
+        self,
+        image_input: tf.Tensor,
+        h_size: int,
+        activation: ActivationFunction,
+        num_layers: int,
+        scope: str,
+        reuse: bool,
+    ) -> tf.Tensor:
+        """
+        Builds a resnet visual encoder: three stacks, each made of a
+        convolution, a max-pooling layer and two residual blocks. The
+        flattened result is passed through create_vector_observation_encoder.
+        :param image_input: The placeholder for the image input to use.
+        :param h_size: Hidden layer size.
+        :param activation: What type of activation function to use for the
+            layers built after the convolutional stacks (the residual blocks
+            themselves always use ReLU).
+        :param num_layers: number of hidden layers to create.
+        :param scope: The scope of the graph within which to create the ops.
+        :param reuse: Whether to re-use the weights within the same scope.
+        :return: The tensor returned by create_vector_observation_encoder for
+            the flattened convolutional features.
+        """
+        # Route the trace through the module logger instead of stdout.
+        logger.debug("creating resnet")
+        n_channels = [16, 32, 32]  # channel for each stack
+        n_blocks = 2  # number of residual blocks per stack
+        with tf.variable_scope(scope):
+            hidden = image_input
+            for i, ch in enumerate(n_channels):
+                # Entry convolution of the stack; no padding argument is
+                # given, so the tf.layers.conv2d default applies.
+                hidden = tf.layers.conv2d(
+                    hidden,
+                    ch,
+                    kernel_size=[3, 3],
+                    strides=[1, 1],
+                    reuse=reuse,
+                    name="layer%dconv_1" % i,
+                )
+                # Stride-2 pooling downsamples before the residual blocks.
+                hidden = tf.layers.max_pooling2d(
+                    hidden, pool_size=[3, 3], strides=[2, 2], padding="same"
+                )
+                # create residual blocks
+                for j in range(n_blocks):
+                    # Keep the block input for the skip connection below.
+                    block_input = hidden
+                    hidden = tf.nn.relu(hidden)
+                    hidden = tf.layers.conv2d(
+                        hidden,
+                        ch,
+                        kernel_size=[3, 3],
+                        strides=[1, 1],
+                        padding="same",
+                        reuse=reuse,
+                        name="layer%d_%d_conv1" % (i, j),
+                    )
+                    hidden = tf.nn.relu(hidden)
+                    hidden = tf.layers.conv2d(
+                        hidden,
+                        ch,
+                        kernel_size=[3, 3],
+                        strides=[1, 1],
+                        padding="same",
+                        reuse=reuse,
+                        name="layer%d_%d_conv2" % (i, j),
+                    )
+                    # "same" padding and an unchanged channel count keep the
+                    # shapes equal, so the skip connection is a plain add.
+                    hidden = tf.add(block_input, hidden)
+            hidden = tf.nn.relu(hidden)
+            hidden = c_layers.flatten(hidden)
+
+        with tf.variable_scope(scope + "/" + "flat_encoding"):
+            hidden_flat = self.create_vector_observation_encoder(
+                hidden, h_size, activation, num_layers, scope, reuse
+            )
+        return hidden_flat
+
    @staticmethod
    def create_discrete_action_masking_layer(all_logits, action_masks, action_size):
        """
            ),
        )

-    def create_observation_streams(self, num_streams, h_size, num_layers):
+    def create_observation_streams(
+        self, num_streams, h_size, num_layers, vis_encode_type="default"
+    ):
        """
        Creates encoding stream for observations.
        :param num_streams: Number of streams to create.
            visual_encoders = []
            hidden_state, hidden_visual = None, None
            if self.vis_obs_size > 0:
-                for j in range(brain.number_visual_observations):
-                    encoded_visual = self.create_visual_observation_encoder(
-                        self.visual_in[j],
-                        h_size,
-                        activation_fn,
-                        num_layers,
-                        "main_graph_{}_encoder{}".format(i, j),
-                        False,
-                    )
-                    visual_encoders.append(encoded_visual)
+                vis_encode_type = EncoderType(vis_encode_type)
+                if vis_encode_type == EncoderType.RESNET:
+                    for j in range(brain.number_visual_observations):
+                        encoded_visual = self.create_resnet_visual_observation_encoder(
+                            self.visual_in[j],
+                            h_size,
+                            activation_fn,
+                            num_layers,
+                            "main_graph_{}_encoder{}".format(i, j),
+                            False,
+                        )
+                        visual_encoders.append(encoded_visual)
+                elif vis_encode_type == EncoderType.NATURE_CNN:
+                    for j in range(brain.number_visual_observations):
+                        encoded_visual = self.create_nature_cnn_visual_observation_encoder(
+                            self.visual_in[j],
+                            h_size,
+                            activation_fn,
+                            num_layers,
+                            "main_graph_{}_encoder{}".format(i, j),
+                            False,
+                        )
+                        visual_encoders.append(encoded_visual)
+                else:
+                    for j in range(brain.number_visual_observations):
+                        encoded_visual = self.create_visual_observation_encoder(
+                            self.visual_in[j],
+                            h_size,
+                            activation_fn,
+                            num_layers,
+                            "main_graph_{}_encoder{}".format(i, j),
+                            False,
+                        )
+                        visual_encoders.append(encoded_visual)
                hidden_visual = tf.concat(visual_encoders, axis=1)
            if brain.vector_observation_space_size > 0:
                hidden_state = self.create_vector_observation_encoder(
            self.value_heads[name] = value
        self.value = tf.reduce_mean(list(self.value_heads.values()), 0)

-    def create_cc_actor_critic(self, h_size, num_layers):
+    def create_cc_actor_critic(self, h_size, num_layers, vis_encode_type):
-        hidden_streams = self.create_observation_streams(2, h_size, num_layers)
+        hidden_streams = self.create_observation_streams(
+            2, h_size, num_layers, vis_encode_type
+        )

        if self.use_recurrent:
            self.memory_in = tf.placeholder(
            (tf.identity(self.all_old_log_probs)), axis=1, keepdims=True
        )

-    def create_dc_actor_critic(self, h_size, num_layers):
+    def create_dc_actor_critic(self, h_size, num_layers, vis_encode_type):
-        hidden_streams = self.create_observation_streams(1, h_size, num_layers)
+        hidden_streams = self.create_observation_streams(
+            1, h_size, num_layers, vis_encode_type
+        )
        hidden = hidden_streams[0]

        if self.use_recurrent:
--- a/ml-agents/mlagents/trainers/ppo/models.py
+++ b/ml-agents/mlagents/trainers/ppo/models.py
        m_size=None,
        seed=0,
        stream_names=None,
+        vis_encode_type="default",
    ):
        """
        Takes a Unity environment and model-specific hyper-parameters and returns the
        if num_layers < 1:
            num_layers = 1
        if brain.vector_action_space_type == "continuous":
-            self.create_cc_actor_critic(h_size, num_layers)
+            self.create_cc_actor_critic(h_size, num_layers, vis_encode_type)
-            self.create_dc_actor_critic(h_size, num_layers)
+            self.create_dc_actor_critic(h_size, num_layers, vis_encode_type)
        self.create_losses(
            self.log_probs,
            self.old_log_probs,
--- a/ml-agents/mlagents/trainers/ppo/policy.py
+++ b/ml-agents/mlagents/trainers/ppo/policy.py
                m_size=self.m_size,
                seed=seed,
                stream_names=list(reward_signal_configs.keys()),
+                vis_encode_type=trainer_params["vis_encode_type"],
            )
            self.model.create_ppo_optimizer()

--- a/ml-agents/mlagents/trainers/ppo/trainer.py
+++ b/ml-agents/mlagents/trainers/ppo/trainer.py
            "memory_size",
            "model_path",
            "reward_signals",
+            "vis_encode_type",
        ]
        self.check_param_keys()

--- a/ml-agents/mlagents/trainers/tests/test_bcmodule.py
+++ b/ml-agents/mlagents/trainers/tests/test_bcmodule.py
        sequence_length: 64
        summary_freq: 1000
        use_recurrent: false
+        vis_encode_type: default
        memory_size: 8
        pretraining:
          demo_path: ./demos/ExpertPyramid.demo
--- a/ml-agents/mlagents/trainers/tests/test_ppo.py
+++ b/ml-agents/mlagents/trainers/tests/test_ppo.py
        sequence_length: 64
        summary_freq: 1000
        use_recurrent: false
+        vis_encode_type: default
        memory_size: 8
        curiosity_strength: 0.0
        curiosity_enc_size: 1
        "sequence_length": 64,
        "summary_freq": 3000,
        "use_recurrent": False,
+        "vis_encode_type": "default",
        "use_curiosity": False,
        "curiosity_strength": 0.01,
        "curiosity_enc_size": 128,
--- a/ml-agents/mlagents/trainers/tests/test_reward_signals.py
+++ b/ml-agents/mlagents/trainers/tests/test_reward_signals.py
        sequence_length: 64
        summary_freq: 1000
        use_recurrent: false
+        vis_encode_type: default
        memory_size: 8
        curiosity_strength: 0.0
        curiosity_enc_size: 1