
refactor vis_encoder_type and add to doc

/develop-generalizationTraining-TrainerController
GitHub · 5 years ago
Commit 6225317d
9 files changed, with 33 insertions and 18 deletions
  1. docs/Training-PPO.md (13 changes)
  2. ml-agents/mlagents/trainers/models.py (23 changes)
  3. ml-agents/mlagents/trainers/ppo/models.py (4 changes)
  4. ml-agents/mlagents/trainers/ppo/policy.py (5 changes)
  5. ml-agents/mlagents/trainers/ppo/trainer.py (1 change)
  6. ml-agents/mlagents/trainers/tests/test_bcmodule.py (1 change)
  7. ml-agents/mlagents/trainers/tests/test_environments/test_simple.py (1 change)
  8. ml-agents/mlagents/trainers/tests/test_ppo.py (2 changes)
  9. ml-agents/mlagents/trainers/tests/test_reward_signals.py (1 change)

docs/Training-PPO.md (13 changes)


Typical Range: `32` - `512`
### (Optional) Visual Encoder Type
`vis_encode_type` corresponds to the encoder type for encoding visual observations.
Valid options include:
* `simple` (default): a simple encoder which consists of two convolutional layers
* `nature_cnn`: CNN implementation proposed by Mnih et al. (https://www.nature.com/articles/nature14236),
consisting of three convolutional layers
* `resnet`: IMPALA Resnet implementation (https://arxiv.org/abs/1802.01561),
consisting of three stacked layers, each with two residual blocks, making a
much larger network than the other two.
Options: `simple`, `nature_cnn`, `resnet`
## (Optional) Recurrent Neural Network Hyperparameters
The below hyperparameters are only used when `use_recurrent` is set to true.
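As a rough illustration of the `simple` (default) encoder described in the excerpt above, here is a minimal TensorFlow 1.x sketch. The function name `simple_visual_encoder_sketch`, the filter counts, kernel sizes, and ELU activations are illustrative assumptions, not values taken from this commit:

```python
import tensorflow as tf

def simple_visual_encoder_sketch(image_input, h_size, scope, reuse=False):
    """Illustrative two-convolution visual encoder (the `simple` option).
    Layer sizes and activations here are assumptions for demonstration."""
    with tf.variable_scope(scope, reuse=reuse):
        conv1 = tf.layers.conv2d(image_input, 16, kernel_size=[8, 8],
                                 strides=[4, 4], activation=tf.nn.elu, name="conv_1")
        conv2 = tf.layers.conv2d(conv1, 32, kernel_size=[4, 4],
                                 strides=[2, 2], activation=tf.nn.elu, name="conv_2")
        flat = tf.layers.flatten(conv2)
        # Project to the requested hidden size so the visual stream can be
        # combined with vector observations downstream.
        return tf.layers.dense(flat, h_size, activation=tf.nn.elu)
```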

ml-agents/mlagents/trainers/models.py (23 changes)


class EncoderType(Enum):
    RESNET = "resnet"
    SIMPLE = "simple"
    DEFAULT = "default"
class LearningModel(object):

:param reuse: Whether to re-use the weights within the same scope.
:return: List of hidden layer tensors.
"""
print("creating nature cnn")
with tf.variable_scope(scope):
conv1 = tf.layers.conv2d(
image_input,
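The `nature_cnn` path above builds the three-convolution network of Mnih et al. (2015). Below is a hedged TensorFlow 1.x sketch; the function name `nature_cnn_sketch` and the layer sizes (the standard DQN values) are assumptions for illustration and may differ from the actual ml-agents implementation:

```python
import tensorflow as tf

def nature_cnn_sketch(image_input, h_size, scope, reuse=False):
    """Illustrative three-convolution encoder in the style of Mnih et al. (2015).
    The layer sizes are the usual DQN values and are assumptions here."""
    with tf.variable_scope(scope, reuse=reuse):
        conv1 = tf.layers.conv2d(image_input, 32, kernel_size=[8, 8],
                                 strides=[4, 4], activation=tf.nn.elu, name="conv_1")
        conv2 = tf.layers.conv2d(conv1, 64, kernel_size=[4, 4],
                                 strides=[2, 2], activation=tf.nn.elu, name="conv_2")
        conv3 = tf.layers.conv2d(conv2, 64, kernel_size=[3, 3],
                                 strides=[1, 1], activation=tf.nn.elu, name="conv_3")
        flat = tf.layers.flatten(conv3)
        # Project to the requested hidden size so the stream matches h_size.
        return tf.layers.dense(flat, h_size, activation=tf.nn.elu)
```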

:param reuse: Whether to re-use the weights within the same scope.
:return: List of hidden layer tensors.
"""
print("creating resnet")
n_channels = [16, 32, 32]  # number of channels for each stack
n_blocks = 2 # number of residual blocks
with tf.variable_scope(scope):
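In the hunk above, `n_channels = [16, 32, 32]` and `n_blocks = 2` describe three stacks with two residual blocks each, following the IMPALA architecture. A rough sketch of one such stack is shown below; the function name `impala_stack_sketch` and the conv/pool details are assumptions based on the IMPALA paper rather than code copied from this commit:

```python
import tensorflow as tf

def impala_stack_sketch(x, channels, n_blocks, name):
    """One IMPALA-style stack (https://arxiv.org/abs/1802.01561): a 3x3 conv,
    a strided max-pool, then n_blocks residual blocks. This is a sketch based
    on the paper, not the exact ml-agents implementation."""
    x = tf.layers.conv2d(x, channels, kernel_size=[3, 3], strides=[1, 1],
                         padding="same", name=name + "_conv")
    x = tf.layers.max_pooling2d(x, pool_size=[3, 3], strides=[2, 2],
                                padding="same")
    for i in range(n_blocks):
        shortcut = x
        block = tf.nn.relu(x)
        block = tf.layers.conv2d(block, channels, kernel_size=[3, 3],
                                 strides=[1, 1], padding="same",
                                 name="%s_block%d_conv1" % (name, i))
        block = tf.nn.relu(block)
        block = tf.layers.conv2d(block, channels, kernel_size=[3, 3],
                                 strides=[1, 1], padding="same",
                                 name="%s_block%d_conv2" % (name, i))
        x = block + shortcut
    return x

# The full resnet encoder would apply one such stack per entry in
# n_channels = [16, 32, 32], then flatten and project to the hidden size.
```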

)
def create_observation_streams(
self, num_streams, h_size, num_layers, vis_encode_type="default"
):
self,
num_streams: int,
h_size: int,
num_layers: int,
vis_encode_type: EncoderType = EncoderType.SIMPLE,
) -> tf.Tensor:
"""
Creates encoding stream for observations.
:param num_streams: Number of streams to create.

visual_encoders = []
hidden_state, hidden_visual = None, None
if self.vis_obs_size > 0:
vis_encode_type = EncoderType(vis_encode_type)
if vis_encode_type == EncoderType.RESNET:
for j in range(brain.number_visual_observations):
encoded_visual = self.create_resnet_visual_observation_encoder(
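The hunk above shows `create_observation_streams` switching on the new `EncoderType` to build a ResNet encoder for each visual observation. The self-contained sketch below illustrates that dispatch pattern; `build_simple`, `build_nature_cnn`, `build_resnet`, and `encode_visual_observations` are stand-in names, not the real `LearningModel` methods:

```python
from enum import Enum
from typing import Callable, Dict, List

class EncoderType(Enum):
    SIMPLE = "simple"
    NATURE_CNN = "nature_cnn"
    RESNET = "resnet"

# Stand-ins for the LearningModel methods that build each visual encoder.
def build_simple(obs: str) -> str:
    return "simple(%s)" % obs

def build_nature_cnn(obs: str) -> str:
    return "nature_cnn(%s)" % obs

def build_resnet(obs: str) -> str:
    return "resnet(%s)" % obs

ENCODER_BUILDERS: Dict[EncoderType, Callable[[str], str]] = {
    EncoderType.SIMPLE: build_simple,
    EncoderType.NATURE_CNN: build_nature_cnn,
    EncoderType.RESNET: build_resnet,
}

def encode_visual_observations(observations: List[str],
                               vis_encode_type: EncoderType) -> List[str]:
    """Run every visual observation through the configured encoder, mirroring
    the per-observation loop in create_observation_streams."""
    builder = ENCODER_BUILDERS[vis_encode_type]
    return [builder(obs) for obs in observations]

print(encode_visual_observations(["camera_0", "camera_1"], EncoderType.RESNET))
# ['resnet(camera_0)', 'resnet(camera_1)']
```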

self.value_heads[name] = value
self.value = tf.reduce_mean(list(self.value_heads.values()), 0)
def create_cc_actor_critic(self, h_size, num_layers, vis_encode_type):
def create_cc_actor_critic(
self, h_size: int, num_layers: int, vis_encode_type: EncoderType
) -> None:
"""
Creates Continuous control actor-critic model.
:param h_size: Size of hidden linear layers.

(tf.identity(self.all_old_log_probs)), axis=1, keepdims=True
)
def create_dc_actor_critic(self, h_size, num_layers, vis_encode_type):
def create_dc_actor_critic(
self, h_size: int, num_layers: int, vis_encode_type: EncoderType
) -> None:
"""
Creates Discrete control actor-critic model.
:param h_size: Size of hidden linear layers.

ml-agents/mlagents/trainers/ppo/models.py (4 changes)


import numpy as np
import tensorflow as tf
from mlagents.trainers.models import LearningModel
from mlagents.trainers.models import LearningModel, EncoderType
logger = logging.getLogger("mlagents.trainers")

m_size=None,
seed=0,
stream_names=None,
vis_encode_type="default",
vis_encode_type=EncoderType.SIMPLE,
):
"""
Takes a Unity environment and model-specific hyper-parameters and returns the

ml-agents/mlagents/trainers/ppo/policy.py (5 changes)


from mlagents.envs.timers import timed
from mlagents.trainers import BrainInfo, ActionInfo
from mlagents.trainers.models import EncoderType
from mlagents.trainers.ppo.models import PPOModel
from mlagents.trainers.tf_policy import TFPolicy
from mlagents.trainers.components.reward_signals.reward_signal_factory import (

m_size=self.m_size,
seed=seed,
stream_names=list(reward_signal_configs.keys()),
vis_encode_type=trainer_params["vis_encode_type"],
vis_encode_type=EncoderType(
trainer_params.get("vis_encode_type", "simple")
),
)
self.model.create_ppo_optimizer()
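The policy now resolves the setting with `trainer_params.get("vis_encode_type", "simple")`, so configs that omit the key fall back to the simple encoder, while an unrecognized string fails fast when converted to the enum. A small self-contained sketch of that behaviour (`resolve_encoder_type` is an illustrative helper, and the `EncoderType` members here mirror the documented options):

```python
from enum import Enum

class EncoderType(Enum):
    SIMPLE = "simple"
    NATURE_CNN = "nature_cnn"
    RESNET = "resnet"

def resolve_encoder_type(trainer_params: dict) -> EncoderType:
    """Mirrors the lookup in policy.py: a missing key falls back to the simple
    encoder; an unknown value raises ValueError from the enum conversion."""
    return EncoderType(trainer_params.get("vis_encode_type", "simple"))

print(resolve_encoder_type({}))                             # EncoderType.SIMPLE
print(resolve_encoder_type({"vis_encode_type": "resnet"}))  # EncoderType.RESNET
try:
    resolve_encoder_type({"vis_encode_type": "resnext"})
except ValueError as err:
    print(err)  # 'resnext' is not a valid EncoderType
```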

ml-agents/mlagents/trainers/ppo/trainer.py (1 change)


"memory_size",
"model_path",
"reward_signals",
"vis_encode_type",
]
self.check_param_keys()

ml-agents/mlagents/trainers/tests/test_bcmodule.py (1 change)


sequence_length: 64
summary_freq: 1000
use_recurrent: false
vis_encode_type: default
memory_size: 8
pretraining:
demo_path: ./demos/ExpertPyramid.demo

ml-agents/mlagents/trainers/tests/test_environments/test_simple.py (1 change)


extrinsic:
strength: 1.0
gamma: 0.99
vis_encode_type: default
"""
# Create controller and begin training.
with tempfile.TemporaryDirectory() as dir:

ml-agents/mlagents/trainers/tests/test_ppo.py (2 changes)


sequence_length: 64
summary_freq: 1000
use_recurrent: false
vis_encode_type: default
memory_size: 8
curiosity_strength: 0.0
curiosity_enc_size: 1

"sequence_length": 64,
"summary_freq": 3000,
"use_recurrent": False,
"vis_encode_type": "default",
"use_curiosity": False,
"curiosity_strength": 0.01,
"curiosity_enc_size": 128,

ml-agents/mlagents/trainers/tests/test_reward_signals.py (1 change)


sequence_length: 64
summary_freq: 1000
use_recurrent: false
vis_encode_type: default
memory_size: 8
curiosity_strength: 0.0
curiosity_enc_size: 1
