
Add different types of visual encoder (nature cnn/resnet)

Add resnet and nature cnn in addition to default visual encoder
/develop-generalizationTraining-TrainerController
GitHub committed 5 years ago
Current commit: be4292fb
8 changed files with 190 additions and 20 deletions
  1. config/trainer_config.yaml (1 change)
  2. ml-agents/mlagents/trainers/models.py (198 changes)
  3. ml-agents/mlagents/trainers/ppo/models.py (5 changes)
  4. ml-agents/mlagents/trainers/ppo/policy.py (1 change)
  5. ml-agents/mlagents/trainers/ppo/trainer.py (1 change)
  6. ml-agents/mlagents/trainers/tests/test_bcmodule.py (1 change)
  7. ml-agents/mlagents/trainers/tests/test_ppo.py (2 changes)
  8. ml-agents/mlagents/trainers/tests/test_reward_signals.py (1 change)

config/trainer_config.yaml (1 change)


    sequence_length: 64
    summary_freq: 1000
    use_recurrent: false
    vis_encode_type: default
    reward_signals:
        extrinsic:
            strength: 1.0
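
The new vis_encode_type key is read per behavior section of trainer_config.yaml and accepts the three values defined by the EncoderType enum added in models.py. A minimal usage sketch (the VisualPyramids section name is hypothetical, not part of this commit):

    VisualPyramids:                # hypothetical behavior section
        trainer: ppo
        use_recurrent: false
        vis_encode_type: resnet    # one of: default, nature_cnn, resnet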

ml-agents/mlagents/trainers/models.py (198 changes)


import logging
from enum import Enum
from typing import Any, Callable, Dict

import numpy as np
import tensorflow as tf
import tensorflow.contrib.layers as c_layers

logger = logging.getLogger("mlagents.trainers")

ActivationFunction = Callable[[tf.Tensor], tf.Tensor]


class EncoderType(Enum):
    RESNET = "resnet"
    NATURE_CNN = "nature_cnn"
    DEFAULT = "default"
class LearningModel(object):

        reuse: bool,
    ) -> tf.Tensor:
        """
        Builds a set of visual (CNN) encoders.
        :param scope: The scope of the graph within which to create the ops.
        :param reuse: Whether to re-use the weights within the same scope.
        :return: List of hidden layer tensors.
        """
        with tf.variable_scope(scope):

        )
        return hidden_flat
    def create_nature_cnn_visual_observation_encoder(
        self,
        image_input: tf.Tensor,
        h_size: int,
        activation: ActivationFunction,
        num_layers: int,
        scope: str,
        reuse: bool,
    ) -> tf.Tensor:
        """
        Builds a Nature-CNN visual encoder (the DQN architecture).
        :param image_input: The placeholder for the image input to use.
        :param h_size: Hidden layer size.
        :param activation: What type of activation function to use for layers.
        :param num_layers: number of hidden layers to create.
        :param scope: The scope of the graph within which to create the ops.
        :param reuse: Whether to re-use the weights within the same scope.
        :return: List of hidden layer tensors.
        """
        logger.debug("Creating Nature-CNN visual encoder")
        with tf.variable_scope(scope):
            conv1 = tf.layers.conv2d(
                image_input,
                32,
                kernel_size=[8, 8],
                strides=[4, 4],
                activation=tf.nn.elu,
                reuse=reuse,
                name="conv_1",
            )
            conv2 = tf.layers.conv2d(
                conv1,
                64,
                kernel_size=[4, 4],
                strides=[2, 2],
                activation=tf.nn.elu,
                reuse=reuse,
                name="conv_2",
            )
            conv3 = tf.layers.conv2d(
                conv2,
                64,
                kernel_size=[3, 3],
                strides=[1, 1],
                activation=tf.nn.elu,
                reuse=reuse,
                name="conv_3",
            )
            hidden = c_layers.flatten(conv3)
        with tf.variable_scope(scope + "/" + "flat_encoding"):
            hidden_flat = self.create_vector_observation_encoder(
                hidden, h_size, activation, num_layers, scope, reuse
            )
        return hidden_flat
    def create_resnet_visual_observation_encoder(
        self,
        image_input: tf.Tensor,
        h_size: int,
        activation: ActivationFunction,
        num_layers: int,
        scope: str,
        reuse: bool,
    ) -> tf.Tensor:
        """
        Builds a set of resnet visual encoders (IMPALA-style residual stacks).
        :param image_input: The placeholder for the image input to use.
        :param h_size: Hidden layer size.
        :param activation: What type of activation function to use for layers.
        :param num_layers: number of hidden layers to create.
        :param scope: The scope of the graph within which to create the ops.
        :param reuse: Whether to re-use the weights within the same scope.
        :return: List of hidden layer tensors.
        """
        logger.debug("Creating resnet visual encoder")
        n_channels = [16, 32, 32]  # channels for each stack
        n_blocks = 2  # number of residual blocks per stack
        with tf.variable_scope(scope):
            hidden = image_input
            for i, ch in enumerate(n_channels):
                # Each stack: one convolution, then a stride-2 max-pool.
                hidden = tf.layers.conv2d(
                    hidden,
                    ch,
                    kernel_size=[3, 3],
                    strides=[1, 1],
                    reuse=reuse,
                    name="layer%dconv_1" % i,
                )
                hidden = tf.layers.max_pooling2d(
                    hidden, pool_size=[3, 3], strides=[2, 2], padding="same"
                )
                # Create the residual blocks for this stack.
                for j in range(n_blocks):
                    block_input = hidden
                    hidden = tf.nn.relu(hidden)
                    hidden = tf.layers.conv2d(
                        hidden,
                        ch,
                        kernel_size=[3, 3],
                        strides=[1, 1],
                        padding="same",
                        reuse=reuse,
                        name="layer%d_%d_conv1" % (i, j),
                    )
                    hidden = tf.nn.relu(hidden)
                    hidden = tf.layers.conv2d(
                        hidden,
                        ch,
                        kernel_size=[3, 3],
                        strides=[1, 1],
                        padding="same",
                        reuse=reuse,
                        name="layer%d_%d_conv2" % (i, j),
                    )
                    # Skip connection around the two convolutions.
                    hidden = tf.add(block_input, hidden)
            hidden = tf.nn.relu(hidden)
            hidden = c_layers.flatten(hidden)
        with tf.variable_scope(scope + "/" + "flat_encoding"):
            hidden_flat = self.create_vector_observation_encoder(
                hidden, h_size, activation, num_layers, scope, reuse
            )
        return hidden_flat
    @staticmethod
    def create_discrete_action_masking_layer(all_logits, action_masks, action_size):
        """

            ),
        )
    def create_observation_streams(
        self, num_streams, h_size, num_layers, vis_encode_type="default"
    ):
        """
        Creates encoding stream for observations.
        :param num_streams: Number of streams to create.

            visual_encoders = []
            hidden_state, hidden_visual = None, None
            if self.vis_obs_size > 0:
                vis_encode_type = EncoderType(vis_encode_type)
                if vis_encode_type == EncoderType.RESNET:
                    for j in range(brain.number_visual_observations):
                        encoded_visual = self.create_resnet_visual_observation_encoder(
                            self.visual_in[j],
                            h_size,
                            activation_fn,
                            num_layers,
                            "main_graph_{}_encoder{}".format(i, j),
                            False,
                        )
                        visual_encoders.append(encoded_visual)
                elif vis_encode_type == EncoderType.NATURE_CNN:
                    for j in range(brain.number_visual_observations):
                        encoded_visual = self.create_nature_cnn_visual_observation_encoder(
                            self.visual_in[j],
                            h_size,
                            activation_fn,
                            num_layers,
                            "main_graph_{}_encoder{}".format(i, j),
                            False,
                        )
                        visual_encoders.append(encoded_visual)
                else:
                    for j in range(brain.number_visual_observations):
                        encoded_visual = self.create_visual_observation_encoder(
                            self.visual_in[j],
                            h_size,
                            activation_fn,
                            num_layers,
                            "main_graph_{}_encoder{}".format(i, j),
                            False,
                        )
                        visual_encoders.append(encoded_visual)
                hidden_visual = tf.concat(visual_encoders, axis=1)
            if brain.vector_observation_space_size > 0:
                hidden_state = self.create_vector_observation_encoder(

            self.value_heads[name] = value
        self.value = tf.reduce_mean(list(self.value_heads.values()), 0)

    def create_cc_actor_critic(self, h_size, num_layers, vis_encode_type):
        hidden_streams = self.create_observation_streams(
            2, h_size, num_layers, vis_encode_type
        )
        if self.use_recurrent:
            self.memory_in = tf.placeholder(

            (tf.identity(self.all_old_log_probs)), axis=1, keepdims=True
        )

    def create_dc_actor_critic(self, h_size, num_layers, vis_encode_type):
        hidden_streams = self.create_observation_streams(
            1, h_size, num_layers, vis_encode_type
        )
        hidden = hidden_streams[0]
        if self.use_recurrent:
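
All three encoders end in the same flat_encoding dense stack, so only the convolutional trunk differs. Below is a standalone sketch (an editor's addition, not part of the commit) that checks the string-to-enum mapping used by create_observation_streams and works out the flattened width of the Nature-CNN trunk for an 84x84 frame, using the 'valid' padding that tf.layers.conv2d defaults to:

    from enum import Enum

    class EncoderType(Enum):
        RESNET = "resnet"
        NATURE_CNN = "nature_cnn"
        DEFAULT = "default"

    # The config string maps onto the enum by value; a typo raises ValueError.
    assert EncoderType("nature_cnn") is EncoderType.NATURE_CNN

    def conv_out(size, kernel, stride):
        # Spatial output size of a conv layer with 'valid' padding.
        return (size - kernel) // stride + 1

    # Nature-CNN trunk on an 84x84 frame: 8x8/4 -> 4x4/2 -> 3x3/1.
    side = 84
    for kernel, stride in [(8, 4), (4, 2), (3, 1)]:
        side = conv_out(side, kernel, stride)
    print(side, side * side * 64)  # 7, 3136 features before the dense encoder

By the same arithmetic, the resnet trunk's stack convolutions also default to 'valid' padding before each stride-2 max-pool, so an 84x84 input leaves roughly a 9x9x32 map before flattening.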

ml-agents/mlagents/trainers/ppo/models.py (5 changes)


        m_size=None,
        seed=0,
        stream_names=None,
        vis_encode_type="default",
    ):
        """
        Takes a Unity environment and model-specific hyper-parameters and returns the

        if num_layers < 1:
            num_layers = 1
        if brain.vector_action_space_type == "continuous":
            self.create_cc_actor_critic(h_size, num_layers, vis_encode_type)
        else:
            self.create_dc_actor_critic(h_size, num_layers, vis_encode_type)
        self.create_losses(
            self.log_probs,
            self.old_log_probs,

ml-agents/mlagents/trainers/ppo/policy.py (1 change)


            m_size=self.m_size,
            seed=seed,
            stream_names=list(reward_signal_configs.keys()),
            vis_encode_type=trainer_params["vis_encode_type"],
        )
        self.model.create_ppo_optimizer()
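
Because the policy indexes trainer_params["vis_encode_type"] directly, the key becomes mandatory: every config touched by this commit, including the test fixtures below, gains a vis_encode_type entry, and the trainer's required-key list grows to match. A hedged alternative, not what the commit does, would be to tolerate older configs by defaulting the value:

    # Hypothetical fallback (editor's sketch): assume the default encoder
    # when a pre-existing config omits the new key.
    vis_encode_type = trainer_params.get("vis_encode_type", "default")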

ml-agents/mlagents/trainers/ppo/trainer.py (1 change)


"memory_size",
"model_path",
"reward_signals",
"vis_encode_type",
]
self.check_param_keys()

ml-agents/mlagents/trainers/tests/test_bcmodule.py (1 change)


sequence_length: 64
summary_freq: 1000
use_recurrent: false
vis_encode_type: default
memory_size: 8
pretraining:
    demo_path: ./demos/ExpertPyramid.demo

ml-agents/mlagents/trainers/tests/test_ppo.py (2 changes)


sequence_length: 64
summary_freq: 1000
use_recurrent: false
vis_encode_type: default
memory_size: 8
curiosity_strength: 0.0
curiosity_enc_size: 1

"sequence_length": 64,
"summary_freq": 3000,
"use_recurrent": False,
"vis_encode_type": "default",
"use_curiosity": False,
"curiosity_strength": 0.01,
"curiosity_enc_size": 128,

ml-agents/mlagents/trainers/tests/test_reward_signals.py (1 change)


sequence_length: 64
summary_freq: 1000
use_recurrent: false
vis_encode_type: default
memory_size: 8
curiosity_strength: 0.0
curiosity_enc_size: 1
