|
|
|
|
|
|
import logging |
|
|
|
from enum import Enum |
|
|
|
from typing import Any, Callable, Dict |
|
|
|
|
|
|
|
import numpy as np |
|
|
|
|
|
|
logger = logging.getLogger("mlagents.trainers") |
|
|
|
|
|
|
|
ActivationFunction = Callable[[tf.Tensor], tf.Tensor] |
|
|
|
|
|
|
|
|
|
|
|
class EncoderType(Enum):
    """Visual-observation encoder architectures, selectable by string value.

    Members are looked up from their string value, e.g.
    ``EncoderType("resnet")``.
    """

    RESNET = "resnet"
    NATURE_CNN = "nature_cnn"
    DEFAULT = "default"
    # Backward-compatible alias for the original, misspelled member name.
    # Enum treats a second name bound to an existing value as an alias, so
    # ``EncoderType.DEFAUL is EncoderType.DEFAULT`` and iteration only
    # yields the canonical ``DEFAULT`` member.
    DEFAUL = "default"
|
|
|
|
|
|
|
|
|
|
|
class LearningModel(object): |
|
|
|
|
|
|
reuse: bool, |
|
|
|
) -> tf.Tensor: |
|
|
|
""" |
|
|
|
Builds a set of visual (CNN) encoders. |
|
|
|
:param reuse: Whether to re-use the weights within the same scope. |
|
|
|
:param scope: The scope of the graph within which to create the ops. |
|
|
|
Builds a set of resnet visual encoders. |
|
|
|
:param scope: The scope of the graph within which to create the ops. |
|
|
|
:param reuse: Whether to re-use the weights within the same scope. |
|
|
|
:return: List of hidden layer tensors. |
|
|
|
""" |
|
|
|
with tf.variable_scope(scope): |
|
|
|
|
|
|
) |
|
|
|
return hidden_flat |
|
|
|
|
|
|
|
def create_nature_cnn_visual_observation_encoder(
    self,
    image_input: tf.Tensor,
    h_size: int,
    activation: ActivationFunction,
    num_layers: int,
    scope: str,
    reuse: bool,
) -> tf.Tensor:
    """
    Builds a Nature-CNN style visual encoder.

    Three convolutions (32 filters 8x8 stride 4, 64 filters 4x4 stride 2,
    64 filters 3x3 stride 1, all with ELU) are applied to the image, the
    result is flattened, and the flattened features are passed through
    ``create_vector_observation_encoder``.

    :param image_input: The placeholder for the image input to use.
    :param h_size: Hidden layer size, forwarded to the vector encoder.
    :param activation: Activation forwarded to the vector encoder. The
        convolutional layers always use ``tf.nn.elu``.
    :param num_layers: Number of hidden layers, forwarded to the vector
        encoder.
    :param scope: The scope of the graph within which to create the ops.
    :param reuse: Whether to re-use the weights within the same scope.
    :return: The output of ``create_vector_observation_encoder`` applied to
        the flattened convolutional features.
    """
    # Use the module logger instead of printing to stdout from library code.
    logger.debug("creating nature cnn")

    # (filters, kernel_size, strides) for conv_1 .. conv_3.
    conv_specs = [
        (32, [8, 8], [4, 4]),
        (64, [4, 4], [2, 2]),
        (64, [3, 3], [1, 1]),
    ]

    with tf.variable_scope(scope):
        hidden = image_input
        for index, (filters, kernel_size, strides) in enumerate(conv_specs, start=1):
            hidden = tf.layers.conv2d(
                hidden,
                filters,
                kernel_size=kernel_size,
                strides=strides,
                activation=tf.nn.elu,
                reuse=reuse,
                # Layer names are kept as conv_1/conv_2/conv_3 so variable
                # names in the graph are unchanged.
                name="conv_%d" % index,
            )
        hidden = c_layers.flatten(hidden)

    # The dense encoder is built under its own "<scope>/flat_encoding" scope.
    with tf.variable_scope(scope + "/" + "flat_encoding"):
        hidden_flat = self.create_vector_observation_encoder(
            hidden, h_size, activation, num_layers, scope, reuse
        )
    return hidden_flat
|
|
|
|
|
|
|
def create_resnet_visual_observation_encoder(
    self,
    image_input: tf.Tensor,
    h_size: int,
    activation: ActivationFunction,
    num_layers: int,
    scope: str,
    reuse: bool,
) -> tf.Tensor:
    """
    Builds a residual-network visual encoder.

    For each entry in ``n_channels`` a 3x3 convolution and a 3x3/stride-2
    max-pool are applied, followed by ``n_blocks`` residual blocks
    (ReLU -> conv -> ReLU -> conv, added back to the block input). The
    result goes through a final ReLU, is flattened, and is passed through
    ``create_vector_observation_encoder``.

    :param image_input: The placeholder for the image input to use.
    :param h_size: Hidden layer size, forwarded to the vector encoder.
    :param activation: Activation forwarded to the vector encoder. The
        residual blocks always use ``tf.nn.relu``.
    :param num_layers: Number of hidden layers, forwarded to the vector
        encoder.
    :param scope: The scope of the graph within which to create the ops.
    :param reuse: Whether to re-use the weights within the same scope.
    :return: The output of ``create_vector_observation_encoder`` applied to
        the flattened residual features.
    """
    # Use the module logger instead of printing to stdout from library code.
    logger.debug("creating resnet")

    n_channels = [16, 32, 32]  # channel for each stack
    n_blocks = 2  # number of residual blocks

    with tf.variable_scope(scope):
        hidden = image_input
        for i, ch in enumerate(n_channels):
            # Stack entry: convolution followed by down-sampling max-pool.
            hidden = tf.layers.conv2d(
                hidden,
                ch,
                kernel_size=[3, 3],
                strides=[1, 1],
                reuse=reuse,
                name="layer%dconv_1" % i,
            )
            hidden = tf.layers.max_pooling2d(
                hidden, pool_size=[3, 3], strides=[2, 2], padding="same"
            )
            # create residual blocks
            for j in range(n_blocks):
                block_input = hidden
                hidden = tf.nn.relu(hidden)
                hidden = tf.layers.conv2d(
                    hidden,
                    ch,
                    kernel_size=[3, 3],
                    strides=[1, 1],
                    padding="same",
                    reuse=reuse,
                    name="layer%d_%d_conv1" % (i, j),
                )
                hidden = tf.nn.relu(hidden)
                hidden = tf.layers.conv2d(
                    hidden,
                    ch,
                    kernel_size=[3, 3],
                    strides=[1, 1],
                    padding="same",
                    reuse=reuse,
                    name="layer%d_%d_conv2" % (i, j),
                )
                # Skip connection: "same" padding keeps shapes compatible.
                hidden = tf.add(block_input, hidden)
        hidden = tf.nn.relu(hidden)
        hidden = c_layers.flatten(hidden)

    # The dense encoder is built under its own "<scope>/flat_encoding" scope.
    with tf.variable_scope(scope + "/" + "flat_encoding"):
        hidden_flat = self.create_vector_observation_encoder(
            hidden, h_size, activation, num_layers, scope, reuse
        )
    return hidden_flat
|
|
|
|
|
|
|
@staticmethod |
|
|
|
def create_discrete_action_masking_layer(all_logits, action_masks, action_size): |
|
|
|
""" |
|
|
|
|
|
|
), |
|
|
|
) |
|
|
|
|
|
|
|
def create_observation_streams(self, num_streams, h_size, num_layers): |
|
|
|
def create_observation_streams( |
|
|
|
self, num_streams, h_size, num_layers, vis_encode_type="default" |
|
|
|
): |
|
|
|
""" |
|
|
|
Creates encoding stream for observations. |
|
|
|
:param num_streams: Number of streams to create. |
|
|
|
|
|
|
visual_encoders = [] |
|
|
|
hidden_state, hidden_visual = None, None |
|
|
|
if self.vis_obs_size > 0: |
|
|
|
for j in range(brain.number_visual_observations): |
|
|
|
encoded_visual = self.create_visual_observation_encoder( |
|
|
|
self.visual_in[j], |
|
|
|
h_size, |
|
|
|
activation_fn, |
|
|
|
num_layers, |
|
|
|
"main_graph_{}_encoder{}".format(i, j), |
|
|
|
False, |
|
|
|
) |
|
|
|
visual_encoders.append(encoded_visual) |
|
|
|
vis_encode_type = EncoderType(vis_encode_type) |
|
|
|
if vis_encode_type == EncoderType.RESNET: |
|
|
|
for j in range(brain.number_visual_observations): |
|
|
|
encoded_visual = self.create_resnet_visual_observation_encoder( |
|
|
|
self.visual_in[j], |
|
|
|
h_size, |
|
|
|
activation_fn, |
|
|
|
num_layers, |
|
|
|
"main_graph_{}_encoder{}".format(i, j), |
|
|
|
False, |
|
|
|
) |
|
|
|
visual_encoders.append(encoded_visual) |
|
|
|
elif vis_encode_type == EncoderType.NATURE_CNN: |
|
|
|
for j in range(brain.number_visual_observations): |
|
|
|
encoded_visual = self.create_nature_cnn_visual_observation_encoder( |
|
|
|
self.visual_in[j], |
|
|
|
h_size, |
|
|
|
activation_fn, |
|
|
|
num_layers, |
|
|
|
"main_graph_{}_encoder{}".format(i, j), |
|
|
|
False, |
|
|
|
) |
|
|
|
visual_encoders.append(encoded_visual) |
|
|
|
else: |
|
|
|
for j in range(brain.number_visual_observations): |
|
|
|
encoded_visual = self.create_visual_observation_encoder( |
|
|
|
self.visual_in[j], |
|
|
|
h_size, |
|
|
|
activation_fn, |
|
|
|
num_layers, |
|
|
|
"main_graph_{}_encoder{}".format(i, j), |
|
|
|
False, |
|
|
|
) |
|
|
|
visual_encoders.append(encoded_visual) |
|
|
|
hidden_visual = tf.concat(visual_encoders, axis=1) |
|
|
|
if brain.vector_observation_space_size > 0: |
|
|
|
hidden_state = self.create_vector_observation_encoder( |
|
|
|
|
|
|
self.value_heads[name] = value |
|
|
|
self.value = tf.reduce_mean(list(self.value_heads.values()), 0) |
|
|
|
|
|
|
|
def create_cc_actor_critic(self, h_size, num_layers): |
|
|
|
def create_cc_actor_critic(self, h_size, num_layers, vis_encode_type): |
|
|
|
hidden_streams = self.create_observation_streams(2, h_size, num_layers) |
|
|
|
hidden_streams = self.create_observation_streams( |
|
|
|
2, h_size, num_layers, vis_encode_type |
|
|
|
) |
|
|
|
|
|
|
|
if self.use_recurrent: |
|
|
|
self.memory_in = tf.placeholder( |
|
|
|
|
|
|
(tf.identity(self.all_old_log_probs)), axis=1, keepdims=True |
|
|
|
) |
|
|
|
|
|
|
|
def create_dc_actor_critic(self, h_size, num_layers): |
|
|
|
def create_dc_actor_critic(self, h_size, num_layers, vis_encode_type): |
|
|
|
hidden_streams = self.create_observation_streams(1, h_size, num_layers) |
|
|
|
hidden_streams = self.create_observation_streams( |
|
|
|
1, h_size, num_layers, vis_encode_type |
|
|
|
) |
|
|
|
hidden = hidden_streams[0] |
|
|
|
|
|
|
|
if self.use_recurrent: |
|
|
|