from typing import Callable, Dict, List, Tuple, NamedTuple

import numpy as np
from mlagents.tf_utils import tf
from mlagents.trainers.settings import EncoderType, ScheduleType

from mlagents.trainers.exception import UnityTrainerException

ActivationFunction = Callable[[tf.Tensor], tf.Tensor]
EncoderFunction = Callable[
    [tf.Tensor, int, ActivationFunction, int, str, bool], tf.Tensor
]

EPSILON = 1e-7


class Tensor3DShape(NamedTuple):
    height: int
    width: int
    num_channels: int


class NormalizerTensors(NamedTuple):
    init_op: tf.Operation
    update_op: tf.Operation
    steps: tf.Tensor
    running_mean: tf.Tensor
    running_variance: tf.Tensor


class ModelUtils:
    # Minimum supported side for each encoder type. If refactoring an encoder, please
    # adjust these also.
    MIN_RESOLUTION_FOR_ENCODER = {
        EncoderType.MATCH3: 5,
        EncoderType.SIMPLE: 20,
        EncoderType.NATURE_CNN: 36,
        EncoderType.RESNET: 15,
    }

    @staticmethod
    def create_global_steps():
        """Creates TF ops to track and increment global training step."""
        global_step = tf.Variable(
            0, name="global_step", trainable=False, dtype=tf.int64
        )
        steps_to_increment = tf.placeholder(
            shape=[], dtype=tf.int64, name="steps_to_increment"
        )
        increment_step = tf.assign(global_step, tf.add(global_step, steps_to_increment))
        return global_step, increment_step, steps_to_increment

    @staticmethod
    def create_schedule(
        schedule: ScheduleType,
        parameter: float,
        global_step: tf.Tensor,
        max_step: int,
        min_value: float,
    ) -> tf.Tensor:
        """
        Create a learning rate tensor.
        :param lr_schedule: Type of learning rate schedule.
        :param lr: Base learning rate.
        :param global_step: A TF Tensor representing the total global step.
        :param max_step: The maximum number of steps in the training run.
        :return: A Tensor containing the learning rate.
        """
        if schedule == ScheduleType.CONSTANT:
            parameter_rate = tf.Variable(parameter, trainable=False)
        elif schedule == ScheduleType.LINEAR:
            parameter_rate = tf.train.polynomial_decay(
                parameter, global_step, max_step, min_value, power=1.0
            )
        else:
            raise UnityTrainerException(f"The schedule {schedule} is invalid.")
        return parameter_rate

    @staticmethod
    def scaled_init(scale):
        return tf.initializers.variance_scaling(scale)

    @staticmethod
    def swish(input_activation: tf.Tensor) -> tf.Tensor:
        """Swish activation function. For more info: https://arxiv.org/abs/1710.05941"""
        return tf.multiply(input_activation, tf.nn.sigmoid(input_activation))

    @staticmethod
    def create_visual_input(camera_parameters: Tensor3DShape, name: str) -> tf.Tensor:
        """
        Creates image input op.
        :param camera_parameters: Parameters for visual observation.
        :param name: Desired name of input op.
        :return: input op.
        """
        o_size_h = camera_parameters.height
        o_size_w = camera_parameters.width
        c_channels = camera_parameters.num_channels

        visual_in = tf.placeholder(
            shape=[None, o_size_h, o_size_w, c_channels], dtype=tf.float32, name=name
        )
        return visual_in

    @staticmethod
    def create_input_placeholders(
        observation_shapes: List[Tuple], name_prefix: str = ""
    ) -> Tuple[tf.Tensor, List[tf.Tensor]]:
        """
        Creates input placeholders for visual inputs.
        :param observation_shapes: A List of tuples that specify the resolutions
            of the input observations. Tuples for now are restricted to 1D (vector) or 3D (Tensor)
        :param name_prefix: A name prefix to add to the placeholder names. This is used so that there
            is no conflict when creating multiple placeholder sets.
        :returns: A List of Tensorflow placeholders where the input iamges should be fed.
        """
        visual_in: List[tf.Tensor] = []
        vector_in_size = 0
        for i, dimension in enumerate(observation_shapes):
            if len(dimension) == 3:
                _res = Tensor3DShape(
                    height=dimension[0], width=dimension[1], num_channels=dimension[2]
                )
                visual_input = ModelUtils.create_visual_input(
                    _res, name=name_prefix + "visual_observation_" + str(i)
                )
                visual_in.append(visual_input)
            elif len(dimension) == 1:
                vector_in_size += dimension[0]
            else:
                raise UnityTrainerException(
                    f"Unsupported shape of {dimension} for observation {i}"
                )
        vector_in = tf.placeholder(
            shape=[None, vector_in_size],
            dtype=tf.float32,
            name=name_prefix + "vector_observation",
        )
        return vector_in, visual_in

    @staticmethod
    def create_vector_input(
        vec_obs_size: int, name: str = "vector_observation"
    ) -> tf.Tensor:
        """
        Creates ops for vector observation input.
        :param vec_obs_size: Size of stacked vector observation.
        :param name: Name of the placeholder op.
        :return: Placeholder for vector observations.
        """
        vector_in = tf.placeholder(
            shape=[None, vec_obs_size], dtype=tf.float32, name=name
        )
        return vector_in

    @staticmethod
    def normalize_vector_obs(
        vector_obs: tf.Tensor,
        running_mean: tf.Tensor,
        running_variance: tf.Tensor,
        normalization_steps: tf.Tensor,
    ) -> tf.Tensor:
        """
        Create a normalized version of an input tensor.
        :param vector_obs: Input vector observation tensor.
        :param running_mean: Tensorflow tensor representing the current running mean.
        :param running_variance: Tensorflow tensor representing the current running variance.
        :param normalization_steps: Tensorflow tensor representing the current number of normalization_steps.
        :return: A normalized version of vector_obs.
        """
        normalized_state = tf.clip_by_value(
            (vector_obs - running_mean)
            / tf.sqrt(
                running_variance / (tf.cast(normalization_steps, tf.float32) + 1)
            ),
            -5,
            5,
            name="normalized_state",
        )
        return normalized_state

    @staticmethod
    def create_normalizer(vector_obs: tf.Tensor) -> NormalizerTensors:
        """
        Creates the normalizer and the variables required to store its state.
        :param vector_obs: A Tensor representing the next value to normalize. When the
            update operation is called, it will use vector_obs to update the running mean
            and variance.
        :return: A NormalizerTensors tuple that holds running mean, running variance, number of steps,
            and the update operation.
        """
        vec_obs_size = vector_obs.shape[1]

        steps = tf.get_variable(
            "normalization_steps",
            [],
            trainable=False,
            dtype=tf.int64,
            initializer=tf.zeros_initializer(),
        )
        running_mean = tf.get_variable(
            "running_mean",
            [vec_obs_size],
            trainable=False,
            dtype=tf.float32,
            initializer=tf.zeros_initializer(),
        )
        running_variance = tf.get_variable(
            "running_variance",
            [vec_obs_size],
            trainable=False,
            dtype=tf.float32,
            initializer=tf.ones_initializer(),
        )
        (
            initialize_normalization,
            update_normalization,
        ) = ModelUtils.create_normalizer_update(
            vector_obs, steps, running_mean, running_variance
        )
        return NormalizerTensors(
            initialize_normalization,
            update_normalization,
            steps,
            running_mean,
            running_variance,
        )

    @staticmethod
    def create_normalizer_update(
        vector_input: tf.Tensor,
        steps: tf.Tensor,
        running_mean: tf.Tensor,
        running_variance: tf.Tensor,
    ) -> Tuple[tf.Operation, tf.Operation]:
        """
        Creates the update operation for the normalizer.
        :param vector_input: Vector observation to use for updating the running mean and variance.
        :param running_mean: Tensorflow tensor representing the current running mean.
        :param running_variance: Tensorflow tensor representing the current running variance.
        :param steps: Tensorflow tensor representing the current number of steps that have been normalized.
        :return: A TF operation that updates the normalization based on vector_input.
        """
        # Based on Welford's algorithm for running mean and standard deviation, for batch updates. Discussion here:
        # https://stackoverflow.com/questions/56402955/whats-the-formula-for-welfords-algorithm-for-variance-std-with-batch-updates
        steps_increment = tf.shape(vector_input)[0]
        total_new_steps = tf.add(steps, tf.cast(steps_increment, dtype=tf.int64))

        # Compute the incremental update and divide by the number of new steps.
        input_to_old_mean = tf.subtract(vector_input, running_mean)
        new_mean = running_mean + tf.reduce_sum(
            input_to_old_mean / tf.cast(total_new_steps, dtype=tf.float32), axis=0
        )
        # Compute difference of input to the new mean for Welford update
        input_to_new_mean = tf.subtract(vector_input, new_mean)
        new_variance = running_variance + tf.reduce_sum(
            input_to_new_mean * input_to_old_mean, axis=0
        )
        update_mean = tf.assign(running_mean, new_mean)
        update_variance = tf.assign(running_variance, new_variance)
        update_norm_step = tf.assign(steps, total_new_steps)
        # First mean and variance calculated normally
        initial_mean, initial_variance = tf.nn.moments(vector_input, axes=[0])
        initialize_mean = tf.assign(running_mean, initial_mean)
        # Multiplied by total_new_step because it is divided by total_new_step in the normalization
        initialize_variance = tf.assign(
            running_variance,
            (initial_variance + EPSILON) * tf.cast(total_new_steps, dtype=tf.float32),
        )
        return (
            tf.group([initialize_mean, initialize_variance, update_norm_step]),
            tf.group([update_mean, update_variance, update_norm_step]),
        )

    @staticmethod
    def create_vector_observation_encoder(
        observation_input: tf.Tensor,
        h_size: int,
        activation: ActivationFunction,
        num_layers: int,
        scope: str,
        reuse: bool,
    ) -> tf.Tensor:
        """
        Builds a set of hidden state encoders.
        :param reuse: Whether to re-use the weights within the same scope.
        :param scope: Graph scope for the encoder ops.
        :param observation_input: Input vector.
        :param h_size: Hidden layer size.
        :param activation: What type of activation function to use for layers.
        :param num_layers: number of hidden layers to create.
        :return: List of hidden layer tensors.
        """
        with tf.variable_scope(scope):
            hidden = observation_input
            for i in range(num_layers):
                hidden = tf.layers.dense(
                    hidden,
                    h_size,
                    activation=activation,
                    reuse=reuse,
                    name=f"hidden_{i}",
                    kernel_initializer=tf.initializers.variance_scaling(1.0),
                )
        return hidden

    @staticmethod
    def create_visual_observation_encoder(
        image_input: tf.Tensor,
        h_size: int,
        activation: ActivationFunction,
        num_layers: int,
        scope: str,
        reuse: bool,
    ) -> tf.Tensor:
        """
        Builds a set of resnet visual encoders.
        :param image_input: The placeholder for the image input to use.
        :param h_size: Hidden layer size.
        :param activation: What type of activation function to use for layers.
        :param num_layers: number of hidden layers to create.
        :param scope: The scope of the graph within which to create the ops.
        :param reuse: Whether to re-use the weights within the same scope.
        :return: List of hidden layer tensors.
        """
        with tf.variable_scope(scope):
            conv1 = tf.layers.conv2d(
                image_input,
                16,
                kernel_size=[8, 8],
                strides=[4, 4],
                activation=tf.nn.elu,
                reuse=reuse,
                name="conv_1",
            )
            conv2 = tf.layers.conv2d(
                conv1,
                32,
                kernel_size=[4, 4],
                strides=[2, 2],
                activation=tf.nn.elu,
                reuse=reuse,
                name="conv_2",
            )
            hidden = tf.layers.flatten(conv2)

        with tf.variable_scope(scope + "/" + "flat_encoding"):
            hidden_flat = ModelUtils.create_vector_observation_encoder(
                hidden, h_size, activation, num_layers, scope, reuse
            )
        return hidden_flat

    @staticmethod
    def create_match3_visual_observation_encoder(
        image_input: tf.Tensor,
        h_size: int,
        activation: ActivationFunction,
        num_layers: int,
        scope: str,
        reuse: bool,
    ) -> tf.Tensor:
        """
        Builds a CNN with the architecture used by King for Candy Crush. Optimized
        for grid-shaped boards, such as with Match-3 games.
        :param image_input: The placeholder for the image input to use.
        :param h_size: Hidden layer size.
        :param activation: What type of activation function to use for layers.
        :param num_layers: number of hidden layers to create.
        :param scope: The scope of the graph within which to create the ops.
        :param reuse: Whether to re-use the weights within the same scope.
        :return: List of hidden layer tensors.
        """
        with tf.variable_scope(scope):
            conv1 = tf.layers.conv2d(
                image_input,
                35,
                kernel_size=[3, 3],
                strides=[1, 1],
                activation=tf.nn.elu,
                reuse=reuse,
                name="conv_1",
            )
            conv2 = tf.layers.conv2d(
                conv1,
                144,
                kernel_size=[3, 3],
                strides=[1, 1],
                activation=tf.nn.elu,
                reuse=reuse,
                name="conv_2",
            )
            hidden = tf.layers.flatten(conv2)

        with tf.variable_scope(scope + "/" + "flat_encoding"):
            hidden_flat = ModelUtils.create_vector_observation_encoder(
                hidden, h_size, activation, num_layers, scope, reuse
            )
        return hidden_flat

    @staticmethod
    def create_nature_cnn_visual_observation_encoder(
        image_input: tf.Tensor,
        h_size: int,
        activation: ActivationFunction,
        num_layers: int,
        scope: str,
        reuse: bool,
    ) -> tf.Tensor:
        """
        Builds a set of resnet visual encoders.
        :param image_input: The placeholder for the image input to use.
        :param h_size: Hidden layer size.
        :param activation: What type of activation function to use for layers.
        :param num_layers: number of hidden layers to create.
        :param scope: The scope of the graph within which to create the ops.
        :param reuse: Whether to re-use the weights within the same scope.
        :return: List of hidden layer tensors.
        """
        with tf.variable_scope(scope):
            conv1 = tf.layers.conv2d(
                image_input,
                32,
                kernel_size=[8, 8],
                strides=[4, 4],
                activation=tf.nn.elu,
                reuse=reuse,
                name="conv_1",
            )
            conv2 = tf.layers.conv2d(
                conv1,
                64,
                kernel_size=[4, 4],
                strides=[2, 2],
                activation=tf.nn.elu,
                reuse=reuse,
                name="conv_2",
            )
            conv3 = tf.layers.conv2d(
                conv2,
                64,
                kernel_size=[3, 3],
                strides=[1, 1],
                activation=tf.nn.elu,
                reuse=reuse,
                name="conv_3",
            )
            hidden = tf.layers.flatten(conv3)

        with tf.variable_scope(scope + "/" + "flat_encoding"):
            hidden_flat = ModelUtils.create_vector_observation_encoder(
                hidden, h_size, activation, num_layers, scope, reuse
            )
        return hidden_flat

    @staticmethod
    def create_resnet_visual_observation_encoder(
        image_input: tf.Tensor,
        h_size: int,
        activation: ActivationFunction,
        num_layers: int,
        scope: str,
        reuse: bool,
    ) -> tf.Tensor:
        """
        Builds a set of resnet visual encoders.
        :param image_input: The placeholder for the image input to use.
        :param h_size: Hidden layer size.
        :param activation: What type of activation function to use for layers.
        :param num_layers: number of hidden layers to create.
        :param scope: The scope of the graph within which to create the ops.
        :param reuse: Whether to re-use the weights within the same scope.
        :return: List of hidden layer tensors.
        """
        n_channels = [16, 32, 32]  # channel for each stack
        n_blocks = 2  # number of residual blocks
        with tf.variable_scope(scope):
            hidden = image_input
            for i, ch in enumerate(n_channels):
                hidden = tf.layers.conv2d(
                    hidden,
                    ch,
                    kernel_size=[3, 3],
                    strides=[1, 1],
                    reuse=reuse,
                    name="layer%dconv_1" % i,
                )
                hidden = tf.layers.max_pooling2d(
                    hidden, pool_size=[3, 3], strides=[2, 2], padding="same"
                )
                # create residual blocks
                for j in range(n_blocks):
                    block_input = hidden
                    hidden = tf.nn.relu(hidden)
                    hidden = tf.layers.conv2d(
                        hidden,
                        ch,
                        kernel_size=[3, 3],
                        strides=[1, 1],
                        padding="same",
                        reuse=reuse,
                        name="layer%d_%d_conv1" % (i, j),
                    )
                    hidden = tf.nn.relu(hidden)
                    hidden = tf.layers.conv2d(
                        hidden,
                        ch,
                        kernel_size=[3, 3],
                        strides=[1, 1],
                        padding="same",
                        reuse=reuse,
                        name="layer%d_%d_conv2" % (i, j),
                    )
                    hidden = tf.add(block_input, hidden)
            hidden = tf.nn.relu(hidden)
            hidden = tf.layers.flatten(hidden)

        with tf.variable_scope(scope + "/" + "flat_encoding"):
            hidden_flat = ModelUtils.create_vector_observation_encoder(
                hidden, h_size, activation, num_layers, scope, reuse
            )
        return hidden_flat

    @staticmethod
    def get_encoder_for_type(encoder_type: EncoderType) -> EncoderFunction:
        ENCODER_FUNCTION_BY_TYPE = {
            EncoderType.SIMPLE: ModelUtils.create_visual_observation_encoder,
            EncoderType.NATURE_CNN: ModelUtils.create_nature_cnn_visual_observation_encoder,
            EncoderType.RESNET: ModelUtils.create_resnet_visual_observation_encoder,
            EncoderType.MATCH3: ModelUtils.create_match3_visual_observation_encoder,
        }
        return ENCODER_FUNCTION_BY_TYPE.get(
            encoder_type, ModelUtils.create_visual_observation_encoder
        )

    @staticmethod
    def break_into_branches(
        concatenated_logits: tf.Tensor, action_size: List[int]
    ) -> List[tf.Tensor]:
        """
        Takes a concatenated set of logits that represent multiple discrete action branches
        and breaks it up into one Tensor per branch.
        :param concatenated_logits: Tensor that represents the concatenated action branches
        :param action_size: List of ints containing the number of possible actions for each branch.
        :return: A List of Tensors containing one tensor per branch.
        """
        action_idx = [0] + list(np.cumsum(action_size))
        branched_logits = [
            concatenated_logits[:, action_idx[i] : action_idx[i + 1]]
            for i in range(len(action_size))
        ]
        return branched_logits

    @staticmethod
    def create_discrete_action_masking_layer(
        branches_logits: List[tf.Tensor],
        action_masks: tf.Tensor,
        action_size: List[int],
    ) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]:
        """
        Creates a masking layer for the discrete actions
        :param branches_logits: A List of the unnormalized action probabilities for each branch
        :param action_masks: The mask for the logits. Must be of dimension [None x total_number_of_action]
        :param action_size: A list containing the number of possible actions for each branch
        :return: The action output dimension [batch_size, num_branches], the concatenated
            normalized log_probs (after softmax)
        and the concatenated normalized log log_probs
        """
        branch_masks = ModelUtils.break_into_branches(action_masks, action_size)
        raw_probs = [
            tf.multiply(tf.nn.softmax(branches_logits[k]) + EPSILON, branch_masks[k])
            for k in range(len(action_size))
        ]
        normalized_probs = [
            tf.divide(raw_probs[k], tf.reduce_sum(raw_probs[k], axis=1, keepdims=True))
            for k in range(len(action_size))
        ]
        output = tf.concat(
            [
                tf.multinomial(tf.log(normalized_probs[k] + EPSILON), 1)
                for k in range(len(action_size))
            ],
            axis=1,
        )
        return (
            output,
            tf.concat([normalized_probs[k] for k in range(len(action_size))], axis=1),
            tf.concat(
                [
                    tf.log(normalized_probs[k] + EPSILON)
                    for k in range(len(action_size))
                ],
                axis=1,
            ),
        )

    @staticmethod
    def _check_resolution_for_encoder(
        vis_in: tf.Tensor, vis_encoder_type: EncoderType
    ) -> None:
        min_res = ModelUtils.MIN_RESOLUTION_FOR_ENCODER[vis_encoder_type]
        height = vis_in.shape[1]
        width = vis_in.shape[2]
        if height < min_res or width < min_res:
            raise UnityTrainerException(
                f"Visual observation resolution ({width}x{height}) is too small for"
                f"the provided EncoderType ({vis_encoder_type.value}). The min dimension is {min_res}"
            )

    @staticmethod
    def create_observation_streams(
        visual_in: List[tf.Tensor],
        vector_in: tf.Tensor,
        num_streams: int,
        h_size: int,
        num_layers: int,
        vis_encode_type: EncoderType = EncoderType.SIMPLE,
        stream_scopes: List[str] = None,
    ) -> List[tf.Tensor]:
        """
        Creates encoding stream for observations.
        :param num_streams: Number of streams to create.
        :param h_size: Size of hidden linear layers in stream.
        :param num_layers: Number of hidden linear layers in stream.
        :param stream_scopes: List of strings (length == num_streams), which contains
            the scopes for each of the streams. None if all under the same TF scope.
        :return: List of encoded streams.
        """
        activation_fn = ModelUtils.swish
        vector_observation_input = vector_in

        final_hiddens = []
        for i in range(num_streams):
            # Pick the encoder function based on the EncoderType
            create_encoder_func = ModelUtils.get_encoder_for_type(vis_encode_type)

            visual_encoders = []
            hidden_state, hidden_visual = None, None
            _scope_add = stream_scopes[i] if stream_scopes else ""
            if len(visual_in) > 0:
                for j, vis_in in enumerate(visual_in):
                    ModelUtils._check_resolution_for_encoder(vis_in, vis_encode_type)
                    encoded_visual = create_encoder_func(
                        vis_in,
                        h_size,
                        activation_fn,
                        num_layers,
                        f"{_scope_add}main_graph_{i}_encoder{j}",  # scope
                        False,  # reuse
                    )
                    visual_encoders.append(encoded_visual)
                hidden_visual = tf.concat(visual_encoders, axis=1)
            if vector_in.get_shape()[-1] > 0:
                # Don't encode non-existant or 0-shape inputs
                hidden_state = ModelUtils.create_vector_observation_encoder(
                    vector_observation_input,
                    h_size,
                    activation_fn,
                    num_layers,
                    scope=f"{_scope_add}main_graph_{i}",
                    reuse=False,
                )
            if hidden_state is not None and hidden_visual is not None:
                final_hidden = tf.concat([hidden_visual, hidden_state], axis=1)
            elif hidden_state is None and hidden_visual is not None:
                final_hidden = hidden_visual
            elif hidden_state is not None and hidden_visual is None:
                final_hidden = hidden_state
            else:
                raise Exception(
                    "No valid network configuration possible. "
                    "There are no states or observations in this brain"
                )
            final_hiddens.append(final_hidden)
        return final_hiddens

    @staticmethod
    def create_recurrent_encoder(input_state, memory_in, sequence_length, name="lstm"):
        """
        Builds a recurrent encoder for either state or observations (LSTM).
        :param sequence_length: Length of sequence to unroll.
        :param input_state: The input tensor to the LSTM cell.
        :param memory_in: The input memory to the LSTM cell.
        :param name: The scope of the LSTM cell.
        """
        s_size = input_state.get_shape().as_list()[1]
        m_size = memory_in.get_shape().as_list()[1]
        lstm_input_state = tf.reshape(input_state, shape=[-1, sequence_length, s_size])
        memory_in = tf.reshape(memory_in[:, :], [-1, m_size])
        half_point = int(m_size / 2)
        with tf.variable_scope(name):
            rnn_cell = tf.nn.rnn_cell.BasicLSTMCell(half_point)
            lstm_vector_in = tf.nn.rnn_cell.LSTMStateTuple(
                memory_in[:, :half_point], memory_in[:, half_point:]
            )
            recurrent_output, lstm_state_out = tf.nn.dynamic_rnn(
                rnn_cell, lstm_input_state, initial_state=lstm_vector_in
            )

        recurrent_output = tf.reshape(recurrent_output, shape=[-1, half_point])
        return recurrent_output, tf.concat([lstm_state_out.c, lstm_state_out.h], axis=1)

    @staticmethod
    def create_value_heads(
        stream_names: List[str], hidden_input: tf.Tensor
    ) -> Tuple[Dict[str, tf.Tensor], tf.Tensor]:
        """
        Creates one value estimator head for each reward signal in stream_names.
        Also creates the node corresponding to the mean of all the value heads in self.value.
        self.value_head is a dictionary of stream name to node containing the value estimator head for that signal.
        :param stream_names: The list of reward signal names
        :param hidden_input: The last layer of the Critic. The heads will consist of one dense hidden layer on top
        of the hidden input.
        """
        value_heads = {}
        for name in stream_names:
            value = tf.layers.dense(hidden_input, 1, name=f"{name}_value")
            value_heads[name] = value
        value = tf.reduce_mean(list(value_heads.values()), 0)
        return value_heads, value