ml-agents/ml-agents/mlagents/trainers/torch/conditioning.py


								from mlagents.torch_utils import torch

								from typing import List

								import math


								from mlagents.trainers.torch.layers import (

								    linear_layer,

								    Swish,

								    Initialization,

								    LayerNorm,

								)


								class ConditionalEncoder(torch.nn.Module):
								    def __init__(
								        self,
								        input_size: int,
								        goal_size: int,
								        hidden_size: int,
								        num_layers: int,
								        num_conditional_layers: int,
								        kernel_init: Initialization = Initialization.KaimingHeNormal,
								        kernel_gain: float = 1.0,
								    ):
								        """
								        Goal-conditioned fully connected encoder. The network is a stack of
								        "num_layers" layers, each followed by a Swish activation. The trailing
								        "num_conditional_layers" layers are HyperNetwork modules whose weights are
								        generated from the goal; every layer before them is a regular linear
								        layer with its own parameters. The first layer consumes the input and the
								        goal concatenated together.
								        :param input_size: The size of the input of the encoder
								        :param goal_size: The size of the goal tensor that will condition the encoder
								        :param hidden_size: The number of hidden units in the encoder
								        :param num_layers: The total number of layers of the encoder (both regular and
								        generated by HyperNetwork)
								        :param num_conditional_layers: The number of layers generated with hypernetworks
								        :param kernel_init: The Initialization to use for the weights of the regular
								        (non-conditional) layers
								        :param kernel_gain: The multiplier for the weights of the kernel of the
								        regular (non-conditional) layers.
								        """
								        super().__init__()
								        # Layers with an index at or past this threshold are conditional, since
								        # the conditional layers are the last num_conditional_layers ones.
								        first_conditional = num_layers - num_conditional_layers
								        modules: List[torch.nn.Module] = []
								        # The first layer sees the input concatenated with the goal.
								        in_features = input_size + goal_size
								        for index in range(num_layers):
								            block: torch.nn.Module
								            if index >= first_conditional:
								                # Hypernetwork with 2 hidden layers of hidden_size units,
								                # driven by the goal.
								                block = HyperNetwork(
								                    in_features, hidden_size, goal_size, hidden_size, 2
								                )
								            else:
								                block = linear_layer(
								                    in_features,
								                    hidden_size,
								                    kernel_init=kernel_init,
								                    kernel_gain=kernel_gain,
								                )
								            modules.extend((block, Swish()))
								            # Every layer after the first takes the previous hidden activation.
								            in_features = hidden_size
								        self.layers = torch.nn.ModuleList(modules)

								    def forward(
								        self, input_tensor: torch.Tensor, goal_tensor: torch.Tensor
								    ) -> torch.Tensor:  # type: ignore
								        """
								        Encodes the input conditioned on the goal.
								        :param input_tensor: The input of the encoder
								        :param goal_tensor: The goal; concatenated to the input along the last
								        dimension and also fed to every HyperNetwork layer
								        :return: The activation produced by the last layer of the encoder
								        """
								        hidden = torch.cat([input_tensor, goal_tensor], dim=-1)
								        for module in self.layers:
								            # Only HyperNetwork layers take the goal as a second argument;
								            # linear layers and activations just transform the activation.
								            hidden = (
								                module(hidden, goal_tensor)
								                if isinstance(module, HyperNetwork)
								                else module(hidden)
								            )
								        return hidden


								class HyperNetwork(torch.nn.Module):
								    def __init__(
								        self, input_size, output_size, hyper_input_size, layer_size, num_layers
								    ):
								        """
								        Hyper Network module. A small fully connected network (the hypernetwork)
								        maps the hyper_input tensor to the flattened weight matrix of a single
								        fully connected layer (the main network). The bias of the main network is
								        a regular learned parameter and is not generated.
								        :param input_size: The size of the input of the main network
								        :param output_size: The size of the output of the main network
								        :param hyper_input_size: The size of the input of the hypernetwork that will
								        generate the main network.
								        :param layer_size: The number of hidden units in the layers of the hypernetwork
								        :param num_layers: The number of layers of the hypernetwork
								        """
								        super().__init__()
								        self.input_size = input_size
								        self.output_size = output_size

								        # Hidden stack of the hypernetwork: linear + Swish, num_layers times.
								        trunk = []
								        for depth in range(num_layers):
								            # Only the first hidden layer reads the raw hyper input.
								            fan_in = hyper_input_size if depth == 0 else layer_size
								            trunk += [
								                linear_layer(
								                    fan_in,
								                    layer_size,
								                    kernel_init=Initialization.KaimingHeNormal,
								                    kernel_gain=1.0,
								                    bias_init=Initialization.Zero,
								                ),
								                Swish(),
								            ]

								        # Output head: one value per entry of the main network weight matrix.
								        weight_head = linear_layer(
								            layer_size,
								            input_size * output_size,
								            kernel_init=Initialization.KaimingHeNormal,
								            kernel_gain=0.1,
								            bias_init=Initialization.Zero,
								        )

								        # The head's weights are overwritten here with a uniform distribution
								        # whose bound shrinks with both the head's input width and the main
								        # network's input size (its bias is left as linear_layer initialized it).
								        limit = math.sqrt(1 / (layer_size * self.input_size))
								        weight_head.weight.data.uniform_(-limit, limit)

								        self.hypernet = torch.nn.Sequential(*trunk, LayerNorm(), weight_head)

								        # The bias of the main network layer is learned directly rather than
								        # generated by the hypernetwork.
								        self.bias = torch.nn.Parameter(torch.zeros(output_size))

								    def forward(self, input_activation, hyper_input):
								        """
								        Applies the generated main network layer to input_activation.
								        :param input_activation: The input of the main network. It is used as the
								        left operand of a batched matrix product after gaining a dimension, so it
								        is expected to be 2-dimensional with a last dimension of input_size.
								        :param hyper_input: The input of the hypernetwork that generates the weights
								        :return: The output of the main network, with a last dimension of
								        output_size
								        """
								        # One (input_size, output_size) weight matrix per generated row.
								        generated = self.hypernet(hyper_input).view(
								            -1, self.input_size, self.output_size
								        )
								        # Treat each sample as a 1 x input_size matrix for the batched product.
								        row_vectors = input_activation.unsqueeze(1)
								        projected = torch.bmm(row_vectors, generated).squeeze(1)
								        return projected + self.bias