
Goal conditioning integration (#5142)

* Adding Hypernetwork modules and unit tests

* Edits

* Integration of the hypernetwork into the trainer

* Update ml-agents/mlagents/trainers/torch/networks.py

Co-authored-by: Arthur Juliani <awjuliani@gmail.com>

* Making hyper the default and adding the conditioning type None

* Reducing the number of hypernetwork layers

* addressing comments

Co-authored-by: Arthur Juliani <awjuliani@gmail.com>
/check-for-ModelOverriders
GitHub, 3 years ago
Current commit: 81705d6d
5 files changed, 107 insertions and 15 deletions:
1. ml-agents/mlagents/trainers/settings.py (6 changes)
2. ml-agents/mlagents/trainers/tests/simple_test_envs.py (14 changes)
3. ml-agents/mlagents/trainers/tests/torch/test_simple_rl.py (25 changes)
4. ml-agents/mlagents/trainers/torch/conditioning.py (4 changes)
5. ml-agents/mlagents/trainers/torch/networks.py (73 changes)

ml-agents/mlagents/trainers/settings.py (6 changes)


LINEAR = "linear"
class ConditioningType(Enum):
HYPER = "hyper"
NONE = "none"
@attr.s(auto_attribs=True)
class NetworkSettings:
@attr.s

num_layers: int = 2
vis_encode_type: EncoderType = EncoderType.SIMPLE
memory: Optional[MemorySettings] = None
goal_conditioning_type: ConditioningType = ConditioningType.HYPER
@attr.s(auto_attribs=True)
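Since the new field is a plain attrs attribute, it can be toggled programmatically the same way this PR's own tests do, via attr.evolve. A minimal sketch (constructing a bare NetworkSettings() here is illustrative; during training the object is built from the YAML config):

import attr
from mlagents.trainers.settings import NetworkSettings, ConditioningType

# HYPER is the new default: goal observations drive a hypernetwork-conditioned body.
settings = NetworkSettings()
assert settings.goal_conditioning_type == ConditioningType.HYPER

# NONE opts out: goals are fed to the body like any other observation.
unconditioned = attr.evolve(settings, goal_conditioning_type=ConditioningType.NONE)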

ml-agents/mlagents/trainers/tests/simple_test_envs.py (14 changes)


 from mlagents_envs.base_env import (
     ActionSpec,
+    ObservationSpec,
+    ObservationType,
     ActionTuple,
     BaseEnv,
     BehaviorSpec,

         vec_obs_size=OBS_SIZE,
         var_len_obs_size=VAR_LEN_SIZE,
         action_sizes=(1, 0),
+        goal_indices=None,
     ):
         super().__init__()
         self.num_visual = num_visual

         self.vec_obs_size = vec_obs_size
         self.var_len_obs_size = var_len_obs_size
+        self.goal_indices = goal_indices
         continuous_action_size, discrete_action_size = action_sizes
         discrete_tuple = tuple(2 for _ in range(discrete_action_size))
         action_spec = ActionSpec(continuous_action_size, discrete_tuple)

         for _ in range(self.num_var_len):
             obs_shape.append(self.var_len_obs_size)
         obs_spec = create_observation_specs_with_shapes(obs_shape)
+        if self.goal_indices is not None:
+            for i in range(len(obs_spec)):
+                if i in self.goal_indices:
+                    obs_spec[i] = ObservationSpec(
+                        shape=obs_spec[i].shape,
+                        dimension_property=obs_spec[i].dimension_property,
+                        observation_type=ObservationType.GOAL,
+                        name=obs_spec[i].name,
+                    )
         return obs_spec

     def _make_obs(self, value: float) -> List[np.ndarray]:

         var_len_obs_size=VAR_LEN_SIZE,
         action_sizes=(1, 0),
         num_agents=2,
+        goal_indices=None,
     ):
         super().__init__()
         self.envs = {}

                 vec_obs_size,
                 var_len_obs_size,
                 action_sizes,
+                goal_indices,
             )
             self.dones[name_and_num] = False
             self.envs[name_and_num].reset()
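The spec-marking loop above is the piece worth noting: goals are not a new observation channel, just ordinary observations whose ObservationSpec carries ObservationType.GOAL. A standalone sketch of the same logic (the helper name mark_goals and its obs_specs argument are illustrative, not part of the PR):

from typing import List
from mlagents_envs.base_env import ObservationSpec, ObservationType

def mark_goals(
    obs_specs: List[ObservationSpec], goal_indices: List[int]
) -> List[ObservationSpec]:
    # ObservationSpec is immutable, so a new spec is built for each goal index;
    # only observation_type changes, shape/dimensions/name are carried over.
    return [
        ObservationSpec(
            shape=spec.shape,
            dimension_property=spec.dimension_property,
            observation_type=ObservationType.GOAL,
            name=spec.name,
        )
        if i in goal_indices
        else spec
        for i, spec in enumerate(obs_specs)
    ]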

ml-agents/mlagents/trainers/tests/torch/test_simple_rl.py (25 changes)


     GAILSettings,
     RewardSignalType,
     EncoderType,
+    ConditioningType,
 )
 from mlagents_envs.communicator_objects.demonstration_meta_pb2 import (

     check_environment_trains(env, {BRAIN_NAME: config})


+@pytest.mark.parametrize("conditioning_type", [ConditioningType.HYPER])
-def test_var_len_obs_poca(num_vis, num_vector, num_var_len):
+def test_var_len_obs_and_goal_poca(num_vis, num_vector, num_var_len, conditioning_type):
     env = MultiAgentEnvironment(
         [BRAIN_NAME],
         action_sizes=(0, 1),

         step_size=0.2,
         num_agents=2,
+        goal_indices=[0],
     )
+    new_network = attr.evolve(
+        POCA_TORCH_CONFIG.network_settings, goal_conditioning_type=conditioning_type
+    )
-    config = attr.evolve(POCA_TORCH_CONFIG, hyperparameters=new_hyperparams)
+    config = attr.evolve(
+        POCA_TORCH_CONFIG, hyperparameters=new_hyperparams, network_settings=new_network
+    )
     check_environment_trains(env, {BRAIN_NAME: config})

     check_environment_trains(env, {BRAIN_NAME: config})


+@pytest.mark.parametrize("conditioning_type", [ConditioningType.HYPER])
-def test_var_len_obs_ppo(num_vis, num_vector, num_var_len, action_sizes):
+def test_var_len_obs_and_goal_ppo(
+    num_vis, num_vector, num_var_len, action_sizes, conditioning_type
+):
     env = SimpleEnvironment(
         [BRAIN_NAME],
         action_sizes=action_sizes,

         step_size=0.2,
+        goal_indices=[0],
     )
+    new_network = attr.evolve(
+        PPO_TORCH_CONFIG.network_settings, goal_conditioning_type=conditioning_type
+    )
-    config = attr.evolve(PPO_TORCH_CONFIG, hyperparameters=new_hyperparams)
+    config = attr.evolve(
+        PPO_TORCH_CONFIG, hyperparameters=new_hyperparams, network_settings=new_network
+    )
     check_environment_trains(env, {BRAIN_NAME: config})
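Both tests parametrize conditioning_type but currently only sweep HYPER; since the enum round-trips through the lowercase strings a YAML config would carry, extending the sweep later is a one-line change. A sketch (the test name is illustrative, not part of the diff):

import pytest
from mlagents.trainers.settings import ConditioningType

@pytest.mark.parametrize(
    "conditioning_type", [ConditioningType.HYPER, ConditioningType.NONE]
)
def test_conditioning_type_round_trip(conditioning_type):
    # "hyper" / "none" are the values that appear in trainer config files.
    assert ConditioningType(conditioning_type.value) is conditioning_type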

ml-agents/mlagents/trainers/torch/conditioning.py (4 changes)


"""
super().__init__()
layers: List[torch.nn.Module] = []
prev_size = input_size + goal_size
prev_size = input_size
for i in range(num_layers):
if num_layers - i <= num_conditional_layers:
# This means layer i is a conditional layer since the conditional

def forward(
self, input_tensor: torch.Tensor, goal_tensor: torch.Tensor
) -> torch.Tensor: # type: ignore
activation = torch.cat([input_tensor, goal_tensor], dim=-1)
activation = input_tensor
for layer in self.layers:
if isinstance(layer, HyperNetwork):
activation = layer(activation, goal_tensor)
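For context on what this change removes: in a hypernetwork layer the goal does not enter the activation path directly; instead a small generator network maps the goal embedding to the weights of the linear layer applied to the activation. A minimal self-contained sketch of that idea (TinyHyperNetwork is illustrative; the real HyperNetwork in conditioning.py differs in initialization and layer details):

import torch
from torch import nn

class TinyHyperNetwork(nn.Module):
    """Generates the weights of a per-sample linear layer from a goal embedding."""

    def __init__(self, input_size: int, output_size: int, goal_size: int, hidden: int = 64):
        super().__init__()
        self.input_size = input_size
        self.output_size = output_size
        # Small generator that maps the goal to a flattened weight matrix.
        self.weight_generator = nn.Sequential(
            nn.Linear(goal_size, hidden),
            nn.ReLU(),
            nn.Linear(hidden, input_size * output_size),
        )
        self.bias = nn.Parameter(torch.zeros(output_size))

    def forward(self, x: torch.Tensor, goal: torch.Tensor) -> torch.Tensor:
        batch_size = x.shape[0]
        # One weight matrix per batch element, conditioned on that element's goal.
        weights = self.weight_generator(goal).view(
            batch_size, self.input_size, self.output_size
        )
        return torch.bmm(x.unsqueeze(1), weights).squeeze(1) + self.bias

# Usage: a batch of 8 activations of size 32, conditioned on 4-dimensional goals.
layer = TinyHyperNetwork(input_size=32, output_size=16, goal_size=4)
out = layer(torch.randn(8, 32), torch.randn(8, 4))
assert out.shape == (8, 16)

Read against that sketch, the two deletions above presumably remove a redundancy: once the goal determines the generated weights, concatenating it onto the activation as well adds nothing, so the encoder input width drops back to input_size.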

ml-agents/mlagents/trainers/torch/networks.py (73 changes)


 from mlagents.torch_utils import torch, nn
-from mlagents_envs.base_env import ActionSpec, ObservationSpec
+from mlagents_envs.base_env import ActionSpec, ObservationSpec, ObservationType
-from mlagents.trainers.settings import NetworkSettings, EncoderType
+from mlagents.trainers.settings import NetworkSettings, EncoderType, ConditioningType
 from mlagents.trainers.torch.utils import ModelUtils
 from mlagents.trainers.torch.decoders import ValueHeads
 from mlagents.trainers.torch.layers import LSTM, LinearEncoder

+from mlagents.trainers.torch.conditioning import ConditionalEncoder
 from mlagents.trainers.torch.attention import (
     EntityEmbedding,
     ResidualSelfAttention,

         self.normalize = normalize
         self._total_enc_size = total_enc_size
+        self._total_goal_enc_size = 0
+        self._goal_processor_indices: List[int] = []
+        for i in range(len(observation_specs)):
+            if observation_specs[i].observation_type == ObservationType.GOAL:
+                self._total_goal_enc_size += self.embedding_sizes[i]
+                self._goal_processor_indices.append(i)

     @property
     def total_enc_size(self) -> int:
         """

+    @property
+    def total_goal_enc_size(self) -> int:
+        """
+        Returns the total goal encoding size for this ObservationEncoder.
+        """
+        return self._total_goal_enc_size
+
     def update_normalization(self, buffer: AgentBuffer) -> None:
         obs = ObsUtil.from_buffer(buffer, len(self.processors))

         """
         Encode observations using a list of processors and an RSA.
         :param inputs: List of Tensors corresponding to a set of obs.
         :param processors: a ModuleList of the input processors to be applied to these obs.
         :param rsa: Optionally, an RSA to use for variable length obs.
         :param x_self_encoder: Optionally, an encoder to use for x_self (in this case, the non-variable inputs).
         """
         encodes = []
         var_len_processor_inputs: List[Tuple[nn.Module, torch.Tensor]] = []

         return encoded_self

+    def get_goal_encoding(self, inputs: List[torch.Tensor]) -> torch.Tensor:
+        """
+        Encode observations corresponding to goals using a list of processors.
+        :param inputs: List of Tensors corresponding to a set of obs.
+        """
+        encodes = []
+        for idx in self._goal_processor_indices:
+            processor = self.processors[idx]
+            if not isinstance(processor, EntityEmbedding):
+                # The input can be encoded without having to process other inputs
+                obs_input = inputs[idx]
+                processed_obs = processor(obs_input)
+                encodes.append(processed_obs)
+            else:
+                raise UnityTrainerException(
+                    "One of the goals uses variable length observations. This use "
+                    "case is not supported."
+                )
+        if len(encodes) != 0:
+            encoded = torch.cat(encodes, dim=1)
+        else:
+            raise UnityTrainerException(
+                "Trainer was unable to process any of the goals provided as input."
+            )
+        return encoded
+

 class NetworkBody(nn.Module):
     def __init__(

         self.processors = self.observation_encoder.processors
         total_enc_size = self.observation_encoder.total_enc_size
         total_enc_size += encoded_act_size
-        self.linear_encoder = LinearEncoder(
-            total_enc_size, network_settings.num_layers, self.h_size
-        )
+        if (
+            self.observation_encoder.total_goal_enc_size > 0
+            and network_settings.goal_conditioning_type == ConditioningType.HYPER
+        ):
+            self._body_encoder = ConditionalEncoder(
+                total_enc_size,
+                self.observation_encoder.total_goal_enc_size,
+                self.h_size,
+                network_settings.num_layers,
+                1,
+            )
+        else:
+            self._body_encoder = LinearEncoder(
+                total_enc_size, network_settings.num_layers, self.h_size
+            )

         if self.use_lstm:
             self.lstm = LSTM(self.h_size, self.m_size)

         encoded_self = self.observation_encoder(inputs)
         if actions is not None:
             encoded_self = torch.cat([encoded_self, actions], dim=1)
-        encoding = self.linear_encoder(encoded_self)
+        if isinstance(self._body_encoder, ConditionalEncoder):
+            goal = self.observation_encoder.get_goal_encoding(inputs)
+            encoding = self._body_encoder(encoded_self, goal)
+        else:
+            encoding = self._body_encoder(encoded_self)

         if self.use_lstm:
             # Resize to (batch, sequence length, encoding size)
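Putting the two networks.py hunks together: the ConditionalEncoder is chosen once at construction time, and forward simply dispatches on the encoder's type. The gate reduces to one predicate, sketched here for emphasis (the helper name is illustrative, not part of the diff):

from mlagents.trainers.settings import ConditioningType

def uses_hyper_conditioning(
    total_goal_enc_size: int, conditioning_type: ConditioningType
) -> bool:
    # Mirrors the new branch in NetworkBody.__init__: conditional encoding requires
    # both at least one GOAL-typed observation and goal_conditioning_type == HYPER.
    return total_goal_enc_size > 0 and conditioning_type == ConditioningType.HYPER

assert uses_hyper_conditioning(8, ConditioningType.HYPER)
assert not uses_hyper_conditioning(0, ConditioningType.HYPER)  # no GOAL observations
assert not uses_hyper_conditioning(8, ConditioningType.NONE)   # conditioning disabled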
