Update with newest changes

5 年前 · 89890bf2
--- a/ml-agents/mlagents/trainers/tests/torch/test_layers.py
+++ b/ml-agents/mlagents/trainers/tests/torch/test_layers.py
 import torch

-from mlagents.trainers.torch.layers import Swish, linear_layer, Initialization
+from mlagents.trainers.torch.layers import (
+    Swish,
+    linear_layer,
+    lstm_layer,
+    Initialization,
+)


 def test_swish():
    )
    assert torch.all(torch.eq(layer.weight.data, torch.zeros_like(layer.weight.data)))
    assert torch.all(torch.eq(layer.bias.data, torch.zeros_like(layer.bias.data)))
+
+
+def test_lstm_layer():
+    torch.manual_seed(0)
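+    # Test zero initialization for LSTM: every weight must be all zeros, and the
+    # forget-gate block of every bias must equal the default forget_bias of 1.0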
+    layer = lstm_layer(
+        4, 4, kernel_init=Initialization.Zero, bias_init=Initialization.Zero
+    )
+    for name, param in layer.named_parameters():
+        if "weight" in name:
+            assert torch.all(torch.eq(param.data, torch.zeros_like(param.data)))
+        elif "bias" in name:
+            assert torch.all(
+                torch.eq(param.data[4:8], torch.ones_like(param.data[4:8]))
+            )
--- a/ml-agents/mlagents/trainers/tests/torch/test_networks.py
+++ b/ml-agents/mlagents/trainers/tests/torch/test_networks.py
    obs_size = 4
    seq_len = 16
    network_settings = NetworkSettings(
-        memory=NetworkSettings.MemorySettings(sequence_length=seq_len, memory_size=4)
+        memory=NetworkSettings.MemorySettings(sequence_length=seq_len, memory_size=12)
-    optimizer = torch.optim.Adam(networkbody.parameters(), lr=3e-3)
+    optimizer = torch.optim.Adam(networkbody.parameters(), lr=3e-4)
-    for _ in range(100):
-        encoded, _ = networkbody([sample_obs], [], memories=torch.ones(1, seq_len, 4))
+    for _ in range(200):
+        encoded, _ = networkbody([sample_obs], [], memories=torch.ones(1, seq_len, 12))
        # Try to force output to 1
        loss = torch.nn.functional.mse_loss(encoded, torch.ones(encoded.shape))
        optimizer.zero_grad()
        # memories isn't always set to None, the network should be able to
        # deal with that.
    # Test critic pass
-    value_out = actor.critic_pass([sample_obs], [], memories=memories)
+    value_out, memories_out = actor.critic_pass([sample_obs], [], memories=memories)
+            assert memories_out.shape == memories.shape
-    dists, value_out, _ = actor.get_dist_and_value([sample_obs], [], memories=memories)
+    dists, value_out, mem_out = actor.get_dist_and_value(
+        [sample_obs], [], memories=memories
+    )
+    if mem_out is not None:
+        assert mem_out.shape == memories.shape
    for dist in dists:
        assert isinstance(dist, GaussianDistInstance)
    for stream in stream_names:
--- a/ml-agents/mlagents/trainers/tests/torch/test_utils.py
+++ b/ml-agents/mlagents/trainers/tests/torch/test_utils.py
    assert entropies.shape == (1, len(dist_list))
    # Make sure the first action has a higher probability than the others.
    assert log_probs.flatten()[0] > log_probs.flatten()[1]
+
+
+def test_masked_mean():
+    test_input = torch.tensor([1, 2, 3, 4, 5])
+    masks = torch.ones_like(test_input).bool()
+    mean = ModelUtils.masked_mean(test_input, masks=masks)
+    assert mean == 3.0
+
+    masks = torch.tensor([False, False, True, True, True])
+    mean = ModelUtils.masked_mean(test_input, masks=masks)
+    assert mean == 4.0
+
+    # Make sure it works if all masks are off
+    masks = torch.tensor([False, False, False, False, False])
+    mean = ModelUtils.masked_mean(test_input, masks=masks)
+    assert mean == 0.0
--- a/ml-agents/mlagents/trainers/torch/layers.py
+++ b/ml-agents/mlagents/trainers/torch/layers.py
    layer.weight.data *= kernel_gain
    _init_methods[bias_init](layer.bias.data)
    return layer
+
+
+def lstm_layer(
+    input_size: int,
+    hidden_size: int,
+    num_layers: int = 1,
+    batch_first: bool = True,
+    forget_bias: float = 1.0,
+    kernel_init: Initialization = Initialization.XavierGlorotUniform,
+    bias_init: Initialization = Initialization.Zero,
+) -> torch.nn.Module:
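+    """
+    Creates a torch.nn.LSTM and initializes its weights and biases. Adds a
+    forget_bias offset to the forget-gate bias, as is done in TensorFlow.
+    :param input_size: Passed to torch.nn.LSTM as the input feature size.
+    :param hidden_size: Passed to torch.nn.LSTM as the hidden state size.
+    :param num_layers: Passed to torch.nn.LSTM as the number of stacked layers.
+    :param batch_first: Passed to torch.nn.LSTM as its batch_first flag.
+    :param forget_bias: Value added to the second of the 4 blocks (the forget
+        gate) of every bias parameter after bias_init has been applied. Note that
+        torch.nn.LSTM keeps separate input-hidden and hidden-hidden bias vectors,
+        so each of them receives the offset.
+    :param kernel_init: Initialization applied to each of the 4 blocks of every
+        weight parameter.
+    :param bias_init: Initialization applied to each of the 4 blocks of every
+        bias parameter.
+    :return: The initialized torch.nn.LSTM module.
+    """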
+    lstm = torch.nn.LSTM(input_size, hidden_size, num_layers, batch_first=batch_first)
+    # Initialize every gate block, then add forget_bias to the forget gate bias
+    for name, param in lstm.named_parameters():
+        # Each weight and bias is a concatenation of 4 per-gate blocks along
+        # dim 0, so each block is initialized separately
+        if "weight" in name:
+            for idx in range(4):
+                block_size = param.shape[0] // 4
+                _init_methods[kernel_init](
+                    param.data[idx * block_size : (idx + 1) * block_size]
+                )
+        if "bias" in name:
+            for idx in range(4):
+                block_size = param.shape[0] // 4
+                _init_methods[bias_init](
+                    param.data[idx * block_size : (idx + 1) * block_size]
+                )
+                if idx == 1:
+                    param.data[idx * block_size : (idx + 1) * block_size].add_(
+                        forget_bias
+                    )
+    return lstm
--- a/ml-agents/mlagents/trainers/torch/networks.py
+++ b/ml-agents/mlagents/trainers/torch/networks.py
 from mlagents.trainers.settings import NetworkSettings
 from mlagents.trainers.torch.utils import ModelUtils
 from mlagents.trainers.torch.decoders import ValueHeads
+from mlagents.trainers.torch.layers import lstm_layer

 ActivationFunction = Callable[[torch.Tensor], torch.Tensor]
 EncoderFunction = Callable[
        )

        if self.use_lstm:
-            self.lstm = nn.LSTM(self.h_size, self.m_size // 2, 1)
+            self.lstm = lstm_layer(self.h_size, self.m_size // 2, batch_first=True)
        else:
            self.lstm = None

            raise Exception("No valid inputs to network.")

        if self.use_lstm:
-            encoding = encoding.view([sequence_length, -1, self.h_size])
+            # Resize to (batch, sequence length, encoding size)
+            encoding = encoding.reshape([-1, sequence_length, self.h_size])
-            encoding, memories = self.lstm(
-                encoding.contiguous(),
-                (memories[0].contiguous(), memories[1].contiguous()),
-            )
-            encoding = encoding.view([-1, self.m_size // 2])
+            encoding, memories = self.lstm(encoding, memories)
+            encoding = encoding.reshape([-1, self.m_size // 2])
            memories = torch.cat(memories, dim=-1)
        return encoding, memories

        vec_inputs: List[torch.Tensor],
        vis_inputs: List[torch.Tensor],
        memories: Optional[torch.Tensor] = None,
-    ) -> Dict[str, torch.Tensor]:
+        sequence_length: int = 1,
+    ) -> Tuple[Dict[str, torch.Tensor], torch.Tensor]:
        """
        Get value outputs for the given obs.
        :param vec_inputs: List of vector inputs as tensors.
        vec_inputs: List[torch.Tensor],
        vis_inputs: List[torch.Tensor],
        memories: Optional[torch.Tensor] = None,
-    ) -> Dict[str, torch.Tensor]:
-        encoding, _ = self.network_body(vec_inputs, vis_inputs, memories=memories)
-        return self.value_heads(encoding)
+        sequence_length: int = 1,
+    ) -> Tuple[Dict[str, torch.Tensor], torch.Tensor]:
+        encoding, memories_out = self.network_body(
+            vec_inputs, vis_inputs, memories=memories, sequence_length=sequence_length
+        )
+        return self.value_heads(encoding), memories_out

    def get_dist_and_value(
        self,
        vec_inputs: List[torch.Tensor],
        vis_inputs: List[torch.Tensor],
        memories: Optional[torch.Tensor] = None,
-    ) -> Dict[str, torch.Tensor]:
+        sequence_length: int = 1,
+    ) -> Tuple[Dict[str, torch.Tensor], torch.Tensor]:
+        actor_mem, critic_mem = None, None
-            _, critic_mem = torch.split(memories, self.half_mem_size, -1)
+            actor_mem, critic_mem = torch.split(memories, self.half_mem_size, -1)
+        value_outputs, critic_mem_out = self.critic(
+            vec_inputs, vis_inputs, memories=critic_mem, sequence_length=sequence_length
+        )
+        if actor_mem is not None:
+            # Rebuild the full memories: actor half passed through unchanged,
+            # critic half replaced by the critic's new memories
+            memories_out = torch.cat([actor_mem, critic_mem_out], dim=-1)
-            critic_mem = None
-        value_outputs, _memories = self.critic(
-            vec_inputs, vis_inputs, memories=critic_mem
-        )
-        return value_outputs
+            memories_out = None
+        return value_outputs, memories_out

    def get_dist_and_value(
        self,
            vec_inputs, vis_inputs, memories=critic_mem, sequence_length=sequence_length
        )
        if self.use_lstm:
-            mem_out = torch.cat([actor_mem_outs, critic_mem_outs], dim=1)
+            mem_out = torch.cat([actor_mem_outs, critic_mem_outs], dim=-1)
        else:
            mem_out = None
        return dists, value_outputs, mem_out
--- a/ml-agents/mlagents/trainers/torch/utils.py
+++ b/ml-agents/mlagents/trainers/torch/utils.py
        else:
            all_probs = torch.cat(all_probs_list, dim=-1)
        return log_probs, entropies, all_probs
+
+    @staticmethod
+    def masked_mean(tensor: torch.Tensor, masks: torch.Tensor) -> torch.Tensor:
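+        """
+        Returns the mean of the tensor over the entries where masks is True;
+        entries where masks is False are ignored. Used for masking out loss
+        functions.
+        :param tensor: Tensor which needs mean computation.
+        :param masks: Boolean tensor of masks with same dimension as tensor.
+        :return: The masked sum divided by the number of True masks, where the
+            divisor is clamped to at least 1 so that the result is 0 when every
+            mask is False.
+        """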
+        return (tensor * masks).sum() / torch.clamp(masks.float().sum(), min=1.0)