
Update with newest changes

/develop/torchmodules
Ervin Teng, 4 years ago
Current commit: 89890bf2
6 files changed, with 121 additions and 26 deletions
1. ml-agents/mlagents/trainers/tests/torch/test_layers.py (22 changes)
2. ml-agents/mlagents/trainers/tests/torch/test_networks.py (17 changes)
3. ml-agents/mlagents/trainers/tests/torch/test_utils.py (16 changes)
4. ml-agents/mlagents/trainers/torch/layers.py (36 changes)
5. ml-agents/mlagents/trainers/torch/networks.py (46 changes)
6. ml-agents/mlagents/trainers/torch/utils.py (10 changes)

ml-agents/mlagents/trainers/tests/torch/test_layers.py (22 changes)

 import torch
-from mlagents.trainers.torch.layers import Swish, linear_layer, Initialization
+from mlagents.trainers.torch.layers import (
+    Swish,
+    linear_layer,
+    lstm_layer,
+    Initialization,
+)


 def test_swish():
...
     )
     assert torch.all(torch.eq(layer.weight.data, torch.zeros_like(layer.weight.data)))
     assert torch.all(torch.eq(layer.bias.data, torch.zeros_like(layer.bias.data)))
+
+
+def test_lstm_layer():
+    torch.manual_seed(0)
+    # Test zero for LSTM
+    layer = lstm_layer(
+        4, 4, kernel_init=Initialization.Zero, bias_init=Initialization.Zero
+    )
+    for name, param in layer.named_parameters():
+        if "weight" in name:
+            assert torch.all(torch.eq(param.data, torch.zeros_like(param.data)))
+        elif "bias" in name:
+            assert torch.all(
+                torch.eq(param.data[4:8], torch.ones_like(param.data[4:8]))
+            )
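Why the test checks param.data[4:8]: torch.nn.LSTM stores each bias vector as the four gate biases concatenated in the order (input, forget, cell, output), each of length hidden_size. With hidden_size=4, indices 4:8 therefore cover the forget gate, which lstm_layer offsets by the default forget_bias=1.0. A minimal sketch of that layout:

import torch

lstm = torch.nn.LSTM(input_size=4, hidden_size=4)
for name, param in lstm.named_parameters():
    if "bias" in name:
        # 4 gates * hidden_size 4 = 16 entries; the forget gate is the slice [4:8]
        assert param.shape == (16,)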

ml-agents/mlagents/trainers/tests/torch/test_networks.py (17 changes)

     obs_size = 4
     seq_len = 16
     network_settings = NetworkSettings(
-        memory=NetworkSettings.MemorySettings(sequence_length=seq_len, memory_size=4)
+        memory=NetworkSettings.MemorySettings(sequence_length=seq_len, memory_size=12)
     )
...
-    optimizer = torch.optim.Adam(networkbody.parameters(), lr=3e-3)
+    optimizer = torch.optim.Adam(networkbody.parameters(), lr=3e-4)
-    for _ in range(100):
-        encoded, _ = networkbody([sample_obs], [], memories=torch.ones(1, seq_len, 4))
+    for _ in range(200):
+        encoded, _ = networkbody([sample_obs], [], memories=torch.ones(1, seq_len, 12))
         # Try to force output to 1
         loss = torch.nn.functional.mse_loss(encoded, torch.ones(encoded.shape))
         optimizer.zero_grad()
...
     # Memories aren't always set to None; the network should be able to
     # deal with that.
     # Test critic pass
-    value_out = actor.critic_pass([sample_obs], [], memories=memories)
+    value_out, memories_out = actor.critic_pass([sample_obs], [], memories=memories)
+    assert memories_out.shape == memories.shape
-    dists, value_out, _ = actor.get_dist_and_value([sample_obs], [], memories=memories)
+    dists, value_out, mem_out = actor.get_dist_and_value(
+        [sample_obs], [], memories=memories
+    )
+    if mem_out is not None:
+        assert mem_out.shape == memories.shape
     for dist in dists:
         assert isinstance(dist, GaussianDistInstance)
     for stream in stream_names:
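The memories tensor in the updated test is shaped (1, seq_len, 12) because memory_size counts the hidden and cell state together: the LSTM itself has hidden size memory_size // 2, and the two states travel packed side by side along the last axis. A small sketch of that convention (shapes only, variable names mine):

import torch

memory_size = 12
seq_len = 16
memories = torch.ones(1, seq_len, memory_size)
hidden, cell = torch.split(memories, memory_size // 2, dim=-1)
assert hidden.shape == (1, seq_len, 6) and cell.shape == (1, seq_len, 6)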

ml-agents/mlagents/trainers/tests/torch/test_utils.py (16 changes)

     assert entropies.shape == (1, len(dist_list))
     # Make sure the first action has higher probability than the others.
     assert log_probs.flatten()[0] > log_probs.flatten()[1]
+
+
+def test_masked_mean():
+    test_input = torch.tensor([1, 2, 3, 4, 5])
+    masks = torch.ones_like(test_input).bool()
+    mean = ModelUtils.masked_mean(test_input, masks=masks)
+    assert mean == 3.0
+
+    masks = torch.tensor([False, False, True, True, True])
+    mean = ModelUtils.masked_mean(test_input, masks=masks)
+    assert mean == 4.0
+
+    # Make sure it works if all masks are off
+    masks = torch.tensor([False, False, False, False, False])
+    mean = ModelUtils.masked_mean(test_input, masks=masks)
+    assert mean == 0.0

ml-agents/mlagents/trainers/torch/layers.py (36 changes)

     layer.weight.data *= kernel_gain
     _init_methods[bias_init](layer.bias.data)
     return layer
+
+
+def lstm_layer(
+    input_size: int,
+    hidden_size: int,
+    num_layers: int = 1,
+    batch_first: bool = True,
+    forget_bias: float = 1.0,
+    kernel_init: Initialization = Initialization.XavierGlorotUniform,
+    bias_init: Initialization = Initialization.Zero,
+) -> torch.nn.Module:
+    """
+    Creates a torch.nn.LSTM and initializes its weights and biases. Provides a
+    forget_bias offset as is done in TensorFlow.
+    """
+    lstm = torch.nn.LSTM(input_size, hidden_size, num_layers, batch_first=batch_first)
+    # Add forget_bias to forget gate bias
+    for name, param in lstm.named_parameters():
+        # Each weight and bias is a concatenation of 4 matrices
+        if "weight" in name:
+            for idx in range(4):
+                block_size = param.shape[0] // 4
+                _init_methods[kernel_init](
+                    param.data[idx * block_size : (idx + 1) * block_size]
+                )
+        if "bias" in name:
+            for idx in range(4):
+                block_size = param.shape[0] // 4
+                _init_methods[bias_init](
+                    param.data[idx * block_size : (idx + 1) * block_size]
+                )
+                if idx == 1:
+                    param.data[idx * block_size : (idx + 1) * block_size].add_(
+                        forget_bias
+                    )
+    return lstm
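One subtlety worth noting (my reading of the loop above, not something the commit documents): torch.nn.LSTM keeps two bias vectors per layer, bias_ih_l0 and bias_hh_l0, and sums them inside the cell. Since "bias" matches both names, forget_bias lands in both vectors, so the effective forget-gate offset is 2 * forget_bias, whereas TensorFlow applies forget_bias once. A quick check of the parameter layout the loop relies on:

import torch

lstm = torch.nn.LSTM(8, 4)  # input_size=8, hidden_size=4, num_layers=1
names = [name for name, _ in lstm.named_parameters()]
# Two weight and two bias tensors per layer, each a concat of 4 gate blocks
assert names == ["weight_ih_l0", "weight_hh_l0", "bias_ih_l0", "bias_hh_l0"]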

ml-agents/mlagents/trainers/torch/networks.py (46 changes)

 from mlagents.trainers.settings import NetworkSettings
 from mlagents.trainers.torch.utils import ModelUtils
 from mlagents.trainers.torch.decoders import ValueHeads
+from mlagents.trainers.torch.layers import lstm_layer

 ActivationFunction = Callable[[torch.Tensor], torch.Tensor]
 EncoderFunction = Callable[
...
         )
         if self.use_lstm:
-            self.lstm = nn.LSTM(self.h_size, self.m_size // 2, 1)
+            self.lstm = lstm_layer(self.h_size, self.m_size // 2, batch_first=True)
         else:
             self.lstm = None
...
             raise Exception("No valid inputs to network.")
         if self.use_lstm:
-            encoding = encoding.view([sequence_length, -1, self.h_size])
+            # Resize to (batch, sequence length, encoding size)
+            encoding = encoding.reshape([-1, sequence_length, self.h_size])
-            encoding, memories = self.lstm(
-                encoding.contiguous(),
-                (memories[0].contiguous(), memories[1].contiguous()),
-            )
-            encoding = encoding.view([-1, self.m_size // 2])
+            encoding, memories = self.lstm(encoding, memories)
+            encoding = encoding.reshape([-1, self.m_size // 2])
             memories = torch.cat(memories, dim=-1)
         return encoding, memories
...
         vec_inputs: List[torch.Tensor],
         vis_inputs: List[torch.Tensor],
         memories: Optional[torch.Tensor] = None,
-    ) -> Dict[str, torch.Tensor]:
+        sequence_length: int = 1,
+    ) -> Tuple[Dict[str, torch.Tensor], torch.Tensor]:
         """
         Get value outputs for the given obs.
         :param vec_inputs: List of vector inputs as tensors.
...
         vec_inputs: List[torch.Tensor],
         vis_inputs: List[torch.Tensor],
         memories: Optional[torch.Tensor] = None,
-    ) -> Dict[str, torch.Tensor]:
-        encoding, _ = self.network_body(vec_inputs, vis_inputs, memories=memories)
-        return self.value_heads(encoding)
+        sequence_length: int = 1,
+    ) -> Tuple[Dict[str, torch.Tensor], torch.Tensor]:
+        encoding, memories_out = self.network_body(
+            vec_inputs, vis_inputs, memories=memories, sequence_length=sequence_length
+        )
+        return self.value_heads(encoding), memories_out

     def get_dist_and_value(
         self,

         vec_inputs: List[torch.Tensor],
         vis_inputs: List[torch.Tensor],
         memories: Optional[torch.Tensor] = None,
-    ) -> Dict[str, torch.Tensor]:
+        sequence_length: int = 1,
+    ) -> Tuple[Dict[str, torch.Tensor], torch.Tensor]:
+        actor_mem, critic_mem = None, None
         if memories is not None:
-            _, critic_mem = torch.split(memories, self.half_mem_size, -1)
+            actor_mem, critic_mem = torch.split(memories, self.half_mem_size, -1)
-        else:
-            critic_mem = None
-        value_outputs, _memories = self.critic(
-            vec_inputs, vis_inputs, memories=critic_mem
-        )
-        return value_outputs
+        value_outputs, critic_mem_out = self.critic(
+            vec_inputs, vis_inputs, memories=critic_mem, sequence_length=sequence_length
+        )
+        if actor_mem is not None:
+            # Make memories with the actor mem unchanged
+            memories_out = torch.cat([actor_mem, critic_mem_out], dim=-1)
+        else:
+            memories_out = None
+        return value_outputs, memories_out

     def get_dist_and_value(
         self,
...
             vec_inputs, vis_inputs, memories=critic_mem, sequence_length=sequence_length
         )
         if self.use_lstm:
-            mem_out = torch.cat([actor_mem_outs, critic_mem_outs], dim=1)
+            mem_out = torch.cat([actor_mem_outs, critic_mem_outs], dim=-1)
         else:
             mem_out = None
         return dists, value_outputs, mem_out
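The bookkeeping behind the new critic_pass: the combined memories tensor is laid out as [actor half | critic half] along the last axis, each half_mem_size wide. Only the critic half is advanced by the critic's LSTM; the actor half is re-attached unchanged so callers keep passing a single tensor around. A shape-level sketch (the + 1 update is a stand-in, not the real LSTM):

import torch

half_mem_size = 6
memories = torch.arange(12.0).reshape(1, 1, 12)
actor_mem, critic_mem = torch.split(memories, half_mem_size, dim=-1)
critic_mem_out = critic_mem + 1  # stand-in for the critic network's memory update
memories_out = torch.cat([actor_mem, critic_mem_out], dim=-1)
assert torch.equal(memories_out[..., :half_mem_size], actor_mem)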

ml-agents/mlagents/trainers/torch/utils.py (10 changes)

         else:
             all_probs = torch.cat(all_probs_list, dim=-1)
         return log_probs, entropies, all_probs
+
+    @staticmethod
+    def masked_mean(tensor: torch.Tensor, masks: torch.Tensor) -> torch.Tensor:
+        """
+        Returns the mean of the tensor, ignoring the values where the mask is False.
+        Used for masking out loss functions.
+        :param tensor: Tensor which needs mean computation.
+        :param masks: Boolean tensor of masks with same dimension as tensor.
+        """
+        return (tensor * masks).sum() / torch.clamp(masks.float().sum(), min=1.0)
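A usage sketch for masked_mean (hypothetical values, matching the "masking out loss functions" use named in the docstring): average a per-timestep loss while ignoring padded steps.

import torch
from mlagents.trainers.torch.utils import ModelUtils

per_step_loss = torch.tensor([0.5, 0.7, 0.9, 0.0, 0.0])
valid = torch.tensor([True, True, True, False, False])  # last two steps are padding
loss = ModelUtils.masked_mean(per_step_loss, masks=valid)
assert torch.isclose(loss, torch.tensor(0.7))  # (0.5 + 0.7 + 0.9) / 3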