
Improvements and new tests

/exp-bullet-hell-trainer
vincentpierre, 4 years ago
Commit: e14e1c4d
6 files changed, 123 insertions(+), 47 deletions(-)
1. Project/Assets/ML-Agents/Examples/Bullet/Scripts/AttentionSensorComponent.cs (8 changes)
2. config/ppo/Bullet.yaml (4 changes)
3. ml-agents/mlagents/trainers/agent_processor.py (3 changes)
4. ml-agents/mlagents/trainers/tests/torch/test_layers.py (74 changes)
5. ml-agents/mlagents/trainers/torch/layers.py (56 changes)
6. ml-agents/mlagents/trainers/torch/networks.py (25 changes)

Project/Assets/ML-Agents/Examples/Bullet/Scripts/AttentionSensorComponent.cs (8 changes)


if (m_CurrentNumObservables >= m_MaxNumObservables)
{
    break;
}
- // m_ObservationBuffer[m_CurrentNumObservables * m_ObservableSize + 0] = (b.transform.position.x - m_AgentTransform.parent.position.x) / 10f;
- // m_ObservationBuffer[m_CurrentNumObservables * m_ObservableSize + 1] = (b.transform.position.z - m_AgentTransform.parent.position.z) / 10f;
- m_ObservationBuffer[m_CurrentNumObservables * m_ObservableSize + 0] = (b.transform.position.x - m_AgentTransform.position.x) / 10f;
- m_ObservationBuffer[m_CurrentNumObservables * m_ObservableSize + 1] = (b.transform.position.z - m_AgentTransform.position.z) / 10f;
+ m_ObservationBuffer[m_CurrentNumObservables * m_ObservableSize + 0] = (b.transform.position.x - m_AgentTransform.parent.position.x) / 10f;
+ m_ObservationBuffer[m_CurrentNumObservables * m_ObservableSize + 1] = (b.transform.position.z - m_AgentTransform.parent.position.z) / 10f;
+ //m_ObservationBuffer[m_CurrentNumObservables * m_ObservableSize + 0] = (b.transform.position.x - m_AgentTransform.position.x) / 10f;
+ //m_ObservationBuffer[m_CurrentNumObservables * m_ObservableSize + 1] = (b.transform.position.z - m_AgentTransform.position.z) / 10f;
m_ObservationBuffer[m_CurrentNumObservables * m_ObservableSize + 2] = b.transform.forward.x;
m_ObservationBuffer[m_CurrentNumObservables * m_ObservableSize + 3] = b.transform.forward.z;
m_CurrentNumObservables += 1;
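For orientation, a minimal sketch of the buffer layout this hunk writes, in Python with hypothetical names (the sensor itself is C#): each entity occupies four consecutive floats, with x/z positions taken relative to a reference transform and scaled by 1/10.

def write_entity_obs(buffer, index, obs_size, entity_pos, entity_fwd, ref_pos):
    # Hypothetical helper mirroring the C# layout above.
    base = index * obs_size
    buffer[base + 0] = (entity_pos[0] - ref_pos[0]) / 10.0  # normalized x offset
    buffer[base + 1] = (entity_pos[2] - ref_pos[2]) / 10.0  # normalized z offset
    buffer[base + 2] = entity_fwd[0]  # entity forward.x
    buffer[base + 3] = entity_fwd[2]  # entity forward.z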

config/ppo/Bullet.yaml (4 changes)


gamma: 0.99
strength: 1.0
keep_checkpoints: 5
- max_steps: 5000000
+ max_steps: 50000000
- summary_freq: 10000
+ summary_freq: 100000
threaded: true
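Note that both limits are scaled by ten together, so the run still logs the same number of summaries (5000000 / 10000 = 50000000 / 100000 = 500) over the longer training budget.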

ml-agents/mlagents/trainers/agent_processor.py (3 changes)


done = terminated # Since this is an ongoing step
interrupted = step.interrupted if terminated else False
# Add the outputs of the last eval
if idx >= len(stored_take_action_outputs["action"]):
    idx = 0
    print("Something went wrong with the idx in stored_take_action_outputs")
action = stored_take_action_outputs["action"][idx]
if self.policy.use_continuous_act:
    action_pre = stored_take_action_outputs["pre_action"][idx]

ml-agents/mlagents/trainers/tests/torch/test_layers.py (74 changes)


    Initialization,
    LSTM,
+   MultiHeadAttention,
+   SimpleTransformer
)

    # create a key input with some keys all 0
    key = torch.ones((b, n_k, k_size))
    mask = torch.zeros((b, n_k))
    mask[:, i] = 1
-   _, attention = mha.forward(query, key, value)
+   _, attention = mha.forward(query, key, value, mask)
    for i in range(n_k):
        if i % 3 == 0:
            assert torch.sum(attention[:, :, :, i] ** 2) < epsilon

    error.backward()
    optimizer.step()
    assert error.item() < 0.5
def test_zero_mask_layer():
    batch_size, size = 10, 30

    def generate_input_helper(pattern):
        _input = torch.zeros((batch_size, 0, size))
        for i in range(len(pattern)):
            if i % 2 == 0:
                _input = torch.cat([_input, torch.rand((batch_size, pattern[i], size))], dim=1)
            else:
                _input = torch.cat([_input, torch.zeros((batch_size, pattern[i], size))], dim=1)
        return _input

    masking_pattern_1 = [3, 2, 3, 4]
    masking_pattern_2 = [5, 7, 8, 2]
    input_1 = generate_input_helper(masking_pattern_1)
    input_2 = generate_input_helper(masking_pattern_2)
    masks = SimpleTransformer.get_masks([input_1, input_2])
    assert len(masks) == 2
    masks_1 = masks[0]
    masks_2 = masks[1]
    assert masks_1.shape == (batch_size, sum(masking_pattern_1))
    assert masks_2.shape == (batch_size, sum(masking_pattern_2))
    # Even-indexed chunks hold random data (mask 0); odd-indexed chunks are zero padding (mask 1).
    for masks_i, pattern in [(masks_1, masking_pattern_1), (masks_2, masking_pattern_2)]:
        offset = 0
        for j, chunk in enumerate(pattern):
            expected = 0 if j % 2 == 0 else 1
            assert torch.all(masks_i[:, offset : offset + chunk] == expected)
            offset += chunk
def test_simple_transformer_training():
    np.random.seed(1336)
    torch.manual_seed(1336)
    size, n_h, n_k, n_q = 3, 10, 5, 1
    embedding_size = 64
    transformer = SimpleTransformer(size, [size], embedding_size)
    l_layer = linear_layer(embedding_size, size)
    optimizer = torch.optim.Adam(list(transformer.parameters()) + list(l_layer.parameters()), lr=0.001)
    batch_size = 200
    point_range = 3
    init_error = -1.0
    for _ in range(100):
        center = torch.rand((batch_size, size)) * point_range * 2 - point_range
        key = torch.rand((batch_size, n_k, size)) * point_range * 2 - point_range
        with torch.no_grad():
            # Create the target: the key closest to the query in Euclidean distance
            distance = torch.sum((center.reshape((batch_size, 1, size)) - key) ** 2, dim=2)
            argmin = torch.argmin(distance, dim=1)
            target = []
            for i in range(batch_size):
                target += [key[i, argmin[i], :]]
            target = torch.stack(target, dim=0)
            target = target.detach()
        masks = SimpleTransformer.get_masks([key])
        prediction = transformer.forward(center, [key], masks)
        prediction = l_layer(prediction)
        prediction = prediction.reshape((batch_size, size))
        error = torch.mean((prediction - target) ** 2, dim=1)
        error = torch.mean(error) / 2
        if init_error == -1.0:
            init_error = error.item()
        else:
            assert error.item() < init_error
        print(error.item())
        optimizer.zero_grad()
        error.backward()
        optimizer.step()
    assert error.item() < 0.3
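The masking property these tests assert can be seen in isolation. Below is a minimal sketch (not part of the commit; the NEG_INF value is an assumed stand-in) of how adding a large negative constant to masked scores drives their softmax weight to zero:

import torch

scores = torch.tensor([[1.0, 2.0, 3.0, 4.0]])  # attention scores over 4 keys
mask = torch.tensor([[0.0, 0.0, 1.0, 0.0]])    # 1 marks a padded (all-zero) key
NEG_INF = -1e6                                 # assumed stand-in for self.NEG_INF
masked = (1 - mask) * scores + mask * NEG_INF
att = torch.softmax(masked, dim=-1)
assert att[0, 2] < 1e-7  # the padded key gets essentially no attention weight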

ml-agents/mlagents/trainers/torch/layers.py (56 changes)


        query: torch.Tensor,
        key: torch.Tensor,
        value: torch.Tensor,
-       key_mask: torch.Tensor,
+       key_mask: Optional[torch.Tensor] = None,
+       number_of_keys: int = -1,
+       number_of_queries: int = -1
    ) -> Tuple[torch.Tensor, torch.Tensor]:

        n_k = number_of_keys if number_of_keys != -1 else key.size(1)
-       # Create a key mask : Only 1 if all values are 0 # shape = (b, n_k)
-       # key_mask = torch.sum(key ** 2, axis=2) < 0.01
-       key_mask = key_mask.reshape(b, 1, 1, n_k)
        query = self.fc_q(query)  # (b, n_q, h*d)
        key = self.fc_k(key)  # (b, n_k, h*d)

        qk = torch.matmul(query, key)  # (b, h, n_q, n_k)
-       qk = (1 - key_mask) * qk / (self.embedding_size ** 0.5) + key_mask * self.NEG_INF
+       if key_mask is None:
+           qk = qk / (self.embedding_size ** 0.5)
+       else:
+           key_mask = key_mask.reshape(b, 1, 1, n_k)
+           qk = (1 - key_mask) * qk / (self.embedding_size ** 0.5) + key_mask * self.NEG_INF
        att = torch.softmax(qk, dim=3)  # (b, h, n_q, n_k)

        out = self.fc_out(value_attention)  # (b, n_q, emb)
        return out, att

- class ZeroObservationMask(torch.nn.Module):
-     """
-     Takes a List of Tensors and returns a List of mask Tensor with 1 if the input was
-     all zeros and 0 otherwise. This is used in the Attention layer to mask the padding
-     observations.
-     """
-
-     def __init__(self):
-         super().__init__()
-
-     def forward(self, observations: List[torch.Tensor]):
-         with torch.no_grad():
-             # Generate the masking tensors for each entities tensor (mask only if all zeros)
-             key_masks: List[torch.Tensor] = [
-                 (torch.sum(ent ** 2, axis=2) < 0.01).type(torch.FloatTensor)
-                 for ent in observations
-             ]
-         return key_masks
class SimpleTransformer(torch.nn.Module):
    """
    A simple architecture inspired from https://arxiv.org/pdf/1909.07528.pdf that uses

    EPISLON = 1e-7

    def __init__(
-       self, x_self_size: int, entities_sizes: List[int], embedding_size: int
+       self, x_self_size: int, entities_sizes: List[int], embedding_size: int, output_size: Optional[int] = None
    ):
        super().__init__()
        self.self_size = x_self_size

            embedding_size=embedding_size,
        )
        self.residual_layer = LinearEncoder(embedding_size, 1, embedding_size)
        if output_size is None:
            output_size = embedding_size
        self.x_self_residual_layer = LinearEncoder(embedding_size + x_self_size, 1, output_size)

-   def forward(self, x_self: torch.Tensor, entities: List[torch.Tensor], key_masks: List[torch.Tensor]):
+   def forward(self, x_self: torch.Tensor, entities: List[torch.Tensor], key_masks: List[torch.Tensor]) -> torch.Tensor:
        # Gather the maximum number of entities information
        if self.entities_num_max_elements is None:
            self.entities_num_max_elements = []

        numerator = torch.sum(output * (1 - mask).reshape(-1, max_num_ent, 1), dim=1)
        denominator = torch.sum(1 - mask, dim=1, keepdim=True) + self.EPISLON
        output = numerator / denominator
        # Residual between x_self and the output of the module
        output = self.x_self_residual_layer(torch.cat([output, x_self], dim=1))

    @staticmethod
    def get_masks(observations: List[torch.Tensor]) -> List[torch.Tensor]:
        """
        Takes a List of Tensors and returns a List of mask Tensor with 1 if the input was
        all zeros (on dimension 2) and 0 otherwise. This is used in the Attention
        layer to mask the padding observations.
        """
        with torch.no_grad():
            # Generate the masking tensors for each entities tensor (mask only if all zeros)
            key_masks: List[torch.Tensor] = [
                (torch.sum(ent ** 2, axis=2) < 0.01).type(torch.FloatTensor)
                for ent in observations
            ]
        return key_masks
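Since the static get_masks replaces the ZeroObservationMask module, a typical call site now looks like the sketch below (shapes hypothetical; the call pattern is taken from the new tests):

import torch

batch_size, n_k, size = 4, 5, 3
x_self = torch.rand((batch_size, size))
entities = torch.rand((batch_size, n_k, size))
entities[:, 1, :] = 0.0  # an all-zero entity slot, i.e. padding

transformer = SimpleTransformer(size, [size], embedding_size=64)
masks = SimpleTransformer.get_masks([entities])  # masks[0][:, 1] == 1
output = transformer.forward(x_self, [entities], masks)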

ml-agents/mlagents/trainers/torch/networks.py (25 changes)


from mlagents.trainers.settings import NetworkSettings
from mlagents.trainers.torch.utils import ModelUtils
from mlagents.trainers.torch.decoders import ValueHeads
- from mlagents.trainers.torch.layers import LSTM, LinearEncoder, SimpleTransformer, ZeroObservationMask
+ from mlagents.trainers.torch.layers import LSTM, LinearEncoder, SimpleTransformer
from mlagents.trainers.torch.model_serialization import exporting_to_onnx
ActivationFunction = Callable[[torch.Tensor], torch.Tensor]

        self.use_fc = False
        if not self.use_fc:
-           emb_size = 32
+           emb_size = 16
-           self.masking_module = ZeroObservationMask()
            self.transformer = SimpleTransformer(
-               x_self_size=32,
-               entities_sizes=[32],  # hard coded, 4 obs per entity
+               x_self_size=16,
+               entities_sizes=[16],  # hard coded, 4 obs per entity
+               output_size=self.h_size,
            )
-           self.self_embedding = LinearEncoder(6, 2, 32)
-           self.obs_embeding = LinearEncoder(4, 2, 32)
+           self.self_embedding = LinearEncoder(6, 2, 16)
+           self.obs_embeding = LinearEncoder(4, 2, 16)
-           self.linear_encoder = LinearEncoder(
-               emb_size + 32, network_settings.num_layers - 1, self.h_size
-           )
+           # self.linear_encoder = LinearEncoder(
+           #     emb_size + 16, network_settings.num_layers - 1, self.h_size
+           # )
        else:
            self.linear_encoder = LinearEncoder(
                6 + 4 * 20, network_settings.num_layers + 2, self.h_size

            x_self = self.self_embedding(processed_vec)
            var_len_input = vis_inputs[0].reshape(-1, 20, 4)
            processed_var_len_input = self.obs_embeding(var_len_input)
-           output = self.transformer(x_self, [processed_var_len_input], self.masking_module([var_len_input]))
+           masks = SimpleTransformer.get_masks([var_len_input])
+           output = self.transformer(x_self, [processed_var_len_input], masks)
            # # TODO : This is a Hack
            # var_len_input = vis_inputs[0].reshape(-1, 20, 4)

            # 1 - key_mask, dim=1, keepdim=True
            # ) + 0.001 )  # average pooling
-           encoding = self.linear_encoder(torch.cat([output, x_self], dim=1))
+           encoding = output
        else:
            encoding = self.linear_encoder(torch.cat([vis_inputs[0].reshape(-1, 80), processed_vec], dim=1))
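Taken together, the variable-length observation path after this change reads as the sketch below (assembled from the hunks above; shapes inferred, with 20 entities of 4 features hard coded in this experimental branch):

x_self = self.self_embedding(processed_vec)              # (b, 6)  -> (b, 16)
var_len_input = vis_inputs[0].reshape(-1, 20, 4)         # (b, 20, 4) entity slots
processed = self.obs_embeding(var_len_input)             # (b, 20, 16)
masks = SimpleTransformer.get_masks([var_len_input])     # 1 where a slot is all zeros
encoding = self.transformer(x_self, [processed], masks)  # (b, h_size); output_size=self.h_size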
