
WIP integrate attention to networkbody

Branch: /develop/singular-embeddings
vincentpierre, 4 years ago
Current commit: 56972f56
7 files changed, 270 insertions and 48 deletions
  1. ml-agents-envs/mlagents_envs/rpc_utils.py (6 changes)
  2. ml-agents/mlagents/trainers/tests/torch/test_attention.py (2 changes)
  3. ml-agents/mlagents/trainers/tests/torch/test_utils.py (73 changes)
  4. ml-agents/mlagents/trainers/torch/attention.py (74 changes)
  5. ml-agents/mlagents/trainers/torch/model_serialization.py (20 changes)
  6. ml-agents/mlagents/trainers/torch/networks.py (57 changes)
  7. ml-agents/mlagents/trainers/torch/utils.py (86 changes)

ml-agents-envs/mlagents_envs/rpc_utils.py (6 changes)


tuple(DimensionProperty(dim) for dim in obs.dimension_properties)
for obs in agent_info.observations
]
dim_props = [
dim_prop
if len(dim_prop) > 0
else (DimensionProperty.UNSPECIFIED,) * len(observation_shape[idx])
for idx, dim_prop in enumerate(dim_props)
]
sensor_specs = [
SensorSpec(obs_shape, dim_p)
for obs_shape, dim_p in zip(observation_shape, dim_props)
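For illustration only, a minimal standalone sketch of the fallback applied above (the function name and parameters are hypothetical, not the repo's): sensors that report no dimension properties get one DimensionProperty.UNSPECIFIED per observation axis before the SensorSpecs are built.

from typing import List, Tuple
from mlagents_envs.base_env import DimensionProperty

def pad_dimension_properties(
    observation_shapes: List[Tuple[int, ...]],
    dim_props: List[Tuple[DimensionProperty, ...]],
) -> List[Tuple[DimensionProperty, ...]]:
    # Empty property tuples are padded with UNSPECIFIED, one entry per axis,
    # mirroring the list comprehension in rpc_utils.py above.
    return [
        dim_prop
        if len(dim_prop) > 0
        else (DimensionProperty.UNSPECIFIED,) * len(observation_shapes[idx])
        for idx, dim_prop in enumerate(dim_props)
    ]

# pad_dimension_properties([(84, 84, 3)], [()])
# -> [(UNSPECIFIED, UNSPECIFIED, UNSPECIFIED)]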

ml-agents/mlagents/trainers/tests/torch/test_attention.py (2 changes)


size, n_k, = 3, 5
embedding_size = 64
entity_embeddings = EntityEmbeddings(size, [size], [n_k], embedding_size)
transformer = ResidualSelfAttention(embedding_size, [n_k])
transformer = ResidualSelfAttention(embedding_size, n_k)
l_layer = linear_layer(embedding_size, size)
optimizer = torch.optim.Adam(
list(transformer.parameters()) + list(l_layer.parameters()), lr=0.001

ml-agents/mlagents/trainers/tests/torch/test_utils.py (73 changes)


from mlagents.trainers.exception import UnityTrainerException
from mlagents.trainers.torch.encoders import VectorInput
from mlagents.trainers.tests.dummy_config import create_sensor_specs_with_shapes
from mlagents_envs.base_env import SensorSpec, DimensionProperty
def test_min_visual_size():

ModelUtils.soft_update(tm1, tm2, tau=1.0)
assert torch.equal(tm2.parameter, tm1.parameter)
def test_can_train_dim_property():
spec = SensorSpec(
(5, 5, 3),
(
DimensionProperty.UNSPECIFIED,
DimensionProperty.UNSPECIFIED,
DimensionProperty.UNSPECIFIED,
),
)
assert ModelUtils.can_encode_visual(spec)
assert not ModelUtils.can_encode_vector(spec)
assert not ModelUtils.can_encode_attention(spec)
spec = SensorSpec(
(5, 5, 3),
(
DimensionProperty.TRANSLATIONAL_EQUIVARIANCE,
DimensionProperty.TRANSLATIONAL_EQUIVARIANCE,
DimensionProperty.NONE,
),
)
assert ModelUtils.can_encode_visual(spec)
assert not ModelUtils.can_encode_vector(spec)
assert not ModelUtils.can_encode_attention(spec)
spec = SensorSpec(
(5, 5, 3, 5),
(
DimensionProperty.UNSPECIFIED,
DimensionProperty.UNSPECIFIED,
DimensionProperty.UNSPECIFIED,
DimensionProperty.UNSPECIFIED,
),
)
assert not ModelUtils.can_encode_visual(spec)
assert not ModelUtils.can_encode_vector(spec)
assert not ModelUtils.can_encode_attention(spec)
spec = SensorSpec(
(5, 6), (DimensionProperty.UNSPECIFIED, DimensionProperty.UNSPECIFIED)
)
assert not ModelUtils.can_encode_visual(spec)
assert not ModelUtils.can_encode_vector(spec)
assert not ModelUtils.can_encode_attention(spec)
spec = SensorSpec(
(5, 6),
(
DimensionProperty.TRANSLATIONAL_EQUIVARIANCE,
DimensionProperty.TRANSLATIONAL_EQUIVARIANCE,
),
)
assert not ModelUtils.can_encode_visual(spec)
assert not ModelUtils.can_encode_vector(spec)
assert not ModelUtils.can_encode_attention(spec)
spec = SensorSpec((5, 6), (DimensionProperty.VARIABLE_SIZE, DimensionProperty.NONE))
assert not ModelUtils.can_encode_visual(spec)
assert not ModelUtils.can_encode_vector(spec)
assert ModelUtils.can_encode_attention(spec)
spec = SensorSpec((5,), (DimensionProperty.UNSPECIFIED,))
assert not ModelUtils.can_encode_visual(spec)
assert ModelUtils.can_encode_vector(spec)
assert not ModelUtils.can_encode_attention(spec)
spec = SensorSpec((5,), (DimensionProperty.NONE,))
assert not ModelUtils.can_encode_visual(spec)
assert ModelUtils.can_encode_vector(spec)
assert not ModelUtils.can_encode_attention(spec)

ml-agents/mlagents/trainers/torch/attention.py (74 changes)


class MultiHeadAttention(torch.nn.Module):
"""
Multi Head Attention module. We do not use the regular Torch implementation since
Barracuda does not support some operators it uses.
Takes as input to the forward method 3 tensors:
- query: of dimensions (batch_size, number_of_queries, embedding_size)
- key: of dimensions (batch_size, number_of_keys, embedding_size)
- value: of dimensions (batch_size, number_of_keys, embedding_size)
The forward method will return 2 tensors:
- The output: (batch_size, number_of_queries, embedding_size)
- The attention matrix: (batch_size, num_heads, number_of_queries, number_of_keys)
"""
"""
Multi Head Attention module. We do not use the regular Torch implementation since
Barracuda does not support some operators it uses.
Takes as input to the forward method 3 tensors:
- query: of dimensions (batch_size, number_of_queries, embedding_size)
- key: of dimensions (batch_size, number_of_keys, embedding_size)
- value: of dimensions (batch_size, number_of_keys, embedding_size)
The forward method will return 2 tensors:
- The output: (batch_size, number_of_queries, embedding_size)
- The attention matrix: (batch_size, num_heads, number_of_queries, number_of_keys)
:param embedding_size: The size of the embeddings that will be generated (should be
divisible by num_heads)
:param total_max_elements: The maximum total number of entities that can be passed to
the module
:param num_heads: The number of heads of the attention module
"""
super().__init__()
self.n_heads = num_heads
self.head_size: int = embedding_size // self.n_heads

class EntityEmbeddings(torch.nn.Module):
"""
"""
def __init__(
self,
x_self_size: int,

concat_self: bool = True,
):
"""
A module that generates embeddings for a variable number of entities given "self"
encoding as well as a representation of each entity of each type. The expected
input of the forward method will be a list of tensors: each element of the
list corresponds to a type of entity, the dimension of each tensor is :
[batch_size, max_num_entities, entity_size]
:param x_self_size: The size of the self embedding that will be concatenated
with the entities
:param entity_sizes: The size of each entity type
:param entity_num_max_elements: A list of maximum number of entities, must be
the same length as the number of entity tensors that will be passed to the
forward method.
:param embedding_size: The size of the output embeddings
:param concat_self: If true, the x_self will be concatenated with the entities
before embedding
"""
super().__init__()
self.self_size: int = x_self_size
self.entity_sizes: List[int] = entity_sizes

def forward(
self, x_self: torch.Tensor, entities: List[torch.Tensor]
) -> Tuple[torch.Tensor, int]:
) -> torch.Tensor:
if self.concat_self:
# Concatenate all observations with self
self_and_ent: List[torch.Tensor] = []

self_and_ent.append(torch.cat([expanded_self, ent], dim=2))
else:
self_and_ent = entities
# Encode and concatenate entites
# Encode and concatenate entities
encoded_entities = torch.cat(
[ent_encoder(x) for ent_encoder, x in zip(self.ent_encoders, self_and_ent)],
dim=1,

class ResidualSelfAttention(torch.nn.Module):
"""
A simple architecture inspired from https://arxiv.org/pdf/1909.07528.pdf that uses
multi head self attention to encode information about a "Self" and a list of
relevant "Entities".
"""
self,
embedding_size: int,
entity_num_max_elements: List[int],
num_heads: int = 4,
self, embedding_size: int, total_max_elements: int, num_heads: int = 4
"""
A simple architecture inspired from https://arxiv.org/pdf/1909.07528.pdf that uses
multi head self attention to encode information about a "Self" and a list of
relevant "Entities".
:param embedding_size: The size of the embeddings that will be generated (should be
divisible by num_heads)
:param total_max_elements: The maximum total number of entities that can be passed to
the module
:param num_heads: The number of heads of the attention module
"""
self.entity_num_max_elements: List[int] = entity_num_max_elements
self.max_num_ent = sum(entity_num_max_elements)
self.max_num_ent = total_max_elements
self.attention = MultiHeadAttention(
num_heads=num_heads, embedding_size=embedding_size
)
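A rough usage sketch of how the two modules compose, based only on the constructor and forward calls visible in this diff (the test above and networks.py below); shapes are illustrative and assume this WIP branch is importable.

import torch
from mlagents.trainers.torch.attention import EntityEmbeddings, ResidualSelfAttention

batch, x_self_size, entity_size, max_entities, emb = 8, 16, 6, 5, 64
ee = EntityEmbeddings(x_self_size, [entity_size], [max_entities], emb)
rsa = ResidualSelfAttention(emb, max_entities)  # total max number of entities, as an int

x_self = torch.randn(batch, x_self_size)                     # the "self" encoding
entities = [torch.randn(batch, max_entities, entity_size)]   # one tensor per entity type
masks = EntityEmbeddings.get_masks(entities)                 # padding masks, as done in networks.py
qkv = ee(x_self, entities)                                   # (batch, max_entities, emb)
out = rsa(qkv, masks)                                        # expected (batch, emb), judging from the
                                                             # concatenation with encoded_self in networks.py
# Note: networks.py additionally layer-normalizes qkv before calling rsa (see that section below).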

ml-agents/mlagents/trainers/torch/model_serialization.py (20 changes)


for sen_spec in self.policy.behavior_spec.sensor_specs
if len(sen_spec.shape) == 3
]
dummy_var_len_obs = [
torch.zeros(batch_dim + [sen_spec.shape[0], sen_spec.shape[1]])
for sen_spec in self.policy.behavior_spec.sensor_specs
if len(sen_spec.shape) == 2
]
dummy_masks = torch.ones(
batch_dim + [sum(self.policy.behavior_spec.action_spec.discrete_branches)]
)

self.dummy_input = (dummy_vec_obs, dummy_vis_obs, dummy_masks, dummy_memories)
self.dummy_input = (
dummy_vec_obs,
dummy_vis_obs,
dummy_var_len_obs,
dummy_masks,
dummy_memories,
)
+ [
f"obs_{i}"
for i, sens_spec in enumerate(self.policy.behavior_spec.sensor_specs)
if len(sens_spec.shape) == 2
]
+ ["action_masks", "memories"]
)
self.dynamic_axes = {name: {0: "batch"} for name in self.input_names}
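As a standalone illustration of the dummy input construction above: a rank-2 sensor of shape (max_num_entities, entity_size) gets a zero tensor with a leading batch dimension for export. The shape values here are made up.

import torch

batch_dim = [1]
sen_spec_shape = (20, 6)  # hypothetical (max number of entities, per-entity size)
dummy_var_len_obs = torch.zeros(batch_dim + [sen_spec_shape[0], sen_spec_shape[1]])
print(dummy_var_len_obs.shape)  # torch.Size([1, 20, 6])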

ml-agents/mlagents/trainers/torch/networks.py (57 changes)


from mlagents.trainers.torch.encoders import VectorInput
from mlagents.trainers.buffer import AgentBuffer
from mlagents.trainers.trajectory import ObsUtil
from mlagents.trainers.torch.attention import EntityEmbeddings, ResidualSelfAttention
ActivationFunction = Callable[[torch.Tensor], torch.Tensor]

else 0
)
self.processors, self.embedding_sizes = ModelUtils.create_input_processors(
self.processors, self.embedding_sizes, var_len_indices = ModelUtils.create_input_processors(
sensor_specs,
self.h_size,
network_settings.vis_encode_type,

if len(var_len_indices) > 0:
# There are some variable length observations and they need to be processed separately
x_self_len = sum(self.embedding_sizes) # The size of the "self" embedding
entities_sizes = [sensor_specs[idx].shape[1] for idx in var_len_indices]
entities_max_len = [sensor_specs[idx].shape[0] for idx in var_len_indices]
self.entities_embeddings = EntityEmbeddings(
x_self_len, entities_sizes, entities_max_len, self.h_size
)
self.rsa = ResidualSelfAttention(self.h_size, sum(entities_max_len))
total_enc_size = x_self_len + self.h_size
n_layers = max(1, network_settings.num_layers - 2)
else:
total_enc_size = sum(self.embedding_sizes)
n_layers = max(1, network_settings.num_layers)
self.linear_encoder = LinearEncoder(
total_enc_size, network_settings.num_layers, self.h_size
)
self.linear_encoder = LinearEncoder(total_enc_size, n_layers, self.h_size)
if self.use_lstm:
self.lstm = LSTM(self.h_size, self.m_size)

sequence_length: int = 1,
) -> Tuple[torch.Tensor, torch.Tensor]:
encodes = []
var_len_inputs = [] # The list of variable length inputs
obs_input = inputs[idx]
processed_obs = processor(obs_input)
encodes.append(processed_obs)
if processor is not None:
# The input can be encoded without having to process other inputs
obs_input = inputs[idx]
processed_obs = processor(obs_input)
encodes.append(processed_obs)
else:
var_len_inputs.append(inputs[idx])
encoded_self = torch.cat(encodes, dim=1)
if len(var_len_inputs) > 0:
# Some inputs need to be processed with a variable length encoder
masks = EntityEmbeddings.get_masks(var_len_inputs)
qkv = self.entities_embeddings(encoded_self, var_len_inputs)
mu_qkv = torch.mean(qkv, dim=2, keepdim=True)
qkv = (qkv - mu_qkv) / (
torch.sqrt(torch.mean((qkv - mu_qkv) ** 2, dim=2, keepdim=True))
+ 0.0001
)
attention_embedding = self.rsa(qkv, masks)
encoded_self = torch.cat([encoded_self, attention_embedding], dim=1)
if len(encodes) == 0:
raise Exception("No valid inputs to network.")

self,
vec_inputs: List[torch.Tensor],
vis_inputs: List[torch.Tensor],
var_len_inputs: List[torch.Tensor],
masks: Optional[torch.Tensor] = None,
memories: Optional[torch.Tensor] = None,
) -> Tuple[Union[int, torch.Tensor], ...]:

self,
vec_inputs: List[torch.Tensor],
vis_inputs: List[torch.Tensor],
var_len_inputs: List[torch.Tensor],
masks: Optional[torch.Tensor] = None,
memories: Optional[torch.Tensor] = None,
) -> Tuple[Union[int, torch.Tensor], ...]:

start = 0
end = 0
vis_index = 0
var_len_index = 0
for i, enc in enumerate(self.network_body.processors):
if isinstance(enc, VectorInput):
# This is a vec_obs

start = end
else:
elif enc is not None:
else:
inputs.append(var_len_inputs[var_len_index])
var_len_index += 1
# End of code to convert the vec and vis obs into a list of inputs for the network
encoding, memories_out = self.network_body(
inputs, memories=memories, sequence_length=1

ml-agents/mlagents/trainers/torch/utils.py (86 changes)


)
from mlagents.trainers.settings import EncoderType, ScheduleType
from mlagents.trainers.exception import UnityTrainerException
from mlagents_envs.base_env import SensorSpec
from mlagents_envs.base_env import SensorSpec, DimensionProperty
class ModelUtils:

raise UnityTrainerException(f"Unsupported shape of {shape} for observation")
@staticmethod
def can_encode_visual(sensor_spec: SensorSpec) -> bool:
"""
Returns True if it is possible to create a visual embedding for the sensor
"""
if len(sensor_spec.shape) != 3:
return False
for conv_dim in [0, 1]:
prop = sensor_spec.dimension_property[conv_dim]
if (prop != DimensionProperty.UNSPECIFIED) and (
prop != DimensionProperty.TRANSLATIONAL_EQUIVARIANCE
):
return False
prop = sensor_spec.dimension_property[2]
if (
(prop != DimensionProperty.UNSPECIFIED)
and (prop != DimensionProperty.TRANSLATIONAL_EQUIVARIANCE)
and (prop != DimensionProperty.NONE)
):
return False
return True
@staticmethod
def can_encode_vector(sensor_spec: SensorSpec) -> bool:
"""
Returns True if it is possible to create a vector embedding for the sensor
"""
if len(sensor_spec.shape) != 1:
return False
prop = sensor_spec.dimension_property[0]
if (prop != DimensionProperty.UNSPECIFIED) and (prop != DimensionProperty.NONE):
return False
return True
@staticmethod
def can_encode_attention(sensor_spec: SensorSpec) -> bool:
"""
Returns True if it is possible to create an attention embedding for the sensor
"""
if len(sensor_spec.shape) != 2:
return False
if sensor_spec.dimension_property[0] != DimensionProperty.VARIABLE_SIZE:
return False
if sensor_spec.dimension_property[1] != DimensionProperty.NONE:
return False
return True
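A small helper, not part of this commit, that summarizes how these three predicates route a sensor, in the same order create_input_processors uses below; it assumes this branch of ml-agents is importable.

from mlagents_envs.base_env import SensorSpec, DimensionProperty
from mlagents.trainers.torch.utils import ModelUtils

def encoder_family(spec: SensorSpec) -> str:
    # Attention first, then the fixed-size visual / vector encoders, else unsupported,
    # mirroring create_input_processors below.
    if ModelUtils.can_encode_attention(spec):
        return "attention"
    if ModelUtils.can_encode_visual(spec):
        return "visual"
    if ModelUtils.can_encode_vector(spec):
        return "vector"
    return "unsupported"

# A rank-2 observation marked (VARIABLE_SIZE, NONE) goes to the attention path:
spec = SensorSpec((5, 6), (DimensionProperty.VARIABLE_SIZE, DimensionProperty.NONE))
assert encoder_family(spec) == "attention"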
@staticmethod
) -> Tuple[nn.ModuleList, List[int]]:
) -> Tuple[nn.ModuleList, List[int], List[int]]:
"""
Creates visual and vector encoders, along with their normalizers.
:param sensor_specs: List of SensorSpec that represent the observation dimensions.

:param unnormalized_inputs: Vector inputs that should not be normalized, and added to the vector
obs.
:param normalize: Normalize all vector inputs.
:return: Tuple of visual encoders and vector encoders each as a list.
:return: Tuple of:
- ModuleList of the encoders (None if the input needs to be processed with a variable length
observation encoder)
- A list of embedding sizes (0 if the input needs to be processed with a variable length
observation encoder)
- A list of the indices of the inputs that need to be processed by a variable length observation encoder.
for sen_spec in sensor_specs:
encoder, embedding_size = ModelUtils.get_encoder_for_obs(
sen_spec.shape, normalize, h_size, vis_encode_type
)
encoders.append(encoder)
embedding_sizes.append(embedding_size)
var_len_indices: List[int] = []
for idx, sen_spec in enumerate(sensor_specs):
if ModelUtils.can_encode_attention(sen_spec):
# This is a 2D tensor
# TODO : better if condition
var_len_indices.append(idx)
encoders.append(None)
embedding_sizes.append(0)
elif ModelUtils.can_encode_vector(sen_spec) or ModelUtils.can_encode_visual(
sen_spec
):
encoder, embedding_size = ModelUtils.get_encoder_for_obs(
sen_spec.shape, normalize, h_size, vis_encode_type
)
encoders.append(encoder)
embedding_sizes.append(embedding_size)
else:
raise UnityTrainerException(
f"The following Sensor is incompatible with the trainer: {sen_spec}"
)
return (nn.ModuleList(encoders), embedding_sizes)
return (nn.ModuleList(encoders), embedding_sizes, var_len_indices)
@staticmethod
def list_to_tensor(
