
[refactor] Refactor normalizers and encoders (#4275)

* Refactor normalizers and encoders

* Unify Critic and ValueNetwork

* Rename ActionVectorEncoder

* Update docstring of create_encoders

* Add docstring to UnnormalizedInputEncoder
Branch: /develop/add-fire
Commit: 74c99ec8 (committed 4 years ago)
4 files changed, with 176 insertions and 164 deletions.
  1. ml-agents/mlagents/trainers/sac/optimizer_torch.py (30 changed lines)
  2. ml-agents/mlagents/trainers/torch/encoders.py (113 changed lines)
  3. ml-agents/mlagents/trainers/torch/networks.py (164 changed lines)
  4. ml-agents/mlagents/trainers/torch/utils.py (33 changed lines)

ml-agents/mlagents/trainers/sac/optimizer_torch.py (30 changed lines)


from mlagents.trainers.optimizer.torch_optimizer import TorchOptimizer
from mlagents.trainers.policy.torch_policy import TorchPolicy
from mlagents.trainers.settings import NetworkSettings
from mlagents.trainers.torch.networks import Critic, QNetwork
from mlagents.trainers.torch.networks import ValueNetwork
from mlagents.trainers.torch.utils import ModelUtils
from mlagents.trainers.buffer import AgentBuffer
from mlagents_envs.timers import timed

act_size: List[int],
):
super().__init__()
self.q1_network = QNetwork(
stream_names, observation_shapes, network_settings, act_type, act_size
if act_type == ActionType.CONTINUOUS:
num_value_outs = 1
num_action_ins = sum(act_size)
else:
num_value_outs = sum(act_size)
num_action_ins = 0
self.q1_network = ValueNetwork(
stream_names,
observation_shapes,
network_settings,
num_action_ins,
num_value_outs,
self.q2_network = QNetwork(
stream_names, observation_shapes, network_settings, act_type, act_size
self.q2_network = ValueNetwork(
stream_names,
observation_shapes,
network_settings,
num_action_ins,
num_value_outs,
)
def forward(

self.policy.behavior_spec.action_type,
self.act_size,
)
self.target_network = Critic(
self.target_network = ValueNetwork(
self.stream_names,
self.policy.behavior_spec.observation_shapes,
policy_network_settings,

next_vis_obs.append(next_vis_ob)
# Copy normalizers from policy
self.value_network.q1_network.copy_normalization(
self.value_network.q1_network.network_body.copy_normalization(
self.value_network.q2_network.copy_normalization(
self.value_network.q2_network.network_body.copy_normalization(
self.policy.actor_critic.network_body
)
self.target_network.network_body.copy_normalization(
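Illustrative example, not part of the diff: in the optimizer changes above, the dedicated QNetwork is replaced by the unified ValueNetwork, and the only per-algorithm decision left is how many action inputs and value outputs to request. A minimal sketch of that selection logic, using a hypothetical helper name and assuming the ActionType enum import (not shown in the hunk):

from typing import List, Tuple
from mlagents_envs.base_env import ActionType  # assumed import path

def q_network_dims(act_type: ActionType, act_size: List[int]) -> Tuple[int, int]:
    """Hypothetical helper mirroring the branch above: returns
    (num_action_ins, num_value_outs) for a ValueNetwork used as a Q-network."""
    if act_type == ActionType.CONTINUOUS:
        # Continuous control: actions are fed in, one Q value per reward stream.
        return sum(act_size), 1
    # Discrete control: no action input, one Q value per action branch.
    return 0, sum(act_size)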

ml-agents/mlagents/trainers/torch/encoders.py (113 changed lines)


from typing import Tuple, Optional
from mlagents.trainers.exception import UnityTrainerException
class VectorEncoder(nn.Module):
def __init__(self, input_size, hidden_size, num_layers, **kwargs):
super().__init__(**kwargs)
self.layers = [nn.Linear(input_size, hidden_size)]
for _ in range(num_layers - 1):
self.layers.append(nn.Linear(hidden_size, hidden_size))
self.layers.append(nn.ReLU())
self.seq_layers = nn.Sequential(*self.layers)
def forward(self, inputs):
return self.seq_layers(inputs)
def __init__(self, vec_obs_size, **kwargs):
super().__init__(**kwargs)
def __init__(self, vec_obs_size: int):
super().__init__()
def forward(self, inputs):
def forward(self, inputs: torch.Tensor) -> torch.Tensor:
normalized_state = torch.clamp(
(inputs - self.running_mean)
/ torch.sqrt(self.running_variance / self.normalization_steps),

return normalized_state
def update(self, vector_input):
def update(self, vector_input: torch.Tensor) -> None:
steps_increment = vector_input.size()[0]
total_new_steps = self.normalization_steps + steps_increment
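For context: the forward pass above normalizes by (x - running_mean) / sqrt(running_variance / normalization_steps), which implies running_variance accumulates a sum of squared deviations rather than a variance. A self-contained sketch of a batched Welford-style update consistent with those fields (illustrative only, not copied from the diff):

import torch

def welford_batch_update(running_mean, running_sq_dev, steps, batch):
    # Illustrative: folds a batch into running statistics where running_sq_dev
    # is the accumulated sum of squared deviations over all steps seen so far.
    batch_size = batch.size(0)
    total_steps = steps + batch_size
    delta_old = batch - running_mean                      # deviation from the old mean
    new_mean = running_mean + delta_old.sum(0) / total_steps
    delta_new = batch - new_mean                          # deviation from the new mean
    new_sq_dev = running_sq_dev + (delta_old * delta_new).sum(0)
    return new_mean, new_sq_dev, total_steps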

return h, w
def pool_out_shape(h_w, kernel_size):
def pool_out_shape(h_w: Tuple[int, int], kernel_size: int) -> Tuple[int, int]:
class VectorEncoder(nn.Module):
def __init__(
self,
input_size: int,
hidden_size: int,
num_layers: int,
normalize: bool = False,
):
self.normalizer: Optional[Normalizer] = None
super().__init__()
self.layers = [nn.Linear(input_size, hidden_size)]
if normalize:
self.normalizer = Normalizer(input_size)
for _ in range(num_layers - 1):
self.layers.append(nn.Linear(hidden_size, hidden_size))
self.layers.append(nn.ReLU())
self.seq_layers = nn.Sequential(*self.layers)
def forward(self, inputs: torch.Tensor) -> None:
if self.normalizer is not None:
inputs = self.normalizer(inputs)
return self.seq_layers(inputs)
def copy_normalization(self, other_encoder: "VectorEncoder") -> None:
if self.normalizer is not None and other_encoder.normalizer is not None:
self.normalizer.copy_from(other_encoder.normalizer)
def update_normalization(self, inputs: torch.Tensor) -> None:
if self.normalizer is not None:
self.normalizer.update(inputs)
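The refactored VectorEncoder owns its optional Normalizer, so updating statistics and syncing a target network both go through the encoder itself. An illustrative usage sketch (sizes are made up, not part of the diff):

import torch

enc_policy = VectorEncoder(input_size=8, hidden_size=32, num_layers=2, normalize=True)
enc_target = VectorEncoder(input_size=8, hidden_size=32, num_layers=2, normalize=True)

obs = torch.randn(64, 8)                    # a batch of vector observations
enc_policy.update_normalization(obs)        # update running statistics on the policy side
enc_target.copy_normalization(enc_policy)   # keep the target encoder's stats in sync
hidden = enc_target(obs)                    # normalized, then encoded to shape (64, 32)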
class VectorAndUnnormalizedInputEncoder(VectorEncoder):
"""
Encoder for concatenated vector input (can be normalized) and unnormalized vector input.
This is used for passing inputs to the network that should not be normalized, such as
actions in the case of a Q function or task parameterizations. It will result in an encoder with
this structure:
____________ ____________ ____________
| Vector | | Normalize | | Fully |
| | --> | | --> | Connected | ___________
|____________| |____________| | | | Output |
____________ | | --> | |
|Unnormalized| | | |___________|
| Input | ---------------------> | |
|____________| |____________|
"""
def __init__(
self,
input_size: int,
hidden_size: int,
unnormalized_input_size: int,
num_layers: int,
normalize: bool = False,
):
super().__init__(
input_size + unnormalized_input_size,
hidden_size,
num_layers,
normalize=False,
)
if normalize:
self.normalizer = Normalizer(input_size)
else:
self.normalizer = None
def forward( # pylint: disable=W0221
self, inputs: torch.Tensor, unnormalized_inputs: Optional[torch.Tensor] = None
) -> None:
if unnormalized_inputs is None:
raise UnityTrainerException(
"Attempted to call an VectorAndUnnormalizedInputEncoder without an unnormalized input."
) # Fix mypy errors about method parameters.
if self.normalizer is not None:
inputs = self.normalizer(inputs)
return self.seq_layers(torch.cat([inputs, unnormalized_inputs], dim=-1))
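Following the docstring above, a typical use is a continuous-action Q function where observations are normalized but the concatenated actions are not. An illustrative call, not part of the diff (dimensions are hypothetical):

import torch

q_enc = VectorAndUnnormalizedInputEncoder(
    input_size=8,                # normalized vector observation size
    hidden_size=32,
    unnormalized_input_size=2,   # e.g. a 2-dimensional continuous action
    num_layers=2,
    normalize=True,
)
obs = torch.randn(64, 8)
actions = torch.randn(64, 2)
hidden = q_enc(obs, actions)     # actions bypass the Normalizer; output shape is (64, 32)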
def __init__(self, height, width, initial_channels, output_size):
def __init__(
self, height: int, width: int, initial_channels: int, output_size: int
):
super().__init__()
self.h_size = output_size
conv_1_hw = conv_output_shape((height, width), 8, 4)

self.conv2 = nn.Conv2d(16, 32, [4, 4], [2, 2])
self.dense = nn.Linear(self.final_flat, self.h_size)
def forward(self, visual_obs):
def forward(self, visual_obs: torch.Tensor) -> None:
conv_1 = torch.relu(self.conv1(visual_obs))
conv_2 = torch.relu(self.conv2(conv_1))
# hidden = torch.relu(self.dense(conv_2.view([-1, self.final_flat])))
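For reference, with the conv parameters above (8x8 kernel at stride 4, then 4x4 kernel at stride 2) and assuming the usual no-padding formula floor((h - k) / s) + 1, an 84x84 input works out as follows (illustrative arithmetic, not part of the diff):

h1, w1 = conv_output_shape((84, 84), 8, 4)   # (84 - 8) // 4 + 1 = 20 -> (20, 20)
h2, w2 = conv_output_shape((h1, w1), 4, 2)   # (20 - 4) // 2 + 1 = 9  -> (9, 9)
final_flat = h2 * w2 * 32                    # 9 * 9 * 32 = 2592 inputs to the dense layer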

ml-agents/mlagents/trainers/torch/networks.py (164 changed lines)


from typing import Callable, NamedTuple, List, Dict, Tuple
from typing import Callable, List, Dict, Tuple, Optional
import torch
from torch import nn

EPSILON = 1e-7
class NormalizerTensors(NamedTuple):
steps: torch.Tensor
running_mean: torch.Tensor
running_variance: torch.Tensor
encoded_act_size: int = 0,
):
super().__init__()
self.normalize = network_settings.normalize

else 0
)
(
self.visual_encoders,
self.vector_encoders,
self.vector_normalizers,
) = ModelUtils.create_encoders(
self.visual_encoders, self.vector_encoders = ModelUtils.create_encoders(
action_size=0,
unnormalized_inputs=encoded_act_size,
normalize=self.normalize,
)
if self.use_lstm:

def update_normalization(self, vec_inputs):
if self.normalize:
for idx, vec_input in enumerate(vec_inputs):
self.vector_normalizers[idx].update(vec_input)
for vec_input, vec_enc in zip(vec_inputs, self.vector_encoders):
vec_enc.update_normalization(vec_input)
for n1, n2 in zip(
self.vector_normalizers, other_network.vector_normalizers
):
n1.copy_from(n2)
for n1, n2 in zip(self.vector_encoders, other_network.vector_encoders):
n1.copy_normalization(n2)
def forward(self, vec_inputs, vis_inputs, memories=None, sequence_length=1):
def forward(
self,
vec_inputs: torch.Tensor,
vis_inputs: torch.Tensor,
actions: Optional[torch.Tensor] = None,
memories: Optional[torch.Tensor] = None,
sequence_length: int = 1,
) -> Tuple[torch.Tensor, torch.Tensor]:
if self.normalize:
vec_input = self.vector_normalizers[idx](vec_input)
hidden = encoder(vec_input)
if actions is not None:
hidden = encoder(vec_input, actions)
else:
hidden = encoder(vec_input)
vec_embeds.append(hidden)
vis_embeds = []

return embedding, memories
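With the optional actions argument added above, the same NetworkBody forward serves both the plain policy/value path and Q-style paths. A sketch of the two call shapes (variable names are illustrative, not part of the diff):

# Plain observation encoding (actor or state-value critic):
embedding, memories = network_body(
    vec_inputs, vis_inputs, memories=memories, sequence_length=sequence_length
)

# Q-style encoding: actions are routed to the unnormalized-input encoders.
embedding, memories = network_body(
    vec_inputs, vis_inputs, actions=actions, memories=memories, sequence_length=1
)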
class QNetwork(NetworkBody):
def __init__( # pylint: disable=W0231
class ValueNetwork(nn.Module):
def __init__(
act_type: ActionType,
act_size: List[int],
encoded_act_size: int = 0,
outputs_per_stream: int = 1,
self.normalize = network_settings.normalize
self.use_lstm = network_settings.memory is not None
self.h_size = network_settings.hidden_units
self.m_size = (
network_settings.memory.memory_size
if network_settings.memory is not None
else 0
self.network_body = NetworkBody(
observation_shapes, network_settings, encoded_act_size=encoded_act_size
(
self.visual_encoders,
self.vector_encoders,
self.vector_normalizers,
) = ModelUtils.create_encoders(
observation_shapes,
self.h_size,
network_settings.num_layers,
network_settings.vis_encode_type,
action_size=sum(act_size) if act_type == ActionType.CONTINUOUS else 0,
self.value_heads = ValueHeads(
stream_names, network_settings.hidden_units, outputs_per_stream
if self.use_lstm:
self.lstm = nn.LSTM(self.h_size, self.m_size // 2, 1)
else:
self.lstm = None
if act_type == ActionType.DISCRETE:
self.q_heads = ValueHeads(
stream_names, network_settings.hidden_units, sum(act_size)
)
else:
self.q_heads = ValueHeads(stream_names, network_settings.hidden_units)
def forward( # pylint: disable=W0221
def forward(
memories: torch.Tensor = None,
actions: Optional[torch.Tensor] = None,
memories: Optional[torch.Tensor] = None,
actions: torch.Tensor = None,
vec_embeds = []
for i, (enc, norm) in enumerate(
zip(self.vector_encoders, self.vector_normalizers)
):
vec_input = vec_inputs[i]
if self.normalize:
vec_input = norm(vec_input)
if actions is not None:
hidden = enc(torch.cat([vec_input, actions], dim=-1))
else:
hidden = enc(vec_input)
vec_embeds.append(hidden)
vis_embeds = []
for idx, encoder in enumerate(self.visual_encoders):
vis_input = vis_inputs[idx]
vis_input = vis_input.permute([0, 3, 1, 2])
hidden = encoder(vis_input)
vis_embeds.append(hidden)
# embedding = vec_embeds[0]
if len(vec_embeds) > 0 and len(vis_embeds) > 0:
vec_embeds_tensor = torch.stack(vec_embeds, dim=-1).sum(dim=-1)
vis_embeds_tensor = torch.stack(vis_embeds, dim=-1).sum(dim=-1)
embedding = torch.stack([vec_embeds_tensor, vis_embeds_tensor], dim=-1).sum(
dim=-1
)
elif len(vec_embeds) > 0:
embedding = torch.stack(vec_embeds, dim=-1).sum(dim=-1)
elif len(vis_embeds) > 0:
embedding = torch.stack(vis_embeds, dim=-1).sum(dim=-1)
else:
raise Exception("No valid inputs to network.")
if self.lstm is not None:
embedding = embedding.view([sequence_length, -1, self.h_size])
memories_tensor = torch.split(memories, self.m_size // 2, dim=-1)
embedding, memories = self.lstm(embedding, memories_tensor)
embedding = embedding.view([-1, self.m_size // 2])
memories = torch.cat(memories_tensor, dim=-1)
output, _ = self.q_heads(embedding)
embedding, memories = self.network_body(
vec_inputs, vis_inputs, actions, memories, sequence_length
)
output, _ = self.value_heads(embedding)
return output, memories
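The unified ValueNetwork above covers both the former Critic and the former QNetwork; which one you get is determined by the constructor arguments. Illustrative constructions, not part of the diff (argument values are placeholders):

# State-value critic: no action input, one value per reward stream.
critic = ValueNetwork(stream_names, observation_shapes, network_settings)

# Continuous-action Q-network (as in the SAC optimizer hunk): the action vector
# is appended as unnormalized input and a single Q value is emitted per stream.
q_net = ValueNetwork(
    stream_names,
    observation_shapes,
    network_settings,
    encoded_act_size=sum(act_size),
    outputs_per_stream=1,
)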

else:
self.distribution = MultiCategoricalDistribution(embedding_size, act_size)
if separate_critic:
self.critic = Critic(stream_names, observation_shapes, network_settings)
self.critic = ValueNetwork(
stream_names, observation_shapes, network_settings
)
else:
self.stream_names = stream_names
self.value_heads = ValueHeads(stream_names, embedding_size)

self, vec_inputs, vis_inputs, masks=None, memories=None, sequence_length=1
):
embedding, memories = self.network_body(
vec_inputs, vis_inputs, memories, sequence_length
vec_inputs, vis_inputs, memories=memories, sequence_length=sequence_length
)
if self.act_type == ActionType.CONTINUOUS:
dists = self.distribution(embedding)

self, vec_inputs, vis_inputs=None, masks=None, memories=None, sequence_length=1
):
embedding, memories = self.network_body(
vec_inputs, vis_inputs, memories, sequence_length
vec_inputs, vis_inputs, memories=memories, sequence_length=sequence_length
)
dists, value_outputs, memories = self.get_dist_and_value(
vec_inputs, vis_inputs, masks, memories, sequence_length

self.is_continuous_int,
self.act_size_vector,
)
class Critic(nn.Module):
def __init__(
self,
stream_names: List[str],
observation_shapes: List[Tuple[int, ...]],
network_settings: NetworkSettings,
):
super().__init__()
self.network_body = NetworkBody(observation_shapes, network_settings)
self.stream_names = stream_names
self.value_heads = ValueHeads(stream_names, network_settings.hidden_units)
def forward(self, vec_inputs, vis_inputs):
embedding, _ = self.network_body(vec_inputs, vis_inputs)
return self.value_heads(embedding)
class GlobalSteps(nn.Module):

ml-agents/mlagents/trainers/torch/utils.py (33 changed lines)


ResNetVisualEncoder,
NatureVisualEncoder,
VectorEncoder,
VectorAndUnnormalizedInputEncoder,
from mlagents.trainers.torch.encoders import Normalizer
from mlagents.trainers.settings import EncoderType
from mlagents.trainers.exception import UnityTrainerException

h_size: int,
num_layers: int,
vis_encode_type: EncoderType,
action_size: int = 0,
) -> Tuple[nn.ModuleList, nn.ModuleList, nn.ModuleList]:
unnormalized_inputs: int = 0,
normalize: bool = False,
) -> Tuple[nn.ModuleList, nn.ModuleList]:
"""
Creates visual and vector encoders, along with their normalizers.
:param observation_shapes: List of Tuples that represent the action dimensions.

:param num_layers: Depth of MLP per encoder.
:param vis_encode_type: Type of visual encoder to use.
:return: Tuple of visual encoders, vector encoders, and vector normalizers, each as a list.
:param unnormalized_inputs: Vector inputs that should not be normalized, and added to the vector
obs.
:param normalize: Normalize all vector inputs.
:return: Tuple of visual encoders and vector encoders each as a list.
vector_normalizers: List[nn.Module] = []
visual_encoder_class = ModelUtils.get_encoder_for_type(vis_encode_type)
vector_size = 0

raise UnityTrainerException(
f"Unsupported shape of {dimension} for observation {i}"
)
vector_normalizers.append(Normalizer(vector_size))
vector_encoders.append(
VectorEncoder(vector_size + action_size, h_size, num_layers)
)
return (
nn.ModuleList(visual_encoders),
nn.ModuleList(vector_encoders),
nn.ModuleList(vector_normalizers),
)
if unnormalized_inputs > 0:
vector_encoders.append(
VectorAndUnnormalizedInputEncoder(
vector_size, h_size, unnormalized_inputs, num_layers, normalize
)
)
else:
vector_encoders.append(
VectorEncoder(vector_size, h_size, num_layers, normalize)
)
return nn.ModuleList(visual_encoders), nn.ModuleList(vector_encoders)
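A hypothetical call to the updated create_encoders, showing the new unnormalized_inputs and normalize arguments replacing action_size (argument values are made up; EncoderType.SIMPLE is assumed to be available):

visual_encoders, vector_encoders = ModelUtils.create_encoders(
    observation_shapes=[(8,), (84, 84, 3)],  # one vector obs and one visual obs
    h_size=128,
    num_layers=2,
    vis_encode_type=EncoderType.SIMPLE,
    unnormalized_inputs=2,   # e.g. a 2-dim continuous action for a Q-network
    normalize=True,
)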
@staticmethod
def list_to_tensor(
