
[torch] Restructure PyTorch encoders (#4421)

* Move linear encoding to NetworkBody

* moved encoders to processors (#4420)

* fix bad merge

* Get it running

* Replace mentions of visual_encoders

* Remove output_size property

* Fix tests

* Fix some references

* Revert test_simple_rl

* Fix networks test

* Make curiosity test more accommodating

* Rename total_input_size

* [Bug fix] Fix bug in GAIL gradient penalty (#4425) (#4426)

Co-authored-by: Vincent-Pierre BERGES <vincentpierre@unity3d.com>

* Up number of steps

* Rename to visual_processors and vector_processors

Co-authored-by: andrewcoh <54679309+andrewcoh@users.noreply.github.com>
Co-authored-by: Andrew Cohen <andrew.cohen@unity3d.com>
Co-authored-by: Vincent-Pierre BERGES <vincentpierre@unity3d.com>
GitHub · 4 years ago
Commit 4e93cb6e
16 files changed, 111 insertions and 195 deletions
 1. ml-agents/mlagents/trainers/optimizer/torch_optimizer.py (2 changes)
 2. ml-agents/mlagents/trainers/ppo/optimizer_torch.py (2 changes)
 3. ml-agents/mlagents/trainers/sac/optimizer_torch.py (2 changes)
 4. ml-agents/mlagents/trainers/tests/torch/test_encoders.py (36 changes)
 5. ml-agents/mlagents/trainers/tests/torch/test_networks.py (3 changes)
 6. ml-agents/mlagents/trainers/tests/torch/test_policy.py (4 changes)
 7. ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_curiosity.py (3 changes)
 8. ml-agents/mlagents/trainers/tests/torch/test_simple_rl.py (2 changes)
 9. ml-agents/mlagents/trainers/tests/torch/test_utils.py (28 changes)
10. ml-agents/mlagents/trainers/torch/components/bc/module.py (2 changes)
11. ml-agents/mlagents/trainers/torch/components/reward_providers/curiosity_reward_provider.py (4 changes)
12. ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py (2 changes)
13. ml-agents/mlagents/trainers/torch/encoders.py (112 changes)
14. ml-agents/mlagents/trainers/torch/layers.py (32 changes)
15. ml-agents/mlagents/trainers/torch/networks.py (39 changes)
16. ml-agents/mlagents/trainers/torch/utils.py (33 changes)
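At a glance, the restructure replaces per-input MLP encoders (whose outputs were summed) with thin input processors whose outputs are concatenated and fed through a single shared LinearEncoder inside NetworkBody. A minimal sketch of the new data flow, using stand-in modules rather than the real VectorInput/LinearEncoder classes from this commit:

import torch
from torch import nn

# Stand-ins (hypothetical): VectorInput is a passthrough, the visual
# processor maps an image to h_size features, and the shared MLP now
# does all of the encoding work after concatenation.
vector_processor = nn.Identity()
visual_processor = nn.Sequential(nn.Flatten(), nn.Linear(84 * 84 * 3, 128))
linear_encoder = nn.Sequential(nn.Linear(5 + 128, 256), nn.ReLU())

vec_obs = torch.ones(1, 5)
vis_obs = torch.ones(1, 84, 84, 3)
encodes = [vector_processor(vec_obs), visual_processor(vis_obs)]
encoding = linear_encoder(torch.cat(encodes, dim=-1))  # concat, then one shared MLP
assert encoding.shape == (1, 256)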

ml-agents/mlagents/trainers/optimizer/torch_optimizer.py (2 changes)

         if self.policy.use_vis_obs:
             visual_obs = []
             for idx, _ in enumerate(
-                self.policy.actor_critic.network_body.visual_encoders
+                self.policy.actor_critic.network_body.visual_processors
             ):
                 visual_ob = ModelUtils.list_to_tensor(batch["visual_obs%d" % idx])
                 visual_obs.append(visual_ob)

ml-agents/mlagents/trainers/ppo/optimizer_torch.py (2 changes)

         if self.policy.use_vis_obs:
             vis_obs = []
             for idx, _ in enumerate(
-                self.policy.actor_critic.network_body.visual_encoders
+                self.policy.actor_critic.network_body.visual_processors
             ):
                 vis_ob = ModelUtils.list_to_tensor(batch["visual_obs%d" % idx])
                 vis_obs.append(vis_ob)

ml-agents/mlagents/trainers/sac/optimizer_torch.py (2 changes)

         if self.policy.use_vis_obs:
             vis_obs = []
             for idx, _ in enumerate(
-                self.policy.actor_critic.network_body.visual_encoders
+                self.policy.actor_critic.network_body.visual_processors
             ):
                 vis_ob = ModelUtils.list_to_tensor(batch["visual_obs%d" % idx])
                 vis_obs.append(vis_ob)
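The same two-line rename appears in all three optimizers: iteration is driven by the processor list, with one "visual_obs%d" buffer key per visual input. A toy sketch of the lookup pattern (the batch dict here is hypothetical; the real code converts buffer fields with ModelUtils.list_to_tensor):

import torch

batch = {"visual_obs0": torch.zeros(4, 84, 84, 3), "visual_obs1": torch.zeros(4, 84, 84, 1)}
num_visual = 2  # stands in for len(network_body.visual_processors)
vis_obs = [batch["visual_obs%d" % idx] for idx in range(num_visual)]
assert len(vis_obs) == num_visual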

ml-agents/mlagents/trainers/tests/torch/test_encoders.py (36 changes)

 import pytest
 from mlagents.trainers.torch.encoders import (
-    VectorEncoder,
-    VectorAndUnnormalizedInputEncoder,
+    VectorInput,
     Normalizer,
     SimpleVisualEncoder,
     ResNetVisualEncoder,

     mock_normalizer_inst = mock.Mock()
     mock_normalizer.return_value = mock_normalizer_inst
     input_size = 64
-    hidden_size = 128
-    num_layers = 3
-    vector_encoder = VectorEncoder(input_size, hidden_size, num_layers, normalize)
+    vector_encoder = VectorInput(input_size, normalize)

-    assert output.shape == (1, hidden_size)
+    assert output.shape == (1, input_size)

-    vector_encoder = VectorEncoder(input_size, hidden_size, num_layers, normalize)
+    vector_encoder = VectorInput(input_size, normalize)
     new_vec = torch.ones((1, input_size))
     vector_encoder.update_normalization(new_vec)

-    vector_encoder2 = VectorEncoder(input_size, hidden_size, num_layers, normalize)
+    vector_encoder2 = VectorInput(input_size, normalize)

-@mock.patch("mlagents.trainers.torch.encoders.Normalizer")
-def test_vector_and_unnormalized_encoder(mock_normalizer):
-    mock_normalizer_inst = mock.Mock()
-    mock_normalizer.return_value = mock_normalizer_inst
-    input_size = 64
-    unnormalized_size = 32
-    hidden_size = 128
-    num_layers = 3
-    normalize = True
-    mock_normalizer_inst.return_value = torch.ones((1, input_size))
-    vector_encoder = VectorAndUnnormalizedInputEncoder(
-        input_size, hidden_size, unnormalized_size, num_layers, normalize
-    )
-    # Make sure normalizer is only called on input_size
-    mock_normalizer.assert_called_with(input_size)
-    normal_input = torch.ones((1, input_size))
-    unnormalized_input = torch.ones((1, 32))
-    output = vector_encoder(normal_input, unnormalized_input)
-    mock_normalizer_inst.assert_called_with(normal_input)
-    assert output.shape == (1, hidden_size)
 @pytest.mark.parametrize("image_size", [(36, 36, 3), (84, 84, 4), (256, 256, 5)])

ml-agents/mlagents/trainers/tests/torch/test_networks.py (3 changes)

     obs_size = (84, 84, 3)
     network_settings = NetworkSettings()
     obs_shapes = [(vec_obs_size,), obs_size]
+    torch.random.manual_seed(0)
-    sample_obs = torch.ones((1, 84, 84, 3))
+    sample_obs = 0.1 * torch.ones((1, 84, 84, 3))
     sample_vec_obs = torch.ones((1, vec_obs_size))
     for _ in range(150):

ml-agents/mlagents/trainers/tests/torch/test_policy.py (4 changes)

     else:
         actions = ModelUtils.list_to_tensor(buffer["actions"], dtype=torch.long)
     vis_obs = []
-    for idx, _ in enumerate(policy.actor_critic.network_body.visual_encoders):
+    for idx, _ in enumerate(policy.actor_critic.network_body.visual_processors):
         vis_ob = ModelUtils.list_to_tensor(buffer["visual_obs%d" % idx])
         vis_obs.append(vis_ob)

     act_masks = ModelUtils.list_to_tensor(buffer["action_mask"])
     vis_obs = []
-    for idx, _ in enumerate(policy.actor_critic.network_body.visual_encoders):
+    for idx, _ in enumerate(policy.actor_critic.network_body.visual_processors):
         vis_ob = ModelUtils.list_to_tensor(buffer["visual_obs%d" % idx])
         vis_obs.append(vis_ob)

ml-agents/mlagents/trainers/tests/torch/test_reward_providers/test_curiosity.py (3 changes)

     for _ in range(10):
         curiosity_rp.update(buffer)
         reward_new = curiosity_rp.evaluate(buffer)[0]
-        assert reward_new < reward_old
         reward_old = reward_new
+    assert reward_new < reward_old

 @pytest.mark.parametrize("seed", SEED)
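The "more accommodating" change moves the strict assertion out of the loop: curiosity-module losses can fluctuate between updates, so only the final reward needs to beat the initial one. A toy illustration with made-up reward values:

rewards = [0.9, 0.7, 0.8, 0.6, 0.5]  # non-monotonic but trending down
reward_old = rewards[0]
for reward_new in rewards[1:]:
    # the old in-loop "assert reward_new < reward_old" would fail at 0.8
    reward_old = reward_new
assert reward_old < rewards[0]  # the relaxed check only compares the endpoints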

ml-agents/mlagents/trainers/tests/torch/test_simple_rl.py (2 changes)

         PPO_CONFIG,
         hyperparameters=new_hyperparams,
         network_settings=new_networksettings,
-        max_steps=500,
+        max_steps=700,
         summary_freq=100,
     )
     # The number of steps is pretty small for these encoders

ml-agents/mlagents/trainers/tests/torch/test_utils.py (28 changes)

 from mlagents.trainers.settings import EncoderType, ScheduleType
 from mlagents.trainers.torch.utils import ModelUtils
 from mlagents.trainers.exception import UnityTrainerException
-from mlagents.trainers.torch.encoders import (
-    VectorEncoder,
-    VectorAndUnnormalizedInputEncoder,
-)
+from mlagents.trainers.torch.encoders import VectorInput
 from mlagents.trainers.torch.distributions import (
     CategoricalDistInstance,
     GaussianDistInstance,

     enc.forward(vis_input)

-@pytest.mark.parametrize("unnormalized_inputs", [0, 1])
-def test_create_encoders(
-    encoder_type, normalize, num_vector, num_visual, unnormalized_inputs
-):
+def test_create_inputs(encoder_type, normalize, num_vector, num_visual):
     vec_obs_shape = (5,)
     vis_obs_shape = (84, 84, 3)
     obs_shapes = []

         obs_shapes.append(vis_obs_shape)
     h_size = 128
-    num_layers = 3
-    unnormalized_inputs = 1
-    vis_enc, vec_enc = ModelUtils.create_encoders(
-        obs_shapes, h_size, num_layers, encoder_type, unnormalized_inputs, normalize
+    vis_enc, vec_enc, total_output = ModelUtils.create_input_processors(
+        obs_shapes, h_size, encoder_type, normalize
     )
-    assert len(vec_enc) == (
-        1 if unnormalized_inputs + num_vector > 0 else 0
-    )  # There's always at most one vector encoder.
-    if unnormalized_inputs > 0:
-        assert isinstance(vec_enc[0], VectorAndUnnormalizedInputEncoder)
-    elif num_vector > 0:
-        assert isinstance(vec_enc[0], VectorEncoder)
+    assert len(vec_enc) == (1 if num_vector >= 1 else 0)
+    assert total_output == int(num_visual * h_size + vec_obs_shape[0] * num_vector)
+    if num_vector > 0:
+        assert isinstance(vec_enc[0], VectorInput)
     for enc in vis_enc:
         assert isinstance(enc, ModelUtils.get_encoder_for_type(encoder_type))
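The new total_output assertion is plain bookkeeping: each visual input contributes h_size features and each vector observation contributes its raw width (all vector inputs share one VectorInput). Worked out with the values in this test:

h_size = 128
vec_obs_shape = (5,)
num_visual, num_vector = 1, 2
total_output = num_visual * h_size + vec_obs_shape[0] * num_vector
assert total_output == 138  # 1 * 128 + 2 * 5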

ml-agents/mlagents/trainers/torch/components/bc/module.py (2 changes)

         if self.policy.use_vis_obs:
             vis_obs = []
             for idx, _ in enumerate(
-                self.policy.actor_critic.network_body.visual_encoders
+                self.policy.actor_critic.network_body.visual_processors
             ):
                 vis_ob = ModelUtils.list_to_tensor(
                     mini_batch_demo["visual_obs%d" % idx]

ml-agents/mlagents/trainers/torch/components/reward_providers/curiosity_reward_provider.py (4 changes)

         """
         Extracts the current state embedding from a mini_batch.
         """
-        n_vis = len(self._state_encoder.visual_encoders)
+        n_vis = len(self._state_encoder.visual_processors)
         hidden, _ = self._state_encoder.forward(
             vec_inputs=[
                 ModelUtils.list_to_tensor(mini_batch["vector_obs"], dtype=torch.float)

         """
         Extracts the next state embedding from a mini_batch.
         """
-        n_vis = len(self._state_encoder.visual_encoders)
+        n_vis = len(self._state_encoder.visual_processors)
         hidden, _ = self._state_encoder.forward(
             vec_inputs=[
                 ModelUtils.list_to_tensor(

ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py (2 changes)

         """
         Creates the observation input.
         """
-        n_vis = len(self._state_encoder.visual_encoders)
+        n_vis = len(self._state_encoder.visual_processors)
         hidden, _ = self._state_encoder.forward(
             vec_inputs=[torch.as_tensor(mini_batch["vector_obs"], dtype=torch.float)],
             vis_inputs=[

ml-agents/mlagents/trainers/torch/encoders.py (112 changes)

 from typing import Tuple, Optional, Union
 from mlagents.trainers.exception import UnityTrainerException
 import torch
 from torch import nn

     return height, width

-class VectorEncoder(nn.Module):
-    def __init__(
-        self,
-        input_size: int,
-        hidden_size: int,
-        num_layers: int,
-        normalize: bool = False,
-    ):
-        super().__init__()
+class VectorInput(nn.Module):
+    def __init__(self, input_size: int, normalize: bool = False):
+        super().__init__()
-        self.layers = [
-            linear_layer(
-                input_size,
-                hidden_size,
-                kernel_init=Initialization.KaimingHeNormal,
-                kernel_gain=1.0,
-            )
-        ]
-        self.layers.append(Swish())
-        for _ in range(num_layers - 1):
-            self.layers.append(
-                linear_layer(
-                    hidden_size,
-                    hidden_size,
-                    kernel_init=Initialization.KaimingHeNormal,
-                    kernel_gain=1.0,
-                )
-            )
-            self.layers.append(Swish())
-        self.seq_layers = nn.Sequential(*self.layers)

-        return self.seq_layers(inputs)
+        return inputs

-    def copy_normalization(self, other_encoder: "VectorEncoder") -> None:
-        if self.normalizer is not None and other_encoder.normalizer is not None:
-            self.normalizer.copy_from(other_encoder.normalizer)
+    def copy_normalization(self, other_input: "VectorInput") -> None:
+        if self.normalizer is not None and other_input.normalizer is not None:
+            self.normalizer.copy_from(other_input.normalizer)

     def update_normalization(self, inputs: torch.Tensor) -> None:
         if self.normalizer is not None:

-class VectorAndUnnormalizedInputEncoder(VectorEncoder):
-    """
-    Encoder for concatenated vector input (can be normalized) and unnormalized vector input.
-    This is used for passing inputs to the network that should not be normalized, such as
-    actions in the case of a Q function or task parameterizations. It will result in an encoder with
-    this structure:
-    ____________       ____________       ____________
-    | Vector     |     | Normalize  |     | Fully      |
-    |            | --> |            | --> | Connected  |     ___________
-    |____________|     |____________|     |            |    | Output    |
-    ____________                          |            | -> |           |
-    |Unnormalized|                        |            |    |___________|
-    | Input      | ---------------------> |            |
-    |____________|                        |____________|
-    """
-
-    def __init__(
-        self,
-        input_size: int,
-        hidden_size: int,
-        unnormalized_input_size: int,
-        num_layers: int,
-        normalize: bool = False,
-    ):
-        super().__init__(
-            input_size + unnormalized_input_size,
-            hidden_size,
-            num_layers,
-            normalize=False,
-        )
-        if normalize:
-            self.normalizer = Normalizer(input_size)
-        else:
-            self.normalizer = None
-
-    def forward(  # pylint: disable=W0221
-        self, inputs: torch.Tensor, unnormalized_inputs: Optional[torch.Tensor] = None
-    ) -> None:
-        if unnormalized_inputs is None:
-            raise UnityTrainerException(
-                "Attempted to call an VectorAndUnnormalizedInputEncoder without an unnormalized input."
-            )  # Fix mypy errors about method parameters.
-        if self.normalizer is not None:
-            inputs = self.normalizer(inputs)
-        return self.seq_layers(torch.cat([inputs, unnormalized_inputs], dim=-1))

 class SimpleVisualEncoder(nn.Module):
     def __init__(
         self, height: int, width: int, initial_channels: int, output_size: int

             nn.LeakyReLU(),
         )

-    def forward(self, visual_obs: torch.Tensor) -> None:
+    def forward(self, visual_obs: torch.Tensor) -> torch.Tensor:
-        hidden = self.dense(hidden)
-        return hidden
+        return self.dense(hidden)

 class NatureVisualEncoder(nn.Module):
-    def __init__(self, height, width, initial_channels, output_size):
+    def __init__(
+        self, height: int, width: int, initial_channels: int, output_size: int
+    ):
         super().__init__()
         self.h_size = output_size
         conv_1_hw = conv_output_shape((height, width), 8, 4)

             nn.LeakyReLU(),
         )

-    def forward(self, visual_obs: torch.Tensor) -> None:
+    def forward(self, visual_obs: torch.Tensor) -> torch.Tensor:
-        hidden = self.dense(hidden)
-        return hidden
+        return self.dense(hidden)

 class ResNetBlock(nn.Module):

 class ResNetVisualEncoder(nn.Module):
-    def __init__(self, height, width, initial_channels, final_hidden):
+    def __init__(
+        self, height: int, width: int, initial_channels: int, output_size: int
+    ):
         super().__init__()
         n_channels = [16, 32, 32]  # channel for each stack
         n_blocks = 2  # number of residual blocks

         layers.append(Swish())
         self.dense = linear_layer(
             n_channels[-1] * height * width,
-            final_hidden,
+            output_size,

-    def forward(self, visual_obs):
+    def forward(self, visual_obs: torch.Tensor) -> torch.Tensor:
         batch_size = visual_obs.shape[0]
         hidden = self.sequential(visual_obs)
         before_out = hidden.view(batch_size, -1)
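With the MLP gone, VectorInput's remaining job is normalization bookkeeping. A minimal usage sketch of the workflow it still owns (shapes are illustrative):

import torch
from mlagents.trainers.torch.encoders import VectorInput

vec_in = VectorInput(input_size=3, normalize=True)
vec_in.update_normalization(torch.tensor([[1.0, 2.0, 3.0]]))  # feed the running stats
out = vec_in(torch.tensor([[1.0, 2.0, 3.0]]))  # normalized passthrough, same width
assert out.shape == (1, 3)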

ml-agents/mlagents/trainers/torch/layers.py (32 changes)

     pass

+class LinearEncoder(torch.nn.Module):
+    """
+    Linear layers.
+    """
+
+    def __init__(self, input_size: int, num_layers: int, hidden_size: int):
+        super().__init__()
+        self.layers = [
+            linear_layer(
+                input_size,
+                hidden_size,
+                kernel_init=Initialization.KaimingHeNormal,
+                kernel_gain=1.0,
+            )
+        ]
+        self.layers.append(Swish())
+        for _ in range(num_layers - 1):
+            self.layers.append(
+                linear_layer(
+                    hidden_size,
+                    hidden_size,
+                    kernel_init=Initialization.KaimingHeNormal,
+                    kernel_gain=1.0,
+                )
+            )
+            self.layers.append(Swish())
+        self.seq_layers = torch.nn.Sequential(*self.layers)
+
+    def forward(self, input_tensor: torch.Tensor) -> torch.Tensor:
+        return self.seq_layers(input_tensor)

 class LSTM(MemoryModule):
     """
     Memory module that implements LSTM.
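LinearEncoder is the old VectorEncoder MLP verbatim, relocated so a single instance can serve the concatenated inputs. A quick shape check of its contract:

import torch
from mlagents.trainers.torch.layers import LinearEncoder

enc = LinearEncoder(input_size=10, num_layers=3, hidden_size=128)
out = enc(torch.ones(1, 10))  # one input layer + two hidden layers, Swish-activated
assert out.shape == (1, 128)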

ml-agents/mlagents/trainers/torch/networks.py (39 changes)

 from mlagents.trainers.settings import NetworkSettings
 from mlagents.trainers.torch.utils import ModelUtils
 from mlagents.trainers.torch.decoders import ValueHeads
-from mlagents.trainers.torch.layers import LSTM
+from mlagents.trainers.torch.layers import LSTM, LinearEncoder

 ActivationFunction = Callable[[torch.Tensor], torch.Tensor]
 EncoderFunction = Callable[

             else 0
         )
-        self.visual_encoders, self.vector_encoders = ModelUtils.create_encoders(
+        self.visual_processors, self.vector_processors, encoder_input_size = ModelUtils.create_input_processors(
-            network_settings.num_layers,
-            unnormalized_inputs=encoded_act_size,
         )
+        total_enc_size = encoder_input_size + encoded_act_size
+        self.linear_encoder = LinearEncoder(
+            total_enc_size, network_settings.num_layers, self.h_size
+        )
         if self.use_lstm:

     def update_normalization(self, vec_inputs: List[torch.Tensor]) -> None:
-        for vec_input, vec_enc in zip(vec_inputs, self.vector_encoders):
+        for vec_input, vec_enc in zip(vec_inputs, self.vector_processors):

-        for n1, n2 in zip(self.vector_encoders, other_network.vector_encoders):
+        for n1, n2 in zip(self.vector_processors, other_network.vector_processors):
             n1.copy_normalization(n2)

     @property

         sequence_length: int = 1,
     ) -> Tuple[torch.Tensor, torch.Tensor]:
         encodes = []
-        for idx, encoder in enumerate(self.vector_encoders):
-            if actions is not None:
-                hidden = encoder(vec_input, actions)
-            else:
-                hidden = encoder(vec_input)
-            encodes.append(hidden)
+        for idx, processor in enumerate(self.vector_processors):
+            processed_vec = processor(vec_input)
+            encodes.append(processed_vec)

-        for idx, encoder in enumerate(self.visual_encoders):
-            hidden = encoder(vis_input)
-            encodes.append(hidden)
+        for idx, processor in enumerate(self.visual_processors):
+            processed_vis = processor(vis_input)
+            encodes.append(processed_vis)

-        encoding = encodes[0]
-        if len(encodes) > 1:
-            for _enc in encodes[1:]:
-                encoding += _enc
+        if actions is not None:
+            inputs = torch.cat(encodes + [actions], dim=-1)
+        else:
+            inputs = torch.cat(encodes, dim=-1)
+        encoding = self.linear_encoder(inputs)

         if self.use_lstm:
             # Resize to (batch, sequence length, encoding size)
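Actions no longer need the special VectorAndUnnormalizedInputEncoder path; they are concatenated with the processed observations and encoded by the same shared MLP. A reduced sketch of the branch above, with placeholder tensors instead of real processor outputs:

import torch

encodes = [torch.ones(1, 5), torch.ones(1, 128)]  # processed vector + visual embeddings
actions = torch.ones(1, 2)  # e.g. encoded actions for a Q-function input
inputs = torch.cat(encodes + [actions], dim=-1)  # actions join the concat unchanged
assert inputs.shape == (1, 135)  # 5 + 128 + 2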

ml-agents/mlagents/trainers/torch/utils.py (33 changes)

     SimpleVisualEncoder,
     ResNetVisualEncoder,
     NatureVisualEncoder,
-    VectorEncoder,
-    VectorAndUnnormalizedInputEncoder,
+    VectorInput,
 )
 from mlagents.trainers.settings import EncoderType, ScheduleType
 from mlagents.trainers.exception import UnityTrainerException

     )

     @staticmethod
-    def create_encoders(
+    def create_input_processors(
-        num_layers: int,
-        unnormalized_inputs: int = 0,
-    ) -> Tuple[nn.ModuleList, nn.ModuleList]:
+    ) -> Tuple[nn.ModuleList, nn.ModuleList, int]:
         """
         Creates visual and vector encoders, along with their normalizers.
         :param observation_shapes: List of Tuples that represent the action dimensions.

-        :param num_layers: Depth of MLP per encoder.
         :param vis_encode_type: Type of visual encoder to use.
-        :param unnormalized_inputs: Vector inputs that should not be normalized, and added to the vector
-        obs.

         visual_encoder_class = ModelUtils.get_encoder_for_type(vis_encode_type)
         vector_size = 0
+        visual_output_size = 0
         for i, dimension in enumerate(observation_shapes):
             if len(dimension) == 3:
                 ModelUtils._check_resolution_for_encoder(

                         dimension[0], dimension[1], dimension[2], h_size
                     )
                 )
+                visual_output_size += h_size
             elif len(dimension) == 1:
                 vector_size += dimension[0]
             else:

-        if vector_size + unnormalized_inputs > 0:
-            if unnormalized_inputs > 0:
-                vector_encoders.append(
-                    VectorAndUnnormalizedInputEncoder(
-                        vector_size, h_size, unnormalized_inputs, num_layers, normalize
-                    )
-                )
-            else:
-                vector_encoders.append(
-                    VectorEncoder(vector_size, h_size, num_layers, normalize)
-                )
-        return nn.ModuleList(visual_encoders), nn.ModuleList(vector_encoders)
+        if vector_size > 0:
+            vector_encoders.append(VectorInput(vector_size, normalize))
+        # Total output size for all inputs + CNNs
+        total_processed_size = vector_size + visual_output_size
+        return (
+            nn.ModuleList(visual_encoders),
+            nn.ModuleList(vector_encoders),
+            total_processed_size,
+        )

     @staticmethod
     def list_to_tensor(
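A minimal usage sketch of the renamed helper; the third return value is what NetworkBody adds encoded_act_size to before sizing its LinearEncoder. Arguments are passed positionally here, matching the updated test above:

from mlagents.trainers.settings import EncoderType
from mlagents.trainers.torch.utils import ModelUtils

# One 84x84x3 camera plus one 5-dim vector observation.
vis_procs, vec_procs, total_size = ModelUtils.create_input_processors(
    [(84, 84, 3), (5,)], 128, EncoderType.SIMPLE, False
)
assert total_size == 128 + 5  # h_size per camera + raw vector width
assert len(vis_procs) == 1 and len(vec_procs) == 1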
