
Develop add fire layers (#4321)

* Layer initialization + swish as a layer

* integrating with the existing layers

* fixing tests

* setting the seed for a test

* Using swish and fixing tests
/develop/add-fire
GitHub, 4 years ago
Current commit
6b193d03
9 files changed, 183 insertions and 78 deletions
  1. ml-agents/mlagents/trainers/tests/torch/test_networks.py (9 changes)
  2. ml-agents/mlagents/trainers/torch/components/reward_providers/curiosity_reward_provider.py (17 changes)
  3. ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py (27 changes)
  4. ml-agents/mlagents/trainers/torch/decoders.py (3 changes)
  5. ml-agents/mlagents/trainers/torch/distributions.py (28 changes)
  6. ml-agents/mlagents/trainers/torch/encoders.py (100 changes)
  7. ml-agents/mlagents/trainers/torch/utils.py (9 changes)
  8. ml-agents/mlagents/trainers/tests/torch/test_layers.py (20 changes)
  9. ml-agents/mlagents/trainers/torch/layers.py (48 changes)

ml-agents/mlagents/trainers/tests/torch/test_networks.py (9 changes)


 def test_networkbody_vector():
+    torch.manual_seed(0)
     obs_size = 4
     network_settings = NetworkSettings()
     obs_shapes = [(obs_size,)]

-    sample_obs = torch.ones((1, obs_size))
-    sample_act = torch.ones((1, 2))
-    for _ in range(100):
+    sample_obs = 0.1 * torch.ones((1, obs_size))
+    sample_act = 0.1 * torch.ones((1, 2))
+    for _ in range(300):
         encoded, _ = networkbody([sample_obs], [], sample_act)
         assert encoded.shape == (1, network_settings.hidden_units)
         # Try to force output to 1

     sample_obs = torch.ones((1, 84, 84, 3))
     sample_vec_obs = torch.ones((1, vec_obs_size))
-    for _ in range(100):
+    for _ in range(150):
         encoded, _ = networkbody([sample_vec_obs], [sample_obs])
         assert encoded.shape == (1, network_settings.hidden_units)
         # Try to force output to 1
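Note: the seed matters because these tests train the network toward a target and assert on the result, so an unlucky initialization could make them flaky. A standalone illustration of what torch.manual_seed buys (not part of the diff):

import torch

torch.manual_seed(0)
first = torch.nn.Linear(4, 4).weight.detach().clone()
torch.manual_seed(0)
second = torch.nn.Linear(4, 4).weight.detach().clone()
# Same seed, same initial weights, so the training loop is reproducible.
assert torch.equal(first, second)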

ml-agents/mlagents/trainers/torch/components/reward_providers/curiosity_reward_provider.py (17 changes)


 from mlagents_envs.base_env import BehaviorSpec
 from mlagents.trainers.torch.utils import ModelUtils
 from mlagents.trainers.torch.networks import NetworkBody
+from mlagents.trainers.torch.layers import linear_layer, Swish
 from mlagents.trainers.settings import NetworkSettings, EncoderType

         self._action_flattener = ModelUtils.ActionFlattener(specs)
         self.inverse_model_action_predition = torch.nn.Sequential(
-            torch.nn.Linear(2 * settings.encoding_size, 256),
-            ModelUtils.SwishLayer(),
-            torch.nn.Linear(256, self._action_flattener.flattened_size),
+            linear_layer(2 * settings.encoding_size, 256),
+            Swish(),
+            linear_layer(256, self._action_flattener.flattened_size),
-        self.inverse_model_action_predition[0].bias.data.zero_()
-        self.inverse_model_action_predition[2].bias.data.zero_()

-            torch.nn.Linear(
+            linear_layer(
-            ModelUtils.SwishLayer(),
-            torch.nn.Linear(256, settings.encoding_size),
+            Swish(),
+            linear_layer(256, settings.encoding_size),
-        self.forward_model_next_state_prediction[0].bias.data.zero_()
-        self.forward_model_next_state_prediction[2].bias.data.zero_()

     def get_current_state(self, mini_batch: AgentBuffer) -> torch.Tensor:
         """

ml-agents/mlagents/trainers/torch/components/reward_providers/gail_reward_provider.py (27 changes)


 from mlagents_envs.base_env import BehaviorSpec
 from mlagents.trainers.torch.utils import ModelUtils
 from mlagents.trainers.torch.networks import NetworkBody
+from mlagents.trainers.torch.layers import linear_layer, Swish, Initialization
 from mlagents.trainers.settings import NetworkSettings, EncoderType
 from mlagents.trainers.demo_loader import demo_to_buffer

         )  # + 1 is for done
         self.encoder = torch.nn.Sequential(
-            torch.nn.Linear(encoder_input_size, settings.encoding_size),
-            ModelUtils.SwishLayer(),
-            torch.nn.Linear(settings.encoding_size, settings.encoding_size),
-            ModelUtils.SwishLayer(),
+            linear_layer(encoder_input_size, settings.encoding_size),
+            Swish(),
+            linear_layer(settings.encoding_size, settings.encoding_size),
+            Swish(),
-        torch.nn.init.xavier_normal_(self.encoder[0].weight.data)
-        torch.nn.init.xavier_normal_(self.encoder[2].weight.data)
-        self.encoder[0].bias.data.zero_()
-        self.encoder[2].bias.data.zero_()
         estimator_input_size = settings.encoding_size
         if settings.use_vail:

             )
-            self.z_mu_layer = torch.nn.Linear(settings.encoding_size, self.z_size)
-            # self.z_mu_layer.weight.data Needs a variance scale initializer
-            torch.nn.init.xavier_normal_(self.z_mu_layer.weight.data)
-            self.z_mu_layer.bias.data.zero_()
+            self.z_mu_layer = linear_layer(
+                settings.encoding_size,
+                self.z_size,
+                kernel_init=Initialization.KaimingHeNormal,
+                kernel_gain=0.1,
+            )

-            torch.nn.Linear(estimator_input_size, 1), torch.nn.Sigmoid()
+            linear_layer(estimator_input_size, 1), torch.nn.Sigmoid()
-        torch.nn.init.xavier_normal_(self.estimator[0].weight.data)
-        self.estimator[0].bias.data.zero_()

     def get_action_input(self, mini_batch: AgentBuffer) -> torch.Tensor:
         """

ml-agents/mlagents/trainers/torch/decoders.py (3 changes)


 import torch
 from torch import nn
+from mlagents.trainers.torch.layers import linear_layer


 class ValueHeads(nn.Module):

         _value_heads = {}
         for name in stream_names:
-            value = nn.Linear(input_size, output_size)
+            value = linear_layer(input_size, output_size)
             _value_heads[name] = value
         self.value_heads = nn.ModuleDict(_value_heads)

ml-agents/mlagents/trainers/torch/distributions.py (28 changes)


 from torch import nn
 import numpy as np
 import math
+from mlagents.trainers.torch.layers import linear_layer, Initialization

 EPSILON = 1e-7  # Small value to avoid divide by zero

     ):
         super().__init__()
         self.conditional_sigma = conditional_sigma
-        self.mu = nn.Linear(hidden_size, num_outputs)
-        nn.init.xavier_uniform_(self.mu.weight, gain=0.01)
+        self.mu = linear_layer(
+            hidden_size,
+            num_outputs,
+            kernel_init=Initialization.KaimingHeNormal,
+            kernel_gain=0.1,
+            bias_init=Initialization.Zero,
+        )
-            self.log_sigma = nn.Linear(hidden_size, num_outputs)
-            nn.init.xavier_uniform(self.log_sigma.weight, gain=0.01)
+            self.log_sigma = linear_layer(
+                hidden_size,
+                num_outputs,
+                kernel_init=Initialization.KaimingHeNormal,
+                kernel_gain=0.1,
+                bias_init=Initialization.Zero,
+            )
         else:
             self.log_sigma = nn.Parameter(
                 torch.zeros(1, num_outputs, requires_grad=True)

     def _create_policy_branches(self, hidden_size: int) -> nn.ModuleList:
         branches = []
         for size in self.act_sizes:
-            branch_output_layer = nn.Linear(hidden_size, size)
-            nn.init.xavier_uniform_(branch_output_layer.weight, gain=0.01)
+            branch_output_layer = linear_layer(
+                hidden_size,
+                size,
+                kernel_init=Initialization.KaimingHeNormal,
+                kernel_gain=0.1,
+                bias_init=Initialization.Zero,
+            )
             branches.append(branch_output_layer)
         return nn.ModuleList(branches)
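The policy output layers keep their small-magnitude initialization through the new helper: per the docstring in layers.py below, KaimingHeNormal with kernel_gain=0.1 plays the role the old xavier_uniform_(gain=0.01) calls did, keeping initial action outputs near zero. A standalone sketch of the effect (256 and 2 are placeholder sizes for hidden_size and num_outputs):

import torch
from mlagents.trainers.torch.layers import linear_layer, Initialization

torch.manual_seed(0)
mu = linear_layer(
    256,
    2,
    kernel_init=Initialization.KaimingHeNormal,
    kernel_gain=0.1,
    bias_init=Initialization.Zero,
)
# Weights are scaled down 10x after the He-normal draw, so initial
# policy outputs stay close to zero for stable early training.
print(mu.weight.data.std())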

ml-agents/mlagents/trainers/torch/encoders.py (100 changes)


 from typing import Tuple, Optional
 from mlagents.trainers.exception import UnityTrainerException
+from mlagents.trainers.torch.layers import linear_layer, Initialization, Swish
 import torch
 from torch import nn

     return height, width


-class SwishLayer(torch.nn.Module):
-    def forward(self, data: torch.Tensor) -> torch.Tensor:
-        return torch.mul(data, torch.sigmoid(data))


 class VectorEncoder(nn.Module):
     def __init__(
         self,

     ):
         self.normalizer: Optional[Normalizer] = None
         super().__init__()
-        self.layers = [nn.Linear(input_size, hidden_size)]
-        self.layers.append(SwishLayer())
+        self.layers = [
+            linear_layer(
+                input_size,
+                hidden_size,
+                kernel_init=Initialization.KaimingHeNormal,
+                kernel_gain=1.0,
+            )
+        ]
+        self.layers.append(Swish())

-            self.layers.append(nn.Linear(hidden_size, hidden_size))
-            self.layers.append(nn.LeakyReLU())
+            self.layers.append(
+                linear_layer(
+                    hidden_size,
+                    hidden_size,
+                    kernel_init=Initialization.KaimingHeNormal,
+                    kernel_gain=1.0,
+                )
+            )
+            self.layers.append(Swish())
         self.seq_layers = nn.Sequential(*self.layers)

     def forward(self, inputs: torch.Tensor) -> None:

         conv_2_hw = conv_output_shape(conv_1_hw, 4, 2)
         self.final_flat = conv_2_hw[0] * conv_2_hw[1] * 32
-        self.conv1 = nn.Conv2d(initial_channels, 16, [8, 8], [4, 4])
-        self.conv2 = nn.Conv2d(16, 32, [4, 4], [2, 2])
-        self.dense = nn.Linear(self.final_flat, self.h_size)
+        self.conv_layers = nn.Sequential(
+            nn.Conv2d(initial_channels, 16, [8, 8], [4, 4]),
+            nn.LeakyReLU(),
+            nn.Conv2d(16, 32, [4, 4], [2, 2]),
+            nn.LeakyReLU(),
+        )
+        self.dense = nn.Sequential(
+            linear_layer(
+                self.final_flat,
+                self.h_size,
+                kernel_init=Initialization.KaimingHeNormal,
+                kernel_gain=1.0,
+            ),
+            nn.LeakyReLU(),
+        )

-        conv_1 = nn.functional.leaky_relu(self.conv1(visual_obs))
-        conv_2 = nn.functional.leaky_relu(self.conv2(conv_1))
-        # hidden = torch.relu(self.dense(conv_2.view([-1, self.final_flat])))
-        hidden = nn.functional.leaky_relu(
-            self.dense(torch.reshape(conv_2, (-1, self.final_flat)))
-        )
+        hidden = self.conv_layers(visual_obs)
+        hidden = torch.reshape(hidden, (-1, self.final_flat))
+        hidden = self.dense(hidden)
         return hidden

         conv_3_hw = conv_output_shape(conv_2_hw, 3, 1)
         self.final_flat = conv_3_hw[0] * conv_3_hw[1] * 64
-        self.conv1 = nn.Conv2d(initial_channels, 32, [8, 8], [4, 4])
-        self.conv2 = nn.Conv2d(32, 64, [4, 4], [2, 2])
-        self.conv3 = nn.Conv2d(64, 64, [3, 3], [1, 1])
-        self.dense = nn.Linear(self.final_flat, self.h_size)
+        self.conv_layers = nn.Sequential(
+            nn.Conv2d(initial_channels, 32, [8, 8], [4, 4]),
+            nn.LeakyReLU(),
+            nn.Conv2d(32, 64, [4, 4], [2, 2]),
+            nn.LeakyReLU(),
+            nn.Conv2d(64, 64, [3, 3], [1, 1]),
+            nn.LeakyReLU(),
+        )
+        self.dense = nn.Sequential(
+            linear_layer(
+                self.final_flat,
+                self.h_size,
+                kernel_init=Initialization.KaimingHeNormal,
+                kernel_gain=1.0,
+            ),
+            nn.LeakyReLU(),
+        )

-    def forward(self, visual_obs):
-        conv_1 = nn.functional.leaky_relu(self.conv1(visual_obs))
-        conv_2 = nn.functional.leaky_relu(self.conv2(conv_1))
-        conv_3 = nn.functional.leaky_relu(self.conv3(conv_2))
-        hidden = nn.functional.leaky_relu(
-            self.dense(conv_3.view([-1, self.final_flat]))
-        )
+    def forward(self, visual_obs: torch.Tensor) -> None:
+        hidden = self.conv_layers(visual_obs)
+        hidden = hidden.view([-1, self.final_flat])
+        hidden = self.dense(hidden)
         return hidden

         for _ in range(n_blocks):
             self.layers.append(self.make_block(channel))
             last_channel = channel
-        self.layers.append(nn.LeakyReLU())
-        self.dense = nn.Linear(n_channels[-1] * height * width, final_hidden)
+        self.layers.append(Swish())
+        self.dense = linear_layer(
+            n_channels[-1] * height * width,
+            final_hidden,
+            kernel_init=Initialization.KaimingHeNormal,
+            kernel_gain=1.0,
+        )

-            nn.LeakyReLU(),
+            Swish(),
-            nn.LeakyReLU(),
+            Swish(),
             nn.Conv2d(channel, channel, [3, 3], [1, 1], padding=1),
         ]
         return block_layers
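For context on self.final_flat above: conv_output_shape applies the usual floor((size - kernel) / stride) + 1 rule per spatial dimension. Worked out for the 84x84 input used in test_networks.py above (an illustration, not part of the diff):

def conv_out(size: int, kernel: int, stride: int) -> int:
    # Same rule conv_output_shape applies per spatial dimension.
    return (size - kernel) // stride + 1

h = conv_out(84, 8, 4)   # 20 after the 8x8 / stride-4 conv
h = conv_out(h, 4, 2)    # 9 after the 4x4 / stride-2 conv
h = conv_out(h, 3, 1)    # 7 after the 3x3 / stride-1 conv
final_flat = h * h * 64  # 3136 inputs to the dense layer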

ml-agents/mlagents/trainers/torch/utils.py (9 changes)


         EncoderType.RESNET: 15,
     }

-    @staticmethod
-    def swish(input_activation: torch.Tensor) -> torch.Tensor:
-        """Swish activation function. For more info: https://arxiv.org/abs/1710.05941"""
-        return torch.mul(input_activation, torch.sigmoid(input_activation))

-    class SwishLayer(torch.nn.Module):
-        def forward(self, data: torch.Tensor) -> torch.Tensor:
-            return torch.mul(data, torch.sigmoid(data))

     class ActionFlattener:
         def __init__(self, behavior_spec: BehaviorSpec):
             self._specs = behavior_spec
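The removed ModelUtils.swish helper and SwishLayer class are superseded by the single Swish module in layers.py; the math is unchanged, as a quick standalone check shows:

import torch
from mlagents.trainers.torch.layers import Swish

x = torch.randn(2, 3)
# Identical to the old ModelUtils.swish: elementwise x * sigmoid(x).
assert torch.equal(Swish()(x), torch.mul(x, torch.sigmoid(x)))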

ml-agents/mlagents/trainers/tests/torch/test_layers.py (20 changes)


+import torch
+
+from mlagents.trainers.torch.layers import Swish, linear_layer, Initialization
+
+
+def test_swish():
+    layer = Swish()
+    input_tensor = torch.Tensor([[1, 2, 3], [4, 5, 6]])
+    target_tensor = torch.mul(input_tensor, torch.sigmoid(input_tensor))
+    assert torch.all(torch.eq(layer(input_tensor), target_tensor))
+
+
+def test_initialization_layer():
+    torch.manual_seed(0)
+    # Test Zero
+    layer = linear_layer(
+        3, 4, kernel_init=Initialization.Zero, bias_init=Initialization.Zero
+    )
+    assert torch.all(torch.eq(layer.weight.data, torch.zeros_like(layer.weight.data)))
+    assert torch.all(torch.eq(layer.bias.data, torch.zeros_like(layer.bias.data)))

ml-agents/mlagents/trainers/torch/layers.py (48 changes)


+import torch
+from enum import Enum
+
+
+class Swish(torch.nn.Module):
+    def forward(self, data: torch.Tensor) -> torch.Tensor:
+        return torch.mul(data, torch.sigmoid(data))
+
+
+class Initialization(Enum):
+    Zero = 0
+    XavierGlorotNormal = 1
+    XavierGlorotUniform = 2
+    KaimingHeNormal = 3  # also known as Variance scaling
+    KaimingHeUniform = 4
+
+
+_init_methods = {
+    Initialization.Zero: torch.zero_,
+    Initialization.XavierGlorotNormal: torch.nn.init.xavier_normal_,
+    Initialization.XavierGlorotUniform: torch.nn.init.xavier_uniform_,
+    Initialization.KaimingHeNormal: torch.nn.init.kaiming_normal_,
+    Initialization.KaimingHeUniform: torch.nn.init.kaiming_uniform_,
+}
+
+
+def linear_layer(
+    input_size: int,
+    output_size: int,
+    kernel_init: Initialization = Initialization.XavierGlorotUniform,
+    kernel_gain: float = 1.0,
+    bias_init: Initialization = Initialization.Zero,
+) -> torch.nn.Module:
+    """
+    Creates a torch.nn.Linear module and initializes its weights.
+    :param input_size: The size of the input tensor
+    :param output_size: The size of the output tensor
+    :param kernel_init: The Initialization to use for the weights of the layer
+    :param kernel_gain: The multiplier for the weights of the kernel. Note that in
+    TensorFlow, calling variance_scaling with scale 0.01 is equivalent to calling
+    KaimingHeNormal with kernel_gain of 0.1.
+    :param bias_init: The Initialization to use for the weights of the bias layer
+    """
+    layer = torch.nn.Linear(input_size, output_size)
+    _init_methods[kernel_init](layer.weight.data)
+    layer.weight.data *= kernel_gain
+    _init_methods[bias_init](layer.bias.data)
+    return layer
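A usage sketch for the new helper: kernel_gain is applied multiplicatively after the chosen initializer runs, so under a fixed seed two layers that differ only in gain differ by exactly that factor (standalone, not part of the diff):

import torch
from mlagents.trainers.torch.layers import linear_layer, Initialization

torch.manual_seed(0)
base = linear_layer(8, 8, kernel_init=Initialization.KaimingHeNormal, kernel_gain=1.0)
torch.manual_seed(0)
scaled = linear_layer(8, 8, kernel_init=Initialization.KaimingHeNormal, kernel_gain=0.1)
# The initializer draws the same values under the same seed; only the
# post-hoc multiplier differs.
assert torch.allclose(scaled.weight.data * 10.0, base.weight.data)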