
Goal conditioning integration (#5142)

* Adding Hypernetwork modules and unit tests

* Edits

* Integration of the hypernetwork into the trainer

* Update ml-agents/mlagents/trainers/torch/networks.py

Co-authored-by: Arthur Juliani <awjuliani@gmail.com>

* Making hyper the default and adding the conditioning type None

* Reducing the number of hypernetwork layers

* addressing comments

Co-authored-by: Arthur Juliani <awjuliani@gmail.com>
/check-for-ModelOverriders
GitHub, 3 years ago
Current commit: 81705d6d
5 files changed, 107 insertions and 15 deletions:
1. ml-agents/mlagents/trainers/settings.py (6 changes)
2. ml-agents/mlagents/trainers/tests/simple_test_envs.py (14 changes)
3. ml-agents/mlagents/trainers/tests/torch/test_simple_rl.py (25 changes)
4. ml-agents/mlagents/trainers/torch/conditioning.py (4 changes)
5. ml-agents/mlagents/trainers/torch/networks.py (73 changes)

ml-agents/mlagents/trainers/settings.py (6 changes)


LINEAR = "linear"
class ConditioningType(Enum):
HYPER = "hyper"
NONE = "none"
@attr.s(auto_attribs=True)
class NetworkSettings:
@attr.s

num_layers: int = 2
vis_encode_type: EncoderType = EncoderType.SIMPLE
memory: Optional[MemorySettings] = None
goal_conditioning_type: ConditioningType = ConditioningType.HYPER
@attr.s(auto_attribs=True)
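Since the new field is a plain attrs attribute, it can be toggled programmatically the same way this PR's own tests do, via attr.evolve. A minimal sketch (constructing a bare NetworkSettings() here is illustrative; during training the object is built from the YAML config):

import attr
from mlagents.trainers.settings import NetworkSettings, ConditioningType

# HYPER is the new default: goal observations drive a hypernetwork-conditioned body.
settings = NetworkSettings()
assert settings.goal_conditioning_type == ConditioningType.HYPER

# NONE opts out: goals are fed to the body like any other observation.
unconditioned = attr.evolve(settings, goal_conditioning_type=ConditioningType.NONE)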

ml-agents/mlagents/trainers/tests/simple_test_envs.py (14 changes)


 from mlagents_envs.base_env import (
     ActionSpec,
+    ObservationSpec,
+    ObservationType,
     ActionTuple,
     BaseEnv,
     BehaviorSpec,

         vec_obs_size=OBS_SIZE,
         var_len_obs_size=VAR_LEN_SIZE,
         action_sizes=(1, 0),
+        goal_indices=None,
     ):
         super().__init__()
         self.num_visual = num_visual

         self.vec_obs_size = vec_obs_size
         self.var_len_obs_size = var_len_obs_size
+        self.goal_indices = goal_indices
         continuous_action_size, discrete_action_size = action_sizes
         discrete_tuple = tuple(2 for _ in range(discrete_action_size))
         action_spec = ActionSpec(continuous_action_size, discrete_tuple)

         for _ in range(self.num_var_len):
             obs_shape.append(self.var_len_obs_size)
         obs_spec = create_observation_specs_with_shapes(obs_shape)
+        if self.goal_indices is not None:
+            for i in range(len(obs_spec)):
+                if i in self.goal_indices:
+                    obs_spec[i] = ObservationSpec(
+                        shape=obs_spec[i].shape,
+                        dimension_property=obs_spec[i].dimension_property,
+                        observation_type=ObservationType.GOAL,
+                        name=obs_spec[i].name,
+                    )
         return obs_spec

     def _make_obs(self, value: float) -> List[np.ndarray]:

         var_len_obs_size=VAR_LEN_SIZE,
         action_sizes=(1, 0),
         num_agents=2,
+        goal_indices=None,
     ):
         super().__init__()
         self.envs = {}

                 vec_obs_size,
                 var_len_obs_size,
                 action_sizes,
+                goal_indices,
             )
             self.dones[name_and_num] = False
             self.envs[name_and_num].reset()
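The spec-marking loop above is the piece worth noting: goals are not a new observation channel, just ordinary observations whose ObservationSpec carries ObservationType.GOAL. A standalone sketch of the same logic (the helper name mark_goals and its obs_specs argument are illustrative, not part of the PR):

from typing import List
from mlagents_envs.base_env import ObservationSpec, ObservationType

def mark_goals(
    obs_specs: List[ObservationSpec], goal_indices: List[int]
) -> List[ObservationSpec]:
    # ObservationSpec is immutable, so a new spec is built for each goal index;
    # only observation_type changes, shape/dimensions/name are carried over.
    return [
        ObservationSpec(
            shape=spec.shape,
            dimension_property=spec.dimension_property,
            observation_type=ObservationType.GOAL,
            name=spec.name,
        )
        if i in goal_indices
        else spec
        for i, spec in enumerate(obs_specs)
    ]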

ml-agents/mlagents/trainers/tests/torch/test_simple_rl.py (25 changes)


     GAILSettings,
     RewardSignalType,
     EncoderType,
+    ConditioningType,
 )
 from mlagents_envs.communicator_objects.demonstration_meta_pb2 import (

     check_environment_trains(env, {BRAIN_NAME: config})


+@pytest.mark.parametrize("conditioning_type", [ConditioningType.HYPER])
-def test_var_len_obs_poca(num_vis, num_vector, num_var_len):
+def test_var_len_obs_and_goal_poca(num_vis, num_vector, num_var_len, conditioning_type):
     env = MultiAgentEnvironment(
         [BRAIN_NAME],
         action_sizes=(0, 1),

         step_size=0.2,
         num_agents=2,
+        goal_indices=[0],
     )
+    new_network = attr.evolve(
+        POCA_TORCH_CONFIG.network_settings, goal_conditioning_type=conditioning_type
+    )
-    config = attr.evolve(POCA_TORCH_CONFIG, hyperparameters=new_hyperparams)
+    config = attr.evolve(
+        POCA_TORCH_CONFIG, hyperparameters=new_hyperparams, network_settings=new_network
+    )
     check_environment_trains(env, {BRAIN_NAME: config})

     check_environment_trains(env, {BRAIN_NAME: config})


+@pytest.mark.parametrize("conditioning_type", [ConditioningType.HYPER])
-def test_var_len_obs_ppo(num_vis, num_vector, num_var_len, action_sizes):
+def test_var_len_obs_and_goal_ppo(
+    num_vis, num_vector, num_var_len, action_sizes, conditioning_type
+):
     env = SimpleEnvironment(
         [BRAIN_NAME],
         action_sizes=action_sizes,

         step_size=0.2,
+        goal_indices=[0],
     )
+    new_network = attr.evolve(
+        PPO_TORCH_CONFIG.network_settings, goal_conditioning_type=conditioning_type
+    )
-    config = attr.evolve(PPO_TORCH_CONFIG, hyperparameters=new_hyperparams)
+    config = attr.evolve(
+        PPO_TORCH_CONFIG, hyperparameters=new_hyperparams, network_settings=new_network
+    )
     check_environment_trains(env, {BRAIN_NAME: config})
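Both tests parametrize conditioning_type but currently only sweep HYPER; since the enum round-trips through the lowercase strings a YAML config would carry, extending the sweep later is a one-line change. A sketch (the test name is illustrative, not part of the diff):

import pytest
from mlagents.trainers.settings import ConditioningType

@pytest.mark.parametrize(
    "conditioning_type", [ConditioningType.HYPER, ConditioningType.NONE]
)
def test_conditioning_type_round_trip(conditioning_type):
    # "hyper" / "none" are the values that appear in trainer config files.
    assert ConditioningType(conditioning_type.value) is conditioning_type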

ml-agents/mlagents/trainers/torch/conditioning.py (4 changes)


"""
super().__init__()
layers: List[torch.nn.Module] = []
prev_size = input_size + goal_size
prev_size = input_size
for i in range(num_layers):
if num_layers - i <= num_conditional_layers:
# This means layer i is a conditional layer since the conditional

def forward(
self, input_tensor: torch.Tensor, goal_tensor: torch.Tensor
) -> torch.Tensor: # type: ignore
activation = torch.cat([input_tensor, goal_tensor], dim=-1)
activation = input_tensor
for layer in self.layers:
if isinstance(layer, HyperNetwork):
activation = layer(activation, goal_tensor)
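For context on what this change removes: in a hypernetwork layer the goal does not enter the activation path directly; instead a small generator network maps the goal embedding to the weights of the linear layer applied to the activation. A minimal self-contained sketch of that idea (TinyHyperNetwork is illustrative; the real HyperNetwork in conditioning.py differs in initialization and layer details):

import torch
from torch import nn

class TinyHyperNetwork(nn.Module):
    """Generates the weights of a per-sample linear layer from a goal embedding."""

    def __init__(self, input_size: int, output_size: int, goal_size: int, hidden: int = 64):
        super().__init__()
        self.input_size = input_size
        self.output_size = output_size
        # Small generator that maps the goal to a flattened weight matrix.
        self.weight_generator = nn.Sequential(
            nn.Linear(goal_size, hidden),
            nn.ReLU(),
            nn.Linear(hidden, input_size * output_size),
        )
        self.bias = nn.Parameter(torch.zeros(output_size))

    def forward(self, x: torch.Tensor, goal: torch.Tensor) -> torch.Tensor:
        batch_size = x.shape[0]
        # One weight matrix per batch element, conditioned on that element's goal.
        weights = self.weight_generator(goal).view(
            batch_size, self.input_size, self.output_size
        )
        return torch.bmm(x.unsqueeze(1), weights).squeeze(1) + self.bias

# Usage: a batch of 8 activations of size 32, conditioned on 4-dimensional goals.
layer = TinyHyperNetwork(input_size=32, output_size=16, goal_size=4)
out = layer(torch.randn(8, 32), torch.randn(8, 4))
assert out.shape == (8, 16)

Read against that sketch, the two deletions above presumably remove a redundancy: once the goal determines the generated weights, concatenating it onto the activation as well adds nothing, so the encoder input width drops back to input_size.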

ml-agents/mlagents/trainers/torch/networks.py (73 changes)


 from mlagents.torch_utils import torch, nn
-from mlagents_envs.base_env import ActionSpec, ObservationSpec
+from mlagents_envs.base_env import ActionSpec, ObservationSpec, ObservationType
-from mlagents.trainers.settings import NetworkSettings, EncoderType
+from mlagents.trainers.settings import NetworkSettings, EncoderType, ConditioningType
 from mlagents.trainers.torch.utils import ModelUtils
 from mlagents.trainers.torch.decoders import ValueHeads
 from mlagents.trainers.torch.layers import LSTM, LinearEncoder

+from mlagents.trainers.torch.conditioning import ConditionalEncoder
 from mlagents.trainers.torch.attention import (
     EntityEmbedding,
     ResidualSelfAttention,

         self.normalize = normalize
         self._total_enc_size = total_enc_size
+        self._total_goal_enc_size = 0
+        self._goal_processor_indices: List[int] = []
+        for i in range(len(observation_specs)):
+            if observation_specs[i].observation_type == ObservationType.GOAL:
+                self._total_goal_enc_size += self.embedding_sizes[i]
+                self._goal_processor_indices.append(i)

     @property
     def total_enc_size(self) -> int:
         """

+    @property
+    def total_goal_enc_size(self) -> int:
+        """
+        Returns the total goal encoding size for this ObservationEncoder.
+        """
+        return self._total_goal_enc_size
+
     def update_normalization(self, buffer: AgentBuffer) -> None:
         obs = ObsUtil.from_buffer(buffer, len(self.processors))

         """
         Encode observations using a list of processors and an RSA.
         :param inputs: List of Tensors corresponding to a set of obs.
         :param processors: a ModuleList of the input processors to be applied to these obs.
         :param rsa: Optionally, an RSA to use for variable length obs.
         :param x_self_encoder: Optionally, an encoder to use for x_self (in this case, the non-variable inputs).
         """
         encodes = []
         var_len_processor_inputs: List[Tuple[nn.Module, torch.Tensor]] = []

         return encoded_self

+    def get_goal_encoding(self, inputs: List[torch.Tensor]) -> torch.Tensor:
+        """
+        Encode observations corresponding to goals using a list of processors.
+        :param inputs: List of Tensors corresponding to a set of obs.
+        """
+        encodes = []
+        for idx in self._goal_processor_indices:
+            processor = self.processors[idx]
+            if not isinstance(processor, EntityEmbedding):
+                # The input can be encoded without having to process other inputs
+                obs_input = inputs[idx]
+                processed_obs = processor(obs_input)
+                encodes.append(processed_obs)
+            else:
+                raise UnityTrainerException(
+                    "One of the goals uses variable length observations. This use "
+                    "case is not supported."
+                )
+        if len(encodes) != 0:
+            encoded = torch.cat(encodes, dim=1)
+        else:
+            raise UnityTrainerException(
+                "Trainer was unable to process any of the goals provided as input."
+            )
+        return encoded
+

 class NetworkBody(nn.Module):
     def __init__(

         self.processors = self.observation_encoder.processors
         total_enc_size = self.observation_encoder.total_enc_size
         total_enc_size += encoded_act_size
-        self.linear_encoder = LinearEncoder(
-            total_enc_size, network_settings.num_layers, self.h_size
-        )
+        if (
+            self.observation_encoder.total_goal_enc_size > 0
+            and network_settings.goal_conditioning_type == ConditioningType.HYPER
+        ):
+            self._body_encoder = ConditionalEncoder(
+                total_enc_size,
+                self.observation_encoder.total_goal_enc_size,
+                self.h_size,
+                network_settings.num_layers,
+                1,
+            )
+        else:
+            self._body_encoder = LinearEncoder(
+                total_enc_size, network_settings.num_layers, self.h_size
+            )

         if self.use_lstm:
             self.lstm = LSTM(self.h_size, self.m_size)

         encoded_self = self.observation_encoder(inputs)
         if actions is not None:
             encoded_self = torch.cat([encoded_self, actions], dim=1)
-        encoding = self.linear_encoder(encoded_self)
+        if isinstance(self._body_encoder, ConditionalEncoder):
+            goal = self.observation_encoder.get_goal_encoding(inputs)
+            encoding = self._body_encoder(encoded_self, goal)
+        else:
+            encoding = self._body_encoder(encoded_self)

         if self.use_lstm:
             # Resize to (batch, sequence length, encoding size)
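Putting the two networks.py hunks together: the ConditionalEncoder is chosen once at construction time, and forward simply dispatches on the encoder's type. The gate reduces to one predicate, sketched here for emphasis (the helper name is illustrative, not part of the diff):

from mlagents.trainers.settings import ConditioningType

def uses_hyper_conditioning(
    total_goal_enc_size: int, conditioning_type: ConditioningType
) -> bool:
    # Mirrors the new branch in NetworkBody.__init__: conditional encoding requires
    # both at least one GOAL-typed observation and goal_conditioning_type == HYPER.
    return total_goal_enc_size > 0 and conditioning_type == ConditioningType.HYPER

assert uses_hyper_conditioning(8, ConditioningType.HYPER)
assert not uses_hyper_conditioning(0, ConditioningType.HYPER)  # no GOAL observations
assert not uses_hyper_conditioning(8, ConditioningType.NONE)   # conditioning disabled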
