
Additional conditional experiments

/goal-conditioning/new
Arthur Juliani, 4 years ago
Current commit
1cf97635
3 files changed, 93 insertions and 7 deletions
  1. config/ppo/GridWorld.yaml (2 changes)
  2. ml-agents/mlagents/trainers/torch/layers.py (48 changes)
  3. ml-agents/mlagents/trainers/torch/networks.py (50 changes)

config/ppo/GridWorld.yaml (2 changes)


      learning_rate_schedule: linear
    network_settings:
      normalize: false
-      hidden_units: 256
+      hidden_units: 64
      num_layers: 1
      vis_encode_type: simple
    reward_signals:
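
For orientation, here is a minimal sketch (not part of the commit) of the NetworkSettings object the trainer builds from this network_settings block; the field names follow mlagents.trainers.settings, and the EncoderType mapping for vis_encode_type is assumed.

# Minimal sketch, assuming the NetworkSettings/EncoderType definitions in
# mlagents.trainers.settings; values mirror the YAML above after this change.
from mlagents.trainers.settings import NetworkSettings, EncoderType

grid_world_network = NetworkSettings(
    normalize=False,
    hidden_units=64,                     # reduced from 256 in this commit
    num_layers=1,
    vis_encode_type=EncoderType.SIMPLE,  # "simple" in the YAML
)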

ml-agents/mlagents/trainers/torch/layers.py (48 changes)


        return (layer_activations - mean) / (torch.sqrt(var + 1e-5))


class ConditionalEncoder(torch.nn.Module):
    """
    Encodes observations with a stack of linear layers whose outputs are
    gated (element-wise multiplied) by an encoding of the goal observation.
    """

    def __init__(
        self,
        input_size: int,
        goal_size: int,
        num_layers: int,
        hidden_size: int,
        kernel_init: Initialization = Initialization.KaimingHeNormal,
        kernel_gain: float = 1.0,
    ):
        super().__init__()
        # Two-layer encoder that maps the goal observation to a gating vector
        # of width hidden_size.
        self.goal_encoder = LinearEncoder(goal_size, 2, hidden_size)
        layers = [
            linear_layer(
                input_size,
                hidden_size,
                kernel_init=kernel_init,
                kernel_gain=kernel_gain,
            )
        ]
        layers.append(Swish())
        for _ in range(num_layers - 1):
            layers.append(
                linear_layer(
                    hidden_size,
                    hidden_size,
                    kernel_init=kernel_init,
                    kernel_gain=kernel_gain,
                )
            )
            layers.append(Swish())
        # Register the layers as a ModuleList so their parameters are tracked.
        self.layers = torch.nn.ModuleList(layers)

    def forward(
        self, input_tensor: torch.Tensor, goal_tensor: torch.Tensor
    ) -> torch.Tensor:
        activation = input_tensor
        goal_activation = self.goal_encoder(goal_tensor)
        for layer in self.layers:
            activation = layer(activation)
            # Gate the output of each linear layer with the goal encoding;
            # the Swish activations themselves are not gated.
            if not isinstance(layer, Swish):
                activation = activation * goal_activation
        return activation


class LinearEncoder(torch.nn.Module):
    """
    Linear layers.

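A minimal usage sketch of the new encoder (not part of the commit); the observation width, goal width, and batch size below are made-up values chosen for illustration.

# Usage sketch only; sizes are illustrative, not taken from the commit.
import torch
from mlagents.trainers.torch.layers import ConditionalEncoder

encoder = ConditionalEncoder(
    input_size=32,   # flattened non-goal observation size (assumed)
    goal_size=5,     # e.g. a one-hot goal over 5 targets (assumed)
    num_layers=1,
    hidden_size=64,
)

obs = torch.rand(8, 32)                                             # batch of 8 observations
goal = torch.nn.functional.one_hot(torch.arange(8) % 5, 5).float()  # one goal per sample
hidden = encoder(obs, goal)                                         # goal-gated encoding
print(hidden.shape)                                                 # torch.Size([8, 64])
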
ml-agents/mlagents/trainers/torch/networks.py (50 changes)


from enum import Enum
from typing import Callable, List, Dict, Tuple, Optional, Union
import abc

from mlagents.trainers.settings import NetworkSettings
from mlagents.trainers.torch.utils import ModelUtils
from mlagents.trainers.torch.decoders import ValueHeads
-from mlagents.trainers.torch.layers import LSTM, LinearEncoder, HyperNetwork
+from mlagents.trainers.torch.layers import (
+    LSTM,
+    LinearEncoder,
+    HyperNetwork,
+    ConditionalEncoder,
+)
from mlagents.trainers.torch.encoders import VectorInput
from mlagents.trainers.buffer import AgentBuffer
from mlagents.trainers.trajectory import ObsUtil

EPSILON = 1e-7


class ConditioningMode(Enum):
    DEFAULT = 0  # no goal conditioning; goal observations handled like regular inputs
    HYPER = 1    # condition the encoder with a HyperNetwork driven by the goal
    SOFT = 3     # condition the encoder by gating its activations (ConditionalEncoder)
class NetworkBody(nn.Module):
    def __init__(
        self,

    ):
        super().__init__()
        self.conditioning_mode = ConditioningMode.HYPER
        self.normalize = network_settings.normalize
        self.use_lstm = network_settings.memory is not None
        self.h_size = network_settings.hidden_units

        total_enc_size, total_goal_size = 0, 0
        for idx, embedding_size in enumerate(self.embedding_sizes):
-           if self.obs_types[idx] == ObservationType.DEFAULT:
+           if (
+               self.obs_types[idx] == ObservationType.DEFAULT
+               or self.conditioning_mode == ConditioningMode.DEFAULT
+           ):

-           if self.obs_types[idx] == ObservationType.GOAL:
+           if (
+               self.obs_types[idx] == ObservationType.GOAL
+               and self.conditioning_mode != ConditioningMode.DEFAULT
+           ):

-       if ObservationType.GOAL in self.obs_types:
+       if (
+           ObservationType.GOAL in self.obs_types
+           and self.conditioning_mode == ConditioningMode.HYPER
+       ):

                total_goal_size,
                network_settings.num_layers,
                self.h_size,
            )
+       elif (
+           ObservationType.GOAL in self.obs_types
+           and self.conditioning_mode == ConditioningMode.SOFT
+       ):
+           self.linear_encoder = ConditionalEncoder(
+               total_enc_size,
+               total_goal_size,
+               network_settings.num_layers,
+               self.h_size,

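Because the collapsed hunk hides the branch bodies, the following is an illustrative reconstruction (not the commit's exact code) of how the encoder choice appears to follow from the conditioning mode; the HyperNetwork argument order and the LinearEncoder fallback are assumptions inferred from the visible context lines.

# Illustrative reconstruction only; HyperNetwork's argument order and the
# LinearEncoder fallback are assumptions, not shown in the diff.
if (
    ObservationType.GOAL in self.obs_types
    and self.conditioning_mode == ConditioningMode.HYPER
):
    # Goal observations condition the encoder through a HyperNetwork.
    self.linear_encoder = HyperNetwork(
        total_enc_size, total_goal_size, network_settings.num_layers, self.h_size
    )
elif (
    ObservationType.GOAL in self.obs_types
    and self.conditioning_mode == ConditioningMode.SOFT
):
    # Goal observations gate the encoder activations (new ConditionalEncoder).
    self.linear_encoder = ConditionalEncoder(
        total_enc_size, total_goal_size, network_settings.num_layers, self.h_size
    )
else:
    # No goal conditioning: all observations go through a plain LinearEncoder.
    self.linear_encoder = LinearEncoder(
        total_enc_size, network_settings.num_layers, self.h_size
    )
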
        for idx, processor in enumerate(self.processors):
            obs_input = inputs[idx]
            processed_obs = processor(obs_input)
-           if self.obs_types[idx] == ObservationType.DEFAULT:
+           if (
+               self.obs_types[idx] == ObservationType.DEFAULT
+               or self.conditioning_mode == ConditioningMode.DEFAULT
+           ):

-           elif self.obs_types[idx] == ObservationType.GOAL:
+           elif (
+               self.obs_types[idx] == ObservationType.GOAL
+               and self.conditioning_mode != ConditioningMode.DEFAULT
+           ):
                goal_signal = processed_obs

        if len(encodes) == 0:
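
Likewise, a hedged sketch of how the goal signal is presumably consumed later in forward; encoded_self stands for the concatenation of the per-observation encodings, and passing it together with goal_signal in the HYPER case assumes HyperNetwork shares the two-argument forward of ConditionalEncoder.

# Hedged sketch only; the HyperNetwork call signature is an assumption.
encoded_self = torch.cat(encodes, dim=1)
if self.conditioning_mode == ConditioningMode.DEFAULT:
    encoding = self.linear_encoder(encoded_self)               # plain LinearEncoder
else:
    encoding = self.linear_encoder(encoded_self, goal_signal)  # HyperNetwork or ConditionalEncoder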
