
prepare to merge action_out

Branch: /develop/actionmodel-csharp
Andrew Cohen, 4 years ago
Commit b0b2e22e
2 changed files with 260 additions and 260 deletions

  1. ml-agents/mlagents/trainers/torch/distributions.py (4 changes)
  2. ml-agents/mlagents/trainers/torch/networks.py (516 changes)

ml-agents/mlagents/trainers/torch/distributions.py (4 changes)


def forward(self, inputs: torch.Tensor, masks: torch.Tensor) -> List[DistInstance]:
    distributions: List[DistInstance] = []
    for continuous_dist in self.continuous_distributions:
        distributions += continuous_dist(inputs)
    # Discrete branches (attribute name assumed); masks restrict the discrete
    # distributions to the currently available actions.
    for discrete_dist in self.discrete_distributions:
        distributions += discrete_dist(inputs, masks)
    return distributions
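The method returns a single flat list of DistInstance objects, the continuous distribution(s) first and then one distribution per discrete branch, so a caller can treat every component of a hybrid action uniformly. A minimal sketch of that consumption, assuming dists came from the forward pass above and using the DistInstance interface (sample, log_prob, entropy):

# Illustrative only: consume the combined continuous + discrete distribution list.
actions = [dist.sample() for dist in dists]                      # one tensor per component
log_probs = [dist.log_prob(a) for dist, a in zip(dists, actions)]
entropies = [dist.entropy() for dist in dists]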

ml-agents/mlagents/trainers/torch/networks.py (516 changes)


         pass


-class HybridSimpleActor(nn.Module, Actor):
+class SimpleActor(nn.Module, Actor):
     def __init__(
         self,
         observation_shapes: List[Tuple[int, ...]],

     )


-class HybridSharedActorCritic(HybridSimpleActor, ActorCritic):
+class SharedActorCritic(HybridSimpleActor, ActorCritic):
     def __init__(
         self,
         observation_shapes: List[Tuple[int, ...]],

         return dists, value_outputs, memories


-class HybridSeparateActorCritic(HybridSimpleActor, ActorCritic):
+class SeparateActorCritic(HybridSimpleActor, ActorCritic):
     def __init__(
         self,
         observation_shapes: List[Tuple[int, ...]],
################################################################################
######### Continuous xor Discrete cases ##########
################################################################################
class SimpleActor(nn.Module, Actor):
    def __init__(
        self,
        observation_shapes: List[Tuple[int, ...]],
        network_settings: NetworkSettings,
        act_type: ActionType,
        act_size: List[int],
        conditional_sigma: bool = False,
        tanh_squash: bool = False,
    ):
        super().__init__()
        self.act_type = act_type
        self.act_size = act_size
        self.version_number = torch.nn.Parameter(torch.Tensor([2.0]))
        self.is_continuous_int = torch.nn.Parameter(
            torch.Tensor([int(act_type == ActionType.CONTINUOUS)])
        )
        self.act_size_vector = torch.nn.Parameter(torch.Tensor(act_size))
        self.network_body = NetworkBody(observation_shapes, network_settings)
        if network_settings.memory is not None:
            self.encoding_size = network_settings.memory.memory_size // 2
        else:
            self.encoding_size = network_settings.hidden_units
        if self.act_type == ActionType.CONTINUOUS:
            self.distribution = GaussianDistribution(
                self.encoding_size,
                act_size[0],
                conditional_sigma=conditional_sigma,
                tanh_squash=tanh_squash,
            )
        else:
            self.distribution = MultiCategoricalDistribution(
                self.encoding_size, act_size
            )

    @property
    def memory_size(self) -> int:
        return self.network_body.memory_size

    def update_normalization(self, vector_obs: List[torch.Tensor]) -> None:
        self.network_body.update_normalization(vector_obs)

    def sample_action(self, dists: List[DistInstance]) -> List[torch.Tensor]:
        actions = []
        for action_dist in dists:
            action = action_dist.sample()
            actions.append(action)
        return actions

    def get_dists(
        self,
        vec_inputs: List[torch.Tensor],
        vis_inputs: List[torch.Tensor],
        masks: Optional[torch.Tensor] = None,
        memories: Optional[torch.Tensor] = None,
        sequence_length: int = 1,
    ) -> Tuple[List[DistInstance], Optional[torch.Tensor]]:
        encoding, memories = self.network_body(
            vec_inputs, vis_inputs, memories=memories, sequence_length=sequence_length
        )
        if self.act_type == ActionType.CONTINUOUS:
            dists = self.distribution(encoding)
        else:
            dists = self.distribution(encoding, masks)
        return dists, memories

    def forward(
        self,
        vec_inputs: List[torch.Tensor],
        vis_inputs: List[torch.Tensor],
        masks: Optional[torch.Tensor] = None,
        memories: Optional[torch.Tensor] = None,
    ) -> Tuple[torch.Tensor, int, int, int, int]:
        """
        Note: This forward() method is required for exporting to ONNX. Don't modify the inputs and outputs.
        """
        dists, _ = self.get_dists(vec_inputs, vis_inputs, masks, memories, 1)
        action_list = self.sample_action(dists)
        sampled_actions = torch.stack(action_list, dim=-1)
        if self.act_type == ActionType.CONTINUOUS:
            action_out = sampled_actions
        else:
            action_out = dists[0].all_log_prob()
        return (
            action_out,
            self.version_number,
            torch.Tensor([self.network_body.memory_size]),
            self.is_continuous_int,
            self.act_size_vector,
        )
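As a rough orientation, this is how the continuous-xor-discrete SimpleActor above could be exercised on its own. NetworkSettings and ActionType are the usual ML-Agents types; the observation shape, branch sizes, and batch size are invented for the sketch, and SimpleActor as defined above is assumed to be in scope.

# Illustrative only: drive the SimpleActor above with a discrete action space.
# Shapes, settings, and the batch size are made-up example values.
import torch
from mlagents.trainers.settings import NetworkSettings
from mlagents_envs.base_env import ActionType

obs_shapes = [(8,)]                                  # one vector observation of size 8
settings = NetworkSettings(hidden_units=64, num_layers=1)
actor = SimpleActor(obs_shapes, settings, ActionType.DISCRETE, act_size=[3, 2])

vec_obs = [torch.randn(5, 8)]                        # batch of 5 observations
masks = torch.ones(5, 3 + 2)                         # all discrete actions available
dists, _ = actor.get_dists(vec_obs, [], masks=masks)
actions = actor.sample_action(dists)                 # one tensor per discrete branch
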
class SharedActorCritic(SimpleActor, ActorCritic):
    def __init__(
        self,
        observation_shapes: List[Tuple[int, ...]],
        network_settings: NetworkSettings,
        act_type: ActionType,
        act_size: List[int],
        stream_names: List[str],
        conditional_sigma: bool = False,
        tanh_squash: bool = False,
    ):
        super().__init__(
            observation_shapes,
            network_settings,
            act_type,
            act_size,
            conditional_sigma,
            tanh_squash,
        )
        self.stream_names = stream_names
        self.value_heads = ValueHeads(stream_names, self.encoding_size)

    def critic_pass(
        self,
        vec_inputs: List[torch.Tensor],
        vis_inputs: List[torch.Tensor],
        memories: Optional[torch.Tensor] = None,
        sequence_length: int = 1,
    ) -> Tuple[Dict[str, torch.Tensor], torch.Tensor]:
        encoding, memories_out = self.network_body(
            vec_inputs, vis_inputs, memories=memories, sequence_length=sequence_length
        )
        return self.value_heads(encoding), memories_out

    def get_dist_and_value(
        self,
        vec_inputs: List[torch.Tensor],
        vis_inputs: List[torch.Tensor],
        masks: Optional[torch.Tensor] = None,
        memories: Optional[torch.Tensor] = None,
        sequence_length: int = 1,
    ) -> Tuple[List[DistInstance], Dict[str, torch.Tensor], torch.Tensor]:
        encoding, memories = self.network_body(
            vec_inputs, vis_inputs, memories=memories, sequence_length=sequence_length
        )
        if self.act_type == ActionType.CONTINUOUS:
            dists = self.distribution(encoding)
        else:
            dists = self.distribution(encoding, masks=masks)
        value_outputs = self.value_heads(encoding)
        return dists, value_outputs, memories
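Because this shared variant attaches the value heads to the same NetworkBody encoding the policy uses, a single get_dist_and_value call yields both action distributions and value estimates. A hedged sketch of that single pass; the stream name, shapes, and sizes are invented, and the class above is assumed to be in scope.

# Illustrative only: one forward pass yields distributions and value estimates.
import torch
from mlagents.trainers.settings import NetworkSettings
from mlagents_envs.base_env import ActionType

shared_ac = SharedActorCritic(
    [(8,)],
    NetworkSettings(hidden_units=64, num_layers=1),
    ActionType.CONTINUOUS,
    act_size=[2],
    stream_names=["extrinsic"],
)
vec_obs = [torch.randn(5, 8)]
dists, value_outputs, _ = shared_ac.get_dist_and_value(vec_obs, [])
actions = shared_ac.sample_action(dists)
log_probs = dists[0].log_prob(actions[0])     # continuous case: a single Gaussian
values = value_outputs["extrinsic"]           # one value tensor per reward stream
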
class SeparateActorCritic(SimpleActor, ActorCritic):
    def __init__(
        self,
        observation_shapes: List[Tuple[int, ...]],
        network_settings: NetworkSettings,
        act_type: ActionType,
        act_size: List[int],
        stream_names: List[str],
        conditional_sigma: bool = False,
        tanh_squash: bool = False,
    ):
        # Give the Actor only half the memories. Note we previously validate
        # that memory_size must be a multiple of 4.
        self.use_lstm = network_settings.memory is not None
        super().__init__(
            observation_shapes,
            network_settings,
            act_type,
            act_size,
            conditional_sigma,
            tanh_squash,
        )
        self.stream_names = stream_names
        self.critic = ValueNetwork(stream_names, observation_shapes, network_settings)

    @property
    def memory_size(self) -> int:
        return self.network_body.memory_size + self.critic.memory_size

    def critic_pass(
        self,
        vec_inputs: List[torch.Tensor],
        vis_inputs: List[torch.Tensor],
        memories: Optional[torch.Tensor] = None,
        sequence_length: int = 1,
    ) -> Tuple[Dict[str, torch.Tensor], torch.Tensor]:
        actor_mem, critic_mem = None, None
        if self.use_lstm:
            # Use only the back half of memories for critic
            actor_mem, critic_mem = torch.split(memories, self.memory_size // 2, -1)
        value_outputs, critic_mem_out = self.critic(
            vec_inputs, vis_inputs, memories=critic_mem, sequence_length=sequence_length
        )
        if actor_mem is not None:
            # Make memories with the actor mem unchanged
            memories_out = torch.cat([actor_mem, critic_mem_out], dim=-1)
        else:
            memories_out = None
        return value_outputs, memories_out

    def get_dist_and_value(
        self,
        vec_inputs: List[torch.Tensor],
        vis_inputs: List[torch.Tensor],
        masks: Optional[torch.Tensor] = None,
        memories: Optional[torch.Tensor] = None,
        sequence_length: int = 1,
    ) -> Tuple[List[DistInstance], Dict[str, torch.Tensor], torch.Tensor]:
        if self.use_lstm:
            # Use the front half of memories for the actor, the back half for the critic
            actor_mem, critic_mem = torch.split(memories, self.memory_size // 2, dim=-1)
        else:
            critic_mem = None
            actor_mem = None
        dists, actor_mem_outs = self.get_dists(
            vec_inputs,
            vis_inputs,
            memories=actor_mem,
            sequence_length=sequence_length,
            masks=masks,
        )
        value_outputs, critic_mem_outs = self.critic(
            vec_inputs, vis_inputs, memories=critic_mem, sequence_length=sequence_length
        )
        if self.use_lstm:
            mem_out = torch.cat([actor_mem_outs, critic_mem_outs], dim=-1)
        else:
            mem_out = None
        return dists, value_outputs, mem_out

    def update_normalization(self, vector_obs: List[torch.Tensor]) -> None:
        super().update_normalization(vector_obs)
        self.critic.network_body.update_normalization(vector_obs)
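The memory layout convention above, actor state in the front half and critic state in the back half of one concatenated tensor, can be seen in isolation with plain torch.split; the sizes below are invented for illustration.

# Illustrative only: how a combined recurrent memory splits into actor/critic halves.
import torch

memory_size = 256                               # combined size (made-up example)
memories = torch.zeros(1, memory_size)          # one agent's recurrent state
actor_mem, critic_mem = torch.split(memories, memory_size // 2, dim=-1)
assert actor_mem.shape[-1] == critic_mem.shape[-1] == 128
# After the forward pass the two halves are concatenated again, so downstream
# code keeps treating the memory as a single tensor.
combined = torch.cat([actor_mem, critic_mem], dim=-1)
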
class GlobalSteps(nn.Module):
    def __init__(self):
        super().__init__()
        self.__global_step = nn.Parameter(torch.Tensor([0]), requires_grad=False)

    @property
    def current_step(self):
        return int(self.__global_step.item())

    @current_step.setter
    def current_step(self, value):
        self.__global_step[:] = value

    def increment(self, value):
        self.__global_step += value


class LearningRate(nn.Module):
    def __init__(self, lr):
        # Todo: add learning rate decay
        super().__init__()
        self.learning_rate = torch.Tensor([lr])
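GlobalSteps stores the step count in a non-trainable nn.Parameter, which means it travels with the module's state_dict and therefore with checkpoints. A small sketch of that behavior; the values are invented and GlobalSteps as defined above is assumed to be in scope.

# Illustrative only: the step counter survives a state_dict round trip.
counter = GlobalSteps()
counter.increment(1000)
assert counter.current_step == 1000

restored = GlobalSteps()
restored.load_state_dict(counter.state_dict())   # the non-trainable parameter is included
assert restored.current_step == 1000
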
# The diff then repeats the same SimpleActor, SharedActorCritic, SeparateActorCritic,
# GlobalSteps, and LearningRate classes verbatim in commented-out form.