|
|
|
|
|
|
self, |
|
|
|
vec_inputs: List[torch.Tensor], |
|
|
|
vis_inputs: List[torch.Tensor], |
|
|
|
goal: List[torch.tensor], |
|
|
|
goals: List[torch.tensor], |
|
|
|
actions: Optional[torch.Tensor] = None, |
|
|
|
memories: Optional[torch.Tensor] = None, |
|
|
|
sequence_length: int = 1, |
|
|
|
|
|
|
) |
|
|
|
output = self.value_heads(encoding, goal) |
|
|
|
output = self.value_heads(encoding, goals) |
|
|
|
return output, memories |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
self, |
|
|
|
vec_inputs: List[torch.Tensor], |
|
|
|
vis_inputs: List[torch.Tensor], |
|
|
|
goal: List[torch.Tensor], |
|
|
|
goals: List[torch.Tensor], |
|
|
|
memories: Optional[torch.Tensor] = None, |
|
|
|
sequence_length: int = 1, |
|
|
|
) -> Tuple[Dict[str, torch.Tensor], torch.Tensor]: |
|
|
|
|
|
|
self, |
|
|
|
vec_inputs: List[torch.Tensor], |
|
|
|
vis_inputs: List[torch.Tensor], |
|
|
|
goals: List[torch.Tensor], |
|
|
|
masks: Optional[torch.Tensor] = None, |
|
|
|
memories: Optional[torch.Tensor] = None, |
|
|
|
) -> Tuple[Union[int, torch.Tensor], ...]: |
|
|
|
|
|
|
At this moment, torch.onnx.export() doesn't accept None as tensor to be exported, |
|
|
|
so the size of return tuple varies with action spec. |
|
|
|
""" |
|
|
|
vec_inputs = [vec_inputs[0][:, 1:]] |
|
|
|
goal = [vec_inputs[0][:, :1]] |
|
|
|
encoding, memories_out = self.network_body( |
|
|
|
vec_inputs, vis_inputs, memories=memories, sequence_length=1 |
|
|
|
) |
|
|
|
|
|
|
disc_action_out, |
|
|
|
action_out_deprecated, |
|
|
|
) = self.action_model.get_action_out(encoding, masks, goal) |
|
|
|
) = self.action_model.get_action_out(encoding, masks, goals) |
|
|
|
export_out = [ |
|
|
|
self.version_number, |
|
|
|
torch.Tensor([self.network_body.memory_size]), |
|
|
|
|
|
|
self, |
|
|
|
vec_inputs: List[torch.Tensor], |
|
|
|
vis_inputs: List[torch.Tensor], |
|
|
|
goal: List[torch.Tensor], |
|
|
|
goals: List[torch.Tensor], |
|
|
|
memories: Optional[torch.Tensor] = None, |
|
|
|
sequence_length: int = 1, |
|
|
|
) -> Tuple[Dict[str, torch.Tensor], torch.Tensor]: |
|
|
|
|
|
|
self, |
|
|
|
vec_inputs: List[torch.Tensor], |
|
|
|
vis_inputs: List[torch.Tensor], |
|
|
|
goals: List[torch.Tensor], |
|
|
|
actions: AgentAction, |
|
|
|
masks: Optional[torch.Tensor] = None, |
|
|
|
memories: Optional[torch.Tensor] = None, |
|
|
|
|
|
|
vec_inputs, vis_inputs, memories=memories, sequence_length=sequence_length |
|
|
|
) |
|
|
|
log_probs, entropies = self.action_model.evaluate( |
|
|
|
encoding, masks, actions, goal |
|
|
|
encoding, masks, actions, goals |
|
|
|
) |
|
|
|
value_outputs = self.value_heads(encoding) |
|
|
|
return log_probs, entropies, value_outputs |
|
|
|
|
|
|
vec_inputs: List[torch.Tensor], |
|
|
|
vis_inputs: List[torch.Tensor], |
|
|
|
goals: List[torch.Tensor], |
|
|
|
masks: Optional[torch.Tensor] = None, |
|
|
|
memories: Optional[torch.Tensor] = None, |
|
|
|
sequence_length: int = 1, |
|
|
|
|
|
|
encoding, memories = self.network_body( |
|
|
|
vec_inputs, vis_inputs, memories=memories, sequence_length=sequence_length |
|
|
|
) |
|
|
|
action, log_probs, entropies = self.action_model(encoding, masks, goal) |
|
|
|
action, log_probs, entropies = self.action_model(encoding, masks, goals) |
|
|
|
value_outputs = self.value_heads(encoding) |
|
|
|
return action, log_probs, entropies, value_outputs, memories |
|
|
|
|
|
|
|
|
|
|
self, |
|
|
|
vec_inputs: List[torch.Tensor], |
|
|
|
vis_inputs: List[torch.Tensor], |
|
|
|
goal: List[torch.Tensor], |
|
|
|
goals: List[torch.Tensor], |
|
|
|
memories: Optional[torch.Tensor] = None, |
|
|
|
sequence_length: int = 1, |
|
|
|
) -> Tuple[Dict[str, torch.Tensor], torch.Tensor]: |
|
|
|
|
|
|
value_outputs, critic_mem_out = self.critic( |
|
|
|
vec_inputs, |
|
|
|
vis_inputs, |
|
|
|
goal, |
|
|
|
goals, |
|
|
|
memories=critic_mem, |
|
|
|
sequence_length=sequence_length, |
|
|
|
) |
|
|
|