|
|
|
|
|
|
    def sample_actions(
        self,
        vec_obs: List[torch.Tensor],
        vis_obs: List[torch.Tensor],
        masks: Optional[torch.Tensor] = None,
        memories: Optional[torch.Tensor] = None,
        seq_len: int = 1,
        all_log_probs: bool = False,
    ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
        """
        :param vec_obs: List of vector observations.
        :param vis_obs: List of visual observations.
        :param masks: Loss masks for RNN, else None.
        :param memories: Input memories when using RNN, else None.
        :param seq_len: Sequence length when using RNN.
        :param all_log_probs: Returns (for discrete actions) a tensor of log probs, one for each action.
        :return: Tuple of actions, actions clipped to -1, 1, log probabilities (dependent on all_log_probs),
            entropies, and output memories, all as Torch Tensors.
        """
        if memories is None:
            dists, memories = self.actor_critic.get_dists(
                vec_obs, vis_obs, masks, memories, seq_len
            )
        else:
            # Recurrent case: run the combined dist/value pass so the memories advance.
            dists, _, memories = self.actor_critic.get_dist_and_value(
                vec_obs, vis_obs, masks, memories, seq_len
            )
        action_list = self.actor_critic.sample_action(dists)
        log_probs, entropies, all_logs = ModelUtils.get_probs_and_entropy(
            action_list, dists
        )
        actions = torch.stack(action_list, dim=-1)
        if self.use_continuous_act:
            actions = actions[:, :, 0]
        else:
            actions = actions[:, 0, :]
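        # After this squeeze, `actions` is roughly [batch, num_continuous_actions] in the
        # continuous case or [batch, num_discrete_branches] in the discrete case
        # (assuming `action_list` holds one tensor per action/branch).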
|
|
|
        # Use the sum of entropy across actions, not the mean
        entropy_sum = torch.sum(entropies, dim=1)

        if self._clip_action and self.use_continuous_act:
            # Continuous actions come from an unbounded distribution; clamping to
            # [-3, 3] and dividing by 3 squashes them into [-1, 1]
            # (e.g. 4.2 -> 1.0, -1.5 -> -0.5).
            clipped_action = torch.clamp(actions, -3, 3) / 3
        else:
            clipped_action = actions
        return (
            actions,
            clipped_action,
            all_logs if all_log_probs else log_probs,
            entropy_sum,
            memories,
        )

        run_out = {}
        with torch.no_grad():
            action, clipped_action, log_probs, entropy, memories = self.sample_actions(
                vec_obs, vis_obs, masks=masks, memories=memories
            )
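        # "pre_action" below keeps the raw (unclipped) policy output; "action" holds the
        # clipped values that are actually used as the agents' actions.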
|
|
|
run_out["pre_action"] = ModelUtils.to_numpy(action) |
|
|
|
run_out["action"] = ModelUtils.to_numpy(clipped_action) |
|
|
|
# Todo - make pre_action difference |
|
|
|