|
|
|
|
|
|
memories: Optional[torch.Tensor] = None, |
|
|
|
seq_len: int = 1, |
|
|
|
all_log_probs: bool = False, |
|
|
|
) -> Tuple[ |
|
|
|
torch.Tensor, torch.Tensor, torch.Tensor, Dict[str, torch.Tensor], torch.Tensor |
|
|
|
]: |
|
|
|
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: |
|
|
|
dists, value_heads, memories = self.actor_critic.get_dist_and_value( |
|
|
|
dists, memories = self.actor_critic.get_dists( |
|
|
|
vec_obs, vis_obs, masks, memories, seq_len |
|
|
|
) |
|
|
|
action_list = self.actor_critic.sample_action(dists) |
|
|
|
|
|
|
else: |
|
|
|
actions = actions[:, 0, :] |
|
|
|
|
|
|
|
return ( |
|
|
|
actions, |
|
|
|
all_logs if all_log_probs else log_probs, |
|
|
|
entropies, |
|
|
|
value_heads, |
|
|
|
memories, |
|
|
|
) |
|
|
|
return (actions, all_logs if all_log_probs else log_probs, entropies, memories) |
|
|
|
|
|
|
|
def evaluate_actions( |
|
|
|
self, |
|
|
|
|
|
|
|
|
|
|
run_out = {} |
|
|
|
with torch.no_grad(): |
|
|
|
action, log_probs, entropy, value_heads, memories = self.sample_actions( |
|
|
|
action, log_probs, entropy, memories = self.sample_actions( |
|
|
|
vec_obs, vis_obs, masks=masks, memories=memories |
|
|
|
) |
|
|
|
run_out["action"] = ModelUtils.to_numpy(action) |
|
|
|
|
|
|
run_out["entropy"] = ModelUtils.to_numpy(entropy) |
|
|
|
run_out["value_heads"] = { |
|
|
|
name: ModelUtils.to_numpy(t) for name, t in value_heads.items() |
|
|
|
} |
|
|
|
run_out["value"] = np.mean(list(run_out["value_heads"].values()), 0) |
|
|
|
run_out["learning_rate"] = 0.0 |
|
|
|
if self.use_recurrent: |
|
|
|
run_out["memory_out"] = ModelUtils.to_numpy(memories).squeeze(0) |
|
|
|