|
|
|
|
|
|
action, log_probs, entropy, memories = self.sample_actions( |
|
|
|
vec_obs, vis_obs, masks=masks, memories=memories |
|
|
|
) |
|
|
|
run_out["action"] = action.to_numpy_dict() |
|
|
|
action_dict = action.to_numpy_dict() |
|
|
|
run_out["action"] = action_dict |
|
|
|
action.to_numpy_dict()["continuous_action"] |
|
|
|
if self.use_continuous_act |
|
|
|
else None |
|
|
|
) # Todo - make pre_action difference |
|
|
|
action_dict["continuous_action"] if self.use_continuous_act else None |
|
|
|
) |
|
|
|
run_out["log_probs"] = log_probs.to_numpy_dict() |
|
|
|
run_out["entropy"] = ModelUtils.to_numpy(entropy) |
|
|
|
run_out["learning_rate"] = 0.0 |
|
|
|