# Bootstrap the return from the value estimate of the final observation.
# The done flag is True only when the episode genuinely terminated; if the
# trajectory was merely cut off by the max-step limit, we still bootstrap
# from value_next because the episode did not actually end.
value_next = self.policy.get_value_estimates(
    trajectory.next_obs,
    trajectory.agent_id,
    trajectory.done_reached and not trajectory.max_step_reached,
)
# Evaluate all reward functions