        value_next = self.policy.get_value_estimates(
            trajectory.next_obs,
            agent_id,
            trajectory.done_reached and not trajectory.max_step_reached,
        )
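        # NOTE: value_next serves as the bootstrap value V(s_T) for the trajectory's
        # last observation when returns/GAE are computed downstream. The final argument
        # flags a true terminal state (done reached without hitting max_step), which is
        # typically used to zero out the bootstrap value.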
        # If this was a terminal trajectory, append stats and reset reward collection
        if trajectory.done_reached:
            self._update_end_episode_stats(agent_id)

    def is_ready_update(self):