
[Cold Fix] Making the episode length and mean reward more accurate for the first episode (#657)

/develop-generalizationTraining-TrainerController
GitHub · 7 years ago
Current commit: 755be43e
2 changed files with 2 additions and 2 deletions:

1. python/unitytrainers/bc/trainer.py (3)
2. python/unitytrainers/ppo/trainer.py (1)

python/unitytrainers/bc/trainer.py (3)

```diff
 if stored_info_student is None:
     continue
 else:
     idx = stored_info_student.agents.index(agent_id)
-    if not stored_info_student.local_done[idx]:
+    if not next_info_student.local_done[next_idx]:
         if agent_id not in self.cumulative_rewards:
             self.cumulative_rewards[agent_id] = 0
         self.cumulative_rewards[agent_id] += next_info_student.rewards[next_idx]
```

python/unitytrainers/ppo/trainer.py (1)

```diff
 self.training_buffer[agent_id]['action_probs'].append(a_dist[idx])
 self.training_buffer[agent_id]['value_estimates'].append(value[idx][0])
+if not next_info.local_done[next_idx]:
     if agent_id not in self.cumulative_rewards:
         self.cumulative_rewards[agent_id] = 0
     self.cumulative_rewards[agent_id] += next_info.rewards[next_idx]
```
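Both hunks apply the same fix: the per-agent cumulative reward is only accumulated while the *next* step's `local_done` flag is false, so a reward observed at an episode boundary is not folded into the wrong episode's total, which is what skewed the first episode's length and mean reward. A minimal sketch of that gating pattern (the helper name `accumulate` is hypothetical, not part of ml-agents):

```python
def accumulate(agent_id, reward, next_done, cumulative_rewards):
    """Accumulate reward for one agent, gated on the NEXT step's done flag.

    Mirrors the pattern in the diffs above: when next_done is True the
    episode has just ended, so the reward is not added to the running total.
    """
    if not next_done:
        # Lazily initialize the agent's running total, as the trainers do.
        if agent_id not in cumulative_rewards:
            cumulative_rewards[agent_id] = 0
        cumulative_rewards[agent_id] += reward
    return cumulative_rewards.get(agent_id, 0)


rewards = {}
accumulate("agent_0", 1.0, next_done=False, cumulative_rewards=rewards)
accumulate("agent_0", 5.0, next_done=True, cumulative_rewards=rewards)
# rewards["agent_0"] is still 1.0: the terminal-step reward was not accumulated
```

Keying the check on the stored (previous) info instead, as the old bc/trainer.py line did, would accumulate one extra step at each episode boundary.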
