Fix bug where reward_buffer is always 0
self.stats["Environment/Cumulative Reward"].append(
rewards.get(agent_id, 0)
)
rewards[agent_id] = 0
else:
self.stats[
self.policy.reward_signals[name].stat_name