super()._process_trajectory(trajectory)
agent_id = trajectory.agent_id  # All the agents should have the same ID
# Add to episode_steps
self.episode_steps[agent_id] += len(trajectory.steps)
agent_buffer_trajectory = trajectory.to_agentbuffer()
# Update the normalization with the observations collected in this trajectory
if self.is_training:
    self.policy.update_normalization(agent_buffer_trajectory["vector_obs"])
"environment": defaultdict(lambda: 0)
}
self.update_buffer: AgentBuffer = AgentBuffer()
self.episode_steps: Dict[str, int] = defaultdict(lambda: 0)
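# How collected_rewards is filled is not shown in this excerpt; a plausible sketch
# (an assumption, not the exact code) is that trajectory processing accumulates each
# reward signal per agent, e.g.:
#     self.collected_rewards["environment"][agent_id] += np.sum(
#         agent_buffer_trajectory["environment_rewards"]
#     )
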
# Write hyperparameters to Tensorboard
self.write_tensorboard_text("Hyperparameters", self.trainer_parameters)

last_step = trajectory.steps[-1]  # final experience in the trajectory
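
# The test below relies on end_episode() resetting per-agent state. A minimal
# sketch of that method, assuming it only zeroes the counters initialized above
# (the real implementation may also clear other state):
def end_episode(self) -> None:
    for agent_id in self.episode_steps:
        self.episode_steps[agent_id] = 0
    for rewards in self.collected_rewards.values():
        for agent_id in rewards:
            rewards[agent_id] = 0
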
def test_rl_trainer():
    trainer = create_rl_trainer()  # fixture (defined elsewhere) that builds a minimal trainer
    agent_id = "0"
    # Seed some per-agent state, then verify that end_episode() resets it all to zero
    trainer.episode_steps[agent_id] = 3
    trainer.collected_rewards["environment"][agent_id] = 3
    trainer.end_episode()
    for agent_id in trainer.episode_steps:
        assert trainer.episode_steps[agent_id] == 0
    for rewards in trainer.collected_rewards.values():
        for agent_id in rewards:
            assert rewards[agent_id] == 0