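Clean up: remove the unused process_experiences stub from agent_processor.py and annotate the agent_id lookup in the PPO trainer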

5 年前 · e0e57188
--- a/ml-agents/mlagents/trainers/agent_processor.py
+++ b/ml-agents/mlagents/trainers/agent_processor.py
        self.processing_buffer[agent_id]["actions"].append(actions[agent_idx])
        self.processing_buffer[agent_id]["action_probs"].append(a_dist[agent_idx])

-    def process_experiences(self):
-        pass
-

 class ProcessingBuffer(dict):
    """
--- a/ml-agents/mlagents/trainers/ppo/trainer.py
+++ b/ml-agents/mlagents/trainers/ppo/trainer.py
        Takes a trajectory and processes it, putting it into the update buffer.
        Processing involves calculating value and advantage targets for model updating step.
        """
-        agent_id = trajectory.steps[-1].agent_id
+        agent_id = trajectory.steps[
+            -1
+        ].agent_id  # All steps in a trajectory should share the same agent ID
        agent_buffer_trajectory = self._trajectory_to_agentbuffer(trajectory)
        # Update the normalization
        if self.is_training: