Fix lost trajectories when they are produced faster than they are consumed (#3233)

* Fix bug when trajectories are produced faster than they are consumed
* Cap max length
5 年前 · 56a67403
--- a/ml-agents/mlagents/trainers/agent_processor.py
+++ b/ml-agents/mlagents/trainers/agent_processor.py

        pass

-    def __init__(self, behavior_id: str):
+    def __init__(self, behavior_id: str, maxlen: int = 1000):
-        self.queue: Deque[T] = deque()
+        self.maxlen: int = maxlen
+        self.queue: Deque[T] = deque(maxlen=self.maxlen)
        self.behavior_id = behavior_id

    def empty(self) -> bool:
--- a/ml-agents/mlagents/trainers/trainer.py
+++ b/ml-agents/mlagents/trainers/trainer.py
        """
        with hierarchical_timer("process_trajectory"):
            for traj_queue in self.trajectory_queues:
-                try:
-                    t = traj_queue.get_nowait()
-                    self._process_trajectory(t)
-                except AgentManagerQueue.Empty:
-                    pass
+                # We grab at most the maximum length of the queue.
+                # This ensures that even if the queue is being filled faster than it is
+                # being emptied, the trajectories in the queue are on-policy.
+                for _ in range(traj_queue.maxlen):
+                    try:
+                        t = traj_queue.get_nowait()
+                        self._process_trajectory(t)
+                    except AgentManagerQueue.Empty:
+                        break
        if self.should_still_train:
            if self._is_ready_update():
                with hierarchical_timer("_update_policy"):