def __init__(
    self,
    model_path: str,
    training_seed: int,
    sampler_manager: SamplerManager,
    resampling_interval: Optional[int],
    threaded: bool = True,
):
""" |
|
|
|
:param model_path: Path to save the model. |
|
|
|
|
|
|
:param training_seed: Seed to use for Numpy and Tensorflow random number generation. |
|
|
|
:param sampler_manager: SamplerManager object handles samplers for resampling the reset parameters. |
|
|
|
:param resampling_interval: Specifies number of simulation steps after which reset parameters are resampled. |
|
|
|
:param threaded: Whether or not to run trainers in a separate thread. Disable for testing/debugging. |
|
|
|
""" |
|
|
|
    self.trainers: Dict[str, Trainer] = {}
    self.brain_name_to_identifier: Dict[str, Set] = defaultdict(set)
    self.model_path = model_path
    self.sampler_manager = sampler_manager
    self.resampling_interval = resampling_interval
    self.threaded = threaded
    self.trainer_threads: List[threading.Thread] = []
    self.kill_trainers = False
    np.random.seed(training_seed)
    tf.set_random_seed(training_seed)
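
# A minimal usage sketch (illustrative only): the controller class name and the
# argument values below are assumptions, not taken from this file.
#
#   controller = TrainerController(
#       model_path="./models/run_1",
#       training_seed=1337,
#       sampler_manager=SamplerManager(...),  # depends on your sampler config
#       resampling_interval=20000,
#       threaded=True,
#   )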

def _create_trainer_and_manager(
    self, env_manager: EnvManager, name_behavior_id: str
) -> None:
    # trainer and agent_manager are constructed earlier in this method (omitted
    # in this excerpt). Wire them together: the trainer publishes updated
    # policies to the agent manager's policy queue and consumes its
    # trajectory queue.
    trainer.publish_policy_queue(agent_manager.policy_queue)
    trainer.subscribe_trajectory_queue(agent_manager.trajectory_queue)
    if self.threaded:
        # Start trainer thread
        trainerthread = threading.Thread(
            target=self.trainer_update_func, args=(trainer,), daemon=True
        )
        trainerthread.start()
        self.trainer_threads.append(trainerthread)
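
# Sketch of the per-trainer update loop each daemon thread runs. Only the name
# trainer_update_func is taken from this file; the body below is an assumption
# based on how the thread is started above and on the kill_trainers flag set
# in __init__.
def trainer_update_func(self, trainer: Trainer) -> None:
    # Keep advancing the trainer until the controller asks threads to stop.
    while not self.kill_trainers:
        with hierarchical_timer("trainer_advance"):
            trainer.advance()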

def _create_trainers_and_managers(
    self, env_manager: EnvManager, behavior_ids: Set[str]
) -> None:
    for behavior_id in behavior_ids:
        self._create_trainer_and_manager(env_manager, behavior_id)

def advance(self, env_manager: EnvManager) -> int:
    with hierarchical_timer("env_step"):
        num_steps = env_manager.advance()
    # Report the current curriculum lesson for each brain to its stats reporter.
    if self.meta_curriculum:
        for brain_name, curr in self.meta_curriculum.brains_to_curricula.items():
            if brain_name in self.trainers:
                self.trainers[brain_name].stats_reporter.set_stat(
                    "Environment/Lesson", curr.lesson_num
                )

    # When not threaded, advance the trainers inline after stepping the
    # environment; when threaded, the background trainer threads consume
    # trajectories on their own.
    if not self.threaded:
        with hierarchical_timer("trainer_advance"):
            for trainer in self.trainers.values():
                trainer.advance(empty_queue=True)

    return num_steps
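
# Hypothetical shutdown helper (not in this excerpt): something must set
# kill_trainers and join trainer_threads so the daemon threads exit cleanly.
# This sketch shows one way to do that.
def join_threads(self, timeout_seconds: float = 1.0) -> None:
    # Signal the update loops to stop, then give each thread a chance to exit.
    self.kill_trainers = True
    for trainer_thread in self.trainer_threads:
        trainer_thread.join(timeout_seconds)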