[bug-fix] Bugfixes for Threaded Trainers (#3817)

5 years ago · ccd40ce7
--- a/ml-agents/mlagents/trainers/trainer/rl_trainer.py
+++ b/ml-agents/mlagents/trainers/trainer/rl_trainer.py
 from typing import Dict, List
 from collections import defaultdict
 import abc
+import time

 from mlagents.trainers.optimizer.tf_optimizer import TFOptimizer
 from mlagents.trainers.buffer import AgentBuffer
                # We grab at most the maximum length of the queue.
                # This ensures that even if the queue is being filled faster than it is
                # being emptied, the trajectories in the queue are on-policy.
+                _queried = False
+                    _queried = True
+                if self.threaded and not _queried:
+                    # Nothing was queried on this pass: sleep briefly to yield
+                    # the thread instead of busy-waiting.
+                    time.sleep(0.0001)
        if self.should_still_train:
            if self._is_ready_update():
                with hierarchical_timer("_update_policy"):
--- a/ml-agents/mlagents/trainers/trainer_controller.py
+++ b/ml-agents/mlagents/trainers/trainer_controller.py

        parsed_behavior_id = BehaviorIdentifiers.from_name_behavior_id(name_behavior_id)
        brain_name = parsed_behavior_id.brain_name
+        trainerthread = None
+            if trainer.threaded:
+                # Only create trainer thread for new trainers
+                trainerthread = threading.Thread(
+                    target=self.trainer_update_func, args=(trainer,), daemon=True
+                )
+                self.trainer_threads.append(trainerthread)

        policy = trainer.create_policy(
            parsed_behavior_id, env_manager.external_brains[name_behavior_id]

        trainer.publish_policy_queue(agent_manager.policy_queue)
        trainer.subscribe_trajectory_queue(agent_manager.trajectory_queue)
-        if trainer.threaded:
-            # Start trainer thread
-            trainerthread = threading.Thread(
-                target=self.trainer_update_func, args=(trainer,), daemon=True
-            )
+
+        # Only start a thread if one was created above for a new threaded trainer
+        if trainerthread is not None:
-            self.trainer_threads.append(trainerthread)

    def _create_trainers_and_managers(
        self, env_manager: EnvManager, behavior_ids: Set[str]