Make threading disable-able per trainer

5 年前 · 9fe104d6
--- a/docs/Training-PPO.md
+++ b/docs/Training-PPO.md
 in most cases, it is sufficient to use the `--initialize-from` CLI parameter to initialize
 all models from the same run.

+### (Optional) Advanced: Disable Threading
+
+By default, PPO model updates can happen while the environment is being stepped. To disable this
+behavior, for instance to maintain strict
+[on-policy](https://spinningup.openai.com/en/latest/user/algorithms.html#the-on-policy-algorithms)
+experience-gathering, set `threaded` to `false`. Expect a slowdown of approximately 10-20% in
+training time if `threaded` is disabled.
+
+Default Value: `true`
+
 ## Training Statistics

 To view training statistics, use TensorBoard. For information on launching and
--- a/ml-agents/mlagents/trainers/tests/test_simple_rl.py
+++ b/ml-agents/mlagents/trainers/tests/test_simple_rl.py
        sequence_length: 64
        summary_freq: 500
        use_recurrent: false
+        threaded: false
        reward_signals:
            extrinsic:
                strength: 1.0
        curiosity_enc_size: 128
        demo_path: None
        vis_encode_type: simple
+        threaded: false
        reward_signals:
            extrinsic:
                strength: 1.0
--- a/ml-agents/mlagents/trainers/trainer/trainer.py
+++ b/ml-agents/mlagents/trainers/trainer/trainer.py
        self.run_id = run_id
        self.trainer_parameters = trainer_parameters
        self.summary_path = trainer_parameters["summary_path"]
+        self._threaded = trainer_parameters.get("threaded", True)
        self._stats_reporter = StatsReporter(self.summary_path)
        self.is_training = training
        self._reward_buffer: Deque[float] = deque(maxlen=reward_buff_cap)
        :return: the step count of the trainer
        """
        return self.step
+
+    @property
+    def threaded(self) -> bool:
+        """
+        Whether or not to run the trainer in a thread. True allows the trainer to
+        update the policy while the environment is taking steps. Set to False to
+        enforce strict on-policy updates (i.e. no policy updates while stepping).
+        """
+        return self._threaded

    @property
    def should_still_train(self) -> bool:
--- a/ml-agents/mlagents/trainers/trainer_controller.py
+++ b/ml-agents/mlagents/trainers/trainer_controller.py

        trainer.publish_policy_queue(agent_manager.policy_queue)
        trainer.subscribe_trajectory_queue(agent_manager.trajectory_queue)
-        if self.threaded:
+        if trainer.threaded:
            # Start trainer thread
            trainerthread = threading.Thread(
                target=self.trainer_update_func, args=(trainer,), daemon=True
                        "Environment/Lesson", curr.lesson_num
                    )

-        if not self.threaded:
-            with hierarchical_timer("trainer_advance"):
-                for trainer in self.trainers.values():
+        for trainer in self.trainers.values():
+            if not trainer.threaded:
+                with hierarchical_timer("trainer_advance"):
                    trainer.advance()

        return num_steps