Fix comment

5 年前 · dbf8f7a5
--- a/ml-agents/mlagents/trainers/sac/trainer.py
+++ b/ml-agents/mlagents/trainers/sac/trainer.py
    def _is_ready_update(self) -> bool:
        """
        Returns whether or not the trainer has enough elements to run update model
-        :return: A boolean corresponding to whether or not update_model() can be run
+        :return: A boolean corresponding to whether or not _update_policy() can be run
        """
        return (
            self.update_buffer.num_experiences >= self.trainer_parameters["batch_size"]
    @timed
    def _update_policy(self) -> None:
        """
-        If train_interval is met, update the SAC policy given the current reward signals.
-        If reward_signal_train_interval is met, update the reward signals from the buffer.
+        Update the SAC policy and reward signals until the steps_per_update ratio
+        is met.
        """
        self.update_sac_policy()
        self.update_reward_signals()