浏览代码

[skip ci] fix first summary statement output

/distributed-training
Anupam Bhatnagar 4 年前
当前提交
95ba923d
共有 2 个文件被更改,包括 3 次插入、4 次删除
  1. 2
      ml-agents/mlagents/trainers/ppo/trainer.py
  2. 5
      ml-agents/mlagents/trainers/trainer/rl_trainer.py

2
ml-agents/mlagents/trainers/ppo/trainer.py


The reward signal generators must be updated in this method at their own pace.
"""
self.cumulative_returns_since_policy_update.clear()
super()._update_policy()
# Make sure batch_size is a multiple of sequence length. During training, we
# will need to reshape the data into a batch_size x sequence_length tensor.

for stat, val in update_stats.items():
self._stats_reporter.add_stat(stat, val)
super()._update_policy()
self._clear_update_buffer()
def create_policy(self, brain_parameters: BrainParameters) -> TFPolicy:

5
ml-agents/mlagents/trainers/trainer/rl_trainer.py


"""
Uses demonstration_buffer to update model.
"""
# increment steps when training instead of when generating from environment
self._maybe_write_summary(self.get_step + self.trainer_parameters["buffer_size"])
self._maybe_write_summary(self.get_step + self.trainer_parameters["buffer_size"])
def _increment_step(self, n_steps: int, name_behavior_id: str) -> None:
"""

write the summary. This logic ensures summaries are written on the update step and not in between.
:param step_after_process: the step count after processing the next trajectory.
"""
if step_after_process >= self.next_summary_step and self.get_step != 0:
if step_after_process >= self.next_summary_step:
self._write_summary(self.next_summary_step)
def advance(self) -> None:

正在加载...
取消
保存