[skip ci] more fixes

5 年前 · 45bac63e
--- a/ml-agents/mlagents/trainers/ppo/trainer.py
+++ b/ml-agents/mlagents/trainers/ppo/trainer.py
        Processing involves calculating value and advantage targets for model updating step.
        :param trajectory: The Trajectory tuple containing the steps to be processed.
        """
-        super()._process_trajectory(trajectory)
+        # super()._process_trajectory(trajectory)
        agent_id = trajectory.agent_id  # All the agents should have the same ID

        agent_buffer_trajectory = trajectory.to_agentbuffer()
        Uses demonstration_buffer to update the policy.
        The reward signal generators must be updated in this method at their own pace.
        """
-        buffer_length = self.update_buffer.num_experiences
+        super()._update_policy()
+
+        # buffer_length = self.update_buffer.num_experiences
        self.cumulative_returns_since_policy_update.clear()

        # Make sure batch_size is a multiple of sequence length. During training, we
            (advantages - advantages.mean()) / (advantages.std() + 1e-10)
        )

-        # increment steps when training instead of when generating from environment
-        self._increment_step(self.trainer_parameters["buffer_size"], self.brain_name)
        num_epoch = self.trainer_parameters["num_epoch"]
        batch_update_stats = defaultdict(list)
        for _ in range(num_epoch):
--- a/ml-agents/mlagents/trainers/trainer/rl_trainer.py
+++ b/ml-agents/mlagents/trainers/trainer/rl_trainer.py
        """
        return False

-    @abc.abstractmethod
+    # @abc.abstractmethod
+        # increment steps when training instead of when generating from environment
-        pass
+        self._increment_step(self.trainer_parameters["buffer_size"], self.brain_name)

    def _increment_step(self, n_steps: int, name_behavior_id: str) -> None:
        """
        Takes a trajectory and processes it, putting it into the update buffer.
        :param trajectory: The Trajectory tuple containing the steps to be processed.
        """
+        pass
        # self._maybe_write_summary(self.get_step + len(trajectory.steps))
        # self._increment_step(len(trajectory.steps), trajectory.behavior_id)