[skip ci] tweaking 3dball configs

/distributed-training
Anupam Bhatnagar, 4 years ago
Commit 86e16a64
2 changed files with 3 additions and 6 deletions
1. config/trainer_config.yaml (4 changed lines)
2. ml-agents/mlagents/trainers/ppo/trainer.py (5 changed lines)

config/trainer_config.yaml (4 changed lines)


3DBall:
    normalize: true
    batch_size: 64
    buffer_size: 12000
    summary_freq: 12000
    max_steps: 1.0e5

3DBallHard:
    normalize: true
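
As a side note (not part of the diff), here is a minimal sketch of how a per-behavior section such as 3DBall in trainer_config.yaml could be read. The load_behavior_config helper and the "default" fallback section are assumptions for illustration only, not the actual mlagents loader.

# Hypothetical reader for a trainer_config.yaml like the one above.
# `load_behavior_config` and the "default" fallback are assumptions for
# illustration; the real mlagents configuration loader is more involved.
import yaml

def load_behavior_config(path: str, behavior: str) -> dict:
    with open(path) as f:
        config = yaml.safe_load(f)
    merged = dict(config.get("default", {}))   # shared defaults, if present
    merged.update(config.get(behavior, {}))    # per-behavior overrides win
    return merged

cfg = load_behavior_config("config/trainer_config.yaml", "3DBall")
print(cfg["batch_size"], cfg["buffer_size"], cfg["summary_freq"])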

ml-agents/mlagents/trainers/ppo/trainer.py (5 changed lines)


        Uses demonstration_buffer to update the policy.
        The reward signal generators must be updated in this method at their own pace.
        """
        super()._update_policy()
        # buffer_length = self.update_buffer.num_experiences
        self.cumulative_returns_since_policy_update.clear()
        # Make sure batch_size is a multiple of sequence length. During training, we

        update_stats = self.optimizer.bc_module.update()
        for stat, val in update_stats.items():
            self._stats_reporter.add_stat(stat, val)
        super()._update_policy()
        self._clear_update_buffer()

    def create_policy(self, brain_parameters: BrainParameters) -> TFPolicy:
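
For context on the hunk above: the structure is a trainer whose _update_policy first defers to its base class and then pushes behavioral-cloning update stats through a stats reporter. Below is a minimal, self-contained sketch of that pattern; BaseTrainer, FakeBCModule, and StatsReporter are stand-ins invented for illustration, not the real mlagents classes.

# Sketch of the update-then-report pattern in the hunk above.
# BaseTrainer, FakeBCModule, and StatsReporter are assumed stand-ins.
from typing import Dict


class StatsReporter:
    def add_stat(self, stat: str, val: float) -> None:
        print(f"{stat}: {val}")


class FakeBCModule:
    def update(self) -> Dict[str, float]:
        # A real BC module would return losses from a behavioral-cloning step.
        return {"Losses/Pretraining Loss": 0.12}


class BaseTrainer:
    def _update_policy(self) -> None:
        # The base class would sample the update buffer and run PPO updates here.
        pass


class SketchTrainer(BaseTrainer):
    def __init__(self) -> None:
        self._stats_reporter = StatsReporter()
        self.bc_module = FakeBCModule()

    def _update_policy(self) -> None:
        # Defer the policy update to the base class, then report BC stats,
        # mirroring the structure of the diff hunk above.
        super()._update_policy()
        for stat, val in self.bc_module.update().items():
            self._stats_reporter.add_stat(stat, val)


SketchTrainer()._update_policy()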
