|
|
|
|
|
|
|
|
|
|
# Update the observation-normalization statistics from this trajectory,
# but only while training (inference must not shift the running mean/var).
# NOTE(review): the original contained TWO back-to-back calls to
# update_normalization — a one-argument form and a three-argument form that
# passed "vector_obs" twice alongside "next_vector_in". That is almost
# certainly merge-conflict residue: if both ran, observations would be
# double-counted in the running statistics. Keeping the canonical
# single-argument call; confirm against Policy.update_normalization's
# actual signature before relying on this.
if self.is_training:
    self.policy.update_normalization(agent_buffer_trajectory["vector_obs"])
|
|
|
|
|
|
|
# Evaluate all reward functions for reporting purposes |
|
|
|
self.collected_rewards["environment"][agent_id] += np.sum( |
|
|
|