Fix entropy calculation

5 年前 · c9fbb111
--- a/ml-agents/mlagents/trainers/common/nn_policy.py
+++ b/ml-agents/mlagents/trainers/common/nn_policy.py
        self.all_log_probs = tf.identity(all_probs, name="action_probs")

        single_dim_entropy = 0.5 * tf.reduce_mean(
-            tf.log(2 * np.pi * np.e) + tf.square(log_sigma)
+            tf.log(2 * np.pi * np.e) + 2 * log_sigma
        )
        # Make entropy the right shape
        self.entropy = tf.ones_like(tf.reshape(mu[:, 0], [-1])) * single_dim_entropy