浏览代码

Fix equation for entropy

/MLA-1734-demo-provider
vincentpierre 4 年前
当前提交
03c905b2
共有 2 个文件被更改,包括 6 次插入、5 次删除
  1. 9
      ml-agents/mlagents/trainers/tests/torch/test_hybrid.py
  2. 2
      ml-agents/mlagents/trainers/torch/distributions.py

9
ml-agents/mlagents/trainers/tests/torch/test_hybrid.py


env = SimpleEnvironment([BRAIN_NAME], action_sizes=action_size, step_size=0.8)
new_network_settings = attr.evolve(PPO_TORCH_CONFIG.network_settings)
new_hyperparams = attr.evolve(
PPO_TORCH_CONFIG.hyperparameters, batch_size=64, buffer_size=1024
PPO_TORCH_CONFIG.hyperparameters,
batch_size=64,
buffer_size=1024,
learning_rate=1e-3,
)
config = attr.evolve(
PPO_TORCH_CONFIG,

)
check_environment_trains(
env, {BRAIN_NAME: config}, success_threshold=0.9, training_seed=1212
)
check_environment_trains(env, {BRAIN_NAME: config}, success_threshold=0.9)
@pytest.mark.parametrize("num_visual", [1, 2])

2
ml-agents/mlagents/trainers/torch/distributions.py


def entropy(self):
    """Return the entropy of this Gaussian distribution, averaged over actions.

    Uses the closed-form differential entropy of a Gaussian,
    0.5 * ln(2 * pi * e * sigma^2). The original code took the log of
    ``self.std`` instead of the variance ``self.std ** 2`` — this is the
    equation fix. EPSILON keeps the log finite if the std collapses to zero.

    Reduces over dim 1 with keepdim=True, so each batch row yields a single
    entropy value (mean across action dimensions).
    """
    return torch.mean(
        0.5 * torch.log(2 * math.pi * math.e * self.std ** 2 + EPSILON),
        dim=1,
        keepdim=True,
    )  # Use equivalent behavior to TF

正在加载...
取消
保存