Test commit

4 年前 · 1f305f24
--- a/ml-agents/mlagents/trainers/torch/distributions.py
+++ b/ml-agents/mlagents/trainers/torch/distributions.py

    def sample(self):
        sample = self.mean + torch.randn_like(self.mean) * self.std
-        return torch.clamp(sample, -3, 3) / 3
+        return torch.clamp(sample, -3, 3)

    def log_prob(self, value):
        unscaled_val = value * 3  # Inverse of the clipping