Compare commits

...
This merge request has changes that conflict with the target branch:
- /config/ppo/Match3.yaml
- /Project/Assets/ML-Agents/Examples/Match3/Scenes/Match3.unity
- /ml-agents/mlagents/trainers/torch/distributions.py

3 commits

3 files changed: 31 insertions(+), 17 deletions(-)
  1. Project/Assets/ML-Agents/Examples/Match3/Scenes/Match3.unity (12 changes)
  2. config/ppo/Match3.yaml (26 changes)
  3. ml-agents/mlagents/trainers/torch/distributions.py (10 changes)

Project/Assets/ML-Agents/Examples/Match3/Scenes/Match3.unity (12 changes)


```diff
 m_ReflectionIntensity: 1
 m_CustomReflection: {fileID: 0}
 m_Sun: {fileID: 0}
-m_IndirectSpecularColor: {r: 0.43632728, g: 0.4747097, b: 0.51471573, a: 1}
+m_IndirectSpecularColor: {r: 0.43632758, g: 0.47471005, b: 0.5147158, a: 1}
 m_UseRadianceAmbientProbe: 0
 --- !u!157 &3
 LightmapSettings:
 ...
 m_Modification:
   m_TransformParent: {fileID: 0}
   m_Modifications:
   - target: {fileID: 3508723250470608010, guid: 2fafdcd0587684641b03b11f04454f1b,
       type: 3}
     propertyPath: m_BehaviorName
     value: Match3GreedyHeuristic
     objectReference: {fileID: 0}
   - target: {fileID: 3508723250470608011, guid: 2fafdcd0587684641b03b11f04454f1b,
       type: 3}
     propertyPath: cubeSpacing
```


config/ppo/Match3.yaml (26 changes)


```diff
 Match3VectorObs:
   trainer_type: ppo
   hyperparameters:
-    batch_size: 64
-    buffer_size: 12000
+    batch_size: 16
+    buffer_size: 120
-    beta: 0.001
+    beta: 0.005
     epsilon: 0.2
     lambd: 0.99
     num_epoch: 3
 ...
-    hidden_units: 128
-    num_layers: 2
+    hidden_units: 256
+    num_layers: 4
+    vis_encode_type: match3
   reward_signals:
     extrinsic:
 ...
   max_steps: 5000000
-  time_horizon: 1000
+  time_horizon: 128
   checkpoint_interval: 100000
 ...
-    batch_size: 64
-    buffer_size: 12000
+    batch_size: 16
+    buffer_size: 120
-    beta: 0.001
+    beta: 0.005
     epsilon: 0.2
     lambd: 0.99
     num_epoch: 3
 ...
-    hidden_units: 128
-    num_layers: 2
+    hidden_units: 256
+    num_layers: 4
+    vis_encode_type: match3
   reward_signals:
     extrinsic:
 ...
   max_steps: 5000000
-  time_horizon: 1000
+  time_horizon: 128
   checkpoint_interval: 100000
 Match3SimpleHeuristic:
   # Settings can be very simple since we don't care about actually training the model
   trainer_type: ppo
```
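For quick reference, these are the post-change values collected from the `+` lines above; the same set is applied to both trained behavior sections (the diff view collapses the surrounding keys, so the nesting of `hidden_units`/`num_layers` under their parent key is not shown here):

```yaml
batch_size: 16
buffer_size: 120
beta: 0.005
hidden_units: 256
num_layers: 4
vis_encode_type: match3
time_horizon: 128
```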

ml-agents/mlagents/trainers/torch/distributions.py (10 changes)


```diff
     ).squeeze(-1)

     def log_prob(self, value):
-        return torch.log(self.pdf(value))
+        return torch.log(self.pdf(value) + EPSILON)
 ...
-        return torch.log(self.probs)
+        return torch.log(self.probs + EPSILON)
 ...
-        return -torch.sum(self.probs * torch.log(self.probs), dim=-1)
+        return -torch.sum(self.probs * torch.log(self.probs + EPSILON), dim=-1)

 class GaussianDistribution(nn.Module):
 ...
     def _mask_branch(self, logits: torch.Tensor, mask: torch.Tensor) -> torch.Tensor:
         raw_probs = torch.nn.functional.softmax(logits, dim=-1) * mask
-        normalized_probs = raw_probs / torch.sum(raw_probs, dim=-1).unsqueeze(-1)
+        normalized_probs = raw_probs / (
+            torch.sum(raw_probs, dim=-1).unsqueeze(-1) + EPSILON
+        )
         normalized_logits = torch.log(normalized_probs + EPSILON)
         return normalized_logits
```
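The recurring fix above is one numerical-stability guard applied in several places: a small `EPSILON` is added before every `log` and before the division in `_mask_branch`, so that a probability of exactly zero (for example, an action branch that the mask removes entirely) produces a large-but-finite log-probability instead of `-inf` or `NaN`. A minimal pure-Python sketch of the idea, without torch; the `EPSILON` value and the function names here are illustrative, not the library's:

```python
import math

EPSILON = 1e-7  # illustrative; plays the role of the EPSILON constant in the diff


def masked_softmax(logits, mask):
    """Softmax over logits with masked entries zeroed out.

    The epsilon in the denominator keeps the renormalization finite even
    if the mask zeroes every entry (sum of raw probabilities is 0).
    """
    exps = [math.exp(x) for x in logits]
    total = sum(exps)
    raw = [e / total * m for e, m in zip(exps, mask)]
    denom = sum(raw) + EPSILON
    return [r / denom for r in raw]


def safe_log(p):
    """log(p + EPSILON): finite even when p is exactly zero."""
    return math.log(p + EPSILON)


probs = masked_softmax([1.0, 2.0, 3.0], [1, 0, 1])
print(probs[1])            # masked action gets probability exactly 0.0
print(safe_log(probs[1]))  # finite (about -16.1) instead of -inf / an error
```

Without the denominator guard, an all-zero mask would divide by zero; without the `log` guard, the zero probability of a masked action would give `log(0)` inside the entropy and log-prob computations.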
