    :param output_size: The size of the output tensor
    :param kernel_init: The Initialization to use for the weights of the layer
    :param kernel_gain: The multiplier for the weights of the kernel. Note that in
    TensorFlow the scale passed to variance_scaling is square-rooted, while Torch's
    Kaiming initializer defaults to a gain of sqrt(2). Therefore calling
    variance_scaling with scale 0.01 is equivalent to calling KaimingHeNormal with
    a kernel_gain of 0.1 / sqrt(2) (see the illustrative check below).
    :param bias_init: The Initialization to use for the weights of the bias layer
    """
    layer = torch.nn.Linear(input_size, output_size)
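    # Not shown in this excerpt: based on the docstring above, the helper is
    # assumed to apply kernel_init to layer.weight, scale the weights by
    # kernel_gain, apply bias_init to layer.bias, and return the layer.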

        # The stack of layers is built in a constructor whose class definition
        # and signature are not shown here; the first layer maps input_size to
        # hidden_size.
        self.layers = [
            linear_layer(
                input_size,
                hidden_size,
                kernel_init=Initialization.KaimingHeNormal,
                kernel_gain=0.707,  # Equivalent to TF gain = 1
            )
        ]
        self.layers.append(Swish())
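        # Swish (x * sigmoid(x)) is the activation between the linear layers;
        # its module definition is not part of this excerpt.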

        # Each additional hidden layer (hidden_size to hidden_size) is appended
        # the same way; in the full source this presumably sits inside a loop
        # over the remaining layer count.
        self.layers.append(
            linear_layer(
                hidden_size,
                hidden_size,
                kernel_init=Initialization.KaimingHeNormal,
                kernel_gain=0.707,
            )
        )
        self.layers.append(Swish())
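

# ---------------------------------------------------------------------------
# Hedged illustration, not part of the original module: a numeric check of the
# gain equivalence described in the linear_layer docstring. It assumes that
# linear_layer applies torch.nn.init.kaiming_normal_ (default gain sqrt(2))
# and then multiplies the weights by kernel_gain. Under that assumption,
# kernel_gain = 0.707 ≈ 1/sqrt(2) cancels the sqrt(2) and reproduces the
# standard deviation of TensorFlow's variance_scaling with scale = 1.0, and
# scale = 0.01 corresponds to kernel_gain = 0.1/sqrt(2) ≈ 0.0707.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    import math

    import torch  # presumably already imported at the top of the full module

    fan_in, fan_out = 256, 512
    layer = torch.nn.Linear(fan_in, fan_out)
    torch.nn.init.kaiming_normal_(layer.weight.data)  # std = sqrt(2 / fan_in)
    layer.weight.data *= 0.707  # ≈ 1 / sqrt(2), as used for the hidden layers

    torch_std = layer.weight.std().item()
    tf_std = math.sqrt(1.0 / fan_in)  # variance_scaling(scale=1.0, mode="fan_in")
    print(f"torch std after kernel_gain: {torch_std:.4f}")
    print(f"TF variance_scaling(scale=1.0) std: {tf_std:.4f}")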