Save 1 VGPR by removing 'unNdotV' from PreLightData

8 年前 · 80077f9a
--- a/Assets/ScriptableRenderPipeline/HDRenderPipeline/Material/Lit/Lit.hlsl
+++ b/Assets/ScriptableRenderPipeline/HDRenderPipeline/Material/Lit/Lit.hlsl
 #define LTC_LUT_SCALE  ((LTC_LUT_SIZE - 1) * rcp(LTC_LUT_SIZE))
 #define LTC_LUT_OFFSET (0.5 * rcp(LTC_LUT_SIZE))

+#define MIN_N_DOT_V           0.0001                       // The minimum value of 'NdotV'
+
 // SSS parameters
 #define SSS_N_PROFILES        8
 #define SSS_LOW_THICKNESS     0.005                        // 0.5 cm
 struct PreLightData
 {
    // General
-    float NdotV;   // Between 0.0001 and 1
-    float unNdotV; // Between -1 and 1
-
+    float NdotV;                         // Geometric version (not clamped)

    // GGX iso
    float ggxLambdaV;
    float anisoGGXLambdaV;

    // IBL
-    float3 iblDirWS;    // Dominant specular direction, used for IBL in EvaluateBSDF_Env()
+    float3 iblDirWS;                     // Dominant specular direction, used for IBL in EvaluateBSDF_Env()
-    float3 specularFGD; // Store preconvole BRDF for both specular and diffuse
+    float3 specularFGD;                  // Store preconvolved BRDF for both specular and diffuse
    float diffuseFGD;

    // area light
 {
    PreLightData preLightData;

-    // General
-    float3 iblNormalWS = bsdfData.normalWS;
+    float NdotV = dot(bsdfData.normalWS, V);
+
+    float3 iblNormalWS = GetViewShiftedNormal(bsdfData.normalWS, V, NdotV, MIN_N_DOT_V);
-    preLightData.unNdotV = dot(bsdfData.normalWS, V);
-    // GetShiftedNdotV return a positive NdotV
-    // In case a material use negative normal for double sided lighting like Speedtree  they need to do a new calculation
-    preLightData.NdotV = GetShiftedNdotV(iblNormalWS, V, preLightData.unNdotV);
+    preLightData.NdotV = NdotV;      // Store the unaltered (geometric) version
+    NdotV = max(NdotV, MIN_N_DOT_V); // Use the modified (clamped) version
+
+    float3 iblR = reflect(-V, iblNormalWS);
-    preLightData.ggxLambdaV = GetSmithJointGGXLambdaV(preLightData.NdotV, bsdfData.roughness);
+    preLightData.ggxLambdaV = GetSmithJointGGXLambdaV(NdotV, bsdfData.roughness);

    // GGX aniso
    if (bsdfData.materialId == MATERIALID_LIT_ANISO)
-        preLightData.anisoGGXLambdaV = GetSmithJointGGXAnisoLambdaV(preLightData.TdotV, preLightData.BdotV, preLightData.NdotV, bsdfData.roughnessT, bsdfData.roughnessB);
+        preLightData.anisoGGXLambdaV = GetSmithJointGGXAnisoLambdaV(preLightData.TdotV, preLightData.BdotV, NdotV, bsdfData.roughnessT, bsdfData.roughnessB);
-        iblNormalWS = GetAnisotropicModifiedNormal(bsdfData.bitangentWS, iblNormalWS, V, bsdfData.anisotropy);
+        float3 anisoIblNormalWS = GetAnisotropicModifiedNormal(bsdfData.bitangentWS, iblNormalWS, V, bsdfData.anisotropy);
+        iblR = reflect(-V, anisoIblNormalWS);
+        // @Sébastien: I preserved the original behavior by creating 'anisoIblNormalWS', but,
+        // from the performance standpoint, it would be best to store the value in 'iblNormalWS'
+        // and then use it in GetSpecularDominantDir(). Please reconsider. :-) -Evgenii
-    GetPreIntegratedFGD(preLightData.NdotV, bsdfData.perceptualRoughness, bsdfData.fresnel0, preLightData.specularFGD, preLightData.diffuseFGD);
+    GetPreIntegratedFGD(NdotV, bsdfData.perceptualRoughness, bsdfData.fresnel0, preLightData.specularFGD, preLightData.diffuseFGD);
-    // We need to take into account the modified normal for faking anisotropic here.
-    float3 iblR = reflect(-V, iblNormalWS);
-    preLightData.iblDirWS = GetSpecularDominantDir(bsdfData.normalWS, iblR, bsdfData.roughness, preLightData.NdotV);
+    preLightData.iblDirWS = GetSpecularDominantDir(iblNormalWS, iblR, bsdfData.roughness, NdotV);
-    float theta = FastACos(preLightData.NdotV);
+    float theta = FastACos(NdotV);
    float2 uv = LTC_LUT_OFFSET + LTC_LUT_SCALE * float2(bsdfData.perceptualRoughness, theta * INV_HALF_PI);

    // Get the inverse LTC matrix for GGX
            out float3 diffuseLighting,
            out float3 specularLighting)
 {
+    // Optimized math. Ref: PBR Diffuse Lighting for GGX + Smith Microsurfaces (slide 114).
-    float NdotV    = preLightData.unNdotV;             // This value must not be clamped
+    float NdotV    = preLightData.NdotV;        // Get the unaltered (geometric) version
-    // GCN Optimization: reference PBR Diffuse Lighting for GGX + Smith Microsurfaces
-    float invLenLV = rsqrt(abs(2.0 * LdotV + 2.0));    // invLenLV = rcp(length(L + V))
+    float invLenLV = rsqrt(abs(2 * LdotV + 2)); // invLenLV = rcp(length(L + V))
+
+    NdotV          = max(NdotV, MIN_N_DOT_V);   // Use the modified (clamped) version

    float3 F = F_Schlick(bsdfData.fresnel0, LdotH);

        bsdfData.roughnessB = ClampRoughnessForAnalyticalLights(bsdfData.roughnessB);

        #ifdef LIT_USE_BSDF_PRE_LAMBDAV
-        Vis = V_SmithJointGGXAnisoLambdaV(preLightData.TdotV, preLightData.BdotV, preLightData.NdotV, TdotL, BdotL, NdotL,
+        Vis = V_SmithJointGGXAnisoLambdaV(preLightData.TdotV, preLightData.BdotV, NdotV, TdotL, BdotL, NdotL,
-        Vis = V_SmithJointGGXAniso(preLightData.TdotV, preLightData.BdotV, preLightData.NdotV, TdotL, BdotL, NdotL,
+        Vis = V_SmithJointGGXAniso(preLightData.TdotV, preLightData.BdotV, NdotV, TdotL, BdotL, NdotL,
                                   bsdfData.roughnessT, bsdfData.roughnessB);
        #endif

        bsdfData.roughness = ClampRoughnessForAnalyticalLights(bsdfData.roughness);

        #ifdef LIT_USE_BSDF_PRE_LAMBDAV
-        Vis = V_SmithJointGGX(NdotL, preLightData.NdotV, bsdfData.roughness, preLightData.ggxLambdaV);
+        Vis = V_SmithJointGGX(NdotL, NdotV, bsdfData.roughness, preLightData.ggxLambdaV);
-        Vis = V_SmithJointGGX(NdotL, preLightData.NdotV, bsdfData.roughness);
+        Vis = V_SmithJointGGX(NdotL, NdotV, bsdfData.roughness);
        #endif
        D = D_GGX(NdotH, bsdfData.roughness);
    }
    float  diffuseTerm = Lambert();
 #elif LIT_DIFFUSE_GGX_BRDF
-    float3 diffuseTerm = DiffuseGGX(bsdfData.diffuseColor, preLightData.NdotV, NdotL, NdotH, LdotV, bsdfData.perceptualRoughness);
+    float3 diffuseTerm = DiffuseGGX(bsdfData.diffuseColor, NdotV, NdotL, NdotH, LdotV, bsdfData.perceptualRoughness);
-    float  diffuseTerm = DisneyDiffuse(preLightData.NdotV, NdotL, LdotH, bsdfData.perceptualRoughness);
+    float  diffuseTerm = DisneyDiffuse(NdotV, NdotL, LdotH, bsdfData.perceptualRoughness);
 #endif

    diffuseLighting = bsdfData.diffuseColor * diffuseTerm;
                                    uint sampleCount = 4096)
 {
    float3x3 localToWorld = float3x3(bsdfData.tangentWS, bsdfData.bitangentWS, bsdfData.normalWS);
+    float    NdotV        = max(preLightData.NdotV, MIN_N_DOT_V);
    float3   acc          = float3(0.0, 0.0, 0.0);

    // Add some jittering on Hammersley2d
            float LdotH = dot(L, H);
            // Note: we call DisneyDiffuse, whose result must be multiplied by Albedo / PI. The division by PI is already taken into account
            // in the weightOverPdf returned by the ImportanceSampleLambert call.
-            float disneyDiffuse = DisneyDiffuse(preLightData.NdotV, NdotL, LdotH, bsdfData.perceptualRoughness);
+            float disneyDiffuse = DisneyDiffuse(NdotV, NdotL, LdotH, bsdfData.perceptualRoughness);

            // The diffuse albedo is applied here, as described in the ImportanceSampleLambert function
            float4 val = SampleEnv(lightLoopContext, lightData.envIndex, L, 0);
                                  uint sampleCount = 4096)
 {
    float3x3 localToWorld = float3x3(bsdfData.tangentWS, bsdfData.bitangentWS, bsdfData.normalWS);
+    float    NdotV        = max(preLightData.NdotV, MIN_N_DOT_V);
    float3   acc          = float3(0.0, 0.0, 0.0);

    // Add some jittering on Hammersley2d
        // GGX BRDF
        if (bsdfData.materialId == MATERIALID_LIT_ANISO)
        {
-            ImportanceSampleAnisoGGX(u, V, localToWorld, bsdfData.roughnessT, bsdfData.roughnessB, preLightData.NdotV, L, VdotH, NdotL, weightOverPdf);
+            ImportanceSampleAnisoGGX(u, V, localToWorld, bsdfData.roughnessT, bsdfData.roughnessB, NdotV, L, VdotH, NdotL, weightOverPdf);
-            ImportanceSampleGGX(u, V, localToWorld, bsdfData.roughness, preLightData.NdotV, L, VdotH, NdotL, weightOverPdf);
+            ImportanceSampleGGX(u, V, localToWorld, bsdfData.roughness, NdotV, L, VdotH, NdotL, weightOverPdf);
        }


--- a/Assets/ScriptableRenderPipeline/ShaderLibrary/CommonLighting.hlsl
+++ b/Assets/ScriptableRenderPipeline/ShaderLibrary/CommonLighting.hlsl
 // Helper functions
 //-----------------------------------------------------------------------------

-// NdotV can be negative for visible pixels due to the perspective projection, the normal mapping and decals.
-// This can produce visible artifacts with direct specular lighting (white point, black point) and indirect specular (artifact with cubemap fetch)
-// A way to reduce artifact is to limit NdotV value to not be negative and calculate reflection vector for cubemap with a shifted normal (i.e what depends on the view)
-// This is what provide this function
-// Note: NdotV return by this function is always positive, no need for saturate
-float GetShiftedNdotV(inout float3 N, float3 V, float NdotV)
+// 'NdotV' can become negative for visible pixels due to the perspective projection, normal mapping and decals.
+// This can produce visible artifacts under specular lighting, both direct (overly dark/bright pixels) and indirect (incorrect cubemap direction).
+// One way of avoiding these artifacts is to limit the value of 'NdotV' to a small positive number,
+// and calculate the reflection vector for the cubemap fetch using a normal shifted into view.
+float3 GetViewShiftedNormal(float3 N, float3 V, float NdotV, float minNdotV)
-    const float limit = 0.0001; // Epsilon value that avoid divide by 0 (several BSDF divide by NdotV)
-
-    if (NdotV < limit)
+    if (NdotV < minNdotV)
-        // We do not renormalize the normal because { abs(length(N) - 1.0) < limit } + It is use for cubemap
-        N    += (-NdotV + limit) * V;
-        NdotV = limit;
+        // We do not renormalize the normal to save a few clock cycles.
+        // The magnitude difference is typically negligible, and the normal is only used to compute
+        // the reflection vector for the IBL cube map fetch (which does not depend on the magnitude).
+        N += (-NdotV + minNdotV) * V;
-    return NdotV;
+    return N;
 }

 // Generates an orthonormal basis from a unit vector.