Remove the loop over clusters from the volumetric lighting pass

7 年前 · d101e8ec
--- a/ScriptableRenderPipeline/HDRenderPipeline/HDRP/Lighting/Volumetrics/VolumeVoxelization.compute
+++ b/ScriptableRenderPipeline/HDRenderPipeline/HDRP/Lighting/Volumetrics/VolumeVoxelization.compute
    float de = rcp(VBUFFER_SLICE_COUNT);   // Log-encoded distance between slices

 #ifdef USE_CLUSTERED_LIGHTLIST
-    // Our voxel is not necessarily completely inside a single light cluster (along Z).
-    // Note that Z-binning can solve this problem, as we can iterate over all Z-bins
-    // to compute min/max light indices, and then use this range for the entire slice.
+    // The voxel can overlap up to 2 light clusters along Z, so we have to iterate over both.
+    // TODO: implement Z-binning which makes Z-range queries easy.
    uint volumeStarts[2], volumeCounts[2];

    GetCountAndStartCluster(posInput.tileCoord, GetLightClusterIndex(posInput.tileCoord, z0),
        GetCountAndStartCluster(posInput.tileCoord, GetLightClusterIndex(posInput.tileCoord, z1),
                                LIGHTCATEGORY_DENSITY_VOLUME, volumeStarts[1], volumeCounts[1]);

-        // We now iterate over all density volumes within the two clusters along Z.
+        // Iterate over all volumes within 2 (not necessarily unique) clusters overlapping the voxel along Z.
        // We need to skip duplicates, but it's not too difficult since volumes are sorted by index.
        uint i = 0, j = 0;


    #else  // USE_CLUSTERED_LIGHTLIST
        {
-            for (uint i = 0; i < _NumVisibleDensityVolumes; i++)
+            for (uint volumeIndex = 0; volumeIndex < _NumVisibleDensityVolumes; volumeIndex++)
-                uint volumeIndex = i;
-
    #endif // USE_CLUSTERED_LIGHTLIST

                OrientedBBox obb = _VolumeBounds[volumeIndex];
--- a/ScriptableRenderPipeline/HDRenderPipeline/HDRP/Lighting/Volumetrics/VolumetricLighting.compute
+++ b/ScriptableRenderPipeline/HDRenderPipeline/HDRP/Lighting/Volumetrics/VolumetricLighting.compute
 VoxelLighting EvaluateVoxelLighting(LightLoopContext context, uint featureFlags, PositionInputs posInput, float3 centerWS,
                                    DualRay ray, float t0, float t1, float dt, float rndVal, float extinction, float asymmetry
                                #ifdef USE_CLUSTERED_LIGHTLIST
-                                    , uint clusterIndices[2], float clusterDepths[2])
+                                    , uint lightClusters[2])
                                #else
                                    )
                                #endif
    return lighting;
 #endif

-#ifdef USE_CLUSTERED_LIGHTLIST
-    // Loop over 1 or 2 light clusters.
-    int cluster = 0;
-    do
+    if (featureFlags & LIGHTFEATUREFLAGS_PUNCTUAL)
-        float tMin = max(t0, ray.strataDirInvViewZ * clusterDepths[cluster]);
-        float tMax = t1;
+    #ifdef USE_CLUSTERED_LIGHTLIST
+        // Iterate over all lights within 2 (not necessarily unique) clusters overlapping the voxel along Z.
+        // We need to skip duplicates, but it's not too difficult since lights are sorted by index.
+        uint lightStarts[2], lightCounts[2];
-        if (cluster == 0 && (clusterIndices[0] != clusterIndices[1]))
+        for (uint k = 0; k < 2; k++)
-            tMax = min(t1, ray.strataDirInvViewZ * clusterDepths[1]);
+            GetCountAndStartCluster(posInput.tileCoord, lightClusters[k], LIGHTCATEGORY_PUNCTUAL,
+                                    lightStarts[k], lightCounts[k]);
+
-#else  // USE_CLUSTERED_LIGHTLIST
-        float tMin = t0;
-        float tMax = t1;
-#endif // USE_CLUSTERED_LIGHTLIST
-        if (featureFlags & LIGHTFEATUREFLAGS_PUNCTUAL)
+        uint i = 0, j = 0;
+
+        if (i < lightCounts[0] || j < lightCounts[1])
-            uint lightCount, lightStart;
+            // At least one of the clusters is non-empty.
+            uint lightIndices[2];
-        #ifdef USE_CLUSTERED_LIGHTLIST
-            GetCountAndStartCluster(posInput.tileCoord, clusterIndices[cluster], LIGHTCATEGORY_PUNCTUAL,
-                                    lightStart, lightCount);
-        #else  // USE_CLUSTERED_LIGHTLIST
-            lightCount = _PunctualLightCount;
-            lightStart = 0;
-        #endif // USE_CLUSTERED_LIGHTLIST
+            // Fetch two initial indices from both clusters.
+            if (i < lightCounts[0])
+            {
+                lightIndices[0] = FetchIndex(lightStarts[0], i);
+            }
+            else
+            {
+                lightIndices[0] = UINT_MAX;
+            }
-            if (lightCount > 0)
+            if (j < lightCounts[1])
-                LightData light = FetchLight(lightStart, 0);
+                lightIndices[1] = FetchIndex(lightStarts[1], j);
+            }
+            else
+            {
+                lightIndices[1] = UINT_MAX;
+            }
-                int i = 0, last = lightCount - 1;
+            // Process all punctual lights except for box lights (which are technically not even punctual).
+            do
+            {
+                // Process lights in order.
+                uint lightIndex = min(lightIndices[0], lightIndices[1]);
+
+    #else  // USE_CLUSTERED_LIGHTLIST
+        {
+            uint lightIndex = 0;
+
+            // Process all punctual lights except for box lights (which are technically not even punctual).
+            for (; lightIndex < _PunctualLightCount; lightIndex++)
+            {
+    #endif // USE_CLUSTERED_LIGHTLIST
+
+                LightData light = _LightDatas[lightIndex];
+
+                // Process box lights in a separate loop.
+                if (light.lightType == GPULIGHTTYPE_PROJECTOR_BOX) { break; }
+
+                float tEntr = t0;
+                float tExit = t1;
-                // Box lights require special handling (see the next while loop).
-                while (i <= last && light.lightType != GPULIGHTTYPE_PROJECTOR_BOX)
+                bool sampleLight = true;
+
+                // Perform ray-cone intersection for pyramid and spot lights.
+                if (light.lightType != GPULIGHTTYPE_POINT)
-                    float tEntr = tMin;
-                    float tExit = tMax;
+                    float lenMul = 1;
+
+                    if (light.lightType == GPULIGHTTYPE_PROJECTOR_PYRAMID)
+                    {
+                        // 'light.right' and 'light.up' vectors are pre-scaled on the CPU
+                        // s.t. if you were to place them at the distance of 1 directly in front
+                        // of the light, they would give you the "footprint" of the light.
+                        // For spot lights, the cone fit is exact.
+                        // For pyramid lights, however, this is the "inscribed" cone
+                        // (contained within the pyramid), and we want to intersect
+                        // the "escribed" cone (which contains the pyramid).
+                        // Therefore, we have to scale the radii by the sqrt(2).
+                        lenMul = rsqrt(2);
+                    }
+
+                    float3 coneAxisX = lenMul * light.right;
+                    float3 coneAxisY = lenMul * light.up;
-                    bool sampleLight = true;
+                    sampleLight = IntersectRayCone(ray.originWS, ray.strataDirWS,
+                                                   light.positionWS, light.forward,
+                                                   coneAxisX, coneAxisY,
+                                                   t0, t1, tEntr, tExit);
+                }
-                    // Perform ray-cone intersection for pyramid and spot lights.
-                    if (light.lightType != GPULIGHTTYPE_POINT)
-                    {
-                        float lenMul = 1;
+                if (sampleLight)
+                {
+                    // We are unable to adequately sample features larger
+                    // than the half of the length of the integration interval
+                    // divided by the number of temporal samples (7).
+                    // Therefore, we apply this hack to reduce flickering.
+                    float hackMinDistSq = Sq(dt * (0.5 / 7));
-                        if (light.lightType == GPULIGHTTYPE_PROJECTOR_PYRAMID)
-                        {
-                            // 'light.right' and 'light.up' vectors are pre-scaled on the CPU
-                            // s.t. if you were to place them at the distance of 1 directly in front
-                            // of the light, they would give you the "footprint" of the light.
-                            // For spot lights, the cone fit is exact.
-                            // For pyramid lights, however, this is the "inscribed" cone
-                            // (contained within the pyramid), and we want to intersect
-                            // the "escribed" cone (which contains the pyramid).
-                            // Therefore, we have to scale the radii by the sqrt(2).
-                            lenMul = rsqrt(2);
-                        }
+                    float t, distSq, rcpPdf;
+                    ImportanceSamplePunctualLight(rndVal, light.positionWS,
+                                                  ray.originWS, ray.strataDirWS,
+                                                  tEntr, tExit, t, distSq, rcpPdf,
+                                                  hackMinDistSq);
-                        float3 coneAxisX = lenMul * light.right;
-                        float3 coneAxisY = lenMul * light.up;
+                    posInput.positionWS = GetPointAtDistance(ray, t);
-                        sampleLight = IntersectRayCone(ray.originWS, ray.strataDirWS,
-                                                       light.positionWS, light.forward,
-                                                       coneAxisX, coneAxisY,
-                                                       tMin, tMax, tEntr, tExit);
-                    }
+                    float3 lightToSample = posInput.positionWS - light.positionWS;
+                    float  distRcp       = rsqrt(distSq);
+                    float  dist          = distSq * distRcp;
+                    float  distProj      = dot(lightToSample, light.forward);
+                    float4 distances     = float4(dist, distSq, distRcp, distProj);
+                    float3 L             = -lightToSample * distRcp;
-                    if (sampleLight)
-                    {
-                        // We are unable to adequately sample features larger
-                        // than the half of the length of the integration interval
-                        // divided by the number of temporal samples (7).
-                        // Therefore, we apply this hack to reduce flickering.
-                        float hackMinDistSq = Sq(dt * (0.5 / 7));
+                    float3 color; float attenuation;
+                    EvaluateLight_Punctual(context, posInput, light, unused, 0, L, lightToSample,
+                                           distances, color, attenuation);
-                        float t, distSq, rcpPdf;
-                        ImportanceSamplePunctualLight(rndVal, light.positionWS,
-                                                      ray.originWS, ray.strataDirWS,
-                                                      tEntr, tExit, t, distSq, rcpPdf,
-                                                      hackMinDistSq);
+                    // Important:
+                    // Ideally, all scattering calculations should use the stratified versions
+                    // of the sample position and the ray direction. However, correct reprojection
+                    // of asymmetrically scattered lighting (affected by an anisotropic phase
+                    // function) is not possible. We work around this issue by reprojecting
+                    // lighting not affected by the phase function. This basically removes
+                    // the phase function from the temporal integration process. It is a hack.
+                    // The downside is that asymmetry no longer benefits from temporal averaging,
+                    // and any temporal instability of asymmetry causes visible jitter.
+                    // In order to stabilize the image, we use the voxel center for all
+                    // asymmetry-related calculations.
+                    float3 centerL = light.positionWS - centerWS;
+                    float  cosTheta = dot(centerL, ray.centerDirWS) * rsqrt(dot(centerL, centerL));
+                    float  phase    = CornetteShanksPhasePartVarying(asymmetry, cosTheta);
-                        posInput.positionWS = GetPointAtDistance(ray, t);
+                    float intensity = attenuation * rcpPdf;
-                        float3 lightToSample = posInput.positionWS - light.positionWS;
-                        float  distRcp       = rsqrt(distSq);
-                        float  dist          = distSq * distRcp;
-                        float  distProj      = dot(lightToSample, light.forward);
-                        float4 distances     = float4(dist, distSq, distRcp, distProj);
-                        float3 L             = -lightToSample * distRcp;
+                    // Compute transmittance from 't0' to 't'.
+                    intensity *= TransmittanceHomogeneousMedium(extinction, t - t0);
-                        float3 color; float attenuation;
-                        EvaluateLight_Punctual(context, posInput, light, unused, 0, L, lightToSample,
-                                               distances, color, attenuation);
+                    // Compute the amount of in-scattered radiance.
+                    lighting.radianceNoPhase  += intensity * color;
+                    lighting.radianceComplete += phase * intensity * color;
+                }
-                        // Important:
-                        // Ideally, all scattering calculations should use the stratified versions
-                        // of the sample position and the ray direction. However, correct reprojection
-                        // of asymmetrically scattered lighting (affected by an anisotropic phase
-                        // function) is not possible. We work around this issue by reprojecting
-                        // lighting not affected by the phase function. This basically removes
-                        // the phase function from the temporal integration process. It is a hack.
-                        // The downside is that asymmetry no longer benefits from temporal averaging,
-                        // and any temporal instability of asymmetry causes causes visible jitter.
-                        // In order to stabilize the image, we use the voxel center for all
-                        // asymmetry-related calculations.
-                        float3 centerL = light.positionWS - centerWS;
-                        float  cosTheta = dot(centerL, ray.centerDirWS) * rsqrt(dot(centerL, centerL));
-                        float  phase    = CornetteShanksPhasePartVarying(asymmetry, cosTheta);
+        #ifndef USE_CLUSTERED_LIGHTLIST
+            }
-                        float intensity = attenuation * rcpPdf;
+            // Process all box lights.
+            for (; lightIndex < _PunctualLightCount; lightIndex++)
+            {
+        #else // USE_CLUSTERED_LIGHTLIST
-                        // Compute transmittance from 't0' to 't'.
-                        intensity *= TransmittanceHomogeneousMedium(extinction, t - t0);
+                // Advance to the next light in one cluster (or in both at the same time).
+                if (lightIndex == lightIndices[0])
+                {
+                    i++;
-                        // Compute the amount of in-scattered radiance.
-                        lighting.radianceNoPhase  += intensity * color;
-                        lighting.radianceComplete += phase * intensity * color;
+                    if (i < lightCounts[0])
+                    {
+                        lightIndices[0] = FetchIndex(lightStarts[0], i);
+                    }
+                    else
+                    {
+                        lightIndices[0] = UINT_MAX;
-
-                    light = FetchLight(lightStart, min(++i, last));
-                while (i <= last) // GPULIGHTTYPE_PROJECTOR_BOX
+                if (lightIndex == lightIndices[1])
-                    light = FetchLight(lightStart, min(++i, last));
-                    light.lightType = GPULIGHTTYPE_PROJECTOR_BOX;
+                    j++;
-                    // Convert the box light from OBB to AABB.
-                    // 'light.right' and 'light.up' vectors are pre-scaled on the CPU by (2/w) and (2/h).
-                    float3x3 rotMat = float3x3(light.right, light.up, light.forward);
+                    if (j < lightCounts[1])
+                    {
+                        lightIndices[1] = FetchIndex(lightStarts[1], j);
+                    }
+                    else
+                    {
+                        lightIndices[1] = UINT_MAX;
+                    }
+                }
+            } while (i < lightCounts[0] || j < lightCounts[1]);
-                    float3 o = mul(rotMat, ray.originWS - light.positionWS);
-                    float3 d = mul(rotMat, ray.strataDirWS);
+            // Process all box lights.
+            while (i < lightCounts[0] || j < lightCounts[1])
+            {
+                // Process lights in order.
+                uint lightIndex = min(lightIndices[0], lightIndices[1]);
-                    float  range  = light.size.x;
-                    float3 boxPt0 = float3(-1, -1, 0);
-                    float3 boxPt1 = float3( 1,  1, range);
+        #endif // USE_CLUSTERED_LIGHTLIST
-                    float tEntr, tExit;
+                LightData light = _LightDatas[lightIndex];
+                light.lightType = GPULIGHTTYPE_PROJECTOR_BOX;
-                    if (IntersectRayAABB(o, d, boxPt0, boxPt1, tMin, tMax, tEntr, tExit))
-                    {
-                        float tOffset, weight;
-                        ImportanceSampleHomogeneousMedium(rndVal, extinction, tExit - tEntr, tOffset, weight);
+                // Convert the box light from OBB to AABB.
+                // 'light.right' and 'light.up' vectors are pre-scaled on the CPU by (2/w) and (2/h).
+                float3x3 rotMat = float3x3(light.right, light.up, light.forward);
-                        float t = tEntr + tOffset;
-                        posInput.positionWS = GetPointAtDistance(ray, t);
+                float3 o = mul(rotMat, ray.originWS - light.positionWS);
+                float3 d = mul(rotMat, ray.strataDirWS);
-                        float3 L             = -light.forward;
-                        float3 lightToSample = posInput.positionWS - light.positionWS;
-                        float  distProj      = dot(lightToSample, light.forward);
-                        float4 distances     = float4(1, 1, 1, distProj);
+                float  range  = light.size.x;
+                float3 boxPt0 = float3(-1, -1, 0);
+                float3 boxPt1 = float3( 1,  1, range);
-                        float3 color; float attenuation;
-                        EvaluateLight_Punctual(context, posInput, light, unused, 0, L, lightToSample,
-                                               distances, color, attenuation);
+                float tEntr, tExit;
-                        // Important:
-                        // Ideally, all scattering calculations should use the stratified versions
-                        // of the sample position and the ray direction. However, correct reprojection
-                        // of asymmetrically scattered lighting (affected by an anisotropic phase
-                        // function) is not possible. We work around this issue by reprojecting
-                        // lighting not affected by the phase function. This basically removes
-                        // the phase function from the temporal integration process. It is a hack.
-                        // The downside is that asymmetry no longer benefits from temporal averaging,
-                        // and any temporal instability of asymmetry causes causes visible jitter.
-                        // In order to stabilize the image, we use the voxel center for all
-                        // asymmetry-related calculations.
-                        float3 centerL = light.positionWS - centerWS;
-                        float  cosTheta = dot(centerL, ray.centerDirWS) * rsqrt(dot(centerL, centerL));
-                        float  phase    = CornetteShanksPhasePartVarying(asymmetry, cosTheta);
+                if (IntersectRayAABB(o, d, boxPt0, boxPt1, t0, t1, tEntr, tExit))
+                {
+                    float tOffset, weight;
+                    ImportanceSampleHomogeneousMedium(rndVal, extinction, tExit - tEntr, tOffset, weight);
-                        // Note: the 'weight' accounts for transmittance from 'tEntr' to 't'.
-                        float intensity = attenuation * weight;
+                    float t = tEntr + tOffset;
+                    posInput.positionWS = GetPointAtDistance(ray, t);
+
+                    float3 L             = -light.forward;
+                    float3 lightToSample = posInput.positionWS - light.positionWS;
+                    float  distProj      = dot(lightToSample, light.forward);
+                    float4 distances     = float4(1, 1, 1, distProj);
+
+                    float3 color; float attenuation;
+                    EvaluateLight_Punctual(context, posInput, light, unused, 0, L, lightToSample,
+                                           distances, color, attenuation);
+
+                    // Important:
+                    // Ideally, all scattering calculations should use the stratified versions
+                    // of the sample position and the ray direction. However, correct reprojection
+                    // of asymmetrically scattered lighting (affected by an anisotropic phase
+                    // function) is not possible. We work around this issue by reprojecting
+                    // lighting not affected by the phase function. This basically removes
+                    // the phase function from the temporal integration process. It is a hack.
+                    // The downside is that asymmetry no longer benefits from temporal averaging,
+                    // and any temporal instability of asymmetry causes visible jitter.
+                    // In order to stabilize the image, we use the voxel center for all
+                    // asymmetry-related calculations.
+                    float3 centerL = light.positionWS - centerWS;
+                    float  cosTheta = dot(centerL, ray.centerDirWS) * rsqrt(dot(centerL, centerL));
+                    float  phase    = CornetteShanksPhasePartVarying(asymmetry, cosTheta);
+
+                    // Note: the 'weight' accounts for transmittance from 'tEntr' to 't'.
+                    float intensity = attenuation * weight;
+
+                    // Compute transmittance from 't0' to 'tEntr'.
+                    intensity *= TransmittanceHomogeneousMedium(extinction, tEntr - t0);
+
+                    // Compute the amount of in-scattered radiance.
+                    lighting.radianceNoPhase  += intensity * color;
+                    lighting.radianceComplete += phase * intensity * color;
+                }
+
+            #ifdef USE_CLUSTERED_LIGHTLIST
+                // Advance to the next light in one cluster (or in both at the same time).
+                if (lightIndex == lightIndices[0])
+                {
+                    i++;
+
+                    if (i < lightCounts[0])
+                    {
+                        lightIndices[0] = FetchIndex(lightStarts[0], i);
+                    }
+                    else
+                    {
+                        lightIndices[0] = UINT_MAX;
+                    }
+                }
-                        // Compute transmittance from 't0' to 'tEntr'.
-                        intensity *= TransmittanceHomogeneousMedium(extinction, tEntr - t0);
+                if (lightIndex == lightIndices[1])
+                {
+                    j++;
-                        // Compute the amount of in-scattered radiance.
-                        lighting.radianceNoPhase  += intensity * color;
-                        lighting.radianceComplete += phase * intensity * color;
+                    if (j < lightCounts[1])
+                    {
+                        lightIndices[1] = FetchIndex(lightStarts[1], j);
+                    }
+                    else
+                    {
+                        lightIndices[1] = UINT_MAX;
+            #endif // USE_CLUSTERED_LIGHTLIST
-#ifdef USE_CLUSTERED_LIGHTLIST
-        cluster++;
-        // Check whether the voxel is completely inside the light cluster.
-    } while ((cluster < 2) && (clusterIndices[0] != clusterIndices[1]));
-#endif // USE_CLUSTERED_LIGHTLIST
+    }

    return lighting;
 }
    float  opticalDepth  = 0;

 #ifdef USE_CLUSTERED_LIGHTLIST
-    // Our voxel is not necessarily completely inside a single light cluster (along Z).
-    // Note that Z-binning can solve this problem, as we can iterate over all Z-bins
-    // to compute min/max light indices, and then use this range for the entire slice.
-    uint  clusterIndices[2];
-    float clusterDepths[2];
-    clusterIndices[0] = GetLightClusterIndex(posInput.tileCoord, z0);
-    clusterDepths[0]  = GetLightClusterMinLinearDepth(posInput.tileCoord, clusterIndices[0]);
+    // The voxel can overlap up to 2 light clusters along Z, so we have to iterate over both.
+    // TODO: implement Z-binning which makes Z-range queries easy.
+    uint lightClusters[2];
+    lightClusters[0] = GetLightClusterIndex(posInput.tileCoord, z0);
 #endif // USE_CLUSTERED_LIGHTLIST

 #if defined(SHADER_API_METAL)
        float dt = t1 - t0;

    #ifdef USE_CLUSTERED_LIGHTLIST
-        clusterIndices[1] = GetLightClusterIndex(posInput.tileCoord, z1);
-        clusterDepths[1]  = GetLightClusterMinLinearDepth(posInput.tileCoord, clusterIndices[1]);
+        lightClusters[1] = GetLightClusterIndex(posInput.tileCoord, z1);
    #endif

        // Compute the -exact- position of the center of the voxel.
        VoxelLighting lighting = EvaluateVoxelLighting(context, featureFlags, posInput, centerWS,
                                                       ray, t0, t1, dt, rndVal, extinction, asymmetry
                                                   #ifdef USE_CLUSTERED_LIGHTLIST
-                                                       , clusterIndices, clusterDepths);
+                                                       , lightClusters);
                                                   #else
                                                       );
                                                   #endif
        // Store the voxel data.
        _VBufferLightingIntegral[voxelCoord] = integral;

+        z0 = z1;
-        clusterIndices[0] = clusterIndices[1];
-        clusterDepths[0]  = clusterDepths[1];
+        lightClusters[0] = lightClusters[1];
    #endif
    }
 }