ScriptableRenderPipeline/ScriptableRenderPipeline/HDRenderPipeline/HDRP/Lighting/Volumetrics/Resources/VolumetricLighting.compute


								//--------------------------------------------------------------------------------------------------

								// Definitions

								//--------------------------------------------------------------------------------------------------


								// Kernel variants. Every variant compiles the same 'VolumetricLighting' entry point (renamed
								// through the VolumetricLighting=<KernelName> define) with a different set of defines:
								//   - ENABLE_REPROJECTION=0/1 toggles the temporal reprojection code paths.
								//   - LIGHTLOOP_SINGLE_PASS iterates over all punctual lights.
								//   - LIGHTLOOP_TILE_PASS + USE_CLUSTERED_LIGHTLIST iterates over the lights of the light cluster(s).
								#pragma kernel VolumetricLightingAllLights       VolumetricLighting=VolumetricLightingAllLights       ENABLE_REPROJECTION=0 LIGHTLOOP_SINGLE_PASS
								#pragma kernel VolumetricLightingAllLightsReproj VolumetricLighting=VolumetricLightingAllLightsReproj ENABLE_REPROJECTION=1 LIGHTLOOP_SINGLE_PASS
								#pragma kernel VolumetricLightingClustered       VolumetricLighting=VolumetricLightingClustered       ENABLE_REPROJECTION=0 LIGHTLOOP_TILE_PASS   USE_CLUSTERED_LIGHTLIST
								#pragma kernel VolumetricLightingClusteredReproj VolumetricLighting=VolumetricLightingClusteredReproj ENABLE_REPROJECTION=1 LIGHTLOOP_TILE_PASS   USE_CLUSTERED_LIGHTLIST

								// Debugging aid: uncomment only while inspecting the shader in a graphics debugger.
								// It should not be left enabled, since it makes the kernels compile with debug information.
								// #pragma enable_d3d11_debug_symbols


								// Debug switch. When non-zero, FillVolumetricLightingBuffer() paints a voxel bright red if its

								// freshly computed radiance differs from the reprojected history by more than 0.1.

								// The check reads the reprojected value, which only exists when ENABLE_REPROJECTION is set.

								#define DEBUG_REPROJECTION 0


								#include "../../../ShaderPass/ShaderPass.cs.hlsl"

								// Select the shader pass (presumably SHADERPASS_VOLUMETRIC_LIGHTING comes from the include above - confirm).

								#define SHADERPASS SHADERPASS_VOLUMETRIC_LIGHTING


								#include "../../../ShaderConfig.cs.hlsl"

								// Quality preset of the V-Buffer:

								// VBUFFER_TILE_SIZE   - size of a voxel in pixels along X and Y (see the examples below).

								// VBUFFER_SLICE_COUNT - number of depth slices; it is the trip count of the integration loop.

								#if (SHADEROPTIONS_VOLUMETRIC_LIGHTING_PRESET == 1)

								    // E.g. for 1080p: (1920/8)x(1080/8)x(128) =  4,147,200 voxels

								    #define VBUFFER_TILE_SIZE   8

								    #define VBUFFER_SLICE_COUNT 128

								#else

								    // E.g. for 1080p: (1920/4)x(1080/4)x(256) = 33,177,600 voxels

								    #define VBUFFER_TILE_SIZE   4

								    #define VBUFFER_SLICE_COUNT 256

								#endif


								// A thread group processes GROUP_SIZE_1D x GROUP_SIZE_1D voxels (one thread per voxel column).

								// The group is declared as a flat list of GROUP_SIZE_2D threads, which the kernel

								// maps to 2D coordinates using the Morton order.

								#define GROUP_SIZE_1D 16

								#define GROUP_SIZE_2D (GROUP_SIZE_1D * GROUP_SIZE_1D)


								//--------------------------------------------------------------------------------------------------

								// Included headers

								//--------------------------------------------------------------------------------------------------


								#include "CoreRP/ShaderLibrary/Common.hlsl"

								#include "CoreRP/ShaderLibrary/Filtering.hlsl"

								#include "CoreRP/ShaderLibrary/VolumeRendering.hlsl"

								#include "CoreRP/ShaderLibrary/SpaceFillingCurves.hlsl"


								#include "../VolumetricLighting.cs.hlsl"

								#include "../../../ShaderVariables.hlsl"


								#define UNITY_MATERIAL_VOLUMETRIC          // Define before including Lighting.hlsl and Material.hlsl

								#include "../../../Lighting/Lighting.hlsl" // Includes Material.hlsl

								#include "../../../Lighting/LightEvaluation.hlsl"

								#include "../../../Lighting/VBuffer.hlsl"


								//--------------------------------------------------------------------------------------------------

								// Inputs & outputs

								//--------------------------------------------------------------------------------------------------


								// Output: for every slice, the radiance accumulated from the start of integration to that slice,

								// and the optical depth up to the center of the voxel.

								RW_TEXTURE3D(float4, _VBufferLightingIntegral); // RGB = radiance, A = optical depth

								// Output (ENABLE_REPROJECTION only): temporally blended per-voxel radiance and the length 'dt'

								// of the interval it was integrated over.

								RW_TEXTURE3D(float4, _VBufferLightingFeedback); // RGB = radiance, A = interval length

								// Input (ENABLE_REPROJECTION only): sampled at the reprojected position of the voxel center.

								// An interval length of 0 is treated as a reprojection failure.

								TEXTURE3D(_VBufferLightingHistory);             // RGB = radiance, A = interval length


								// TODO: avoid creating another Constant Buffer...

								CBUFFER_START(UnityVolumetricLighting)

								    // Only read when ENABLE_REPROJECTION is set: 'xy' offsets the sample position from the voxel center,

								    // 'z' is the sample position selector along the ray. 'w' is not read in this file.

								    float4   _VBufferSampleOffset;              // {x, y, z}, w = rendered frame count

								    // Transforms (voxelCoordX, voxelCoordY, 1) into a world space view direction with ViewSpaceZ = 1.

								    float4x4 _VBufferCoordToViewDirWS;          // Actually just 3x3, but Unity can only set 4x4

								CBUFFER_END


								//--------------------------------------------------------------------------------------------------

								// Implementation

								//--------------------------------------------------------------------------------------------------


								// A view ray passing through a column of voxels, as set up by the VolumetricLighting kernel.

								struct Ray

								{

								    float3 originWS;    // World space origin (the kernel uses the current view position)

								    float3 directionWS; // Normalized, stratified

								    float  ratioLenToZ; // Distance along the ray per unit of view space Z: t = ratioLenToZ * z

								    float3 centerDirWS; // Not normalized, centered (passes through the center of the voxel)

								};


								// Returns the world space position located at the distance 't' from the origin
								// along the (normalized, stratified) direction of the ray.
								float3 GetPointAtDistance(Ray ray, float t)
								{
								    float3 offsetWS = t * ray.directionWS;

								    return ray.originWS + offsetWS;
								}


								// Same as GetPointAtDistance(), but follows the direction which passes
								// through the center of the voxel ('centerDirWS') rather than the stratified one.
								float3 GetCenterAtDistance(Ray ray, float t)
								{
								    float3 offsetWS = t * ray.centerDirWS;

								    return ray.originWS + offsetWS;
								}


								// Computes the light integral (in-scattered radiance) within the voxel.
								// Multiplication by the scattering coefficient and the phase function is performed outside.
								//
								// Parameters:
								//   context      - light loop context forwarded to the EvaluateLight_*() functions.
								//   featureFlags - LIGHTFEATUREFLAGS_* bit mask selecting the light categories to evaluate.
								//   posInput     - position inputs of the voxel. 'positionWS' is overwritten with the position
								//                  of every sample; 'tileCoord' is used to look up the light cluster.
								//   ray          - ray passing through the voxel.
								//   t0, t1       - distances along the ray to the front and to the back of the voxel.
								//   dt           - length of the integration interval (the caller passes t1 - t0).
								//   rndVal       - sample position selector passed to the importance sampling functions.
								//   extinction   - extinction coefficient, considered constant within the voxel.
								//   clusterIndices, clusterDepths (LIGHTLOOP_TILE_PASS only)
								//                - index and min. linear depth of the light clusters
								//                  at the front [0] and at the back [1] of the voxel.
								// Returns the sum of the weighted contributions of all evaluated lights.
								float3 EvaluateVoxelLighting(LightLoopContext context, uint featureFlags, PositionInputs posInput,
								                             Ray ray, float t0, float t1, float dt, float rndVal, float extinction
								                         #ifdef LIGHTLOOP_TILE_PASS
								                           , uint clusterIndices[2], float clusterDepths[2])
								                         #else
								                             )
								                         #endif
								{
								    float3 voxelRadiance = 0;

								    BakeLightingData unused; // Unused for now, so define once

								    if (featureFlags & LIGHTFEATUREFLAGS_DIRECTIONAL)
								    {
								        // All directional lights share a single sample position within [t0, t1].
								        float tOffset, weight;
								        ImportanceSampleHomogeneousMedium(rndVal, extinction, dt, tOffset, weight);

								        float t = t0 + tOffset;
								        posInput.positionWS = GetPointAtDistance(ray, t);

								        for (uint i = 0; i < _DirectionalLightCount; ++i)
								        {
								            // Fetch the light.
								            DirectionalLightData light = _DirectionalLightDatas[i];
								            float3 L = -light.forward; // Lights point backwards in Unity

								            float3 color; float attenuation;
								            EvaluateLight_Directional(context, posInput, light, unused, 0, L,
								                                      color, attenuation);

								            // Note: the 'weight' accounts for transmittance from 't0' to 't'.
								            float intensity = attenuation * weight;

								            // Compute the amount of in-scattered radiance.
								            voxelRadiance += intensity * color;
								        }
								    }

								#ifdef LIGHTLOOP_TILE_PASS
								    // Loop over 1 or 2 light clusters.
								    int cluster = 0;
								    do
								    {
								        // Restrict the integration interval to the part of the voxel covered by the cluster.
								        float tMin = max(t0, ray.ratioLenToZ * clusterDepths[cluster]);
								        float tMax = t1;

								        if (cluster == 0 && (clusterIndices[0] != clusterIndices[1]))
								        {
								            // The first cluster ends where the second one begins.
								            tMax = min(t1, ray.ratioLenToZ * clusterDepths[1]);
								        }
								#else
								        float tMin = t0;
								        float tMax = t1;
								#endif // LIGHTLOOP_TILE_PASS

								        if (featureFlags & LIGHTFEATUREFLAGS_PUNCTUAL)
								        {
								            uint lightCount, lightStart;

								        #ifdef LIGHTLOOP_TILE_PASS
								            GetCountAndStartCluster(posInput.tileCoord, clusterIndices[cluster], LIGHTCATEGORY_PUNCTUAL,
								                                    lightStart, lightCount);
								        #else
								            lightCount = _PunctualLightCount;
								            lightStart = 0;
								        #endif // LIGHTLOOP_TILE_PASS

								            if (lightCount > 0)
								            {
								                // 'light' always holds the light with the index 'i' at the start of an iteration.
								                LightData light = FetchLight(lightStart, 0);

								                uint i = 0, last = lightCount - 1;

								                // Box lights require special handling (see the next while loop).
								                while (i <= last && light.lightType != GPULIGHTTYPE_PROJECTOR_BOX)
								                {
								                    float tEntr = tMin;
								                    float tExit = tMax;

								                    bool sampleLight = true;

								                    // Perform ray-cone intersection for pyramid and spot lights.
								                    if (light.lightType != GPULIGHTTYPE_POINT)
								                    {
								                        float lenMul = 1;

								                        if (light.lightType == GPULIGHTTYPE_PROJECTOR_PYRAMID)
								                        {
								                            // 'light.right' and 'light.up' vectors are pre-scaled on the CPU
								                            // s.t. if you were to place them at the distance of 1 directly in front
								                            // of the light, they would give you the "footprint" of the light.
								                            // For spot lights, the cone fit is exact.
								                            // For pyramid lights, however, this is the "inscribed" cone
								                            // (contained within the pyramid), and we want to intersect
								                            // the "escribed" cone (which contains the pyramid).
								                            // Therefore, we have to scale the radii by the sqrt(2).
								                            // NOTE(review): the axes are multiplied by 1/sqrt(2); presumably they are
								                            // reciprocal to the radii, so this widens the cone - confirm against IntersectRayCone().
								                            lenMul = rsqrt(2);
								                        }

								                        float3 coneAxisX = lenMul * light.right;
								                        float3 coneAxisY = lenMul * light.up;

								                        sampleLight = IntersectRayCone(ray.originWS, ray.directionWS,
								                                                       light.positionWS, light.forward,
								                                                       coneAxisX, coneAxisY,
								                                                       tMin, tMax, tEntr, tExit);
								                    }

								                    if (sampleLight)
								                    {
								                        // We are unable to adequately sample features larger
								                        // than the half of the length of the integration interval
								                        // divided by the number of temporal samples (7).
								                        // Therefore, we apply this hack to reduce flickering.
								                        float hackMinDistSq = Sq(dt * (0.5 / 7));

								                        float t, distSq, rcpPdf;
								                        ImportanceSamplePunctualLight(rndVal, light.positionWS,
								                                                      ray.originWS, ray.directionWS,
								                                                      tEntr, tExit, t, distSq, rcpPdf,
								                                                      hackMinDistSq);

								                        posInput.positionWS = GetPointAtDistance(ray, t);

								                        float3 lightToSample = posInput.positionWS - light.positionWS;
								                        float  dist          = sqrt(distSq);
								                        float3 L             = -lightToSample * rsqrt(distSq);

								                        float3 color; float attenuation;
								                        EvaluateLight_Punctual(context, posInput, light, unused, 0, L, dist, distSq,
								                                               color, attenuation);

								                        float intensity = attenuation * rcpPdf;

								                        // Compute transmittance from 't0' to 't'.
								                        intensity *= TransmittanceHomogeneousMedium(extinction, t - t0);

								                        // Compute the amount of in-scattered radiance.
								                        voxelRadiance += color * intensity;
								                    }

								                    // Fetch the next light. The index is clamped to avoid reading past the end of the list.
								                    light = FetchLight(lightStart, min(++i, last));
								                }

								                // The previous loop stops at the first box light, which is already stored in 'light'.
								                // All remaining lights are treated as box lights.
								                while (i <= last) // GPULIGHTTYPE_PROJECTOR_BOX
								                {
								                    // The type is known at this point; presumably, setting it explicitly
								                    // allows the compiler to specialize the code evaluating the light.
								                    light.lightType = GPULIGHTTYPE_PROJECTOR_BOX;

								                    // Convert the box light from OBB to AABB.
								                    // 'light.right' and 'light.up' vectors are pre-scaled on the CPU by (2/w) and (2/h).
								                    float3x3 rotMat = float3x3(light.right, light.up, light.forward);

								                    float3 o = mul(rotMat, ray.originWS - light.positionWS);
								                    float3 d = mul(rotMat, ray.directionWS);

								                    float  range  = light.size.x;
								                    float3 boxPt0 = float3(-1, -1, 0);
								                    float3 boxPt1 = float3( 1,  1, range);

								                    float tEntr, tExit;

								                    if (IntersectRayAABB(o, d, boxPt0, boxPt1, tMin, tMax, tEntr, tExit))
								                    {
								                        float tOffset, weight;
								                        ImportanceSampleHomogeneousMedium(rndVal, extinction, tExit - tEntr, tOffset, weight);

								                        float t = tEntr + tOffset;
								                        posInput.positionWS = GetPointAtDistance(ray, t);

								                        float3 L = -light.forward;

								                        float3 color; float attenuation;
								                        EvaluateLight_Punctual(context, posInput, light, unused, 0, L, 1, 1,
								                                               color, attenuation);

								                        // Note: the 'weight' accounts for transmittance from 'tEntr' to 't'.
								                        float intensity = attenuation * weight;

								                        // Compute transmittance from 't0' to 'tEntr'.
								                        intensity *= TransmittanceHomogeneousMedium(extinction, tEntr - t0);

								                        // Compute the amount of in-scattered radiance.
								                        voxelRadiance += intensity * color;
								                    }

								                    // Fetch the next light only after the current one has been processed.
								                    // Fetching at the top of the loop would skip the first box light
								                    // and evaluate the last one twice.
								                    light = FetchLight(lightStart, min(++i, last));
								                }
								            }
								        }
								#ifdef LIGHTLOOP_TILE_PASS
								        cluster++;
								        // Check whether the voxel is completely inside the light cluster.
								    } while ((cluster < 2) && (clusterIndices[0] != clusterIndices[1]));
								#endif // LIGHTLOOP_TILE_PASS

								    return voxelRadiance;
								}


								// Computes the in-scattered radiance along the ray.
								// Marches through all slices of the V-Buffer front to back and, for every voxel, stores
								// the radiance and the optical depth accumulated so far in '_VBufferLightingIntegral'.
								// If ENABLE_REPROJECTION is set, the lighting of the voxel is blended with the reprojected
								// history, and the result is stored in '_VBufferLightingFeedback'.
								//
								// Parameters:
								//   context      - light loop context forwarded to EvaluateVoxelLighting().
								//   featureFlags - LIGHTFEATUREFLAGS_* bit mask forwarded to EvaluateVoxelLighting().
								//   posInput     - position inputs; 'positionSS' addresses the voxel column,
								//                  'tileCoord' addresses the light cluster tile.
								//   ray          - ray passing through the voxel column.
								void FillVolumetricLightingBuffer(LightLoopContext context, uint featureFlags,
								                                  PositionInputs posInput, Ray ray)
								{
								    float z0 = _VBufferDepthEncodingParams.x;   // Start integration from the near plane
								    float t0 = ray.ratioLenToZ * z0;
								    float de = rcp(VBUFFER_SLICE_COUNT);        // Log-encoded distance between slices

								    float3 totalRadiance = 0;
								    float  opticalDepth  = 0;

								    // NOTE(review): this evaluates to VBUFFER_SLICE_COUNT only as long as the integer part of
								    // '_VBufferDepthEncodingParams.x' does not exceed it - confirm the expected range of the value.
								    uint sliceCountHack = max(VBUFFER_SLICE_COUNT, (uint)_VBufferDepthEncodingParams.x); // Prevent unrolling...

								#ifdef LIGHTLOOP_TILE_PASS
								    // Our voxel is not necessarily completely inside a single light cluster.
								    // Note that Z-binning can solve this problem, as we can iterate over all Z-bins
								    // to compute min/max light indices, and then use this range for the entire slice.
								    uint  clusterIndices[2];
								    float clusterDepths[2];
								    clusterIndices[0] = GetLightClusterIndex(posInput.tileCoord, z0);
								    clusterDepths[0]  = GetLightClusterMinLinearDepth(posInput.tileCoord, clusterIndices[0]);
								#endif // LIGHTLOOP_TILE_PASS

								    // TODO: replace 'sliceCountHack' with VBUFFER_SLICE_COUNT when the shader compiler bug is fixed.
								    for (uint slice = 0; slice < sliceCountHack; slice++)
								    {
								        float e1 = slice * de + de; // (slice + 1) / sliceCount
								        float z1 = DecodeLogarithmicDepth(e1, _VBufferDepthEncodingParams);
								        float t1 = ray.ratioLenToZ * z1;
								        float dt = t1 - t0;

								    #ifdef LIGHTLOOP_TILE_PASS
								        // Look up the light cluster at the back of the voxel.
								        clusterIndices[1] = GetLightClusterIndex(posInput.tileCoord, z1);
								        clusterDepths[1]  = GetLightClusterMinLinearDepth(posInput.tileCoord, clusterIndices[1]);
								    #endif

								        // Compute the -exact- position of the center of the voxel.
								        // It's important since the accumulated value of the integral is stored at the center.
								        // We will use it for participating media sampling and reprojection.
								        float  tc       = t0 + 0.5 * dt;
								        float3 centerWS = GetCenterAtDistance(ray, tc);

								        // Sample the participating medium at 'tc' (or 'centerWS').
								        // We consider it to be constant along the interval [t0, t1] (within the voxel).
								        // TODO: piecewise linear.
								        float3 scattering = _GlobalFog_Scattering;
								        float  extinction = _GlobalFog_Extinction;

								    #if ENABLE_REPROJECTION
								        // This is a sequence of 7 equidistant numbers from 1/14 to 13/14.
								        // Each of them is the centroid of the interval of length 2/14.
								        float rndVal = _VBufferSampleOffset.z;
								    #else
								        float rndVal = 0.5;
								    #endif

								        float3 voxelRadiance = EvaluateVoxelLighting(context, featureFlags, posInput,
								                                                     ray, t0, t1, dt, rndVal, extinction
								                                                #ifdef LIGHTLOOP_TILE_PASS
								                                                   , clusterIndices, clusterDepths);
								                                                #else
								                                                     );
								                                                #endif

								    #if ENABLE_REPROJECTION
								        // Reproject the history at 'centerWS'.
								        float2 reprojPosNDC = ComputeNormalizedDeviceCoordinates(centerWS, _PrevViewProjMatrix);
								        float  reprojZ      = mul(_PrevViewProjMatrix, float4(centerWS, 1)).w;
								        float4 reprojValue  = SampleVBuffer(TEXTURE3D_PARAM(_VBufferLightingHistory, s_trilinear_clamp_sampler),
								                                            false, reprojPosNDC, reprojZ,
								                                            _VBufferScaleAndSliceCount,
								                                            _VBufferDepthEncodingParams);

								        // Compute the exponential moving average over 'n' frames:
								        // X = (1 - a) * ValueAtFrame[n] + a * AverageOverPreviousFrames.
								        // We want each sample to be uniformly weighted by (1 / n):
								        // X = (1 / n) * Sum{i from 1 to n}{ValueAtFrame[i]}.
								        // Therefore, we get:
								        // (1 - a) = (1 / n) => a = (1 - 1 / n) = (n - 1) / n,
								        // X = (1 / n) * ValueAtFrame[n] + (1 - 1 / n) * AverageOverPreviousFrames.
								        // Why does it work? We need to make the following assumption:
								        // AverageOverPreviousFrames ≈ AverageOverFrames[n - 1].
								        // AverageOverFrames[n - 1] = (1 / (n - 1)) * Sum{i from 1 to n - 1}{ValueAtFrame[i]}.
								        // This implies that the reprojected (accumulated) value has mostly converged.
								        // X = (1 / n) * ValueAtFrame[n] + ((n - 1) / n) * (1 / (n - 1)) * Sum{i from 1 to n - 1}{ValueAtFrame[i]}.
								        // X = (1 / n) * ValueAtFrame[n] + (1 / n) * Sum{i from 1 to n - 1}{ValueAtFrame[i]}.
								        // X = Sum{i from 1 to n}{ValueAtFrame[i] / n}.
								        float numFrames     = 7;
								        float frameWeight   = 1 / numFrames;
								        float historyWeight = 1 - frameWeight;

								        // The accuracy of the integral linearly decreases with the length of the interval.
								        // Therefore, reprojecting longer intervals should result in a lower confidence.
								        // TODO: doesn't seem to be worth it, removed for now.

								        // Perform temporal blending.
								        // Both radiance values are obtained by integrating over line segments of different length.
								        // Blending only makes sense if the length of both intervals is the same.
								        // Therefore, the reprojected radiance needs to be rescaled by (frame_dt / reproj_dt).
								        // A reprojected interval length of 0 is treated as a failure: the history is ignored.
								        bool   reprojSuccess   = reprojValue.a != 0;
								        float  blendFactor     = reprojSuccess ? historyWeight : 0;
								        float  reprojRcpLen    = reprojSuccess ? rcp(reprojValue.a) : 0;
								        float  lengthScale     = dt * reprojRcpLen;
								        float3 reprojRadiance  = reprojValue.rgb;
								        float3 blendedRadiance = (1 - blendFactor) * voxelRadiance + blendFactor * lengthScale * reprojRadiance;

								        // Store the feedback for the voxel.
								        // TODO: dynamic lights (which update their position, rotation, cookie or shadow at runtime)
								        // do not support reprojection and should neither read nor write to the history buffer.
								        // This will cause them to alias, but it is the only way to prevent ghosting.
								        _VBufferLightingFeedback[uint3(posInput.positionSS, slice)] = float4(blendedRadiance, dt);
								    #else
								        float3 blendedRadiance = voxelRadiance;
								    #endif

								    // The debug view compares against the reprojected value, which only exists
								    // if reprojection is enabled. The extra condition keeps the kernels
								    // without reprojection compiling when DEBUG_REPROJECTION is set.
								    #if DEBUG_REPROJECTION && ENABLE_REPROJECTION
								        if (distance(voxelRadiance, reprojValue.rgb) > 0.1) blendedRadiance = float3(1000, 0, 0);
								    #endif

								        // Compute the transmittance from the camera to 't0'.
								        float transmittance = Transmittance(opticalDepth);

								        // Integral{a, b}{Transmittance(0, t) * L_s(t) dt} = Transmittance(0, a) * Integral{a, b}{Transmittance(0, t - a) * L_s(t) dt}.
								        totalRadiance += (transmittance * IsotropicPhaseFunction()) * scattering * blendedRadiance;

								        // Compute the optical depth up to the center of the interval.
								        opticalDepth += 0.5 * extinction * dt;

								        // Store the voxel data.
								        _VBufferLightingIntegral[uint3(posInput.positionSS, slice)] = float4(totalRadiance, opticalDepth);

								        // Compute the optical depth up to the end of the interval.
								        opticalDepth += 0.5 * extinction * dt;

								        // The back of this voxel is the front of the next one.
								        t0 = t1;

								    #ifdef LIGHTLOOP_TILE_PASS
								        clusterIndices[0] = clusterIndices[1];
								        clusterDepths[0]  = clusterDepths[1];
								    #endif // LIGHTLOOP_TILE_PASS
								    }
								}


								// Kernel entry point. Each thread processes a single column of voxels of the V-Buffer:
								// it sets up the view ray passing through the column and integrates the lighting along it.
								//
								// Parameters:
								//   groupId       - 2D index of the thread group; a group covers GROUP_SIZE_1D x GROUP_SIZE_1D voxels.
								//   groupThreadId - flat index of the thread within the group, decoded in the Morton order.
								[numthreads(GROUP_SIZE_2D, 1, 1)]
								void VolumetricLighting(uint2 groupId       : SV_GroupID,
								                        uint  groupThreadId : SV_GroupThreadID)
								{
								    // Perform compile-time checks.
								    if (!IsPower2(VBUFFER_TILE_SIZE) || !IsPower2(TILE_SIZE_CLUSTERED)) return;

								    // Arrange threads in the Morton order to optimally match the memory layout of GCN tiles.
								    uint2 groupCoord  = DecodeMorton2D(groupThreadId);
								    uint2 groupOffset = groupId * GROUP_SIZE_1D;
								    uint2 voxelCoord  = groupOffset + groupCoord;
								    uint2 tileCoord   = voxelCoord * VBUFFER_TILE_SIZE / TILE_SIZE_CLUSTERED;

								    uint voxelsPerClusterTile = Sq((uint)(TILE_SIZE_CLUSTERED / VBUFFER_TILE_SIZE));

								    // Note: any factor of 64 is a suitable wave size for our algorithm.
								    if (voxelsPerClusterTile >= 64)
								    {
								        // TODO: this is a compile-time test, make sure the compiler actually scalarizes.
								        tileCoord = WaveReadFirstLane(tileCoord);
								    }

								    // Discard the threads located outside of the V-Buffer.
								    [branch] if (voxelCoord.x >= (uint)_VBufferResolution.x ||
								                 voxelCoord.y >= (uint)_VBufferResolution.y)
								    {
								        return;
								    }

								    float2 centerCoord = voxelCoord + 0.5;
								#if ENABLE_REPROJECTION
								    // Offset the sample position from the center of the voxel.
								    float2 sampleCoord = centerCoord + _VBufferSampleOffset.xy;
								#else
								    float2 sampleCoord = centerCoord;
								#endif

								    // Compute the (stratified) ray direction s.t. its ViewSpaceZ = 1.
								    float3 rayDir = mul(-float3(sampleCoord, 1), (float3x3)_VBufferCoordToViewDirWS);
								    float  lenSq  = dot(rayDir, rayDir);
								    float  lenRcp = rsqrt(lenSq);
								    float  len    = lenSq * lenRcp;

								#if ENABLE_REPROJECTION
								    // Compute the ray direction which passes through the center of the voxel s.t. its ViewSpaceZ = 1.
								    float3 rayCenterDir = mul(-float3(centerCoord, 1), (float3x3)_VBufferCoordToViewDirWS);
								#else
								    float3 rayCenterDir = rayDir;
								#endif

								    Ray ray;
								    ray.originWS    = GetCurrentViewPosition();
								    ray.ratioLenToZ = len;                   // The length of a direction with ViewSpaceZ = 1
								    ray.directionWS = rayDir * lenRcp;       // Normalized
								    ray.centerDirWS = rayCenterDir * lenRcp; // Scaled by the same factor, so not normalized

								    // TODO
								    LightLoopContext context;
								    context.shadowContext = InitShadowContext();
								    uint featureFlags = 0xFFFFFFFF; // Evaluate all light categories

								    PositionInputs posInput = GetPositionInput(voxelCoord, _VBufferResolution.zw, tileCoord);

								    FillVolumetricLightingBuffer(context, featureFlags, posInput, ray);
								}