//--------------------------------------------------------------------------------------------------
// Definitions
//--------------------------------------------------------------------------------------------------

// Four kernel variants share the single 'VolumetricLighting' entry point below.
// Each variant renames the entry point and selects:
//   - the light loop flavor (all lights in a single pass vs. clustered light lists);
//   - whether temporal reprojection is enabled.
#pragma kernel VolumetricLightingAllLights       VolumetricLighting=VolumetricLightingAllLights       ENABLE_REPROJECTION=0 LIGHTLOOP_SINGLE_PASS
#pragma kernel VolumetricLightingAllLightsReproj VolumetricLighting=VolumetricLightingAllLightsReproj ENABLE_REPROJECTION=1 LIGHTLOOP_SINGLE_PASS
#pragma kernel VolumetricLightingClustered       VolumetricLighting=VolumetricLightingClustered       ENABLE_REPROJECTION=0 LIGHTLOOP_TILE_PASS USE_CLUSTERED_LIGHTLIST
#pragma kernel VolumetricLightingClusteredReproj VolumetricLighting=VolumetricLightingClusteredReproj ENABLE_REPROJECTION=1 LIGHTLOOP_TILE_PASS USE_CLUSTERED_LIGHTLIST

// #pragma debug

// Set to 1 to paint voxels whose current radiance strongly disagrees with the reprojected history.
#define DEBUG_REPROJECTION 0

#include "../../../ShaderPass/ShaderPass.cs.hlsl"
#define SHADERPASS SHADERPASS_VOLUMETRIC_LIGHTING

#include "../../../ShaderConfig.cs.hlsl"

// The V-buffer resolution is derived from the screen resolution and the selected preset.
#if (SHADEROPTIONS_VOLUMETRIC_LIGHTING_PRESET == 1)
    // E.g. for 1080p: (1920/8)x(1080/8)x(128) = 4,147,200 voxels
    #define VBUFFER_TILE_SIZE   8
    #define VBUFFER_SLICE_COUNT 128
#else
    // E.g. for 1080p: (1920/4)x(1080/4)x(256) = 33,177,600 voxels
    #define VBUFFER_TILE_SIZE   4
    #define VBUFFER_SLICE_COUNT 256
#endif

// Thread groups are declared as 1D groups of (GROUP_SIZE_1D x GROUP_SIZE_1D) threads.
#define GROUP_SIZE_1D 16
#define GROUP_SIZE_2D (GROUP_SIZE_1D * GROUP_SIZE_1D)

//--------------------------------------------------------------------------------------------------
// Included headers
//--------------------------------------------------------------------------------------------------

#include "CoreRP/ShaderLibrary/Common.hlsl"
#include "CoreRP/ShaderLibrary/Filtering.hlsl"
#include "CoreRP/ShaderLibrary/VolumeRendering.hlsl"
#include "CoreRP/ShaderLibrary/SpaceFillingCurves.hlsl"

#include "../VolumetricLighting.cs.hlsl"
#include "../../../ShaderVariables.hlsl"

#define UNITY_MATERIAL_VOLUMETRIC // Define before including Lighting.hlsl and Material.hlsl
#include "../../../Lighting/Lighting.hlsl" // Includes Material.hlsl
#include "../../../Lighting/LightEvaluation.hlsl"
#include "../../../Lighting/VBuffer.hlsl"

//--------------------------------------------------------------------------------------------------
// Inputs & outputs
//--------------------------------------------------------------------------------------------------

RW_TEXTURE3D(float4, _VBufferLightingIntegral); // RGB = radiance, A = optical depth
RW_TEXTURE3D(float4, _VBufferLightingFeedback); // RGB = radiance, A = interval length
TEXTURE3D(_VBufferLightingHistory);             // RGB = radiance, A = interval length

// TODO: avoid creating another Constant Buffer...
CBUFFER_START(UnityVolumetricLighting)
    float4   _VBufferSampleOffset;     // {x, y, z}, w = rendered frame count
    float4x4 _VBufferCoordToViewDirWS; // Actually just 3x3, but Unity can only set 4x4
CBUFFER_END

//--------------------------------------------------------------------------------------------------
// Implementation
//--------------------------------------------------------------------------------------------------

// A camera ray through a column of voxels.
struct Ray
{
    float3 originWS;
    float3 directionWS; // Normalized, stratified
    float  ratioLenToZ; // Length of the (stratified) direction scaled s.t. its ViewSpaceZ = 1;
                        // multiply a view-space Z by it to get a distance along the ray
    float3 centerDirWS; // Not normalized, centered
};

// Returns the point at the distance 't' along the (stratified) ray direction.
float3 GetPointAtDistance(Ray ray, float t)
{
    return ray.originWS + t * ray.directionWS;
}

// Returns the point at the parameter 't' along the direction passing through the voxel center.
float3 GetCenterAtDistance(Ray ray, float t)
{
    return ray.originWS + t * ray.centerDirWS;
}

// Computes the light integral (in-scattered radiance) within the voxel.
// Multiplication by the scattering coefficient and the phase function is performed outside.
//
// context        - light loop context forwarded to the light evaluation functions.
// featureFlags   - LIGHTFEATUREFLAGS_* mask selecting which light categories to evaluate.
// posInput       - position inputs; 'positionWS' is overwritten per sample (local copy).
// ray            - the ray passing through the voxel.
// t0, t1         - distances along the ray to the front and the back of the voxel.
// dt             - length of the interval (callers pass t1 - t0).
// rndVal         - the sample value used for importance sampling along the interval.
// extinction     - extinction coefficient, considered constant within the voxel.
// clusterIndices - (tile pass only) light cluster indices at the front [0] and the back [1] of the voxel.
// clusterDepths  - (tile pass only) min linear depths of those two clusters.
//
// Returns the radiance accumulated from directional and punctual lights.
float3 EvaluateVoxelLighting(LightLoopContext context, uint featureFlags, PositionInputs posInput,
                             Ray ray, float t0, float t1, float dt, float rndVal, float extinction
#ifdef LIGHTLOOP_TILE_PASS
                             , uint clusterIndices[2], float clusterDepths[2])
#else
                             )
#endif
{
    float3 voxelRadiance = 0;

    BakeLightingData unused; // Unused for now, so define once

    if (featureFlags & LIGHTFEATUREFLAGS_DIRECTIONAL)
    {
        // A single sample position is shared by all directional lights.
        float tOffset, weight;
        ImportanceSampleHomogeneousMedium(rndVal, extinction, dt, tOffset, weight);

        float t = t0 + tOffset;
        posInput.positionWS = GetPointAtDistance(ray, t);

        for (uint i = 0; i < _DirectionalLightCount; ++i)
        {
            // Fetch the light.
            DirectionalLightData light = _DirectionalLightDatas[i];
            float3 L = -light.forward; // Lights point backwards in Unity

            float3 color; float attenuation;
            EvaluateLight_Directional(context, posInput, light, unused, 0, L,
                                      color, attenuation);

            // Note: the 'weight' accounts for transmittance from 't0' to 't'.
            float intensity = attenuation * weight;

            // Compute the amount of in-scattered radiance.
            voxelRadiance += intensity * color;
        }
    }

#ifdef LIGHTLOOP_TILE_PASS
    // Loop over 1 or 2 light clusters.
    int cluster = 0;
    do
    {
        // Clip the integration interval to the part of the voxel covered by the current cluster.
        float tMin = max(t0, ray.ratioLenToZ * clusterDepths[cluster]);
        float tMax = t1;

        if (cluster == 0 && (clusterIndices[0] != clusterIndices[1]))
        {
            tMax = min(t1, ray.ratioLenToZ * clusterDepths[1]);
        }
#else
        float tMin = t0;
        float tMax = t1;
#endif // LIGHTLOOP_TILE_PASS

        if (featureFlags & LIGHTFEATUREFLAGS_PUNCTUAL)
        {
            uint lightCount, lightStart;

        #ifdef LIGHTLOOP_TILE_PASS
            GetCountAndStartCluster(posInput.tileCoord, clusterIndices[cluster], LIGHTCATEGORY_PUNCTUAL,
                                    lightStart, lightCount);
        #else
            lightCount = _PunctualLightCount;
            lightStart = 0;
        #endif // LIGHTLOOP_TILE_PASS

            if (lightCount > 0)
            {
                // Invariant for both loops below: while (i <= last), 'light' holds the light #i.
                LightData light = FetchLight(lightStart, 0);

                uint i = 0, last = lightCount - 1;

                // Box lights require special handling (see the next while loop).
                while (i <= last && light.lightType != GPULIGHTTYPE_PROJECTOR_BOX)
                {
                    float tEntr = tMin;
                    float tExit = tMax;

                    bool sampleLight = true;

                    // Perform ray-cone intersection for pyramid and spot lights.
                    if (light.lightType != GPULIGHTTYPE_POINT)
                    {
                        float lenMul = 1;

                        if (light.lightType == GPULIGHTTYPE_PROJECTOR_PYRAMID)
                        {
                            // 'light.right' and 'light.up' vectors are pre-scaled on the CPU
                            // s.t. if you were to place them at the distance of 1 directly in front
                            // of the light, they would give you the "footprint" of the light.
                            // For spot lights, the cone fit is exact.
                            // For pyramid lights, however, this is the "inscribed" cone
                            // (contained within the pyramid), and we want to intersect
                            // the "escribed" cone (which contains the pyramid).
                            // Therefore, we have to scale the radii by the sqrt(2).
                            lenMul = rsqrt(2);
                        }

                        float3 coneAxisX = lenMul * light.right;
                        float3 coneAxisY = lenMul * light.up;

                        sampleLight = IntersectRayCone(ray.originWS, ray.directionWS,
                                                       light.positionWS, light.forward,
                                                       coneAxisX, coneAxisY,
                                                       tMin, tMax, tEntr, tExit);
                    }

                    if (sampleLight)
                    {
                        // We are unable to adequately sample features larger
                        // than the half of the length of the integration interval
                        // divided by the number of temporal samples (7).
                        // Therefore, we apply this hack to reduce flickering.
                        float hackMinDistSq = Sq(dt * (0.5 / 7));

                        float t, distSq, rcpPdf;
                        ImportanceSamplePunctualLight(rndVal, light.positionWS,
                                                      ray.originWS, ray.directionWS,
                                                      tEntr, tExit, t, distSq, rcpPdf,
                                                      hackMinDistSq);

                        posInput.positionWS = GetPointAtDistance(ray, t);

                        float3 lightToSample = posInput.positionWS - light.positionWS;
                        float  distRcp       = rsqrt(distSq);
                        float  dist          = distSq * distRcp;
                        float  distProj      = dot(lightToSample, light.forward);
                        float4 distances     = float4(dist, distSq, distRcp, distProj);
                        float3 L             = -lightToSample * distRcp;

                        float3 color; float attenuation;
                        EvaluateLight_Punctual(context, posInput, light, unused, 0, L, lightToSample, distances,
                                               color, attenuation);

                        float intensity = attenuation * rcpPdf;

                        // Compute transmittance from 't0' to 't'.
                        intensity *= TransmittanceHomogeneousMedium(extinction, t - t0);

                        // Compute the amount of in-scattered radiance.
                        voxelRadiance += color * intensity;
                    }

                    // Advance to the next light (the index is clamped to stay within the list).
                    light = FetchLight(lightStart, min(++i, last));
                }

                while (i <= last) // GPULIGHTTYPE_PROJECTOR_BOX
                {
                    // 'light' already holds the light #i (fetched by the previous loop or by
                    // the end of the previous iteration). It must be evaluated BEFORE fetching
                    // the next one; fetching first would skip the first box light and
                    // evaluate the last light twice.
                    // NOTE(review): every remaining light is treated as a box light, which
                    // presumably relies on the light list being sorted by type - confirm.
                    light.lightType = GPULIGHTTYPE_PROJECTOR_BOX;

                    // Convert the box light from OBB to AABB.
                    // 'light.right' and 'light.up' vectors are pre-scaled on the CPU by (2/w) and (2/h).
                    float3x3 rotMat = float3x3(light.right, light.up, light.forward);

                    float3 o = mul(rotMat, ray.originWS - light.positionWS);
                    float3 d = mul(rotMat, ray.directionWS);

                    float range = light.size.x;

                    float3 boxPt0 = float3(-1, -1, 0);
                    float3 boxPt1 = float3( 1,  1, range);

                    float tEntr, tExit;

                    if (IntersectRayAABB(o, d, boxPt0, boxPt1, tMin, tMax, tEntr, tExit))
                    {
                        float tOffset, weight;
                        ImportanceSampleHomogeneousMedium(rndVal, extinction, tExit - tEntr, tOffset, weight);

                        float t = tEntr + tOffset;
                        posInput.positionWS = GetPointAtDistance(ray, t);

                        float3 L             = -light.forward;
                        float3 lightToSample = posInput.positionWS - light.positionWS;
                        float  distProj      = dot(lightToSample, light.forward);
                        float4 distances     = float4(1, 1, 1, distProj);

                        float3 color; float attenuation;
                        EvaluateLight_Punctual(context, posInput, light, unused, 0, L, lightToSample, distances,
                                               color, attenuation);

                        // Note: the 'weight' accounts for transmittance from 'tEntr' to 't'.
                        float intensity = attenuation * weight;

                        // Compute transmittance from 't0' to 'tEntr'.
                        intensity *= TransmittanceHomogeneousMedium(extinction, tEntr - t0);

                        // Compute the amount of in-scattered radiance.
                        voxelRadiance += intensity * color;
                    }

                    // Advance to the next light (the index is clamped to stay within the list).
                    light = FetchLight(lightStart, min(++i, last));
                }
            }
        }

#ifdef LIGHTLOOP_TILE_PASS
        cluster++;
        // Check whether the voxel is completely inside the light cluster.
    } while ((cluster < 2) && (clusterIndices[0] != clusterIndices[1]));
#endif // LIGHTLOOP_TILE_PASS

    return voxelRadiance;
}

// Computes the in-scattered radiance along the ray.
// Marches through all slices front to back. For each voxel, it evaluates the lighting,
// optionally blends it with the reprojected history (writing the result to
// '_VBufferLightingFeedback'), and stores the running integral of radiance along with
// the optical depth at the voxel center in '_VBufferLightingIntegral'.
void FillVolumetricLightingBuffer(LightLoopContext context, uint featureFlags,
                                  PositionInputs posInput, Ray ray)
{
    float z0 = _VBufferDepthEncodingParams.x; // Start integration from the near plane
    float t0 = ray.ratioLenToZ * z0;
    float de = rcp(VBUFFER_SLICE_COUNT);      // Log-encoded distance between slices

    float3 totalRadiance = 0;
    float  opticalDepth  = 0;

    // NOTE(review): this only equals VBUFFER_SLICE_COUNT as long as the value stored in
    // '_VBufferDepthEncodingParams.x' does not exceed the slice count - confirm.
    uint sliceCountHack = max(VBUFFER_SLICE_COUNT, (uint)_VBufferDepthEncodingParams.x); // Prevent unrolling...

#ifdef LIGHTLOOP_TILE_PASS
    // Our voxel is not necessarily completely inside a single light cluster.
    // Note that Z-binning can solve this problem, as we can iterate over all Z-bins
    // to compute min/max light indices, and then use this range for the entire slice.
    uint  clusterIndices[2];
    float clusterDepths[2];
    clusterIndices[0] = GetLightClusterIndex(posInput.tileCoord, z0);
    clusterDepths[0]  = GetLightClusterMinLinearDepth(posInput.tileCoord, clusterIndices[0]);
#endif // LIGHTLOOP_TILE_PASS

    // TODO: replace 'sliceCountHack' with VBUFFER_SLICE_COUNT when the shader compiler bug is fixed.
    for (uint slice = 0; slice < sliceCountHack; slice++)
    {
        float e1 = slice * de + de; // (slice + 1) / sliceCount
        float z1 = DecodeLogarithmicDepth(e1, _VBufferDepthEncodingParams);
        float t1 = ray.ratioLenToZ * z1;
        float dt = t1 - t0;

    #ifdef LIGHTLOOP_TILE_PASS
        clusterIndices[1] = GetLightClusterIndex(posInput.tileCoord, z1);
        clusterDepths[1]  = GetLightClusterMinLinearDepth(posInput.tileCoord, clusterIndices[1]);
    #endif

        // Compute the -exact- position of the center of the voxel.
        // It's important since the accumulated value of the integral is stored at the center.
        // We will use it for participating media sampling and reprojection.
        float  tc       = t0 + 0.5 * dt;
        float3 centerWS = GetCenterAtDistance(ray, tc);

        // Sample the participating medium at 'tc' (or 'centerWS').
        // We consider it to be constant along the interval [t0, t1] (within the voxel).
        // TODO: piecewise linear.
        float3 scattering = _GlobalFog_Scattering;
        float  extinction = _GlobalFog_Extinction;

        // TODO: define a function ComputeGlobalFogCoefficients(float3 centerWS),
        // which allows procedural definition of extinction and scattering.

    #if ENABLE_REPROJECTION
        // This is a sequence of 7 equidistant numbers from 1/14 to 13/14.
        // Each of them is the centroid of the interval of length 2/14.
        float rndVal = _VBufferSampleOffset.z;
    #else
        float rndVal = 0.5;
    #endif

        float3 voxelRadiance = EvaluateVoxelLighting(context, featureFlags, posInput,
                                                     ray, t0, t1, dt, rndVal, extinction
    #ifdef LIGHTLOOP_TILE_PASS
                                                     , clusterIndices, clusterDepths);
    #else
                                                     );
    #endif

    #if ENABLE_REPROJECTION
        // Reproject the history at 'centerWS'.
        float2 reprojPosNDC = ComputeNormalizedDeviceCoordinates(centerWS, _PrevViewProjMatrix);
        float  reprojZ      = mul(_PrevViewProjMatrix, float4(centerWS, 1)).w;
        float4 reprojValue  = SampleVBuffer(TEXTURE3D_PARAM(_VBufferLightingHistory, s_trilinear_clamp_sampler),
                                            false,
                                            reprojPosNDC,
                                            reprojZ,
                                            _VBufferScaleAndSliceCount,
                                            _VBufferDepthEncodingParams);

        // Compute the exponential moving average over 'n' frames:
        // X = (1 - a) * ValueAtFrame[n] + a * AverageOverPreviousFrames.
        // We want each sample to be uniformly weighted by (1 / n):
        // X = (1 / n) * Sum{i from 1 to n}{ValueAtFrame[i]}.
        // Therefore, we get:
        // (1 - a) = (1 / n) => a = (1 - 1 / n) = (n - 1) / n,
        // X = (1 / n) * ValueAtFrame[n] + (1 - 1 / n) * AverageOverPreviousFrames.
        // Why does it work? We need to make the following assumption:
        // AverageOverPreviousFrames ≈ AverageOverFrames[n - 1].
        // AverageOverFrames[n - 1] = (1 / (n - 1)) * Sum{i from 1 to n - 1}{ValueAtFrame[i]}.
        // This implies that the reprojected (accumulated) value has mostly converged.
        // X = (1 / n) * ValueAtFrame[n] + ((n - 1) / n) * (1 / (n - 1)) * Sum{i from 1 to n - 1}{ValueAtFrame[i]}.
        // X = (1 / n) * ValueAtFrame[n] + (1 / n) * Sum{i from 1 to n - 1}{ValueAtFrame[i]}.
        // X = Sum{i from 1 to n}{ValueAtFrame[i] / n}.
        float numFrames     = 7;
        float frameWeight   = 1 / numFrames;
        float historyWeight = 1 - frameWeight;

        // The accuracy of the integral linearly decreases with the length of the interval.
        // Therefore, reprojecting longer intervals should result in a lower confidence.
        // TODO: doesn't seem to be worth it, removed for now.

        // Perform temporal blending.
        // Both radiance values are obtained by integrating over line segments of different length.
        // Blending only makes sense if the length of both intervals is the same.
        // Therefore, the reprojected radiance needs to be rescaled by (frame_dt / reproj_dt).
        // A stored interval length of 0 marks a history sample which cannot be used.
        bool   reprojSuccess   = reprojValue.a != 0;
        float  blendFactor     = reprojSuccess ? historyWeight : 0;
        float  reprojRcpLen    = reprojSuccess ? rcp(reprojValue.a) : 0;
        float  lengthScale     = dt * reprojRcpLen;
        float3 reprojRadiance  = reprojValue.rgb;
        float3 blendedRadiance = (1 - blendFactor) * voxelRadiance + blendFactor * lengthScale * reprojRadiance;

        // Store the feedback for the voxel.
        // TODO: dynamic lights (which update their position, rotation, cookie or shadow at runtime)
        // do not support reprojection and should neither read nor write to the history buffer.
        // This will cause them to alias, but it is the only way to prevent ghosting.
        _VBufferLightingFeedback[uint3(posInput.positionSS, slice)] = float4(blendedRadiance, dt);
    #else
        float3 blendedRadiance = voxelRadiance;
    #endif

        // 'reprojValue' only exists when reprojection is enabled, so the debug view
        // must be restricted to the reprojection kernels.
    #if DEBUG_REPROJECTION && ENABLE_REPROJECTION
        if (distance(voxelRadiance, reprojValue.rgb) > 0.1) blendedRadiance = float3(1000, 0, 0);
    #endif

        // Compute the transmittance from the camera to 't0'.
        float transmittance = Transmittance(opticalDepth);

        // Integral{a, b}{Transmittance(0, t) * L_s(t) dt} = Transmittance(0, a) * Integral{a, b}{Transmittance(0, t - a) * L_s(t) dt}.
        totalRadiance += (transmittance * IsotropicPhaseFunction()) * scattering * blendedRadiance;

        // Compute the optical depth up to the center of the interval.
        opticalDepth += 0.5 * extinction * dt;

        // Store the voxel data.
        _VBufferLightingIntegral[uint3(posInput.positionSS, slice)] = float4(totalRadiance, opticalDepth);

        // Compute the optical depth up to the end of the interval.
        opticalDepth += 0.5 * extinction * dt;

        // The back of this voxel is the front of the next one.
        t0 = t1;

    #ifdef LIGHTLOOP_TILE_PASS
        clusterIndices[0] = clusterIndices[1];
        clusterDepths[0]  = clusterDepths[1];
    #endif // LIGHTLOOP_TILE_PASS
    }
}

// Kernel entry point (renamed per variant by the '#pragma kernel' lines).
// Each thread handles one column of voxels (a fixed 2D voxel coordinate, all slices):
// it builds the ray through that column and fills the V-buffer along it.
[numthreads(GROUP_SIZE_2D, 1, 1)]
void VolumetricLighting(uint2 groupId       : SV_GroupID,
                        uint  groupThreadId : SV_GroupThreadID)
{
    // Perform compile-time checks.
    if (!IsPower2(VBUFFER_TILE_SIZE) || !IsPower2(TILE_SIZE_CLUSTERED)) return;

    // Note: any factor of 64 is a suitable wave size for our algorithm.
    // NOTE(review): the three indices below are not referenced anywhere else in this kernel.
    uint waveIndex = WaveReadFirstLane(groupThreadId / 64);
    uint laneIndex = groupThreadId % 64;
    uint quadIndex = laneIndex / 4;

    // Arrange threads in the Morton order to optimally match the memory layout of GCN tiles.
    uint2 groupCoord  = DecodeMorton2D(groupThreadId);
    uint2 groupOffset = groupId * GROUP_SIZE_1D;
    uint2 voxelCoord  = groupOffset + groupCoord;
    uint2 tileCoord   = voxelCoord * VBUFFER_TILE_SIZE / TILE_SIZE_CLUSTERED;

    uint voxelsPerClusterTile = Sq((uint)(TILE_SIZE_CLUSTERED / VBUFFER_TILE_SIZE));

    if (voxelsPerClusterTile >= 64)
    {
        // TODO: this is a compile-time test, make sure the compiler actually scalarizes.
        tileCoord = WaveReadFirstLane(tileCoord);
    }

    // Discard the threads which fall outside of the V-buffer.
    [branch] if (voxelCoord.x >= (uint)_VBufferResolution.x ||
                 voxelCoord.y >= (uint)_VBufferResolution.y)
    {
        return;
    }

    float2 centerCoord = voxelCoord + 0.5;
#if ENABLE_REPROJECTION
    float2 sampleCoord = centerCoord + _VBufferSampleOffset.xy;
#else
    float2 sampleCoord = centerCoord;
#endif

    // Compute the (stratified) ray direction s.t. its ViewSpaceZ = 1.
    float3 rayDir = mul(-float3(sampleCoord, 1), (float3x3)_VBufferCoordToViewDirWS);
    float  lenSq  = dot(rayDir, rayDir);
    float  lenRcp = rsqrt(lenSq);
    float  len    = lenSq * lenRcp;

#if ENABLE_REPROJECTION
    // Compute the ray direction which passes through the center of the voxel s.t. its ViewSpaceZ = 1.
    float3 rayCenterDir = mul(-float3(centerCoord, 1), (float3x3)_VBufferCoordToViewDirWS);
#else
    float3 rayCenterDir = rayDir;
#endif

    Ray ray;
    ray.originWS    = GetCurrentViewPosition();
    ray.ratioLenToZ = len;
    ray.directionWS = rayDir * lenRcp;
    ray.centerDirWS = rayCenterDir * lenRcp; // Scaled by the length of the stratified direction

    // TODO
    LightLoopContext context;
    context.shadowContext = InitShadowContext();
    uint featureFlags = 0xFFFFFFFF; // All light features are enabled

    PositionInputs posInput = GetPositionInput(voxelCoord, _VBufferResolution.zw, tileCoord);

    FillVolumetricLightingBuffer(context, featureFlags, posInput, ray);
}