//--------------------------------------------------------------------------------------------------
// Definitions
//--------------------------------------------------------------------------------------------------

#pragma kernel VolumeVoxelizationBruteforce VolumeVoxelization=VolumeVoxelizationBruteforce LIGHTLOOP_SINGLE_PASS
#pragma kernel VolumeVoxelizationClustered  VolumeVoxelization=VolumeVoxelizationClustered  LIGHTLOOP_TILE_PASS   USE_CLUSTERED_LIGHTLIST
// #pragma enable_d3d11_debug_symbols

#include "../../ShaderPass/ShaderPass.cs.hlsl"
#define SHADERPASS SHADERPASS_VOLUME_VOXELIZATION

#include "../../ShaderConfig.cs.hlsl"
#if (SHADEROPTIONS_VOLUMETRIC_LIGHTING_PRESET == 1)
    // E.g. for 1080p: (1920/8)x(1080/8)x(64) = 2,073,600 voxels
    #define VBUFFER_TILE_SIZE   8
    #define VBUFFER_SLICE_COUNT 64
#else
    // E.g. for 1080p: (1920/4)x(1080/4)x(128) = 16,588,800 voxels
    #define VBUFFER_TILE_SIZE   4
    #define VBUFFER_SLICE_COUNT 128
#endif

#define GROUP_SIZE_1D     8
#define SOFT_VOXELIZATION 1 // Hack which attempts to determine the partial coverage of the voxel

//--------------------------------------------------------------------------------------------------
// Included headers
//--------------------------------------------------------------------------------------------------

#include "CoreRP/ShaderLibrary/Common.hlsl"
#include "CoreRP/Utilities/GeometryUtils.cs.hlsl"

#include "../../ShaderVariables.hlsl"
#include "VolumetricLighting.cs.hlsl"

#define UNITY_MATERIAL_VOLUMETRIC // Define before including Lighting.hlsl and Material.hlsl
#include "../Lighting.hlsl" // Includes Material.hlsl

#pragma only_renderers d3d11 ps4 xboxone vulkan metal

//--------------------------------------------------------------------------------------------------
// Inputs & outputs
//--------------------------------------------------------------------------------------------------

// Element types are those the kernel reads back below:
// 'OrientedBBox obb = _VolumeBounds[i]' and 'SampleVolumeMask(DensityVolumeData, ...)'.
StructuredBuffer<OrientedBBox>      _VolumeBounds;
StructuredBuffer<DensityVolumeData> _VolumeData;

TEXTURE3D(_VolumeMaskAtlas);

// RGB = sqrt(scattering), A = sqrt(extinction)
// NOTE(review): this kernel stores the accumulated values as-is (no sqrt is applied here) - confirm the encoding.
RW_TEXTURE3D(float4, _VBufferDensity);

// TODO: avoid creating another Constant Buffer...
CBUFFER_START(UnityVolumetricLighting)
    float4x4 _VBufferCoordToViewDirWS;  // Actually just 3x3, but Unity can only set 4x4
    float4   _VBufferSampleOffset;      // Not used by this shader
    float    _CornetteShanksConstant;   // Not used by this shader
    uint     _NumVisibleDensityVolumes;
    float3   _VolumeMaskDimensions;     // x = 1/totalTextures, y = 1/textureSize, z = textureSize
CBUFFER_END

//--------------------------------------------------------------------------------------------------
// Implementation
//--------------------------------------------------------------------------------------------------

// Samples the alpha channel of the density mask belonging to 'volumeData' from '_VolumeMaskAtlas'.
//
// volumeData    - supplies the atlas slot (textureIndex) and the tiling/scroll applied to the UVs.
// voxelCenterUV - sample position; the caller passes the box-space position remapped by (* 0.5 + 0.5).
// VxUV/VyUV/VzUV - voxel extents expressed in the same UV space; only used to select the mip level.
//
// The textures are stacked along Z: slot 'textureIndex' occupies
// [textureIndex / totalTextures, (textureIndex + 1) / totalTextures] of the atlas Z range.
float SampleVolumeMask(DensityVolumeData volumeData, float3 voxelCenterUV, float3 VxUV, float3 VyUV, float3 VzUV)
{
    float offset = volumeData.textureIndex * _VolumeMaskDimensions.x;

    // Half of one texel along Z, expressed in *atlas* UV space.
    // '_VolumeMaskDimensions.y' is the texel size within a single texture, so it has to be scaled
    // by 1/totalTextures to be comparable with 'offset' and the clamp range below. Without the
    // scale the border grows with the number of textures in the atlas (and the clamp range
    // inverts once totalTextures > textureSize). Identical to the unscaled value for 1 texture.
    float clampBorder = 0.5f * _VolumeMaskDimensions.y * _VolumeMaskDimensions.x;

    // Scale and bias the UVs and then take fractional part, will be in [0,1] range
    voxelCenterUV = frac(voxelCenterUV * volumeData.textureTiling + volumeData.textureScroll);

    // Squeeze Z into the slot of this texture and keep the sample half a texel away from the
    // slot boundaries so that linear filtering does not fetch from the neighbouring textures.
    voxelCenterUV.z = voxelCenterUV.z * _VolumeMaskDimensions.x;
    voxelCenterUV.z += offset;
    voxelCenterUV.z = clamp(voxelCenterUV.z, offset + clampBorder, offset + _VolumeMaskDimensions.x - clampBorder);

    // The extents are converted from UVs to texels before computing the LOD.
    float lod = ComputeTextureLOD(VxUV * _VolumeMaskDimensions.z, VyUV * _VolumeMaskDimensions.z, VzUV * _VolumeMaskDimensions.z);

    float maskValue = SAMPLE_TEXTURE3D_LOD(_VolumeMaskAtlas, s_linear_clamp_sampler, voxelCenterUV, lod).a;

    return maskValue;
}

// Walks all depth slices of a single voxel column front to back. For every slice it starts from
// the global fog coefficients, adds the (coverage- and mask-weighted) scattering and extinction of
// every density volume overlapping the voxel, and writes the result to '_VBufferDensity'.
//
// posInput         - 'positionSS' is the XY voxel coordinate, 'tileCoord' selects the light cluster tile.
// rayOriginWS      - origin of the ray passing through the centers of the voxels of the column.
// rayUnDirWS       - unnormalized ray direction; the voxel center is 'rayOriginWS + z * rayUnDirWS'.
// voxelAxisRight/Up/Forward - unnormalized voxel axes; they are scaled by the depth inside the loop.
void FillVolumetricDensityBuffer(PositionInputs posInput, float3 rayOriginWS, float3 rayUnDirWS,
                                 float3 voxelAxisRight, float3 voxelAxisUp, float3 voxelAxisForward)
{
    float n  = _VBufferDepthDecodingParams.x + _VBufferDepthDecodingParams.z;
    float z0 = n;                           // Start the computation from the near plane
    float de = rcp(VBUFFER_SLICE_COUNT);    // Log-encoded distance between slices

#ifdef USE_CLUSTERED_LIGHTLIST
    // The voxel can overlap up to 2 light clusters along Z, so we have to iterate over both.
    // TODO: implement Z-binning which makes Z-range queries easy.
    uint volumeStarts[2], volumeCounts[2];

    // Slot [0] always describes the cluster at the front face (z0) of the current voxel.
    GetCountAndStartCluster(posInput.tileCoord, GetLightClusterIndex(posInput.tileCoord, z0),
                            LIGHTCATEGORY_DENSITY_VOLUME, volumeStarts[0], volumeCounts[0]);
#endif // USE_CLUSTERED_LIGHTLIST

#if defined(SHADER_API_METAL)
    [fastopt]
    for (uint slice = 0; slice < VBUFFER_SLICE_COUNT; slice++)
#else
    uint sliceCountHack = max(VBUFFER_SLICE_COUNT, (uint)_VBufferDepthEncodingParams.w); // Prevent unrolling...

    // TODO: replace 'sliceCountHack' with VBUFFER_SLICE_COUNT when the shader compiler bug is fixed.
    for (uint slice = 0; slice < sliceCountHack; slice++)
#endif
    {
        uint3 voxelCoord = uint3(posInput.positionSS, slice);

        float e1 = slice * de + de; // (slice + 1) / sliceCount
    #if defined(SHADER_API_METAL)
        // Warning: this compiles, but it's nonsense. Use DecodeLogarithmicDepthGeneralized().
        float z1 = DecodeLogarithmicDepth(e1, _VBufferDepthDecodingParams);
    #else
        float z1 = DecodeLogarithmicDepthGeneralized(e1, _VBufferDepthDecodingParams);
    #endif

        // [z0, z1] is the depth range of the voxel, 'z' is the depth of its center.
        float halfDZ = 0.5 * (z1 - z0);
        float z      = z0 + halfDZ;

        float3 voxelCenterWS = rayOriginWS + z * rayUnDirWS; // Works due to the length of the dir

        // Dimensions of the voxel as we step along the ray.
        float3 voxelRightSize = z * voxelAxisRight;
        float3 voxelUpSize    = z * voxelAxisUp;
        float3 voxelDepthSize = halfDZ * voxelAxisForward;

        // TODO: define a function ComputeGlobalFogCoefficients(float3 voxelCenterWS),
        // which allows procedural definition of extinction and scattering.
        float3 voxelScattering = _GlobalScattering;
        float  voxelExtinction = _GlobalExtinction;

    #ifdef USE_CLUSTERED_LIGHTLIST
        // Slot [1] describes the cluster at the back face (z1) of the current voxel.
        GetCountAndStartCluster(posInput.tileCoord, GetLightClusterIndex(posInput.tileCoord, z1),
                                LIGHTCATEGORY_DENSITY_VOLUME, volumeStarts[1], volumeCounts[1]);

        // Iterate over all volumes within 2 (not necessarily unique) clusters overlapping the voxel along Z.
        // We need to skip duplicates, but it's not too difficult since volumes are sorted by index.
        uint i = 0, j = 0;

        if (i < volumeCounts[0] || j < volumeCounts[1])
        {
            // At least one of the clusters is non-empty.
            uint volumeIndices[2];
            // Fetch two initial indices from both clusters.
            volumeIndices[0] = FetchIndexWithBoundsCheck(volumeStarts[0], volumeCounts[0], i);
            volumeIndices[1] = FetchIndexWithBoundsCheck(volumeStarts[1], volumeCounts[1], j);

            do
            {
                // Process volumes in order.
                uint volumeIndex = min(volumeIndices[0], volumeIndices[1]);
    #else // USE_CLUSTERED_LIGHTLIST
        {
            for (uint volumeIndex = 0; volumeIndex < _NumVisibleDensityVolumes; volumeIndex++)
            {
    #endif // USE_CLUSTERED_LIGHTLIST

                OrientedBBox obb = _VolumeBounds[volumeIndex];

                float3x3 obbFrame   = float3x3(obb.right, obb.up, cross(obb.up, obb.right));
                float3   obbExtents = float3(obb.extentX, obb.extentY, obb.extentZ);

                // Express the voxel center in the local coordinate system of the box.
                float3 voxelCenterBS = mul(voxelCenterWS - obb.center, transpose(obbFrame));
                float3 voxelCenterUV = (voxelCenterBS / obbExtents);

            #if SOFT_VOXELIZATION
                // We need to determine which is the face closest to 'voxelCenterBS'.
                float minFaceDist = abs(obbExtents.x - abs(voxelCenterBS.x));

                // TODO: use v_cubeid_f32.
                uint  axisIndex;
                float faceDist;

                faceDist    = abs(obbExtents.y - abs(voxelCenterBS.y));
                axisIndex   = (faceDist < minFaceDist) ? 1 : 0;
                minFaceDist = min(faceDist, minFaceDist);

                faceDist    = abs(obbExtents.z - abs(voxelCenterBS.z));
                axisIndex   = (faceDist < minFaceDist) ? 2 : axisIndex;

                float3 N = float3(axisIndex == 0 ? 1 : 0, axisIndex == 1 ? 1 : 0, axisIndex == 2 ? 1 : 0);

                // We have determined the normal of the closest face.
                // We now have to construct the diagonal of the voxel with the longest extent along this normal.
                float3 minDiagPointBS, maxDiagPointBS;

                float3 voxelAxisRightBS   = mul(voxelAxisRight,   transpose(obbFrame));
                float3 voxelAxisUpBS      = mul(voxelAxisUp,      transpose(obbFrame));
                float3 voxelAxisForwardBS = mul(voxelAxisForward, transpose(obbFrame));

                // Start at the center of the voxel.
                minDiagPointBS = maxDiagPointBS = voxelCenterBS;

                // Walk half of the voxel depth along the forward axis, in the direction agreeing with N.
                bool  normalFwd  = dot(voxelAxisForwardBS, N) >= 0;
                float mulForward = normalFwd ? halfDZ : -halfDZ;
                // The lateral axes scale with depth, so each end point uses the depth of its own face.
                float mulMin     = normalFwd ? z0 : z1;
                float mulMax     = normalFwd ? z1 : z0;

                minDiagPointBS -= mulForward * voxelAxisForwardBS;
                maxDiagPointBS += mulForward * voxelAxisForwardBS;

                float mulUp = dot(voxelAxisUpBS, N) >= 0 ? 1 : -1;

                minDiagPointBS -= (mulMin * mulUp) * voxelAxisUpBS;
                maxDiagPointBS += (mulMax * mulUp) * voxelAxisUpBS;

                float mulRight = dot(voxelAxisRightBS, N) >= 0 ? 1 : -1;

                minDiagPointBS -= (mulMin * mulRight) * voxelAxisRightBS;
                maxDiagPointBS += (mulMax * mulRight) * voxelAxisRightBS;

                // We want to determine the fractional overlap of the diagonal and the box.
                float3 diagOriginBS = minDiagPointBS;
                float3 diagUnDirBS  = maxDiagPointBS - minDiagPointBS;

                float tEntr, tExit;

                // The direction is unnormalized and the range is [0, 1], so the parameters are
                // fractions of the diagonal. The return value is not used; the test below is.
                IntersectRayAABB(diagOriginBS, diagUnDirBS,
                                 -obbExtents, obbExtents,
                                 0, 1,
                                 tEntr, tExit);

                float overlapFraction = tExit - tEntr;
            #else // SOFT_VOXELIZATION
                bool overlap = abs(voxelCenterUV.x) <= 1 &&
                               abs(voxelCenterUV.y) <= 1 &&
                               abs(voxelCenterUV.z) <= 1;

                float overlapFraction = overlap ? 1 : 0;
            #endif // SOFT_VOXELIZATION

                if (overlapFraction > 0)
                {
                    float scatteringAndExtinctionMask = 1.0f;

                    // Sample the volumeMask (a texture index of -1 means the volume has no mask)
                    if (_VolumeData[volumeIndex].textureIndex != -1)
                    {
                        float3 voxelRightSizeBS = mul(voxelRightSize, transpose(obbFrame));
                        float3 voxelRightSizeUV = (voxelRightSizeBS / obbExtents);
                        float3 voxelUpSizeBS    = mul(voxelUpSize, transpose(obbFrame));
                        float3 voxelUpSizeUV    = (voxelUpSizeBS / obbExtents);
                        float3 voxelDepthSizeBS = mul(voxelDepthSize, transpose(obbFrame));
                        float3 voxelDepthSizeUV = (voxelDepthSizeBS / obbExtents);

                        scatteringAndExtinctionMask = SampleVolumeMask(_VolumeData[volumeIndex],
                                                                       voxelCenterUV * 0.5 + 0.5,
                                                                       voxelRightSizeUV, voxelUpSizeUV, voxelDepthSizeUV);
                    }

                    // There is an overlap. Sample the 3D texture, or load the constant value.
                    voxelScattering += overlapFraction * _VolumeData[volumeIndex].scattering * scatteringAndExtinctionMask;
                    voxelExtinction += overlapFraction * _VolumeData[volumeIndex].extinction * scatteringAndExtinctionMask;
                }

    #ifndef USE_CLUSTERED_LIGHTLIST
            }
        }
    #else // USE_CLUSTERED_LIGHTLIST
                // Advance to the next volume in one (or both at the same time) clusters.
                if (volumeIndex == volumeIndices[0])
                {
                    i++;
                    volumeIndices[0] = FetchIndexWithBoundsCheck(volumeStarts[0], volumeCounts[0], i);
                }

                if (volumeIndex == volumeIndices[1])
                {
                    j++;
                    volumeIndices[1] = FetchIndexWithBoundsCheck(volumeStarts[1], volumeCounts[1], j);
                }
            } while (i < volumeCounts[0] || j < volumeCounts[1]);
        }

        // We don't need to carry over the cluster index, only the start and the count.
        volumeStarts[0] = volumeStarts[1];
        volumeCounts[0] = volumeCounts[1];
    #endif // USE_CLUSTERED_LIGHTLIST

        _VBufferDensity[voxelCoord] = float4(voxelScattering, voxelExtinction);

        // The back face of this voxel is the front face of the next one.
        z0 = z1;
    }
}

// Kernel entry point: one thread per XY voxel of the V-buffer. Each thread builds the ray and the
// axes of its voxel column and hands them to FillVolumetricDensityBuffer(), which loops over Z.
[numthreads(GROUP_SIZE_1D, GROUP_SIZE_1D, 1)]
void VolumeVoxelization(uint2 groupId       : SV_GroupID,
                        uint2 groupThreadId : SV_GroupThreadID)
{
    // Perform compile-time checks.
    if (!IsPower2(VBUFFER_TILE_SIZE) || !IsPower2(TILE_SIZE_CLUSTERED)) return;

    uint2 groupCoord  = groupThreadId;
    uint2 groupOffset = groupId * GROUP_SIZE_1D;
    uint2 voxelCoord  = groupOffset + groupCoord;
    uint2 tileCoord   = voxelCoord * VBUFFER_TILE_SIZE / TILE_SIZE_CLUSTERED;

    uint voxelsPerClusterTile = Sq((uint)(TILE_SIZE_CLUSTERED / VBUFFER_TILE_SIZE));

    if (voxelsPerClusterTile >= 64)
    {
        // TODO: this is a compile-time test, make sure the compiler actually scalarizes.
        // Derive the tile from the group offset so that it does not depend on the thread index.
        tileCoord = groupOffset * VBUFFER_TILE_SIZE / TILE_SIZE_CLUSTERED;
    }

    // Threads of the border groups may fall outside of the buffer.
    UNITY_BRANCH
    if (voxelCoord.x >= (uint)_VBufferResolution.x ||
        voxelCoord.y >= (uint)_VBufferResolution.y)
    {
        return;
    }

    // Reminder: our voxel is a skewed pyramid frustum with square front and back faces.
    // Compute 3x orthogonal directions.
    float2 centerCoord = voxelCoord + float2( 0.5,  0.5);
    float2 leftCoord   = voxelCoord + float2(-0.5,  0.5);
    float2 upCoord     = voxelCoord + float2( 0.5, -0.5);

    // TODO: avoid 2x matrix multiplications by precomputing the world-space offset on the vs_Z=1 plane.
    // Compute 3x ray directions s.t. its ViewSpace(rayDirWS).z = 1.
    float3 centerDirWS = mul(-float3(centerCoord, 1), (float3x3)_VBufferCoordToViewDirWS);
    float3 leftDirWS   = mul(-float3(leftCoord,   1), (float3x3)_VBufferCoordToViewDirWS);
    float3 upDirWS     = mul(-float3(upCoord,     1), (float3x3)_VBufferCoordToViewDirWS);

    // Compute the axes of the voxel. These are not normalized, but rather computed to scale with Z.
    float3 voxelAxisForward = centerDirWS;
    float3 voxelAxisUp      = 0.5 * (upDirWS - centerDirWS);
    float3 voxelAxisRight   = 0.5 * (centerDirWS - leftDirWS);

    PositionInputs posInput = GetPositionInput(voxelCoord, _VBufferResolution.zw, tileCoord);

    FillVolumetricDensityBuffer(posInput, GetCurrentViewPosition(), centerDirWS,
                                voxelAxisRight, voxelAxisUp, voxelAxisForward);
}