+ obb.extentY * Mathf.Abs(Vector3.Dot(n, obb.up))
+ obb.extentZ * Mathf.Abs(Vector3.Dot(n, obb.forward));
// Positive distance -> center in front of the plane.
// Negative distance -> center behind the plane (outside).
float centerToPlaneDist = Vector3.Dot(n, obb.center) + d;


// Frustum cull density volumes on the CPU. Can be performed as soon as the camera is set up.
DensityVolumeList densityVolumes = m_VolumetricLightingSystem.PrepareVisibleDensityVolumeList(hdCamera, cmd);
// Perform the voxelization step which fills the density 3D texture.
// Requires the clustered lighting data structure to be built, and can run async.
m_VolumetricLightingSystem.VolumeVoxelizationPass(densityVolumes, hdCamera, cmd, m_FrameSettings);
// Note: Legacy Unity behaves like this for ShadowMask.
// When you select ShadowMask in the Lighting panel, it recompiles shaders on the fly with the SHADOW_MASK keyword.
// However, there is no C# function that we can query to know which mode has been selected in the Lighting panel, and it would be wrong anyway: the Lighting panel sets up what the next bake mode will be, but until the lights are baked, that mode is not the one in effect.

// The pass only requires the volume properties, and can run async.
// Perform the voxelization step which fills the density 3D texture.
// Requires the clustered lighting data structure to be built, and can run async.
m_VolumetricLightingSystem.VolumeVoxelizationPass(densityVolumes, hdCamera, cmd, m_FrameSettings);
// Render the volumetric lighting.
// The pass requires the volume properties, the light list and the shadows, and can run async.


// Cached shader property IDs (resolved once via Shader.PropertyToID).
public static readonly int _VBufferLightingFeedback = Shader.PropertyToID("_VBufferLightingFeedback");
public static readonly int _VBufferSampleOffset = Shader.PropertyToID("_VBufferSampleOffset");
public static readonly int _VolumeBounds = Shader.PropertyToID("_VolumeBounds");
// ID of the "_VBufferDensity" shader property. The field name must match the property string:
// declaring it as a second '_VolumeProperties' is a duplicate-member error and leaves
// 'HDShaderIDs._VBufferDensity' undefined.
public static readonly int _VBufferDensity = Shader.PropertyToID("_VBufferDensity");
public static readonly int _VolumeProperties = Shader.PropertyToID("_VolumeProperties");
public static readonly int _NumVisibleDensityVolumes = Shader.PropertyToID("_NumVisibleDensityVolumes");


RW_TEXTURE3D(float4, _VBufferDensity); // RGB = sqrt(scattering), A = sqrt(extinction)
// TODO: avoid creating another Constant Buffer...
float4x4 _VBufferCoordToViewDirWS; // Actually just 3x3, but Unity can only set 4x4
float4 _VBufferSampleOffset; // Not used by this shader
float _CornetteShanksConstant; // Not used by this shader
uint _NumVisibleDensityVolumes;
// Writes the density values for every slice of the voxel column located at 'voxelCoord'.
// For each slice, the voxel center is placed at 'rayOriginWS + z * rayUnDirWS', and the voxel is
// tested against the bounds of each visible density volume using 3 planes:
// 'planeEquationRight', 'planeEquationUp', and a forward plane rebuilt per slice from 'planeNormalFwd'.
// 'faceExtent' is scaled by 'z' to obtain the voxel extents along the first two planes.
// The value stored is the product of the per-plane fractional overlaps.
void FillVolumetricDensityBuffer(uint2 voxelCoord, float3 rayOriginWS, float3 rayUnDirWS,
float4 planeEquationUp, float4 planeEquationRight,
float3 planeNormalFwd, float faceExtent)
float n = _VBufferDepthDecodingParams.x + _VBufferDepthDecodingParams.z;
float z0 = n; // Start the computation from the near plane
float de = rcp(VBUFFER_SLICE_COUNT); // Log-encoded distance between slices
#if defined(SHADER_API_METAL)
for (uint slice = 0; slice < VBUFFER_SLICE_COUNT; slice++)
uint sliceCountHack = max(VBUFFER_SLICE_COUNT, (uint)_VBufferDepthEncodingParams.w); // Prevent unrolling...
// TODO: replace 'sliceCountHack' with VBUFFER_SLICE_COUNT when the shader compiler bug is fixed.
for (uint slice = 0; slice < sliceCountHack; slice++)
float e1 = slice * de + de; // (slice + 1) / sliceCount
#if defined(SHADER_API_METAL)
// Warning: this compiles, but it's nonsense. Use DecodeLogarithmicDepthGeneralized().
float z1 = DecodeLogarithmicDepth(e1, _VBufferDepthDecodingParams);
float z1 = DecodeLogarithmicDepthGeneralized(e1, _VBufferDepthDecodingParams);
// Midpoint of the slice along the view-space Z axis.
float z = z0 + 0.5 * (z1 - z0);
float3 voxelCenterWS = rayOriginWS + z * rayUnDirWS; // Works due to the length of the dir
float4 planeEquationForward = float4(planeNormalFwd, dot(-planeNormalFwd, voxelCenterWS));
float4 planes[3] = { planeEquationRight, planeEquationUp, planeEquationForward };
// If the box overlaps all 3 planes, it overlaps the center of the voxel.
// Otherwise, we have to determine partial coverage.
// We approximate the voxel with a parallelepiped with a square front face.
float voxelExtents[3] = { faceExtent * z, faceExtent * z, 0.5 * (z1 - z0) };
// Start from zero density; the value is only overwritten if a volume overlaps the voxel.
_VBufferDensity[uint3(voxelCoord, slice)] = 0;
for (uint i = 0; i < _NumVisibleDensityVolumes; i++)
// Perform plane-box overlap test.
// See "Real-Time Rendering", 3rd Edition, 16.10.2.
OrientedBBox obb = _VolumeBounds[i];
float3 obb_forward = cross(obb.up, obb.right);
// Compute the fractional overlap between the voxel and the box.
float overlapFraction = 1;
for (uint p = 0; p < 3; p++)
float3 N = planes[p].xyz;
float d = planes[p].w;
// Max projection of the half-diagonal onto the normal (always positive).
float maxHalfDiagProj = obb.extentX * abs(dot(N, obb.right))
+ obb.extentY * abs(dot(N, obb.up))
+ obb.extentZ * abs(dot(N, obb_forward));
// Positive distance -> center in front of the plane.
// Negative distance -> center behind the plane.
float centerToPlaneDist = dot(N, obb.center) + d;
// Compute min/max distances from the plane to the box.
float minBoxToPlaneDist = abs(centerToPlaneDist) - maxHalfDiagProj;
float maxBoxToPlaneDist = abs(centerToPlaneDist) + maxHalfDiagProj;
// Check whether the plane overlaps the box.
bool overlap = minBoxToPlaneDist <= 0;
float dMin = minBoxToPlaneDist;
float dMax = maxBoxToPlaneDist;
float vExt = voxelExtents[p];
float iExt = rcp(vExt);
// Simplify:
// if (overlap)
// overlapFraction *= saturate((min(dMax, vExt) + min(-dMin, vExt)) / (2 * vExt));
// else
// overlapFraction *= saturate((min(dMax, vExt) - min( dMin, vExt)) / (2 * vExt));
float a = min(1, dMax * iExt);
float b = min(1, abs(dMin) * iExt);
overlapFraction *= saturate(0.5 * (a + (overlap ? b : -b)));
if (overlapFraction > 0)
// There is an overlap. Sample the 3D texture, or load the constant value.
// NOTE(review): the value is assigned rather than accumulated, so if several volumes overlap
// the same voxel, a later volume appears to replace an earlier one - confirm this is intended.
// The multiplication by the volume's extinction is currently commented out.
_VBufferDensity[uint3(voxelCoord, slice)] = overlapFraction;// * _VolumeProperties[i].extinction;
// The far bound of this slice becomes the near bound of the next one.
z0 = z1;
// Perform compile-time checks.
if (!IsPower2(VBUFFER_TILE_SIZE) || !IsPower2(TILE_SIZE_CLUSTERED)) return;
uint2 groupCoord = groupThreadId;
uint2 groupOffset = groupId * GROUP_SIZE_1D;
uint2 voxelCoord = groupOffset + groupCoord;
uint2 tileCoord = voxelCoord * VBUFFER_TILE_SIZE / TILE_SIZE_CLUSTERED;
uint voxelsPerClusterTile = Sq((uint)(TILE_SIZE_CLUSTERED / VBUFFER_TILE_SIZE));
if (voxelsPerClusterTile >= 64)
// TODO: this is a compile-time test, make sure the compiler actually scalarizes.
if (voxelCoord.x >= (uint)_VBufferResolution.x ||
voxelCoord.y >= (uint)_VBufferResolution.y)
// Perform semi-conservative solid voxelization with partial coverage.
// See "A Topological Approach to Voxelization" by Samuli Laine, 5.2.1.
// The intersection target is rather efficient (3 planes), and, as Samuli notes,
// can work for inputs other than 1D primitives.
// Reminder: our voxel is a skewed pyramid frustum.
// Compute two orthogonal directions.
float2 centerCoord = voxelCoord + float2( 0.5, 0.5);
float2 leftCoord = voxelCoord + float2(-0.5, 0.5);
float2 upCoord = voxelCoord + float2( 0.5, -0.5);
// TODO: avoid 2x matrix multiplications by precomputing the world-space offset on the vs_Z=1 plane.
// Compute 2x ray directions s.t. its ViewSpace(rayDirWS).z = 1.
float3 centerDirWS = mul(-float3(centerCoord, 1), (float3x3)_VBufferCoordToViewDirWS);
float3 leftDirWS = mul(-float3(leftCoord, 1), (float3x3)_VBufferCoordToViewDirWS);
float3 upDirWS = mul(-float3(upCoord, 1), (float3x3)_VBufferCoordToViewDirWS);
// Construct 3x plane normals.
float3 planeNormalFwd = GetViewForwardDir();
float3 planeNormalUp = normalize(cross(centerDirWS, leftDirWS));
float3 planeNormalRight = normalize(cross(centerDirWS, upDirWS));
// Compose 2x plane equations (they pass through the camera).
// The 3rd plane equation depends on the slice, so we'll have to update it inside the loop.
float3 cameraPositionWS = GetCurrentViewPosition();
float4 planeEquationUp = float4(planeNormalUp, dot(-planeNormalUp, cameraPositionWS));
float4 planeEquationRight = float4(planeNormalRight, dot(-planeNormalRight, cameraPositionWS));
// We approximate the voxel with a parallelepiped with a square front face.
// Compute the extents (half-dimensions) of the front face on the vs_Z=1 plane.
// TODO: directly compute the inverse.
// TODO: precompute and load this value from the constant buffer. It's a constant!
float faceExtent = 0.5 * distance(leftDirWS, centerDirWS);
FillVolumetricDensityBuffer(voxelCoord, cameraPositionWS, centerDirWS,
planeEquationUp, planeEquationRight,
planeNormalFwd, faceExtent);


// TODO: avoid creating another Constant Buffer...
float4 _VBufferSampleOffset; // {x, y, z}, w = rendered frame count
float _CornetteShanksConstant; // CornetteShanksPhasePartConstant(_GlobalAsymmetry)
float4 _VBufferSampleOffset; // Not used by this shader
float _CornetteShanksConstant; // Not used by this shader
uint _NumVisibleDensityVolumes;

float e1 = slice * de + de; // (slice + 1) / sliceCount
#if defined(SHADER_API_METAL)
// Warning: this compiles, but it's nonsense. Use DecodeLogarithmicDepthGeneralized().
float z1 = DecodeLogarithmicDepth(e1, _VBufferDepthDecodingParams);
float z1 = DecodeLogarithmicDepthGeneralized(e1, _VBufferDepthDecodingParams);

// Perform compile-time checks.
if (!IsPower2(VBUFFER_TILE_SIZE) || !IsPower2(TILE_SIZE_CLUSTERED)) return;
// Note: any factor of 64 is a suitable wave size for our algorithm.
uint waveIndex = WaveReadFirstLane(groupThreadId / 64);
uint laneIndex = groupThreadId % 64;
uint quadIndex = laneIndex / 4;
uint2 groupCoord = groupThreadId;
uint2 voxelCoord = groupOffset + groupCoord;
uint2 voxelCoord = groupOffset + groupThreadId;
uint2 tileCoord = voxelCoord * VBUFFER_TILE_SIZE / TILE_SIZE_CLUSTERED;
uint voxelsPerClusterTile = Sq((uint)(TILE_SIZE_CLUSTERED / VBUFFER_TILE_SIZE));

float2 centerCoord = voxelCoord + 0.5;
float2 centerCoord = voxelCoord + float2(0.5, 0.5);
float2 strataCoord = centerCoord + _VBufferSampleOffset.xy;

// TODO: avoid 2x matrix multiplications by precomputing the world-space offset on the Z=1 plane.
// Compute the (tile-centered) ray direction s.t. its ViewSpace(rayDirWS).z = 1.
float3 centerDirWS = mul(-float3(centerCoord, 1), (float3x3)_VBufferCoordToViewDirWS);
float centerDirLenSq = dot(centerDirWS, centerDirWS);
float centerDirLenRcp = rsqrt(centerDirLenSq);
float centerDirLen = centerDirLenSq * centerDirLenRcp;
// Compute the (tile-centered) ray direction s.t. its ViewSpace(rayDirWS).z = 1.
float3 centerDirWS = mul(-float3(centerCoord, 1), (float3x3)_VBufferCoordToViewDirWS);
float centerDirLenSq = dot(centerDirWS, centerDirWS);
float centerDirLenRcp = rsqrt(centerDirLenSq);
float centerDirLen = centerDirLenSq * centerDirLenRcp;
DualRay ray;


if (preset == VolumetricLightingPreset.Off) return densityVolumes;
Vector3 camPosition = camera.camera.transform.position;
Vector3 camOffset = Vector3.zero; // World-origin-relative
if (ShaderConfig.s_CameraRelativeRendering != 0)
using (new ProfilingSample(cmd, "Prepare Visible Density Volume List"))
camOffset = camPosition; // Camera-relative
Vector3 camPosition = camera.camera.transform.position;
Vector3 camOffset = Vector3.zero; // World-origin-relative
if (ShaderConfig.s_CameraRelativeRendering != 0)
camOffset = camPosition; // Camera-relative
// Collect all visible finite volume data, and upload it to the GPU.
HomogeneousDensityVolume[] volumes = Object.FindObjectsOfType(typeof(HomogeneousDensityVolume)) as HomogeneousDensityVolume[];
for (int i = 0; i < Math.Min(volumes.Length, k_MaxVisibleVolumeCount); i++)
HomogeneousDensityVolume volume = volumes[i];
// Collect all visible finite volume data, and upload it to the GPU.
HomogeneousDensityVolume[] volumes = Object.FindObjectsOfType(typeof(HomogeneousDensityVolume)) as HomogeneousDensityVolume[];
// Only test active finite volumes.
if (volume.enabled && volume.parameters.IsLocalVolume())
for (int i = 0; i < Math.Min(volumes.Length, k_MaxVisibleVolumeCount); i++)
// TODO: cache these?
var obb = OrientedBBox.Create(volume.transform);
HomogeneousDensityVolume volume = volumes[i];
// Handle camera-relative rendering.
obb.center -= camOffset;
// Frustum cull on the CPU for now. TODO: do it on the GPU.
if (GeometryUtils.Overlap(obb, camera.frustum, 6, 8))
// Only test active finite volumes.
if (volume.enabled && volume.parameters.IsLocalVolume())
var properties = volume.parameters.GetProperties();
var obb = OrientedBBox.Create(volume.transform);
// Handle camera-relative rendering.
obb.center -= camOffset;
// Frustum cull on the CPU for now. TODO: do it on the GPU.
if (GeometryUtils.Overlap(obb, camera.frustum, 6, 8))
// TODO: cache these?
var properties = volume.parameters.GetProperties();
// Fill the struct with pointers in order to share the data with the light loop.
densityVolumes.bounds = m_VisibleVolumeBounds;
densityVolumes.properties = m_VisibleVolumeProperties;
// Fill the struct with pointers in order to share the data with the light loop.
densityVolumes.bounds = m_VisibleVolumeBounds;
densityVolumes.properties = m_VisibleVolumeProperties;
return densityVolumes;
return densityVolumes;
// Records the volume voxelization compute pass into 'cmd'.
// Looks up the V-buffer for the camera's view, selects the clustered or brute-force kernel based on
// 'settings.lightLoopSettings.enableTileAndCluster', binds the visible volume bounds/properties buffers
// and the density texture, uploads the coordinate-to-view-direction matrix and the visible volume count,
// and dispatches ((w + 7) / 8) x ((h + 7) / 8) x 1 thread groups for a V-buffer of resolution w x h.
// NOTE(review): 'densityVolumes' is not read in the visible body; the volume count comes from
// 'm_VisibleVolumeBounds' - confirm whether the parameter is still needed.
public void VolumeVoxelizationPass(DensityVolumeList densityVolumes, HDCamera camera, CommandBuffer cmd, FrameSettings settings)

int numVisibleVolumes = m_VisibleVolumeBounds.Count;
using (new ProfilingSample(cmd, "Volume Voxelization"))
int numVisibleVolumes = m_VisibleVolumeBounds.Count;
if (numVisibleVolumes == 0)
// Clear the render target instead of running the shader.
// CoreUtils.SetRenderTarget(cmd, vBuffer.GetDensityBuffer(), ClearFlag.Color, CoreUtils.clearColorAllBlack);
// return;
if (numVisibleVolumes == 0)
// Clear the render target instead of running the shader.
// CoreUtils.SetRenderTarget(cmd, vBuffer.GetDensityBuffer(), ClearFlag.Color, CoreUtils.clearColorAllBlack);
// return;
// Clearing 3D textures does not seem to work!
// Use the workaround by running the full shader with 0 density.
// Clearing 3D textures does not seem to work!
// Use the workaround by running the full shader with 0 density.
VBuffer vBuffer = FindVBuffer(camera.GetViewID());
Debug.Assert(vBuffer != null);
VBuffer vBuffer = FindVBuffer(camera.GetViewID());
Debug.Assert(vBuffer != null);
int w = 0, h = 0, d = 0;
vBuffer.GetResolution(ref w, ref h, ref d);
int w = 0, h = 0, d = 0;
vBuffer.GetResolution(ref w, ref h, ref d);
bool enableClustered = settings.lightLoopSettings.enableTileAndCluster;
bool enableClustered = settings.lightLoopSettings.enableTileAndCluster;
int kernel = m_VolumeVoxelizationCS.FindKernel(enableClustered ? "VolumeVoxelizationClustered"
: "VolumeVoxelizationBruteforce");
int kernel = m_VolumeVoxelizationCS.FindKernel(enableClustered ? "VolumeVoxelizationClustered"
: "VolumeVoxelizationBruteforce");
float vFoV = camera.camera.fieldOfView * Mathf.Deg2Rad;
Vector4 resolution = new Vector4(w, h, 1.0f / w, 1.0f / h);
Matrix4x4 transform = HDUtils.ComputePixelCoordToWorldSpaceViewDirectionMatrix(vFoV, resolution, camera.viewMatrix, false);
camera.SetupComputeShader( m_VolumeVoxelizationCS, cmd);
cmd.SetComputeBufferParam( m_VolumeVoxelizationCS, kernel, HDShaderIDs._VolumeBounds, s_VisibleVolumeBoundsBuffer);
cmd.SetComputeBufferParam( m_VolumeVoxelizationCS, kernel, HDShaderIDs._VolumeProperties, s_VisibleVolumePropertiesBuffer);
cmd.SetComputeTextureParam(m_VolumeVoxelizationCS, kernel, HDShaderIDs._VBufferDensity, vBuffer.GetDensityBuffer());
camera.SetupComputeShader( m_VolumeVoxelizationCS, cmd);
cmd.SetComputeTextureParam(m_VolumeVoxelizationCS, kernel, HDShaderIDs._VBufferDensity, vBuffer.GetDensityBuffer());
cmd.SetComputeBufferParam( m_VolumeVoxelizationCS, kernel, HDShaderIDs._VolumeBounds, s_VisibleVolumeBoundsBuffer);
cmd.SetComputeBufferParam( m_VolumeVoxelizationCS, kernel, HDShaderIDs._VolumeProperties, s_VisibleVolumePropertiesBuffer);
// TODO: set the constant buffer data only once.
cmd.SetComputeMatrixParam( m_VolumeVoxelizationCS, HDShaderIDs._VBufferCoordToViewDirWS, transform);
cmd.SetComputeIntParam( m_VolumeVoxelizationCS, HDShaderIDs._NumVisibleDensityVolumes, numVisibleVolumes);
// The shader defines GROUP_SIZE_1D = 8.
cmd.DispatchCompute(m_VolumeVoxelizationCS, kernel, (w + 7) / 8, (h + 7) / 8, 1);
// The shader defines GROUP_SIZE_1D = 8.
cmd.DispatchCompute(m_VolumeVoxelizationCS, kernel, (w + 7) / 8, (h + 7) / 8, 1);
// Ref: https://en.wikipedia.org/wiki/Close-packing_of_equal_spheres

Vector4 offset = new Vector4(xySeq[sampleIndex].x, xySeq[sampleIndex].y, zSeq[sampleIndex], rfc);
// TODO: set 'm_VolumetricLightingPreset'.
// TODO: set the constant buffer data only once.
cmd.SetComputeMatrixParam( m_VolumetricLightingCS, HDShaderIDs._VBufferCoordToViewDirWS, transform);
cmd.SetComputeVectorParam( m_VolumetricLightingCS, HDShaderIDs._VBufferSampleOffset, offset);
cmd.SetComputeVectorParam( m_VolumetricLightingCS, HDShaderIDs._VBufferSampleOffset, offset);
cmd.SetComputeMatrixParam( m_VolumetricLightingCS, HDShaderIDs._VBufferCoordToViewDirWS, transform);
cmd.SetComputeTextureParam(m_VolumetricLightingCS, kernel, HDShaderIDs._VBufferLightingIntegral, vBuffer.GetLightingIntegralBuffer()); // Write
if (enableReprojection)


