浏览代码

Remove the loop over clusters from the volumetric lighting pass

/main
Evgenii Golubev 6 年前
当前提交
d101e8ec
共有 2 个文件被更改,包括 249 次插入167 次删除
  1. 11
      ScriptableRenderPipeline/HDRenderPipeline/HDRP/Lighting/Volumetrics/VolumeVoxelization.compute
  2. 405
      ScriptableRenderPipeline/HDRenderPipeline/HDRP/Lighting/Volumetrics/VolumetricLighting.compute

11
ScriptableRenderPipeline/HDRenderPipeline/HDRP/Lighting/Volumetrics/VolumeVoxelization.compute


float de = rcp(VBUFFER_SLICE_COUNT); // Log-encoded distance between slices
#ifdef USE_CLUSTERED_LIGHTLIST
// Our voxel is not necessarily completely inside a single light cluster (along Z).
// Note that Z-binning can solve this problem, as we can iterate over all Z-bins
// to compute min/max light indices, and then use this range for the entire slice.
// The voxel can overlap up to 2 light clusters along Z, so we have to iterate over both.
// TODO: implement Z-binning which makes Z-range queries easy.
uint volumeStarts[2], volumeCounts[2];
GetCountAndStartCluster(posInput.tileCoord, GetLightClusterIndex(posInput.tileCoord, z0),

GetCountAndStartCluster(posInput.tileCoord, GetLightClusterIndex(posInput.tileCoord, z1),
LIGHTCATEGORY_DENSITY_VOLUME, volumeStarts[1], volumeCounts[1]);
// We now iterate over all density volumes within the two clusters along Z.
// Iterate over all volumes within 2 (not necessarily unique) clusters overlapping the voxel along Z.
// We need to skip duplicates, but it's not too difficult since volumes are sorted by index.
uint i = 0, j = 0;

#else // USE_CLUSTERED_LIGHTLIST
{
for (uint i = 0; i < _NumVisibleDensityVolumes; i++)
for (uint volumeIndex = 0; volumeIndex < _NumVisibleDensityVolumes; volumeIndex++)
uint volumeIndex = i;
#endif // USE_CLUSTERED_LIGHTLIST
OrientedBBox obb = _VolumeBounds[volumeIndex];

405
ScriptableRenderPipeline/HDRenderPipeline/HDRP/Lighting/Volumetrics/VolumetricLighting.compute


VoxelLighting EvaluateVoxelLighting(LightLoopContext context, uint featureFlags, PositionInputs posInput, float3 centerWS,
DualRay ray, float t0, float t1, float dt, float rndVal, float extinction, float asymmetry
#ifdef USE_CLUSTERED_LIGHTLIST
, uint clusterIndices[2], float clusterDepths[2])
, uint lightClusters[2])
#else
)
#endif

return lighting;
#endif
#ifdef USE_CLUSTERED_LIGHTLIST
// Loop over 1 or 2 light clusters.
int cluster = 0;
do
if (featureFlags & LIGHTFEATUREFLAGS_PUNCTUAL)
float tMin = max(t0, ray.strataDirInvViewZ * clusterDepths[cluster]);
float tMax = t1;
#ifdef USE_CLUSTERED_LIGHTLIST
// Iterate over all lights within 2 (not necessarily unique) clusters overlapping the voxel along Z.
// We need to skip duplicates, but it's not too difficult since lights are sorted by index.
uint lightStarts[2], lightCounts[2];
if (cluster == 0 && (clusterIndices[0] != clusterIndices[1]))
for (uint k = 0; k < 2; k++)
tMax = min(t1, ray.strataDirInvViewZ * clusterDepths[1]);
GetCountAndStartCluster(posInput.tileCoord, lightClusters[k], LIGHTCATEGORY_PUNCTUAL,
lightStarts[k], lightCounts[k]);
#else // USE_CLUSTERED_LIGHTLIST
float tMin = t0;
float tMax = t1;
#endif // USE_CLUSTERED_LIGHTLIST
if (featureFlags & LIGHTFEATUREFLAGS_PUNCTUAL)
uint i = 0, j = 0;
if (i < lightCounts[0] || j < lightCounts[1])
uint lightCount, lightStart;
// At least one of the clusters is non-empty.
uint lightIndices[2];
#ifdef USE_CLUSTERED_LIGHTLIST
GetCountAndStartCluster(posInput.tileCoord, clusterIndices[cluster], LIGHTCATEGORY_PUNCTUAL,
lightStart, lightCount);
#else // USE_CLUSTERED_LIGHTLIST
lightCount = _PunctualLightCount;
lightStart = 0;
#endif // USE_CLUSTERED_LIGHTLIST
// Fetch two initial indices from both clusters.
if (i < lightCounts[0])
{
lightIndices[0] = FetchIndex(lightStarts[0], i);
}
else
{
lightIndices[0] = UINT_MAX;
}
if (lightCount > 0)
if (j < lightCounts[1])
LightData light = FetchLight(lightStart, 0);
lightIndices[1] = FetchIndex(lightStarts[1], j);
}
else
{
lightIndices[1] = UINT_MAX;
}
int i = 0, last = lightCount - 1;
// Process all punctual lights except for box lights (which are technically not even punctual).
do
{
// Process lights in order.
uint lightIndex = min(lightIndices[0], lightIndices[1]);
#else // USE_CLUSTERED_LIGHTLIST
{
uint lightIndex = 0;
// Process all punctual lights except for box lights (which are technically not even punctual).
for (; lightIndex < _PunctualLightCount; lightIndex++)
{
#endif // USE_CLUSTERED_LIGHTLIST
LightData light = _LightDatas[lightIndex];
// Process box lights in a separate loop.
if (light.lightType == GPULIGHTTYPE_PROJECTOR_BOX) { break; }
float tEntr = t0;
float tExit = t1;
// Box lights require special handling (see the next while loop).
while (i <= last && light.lightType != GPULIGHTTYPE_PROJECTOR_BOX)
bool sampleLight = true;
// Perform ray-cone intersection for pyramid and spot lights.
if (light.lightType != GPULIGHTTYPE_POINT)
float tEntr = tMin;
float tExit = tMax;
float lenMul = 1;
if (light.lightType == GPULIGHTTYPE_PROJECTOR_PYRAMID)
{
// 'light.right' and 'light.up' vectors are pre-scaled on the CPU
// s.t. if you were to place them at the distance of 1 directly in front
// of the light, they would give you the "footprint" of the light.
// For spot lights, the cone fit is exact.
// For pyramid lights, however, this is the "inscribed" cone
// (contained within the pyramid), and we want to intersect
// the "escribed" cone (which contains the pyramid).
// Therefore, we have to scale the radii by the sqrt(2).
lenMul = rsqrt(2);
}
float3 coneAxisX = lenMul * light.right;
float3 coneAxisY = lenMul * light.up;
bool sampleLight = true;
sampleLight = IntersectRayCone(ray.originWS, ray.strataDirWS,
light.positionWS, light.forward,
coneAxisX, coneAxisY,
t0, t1, tEntr, tExit);
}
// Perform ray-cone intersection for pyramid and spot lights.
if (light.lightType != GPULIGHTTYPE_POINT)
{
float lenMul = 1;
if (sampleLight)
{
// We are unable to adequately sample features larger
// than the half of the length of the integration interval
// divided by the number of temporal samples (7).
// Therefore, we apply this hack to reduce flickering.
float hackMinDistSq = Sq(dt * (0.5 / 7));
if (light.lightType == GPULIGHTTYPE_PROJECTOR_PYRAMID)
{
// 'light.right' and 'light.up' vectors are pre-scaled on the CPU
// s.t. if you were to place them at the distance of 1 directly in front
// of the light, they would give you the "footprint" of the light.
// For spot lights, the cone fit is exact.
// For pyramid lights, however, this is the "inscribed" cone
// (contained within the pyramid), and we want to intersect
// the "escribed" cone (which contains the pyramid).
// Therefore, we have to scale the radii by the sqrt(2).
lenMul = rsqrt(2);
}
float t, distSq, rcpPdf;
ImportanceSamplePunctualLight(rndVal, light.positionWS,
ray.originWS, ray.strataDirWS,
tEntr, tExit, t, distSq, rcpPdf,
hackMinDistSq);
float3 coneAxisX = lenMul * light.right;
float3 coneAxisY = lenMul * light.up;
posInput.positionWS = GetPointAtDistance(ray, t);
sampleLight = IntersectRayCone(ray.originWS, ray.strataDirWS,
light.positionWS, light.forward,
coneAxisX, coneAxisY,
tMin, tMax, tEntr, tExit);
}
float3 lightToSample = posInput.positionWS - light.positionWS;
float distRcp = rsqrt(distSq);
float dist = distSq * distRcp;
float distProj = dot(lightToSample, light.forward);
float4 distances = float4(dist, distSq, distRcp, distProj);
float3 L = -lightToSample * distRcp;
if (sampleLight)
{
// We are unable to adequately sample features larger
// than the half of the length of the integration interval
// divided by the number of temporal samples (7).
// Therefore, we apply this hack to reduce flickering.
float hackMinDistSq = Sq(dt * (0.5 / 7));
float3 color; float attenuation;
EvaluateLight_Punctual(context, posInput, light, unused, 0, L, lightToSample,
distances, color, attenuation);
float t, distSq, rcpPdf;
ImportanceSamplePunctualLight(rndVal, light.positionWS,
ray.originWS, ray.strataDirWS,
tEntr, tExit, t, distSq, rcpPdf,
hackMinDistSq);
// Important:
// Ideally, all scattering calculations should use the stratified versions
// of the sample position and the ray direction. However, correct reprojection
// of asymmetrically scattered lighting (affected by an anisotropic phase
// function) is not possible. We work around this issue by reprojecting
// lighting not affected by the phase function. This basically removes
// the phase function from the temporal integration process. It is a hack.
// The downside is that asymmetry no longer benefits from temporal averaging,
// and any temporal instability of asymmetry causes visible jitter.
// In order to stabilize the image, we use the voxel center for all
// asymmetry-related calculations.
float3 centerL = light.positionWS - centerWS;
float cosTheta = dot(centerL, ray.centerDirWS) * rsqrt(dot(centerL, centerL));
float phase = CornetteShanksPhasePartVarying(asymmetry, cosTheta);
posInput.positionWS = GetPointAtDistance(ray, t);
float intensity = attenuation * rcpPdf;
float3 lightToSample = posInput.positionWS - light.positionWS;
float distRcp = rsqrt(distSq);
float dist = distSq * distRcp;
float distProj = dot(lightToSample, light.forward);
float4 distances = float4(dist, distSq, distRcp, distProj);
float3 L = -lightToSample * distRcp;
// Compute transmittance from 't0' to 't'.
intensity *= TransmittanceHomogeneousMedium(extinction, t - t0);
float3 color; float attenuation;
EvaluateLight_Punctual(context, posInput, light, unused, 0, L, lightToSample,
distances, color, attenuation);
// Compute the amount of in-scattered radiance.
lighting.radianceNoPhase += intensity * color;
lighting.radianceComplete += phase * intensity * color;
}
// Important:
// Ideally, all scattering calculations should use the stratified versions
// of the sample position and the ray direction. However, correct reprojection
// of asymmetrically scattered lighting (affected by an anisotropic phase
// function) is not possible. We work around this issue by reprojecting
// lighting not affected by the phase function. This basically removes
// the phase function from the temporal integration process. It is a hack.
// The downside is that asymmetry no longer benefits from temporal averaging,
// and any temporal instability of asymmetry causes visible jitter.
// In order to stabilize the image, we use the voxel center for all
// asymmetry-related calculations.
float3 centerL = light.positionWS - centerWS;
float cosTheta = dot(centerL, ray.centerDirWS) * rsqrt(dot(centerL, centerL));
float phase = CornetteShanksPhasePartVarying(asymmetry, cosTheta);
#ifndef USE_CLUSTERED_LIGHTLIST
}
float intensity = attenuation * rcpPdf;
// Process all box lights.
for (; lightIndex < _PunctualLightCount; lightIndex++)
{
#else // USE_CLUSTERED_LIGHTLIST
// Compute transmittance from 't0' to 't'.
intensity *= TransmittanceHomogeneousMedium(extinction, t - t0);
// Advance to the next light in one (or both at the same time) clusters.
if (lightIndex == lightIndices[0])
{
i++;
// Compute the amount of in-scattered radiance.
lighting.radianceNoPhase += intensity * color;
lighting.radianceComplete += phase * intensity * color;
if (i < lightCounts[0])
{
lightIndices[0] = FetchIndex(lightStarts[0], i);
}
else
{
lightIndices[0] = UINT_MAX;
light = FetchLight(lightStart, min(++i, last));
while (i <= last) // GPULIGHTTYPE_PROJECTOR_BOX
if (lightIndex == lightIndices[1])
light = FetchLight(lightStart, min(++i, last));
light.lightType = GPULIGHTTYPE_PROJECTOR_BOX;
j++;
// Convert the box light from OBB to AABB.
// 'light.right' and 'light.up' vectors are pre-scaled on the CPU by (2/w) and (2/h).
float3x3 rotMat = float3x3(light.right, light.up, light.forward);
if (j < lightCounts[1])
{
lightIndices[1] = FetchIndex(lightStarts[1], j);
}
else
{
lightIndices[1] = UINT_MAX;
}
}
} while (i < lightCounts[0] || j < lightCounts[1]);
float3 o = mul(rotMat, ray.originWS - light.positionWS);
float3 d = mul(rotMat, ray.strataDirWS);
// Process all box lights.
while (i < lightCounts[0] || j < lightCounts[1])
{
// Process lights in order.
uint lightIndex = min(lightIndices[0], lightIndices[1]);
float range = light.size.x;
float3 boxPt0 = float3(-1, -1, 0);
float3 boxPt1 = float3( 1, 1, range);
#endif // USE_CLUSTERED_LIGHTLIST
float tEntr, tExit;
LightData light = _LightDatas[lightIndex];
light.lightType = GPULIGHTTYPE_PROJECTOR_BOX;
if (IntersectRayAABB(o, d, boxPt0, boxPt1, tMin, tMax, tEntr, tExit))
{
float tOffset, weight;
ImportanceSampleHomogeneousMedium(rndVal, extinction, tExit - tEntr, tOffset, weight);
// Convert the box light from OBB to AABB.
// 'light.right' and 'light.up' vectors are pre-scaled on the CPU by (2/w) and (2/h).
float3x3 rotMat = float3x3(light.right, light.up, light.forward);
float t = tEntr + tOffset;
posInput.positionWS = GetPointAtDistance(ray, t);
float3 o = mul(rotMat, ray.originWS - light.positionWS);
float3 d = mul(rotMat, ray.strataDirWS);
float3 L = -light.forward;
float3 lightToSample = posInput.positionWS - light.positionWS;
float distProj = dot(lightToSample, light.forward);
float4 distances = float4(1, 1, 1, distProj);
float range = light.size.x;
float3 boxPt0 = float3(-1, -1, 0);
float3 boxPt1 = float3( 1, 1, range);
float3 color; float attenuation;
EvaluateLight_Punctual(context, posInput, light, unused, 0, L, lightToSample,
distances, color, attenuation);
float tEntr, tExit;
// Important:
// Ideally, all scattering calculations should use the stratified versions
// of the sample position and the ray direction. However, correct reprojection
// of asymmetrically scattered lighting (affected by an anisotropic phase
// function) is not possible. We work around this issue by reprojecting
// lighting not affected by the phase function. This basically removes
// the phase function from the temporal integration process. It is a hack.
// The downside is that asymmetry no longer benefits from temporal averaging,
// and any temporal instability of asymmetry causes visible jitter.
// In order to stabilize the image, we use the voxel center for all
// asymmetry-related calculations.
float3 centerL = light.positionWS - centerWS;
float cosTheta = dot(centerL, ray.centerDirWS) * rsqrt(dot(centerL, centerL));
float phase = CornetteShanksPhasePartVarying(asymmetry, cosTheta);
if (IntersectRayAABB(o, d, boxPt0, boxPt1, t0, t1, tEntr, tExit))
{
float tOffset, weight;
ImportanceSampleHomogeneousMedium(rndVal, extinction, tExit - tEntr, tOffset, weight);
// Note: the 'weight' accounts for transmittance from 'tEntr' to 't'.
float intensity = attenuation * weight;
float t = tEntr + tOffset;
posInput.positionWS = GetPointAtDistance(ray, t);
float3 L = -light.forward;
float3 lightToSample = posInput.positionWS - light.positionWS;
float distProj = dot(lightToSample, light.forward);
float4 distances = float4(1, 1, 1, distProj);
float3 color; float attenuation;
EvaluateLight_Punctual(context, posInput, light, unused, 0, L, lightToSample,
distances, color, attenuation);
// Important:
// Ideally, all scattering calculations should use the stratified versions
// of the sample position and the ray direction. However, correct reprojection
// of asymmetrically scattered lighting (affected by an anisotropic phase
// function) is not possible. We work around this issue by reprojecting
// lighting not affected by the phase function. This basically removes
// the phase function from the temporal integration process. It is a hack.
// The downside is that asymmetry no longer benefits from temporal averaging,
// and any temporal instability of asymmetry causes visible jitter.
// In order to stabilize the image, we use the voxel center for all
// asymmetry-related calculations.
float3 centerL = light.positionWS - centerWS;
float cosTheta = dot(centerL, ray.centerDirWS) * rsqrt(dot(centerL, centerL));
float phase = CornetteShanksPhasePartVarying(asymmetry, cosTheta);
// Note: the 'weight' accounts for transmittance from 'tEntr' to 't'.
float intensity = attenuation * weight;
// Compute transmittance from 't0' to 'tEntr'.
intensity *= TransmittanceHomogeneousMedium(extinction, tEntr - t0);
// Compute the amount of in-scattered radiance.
lighting.radianceNoPhase += intensity * color;
lighting.radianceComplete += phase * intensity * color;
}
#ifdef USE_CLUSTERED_LIGHTLIST
// Advance to the next light in one (or both at the same time) clusters.
if (lightIndex == lightIndices[0])
{
i++;
if (i < lightCounts[0])
{
lightIndices[0] = FetchIndex(lightStarts[0], i);
}
else
{
lightIndices[0] = UINT_MAX;
}
}
// Compute transmittance from 't0' to 'tEntr'.
intensity *= TransmittanceHomogeneousMedium(extinction, tEntr - t0);
if (lightIndex == lightIndices[1])
{
j++;
// Compute the amount of in-scattered radiance.
lighting.radianceNoPhase += intensity * color;
lighting.radianceComplete += phase * intensity * color;
if (j < lightCounts[1])
{
lightIndices[1] = FetchIndex(lightStarts[1], j);
}
else
{
lightIndices[1] = UINT_MAX;
#endif // USE_CLUSTERED_LIGHTLIST
#ifdef USE_CLUSTERED_LIGHTLIST
cluster++;
// Check whether the voxel is completely inside the light cluster.
} while ((cluster < 2) && (clusterIndices[0] != clusterIndices[1]));
#endif // USE_CLUSTERED_LIGHTLIST
}
return lighting;
}

float opticalDepth = 0;
#ifdef USE_CLUSTERED_LIGHTLIST
// Our voxel is not necessarily completely inside a single light cluster (along Z).
// Note that Z-binning can solve this problem, as we can iterate over all Z-bins
// to compute min/max light indices, and then use this range for the entire slice.
uint clusterIndices[2];
float clusterDepths[2];
clusterIndices[0] = GetLightClusterIndex(posInput.tileCoord, z0);
clusterDepths[0] = GetLightClusterMinLinearDepth(posInput.tileCoord, clusterIndices[0]);
// The voxel can overlap up to 2 light clusters along Z, so we have to iterate over both.
// TODO: implement Z-binning which makes Z-range queries easy.
uint lightClusters[2];
lightClusters[0] = GetLightClusterIndex(posInput.tileCoord, z0);
#endif // USE_CLUSTERED_LIGHTLIST
#if defined(SHADER_API_METAL)

float dt = t1 - t0;
#ifdef USE_CLUSTERED_LIGHTLIST
clusterIndices[1] = GetLightClusterIndex(posInput.tileCoord, z1);
clusterDepths[1] = GetLightClusterMinLinearDepth(posInput.tileCoord, clusterIndices[1]);
lightClusters[1] = GetLightClusterIndex(posInput.tileCoord, z1);
#endif
// Compute the -exact- position of the center of the voxel.

VoxelLighting lighting = EvaluateVoxelLighting(context, featureFlags, posInput, centerWS,
ray, t0, t1, dt, rndVal, extinction, asymmetry
#ifdef USE_CLUSTERED_LIGHTLIST
, clusterIndices, clusterDepths);
, lightClusters);
#else
);
#endif

// Store the voxel data.
_VBufferLightingIntegral[voxelCoord] = integral;
z0 = z1;
clusterIndices[0] = clusterIndices[1];
clusterDepths[0] = clusterDepths[1];
lightClusters[0] = lightClusters[1];
#endif
}
}

正在加载...
取消
保存