|
|
|
|
|
|
#endif |
|
|
|
groupshared bool processGroup; |
|
|
|
|
|
|
|
// Decides whether the pixel at 'pixelCoord' should be treated as an SSS pixel. |
// |
// pixelCoord - integer coordinate used to load from _IrradianceSource (and, divided by 8, from _HTile). |
// stencilRef - reference value compared against the red channel of _HTile; |
//              it is only read when SSS_SAMPLE_TEST_HTILE is enabled. |
// |
// Returns the result of TestLightingForSSS() on the irradiance stored at 'pixelCoord'. |
// When SSS_SAMPLE_TEST_HTILE is enabled and the HTile value differs from 'stencilRef', |
// the per-pixel test is skipped and the function returns false. |
bool StencilTest(int2 pixelCoord, float stencilRef) |



{ |



bool passedStencilTest; |







#if SSS_SAMPLE_TEST_HTILE |



// NOTE(review): the divisor 8 presumably reflects an 8x8 pixel HTile tile - confirm. |
int2 tileCoord = pixelCoord / 8; |







// Perform the stencil test (reject at the tile rate). |
// This is an exact floating-point equality comparison against the red channel. |



passedStencilTest = stencilRef == LOAD_TEXTURE2D(_HTile, tileCoord).r; |







// This 'if' has no body of its own inside the #if section: it guards the brace block |
// that follows the #endif. When the #else section is compiled instead, that block |
// runs unconditionally. |
[branch] if (passedStencilTest) |



#else |



// It is extremely uncommon for individual samples to fail the HTile test. |



// Unfortunately, our copy of HTile does not allow us to accept at the tile rate. |



// Therefore, we choose not to perform the HiS test here. |



#endif |



{ |



// Unfortunately, our copy of HTile does not allow us to accept at the tile rate. |



// Therefore, we have to additionally perform the stencil test at the pixel rate. |



// We check the tagged irradiance buffer to avoid an extra stencil texture fetch. |



passedStencilTest = TestLightingForSSS(LOAD_TEXTURE2D(_IrradianceSource, pixelCoord).rgb); |



} |







return passedStencilTest; |



} |
|
|
|
|
|
|
|
#if SSS_USE_LDS_CACHE |
|
|
|
float4 LoadSampleFromCacheMemory(int2 cacheCoord) |
|
|
|
{ |
|
|
|
|
|
|
// Returns {irradiance, linearDepth}. |
|
|
|
float4 LoadSample(int2 pixelCoord, int2 cacheAnchor) |
|
|
|
{ |
|
|
|
#if SSS_USE_LDS_CACHE |
|
|
|
#if SSS_USE_LDS_CACHE |
|
|
|
[branch] if (isInCache) |
|
|
|
{ |
|
|
|
return LoadSampleFromCacheMemory(cacheCoord); |
|
|
|
|
|
|
{ |
|
|
|
float stencilRef = STENCILLIGHTINGUSAGE_SPLIT_LIGHTING; |
|
|
|
|
|
|
|
[branch] if (StencilTest(pixelCoord, stencilRef)) |
|
|
|
{ |
|
|
|
return LoadSampleFromVideoMemory(pixelCoord); |
|
|
|
} |
|
|
|
else |
|
|
|
{ |
|
|
|
return float4(0, 0, 0, 0); |
|
|
|
} |
|
|
|
// Always load both irradiance and depth. |
|
|
|
// Avoid dependent texture reads at the cost of extra bandwidth. |
|
|
|
return LoadSampleFromVideoMemory(pixelCoord); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
if (TestLightingForSSS(irradiance)) |
|
|
|
{ |
|
|
|
// Apply bilateral weighting. |
|
|
|
float linearDepth = textureSample.a; |
|
|
|
float z = linearDepth - centerPosVS.z; |
|
|
|
float p = _FilterKernels[profileID][i][iP]; |
|
|
|
float3 w = ComputeBilateralWeight(xy2, z, mmPerUnit, shapeParam, p); |
|
|
|
float viewZ = textureSample.a; |
|
|
|
float relZ = viewZ - centerPosVS.z; |
|
|
|
float rcpPdf = _FilterKernels[profileID][i][iP]; |
|
|
|
float3 weight = ComputeBilateralWeight(xy2, relZ, mmPerUnit, shapeParam, rcpPdf); |
|
|
|
totalIrradiance += w * irradiance; |
|
|
|
totalWeight += w; |
|
|
|
totalIrradiance += weight * irradiance; |
|
|
|
totalWeight += weight; |
|
|
|
} |
|
|
|
else |
|
|
|
{ |
|
|
|
|
|
|
|
|
|
|
[branch] if (!processGroup) { return; } |
|
|
|
|
|
|
|
float3 centerIrradiance = 0; |
|
|
|
float centerDepth = 0; |
|
|
|
float4 cachedValue = 0; |
|
|
|
|
|
|
|
bool passedStencilTest = StencilTest((int2)pixelCoord, stencilRef); |
|
|
|
float3 centerIrradiance = LOAD_TEXTURE2D(_IrradianceSource, pixelCoord).rgb; |
|
|
|
float centerDepth = 0; |
|
|
|
float centerViewZ = 0; |
|
|
|
bool passedStencilTest = TestLightingForSSS(centerIrradiance); |
|
|
|
// Save some bandwidth by only loading depth values for SSS pixels. |
|
|
|
centerIrradiance = LOAD_TEXTURE2D(_IrradianceSource, pixelCoord).rgb; |
|
|
|
centerDepth = LOAD_TEXTURE2D(_DepthTexture, pixelCoord).r; |
|
|
|
cachedValue = float4(centerIrradiance, LinearEyeDepth(centerDepth, _ZBufferParams)); |
|
|
|
centerDepth = LOAD_TEXTURE2D(_DepthTexture, pixelCoord).r; |
|
|
|
centerViewZ = LinearEyeDepth(centerDepth, _ZBufferParams); |
|
|
|
textureCache[Mad24(TEXTURE_CACHE_SIZE_1D, cacheCoord.y, cacheCoord.x)] = cachedValue; |
|
|
|
textureCache[Mad24(TEXTURE_CACHE_SIZE_1D, cacheCoord.y, cacheCoord.x)] = float4(centerIrradiance, centerViewZ); |
|
|
|
|
|
|
|
uint numBorderQuadsPerWave = TEXTURE_CACHE_SIZE_1D / 2 - 1; |
|
|
|
uint halfCacheWidthInQuads = TEXTURE_CACHE_SIZE_1D / 4; |
|
|
|
|
|
|
break; |
|
|
|
} |
|
|
|
|
|
|
|
uint2 cacheCoord2 = 2 * (startQuad + quadCoord) + uint2(laneIndex & 1, (laneIndex >> 1) & 1); |
|
|
|
int2 pixelCoord2 = (int2)(tileAnchor + cacheCoord2) - TEXTURE_CACHE_BORDER; |
|
|
|
float4 cachedValue2 = 0; |
|
|
|
uint2 cacheCoord2 = 2 * (startQuad + quadCoord) + uint2(laneIndex & 1, (laneIndex >> 1) & 1); |
|
|
|
int2 pixelCoord2 = (int2)(tileAnchor + cacheCoord2) - TEXTURE_CACHE_BORDER; |
|
|
|
float3 irradiance2 = LOAD_TEXTURE2D(_IrradianceSource, pixelCoord2).rgb; |
|
|
|
float viewZ2 = 0; |
|
|
|
[branch] if (StencilTest(pixelCoord2, stencilRef)) |
|
|
|
// Save some bandwidth by only loading depth values for SSS pixels. |
|
|
|
[branch] if (TestLightingForSSS(irradiance2)) |
|
|
|
cachedValue2 = LoadSampleFromVideoMemory(pixelCoord2); |
|
|
|
viewZ2 = LinearEyeDepth(LOAD_TEXTURE2D(_DepthTexture, pixelCoord2).r, _ZBufferParams); |
|
|
|
textureCache[Mad24(TEXTURE_CACHE_SIZE_1D, cacheCoord2.y, cacheCoord2.x)] = cachedValue2; |
|
|
|
textureCache[Mad24(TEXTURE_CACHE_SIZE_1D, cacheCoord2.y, cacheCoord2.x)] = float4(irradiance2, viewZ2); |
|
|
|
} |
|
|
|
|
|
|
|
// Wait for the LDS. |
|
|
|
|
|
|
bool useNearFieldKernel = SSS_ENABLE_NEAR_FIELD && maxDistInPixels > SSS_LOD_THRESHOLD; |
|
|
|
|
|
|
|
#if SSS_DEBUG_LOD |
|
|
|
StoreResult(pixelCoord, useNearFieldKernel ? float3(1, 0, 0) : float3(0.5, 0.5, 0); |
|
|
|
StoreResult(pixelCoord, useNearFieldKernel ? float3(1, 0, 0) : float3(0.5, 0.5, 0)); |
|
|
|
return; |
|
|
|
#endif |
|
|
|
|
|
|
|