浏览代码

Reduce the number of dependent texture reads

/main
Evgenii Golubev 7 年前
当前提交
974fa5e1
共有 1 个文件被更改,包括 27 次插入59 次删除
  1. 86
      ScriptableRenderPipeline/HDRenderPipeline/Material/Lit/Resources/SubsurfaceScattering.compute

86
ScriptableRenderPipeline/HDRenderPipeline/Material/Lit/Resources/SubsurfaceScattering.compute


#endif
groupshared bool processGroup;
bool StencilTest(int2 pixelCoord, float stencilRef)
{
bool passedStencilTest;
#if SSS_SAMPLE_TEST_HTILE
int2 tileCoord = pixelCoord / 8;
// Perform the stencil test (reject at the tile rate).
passedStencilTest = stencilRef == LOAD_TEXTURE2D(_HTile, tileCoord).r;
[branch] if (passedStencilTest)
#else
// It is extremely uncommon for individual samples to fail the HTile test.
// Unfortunately, our copy of HTile does not allow to accept at the tile rate.
// Therefore, we choose not to perform the HiS test here.
#endif
{
// Unfortunately, our copy of HTile does not allow to accept at the tile rate.
// Therefore, we have to additionally perform the stencil test at the pixel rate.
// We check the tagged irradiance buffer to avoid an extra stencil texture fetch.
passedStencilTest = TestLightingForSSS(LOAD_TEXTURE2D(_IrradianceSource, pixelCoord).rgb);
}
return passedStencilTest;
}
#if SSS_USE_LDS_CACHE
float4 LoadSampleFromCacheMemory(int2 cacheCoord)
{

// Returns {irradiance, linearDepth}.
float4 LoadSample(int2 pixelCoord, int2 cacheAnchor)
{
#if SSS_USE_LDS_CACHE
#if SSS_USE_LDS_CACHE
[branch] if (isInCache)
{
return LoadSampleFromCacheMemory(cacheCoord);

{
float stencilRef = STENCILLIGHTINGUSAGE_SPLIT_LIGHTING;
[branch] if (StencilTest(pixelCoord, stencilRef))
{
return LoadSampleFromVideoMemory(pixelCoord);
}
else
{
return float4(0, 0, 0, 0);
}
// Always load both irradiance and depth.
// Avoid dependent texture reads at the cost of extra bandwidth.
return LoadSampleFromVideoMemory(pixelCoord);
}
}

if (TestLightingForSSS(irradiance))
{
// Apply bilateral weighting.
float linearDepth = textureSample.a;
float z = linearDepth - centerPosVS.z;
float p = _FilterKernels[profileID][i][iP];
float3 w = ComputeBilateralWeight(xy2, z, mmPerUnit, shapeParam, p);
float viewZ = textureSample.a;
float relZ = viewZ - centerPosVS.z;
float rcpPdf = _FilterKernels[profileID][i][iP];
float3 weight = ComputeBilateralWeight(xy2, relZ, mmPerUnit, shapeParam, rcpPdf);
totalIrradiance += w * irradiance;
totalWeight += w;
totalIrradiance += weight * irradiance;
totalWeight += weight;
}
else
{

[branch] if (!processGroup) { return; }
float3 centerIrradiance = 0;
float centerDepth = 0;
float4 cachedValue = 0;
bool passedStencilTest = StencilTest((int2)pixelCoord, stencilRef);
float3 centerIrradiance = LOAD_TEXTURE2D(_IrradianceSource, pixelCoord).rgb;
float centerDepth = 0;
float centerViewZ = 0;
bool passedStencilTest = TestLightingForSSS(centerIrradiance);
// Save some bandwidth by only loading depth values for SSS pixels.
centerIrradiance = LOAD_TEXTURE2D(_IrradianceSource, pixelCoord).rgb;
centerDepth = LOAD_TEXTURE2D(_DepthTexture, pixelCoord).r;
cachedValue = float4(centerIrradiance, LinearEyeDepth(centerDepth, _ZBufferParams));
centerDepth = LOAD_TEXTURE2D(_DepthTexture, pixelCoord).r;
centerViewZ = LinearEyeDepth(centerDepth, _ZBufferParams);
textureCache[Mad24(TEXTURE_CACHE_SIZE_1D, cacheCoord.y, cacheCoord.x)] = cachedValue;
textureCache[Mad24(TEXTURE_CACHE_SIZE_1D, cacheCoord.y, cacheCoord.x)] = float4(centerIrradiance, centerViewZ);
uint numBorderQuadsPerWave = TEXTURE_CACHE_SIZE_1D / 2 - 1;
uint halfCacheWidthInQuads = TEXTURE_CACHE_SIZE_1D / 4;

break;
}
uint2 cacheCoord2 = 2 * (startQuad + quadCoord) + uint2(laneIndex & 1, (laneIndex >> 1) & 1);
int2 pixelCoord2 = (int2)(tileAnchor + cacheCoord2) - TEXTURE_CACHE_BORDER;
float4 cachedValue2 = 0;
uint2 cacheCoord2 = 2 * (startQuad + quadCoord) + uint2(laneIndex & 1, (laneIndex >> 1) & 1);
int2 pixelCoord2 = (int2)(tileAnchor + cacheCoord2) - TEXTURE_CACHE_BORDER;
float3 irradiance2 = LOAD_TEXTURE2D(_IrradianceSource, pixelCoord2).rgb;
float viewZ2 = 0;
[branch] if (StencilTest(pixelCoord2, stencilRef))
// Save some bandwidth by only loading depth values for SSS pixels.
[branch] if (TestLightingForSSS(irradiance2))
cachedValue2 = LoadSampleFromVideoMemory(pixelCoord2);
viewZ2 = LinearEyeDepth(LOAD_TEXTURE2D(_DepthTexture, pixelCoord2).r, _ZBufferParams);
textureCache[Mad24(TEXTURE_CACHE_SIZE_1D, cacheCoord2.y, cacheCoord2.x)] = cachedValue2;
textureCache[Mad24(TEXTURE_CACHE_SIZE_1D, cacheCoord2.y, cacheCoord2.x)] = float4(irradiance2, viewZ2);
}
// Wait for the LDS.

bool useNearFieldKernel = SSS_ENABLE_NEAR_FIELD && maxDistInPixels > SSS_LOD_THRESHOLD;
#if SSS_DEBUG_LOD
StoreResult(pixelCoord, useNearFieldKernel ? float3(1, 0, 0) : float3(0.5, 0.5, 0);
StoreResult(pixelCoord, useNearFieldKernel ? float3(1, 0, 0) : float3(0.5, 0.5, 0));
return;
#endif

正在加载...
取消
保存