// Tweak parameters.
#define SSS_BILATERAL_FILTER 1
#define SSS_USE_LDS_CACHE 1
#define SSS_ENABLE_NEAR_FIELD 0
#define SSS_SAMPLE_TEST_HTILE 0
#define SSS_USE_TANGENT_PLANE 0
#define SSS_CLAMP_ARTIFACT 0
#define SSS_ENABLE_NEAR_FIELD 0 // Greatly increases the number of samples. Comes at a high cost.
#define SSS_SAMPLE_TEST_HTILE 0 // Potential optimization. YMMV.
#define SSS_USE_TANGENT_PLANE 0 // Improves the accuracy of the approximation(0 -> 1st order). High cost. Does not work with back-facing normals.
#define SSS_CLAMP_ARTIFACT 0 // Reduces bleeding. Use with SSS_USE_TANGENT_PLANE.
#define SSS_DEBUG_LOD 0
#define SSS_DEBUG_NORMAL_VS 0
#define TEXTURE_CACHE_BORDER 2
#define TEXTURE_CACHE_SIZE_1D (GROUP_SIZE_1D + 2 * TEXTURE_CACHE_BORDER)
// Check for support of typed UAV loads from FORMAT_R16G16B16A16_FLOAT.
// TODO: query the format support more precisely.
#if !(defined(SHADER_API_PSSL) || defined(SHADER_API_XBOXONE))
#define USE_INTERMEDIATE_BUFFER
#endif
//--------------------------------------------------------------------------------------------------
// Included headers
//--------------------------------------------------------------------------------------------------
float4 _WorldScales[SSS_N_PROFILES]; // Size of the world unit in meters (only the X component is used)
float4 _FilterKernels[SSS_N_PROFILES][SSS_N_SAMPLES_NEAR_FIELD]; // XY = near field, ZW = far field; 0 = radius, 1 = reciprocal of the PDF
DECLARE_GBUFFER_TEXTURE(_GBufferTexture); // Contains the albedo and SSS parameters
TEXTURE2D(_DepthTexture); // Z-buffer
TEXTURE2D(_StencilTexture); // DXGI_FORMAT_R8_UINT is not supported by Unity
TEXTURE2D(_HTile); // DXGI_FORMAT_R8_UINT is not supported by Unity
TEXTURE2D(_IrradianceSource); // Includes transmitted light
RW_TEXTURE2D(float4, _CameraFilteringTexture); // Target texture
DECLARE_GBUFFER_TEXTURE(_GBufferTexture); // Contains the albedo and SSS parameters
TEXTURE2D(_DepthTexture); // Z-buffer
TEXTURE2D(_HTile); // DXGI_FORMAT_R8_UINT is not supported by Unity
TEXTURE2D(_IrradianceSource); // Includes transmitted light
#ifdef USE_INTERMEDIATE_BUFFER
RW_TEXTURE2D(float4, _CameraFilteringTexture); // Target texture
#else
RW_TEXTURE2D(float4, _CameraColorTexture); // Target texture
#endif
//--------------------------------------------------------------------------------------------------
// Implementation
bool passedStencilTest;
#if SSS_SAMPLE_TEST_HTILE
int2 tileCoord = pixelCoord >> 3; // Divide by 8
int2 tileCoord = pixelCoord / 8;
// Perform the stencil test (reject at the tile rate).
passedStencilTest = stencilRef == LOAD_TEXTURE2D(_HTile, tileCoord).r;
{
// Unfortunately, our copy of HTile does not allow to accept at the tile rate.
// Therefore, we have to additionally perform the stencil test at the pixel rate.
passedStencilTest = (uint)stencilRef == UnpackByte(LOAD_TEXTURE2D(_StencilTexture, pixelCoord).r);
// We check the tagged irradiance buffer to avoid an extra stencil texture fetch.
passedStencilTest = TestLightingForSSS(LOAD_TEXTURE2D(_IrradianceSource, pixelCoord).rgb);
}
return passedStencilTest;
float4 textureSample = LoadSample(position, cacheAnchor);
float3 irradiance = textureSample.rgb;
float linearDepth = textureSample.a;
if (linearDepth > 0)
if (TestLightingForSSS(irradiance))
float linearDepth = textureSample.a;
float z = linearDepth - centerPosVS.z;
float p = _FilterKernels[profileID][i][iP];
float3 w = ComputeBilateralWeight(xy2, z, mmPerUnit, shapeParam, p);
}
else
{
// The irradiance is 0. This could happen for 2 reasons.
// Most likely, the surface fragment does not have an SSS material.
// Alternatively, our sample comes from a region without any geometry.
// Our blur is energy-preserving, so 'centerWeight' should be set to 0.
// We do not terminate the loop since we want to gather the contribution
// of the remaining samples (e.g. in case of hair covering skin).
}
}
void WriteResult(uint2 pixelCoord, float3 irradiance)
{
#ifdef USE_INTERMEDIATE_BUFFER
_CameraFilteringTexture[pixelCoord] = float4(irradiance, 1);
#else
_CameraColorTexture[pixelCoord] += float4(irradiance, 0);
#endif
}
#pragma kernel SubsurfaceScattering
uint groupThreadId : SV_GroupThreadID)
{
// Note: any factor of 64 is a suitable wave size for our algorithm.
uint waveIndex = groupThreadId / 64;
uint waveIndex = WaveReadFirstLane( groupThreadId / 64) ;
uint laneIndex = groupThreadId % 64;
uint quadIndex = laneIndex / 4;
float3 centerIrradiance = 0;
float centerDepth = 0;
float4 cachedValue = float4( 0, 0, 0, 0) ;
float4 cachedValue = 0;
bool passedStencilTest = StencilTest((int2)pixelCoord, stencilRef);
uint2 cacheCoord2 = 2 * (startQuad + quadCoord) + uint2(laneIndex & 1, (laneIndex >> 1) & 1);
int2 pixelCoord2 = (int2)(tileAnchor + cacheCoord2) - TEXTURE_CACHE_BORDER;
float4 cachedValue2 = float4( 0, 0, 0, 0) ;
float4 cachedValue2 = 0;
[branch] if (StencilTest(pixelCoord2, stencilRef))
{
[branch] if (distScale == 0 || maxDistInPixels < 1)
{
#if SSS_DEBUG_LOD
_CameraFilteringTexture[pixelCoord] = float4(0, 0, 1, 1 );
WriteResult(pixelCoord, float3(0, 0, 1) );
_CameraFilteringTexture[pixelCoord] = float4(albedo * centerIrradiance, 1 );
WriteResult(pixelCoord, albedo * centerIrradiance );
#endif
return;
}
float3 tangentY = GetLocalFrame(normalVS)[1] * unitsPerMm;
#if SSS_DEBUG_NORMAL_VS
// We expect the view-space normal to be front-facing.
if (normalVS.z >= 0)
// We expect the normal to be front-facing.
float3 viewDirVS = normalize(centerPosVS);
if (dot(normalVS, viewDirVS) >= 0)
_CameraFilteringTexture[pixelCoord] = float4(1, 1, 1, 1);
WriteResult(pixelCoord, float3(1, 1, 1));
return;
}
#endif
#if SSS_DEBUG_LOD
_CameraFilteringTexture[pixelCoord] = useNearFieldKernel ? float4(1, 0, 0, 1) : float4(0.5, 0.5, 0, 1 );
WriteResult(pixelCoord, useNearFieldKernel ? float3(1, 0, 0) : float3(0.5, 0.5, 0 );
return;
#endif
[branch] if (!useNearFieldKernel)
{
_CameraFilteringTexture[pixelCoord] = float4(albedo * totalIrradiance / totalWeight, 1 );
WriteResult(pixelCoord, albedo * totalIrradiance / totalWeight );
return;
}
totalIrradiance, totalWeight);
}
_CameraFilteringTexture[pixelCoord] = float4(albedo * totalIrradiance / totalWeight, 1 );
WriteResult(pixelCoord, albedo * totalIrradiance / totalWeight );
}